View | Details | Raw Unified
Collapse All | Expand All

(-) speech_tools/grammar/wfst/wfst_train.cc (-43 / +43 lines)
 Lines 194-200   enum EST_tprob_type {tprob_string, tprob Link Here 
    for example
    for example
    \begin{verbatim}
    \begin{verbatim}
       EST_DiscreteProbDistribution pdf;
       EST_DiscreteProbDistribution pdf;
       for (int i=pdf.item_start(); !pdf.item_end(i); i=pdf.item_next(i))
       for (long i=pdf.item_start(); !pdf.item_end(i); i=pdf.item_next(i))
       {
       {
          EST_String name;
          EST_String name;
          double prob;
          double prob;
 Lines 265-281   public: Link Here 
    /// 
    /// 
    double frequency(const int i) const; 
    double frequency(const int i) const; 
    /// Used for iterating through members of the distribution
    /// Used for iterating through members of the distribution
    int item_start() const;
    long item_start() const;
    /// Used for iterating through members of the distribution
    /// Used for iterating through members of the distribution
    int item_next(int idx) const;
    long item_next(long idx) const;
    /// Used for iterating through members of the distribution
    /// Used for iterating through members of the distribution
    int item_end(int idx) const;
    int item_end(long idx) const;
    /// During iteration returns name given index 
    /// During iteration returns name given index 
    const EST_String &item_name(int idx) const;
    const EST_String &item_name(long idx) const;
    /// During iteration returns name and frequency given index  
    /// During iteration returns name and frequency given index  
    void item_freq(int idx,EST_String &s,double &freq) const;
    void item_freq(long idx,EST_String &s,double &freq) const;
    /// During iteration returns name and probability given index
    /// During iteration returns name and probability given index
    void item_prob(int idx,EST_String &s,double &prob) const;
    void item_prob(long idx,EST_String &s,double &prob) const;
    /// Returns discrete vocabulary of distribution
    /// Returns discrete vocabulary of distribution
    inline const EST_Discrete *const get_discrete() const { return discrete; };
    inline const EST_Discrete *const get_discrete() const { return discrete; };
 Lines 537-543   ostream & operator <<(ostream &s, WImpur Link Here 
    }
    }
    else if (imp.t == wnim_class)
    else if (imp.t == wnim_class)
    {
    {
	int i;
	long i;
	EST_String name;
	EST_String name;
	double prob;
	double prob;
 Lines 305-319   double EST_DiscreteProbDistribution::ent Link Here 
}
}
//  For iterating through members of a probability distribution
//  For iterating through members of a probability distribution
int EST_DiscreteProbDistribution::item_start(void) const
long EST_DiscreteProbDistribution::item_start(void) const
{
{
    if (type == tprob_discrete)
    if (type == tprob_discrete)
	return 0;
	return 0;
    else
    else
	return (int)scounts.list.head();
	return (long)scounts.list.head();
}
}
int EST_DiscreteProbDistribution::item_end(int idx) const
int EST_DiscreteProbDistribution::item_end(long idx) const
{
{
    if (type == tprob_discrete)
    if (type == tprob_discrete)
	return (idx >= icounts.length());
	return (idx >= icounts.length());
 Lines 321-335   int EST_DiscreteProbDistribution::item_e Link Here 
	return ((EST_Litem *)idx == 0);
	return ((EST_Litem *)idx == 0);
}
}
int EST_DiscreteProbDistribution::item_next(int idx) const
long EST_DiscreteProbDistribution::item_next(long idx) const
{
{
    if (type == tprob_discrete)
    if (type == tprob_discrete)
	return ++idx;
	return ++idx;
    else
    else
	return (int)next((EST_Litem *)idx);
	return (long)next((EST_Litem *)idx);
}
}
const EST_String &EST_DiscreteProbDistribution::item_name(int idx) const
const EST_String &EST_DiscreteProbDistribution::item_name(long idx) const
{
{
    if (type == tprob_discrete)
    if (type == tprob_discrete)
	return discrete->name(idx);
	return discrete->name(idx);
 Lines 337-343   const EST_String &EST_DiscreteProbDistri Link Here 
	return scounts.list((EST_Litem *)idx).k;
	return scounts.list((EST_Litem *)idx).k;
}
}
void EST_DiscreteProbDistribution::item_freq(int idx,EST_String &s,double &freq) const
void EST_DiscreteProbDistribution::item_freq(long idx,EST_String &s,double &freq) const
{
{
    if (type == tprob_discrete)
    if (type == tprob_discrete)
    {
    {
 Lines 351-357   void EST_DiscreteProbDistribution::item_ Link Here 
    }
    }
}
}
void EST_DiscreteProbDistribution::item_prob(int idx,EST_String &s,double &prob) const
void EST_DiscreteProbDistribution::item_prob(long idx,EST_String &s,double &prob) const
{
{
    if (type == tprob_discrete)
    if (type == tprob_discrete)
    {
    {
 Lines 368-374   void EST_DiscreteProbDistribution::item_ Link Here 
ostream & operator<<(ostream &s, const EST_DiscreteProbDistribution &pd)
ostream & operator<<(ostream &s, const EST_DiscreteProbDistribution &pd)
{
{
    // Output best with probabilities
    // Output best with probabilities
    int i;
    long i;
    double prob;
    double prob;
    double sum=0;
    double sum=0;
    EST_String name;
    EST_String name;
 Lines 180-186   bool EST_BackoffNgrammarState::accumulat Link Here 
					  const double count)
					  const double count)
{
{
//    int i;
//    long i;
//    cerr << "accumulate level " << p_level << " : ";
//    cerr << "accumulate level " << p_level << " : ";
//    for(i=0;i<words.n();i++)
//    for(i=0;i<words.n();i++)
//    {
//    {
 Lines 302-308   void EST_BackoffNgrammarState::print_fre Link Here 
    // not right - just print out, then recurse through children
    // not right - just print out, then recurse through children
    // change to use 'backoff_traverse'
    // change to use 'backoff_traverse'
    
    
    int k;
    long k;
    double freq;
    double freq;
    EST_String name;
    EST_String name;
    for (k=p_pdf.item_start();
    for (k=p_pdf.item_start();
 Lines 369-375   void EST_BackoffNgrammarState::zap() Link Here 
{
{
    // recursively delete this state and all its children
    // recursively delete this state and all its children
    int k;
    long k;
    double freq;
    double freq;
    EST_String name;
    EST_String name;
    for (k=p_pdf.item_start();
    for (k=p_pdf.item_start();
 Lines 452-458   bool EST_BackoffNgrammarState::set_backo Link Here 
void EST_BackoffNgrammarState::frequency_of_frequencies(EST_DVector &ff)
void EST_BackoffNgrammarState::frequency_of_frequencies(EST_DVector &ff)
{
{
    int k,max=ff.n();
    long k; int max=ff.n();
    double freq;
    double freq;
    EST_String name;
    EST_String name;
    for (k=p_pdf.item_start();
    for (k=p_pdf.item_start();
 Lines 911-917   void EST_Ngrammar::accumulate(const EST_ Link Here 
{
{
    
    
    /*
    /*
       int i;
       long i;
       for(i=0;i<words.n();i++)
       for(i=0;i<words.n();i++)
       {
       {
       cerr << vocab_pdf.item_name(words(i));
       cerr << vocab_pdf.item_name(words(i));
 Lines 1581-1587   void EST_Ngrammar::prune_backoff_represe Link Here 
    // remove any branches with zero frequency count
    // remove any branches with zero frequency count
    
    
    // find children of this state with zero freq and zap them
    // find children of this state with zero freq and zap them
    int k;
    long k;
    double freq;
    double freq;
    EST_String name;
    EST_String name;
    for (k=start_state->pdf_const().item_start();
    for (k=start_state->pdf_const().item_start();
 Lines 2320-2326   void EST_Ngrammar::print_freqs(ostream & Link Here 
	backoff_representation->print_freqs(os,p_order);
	backoff_representation->print_freqs(os,p_order);
    else
    else
    {
    {
	int i,j,k;
	int i,j; long k;
	EST_IVector window(p_order-1);
	EST_IVector window(p_order-1);
	
	
	for (i=0; i < p_num_states; i++)
	for (i=0; i < p_num_states; i++)
 Lines 2661-2667   EST_Ngrammar::backoff_traverse(EST_Backo Link Here 
    function(start_state,params);
    function(start_state,params);
    
    
    // and recurse down the tree
    // and recurse down the tree
    int k;
    long k;
    double freq;
    double freq;
    EST_String name;
    EST_String name;
    for (k=start_state->pdf_const().item_start();
    for (k=start_state->pdf_const().item_start();
 Lines 2692-2698   EST_Ngrammar::backoff_traverse(EST_Backo Link Here 
    {
    {
	// and recurse down the tree if we haven't
	// and recurse down the tree if we haven't
	// reached the level yet
	// reached the level yet
	int k;
	long k;
	double freq;
	double freq;
	EST_String name;
	EST_String name;
	
	
 Lines 281-287   EST_read_status Link Here 
load_ngram_cstr_bin(const EST_String filename, EST_Ngrammar &n)
load_ngram_cstr_bin(const EST_String filename, EST_Ngrammar &n)
{
{
    EST_TokenStream ts;
    EST_TokenStream ts;
    int i,j,k,order;
    int i,j,order; long k;
    int num_entries;
    int num_entries;
    double approx_num_samples = 0.0;
    double approx_num_samples = 0.0;
    long freq_data_start, freq_data_end;
    long freq_data_start, freq_data_end;
 Lines 407-413   EST_write_status Link Here 
save_ngram_htk_ascii_sub(const EST_String &word, ostream *ost, 
save_ngram_htk_ascii_sub(const EST_String &word, ostream *ost, 
			 EST_Ngrammar &n, double floor)
			 EST_Ngrammar &n, double floor)
{
{
    int k;
    long k;
    EST_String name;
    EST_String name;
    double freq;
    double freq;
    EST_StrVector this_ngram(2); // assumes bigram
    EST_StrVector this_ngram(2); // assumes bigram
 Lines 734-740   save_ngram_cstr_ascii(const EST_String f Link Here 
    // awb's format
    // awb's format
    (void)trace;
    (void)trace;
    ostream *ost;
    ostream *ost;
    int i,k;
    int i; long k;
    
    
    if (filename == "-")
    if (filename == "-")
	ost = &cout;
	ost = &cout;
 Lines 831-837   save_ngram_cstr_bin(const EST_String fil Link Here 
    if (n.representation() == EST_Ngrammar::sparse)
    if (n.representation() == EST_Ngrammar::sparse)
	return misc_write_error;
	return misc_write_error;
    
    
    int i,k;
    int i; long k;
    FILE *ofd;
    FILE *ofd;
    double lfreq = -1;
    double lfreq = -1;
    double count = -1;
    double count = -1;
 Lines 117-123   smooth_ExponentialFit(EST_DVector &N, in Link Here 
void make_f_of_f(EST_BackoffNgrammarState *s,void *params)
void make_f_of_f(EST_BackoffNgrammarState *s,void *params)
{
{
    int k;
    long k;
    double freq;
    double freq;
    EST_String name;
    EST_String name;
 Lines 138-144   void make_f_of_f(EST_BackoffNgrammarStat Link Here 
void get_max_f(EST_BackoffNgrammarState *s,void *params)
void get_max_f(EST_BackoffNgrammarState *s,void *params)
{
{
    int k;
    long k;
    double freq;
    double freq;
    EST_String name;
    EST_String name;
 Lines 158-164   void get_max_f(EST_BackoffNgrammarState Link Here 
void map_f_of_f(EST_BackoffNgrammarState *s,void *params)
void map_f_of_f(EST_BackoffNgrammarState *s,void *params)
{
{
    int k;
    long k;
    double freq;
    double freq;
    EST_String name;
    EST_String name;
 Lines 184-190   void map_f_of_f(EST_BackoffNgrammarState Link Here 
void zero_small_f(EST_BackoffNgrammarState *s,void *params)
void zero_small_f(EST_BackoffNgrammarState *s,void *params)
{
{
    int k;
    long k;
    double freq;
    double freq;
    EST_String name;
    EST_String name;
 Lines 204-210   void zero_small_f(EST_BackoffNgrammarSta Link Here 
void frequency_of_frequencies(EST_DVector &ff, EST_Ngrammar &n,int this_order)
void frequency_of_frequencies(EST_DVector &ff, EST_Ngrammar &n,int this_order)
{
{
  int i,k,size;
  int i,size; long k;
  double max=0.0;
  double max=0.0;
  // if ff has zero size, do complete frequency of frequencies
  // if ff has zero size, do complete frequency of frequencies
 Lines 302-308   void frequency_of_frequencies(EST_DVecto Link Here 
void map_frequencies(EST_Ngrammar &n, const EST_DVector &map, const int this_order)
void map_frequencies(EST_Ngrammar &n, const EST_DVector &map, const int this_order)
{
{
  int i,k;
  int i; long k;
  switch(n.representation())
  switch(n.representation())
 Lines 71-77   EST_PredictionSuffixTree_tree_node::prin Link Here 
	// Base -- print from pd 
	// Base -- print from pd 
	EST_String s;
	EST_String s;
	double freq;
	double freq;
	for (int i = pd.item_start(); 
	for (long i = pd.item_start(); 
	     !pd.item_end(i); 
	     !pd.item_end(i); 
	     i=pd.item_next(i))
	     i=pd.item_next(i))
	{
	{
 Lines 98-104   EST_PredictionSuffixTree_tree_node::prin Link Here 
	EST_String s;
	EST_String s;
	double prob;
	double prob;
	os << get_path() << " :";
	os << get_path() << " :";
	for (int i = pd.item_start(); !pd.item_end(i) ; i=pd.item_next(i))
	for (long i = pd.item_start(); !pd.item_end(i) ; i=pd.item_next(i))
	{
	{
	    pd.item_prob(i,s,prob);
	    pd.item_prob(i,s,prob);
	    os << " " << s << " " << prob;
	    os << " " << s << " " << prob;
 Lines 74-80   void fs_build_backoff_ngrams(EST_Ngramma Link Here 
				 EST_Ngrammar &ngram)
				 EST_Ngrammar &ngram)
{
{
    // Build all the backoff grammars back to uni-grams
    // Build all the backoff grammars back to uni-grams
    int i,j,k,l;
    int i,j,l; long k;
    for (i=0; i < ngram.order()-1; i++)
    for (i=0; i < ngram.order()-1; i++)
	backoff_ngrams[i].init(i+1,EST_Ngrammar::dense,
	backoff_ngrams[i].init(i+1,EST_Ngrammar::dense,
 Lines 110-116   int fs_backoff_smooth(EST_Ngrammar *back Link Here 
{
{
    // For all ngrams which are too infrequent, adjust their
    // For all ngrams which are too infrequent, adjust their
    // frequencies based on their backoff probabilities
    // frequencies based on their backoff probabilities
    int i,j;
    int i; long j;
    double occurs;
    double occurs;
    double backoff_prob;
    double backoff_prob;
 Lines 299-305   static LISP find_best_split(EST_WFST &wf Link Here 
    LISP *ssplits;
    LISP *ssplits;
    gc_protect(&splits);
    gc_protect(&splits);
    EST_String sname;
    EST_String sname;
    int b,best_b,i;
    int b,best_b; long i;
    int num_pdfs;
    int num_pdfs;
    double best_score, score, sfreq;
    double best_score, score, sfreq;
 Lines 374-380   static double score_pdf_combine(EST_Disc Link Here 
    // Find score of (a+b) vs (all-(a+b))
    // Find score of (a+b) vs (all-(a+b))
    EST_DiscreteProbDistribution ab(a);
    EST_DiscreteProbDistribution ab(a);
    EST_DiscreteProbDistribution all_but_ab(all);
    EST_DiscreteProbDistribution all_but_ab(all);
    int i;
    long i;
    EST_String sname;
    EST_String sname;
    double sfreq, score;
    double sfreq, score;
    for (i=b.item_start(); !b.item_end(i);
    for (i=b.item_start(); !b.item_end(i);
 Lines 506-512   static double find_score_if_split(EST_WF Link Here 
    EST_DiscreteProbDistribution pdf_split(&wfst.in_symbols());
    EST_DiscreteProbDistribution pdf_split(&wfst.in_symbols());
    EST_DiscreteProbDistribution pdf_remain(&wfst.in_symbols());
    EST_DiscreteProbDistribution pdf_remain(&wfst.in_symbols());
    int in, tostate, id;
    int in, tostate, id;
    int i;
    long i;
    double sfreq;
    double sfreq;
    EST_String sname;
    EST_String sname;