Remove default arguments from in-class friend declarations and pass the
former default values explicitly at the call sites touched by this patch.
(C++ only permits default arguments on a friend declaration when that
declaration is a definition.)

The operator<< hunks add braces around the element-printing loop body only,
so that "st << endl" keeps executing after the loop as it did before the
patch.  Good_Turing_smooth() call sites pass 0 for the int "mincount"
parameter, matching its former default.

NOTE(review): indentation and blank context lines below were reconstructed
from the hunk line counts; apply with whitespace-tolerant options if needed.

--- a/include/EST_String.h
+++ b/include/EST_String.h
@@ -574,10 +574,10 @@
       */
     //@{
     friend int fcompare(const EST_String &a, const EST_String &b,
-                        const unsigned char *table=NULL);
+                        const unsigned char *table);
 
     friend int fcompare(const EST_String &a, const char *b,
-                        const unsigned char *table=NULL);
+                        const unsigned char *table);
     ///
     friend inline int fcompare(const EST_String &a, const EST_String &b,
                                const EST_String &table)
diff --git a/grammar/ngram/EST_Ngrammar.cc b/grammar/ngram/EST_Ngrammar.cc
index a87e458..aa709ff 100644
--- a/grammar/ngram/EST_Ngrammar.cc
+++ b/grammar/ngram/EST_Ngrammar.cc
@@ -1503,7 +1503,7 @@ bool EST_Ngrammar::compute_backoff_weights(const int mincount,
 
     backoff_restore_unigram_states();
 
-    Good_Turing_discount(*this,maxcount);
+    Good_Turing_discount(*this,maxcount,0.5);
 
     // and since some frequencies will have been set to zero
     // we have to prune away those branches of the tree
diff --git a/grammar/ngram/freqsmooth.cc b/grammar/ngram/freqsmooth.cc
index 238322a..cc6f638 100644
--- a/grammar/ngram/freqsmooth.cc
+++ b/grammar/ngram/freqsmooth.cc
@@ -60,7 +60,7 @@ void Ngram_freqsmooth(EST_Ngrammar &ngram,int smooth_thresh1,
     EST_Ngrammar *backoff_ngrams;
     backoff_ngrams = new EST_Ngrammar[ngram.order()-1];
 
-    Good_Turing_smooth(ngram,smooth_thresh1);
+    Good_Turing_smooth(ngram,smooth_thresh1,0);
 
     fs_build_backoff_ngrams(backoff_ngrams,ngram);
 
diff --git a/grammar/ngram/ngrammar_aux.cc b/grammar/ngram/ngrammar_aux.cc
index b71c2d1..c4ac5e8 100644
--- a/grammar/ngram/ngrammar_aux.cc
+++ b/grammar/ngram/ngrammar_aux.cc
@@ -430,13 +430,13 @@ Good_Turing_smooth(EST_Ngrammar &ngrammar, int maxcount, int mincount)
             EST_DVector freqs,mapped_freqs;
             // grammar is of a single order - simple
             // Find frequency distribution
-            frequency_of_frequencies(freqs,ngrammar);
+            frequency_of_frequencies(freqs,ngrammar,0);
             // smoothing should be optional - to do
             smoothed_frequency_distribution_ExponentialFit(freqs,maxcount-1);
             // Build map of frequencies
             adjusted_frequencies_BasicGoodTuring(mapped_freqs,freqs,maxcount);
             // Map all frequencies in grammar to Good Turing Smoothed values
-            map_frequencies(ngrammar,mapped_freqs);
+            map_frequencies(ngrammar,mapped_freqs,0);
 
         }
         break;
diff --git a/include/EST_Ngrammar.h b/include/EST_Ngrammar.h
index 03ffe0c..7691e67 100644
--- a/include/EST_Ngrammar.h
+++ b/include/EST_Ngrammar.h
@@ -538,18 +538,18 @@ public:
                                                      double floor);
     friend EST_write_status save_ngram_htk_ascii(const EST_String filename,
                                                  EST_Ngrammar &n,
-                                                 double floor=0.0);
+                                                 double floor);
 
     //friend EST_write_status save_ngram_htk_binary(const EST_String filename,
     //                                                EST_Ngrammar &n);
     friend EST_write_status save_ngram_cstr_ascii(const EST_String filename,
                                                   EST_Ngrammar &n,
-                                                  const bool trace=false,
-                                                  double floor=0.0);
+                                                  const bool trace,
+                                                  double floor);
     friend EST_write_status save_ngram_cstr_bin(const EST_String filename,
                                                 EST_Ngrammar &n,
-                                                const bool trace=false,
-                                                double floor=0.0);
+                                                const bool trace,
+                                                double floor);
     friend EST_write_status save_ngram_arpa(const EST_String filename,
                                             EST_Ngrammar &n);
     friend EST_write_status save_ngram_arpa_sub(ostream *ost,
@@ -561,11 +561,11 @@ public:
     // Auxiliary functions
 
     // smoothing
-friend void frequency_of_frequencies(EST_DVector &ff, EST_Ngrammar &n,int this_order=0);
-friend void map_frequencies(EST_Ngrammar &n, const EST_DVector &map, const int this_order=0);
-friend bool Good_Turing_smooth(EST_Ngrammar &n, int maxcount, int mincount=0);
+friend void frequency_of_frequencies(EST_DVector &ff, EST_Ngrammar &n,int this_order);
+friend void map_frequencies(EST_Ngrammar &n, const EST_DVector &map, const int this_order);
+friend bool Good_Turing_smooth(EST_Ngrammar &n, int maxcount, int mincount);
 friend void Good_Turing_discount(EST_Ngrammar &ngrammar, const int maxcount,
-                                 const double default_discount=0.5);
+                                 const double default_discount);
 
 friend void fs_build_backoff_ngrams(EST_Ngrammar *backoff_ngrams,
                                   EST_Ngrammar &ngram);
diff --git a/include/EST_TMatrix.h b/include/EST_TMatrix.h
index 238eaef..51c6252 100644
--- a/include/EST_TMatrix.h
+++ b/include/EST_TMatrix.h
@@ -313,8 +313,10 @@ public:
     friend ostream& operator << (ostream &st,const EST_TMatrix<T> &a)
         {int i, j;
             for (i = 0; i < a.num_rows(); ++i) {
-                for (j = 0; j < a.num_columns(); ++j)
-                    st << a.a_no_check(i, j) << " "; st << endl;
+                for (j = 0; j < a.num_columns(); ++j) {
+                    st << a.a_no_check(i, j) << " ";
+                }
+                st << endl;
             }
             return st;
         }
diff --git a/include/EST_TVector.h b/include/EST_TVector.h
index 3658f09..79cb2d1 100644
--- a/include/EST_TVector.h
+++ b/include/EST_TVector.h
@@ -313,8 +313,10 @@ public:
     friend ostream& operator << (ostream &st, const EST_TVector<T> &m)
     {
         int i;
-        for (i = 0; i < m.n(); ++i)
-            st << m(i) << " "; st << endl;
+        for (i = 0; i < m.n(); ++i) {
+            st << m(i) << " ";
+        }
+        st << endl;
         return st;
     }
 
diff --git a/lib/libestbase.a b/lib/libestbase.a
index 8f11184..6e0f553 100644
Binary files a/lib/libestbase.a and b/lib/libestbase.a differ
diff --git a/lib/libestools.a b/lib/libestools.a
index fc6e862..ac65dea 100644
Binary files a/lib/libestools.a and b/lib/libestools.a differ
diff --git a/lib/libeststring.a b/lib/libeststring.a
index 131f511..da1fdca 100644
Binary files a/lib/libeststring.a and b/lib/libeststring.a differ
diff --git a/main/ngram_build_main.cc b/main/ngram_build_main.cc
index a78c8d8..b9aca8f 100644
--- a/main/ngram_build_main.cc
+++ b/main/ngram_build_main.cc
@@ -429,7 +429,7 @@ int main(int argc, char **argv)
     else if (al.present("-smooth") && !al.present("-backoff"))
     {
         int smoothcount = atoi(al.val("-smooth"));
-        if(!Good_Turing_smooth(ngrammar,smoothcount))
+        if(!Good_Turing_smooth(ngrammar,smoothcount,0))
         {
             cerr << "build_ngram: Failed to smooth " << order << "-gram" << endl;
             exit(1);
diff --git a/stats/EST_cluster.o b/stats/EST_cluster.o
index 1af22ce..8c120b4 100644
Binary files a/stats/EST_cluster.o and b/stats/EST_cluster.o differ
diff --git a/stats/EST_multistats.o b/stats/EST_multistats.o
index 8392d2c..02f16a4 100644
Binary files a/stats/EST_multistats.o and b/stats/EST_multistats.o differ