Gentoo Websites Logo
Go to: Gentoo Home Documentation Forums Lists Bugs Planet Store Wiki Get Gentoo!
View | Details | Raw Unified | Return to bug 685810
Collapse All | Expand All

(-)a/include/EST_String.h (-2 / +2 lines)
Line  Link Here
0
--- a/include/EST_String.h
0
+++ b/include/EST_String.h
Lines 574-583 Link Here
574
      */
574
      */
575
    //@{
575
    //@{
576
    friend int fcompare(const EST_String &a, const EST_String &b, 
576
    friend int fcompare(const EST_String &a, const EST_String &b, 
577
			const unsigned char *table=NULL);
577
			const unsigned char *table);
578
578
579
    friend int fcompare(const EST_String &a, const char *b, 
579
    friend int fcompare(const EST_String &a, const char *b, 
580
				const unsigned char *table=NULL);
580
				const unsigned char *table);
581
    ///
581
    ///
582
    friend inline int fcompare(const EST_String &a, const EST_String &b, 
582
    friend inline int fcompare(const EST_String &a, const EST_String &b, 
583
			       const EST_String &table) 
583
			       const EST_String &table) 
(-)a/grammar/ngram/EST_Ngrammar.cc (-1 / +1 lines)
Lines 1503-1509 bool EST_Ngrammar::compute_backoff_weights(const int mincount, Link Here
1503
    
1503
    
1504
    backoff_restore_unigram_states();
1504
    backoff_restore_unigram_states();
1505
    
1505
    
1506
    Good_Turing_discount(*this,maxcount);
1506
    Good_Turing_discount(*this,maxcount,0.5);
1507
    
1507
    
1508
    // and since some frequencies will have been set to zero
1508
    // and since some frequencies will have been set to zero
1509
    // we have to prune away those branches of the tree
1509
    // we have to prune away those branches of the tree
(-)a/grammar/ngram/freqsmooth.cc (-1 / +1 lines)
Lines 60-66 void Ngram_freqsmooth(EST_Ngrammar &ngram,int smooth_thresh1, Link Here
60
    EST_Ngrammar *backoff_ngrams;
60
    EST_Ngrammar *backoff_ngrams;
61
    backoff_ngrams = new EST_Ngrammar[ngram.order()-1];
61
    backoff_ngrams = new EST_Ngrammar[ngram.order()-1];
62
62
63
    Good_Turing_smooth(ngram,smooth_thresh1);
63
    Good_Turing_smooth(ngram,smooth_thresh1,0.5);
64
64
65
    fs_build_backoff_ngrams(backoff_ngrams,ngram);
65
    fs_build_backoff_ngrams(backoff_ngrams,ngram);
66
66
(-)a/grammar/ngram/ngrammar_aux.cc (-2 / +2 lines)
Lines 430-442 Good_Turing_smooth(EST_Ngrammar &ngrammar, int maxcount, int mincount) Link Here
430
	EST_DVector freqs,mapped_freqs;
430
	EST_DVector freqs,mapped_freqs;
431
	// grammar is of a single order - simple
431
	// grammar is of a single order - simple
432
	// Find frequency distribution
432
	// Find frequency distribution
433
	frequency_of_frequencies(freqs,ngrammar);
433
	frequency_of_frequencies(freqs,ngrammar,0);
434
	// smoothing should be optional - to do
434
	// smoothing should be optional - to do
435
	smoothed_frequency_distribution_ExponentialFit(freqs,maxcount-1);
435
	smoothed_frequency_distribution_ExponentialFit(freqs,maxcount-1);
436
	// Build map of frequencies
436
	// Build map of frequencies
437
	adjusted_frequencies_BasicGoodTuring(mapped_freqs,freqs,maxcount);
437
	adjusted_frequencies_BasicGoodTuring(mapped_freqs,freqs,maxcount);
438
	// Map all frequencies in grammar to Good Turing Smoothed values
438
	// Map all frequencies in grammar to Good Turing Smoothed values
439
	map_frequencies(ngrammar,mapped_freqs);
439
	map_frequencies(ngrammar,mapped_freqs,0);
440
	
440
	
441
    }
441
    }
442
    break;
442
    break;
(-)a/include/EST_Ngrammar.h (-9 / +9 lines)
Lines 538-555 public: Link Here
538
						     double floor);
538
						     double floor);
539
    friend EST_write_status save_ngram_htk_ascii(const EST_String filename, 
539
    friend EST_write_status save_ngram_htk_ascii(const EST_String filename, 
540
						 EST_Ngrammar &n,
540
						 EST_Ngrammar &n,
541
						 double floor=0.0);
541
						 double floor);
542
542
543
    //friend EST_write_status save_ngram_htk_binary(const EST_String filename, 
543
    //friend EST_write_status save_ngram_htk_binary(const EST_String filename, 
544
    //					  EST_Ngrammar &n);
544
    //					  EST_Ngrammar &n);
545
    friend EST_write_status save_ngram_cstr_ascii(const EST_String filename, 
545
    friend EST_write_status save_ngram_cstr_ascii(const EST_String filename, 
546
						  EST_Ngrammar &n,
546
						  EST_Ngrammar &n,
547
						  const bool trace=false,
547
						  const bool trace,
548
						  double floor=0.0);
548
						  double floor);
549
    friend EST_write_status save_ngram_cstr_bin(const EST_String filename, 
549
    friend EST_write_status save_ngram_cstr_bin(const EST_String filename, 
550
						EST_Ngrammar &n, 
550
						EST_Ngrammar &n, 
551
						const bool trace=false,
551
						const bool trace,
552
						double floor=0.0);
552
						double floor);
553
    friend EST_write_status save_ngram_arpa(const EST_String filename, 
553
    friend EST_write_status save_ngram_arpa(const EST_String filename, 
554
					    EST_Ngrammar &n);
554
					    EST_Ngrammar &n);
555
    friend EST_write_status save_ngram_arpa_sub(ostream *ost, 
555
    friend EST_write_status save_ngram_arpa_sub(ostream *ost, 
Lines 561-571 public: Link Here
561
    // Auxiliary functions
561
    // Auxiliary functions
562
    
562
    
563
    // smoothing
563
    // smoothing
564
friend void frequency_of_frequencies(EST_DVector &ff, EST_Ngrammar &n,int this_order=0);
564
friend void frequency_of_frequencies(EST_DVector &ff, EST_Ngrammar &n,int this_order);
565
friend void map_frequencies(EST_Ngrammar &n, const EST_DVector &map, const int this_order=0);
565
friend void map_frequencies(EST_Ngrammar &n, const EST_DVector &map, const int this_order);
566
friend bool Good_Turing_smooth(EST_Ngrammar &n, int maxcount, int mincount=0);
566
friend bool Good_Turing_smooth(EST_Ngrammar &n, int maxcount, int mincount);
567
friend void Good_Turing_discount(EST_Ngrammar &ngrammar, const int maxcount,
567
friend void Good_Turing_discount(EST_Ngrammar &ngrammar, const int maxcount,
568
				 const double default_discount=0.5);
568
				 const double default_discount);
569
569
570
friend void fs_build_backoff_ngrams(EST_Ngrammar *backoff_ngrams,
570
friend void fs_build_backoff_ngrams(EST_Ngrammar *backoff_ngrams,
571
				    EST_Ngrammar &ngram);
571
				    EST_Ngrammar &ngram);
(-)a/include/EST_TMatrix.h (-1 / +2 lines)
Lines 313-320 public: Link Here
313
  friend ostream& operator << (ostream &st,const EST_TMatrix<T> &a)
313
  friend ostream& operator << (ostream &st,const EST_TMatrix<T> &a)
314
    {int i, j; 
314
    {int i, j; 
315
        for (i = 0; i < a.num_rows(); ++i) {
315
        for (i = 0; i < a.num_rows(); ++i) {
316
            for (j = 0; j < a.num_columns(); ++j) 
316
            for (j = 0; j < a.num_columns(); ++j) {
317
                st << a.a_no_check(i, j) << " "; st << endl;
317
                st << a.a_no_check(i, j) << " "; st << endl;
318
			}
318
        }
319
        }
319
        return st;
320
        return st;
320
    }
321
    }
(-)a/include/EST_TVector.h (-1 / +2 lines)
Lines 313-320 public: Link Here
313
    friend ostream& operator << (ostream &st, const EST_TVector<T> &m)
313
    friend ostream& operator << (ostream &st, const EST_TVector<T> &m)
314
    {
314
    {
315
        int i; 
315
        int i; 
316
        for (i = 0; i < m.n(); ++i) 
316
        for (i = 0; i < m.n(); ++i) {
317
            st << m(i) << " "; st << endl; 
317
            st << m(i) << " "; st << endl; 
318
		}
318
        return st;
319
        return st;
319
    }
320
    }
320
321
(-)a/main/ngram_build_main.cc (-1 / +1 lines)
Lines 429-435 int main(int argc, char **argv) Link Here
429
    else if (al.present("-smooth") && !al.present("-backoff"))
429
    else if (al.present("-smooth") && !al.present("-backoff"))
430
    {
430
    {
431
	int smoothcount = atoi(al.val("-smooth"));
431
	int smoothcount = atoi(al.val("-smooth"));
432
	if(!Good_Turing_smooth(ngrammar,smoothcount))
432
	if(!Good_Turing_smooth(ngrammar,smoothcount,0.5))
433
	{
433
	{
434
	    cerr << "build_ngram: Failed to smooth " << order << "-gram" << endl;
434
	    cerr << "build_ngram: Failed to smooth " << order << "-gram" << endl;
435
	    exit(1);
435
	    exit(1);

Return to bug 685810