|
Lines 194-200
enum EST_tprob_type {tprob_string, tprob
|
Link Here
|
|---|
|
for example | for example |
\begin{verbatim} | \begin{verbatim} |
EST_DiscreteProbDistribution pdf; | EST_DiscreteProbDistribution pdf; |
for (int i=pdf.item_start(); i < pdf.item_end(); i=pdf.item_next(i)) |
for (long i=pdf.item_start(); i < pdf.item_end(); i=pdf.item_next(i)) |
{ | { |
EST_String name; | EST_String name; |
double prob; | double prob; |
|
|
/// | /// |
double frequency(const int i) const; | double frequency(const int i) const; |
/// Used for iterating through members of the distribution | /// Used for iterating through members of the distribution |
int item_start() const; |
long item_start() const; |
/// Used for iterating through members of the distribution | /// Used for iterating through members of the distribution |
int item_next(int idx) const; |
long item_next(long idx) const; |
/// Used for iterating through members of the distribution | /// Used for iterating through members of the distribution |
int item_end(int idx) const; |
int item_end(long idx) const; |
/// During iteration returns name given index | /// During iteration returns name given index |
const EST_String &item_name(int idx) const; |
const EST_String &item_name(long idx) const; |
/// During iteration returns name and frequency given index | /// During iteration returns name and frequency given index |
void item_freq(int idx,EST_String &s,double &freq) const; |
void item_freq(long idx,EST_String &s,double &freq) const; |
/// During iteration returns name and probability given index | /// During iteration returns name and probability given index |
void item_prob(int idx,EST_String &s,double &prob) const; |
void item_prob(long idx,EST_String &s,double &prob) const; |
| |
/// Returns discrete vocabulary of distribution | /// Returns discrete vocabulary of distribution |
inline const EST_Discrete *const get_discrete() const { return discrete; }; | inline const EST_Discrete *const get_discrete() const { return discrete; }; |
|
Lines 537-543
ostream & operator <<(ostream &s, WImpur
|
Link Here
|
|---|
|
} | } |
else if (imp.t == wnim_class) | else if (imp.t == wnim_class) |
{ | { |
int i; |
long i; |
EST_String name; | EST_String name; |
double prob; | double prob; |
| |
|
Lines 305-319
double EST_DiscreteProbDistribution::ent
|
Link Here
|
|---|
|
} | } |
| |
// For iterating through members of a probability distribution | // For iterating through members of a probability distribution |
int EST_DiscreteProbDistribution::item_start(void) const |
long EST_DiscreteProbDistribution::item_start(void) const |
{ | { |
if (type == tprob_discrete) | if (type == tprob_discrete) |
return 0; | return 0; |
else | else |
return (int)scounts.list.head(); |
return (long)scounts.list.head(); |
} | } |
| |
int EST_DiscreteProbDistribution::item_end(int idx) const |
int EST_DiscreteProbDistribution::item_end(long idx) const |
{ | { |
if (type == tprob_discrete) | if (type == tprob_discrete) |
return (idx >= icounts.length()); | return (idx >= icounts.length()); |
|
Lines 321-335
int EST_DiscreteProbDistribution::item_e
|
Link Here
|
|---|
|
return ((EST_Litem *)idx == 0); | return ((EST_Litem *)idx == 0); |
} | } |
| |
int EST_DiscreteProbDistribution::item_next(int idx) const |
long EST_DiscreteProbDistribution::item_next(long idx) const |
{ | { |
if (type == tprob_discrete) | if (type == tprob_discrete) |
return ++idx; | return ++idx; |
else | else |
return (int)next((EST_Litem *)idx); |
return (long)next((EST_Litem *)idx); |
} | } |
| |
const EST_String &EST_DiscreteProbDistribution::item_name(int idx) const |
const EST_String &EST_DiscreteProbDistribution::item_name(long idx) const |
{ | { |
if (type == tprob_discrete) | if (type == tprob_discrete) |
return discrete->name(idx); | return discrete->name(idx); |
|
Lines 337-343
const EST_String &EST_DiscreteProbDistri
|
Link Here
|
|---|
|
return scounts.list((EST_Litem *)idx).k; | return scounts.list((EST_Litem *)idx).k; |
} | } |
| |
void EST_DiscreteProbDistribution::item_freq(int idx,EST_String &s,double &freq) const |
void EST_DiscreteProbDistribution::item_freq(long idx,EST_String &s,double &freq) const |
{ | { |
if (type == tprob_discrete) | if (type == tprob_discrete) |
{ | { |
|
Lines 351-357
void EST_DiscreteProbDistribution::item_
|
Link Here
|
|---|
|
} | } |
} | } |
| |
void EST_DiscreteProbDistribution::item_prob(int idx,EST_String &s,double &prob) const |
void EST_DiscreteProbDistribution::item_prob(long idx,EST_String &s,double &prob) const |
{ | { |
if (type == tprob_discrete) | if (type == tprob_discrete) |
{ | { |
|
Lines 368-374
void EST_DiscreteProbDistribution::item_
|
Link Here
|
|---|
|
ostream & operator<<(ostream &s, const EST_DiscreteProbDistribution &pd) | ostream & operator<<(ostream &s, const EST_DiscreteProbDistribution &pd) |
{ | { |
// Output best with probabilities | // Output best with probabilities |
int i; |
long i; |
double prob; | double prob; |
double sum=0; | double sum=0; |
EST_String name; | EST_String name; |
|
Lines 180-186
bool EST_BackoffNgrammarState::accumulat
|
Link Here
|
|---|
|
const double count) | const double count) |
{ | { |
| |
// int i; |
// long i; |
// cerr << "accumulate level " << p_level << " : "; | // cerr << "accumulate level " << p_level << " : "; |
// for(i=0;i<words.n();i++) | // for(i=0;i<words.n();i++) |
// { | // { |
|
Lines 302-308
void EST_BackoffNgrammarState::print_fre
|
Link Here
|
|---|
|
// not right - just print out, then recurse through children | // not right - just print out, then recurse through children |
// change to use 'backoff_traverse' | // change to use 'backoff_traverse' |
| |
int k; |
long k; |
double freq; | double freq; |
EST_String name; | EST_String name; |
for (k=p_pdf.item_start(); | for (k=p_pdf.item_start(); |
|
Lines 369-375
void EST_BackoffNgrammarState::zap()
|
Link Here
|
|---|
|
{ | { |
| |
// recursively delete this state and all its children | // recursively delete this state and all its children |
int k; |
long k; |
double freq; | double freq; |
EST_String name; | EST_String name; |
for (k=p_pdf.item_start(); | for (k=p_pdf.item_start(); |
|
Lines 452-458
bool EST_BackoffNgrammarState::set_backo
|
Link Here
|
|---|
|
| |
void EST_BackoffNgrammarState::frequency_of_frequencies(EST_DVector &ff) | void EST_BackoffNgrammarState::frequency_of_frequencies(EST_DVector &ff) |
{ | { |
int k,max=ff.n(); |
long k; int max=ff.n(); |
double freq; | double freq; |
EST_String name; | EST_String name; |
for (k=p_pdf.item_start(); | for (k=p_pdf.item_start(); |
|
Lines 911-917
void EST_Ngrammar::accumulate(const EST_
|
Link Here
|
|---|
|
{ | { |
| |
/* | /* |
int i; |
long i; |
for(i=0;i<words.n();i++) | for(i=0;i<words.n();i++) |
{ | { |
cerr << vocab_pdf.item_name(words(i)); | cerr << vocab_pdf.item_name(words(i)); |
|
Lines 1581-1587
void EST_Ngrammar::prune_backoff_represe
|
Link Here
|
|---|
|
// remove any branches with zero frequency count | // remove any branches with zero frequency count |
| |
// find children of this state with zero freq and zap them | // find children of this state with zero freq and zap them |
int k; |
long k; |
double freq; | double freq; |
EST_String name; | EST_String name; |
for (k=start_state->pdf_const().item_start(); | for (k=start_state->pdf_const().item_start(); |
|
Lines 2320-2326
void EST_Ngrammar::print_freqs(ostream &
|
Link Here
|
|---|
|
backoff_representation->print_freqs(os,p_order); | backoff_representation->print_freqs(os,p_order); |
else | else |
{ | { |
int i,j,k; |
int i,j; long k; |
EST_IVector window(p_order-1); | EST_IVector window(p_order-1); |
| |
for (i=0; i < p_num_states; i++) | for (i=0; i < p_num_states; i++) |
|
Lines 2661-2667
EST_Ngrammar::backoff_traverse(EST_Backo
|
Link Here
|
|---|
|
function(start_state,params); | function(start_state,params); |
| |
// and recurse down the tree | // and recurse down the tree |
int k; |
long k; |
double freq; | double freq; |
EST_String name; | EST_String name; |
for (k=start_state->pdf_const().item_start(); | for (k=start_state->pdf_const().item_start(); |
|
Lines 2692-2698
EST_Ngrammar::backoff_traverse(EST_Backo
|
Link Here
|
|---|
|
{ | { |
// and recurse down the tree if we haven't | // and recurse down the tree if we haven't |
// reached the level yet | // reached the level yet |
int k; |
long k; |
double freq; | double freq; |
EST_String name; | EST_String name; |
| |
|
|
load_ngram_cstr_bin(const EST_String filename, EST_Ngrammar &n) | load_ngram_cstr_bin(const EST_String filename, EST_Ngrammar &n) |
{ | { |
EST_TokenStream ts; | EST_TokenStream ts; |
int i,j,k,order; |
int i,j,order; long k; |
int num_entries; | int num_entries; |
double approx_num_samples = 0.0; | double approx_num_samples = 0.0; |
long freq_data_start, freq_data_end; | long freq_data_start, freq_data_end; |
|
|
save_ngram_htk_ascii_sub(const EST_String &word, ostream *ost, | save_ngram_htk_ascii_sub(const EST_String &word, ostream *ost, |
EST_Ngrammar &n, double floor) | EST_Ngrammar &n, double floor) |
{ | { |
int k; |
long k; |
EST_String name; | EST_String name; |
double freq; | double freq; |
EST_StrVector this_ngram(2); // assumes bigram | EST_StrVector this_ngram(2); // assumes bigram |
|
Lines 734-740
save_ngram_cstr_ascii(const EST_String f
|
Link Here
|
|---|
|
// awb's format | // awb's format |
(void)trace; | (void)trace; |
ostream *ost; | ostream *ost; |
int i,k; |
int i; long k; |
| |
if (filename == "-") | if (filename == "-") |
ost = &cout; | ost = &cout; |
|
Lines 831-837
save_ngram_cstr_bin(const EST_String fil
|
Link Here
|
|---|
|
if (n.representation() == EST_Ngrammar::sparse) | if (n.representation() == EST_Ngrammar::sparse) |
return misc_write_error; | return misc_write_error; |
| |
int i,k; |
int i; long k; |
FILE *ofd; | FILE *ofd; |
double lfreq = -1; | double lfreq = -1; |
double count = -1; | double count = -1; |
|
Lines 117-123
smooth_ExponentialFit(EST_DVector &N, in
|
Link Here
|
|---|
|
| |
void make_f_of_f(EST_BackoffNgrammarState *s,void *params) | void make_f_of_f(EST_BackoffNgrammarState *s,void *params) |
{ | { |
int k; |
long k; |
double freq; | double freq; |
EST_String name; | EST_String name; |
| |
|
Lines 138-144
void make_f_of_f(EST_BackoffNgrammarStat
|
Link Here
|
|---|
|
| |
void get_max_f(EST_BackoffNgrammarState *s,void *params) | void get_max_f(EST_BackoffNgrammarState *s,void *params) |
{ | { |
int k; |
long k; |
double freq; | double freq; |
EST_String name; | EST_String name; |
| |
|
Lines 158-164
void get_max_f(EST_BackoffNgrammarState
|
Link Here
|
|---|
|
| |
void map_f_of_f(EST_BackoffNgrammarState *s,void *params) | void map_f_of_f(EST_BackoffNgrammarState *s,void *params) |
{ | { |
int k; |
long k; |
double freq; | double freq; |
EST_String name; | EST_String name; |
| |
|
Lines 184-190
void map_f_of_f(EST_BackoffNgrammarState
|
Link Here
|
|---|
|
| |
void zero_small_f(EST_BackoffNgrammarState *s,void *params) | void zero_small_f(EST_BackoffNgrammarState *s,void *params) |
{ | { |
int k; |
long k; |
double freq; | double freq; |
EST_String name; | EST_String name; |
| |
|
Lines 204-210
void zero_small_f(EST_BackoffNgrammarSta
|
Link Here
|
|---|
|
| |
void frequency_of_frequencies(EST_DVector &ff, EST_Ngrammar &n,int this_order) | void frequency_of_frequencies(EST_DVector &ff, EST_Ngrammar &n,int this_order) |
{ | { |
int i,k,size; |
int i,size; long k; |
double max=0.0; | double max=0.0; |
| |
// if ff has zero size, do complete frequency of frequencies | // if ff has zero size, do complete frequency of frequencies |
|
Lines 302-308
void frequency_of_frequencies(EST_DVecto
|
Link Here
|
|---|
|
| |
void map_frequencies(EST_Ngrammar &n, const EST_DVector &map, const int this_order) | void map_frequencies(EST_Ngrammar &n, const EST_DVector &map, const int this_order) |
{ | { |
int i,k; |
int i; long k; |
| |
| |
switch(n.representation()) | switch(n.representation()) |
|
Lines 71-77
EST_PredictionSuffixTree_tree_node::prin
|
Link Here
|
|---|
|
// Base -- print from pd | // Base -- print from pd |
EST_String s; | EST_String s; |
double freq; | double freq; |
for (int i = pd.item_start(); |
for (long i = pd.item_start(); |
!pd.item_end(i); | !pd.item_end(i); |
i=pd.item_next(i)) | i=pd.item_next(i)) |
{ | { |
|
Lines 98-104
EST_PredictionSuffixTree_tree_node::prin
|
Link Here
|
|---|
|
EST_String s; | EST_String s; |
double prob; | double prob; |
os << get_path() << " :"; | os << get_path() << " :"; |
for (int i = pd.item_start(); !pd.item_end(i) ; i=pd.item_next(i)) |
for (long i = pd.item_start(); !pd.item_end(i) ; i=pd.item_next(i)) |
{ | { |
pd.item_prob(i,s,prob); | pd.item_prob(i,s,prob); |
os << " " << s << " " << prob; | os << " " << s << " " << prob; |
|
Lines 74-80
void fs_build_backoff_ngrams(EST_Ngramma
|
Link Here
|
|---|
|
EST_Ngrammar &ngram) | EST_Ngrammar &ngram) |
{ | { |
// Build all the backoff grammars back to uni-grams | // Build all the backoff grammars back to uni-grams |
int i,j,k,l; |
int i,j,l; long k; |
| |
for (i=0; i < ngram.order()-1; i++) | for (i=0; i < ngram.order()-1; i++) |
backoff_ngrams[i].init(i+1,EST_Ngrammar::dense, | backoff_ngrams[i].init(i+1,EST_Ngrammar::dense, |
|
Lines 110-116
int fs_backoff_smooth(EST_Ngrammar *back
|
Link Here
|
|---|
|
{ | { |
// For all ngrams which are too infrequent, adjust their | // For all ngrams which are too infrequent, adjust their |
// frequencies based on their backoff probabilities | // frequencies based on their backoff probabilities |
int i,j; |
int i; long j; |
double occurs; | double occurs; |
double backoff_prob; | double backoff_prob; |
| |
|
Lines 299-305
static LISP find_best_split(EST_WFST &wf
|
Link Here
|
|---|
|
LISP *ssplits; | LISP *ssplits; |
gc_protect(&splits); | gc_protect(&splits); |
EST_String sname; | EST_String sname; |
int b,best_b,i; |
int b,best_b; long i; |
int num_pdfs; | int num_pdfs; |
double best_score, score, sfreq; | double best_score, score, sfreq; |
| |
|
Lines 374-380
static double score_pdf_combine(EST_Disc
|
Link Here
|
|---|
|
// Find score of (a+b) vs (all-(a+b)) | // Find score of (a+b) vs (all-(a+b)) |
EST_DiscreteProbDistribution ab(a); | EST_DiscreteProbDistribution ab(a); |
EST_DiscreteProbDistribution all_but_ab(all); | EST_DiscreteProbDistribution all_but_ab(all); |
int i; |
long i; |
EST_String sname; | EST_String sname; |
double sfreq, score; | double sfreq, score; |
for (i=b.item_start(); !b.item_end(i); | for (i=b.item_start(); !b.item_end(i); |
|
Lines 506-512
static double find_score_if_split(EST_WF
|
Link Here
|
|---|
|
EST_DiscreteProbDistribution pdf_split(&wfst.in_symbols()); | EST_DiscreteProbDistribution pdf_split(&wfst.in_symbols()); |
EST_DiscreteProbDistribution pdf_remain(&wfst.in_symbols()); | EST_DiscreteProbDistribution pdf_remain(&wfst.in_symbols()); |
int in, tostate, id; | int in, tostate, id; |
int i; |
long i; |
double sfreq; | double sfreq; |
EST_String sname; | EST_String sname; |
| |