_SuffixArrayLanguageModel.cpp

Go to the documentation of this file.
00001 
00006 #include  "_SuffixArrayLanguageModel.h"
00007 #include <iostream>
00008 #include <fstream>
00009 #include "math.h"
00010 
00011 using namespace std;
00012 
00013 
00014 C_SuffixArrayLanguageModel::C_SuffixArrayLanguageModel()
00015 {
00016 
00017 }
00018 
00019 C_SuffixArrayLanguageModel::~C_SuffixArrayLanguageModel()
00020 {
00021         
00022 }
00023 
00024 
00047 C_SuffixArrayLanguageModel::C_SuffixArrayLanguageModel(const char *  cfgFileName)
00048 {
00049   
00050         fstream cfgFile;
00051         cfgFile.open(cfgFileName,ios::in);
00052 
00053         if(!cfgFile){
00054                 fprintf(stderr,"Configuration file does not exist! quit!!\n");
00055                 exit(0);
00056         }
00057 
00058         //-----------------------------------------------------------------------------
00059         //reading parameters
00060         char paraName[1024];
00061         char corpusFileNameStem[1024];
00062         corpusFileNameStem[0]=0;
00063         this->maxFreqForDiscounting=-1;
00064 
00065         this->interpolationStrategy = 'e';      //default interpolation strategy: equally weighted n-gram conditional prob
00066         this->maxN = 5;         // default value; consider up to 5 words        
00067                 
00068         while(!cfgFile.eof()){
00069                 cfgFile>>paraName;
00070 
00071                 if(strcmp(paraName,"CORPUS")==0){
00072                         cfgFile>>corpusFileNameStem;
00073                 }               
00074                 else if(strcmp(paraName,"N")==0){
00075                         cfgFile>>this->maxN;
00076                 }
00077                 else if(strcmp(paraName,"MAX_FREQ_DISC")==0){
00078                         cfgFile>>maxFreqForDiscounting;
00079                 }               
00080                 else if(strcmp(paraName,"INTERPOLATION_STRATEGY")==0){
00081                         cfgFile>>this->interpolationStrategy;
00082                 }       
00083 
00084                 paraName[0]=0;
00085                 
00086         }
00087         
00088         //load corpus and suffix array
00089         if(strlen(corpusFileNameStem)==0){
00090                 cerr<<"CORPUS need to be specified in the configuration file. This should be the corpus name used for LM.\n";
00091                 exit(-1);
00092         }
00093         this->loadData_forSearch(corpusFileNameStem, false, true);      //call the constructor of the super class to load suffix array for corpusName, with vocabulary, no offset, 
00094         
00095         
00096         //if apply discounting construct the discounting map
00097         if(this->maxFreqForDiscounting<=0){
00098                 this->applyDiscounting = false;
00099         }
00100         else{
00101                 this->applyDiscounting = true;          
00102                 this->constructDiscountingMap();        //scan the corpus and construct the count of counts table and then discounting map              
00103         }       
00104 
00105         //get vocID for sentEnd
00106         this->vocIdForSentEnd = this->voc->returnId(C_String("_END_OF_SENTENCE_"));
00107 
00108         if(this->vocIdForSentEnd==0){
00109                 cerr<<"VocID for _END_OF_SENTENCE_ can not be found. Critical error.\n";
00110                 exit(0);
00111         }
00112 
00113         this->vocIdForSentStart = this->voc->returnId(C_String("_SENTENCE_START_"));
00114         if(this->vocIdForSentStart==0){
00115                 cerr<<"VocID for _SENTENCE_START_ can not be found. Critical error.\n";
00116                 exit(0);
00117         }
00118         
00119         this->vocIdForCorpusEnd = this->voc->returnId(C_String("_END_OF_CORPUS_"));
00120         if(this->vocIdForCorpusEnd==0){
00121                 cerr<<"VocID for _END_OF_CORPUS_ can not be found. Critical error.\n";
00122                 exit(0);
00123         }
00124 
00125         this->interpolationStrategy = 'e';                      //default: interpolation strategy: equally weighted n-gram conditional prob
00126 
00127 }
00128 
00129 
00135 void C_SuffixArrayLanguageModel::constructDiscountingMap()
00136 {
00137         int i,j;
00138         unsigned int * countOfCountsTable = (unsigned int *) malloc(sizeof(unsigned int)*this->maxN*this->maxFreqForDiscounting);
00139 
00140         if(countOfCountsTable==NULL){
00141                 cerr<<"Count of counts table can not be initialized. Exit\n";
00142                 exit(0);
00143         }
00144         
00145         //initialize count of counts table
00146         for(int c=0;c<this->maxN*this->maxFreqForDiscounting;c++){
00147                 countOfCountsTable[c]=0;
00148         }
00149                 
00150         //initialize the scanning list
00151         S_nGramScanningInfoElement * nGramScanningList = (S_nGramScanningInfoElement *) malloc(sizeof(S_nGramScanningInfoElement)*this->maxN);
00152         for(i=0;i<this->maxN;i++){
00153                 nGramScanningList[i].freqSoFar=0;
00154                 nGramScanningList[i].vocId = 0;
00155                 nGramScanningList[i].freqThreshForOutput = (unsigned int) -1;   //default, do not output
00156         }
00157 
00158         bool stillMeaningful = true;    
00159         TextLenType saPos=0;
00160 
00161         while(stillMeaningful && ( saPos<this->corpusSize ) ){
00162 
00163                 TextLenType posInCorpus = this->suffix_list[saPos];
00164                 IndexType wordInCorpus = this->corpus_list[posInCorpus];
00165 
00166                 if(wordInCorpus<this->sentIdStart){     //SA positions pointing to sentID are not interesting
00167                         
00168                         if((wordInCorpus!=this->vocIdForSentStart)&&(wordInCorpus!=this->vocIdForSentEnd)&&(wordInCorpus!=this->vocIdForCorpusEnd)){    //n-grams start with <s> and </s>, or <end of corpus> are not interested
00169                         
00170                                 bool quit =false;
00171                                 i=0;
00172 
00173                                 while(!quit && (i<this->maxN)){
00174                                         wordInCorpus = this->corpus_list[posInCorpus+i];
00175                                         if(                                             
00176                                                 (wordInCorpus<this->sentIdStart)&&
00177                                                 (wordInCorpus!=this->vocIdForSentEnd)&&
00178                                                 (wordInCorpus!=this->vocIdForSentStart)&&
00179                                                 (wordInCorpus==nGramScanningList[i].vocId)){    //still match
00180 
00181                                                 nGramScanningList[i].freqSoFar++;
00182                                         }
00183                                         else{   //we will have new (i+1) and longer n-grams soon, before that check if we should increase the count of counts for n because of this n-gram type
00184                                                                         
00185                                                 bool validNgramUpSoFar = true;
00186                                                 unsigned int freqSoFar;
00187                                                 
00188                                                 for(j=i;j<this->maxN;j++){                              
00189                                                         
00190                                                         
00191                                                         if(nGramScanningList[j].vocId==0){              //a NULL word, then this n-gram and longer ones in the scan window are invalid
00192                                                                 validNgramUpSoFar = false;
00193                                                         }
00194 
00195                                                         if(validNgramUpSoFar){          //perform actions depends on actionType
00196                                                                 
00197                                                                 freqSoFar = nGramScanningList[j].freqSoFar;
00198                                                                 if( (freqSoFar > 0) && ( freqSoFar <= this->maxFreqForDiscounting) ){
00199                                                                         //increase the count for (j+1)-gram with freq freqSoFar
00200                                                                         countOfCountsTable[j*this->maxFreqForDiscounting+freqSoFar-1]++;
00201                                                                 }                                                       
00202                                                         }
00203 
00204                                                         //finished output, now clear the list from point of i
00205                                                         if((posInCorpus+j)<this->corpusSize){
00206                                                                 wordInCorpus = this->corpus_list[posInCorpus+j];
00207                                                         }
00208                                                         else{
00209                                                                 wordInCorpus = 0;       //out of bound for corpus
00210                                                         }
00211 
00212                                                         if((wordInCorpus==0)||(wordInCorpus>=this->sentIdStart)||(wordInCorpus==this->vocIdForSentEnd)||(wordInCorpus==this->vocIdForSentStart)){
00213                                                                 wordInCorpus=0; //write 0 for <sentId>, <s> and </s>
00214                                                                 nGramScanningList[j].freqSoFar = 0;
00215                                                         }
00216                                                         else{
00217                                                                 nGramScanningList[j].freqSoFar = 1;
00218                                                         }
00219 
00220                                                         nGramScanningList[j].vocId = wordInCorpus;                                                      
00221                                                 }
00222 
00223                                                 quit=true;      //at i+1 gram, already not match, no need to check for longer
00224                                         }
00225 
00226                                         i++;
00227                                 }
00228                         }
00229                 }
00230                 else{
00231                         stillMeaningful = false;        //once vocID is getting larger/equal than sentIdStart, everything follows it are <sentId> and no actual text
00232                 }
00233 
00234                 saPos++;
00235         }
00236 
00237         //at the end of corpus (according to suffix order)
00238         bool validNgramUpSoFar = true;
00239         unsigned int freqSoFar;
00240         for(i=0;i<this->maxN;i++){
00241                 if(nGramScanningList[i].vocId==0){      //invalide word
00242                         validNgramUpSoFar = false;
00243                 }
00244 
00245                 if(validNgramUpSoFar){
00246                         
00247                         freqSoFar = nGramScanningList[i].freqSoFar;
00248                         if( (freqSoFar > 0) && ( freqSoFar <= this->maxFreqForDiscounting) ){
00249                                 //increase the count for (i+1)-gram with freq freqSoFar
00250                                 countOfCountsTable[i*this->maxFreqForDiscounting+freqSoFar-1]++;
00251                         }                       
00252                 }
00253         }
00254 
00255         //now, use Good-Turing discounting to create frequency mapping
00256         //still assign N*Freq table for simplicity, even though that for each N, only maxFreq-1 freq type will be discounted
00257         this->discountingMap = (double *) malloc(sizeof(double) * this->maxN * this->maxFreqForDiscounting);
00258                 
00259         for(i=0;i<this->maxN;i++){
00260                 //for (i+1)-gram
00261                         
00262                 unsigned int * ccTableForThisN = countOfCountsTable + i*this->maxFreqForDiscounting;
00263                 double * discountingMapForThisN = this->discountingMap + i*this->maxFreqForDiscounting;
00264 
00265                 for(int freq=0;freq<(this->maxFreqForDiscounting-1);freq++){    //only goes to maxFreq-1, because we can not discount maxFreq
00266                         //for all (freq+1) ngrams
00267                         if((ccTableForThisN[freq]>0)&&(ccTableForThisN[freq+1]>0)){     //both freq exists
00268                                 discountingMapForThisN[freq] = (double)(ccTableForThisN[freq+1]*(freq+2))/(double)(ccTableForThisN[freq]);                      
00269                         }
00270                         else{
00271                                 discountingMapForThisN[freq] = -1;
00272                         }
00273                 }
00274 
00275                 discountingMapForThisN[this->maxFreqForDiscounting-1] = -1;     //won't be used, just for consistency
00276         }
00277 
00278 
00279         free(countOfCountsTable);
00280 
00281 }
00282 
00291 void C_SuffixArrayLanguageModel::calcNgramMatchingInfoTokenFreqOnlyExtendingCurrentMatch(TextLenType currentMatchStart, unsigned char currentMatchLen, IndexType nextWord, double *freqTable, TextLenType &updatedMatchingStart, unsigned char &updatedMatchingLen)
00292 {
00293         vector<IndexType> nGram;        
00294 
00295         if(currentMatchStart!=(TextLenType) -1){        //-1 will be <unk>
00296                 if(currentMatchLen==this->maxN){        //we consider only up to this->maxN for the extended n-gram
00297                         currentMatchStart++;
00298                         currentMatchLen--;
00299                 }
00300 
00301                 for(TextLenType pos=currentMatchStart; pos<(currentMatchStart+currentMatchLen); pos++){
00302                         nGram.push_back(this->corpus_list[pos]);
00303                 }
00304         }
00305 
00306         nGram.push_back(nextWord);
00307 
00308         int sentLen = nGram.size();
00309         
00310         //construct the n-gram search table     
00311         S_sentSearchTableElement * table = this->constructNgramSearchTable4SentWithLCP(nGram);
00312 
00313         int startPosForNgram;
00314         int startPosForLongestMatchingWithNextWord;
00315         int cellIndexForLongestMatchingWithNextWord;
00316 
00317         bool stillMatched = true;
00318         bool atLeastOneMatched = false;
00319 
00320         int indexForNgram;
00321 
00322         unsigned int totalOccurrences;
00323         unsigned int totalOccurrencesOfHistory;
00324 
00325         //for unigram
00326         indexForNgram = sentLen - 1;
00327         if(table[indexForNgram].found){
00328                 totalOccurrences = table[indexForNgram].endingPosInSA - table[indexForNgram].startPosInSA + 1;
00329                 if(this->applyDiscounting){
00330                         freqTable[0] = this->discountFreq(1, totalOccurrences);
00331                 }
00332                 else{
00333                         freqTable[0] = totalOccurrences; 
00334                 }
00335 
00336                 freqTable[1] = this->corpusSize;
00337                 cellIndexForLongestMatchingWithNextWord = indexForNgram;
00338                 startPosForLongestMatchingWithNextWord = sentLen-1;
00339                 atLeastOneMatched = true;
00340         }
00341         else{
00342                 stillMatched = false;
00343         }
00344 
00345         int n=2;        //considering 2-gram and longer n-gram now
00346         startPosForNgram = sentLen - 2;
00347         while((stillMatched)&&(startPosForNgram>=0)){
00348                 
00349                 indexForNgram = (n-1) * sentLen + startPosForNgram;
00350                 int indexForHistory = (n-2) * sentLen +  startPosForNgram;
00351                 
00352                 if(table[indexForNgram].found){
00353                                                 
00354                         totalOccurrences = table[indexForNgram].endingPosInSA - table[indexForNgram].startPosInSA + 1;  
00355                         totalOccurrencesOfHistory = table[indexForHistory].endingPosInSA - table[indexForHistory].startPosInSA + 1;
00356 
00357                         
00358                         if(this->applyDiscounting){
00359                                 freqTable[2*n-2] = this->discountFreq(n, totalOccurrences);                             
00360                         }
00361                         else{
00362                                 freqTable[2*n-2] = (double)totalOccurrences;
00363                         }
00364 
00365                         freqTable[2*n-1] = (double) totalOccurrencesOfHistory;  //do not discount the history
00366                         
00367                         if(n<this->maxN){       //new history is at most this->maxFreqForDiscounting-1 words long
00368                                 cellIndexForLongestMatchingWithNextWord = indexForNgram;
00369                                 startPosForLongestMatchingWithNextWord = startPosForNgram;
00370                         }
00371                 }
00372                 else{
00373                         stillMatched = false;
00374                 }
00375 
00376                 startPosForNgram--;
00377                 n++;
00378         }
00379 
00380         if(atLeastOneMatched){  //at least one n-gram can be matched with 'nextWord'
00381                 updatedMatchingStart = this->suffix_list[table[cellIndexForLongestMatchingWithNextWord].startPosInSA];
00382                 updatedMatchingLen = (unsigned char) (sentLen - startPosForLongestMatchingWithNextWord);
00383         }
00384         else{
00385                 updatedMatchingStart = (TextLenType) -1;
00386                 updatedMatchingLen = 0;
00387         }
00388 
00389         free(table);
00390 
00391 }
00392 
00393 
00394 //given observedFreq of n-gram, return discounted freq using Good-Turing smoothing
00395 double C_SuffixArrayLanguageModel::discountFreq(int n, unsigned int observedFreq)
00396 {
00397         if(n>=this->maxN){      //do not discount
00398                 return (double) observedFreq;
00399         }
00400 
00401         if(observedFreq>=(this->maxFreqForDiscounting-1)){      //no discounting for high freq
00402                 return (double) observedFreq;
00403         }
00404 
00405         //else, check the discount map
00406         double discountedFreq = this->discountingMap[ (n-1) * this->maxFreqForDiscounting + observedFreq -1];
00407 
00408         if(discountedFreq>0){
00409                 return discountedFreq;
00410         }
00411 
00412         //else, no discounting
00413         return (double) observedFreq;
00414 }
00415 
00416 
00418 LMState C_SuffixArrayLanguageModel::beginOfSentenceState()
00419 {
00420         
00421         this->resetLmStates();
00422         this->initialLmState(); 
00423 
00424         return 0;
00425 }
00426 
00427 void C_SuffixArrayLanguageModel::initialLmState()
00428 {
00429         //add sentence start
00430         S_LMStateInfo sentStartNode;
00431         sentStartNode.locationInCorpus.posInCorpus = 1; //if corpus is indexed correctly position 1 should be <s>
00432         sentStartNode.locationInCorpus.len = 1;
00433         sentStartNode.cachedNextWordExtension.clear();
00434 
00435         this->allLMStates.push_back(sentStartNode);
00436         this->ngramLocation2LmStateId.insert(make_pair(sentStartNode.locationInCorpus, 0));
00437 }
00438 
00439 void C_SuffixArrayLanguageModel::resetLmStates()
00440 {
00441         this->allLMStates.clear();
00442         this->ngramLocation2LmStateId.clear();
00443 }
00444 
00445 
00454 double C_SuffixArrayLanguageModel::logProb(LMState lmState, IndexType nextWord, LMState & nextState)
00455 {
00456         if(lmState>=this->allLMStates.size()){
00457                 cerr<<"Invalid LM State: "<<lmState<<endl;
00458                 exit(-1);
00459         }
00460 
00461         //first check if we have already seen this 'nextWord' before
00462         map< IndexType, S_CachedLmInfo>::iterator iterNextWordExtensionCache;
00463         iterNextWordExtensionCache = this->allLMStates[lmState].cachedNextWordExtension.find( nextWord );
00464 
00465         if(iterNextWordExtensionCache==this->allLMStates[lmState].cachedNextWordExtension.end()){ //we haven't seen this lmState+word yet
00466 
00467                 //search for it in the corpus
00468                 S_NgramLocationInCorpus correspondingNgramLocation = this->allLMStates[lmState].locationInCorpus;
00469                 S_NgramLocationInCorpus updatedNgramLocation;
00470                 
00471                 double logProb = this->logProbFromFreq(
00472                         correspondingNgramLocation.posInCorpus, 
00473                         correspondingNgramLocation.len, 
00474                         nextWord, 
00475                         updatedNgramLocation.posInCorpus,
00476                         updatedNgramLocation.len);
00477                 
00478                 //caching the logprob of 'nextword' given the lmState
00479                 int updatedLmStateId;
00480                 map<S_NgramLocationInCorpus, int, lt_ngramLocationInCorpus>::iterator iterNgramLocation2LmStateId;
00481                 iterNgramLocation2LmStateId = this->ngramLocation2LmStateId.find(updatedNgramLocation);
00482                 if(iterNgramLocation2LmStateId==this->ngramLocation2LmStateId.end()){   //this updated lm state does not exist yet
00483                         S_LMStateInfo newLmStateNode;
00484 
00485                         newLmStateNode.locationInCorpus = updatedNgramLocation;
00486                         newLmStateNode.cachedNextWordExtension.clear();
00487                         
00488                         this->allLMStates.push_back(newLmStateNode);
00489                         updatedLmStateId = this->allLMStates.size() -1 ;
00490                         this->ngramLocation2LmStateId.insert(make_pair(updatedNgramLocation, updatedLmStateId));
00491                 }
00492                 else{
00493                         updatedLmStateId = iterNgramLocation2LmStateId->second;
00494                 }
00495 
00496                 //cache this
00497                 S_CachedLmInfo cachedLmInfo;
00498                 cachedLmInfo.logProb = logProb;
00499                 cachedLmInfo.nextState = updatedLmStateId;
00500 
00501                 this->allLMStates[lmState].cachedNextWordExtension.insert(make_pair(nextWord, cachedLmInfo));
00502 
00503                 //updated next state
00504                 nextState = updatedLmStateId;
00505                 
00506                 return logProb;
00507         }
00508 
00509         nextState = iterNextWordExtensionCache->second.nextState;
00510 
00511         return iterNextWordExtensionCache->second.logProb;
00512 }
00513 
00514 
00524 double C_SuffixArrayLanguageModel::logProb(LMState lmState, vector<IndexType> phrase, LMState & nextState)
00525 {
00526         double logProb = 0;
00527         
00528         if (phrase.size() == 0) {
00529                 nextState = lmState;
00530                 return logProb;
00531         }
00532 
00533         for(int i=0;i<phrase.size();i++){
00534                 logProb+=this->logProb(lmState, phrase[i], nextState);
00535                 lmState = nextState;
00536         }
00537 
00538         return logProb;
00539 }
00540 
00544 double C_SuffixArrayLanguageModel::logProbEnd(LMState lmState)
00545 {
00546         LMState dummyNextState;
00547         return this->logProb(lmState, this->vocIdForSentEnd, dummyNextState);
00548 }
00549 
00559 double C_SuffixArrayLanguageModel::logProbFromFreq(TextLenType currentMatchStart, unsigned char currentMatchLen, IndexType nextWord, TextLenType &updatedMatchingStart, unsigned char &updatedMatchingLen)
00560 {
00561 
00562         double logProb;
00563 
00564         double * freqTable = (double *) malloc(sizeof(double)*2*(this->maxN));
00565         memset(freqTable, 0, 2*this->maxN*sizeof(double));
00566 
00567         this->calcNgramMatchingInfoTokenFreqOnlyExtendingCurrentMatch(currentMatchStart, currentMatchLen, nextWord, freqTable, updatedMatchingStart, updatedMatchingLen);
00568 
00569         logProb = this->calcLogProb(freqTable);
00570 
00571         free(freqTable);
00572 
00573         return logProb;
00574 
00575 }
00576 
00577 double C_SuffixArrayLanguageModel::calcLogProb(double *freq)
00578 {
00579         switch(this->interpolationStrategy){
00580         case 'e':
00581                 return this->calcLogProb_equalWeightedInterpolation(freq);
00582                 break;
00583         case 'i':
00584                 return this->calcLogProb_ibmHeuristicInterpolation(freq);
00585                 break;
00586         case 'm':
00587                 return this->calcLogProb_maxProbInterpolation(freq);
00588                 break;
00589         default:
00590                 cerr<<"Unknown interpolation strategy!\n";
00591                 exit(0);
00592         }
00593 }
00594 
00595 double C_SuffixArrayLanguageModel::calcLogProb_equalWeightedInterpolation(double *freq)
00596 {
00597         double prob = 0.0;
00598 
00599         
00600         if(freq[0]>0){
00601 
00602                 int i=0;
00603                 bool stillMatched = true;
00604 
00605                 while(stillMatched && (i<this->maxN)){
00606                         if(freq[2*i]>0){
00607                                 prob+=freq[2*i]/freq[2*i+1];
00608                         }
00609                         else{
00610                                 stillMatched = false;
00611                         }
00612 
00613                         i++;
00614                 }
00615 
00616                 return log(prob/(double)this->maxN);
00617         }
00618         else{   //unknown word
00619                 return SALM_LOG_PROB_UNK;
00620         }
00621 }
00622 
00623 double C_SuffixArrayLanguageModel::calcLogProb_ibmHeuristicInterpolation(double *freq)
00624 {
00625         double prob = 0.0;
00626         if(freq[0]==0){ //unknown word
00627                 return SALM_LOG_PROB_UNK;
00628         }
00629 
00630         double remainingWeightSum = 1.0;
00631 
00632         //find the first non-zero match
00633         int i = this->maxN - 1;
00634 
00635         while(freq[2*i]==0){    //will stop for sure because freq[0]!=0
00636                 i--;
00637         }
00638 
00639         for(int j=i;j>=0;j--){
00640                 //for (j+1)-gram
00641                 double historyFreq = freq[2*j+1];
00642                 double logHistoryFreq = log(historyFreq);
00643                 if(logHistoryFreq>1){
00644                         logHistoryFreq = 1.0;   //cap it to 1
00645                 }
00646 
00647                 double reliability = 0.1*logHistoryFreq+0.3;    //heuristics for reliability of the history
00648                 double adjustedWeights = remainingWeightSum *  reliability;
00649                 
00650                 prob+=adjustedWeights * freq[2*i]/freq[2*i+1];
00651 
00652                 remainingWeightSum -= adjustedWeights;
00653         }
00654 
00655         return log(prob);       
00656 }
00657 
00658 double C_SuffixArrayLanguageModel::calcLogProb_maxProbInterpolation(double *freq)
00659 {
00660         double maxProb = 0.0;
00661         
00662         if(freq[0]>0){
00663 
00664                 int i=0;
00665                 bool stillMatched = true;
00666 
00667                 while(stillMatched && (i<this->maxN)){
00668                         if(freq[2*i]>0){
00669                                 double prob=freq[2*i]/freq[2*i+1];
00670 
00671                                 if(prob>maxProb){
00672                                         maxProb = prob;
00673                                 }
00674                         }
00675                         else{
00676                                 stillMatched = false;
00677                         }
00678 
00679                         i++;
00680                 }
00681 
00682                 return log(maxProb);
00683         }
00684         else{   //unknown word
00685                 return SALM_LOG_PROB_UNK;
00686         }
00687 }
00688 
00689 IndexType C_SuffixArrayLanguageModel::returnVocId(C_String aWord)
00690 {
00691         return this->voc->returnId(aWord);
00692 }

Generated on Fri Jul 6 23:11:07 2007 for SALM by  doxygen 1.5.1