_SuffixArrayScanningBase.h

Go to the documentation of this file.
00001 #if !defined (_HEADER_SUFFIX_ARRAY_SCANNING_BASE_CLASS_)
00002 #define _HEADER_SUFFIX_ARRAY_SCANNING_BASE_CLASS_
00003 
00004 
00005 #include "_SuffixArrayApplicationBase.h"
00006 
00007 
00008 
00009 
00017 class C_SuffixArrayScanningBase : public C_SuffixArrayApplicationBase  // Publicly extends C_SuffixArrayApplicationBase with suffix-array scanning entry points; only declarations are visible in this header.
00018 {
00019 public:  // NOTE(review): class has raw pointer members and no user-declared copy constructor/assignment -- check ownership before copying instances.
00020         void setNgramOutputFreqThresh(int n, unsigned int freqThresh);  // presumably sets the output frequency threshold for n-grams of length n -- TODO confirm in the implementation
00021         void scanSuffixArrayForHighFreqNgramType();  // presumably reports n-gram types that meet the configured thresholds -- TODO confirm
00022         void scanSuffixArrayForCountofCounts(int maxFreqConsidered);  // presumably collects count-of-counts statistics up to maxFreqConsidered -- TODO confirm
00023         void scanSuffixArrayForTypeToken();  // presumably collects type/token statistics -- TODO confirm
00024 
00025         C_SuffixArrayScanningBase(const char * filename, unsigned int maxN);  // NOTE(review): maxN is unsigned int here, but the member named maxN below is int
00026         C_SuffixArrayScanningBase();  // default constructor; NOTE(review): the initial state of the pointer members is not visible from this header
00027         ~C_SuffixArrayScanningBase();  // not declared virtual here; NOTE(review): confirm the base class destructor is virtual if objects are deleted through a base pointer
00028 
00029 protected:  // helpers and state available to derived classes
00030         void setParam_maxFreqConsidered(int maxFreqConsidered);  // presumably assigns the maxFreqConsidered member -- TODO confirm
00031         void constructCountOfCountsTable();  // presumably allocates and/or fills countOfCountsTable -- TODO confirm
00032         void initializeForScanning(const char * filename, unsigned int maxN);  // takes the same parameters as the two-argument constructor; presumably the shared setup path -- TODO confirm
00033         
00034         int maxN;  // signed, unlike the unsigned int maxN parameters above
00035         int maxFreqConsidered;  // same name and type as the parameter of setParam_maxFreqConsidered
00036         
00037         unsigned int * countOfCountsTable;  // raw pointer; NOTE(review): ownership and element count are not visible here
00038         
00039         IndexType vocIdForSentStart;  // IndexType is declared elsewhere; presumably the vocabulary id of the sentence-start marker -- TODO confirm
00040         IndexType vocIdForSentEnd;  // presumably the vocabulary id of the sentence-end marker -- TODO confirm
00041         IndexType vocIdForCorpusEnd;  // presumably the vocabulary id of the end-of-corpus marker -- TODO confirm
00042 
00043 private:  // implementation details not exposed to derived classes
00044         void scanSuffixArray(char actionType);  // presumably the shared worker behind the public scanSuffixArrayFor* methods, selected by actionType -- TODO confirm the accepted codes
00045 
00046         S_nGramScanningInfoElement * nGramScanningList; // S_nGramScanningInfoElement is declared elsewhere; raw pointer, ownership and length not visible here
00047         
00048         
00049         unsigned int * typeFreq;  // raw pointer; presumably type counts per n-gram length -- TODO confirm
00050         unsigned int * tokenFreq;  // raw pointer; presumably token counts per n-gram length -- TODO confirm
00051 };
00052 
00053 #endif

Generated on Fri Jul 6 23:11:08 2007 for SALM by doxygen 1.5.1