00001 #if !defined (_HEADER_SUFFIX_ARRAY_SCANNING_BASE_CLASS_) 00002 #define _HEADER_SUFFIX_ARRAY_SCANNING_BASE_CLASS_ 00003 00004 00005 #include "_SuffixArrayApplicationBase.h" 00006 00007 00008 00009 00017 class C_SuffixArrayScanningBase : public C_SuffixArrayApplicationBase 00018 { 00019 public: 00020 void setNgramOutputFreqThresh(int n, unsigned int freqThresh); 00021 void scanSuffixArrayForHighFreqNgramType(); 00022 void scanSuffixArrayForCountofCounts(int maxFreqConsidered); 00023 void scanSuffixArrayForTypeToken(); 00024 00025 C_SuffixArrayScanningBase(const char * filename, unsigned int maxN); 00026 C_SuffixArrayScanningBase(); 00027 ~C_SuffixArrayScanningBase(); 00028 00029 protected: 00030 void setParam_maxFreqConsidered(int maxFreqConsidered); 00031 void constructCountOfCountsTable(); 00032 void initializeForScanning(const char * filename, unsigned int maxN); 00033 00034 int maxN; 00035 int maxFreqConsidered; 00036 00037 unsigned int * countOfCountsTable; 00038 00039 IndexType vocIdForSentStart; 00040 IndexType vocIdForSentEnd; 00041 IndexType vocIdForCorpusEnd; 00042 00043 private: 00044 void scanSuffixArray(char actionType); 00045 00046 S_nGramScanningInfoElement * nGramScanningList; 00047 00048 00049 unsigned int * typeFreq; 00050 unsigned int * tokenFreq; 00051 }; 00052 00053 #endif
// 1.5.1  (presumably the version stamp of the tool that generated this listing; not part of the header)