00001 #if !defined(__SUFFIXARRAYAPPLICATIONBASE_H__INCLUDED_) 00002 #define __SUFFIXARRAYAPPLICATIONBASE_H__INCLUDED_ 00003 00004 #include "salm_shared.h" 00005 #include "_IDVocabulary.h" 00006 #include "_String.h" 00007 00008 using namespace std; 00009 00010 typedef struct level1BucketElement 00011 { 00012 TextLenType first; 00013 TextLenType last; 00014 } S_level1BucketElement; 00015 00016 00023 class C_SuffixArrayApplicationBase 00024 { 00025 public: 00026 void loadData(const char *fileNameStem, bool noVoc, bool noOffset, bool noLevel1Bucket); 00027 TextLenType returnCorpusSize(); 00028 00029 C_SuffixArrayApplicationBase(); 00030 virtual ~C_SuffixArrayApplicationBase(); 00031 00032 protected: 00033 TextLenType corpusSize; 00034 00035 void loadVoc(const char * filename); 00036 void loadOffset(const char * filename); 00037 void loadSuffix(const char * filename); 00038 void loadCorpusAndInitMem(const char * filename); 00039 00040 bool noVocabulary; 00041 bool noOffset; 00042 bool noLevel1Bucket; 00043 00044 C_IDVocabulary * voc; 00045 IndexType sentIdStart; 00046 IndexType vocIdForSentStart; 00047 IndexType vocIdForSentEnd; 00048 IndexType vocIdForCorpusEnd; 00049 00050 IndexType * corpus_list; 00051 unsigned char * offset_list; 00052 TextLenType * suffix_list; 00053 00054 S_level1BucketElement * level1Buckets; 00055 00056 }; 00057 00058 #endif // !defined(__SUFFIXARRAYAPPLICATIONBASE_H__INCLUDED_)
1.5.1