_SuffixArrayApplicationBase.h

Go to the documentation of this file.
00001 #if !defined(__SUFFIXARRAYAPPLICATIONBASE_H__INCLUDED_)
00002 #define __SUFFIXARRAYAPPLICATIONBASE_H__INCLUDED_
00003 
00004 #include "salm_shared.h"
00005 #include "_IDVocabulary.h"
00006 #include "_String.h"
00007 
00008 using namespace std;
00009 
// Plain pair of TextLenType values named `first` and `last`.
// C_SuffixArrayApplicationBase keeps a pointer to elements of this type in its
// level1Buckets member.
// NOTE(review): presumably the first and last suffix-array positions covered by
// one level-1 bucket -- confirm against the code that fills level1Buckets.
00010 typedef struct level1BucketElement
00011 {
00012         TextLenType first;
00013         TextLenType last;
00014 } S_level1BucketElement;
00015 
00016 
// Judging by its name, the common base class for suffix-array applications.
// It declares one public loading entry point (loadData), a corpus-size
// accessor, per-file loader helpers, and raw pointers to the vocabulary,
// corpus, offset, suffix and level-1 bucket data.
//
// Only declarations are visible in this header. Every remark below marked
// NOTE(review) is inferred from identifier names and must be confirmed against
// the implementation file.
//
// NOTE(review): the class holds raw pointers and declares a destructor, but no
// copy constructor or copy assignment operator. If the destructor releases the
// pointed-to arrays, copying an instance would be unsafe -- confirm.
00023 class C_SuffixArrayApplicationBase  
00024 {
00025 public:
              // NOTE(review): presumably loads the data files whose names are
              // derived from fileNameStem; the three flags presumably skip the
              // vocabulary, the offsets and the level-1 buckets respectively
              // (they mirror the protected members of the same meaning) -- confirm.
00026         void loadData(const char *fileNameStem, bool noVoc, bool noOffset, bool noLevel1Bucket);
              // Returns a TextLenType; NOTE(review): presumably the corpusSize member -- confirm.
00027         TextLenType returnCorpusSize();
00028 
00029         C_SuffixArrayApplicationBase();
              // Virtual, so objects of derived classes can be destroyed through a
              // pointer to this base class.
00030         virtual ~C_SuffixArrayApplicationBase();
00031 
00032 protected:      
              // NOTE(review): presumably the number of entries in corpus_list -- confirm.
00033         TextLenType corpusSize;
00034 
              // Per-file loader helpers, each taking the name of one file.
              // NOTE(review): presumably called from loadData and responsible for
              // filling voc, offset_list, suffix_list and corpus_list -- confirm.
00035         void loadVoc(const char * filename);
00036         void loadOffset(const char * filename);
00037         void loadSuffix(const char * filename);
00038         void loadCorpusAndInitMem(const char * filename);
00039 
              // NOTE(review): presumably copies of the flags passed to loadData,
              // recording which optional data was not loaded -- confirm.
00040         bool noVocabulary;
00041         bool noOffset;
00042         bool noLevel1Bucket;
00043 
              // Pointer to the vocabulary object (C_IDVocabulary comes from _IDVocabulary.h).
00044         C_IDVocabulary * voc;
              // NOTE(review): presumably the first ID value used for sentence IDs, and
              // the vocabulary IDs reserved for the sentence-start, sentence-end and
              // corpus-end markers -- confirm.
00045         IndexType sentIdStart;
00046         IndexType vocIdForSentStart;
00047         IndexType vocIdForSentEnd;
00048         IndexType vocIdForCorpusEnd;
00049 
              // Raw pointers to the loaded arrays.
              // NOTE(review): presumably corpus_list holds the corpus as vocabulary IDs,
              // offset_list one byte of offset information per corpus position, and
              // suffix_list the suffix array of corpus positions; ownership and
              // lengths are not visible here -- confirm.
00050         IndexType * corpus_list;
00051         unsigned char * offset_list;
00052         TextLenType * suffix_list;
00053 
              // Pointer to S_level1BucketElement entries (a first/last pair each).
              // NOTE(review): presumably one entry per vocabulary ID -- confirm.
00054         S_level1BucketElement * level1Buckets;
00055 
00056 };
00057 
00058 #endif // !defined(__SUFFIXARRAYAPPLICATIONBASE_H__INCLUDED_)

Generated on Fri Jul 6 23:11:07 2007 for SALM by  doxygen 1.5.1