1 | #ifndef INDEX_CW_H
|
---|
2 | #define INDEX_CW_H
|
---|
#include <stddef.h>   /* size_t */
#include <stdio.h>    /* FILE */

/*
 * SUBDIRSIZE: presumably the number of entries grouped into one
 * sub-directory (see createSubDir) -- TODO confirm against its users,
 * which are not visible in this header.
 */
#define SUBDIRSIZE 1000

/*
 * WORD_NOT_FOUND: sentinel value with the low 32 bits all set.
 * NOTE(review): where unsigned long is wider than 32 bits this constant is
 * NOT equal to (unsigned long)-1; confirm the type it is compared against.
 */
#define WORD_NOT_FOUND 0xFFFFFFFF
6 |
|
---|
/*
 * Structure to hold the number of occurrences of a word in a document.
 * Nodes are linked into an ordered tree based on string ordering.
 */

/* Not referenced elsewhere in this header; presumably a depth limit used
 * when walking the word tree -- TODO confirm in the implementation. */
#define TREEMAXDEPTH 16

typedef struct WordTreeFreqTag
{
    /* Child links.  Presumably `less` leads to words that sort before
     * `word` and `more` to words that sort after it -- confirm against
     * the insertion code. */
    struct WordTreeFreqTag *less;
    struct WordTreeFreqTag *more;

    /* The word stored at this node; who owns the storage is not visible
     * from this header. */
    char *word;

    /* Occurrence count for `word`.
     * NOTE(review): a short is only guaranteed to reach 32767; whether the
     * code that increments it guards against overflow is not visible here. */
    short count;
} WordTreeFreq;

typedef WordTreeFreq *WordTreeFreqPtr;
20 |
|
---|
/*
 * Per-document record.  The field meanings below are inferred from the
 * names only and should be confirmed against the code that fills them in
 * (cmpDocData is declared in this header as a comparator for these records).
 */
typedef struct DocDataTag
{
    size_t pos;    /* presumably a position/offset -- TODO confirm unit */
    size_t dfreq;  /* presumably a document-level frequency -- TODO confirm */
    short  wfreq;  /* presumably a word frequency; limited to the range of short */
} DocData;

typedef DocData *DocDataPtr;
29 |
|
---|
/*
 * Frequency/offset pair.  Presumably ties a frequency value to a location
 * in an index file -- TODO confirm against cmpFreqPos and allocateEnough,
 * the routines in this header that operate on these records.
 */
typedef struct FreqPosTag
{
    unsigned long freq;    /* frequency value */
    unsigned long offset;  /* offset value; what it is relative to is not visible here */
} FreqPos;

typedef FreqPos *FreqPosPtr;
37 |
|
---|
38 |
|
---|
39 | void walkTree(FILE *fp, WordTreeFreqPtr theTree);
|
---|
40 | void printTreeToFile(char *workDir, long docNum, WordTreeFreqPtr wordFreqTree);
|
---|
41 |
|
---|
42 |
|
---|
43 | int createTreeFreqNode(char *wordBuff, WordTreeFreqPtr *theTree);
|
---|
44 | int addWordToWds(char *wordBuff, WordTreeFreqPtr *theTree);
|
---|
45 | void vapeFreqs( WordTreeFreqPtr theTree);
|
---|
/*
 * Directory and buffer helpers.  Descriptions are inferred from the
 * signatures only; confirm against the definitions.
 */

/* Presumably creates a sub-directory of workDir derived from dirNum and the
 * prefix pref -- TODO confirm the naming scheme. */
void createSubDir(char *workDir, long dirNum, char *pref);

/* Presumably creates the directory `name` beneath workDir. */
void createDir(char *workDir, char *name);

/* Takes a mutable string; presumably lower-cases it in place.
 * NOTE(review): external names beginning with "str" followed by a lowercase
 * letter are reserved by the C standard for future library use. */
void strlower(char *str);

/* Presumably stores ch into buff using len as the current length/index
 * -- TODO confirm which, and whether the callee bounds-checks. */
void addToBuff(char *buff, char ch, int len);

/* Returns a long; presumably the next/updated document number after
 * processing the stream fp -- TODO confirm. */
long printToOut(FILE *fp, char *name, char *workDir, long docNum, char opt);
51 |
|
---|
/*
 * Driver, directory traversal and index-building entry points.  Only the
 * signatures are visible here; behavioural notes are assumptions to confirm.
 */

/* Takes main()'s argument vector and returns a single char; presumably the
 * selected option letter. */
char getOpt(int argc, char **argv);

/* Returns a char * built from dirname and filename; presumably a joined
 * path.  Whether the caller must free the result is not visible here. */
char *generateName(const char *dirname, const char *filename);

/* Per-file handlers.  Both share one signature and return a long;
 * presumably the updated document number -- TODO confirm. */
long pipeViaZcat(char *new_name, char *workDir, long docNum, char opt);
long regularFile(char *new_name, char *workDir, long docNum, char opt);

/* Presumably walks dirname recursively (depth is passed explicitly) and
 * returns the updated document number -- TODO confirm. */
long traveldir(const char *dirname, char *workDir,
               int depth, long docNum, char opt);

void createIndexes(char *workDir, long maxDocNum);
void mergeFiles(char *workDir, long maxDocNum, int depth);

/* Returns an unsigned long; indexFiles takes an unsigned long wordCount, so
 * this is presumably a word count -- TODO confirm. */
unsigned long makeInteresting(char *workDir, long docNum);
void indexFiles(char *workDir, long maxDocNum, unsigned long wordCount);

/* Readers that fill wordbuff from fp.  The required capacity of wordbuff
 * is not expressed in the signature -- confirm in the implementation. */
long getWordAndCount(char *wordbuff, FILE *fp);
void getWord(char *wordbuff, FILE *fp);

/* Signature matches the comparison callback taken by qsort()/bsearch();
 * presumably orders DocData records -- TODO confirm the sort key. */
int cmpDocData(const void *a, const void *b);
67 |
|
---|
/* Takes two unsigned long values and returns one; by its name, the smaller
 * of the two. */
unsigned long ulMin(unsigned long a, unsigned long b);

/* Presumably counts how many records of recSize bytes remain in fp up to
 * end-of-file -- TODO confirm whether the stream position is restored. */
size_t countTillEnd(FILE *fp, size_t recSize);

/* Takes a buffer of fsize bytes and two arrays of char pointers with an
 * element count.  Presumably points the array entries at words/digits
 * inside buff -- TODO confirm, including the meaning of the int result. */
int fixLittlePointers(char *buff,
                      char *wordsArray[],
                      char *digitsArray[],
                      int count,
                      size_t fsize);
75 |
|
---|
/* Takes a buffer and its length; presumably returns the number of words
 * found in it -- TODO confirm what delimits a word. */
int countWords(char *buff, size_t len);

/* Index post-processing steps.  What each one reads and writes under its
 * directory argument is not visible from this header. */
void createDocFreqOrderedNdx(char *workDir, unsigned long wordCount);
void mergeTempFreqIndex(char *workDir, long tempCount);
void reorder(char *dir, long wordCount);

/* Signature matches the comparison callback taken by qsort()/bsearch();
 * presumably orders FreqPos records -- TODO confirm the sort key. */
int cmpFreqPos(const void *a, const void *b);
81 | size_t allocateEnough(FreqPosPtr *space, long num);
|
---|
82 | // HashPtr * buildTable(FILE *wordFp,FILE *freqFp, size_t wordCount);
|
---|
83 | #endif
|
---|