source: trunk/greenstone3-extensions/vishnu/src/ckindexer/index_cw.h@ 8189

Last change on this file since 8189 was 8189, checked in by kjdon, 20 years ago

first version of Imperial College's Visualiser code

  • Property svn:keywords set to Author Date Id Revision
File size: 2.6 KB
Line 
/*
 * index_cw.h -- constants, record types and function prototypes.
 *
 * NOTE(review): declarations in this header use FILE and size_t, yet the
 * header contains no #include lines of its own; every includer must pull in
 * <stdio.h> beforehand.  Consider making the header self-contained.
 */
1#ifndef INDEX_CW_H
2#define INDEX_CW_H
3
/* NOTE(review): presumably the number of entries grouped into one
 * sub-directory (cf. createSubDir) -- confirm against the implementation. */
4#define SUBDIRSIZE 1000
/* Sentinel value 0xFFFFFFFF (low 32 bits all set).  NOTE(review): presumably
 * returned when a word lookup fails; which function returns it is not visible
 * in this header -- confirm. */
5#define WORD_NOT_FOUND 0xFFFFFFFF
6
7/* Structure to hold the number of occurrences of a word in a document */
8/* Build an ordered tree based on string ordering */
9
/* NOTE(review): presumably an upper bound on a tree or recursion depth;
 * nothing else in this header refers to it -- confirm in the implementation. */
10#define TREEMAXDEPTH 16
/* One node of a binary tree of words: two child links, the word itself and a
 * counter for that word. */
11typedef struct WordTreeFreqTag
12{
13 struct WordTreeFreqTag *less; /* child link; presumably words that sort before `word` -- confirm */
14 struct WordTreeFreqTag *more; /* child link; presumably words that sort after `word` -- confirm */
15 char *word; /* the word's text; who allocates/frees it is not visible from this header */
16 short count; /* counter for this word; a short only guarantees a range up to 32767 */
17} WordTreeFreq;
18
/* Pointer alias for a tree node; functions taking WordTreeFreqPtr * receive a
 * pointer to such a pointer. */
19typedef WordTreeFreq *WordTreeFreqPtr;
20
/* Record with two size_t fields and one short field.  NOTE(review): going by
 * the names this is presumably a per-document entry ordered via cmpDocData --
 * confirm against the implementation. */
21typedef struct DocDataTag
22{
23 size_t pos; /* NOTE(review): presumably a position/offset; units not visible here */
24 size_t dfreq; /* NOTE(review): presumably a document frequency -- confirm */
25 short wfreq; /* NOTE(review): presumably a word frequency; limited to the range of short */
26}DocData;
27
/* Pointer alias for DocData. */
28typedef DocData *DocDataPtr;
29
/* Pair of unsigned long values.  NOTE(review): presumably a frequency together
 * with a file offset, ordered via cmpFreqPos -- confirm against the
 * implementation. */
30typedef struct FreqPosTag
31{
32 unsigned long freq; /* NOTE(review): presumably an occurrence count -- confirm */
33 unsigned long offset; /* NOTE(review): presumably a byte offset into a file -- confirm */
34} FreqPos;
35
/* Pointer alias for FreqPos; allocateEnough() takes a pointer to one of these. */
36typedef FreqPos *FreqPosPtr;
37
38
/* Output helpers that take a word tree.  Definitions are not in this header.
 * NOTE(review): walkTree presumably traverses theTree and writes its entries
 * to fp -- confirm. */
39void walkTree(FILE *fp, WordTreeFreqPtr theTree);
/* NOTE(review): presumably writes the tree built for document docNum to a file
 * located under workDir -- confirm. */
40void printTreeToFile(char *workDir, long docNum, WordTreeFreqPtr wordFreqTree);
41
42
/* Tree construction: both functions receive the tree by address
 * (WordTreeFreqPtr *), so they can replace the caller's pointer.
 * NOTE(review): the meaning of the int return values is not visible here. */
43int createTreeFreqNode(char *wordBuff, WordTreeFreqPtr *theTree);
44int addWordToWds(char *wordBuff, WordTreeFreqPtr *theTree);
/* NOTE(review): presumably releases or resets the tree's nodes -- confirm. */
45void vapeFreqs( WordTreeFreqPtr theTree);
/* Directory helpers operating relative to workDir. */
46void createSubDir(char *workDir, long dirNum, char *pref);
47void createDir(char *workDir, char *name);
/* Takes a non-const string, so it presumably lower-cases str in place -- confirm. */
48void strlower(char *str);
/* NOTE(review): whether len is the buffer capacity or the current length is
 * not visible here -- confirm before relying on bounds. */
49void addToBuff(char *buff, char ch, int len);
50long printToOut(FILE *fp, char *name, char *workDir, long docNum, char opt);
51
/* Command-line and input-traversal entry points.  Definitions are not in this
 * header; the notes below are derived from the signatures only. */
52char getOpt(int argc, char **argv);
/* Returns a non-const char *.  NOTE(review): presumably a newly built path
 * from dirname and filename; ownership of the result is not visible here. */
53char *generateName(const char *dirname, const char *filename);
/* pipeViaZcat, regularFile and traveldir share the workDir/docNum/opt
 * parameters and all return long.  NOTE(review): presumably the updated
 * document number -- confirm. */
54long pipeViaZcat(char *new_name, char *workDir,
55 long docNum, char opt);
56long regularFile(char *new_name, char *workDir,
57 long docNum, char opt);
58long traveldir(const char *dirname, char *workDir,
59 int depth, long docNum, char opt);
/* Index-building steps operating on files under workDir. */
60void createIndexes(char *workDir, long maxDocNum);
61void mergeFiles(char *workDir, long maxDocNum,int depth);
62unsigned long makeInteresting(char *workDir, long docNum);
63void indexFiles(char *workDir, long maxDocNum, unsigned long wordCount);
/* Both readers fill wordbuff from fp but take no buffer size: the caller must
 * supply a buffer large enough for any word; the required size is not visible
 * from this header. */
64long getWordAndCount(char *wordbuff,FILE *fp);
65void getWord(char *wordbuff,FILE *fp);
/* Signature matches the comparator type taken by qsort()/bsearch().
 * NOTE(review): presumably applied to DocData elements -- confirm. */
66int cmpDocData(const void *a,const void *b);
67
/* NOTE(review): by its name, presumably returns the smaller of a and b -- confirm. */
68unsigned long ulMin(unsigned long a,unsigned long b);
/* NOTE(review): presumably counts records of recSize bytes from the current
 * position of fp to end of file -- confirm. */
69size_t countTillEnd(FILE *fp, size_t recSize);
/* Takes a character buffer of fsize bytes plus two arrays of char pointers.
 * NOTE(review): presumably points wordsArray[i]/digitsArray[i] at fields
 * inside buff for up to count entries; meaning of the int result is not
 * visible here -- confirm. */
70int fixLittlePointers(char *buff,
71 char *wordsArray[],
72 char *digitsArray[],
73 int count,
74 size_t fsize);
75
/* Takes a buffer and its length; returns int although len is size_t, so the
 * result cannot exceed INT_MAX. */
76int countWords(char *buff, size_t len);
/* Later index-building steps operating on files under workDir/dir. */
77void createDocFreqOrderedNdx(char *workDir,unsigned long wordCount);
78void mergeTempFreqIndex(char *workDir,long tempCount);
79void reorder(char *dir, long wordCount);
/* Signature matches the comparator type taken by qsort()/bsearch().
 * NOTE(review): presumably applied to FreqPos elements -- confirm. */
80int cmpFreqPos(const void *a,const void *b);
/* Receives the FreqPos pointer by address, so it can set the caller's pointer.
 * NOTE(review): presumably allocates room for num entries and returns the
 * number obtained -- confirm. */
81size_t allocateEnough(FreqPosPtr *space, long num);
/* The next line is a disabled prototype; HashPtr is not declared in this header. */
82// HashPtr * buildTable(FILE *wordFp,FILE *freqFp, size_t wordCount);
83#endif
Note: See TracBrowser for help on using the repository browser.