1 | #include <sys/stat.h>
|
---|
2 | #include <sys/types.h>
|
---|
3 | #include <dirent.h>
|
---|
4 | #include <fcntl.h>
|
---|
5 | #include <unistd.h>
|
---|
6 | #include <stdio.h>
|
---|
7 | #include <stdlib.h>
|
---|
8 | #include <string.h>
|
---|
9 | #include <assert.h>
|
---|
10 | #include <ctype.h>
|
---|
11 |
|
---|
12 | #include "index_cw.h"
|
---|
13 |
|
---|
14 | void walkTree(FILE *fp, WordTreeFreqPtr wordFreqTree)
|
---|
15 | {
|
---|
16 | assert(fp);
|
---|
17 | assert(wordFreqTree);
|
---|
18 | if(wordFreqTree->less)
|
---|
19 | walkTree(fp, wordFreqTree->less);
|
---|
20 | {
|
---|
21 | fprintf(fp,"%s %d\n", wordFreqTree->word, wordFreqTree->count);
|
---|
22 | }
|
---|
23 | if(wordFreqTree->more)
|
---|
24 | walkTree(fp, wordFreqTree->more);
|
---|
25 | }
|
---|
26 |
|
---|
27 | /*
|
---|
28 | Print the entire list of possibly interesting
|
---|
29 | words for all the documents
|
---|
30 | */
|
---|
31 | void printTreeToFile(char *workDir, long docNum, WordTreeFreqPtr wordFreqTree)
|
---|
32 | {
|
---|
33 |
|
---|
34 | FILE *fp;
|
---|
35 | char wdsNameBuff[200];
|
---|
36 |
|
---|
37 | assert(workDir);
|
---|
38 |
|
---|
39 | if(docNum == 1 || docNum % SUBDIRSIZE == 0)
|
---|
40 | createSubDir(workDir,docNum / SUBDIRSIZE,"dir");
|
---|
41 |
|
---|
42 | sprintf(wdsNameBuff,"%s/dir%ld/doc%ld.txt",workDir,docNum / SUBDIRSIZE,docNum);
|
---|
43 | fp=fopen(wdsNameBuff,"w");
|
---|
44 | if(fp==NULL)
|
---|
45 | {
|
---|
46 | fprintf(stderr,"MG_GET: Could not create %s\n",wdsNameBuff);
|
---|
47 | exit (1);
|
---|
48 | }
|
---|
49 | if(wordFreqTree)walkTree(fp, wordFreqTree);
|
---|
50 | else fprintf(stderr,"printTreeToFile with null tree\n");
|
---|
51 | fclose(fp);
|
---|
52 | }
|
---|
53 |
|
---|
/*
  Create the directory <workDir>/<pref><dirNum>, requesting mode 0777.

  workDir  directory in which the new directory is created
  dirNum   number appended to the prefix
  pref     name prefix of the new directory

  Creation is best-effort: nothing is returned and the program keeps
  running on failure.  A name that does not fit the local buffer is
  reported on stderr and no directory is created.  A failing mkdir() is
  reported on stderr unless the path already is a directory.
*/
void createSubDir(char *workDir, long dirNum, char *pref)
{
    char newName[256];
    struct stat st;
    int len;

    /* Bounded formatting: an over-long workDir/pref must not overrun newName. */
    len = snprintf(newName, sizeof newName, "%s/%s%ld", workDir, pref, dirNum);
    if (len < 0 || (size_t)len >= sizeof newName)
    {
        fprintf(stderr, "MG_GET: Directory name too long: %s/%s%ld\n",
                workDir, pref, dirNum);
        return;
    }

    /* An already existing directory is the normal case on repeated calls,
       so only complain when the path is still not a directory afterwards. */
    if (mkdir(newName, 0777) != 0
        && (stat(newName, &st) != 0 || !S_ISDIR(st.st_mode)))
    {
        fprintf(stderr, "MG_GET: Could not create directory %s\n", newName);
    }
}
|
---|
60 |
|
---|
/*
  Create the directory <workDir>/<name>, requesting mode 0777.

  workDir  directory in which the new directory is created
  name     name of the new directory

  Creation is best-effort: nothing is returned and the program keeps
  running on failure.  A name that does not fit the local buffer is
  reported on stderr and no directory is created.  A failing mkdir() is
  reported on stderr unless the path already is a directory.
*/
void createDir(char *workDir, char *name)
{
    char newName[256];
    struct stat st;
    int len;

    /* Bounded formatting: an over-long workDir/name must not overrun newName. */
    len = snprintf(newName, sizeof newName, "%s/%s", workDir, name);
    if (len < 0 || (size_t)len >= sizeof newName)
    {
        fprintf(stderr, "MG_GET: Directory name too long: %s/%s\n",
                workDir, name);
        return;
    }

    /* An already existing directory is not an error for the caller,
       so only complain when the path is still not a directory afterwards. */
    if (mkdir(newName, 0777) != 0
        && (stat(newName, &st) != 0 || !S_ISDIR(st.st_mode)))
    {
        fprintf(stderr, "MG_GET: Could not create directory %s\n", newName);
    }
}
|
---|
67 |
|
---|
68 |
|
---|
69 | /*
|
---|
70 | create and initialise a tree node
|
---|
71 | */
|
---|
72 | int createTreeFreqNode(char *wordBuff,
|
---|
73 | WordTreeFreqPtr *theTree)
|
---|
74 | {
|
---|
75 | //fprintf(stderr,"createTreeFreqNode %s\n",wordBuff);
|
---|
76 | *theTree=(WordTreeFreqPtr)malloc(sizeof(WordTreeFreq));
|
---|
77 | assert(*theTree);
|
---|
78 | (*theTree)->less=NULL;
|
---|
79 | (*theTree)->more=NULL;
|
---|
80 | (*theTree)->count=1;
|
---|
81 | (*theTree)->word = malloc(strlen(wordBuff)+2);
|
---|
82 | assert((*theTree)->word);
|
---|
83 | strcpy((*theTree)->word,wordBuff);
|
---|
84 | return 0;
|
---|
85 | }
|
---|
86 |
|
---|
87 | /*
|
---|
88 | Put a word in the tree pointed to by theTree from the document docNum
|
---|
89 | */
|
---|
90 |
|
---|
91 | int addWordToWds(char *wordBuff, WordTreeFreqPtr *theTree)
|
---|
92 | {
|
---|
93 | int cmp;
|
---|
94 |
|
---|
95 | //fprintf(stderr,"addWordToWds %s\n",wordBuff);
|
---|
96 | if (*theTree==NULL)
|
---|
97 | return createTreeFreqNode(wordBuff,theTree);
|
---|
98 | else
|
---|
99 | {
|
---|
100 | cmp=strcmp(wordBuff,(*theTree)->word);
|
---|
101 | if (cmp==0)
|
---|
102 | {
|
---|
103 | (*theTree)->count++;
|
---|
104 | return 0;
|
---|
105 | }
|
---|
106 | else
|
---|
107 | if (cmp < 0)
|
---|
108 | return addWordToWds(wordBuff, &(*theTree)->less);
|
---|
109 | else
|
---|
110 | return addWordToWds(wordBuff, &(*theTree)->more);
|
---|
111 | }
|
---|
112 | }
|
---|
113 |
|
---|
114 | /*
|
---|
115 | Free the tree of WordTreeFreq structures
|
---|
116 | */
|
---|
117 | void vapeFreqs( WordTreeFreqPtr theTree)
|
---|
118 | {
|
---|
119 | if(theTree==NULL) return;
|
---|
120 | vapeFreqs(theTree->less);
|
---|
121 | vapeFreqs(theTree->more);
|
---|
122 | free(theTree->word);
|
---|
123 | free(theTree);
|
---|
124 | }
|
---|
125 |
|
---|
126 |
|
---|
127 |
|
---|