1 | #include "sysfuncs.h"
|
---|
2 |
|
---|
3 | #include "filestats.h"
|
---|
4 | #include "netorder.h"
|
---|
5 | #include "memlib.h"
|
---|
6 | #include "messages.h"
|
---|
7 | #include "huffman.h"
|
---|
8 | #include "bitio_m_mem.h"
|
---|
9 | #include "bitio_m.h"
|
---|
10 | #include "bitio_stdio.h"
|
---|
11 | #include "huffman_stdio.h"
|
---|
12 |
|
---|
13 | #include "mg_files.h"
|
---|
14 | #include "mg.h"
|
---|
15 | #include "backend.h" /* compressed_dict struct */
|
---|
16 | #include "text_get.h"
|
---|
17 | #include "words.h"
|
---|
18 |
|
---|
/* Separator bytes written to stdout by main(): EOD is emitted when a
   document ends, EOP between paragraphs that belong to the same document. */
#define EOD '\x02'
#define EOP '\x03'

/* backend.o references the two globals below.  They are normally defined
   in mgquery, so this program has to supply its own definitions.
   (Admittedly messy -- this was meant to be a quick hack.) */
#if defined(NZDL) || defined(PARADOCNUM)
int Documents = 0;
int *Paragraph = NULL;
#endif
28 |
|
---|
29 | int
|
---|
30 | main (int argc, char **argv)
|
---|
31 | {
|
---|
32 | FILE *text, *text_idx, *para;
|
---|
33 | File *text_comp_dict = NULL, *text_aux_dict = NULL, *text_fast_comp_dict = NULL;
|
---|
34 |
|
---|
35 | int ch;
|
---|
36 | unsigned long start, finish, len;
|
---|
37 | u_long total_chars = 0;
|
---|
38 | u_char *c_buffer = 0;
|
---|
39 | int buf_len = -1;
|
---|
40 | u_char *uc_buffer = 0;
|
---|
41 | int ULen;
|
---|
42 | compression_dict *cd = NULL;
|
---|
43 | compressed_text_header cth;
|
---|
44 | u_long doc;
|
---|
45 | int count;
|
---|
46 | char *filename = "";
|
---|
47 | char path[512];
|
---|
48 | opterr = 0;
|
---|
49 | msg_prefix = argv[0];
|
---|
50 |
|
---|
51 | while ((ch = getopt (argc, argv, "f:d:h")) != -1)
|
---|
52 | switch (ch)
|
---|
53 | {
|
---|
54 | case 'f': /* input file */
|
---|
55 | filename = optarg;
|
---|
56 | break;
|
---|
57 | case 'd':
|
---|
58 | set_basepath (optarg);
|
---|
59 | break;
|
---|
60 | case 'h':
|
---|
61 | case '?':
|
---|
62 | fprintf (stderr, "usage: %s [-h] [-d directory] -f name\n",
|
---|
63 | argv[0]);
|
---|
64 | exit (1);
|
---|
65 | }
|
---|
66 |
|
---|
67 | /* Open files */
|
---|
68 | text = open_file (filename, TEXT_SUFFIX, "rb", MAGIC_TEXT, MG_ABORT);
|
---|
69 | fread ((char *)&cth, sizeof (cth), 1, text);
|
---|
70 |
|
---|
71 | text_idx = open_file (filename, TEXT_IDX_SUFFIX, "rb", MAGIC_TEXI, MG_ABORT);
|
---|
72 | fread ((char *)&cth, sizeof (cth), 1, text_idx);
|
---|
73 |
|
---|
74 | NTOHUL(cth.num_of_docs);
|
---|
75 | NTOHD(cth.num_of_bytes); /* [RJM 07/97: 4G limit] */
|
---|
76 | NTOHUL(cth.num_of_words);
|
---|
77 | NTOHUL(cth.length_of_longest_doc);
|
---|
78 | NTOHD(cth.ratio);
|
---|
79 |
|
---|
80 | para = open_file (filename, INVF_PARAGRAPH_SUFFIX, "rb", MAGIC_PARAGRAPH, MG_CONTINUE);
|
---|
81 |
|
---|
82 | sprintf (path, FILE_NAME_FORMAT, get_basepath(), filename, TEXT_DICT_FAST_SUFFIX); /* [RPAP - Feb 97: WIN32 Port] */
|
---|
83 | if (!(text_fast_comp_dict = Fopen (path, "rb", MAGIC_FAST_DICT)))
|
---|
84 | {
|
---|
85 | sprintf (path, FILE_NAME_FORMAT, get_basepath(), filename, TEXT_DICT_SUFFIX); /* [RPAP - Feb 97: WIN32 Port] */
|
---|
86 | text_comp_dict = Fopen (path, "rb", MAGIC_DICT);
|
---|
87 | if (!text_comp_dict)
|
---|
88 | FatalError (1, "Could not open compressed dictionary");
|
---|
89 | sprintf (path, FILE_NAME_FORMAT, get_basepath(), filename, TEXT_DICT_AUX_SUFFIX); /* [RPAP - Feb 97: WIN32 Port] */
|
---|
90 | text_aux_dict = Fopen (path, "rb", MAGIC_AUX_DICT);
|
---|
91 | }
|
---|
92 |
|
---|
93 | cd = LoadCompDict (text_comp_dict, text_aux_dict, text_fast_comp_dict);
|
---|
94 |
|
---|
95 |
|
---|
96 | /* Uncompress text */
|
---|
97 |
|
---|
98 | doc = 1;
|
---|
99 | fread ((char *)&start, sizeof (start), 1, text_idx);
|
---|
100 | if (para)
|
---|
101 | fread ((char *)&count, sizeof (count), 1, para);
|
---|
102 | else
|
---|
103 | count = 1;
|
---|
104 |
|
---|
105 | for (;;)
|
---|
106 | {
|
---|
107 | fread ((char *) &finish, sizeof (finish), 1, text_idx);
|
---|
108 | len = finish - start;
|
---|
109 |
|
---|
110 | if ((int) len > buf_len)
|
---|
111 | {
|
---|
112 | if (c_buffer)
|
---|
113 | {
|
---|
114 | Xfree (c_buffer);
|
---|
115 | Xfree (uc_buffer);
|
---|
116 | }
|
---|
117 | if (!(c_buffer = Xmalloc (len)))
|
---|
118 | FatalError (1, "Cannot allocate memory for compressed buffer");
|
---|
119 | if (!(uc_buffer = Xmalloc ((int) (cth.ratio * 1.01 * len) + 100)))
|
---|
120 | FatalError (1, "Cannot allocate memory for uncompressed buffer");
|
---|
121 | buf_len = len;
|
---|
122 | }
|
---|
123 |
|
---|
124 | fread (c_buffer, 1, len, text);
|
---|
125 | DecodeText (cd, c_buffer, len, uc_buffer, &ULen);
|
---|
126 | fwrite (uc_buffer, ULen, sizeof (u_char), stdout);
|
---|
127 | fflush (stdout);
|
---|
128 |
|
---|
129 | total_chars += ULen;
|
---|
130 |
|
---|
131 | if (++doc > cth.num_of_docs)
|
---|
132 | {
|
---|
133 | if (para)
|
---|
134 | fputc (EOP, stdout);
|
---|
135 | fputc (EOD, stdout);
|
---|
136 | fflush (stdout);
|
---|
137 | break;
|
---|
138 | }
|
---|
139 |
|
---|
140 | if (!--count)
|
---|
141 | {
|
---|
142 | /* End of document */
|
---|
143 | fputc (EOD, stdout);
|
---|
144 | if (para)
|
---|
145 | fread ((char *)&count, sizeof (count), 1, para);
|
---|
146 | else
|
---|
147 | count = 1;
|
---|
148 | }
|
---|
149 | else
|
---|
150 | fputc (EOP, stdout);
|
---|
151 | fflush (stdout);
|
---|
152 | start = finish;
|
---|
153 | }
|
---|
154 |
|
---|
155 | /* Close files */
|
---|
156 |
|
---|
157 | fclose (text);
|
---|
158 | fclose (text_idx);
|
---|
159 | if (para)
|
---|
160 | fclose (para);
|
---|
161 |
|
---|
162 | #if 0
|
---|
163 | printf ("\n#Total chars output = %lu\n", total_chars);
|
---|
164 | #endif
|
---|
165 |
|
---|
166 | return 0;
|
---|
167 | }
|
---|