source: main/trunk/greenstone2/common-src/indexers/mg/src/text/mg_decompress_text.c@ 25147

Last change on this file since 25147 was 25147, checked in by kjdon, 12 years ago

merged 64_bit_Greenstone branch into trunk, rev 25139

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 4.0 KB
Line 
1#include "sysfuncs.h"
2
3#include "filestats.h"
4#include "netorder.h"
5#include "memlib.h"
6#include "messages.h"
7#include "huffman.h"
8#include "bitio_m_mem.h"
9#include "bitio_m.h"
10#include "bitio_stdio.h"
11#include "huffman_stdio.h"
12
13#include "mg_files.h"
14#include "mg.h"
15#include "backend.h" /* compressed_dict struct */
16#include "text_get.h"
17#include "words.h"
18
/* Separator bytes written to stdout between pieces of decompressed text:
   EOD marks the end of a document, EOP the end of a paragraph inside one. */
#define EOD '\002'
#define EOP '\003'

/* These are defined in mgquery, and they need to be here for backend.o */
/* Messy I know but this was meant to be a quick hack :-) */
#if defined(PARADOCNUM) || defined(NZDL)
int *Paragraph = NULL;
int Documents = 0;
#endif
28
29int
30main (int argc, char **argv)
31{
32 FILE *text, *text_idx, *para;
33 File *text_comp_dict = NULL, *text_aux_dict = NULL, *text_fast_comp_dict = NULL;
34
35 int ch;
36 mg_u_long start, finish, len;
37 mg_u_long total_chars = 0;
38 u_char *c_buffer = 0;
39 int buf_len = -1;
40 u_char *uc_buffer = 0;
41 int ULen;
42 compression_dict *cd = NULL;
43 compressed_text_header cth;
44 mg_u_long doc;
45 int count;
46 char *filename = "";
47 char path[512];
48 opterr = 0;
49 msg_prefix = argv[0];
50
51 while ((ch = getopt (argc, argv, "f:d:h")) != -1)
52 switch (ch)
53 {
54 case 'f': /* input file */
55 filename = optarg;
56 break;
57 case 'd':
58 set_basepath (optarg);
59 break;
60 case 'h':
61 case '?':
62 fprintf (stderr, "usage: %s [-h] [-d directory] -f name\n",
63 argv[0]);
64 exit (1);
65 }
66
67 /* Open files */
68 text = open_file (filename, TEXT_SUFFIX, "rb", MAGIC_TEXT, MG_ABORT);
69 fread ((char *)&cth, sizeof (cth), 1, text);
70
71 text_idx = open_file (filename, TEXT_IDX_SUFFIX, "rb", MAGIC_TEXI, MG_ABORT);
72 fread ((char *)&cth, sizeof (cth), 1, text_idx);
73
74 NTOHUL(cth.num_of_docs);
75 NTOHD(cth.num_of_bytes); /* [RJM 07/97: 4G limit] */
76 NTOHUL(cth.num_of_words);
77 NTOHUL(cth.length_of_longest_doc);
78 NTOHD(cth.ratio);
79
80 para = open_file (filename, INVF_PARAGRAPH_SUFFIX, "rb", MAGIC_PARAGRAPH, MG_CONTINUE);
81
82 sprintf (path, FILE_NAME_FORMAT, get_basepath(), filename, TEXT_DICT_FAST_SUFFIX); /* [RPAP - Feb 97: WIN32 Port] */
83 if (!(text_fast_comp_dict = Fopen (path, "rb", MAGIC_FAST_DICT)))
84 {
85 sprintf (path, FILE_NAME_FORMAT, get_basepath(), filename, TEXT_DICT_SUFFIX); /* [RPAP - Feb 97: WIN32 Port] */
86 text_comp_dict = Fopen (path, "rb", MAGIC_DICT);
87 if (!text_comp_dict)
88 FatalError (1, "Could not open compressed dictionary");
89 sprintf (path, FILE_NAME_FORMAT, get_basepath(), filename, TEXT_DICT_AUX_SUFFIX); /* [RPAP - Feb 97: WIN32 Port] */
90 text_aux_dict = Fopen (path, "rb", MAGIC_AUX_DICT);
91 }
92
93 cd = LoadCompDict (text_comp_dict, text_aux_dict, text_fast_comp_dict);
94
95
96 /* Uncompress text */
97
98 doc = 1;
99 fread ((char *)&start, sizeof (start), 1, text_idx);
100 if (para)
101 fread ((char *)&count, sizeof (count), 1, para);
102 else
103 count = 1;
104
105 for (;;)
106 {
107 fread ((char *) &finish, sizeof (finish), 1, text_idx);
108 len = finish - start;
109
110 if ((int) len > buf_len)
111 {
112 if (c_buffer)
113 {
114 Xfree (c_buffer);
115 Xfree (uc_buffer);
116 }
117 if (!(c_buffer = Xmalloc (len)))
118 FatalError (1, "Cannot allocate memory for compressed buffer");
119 if (!(uc_buffer = Xmalloc ((int) (cth.ratio * 1.01 * len) + 100)))
120 FatalError (1, "Cannot allocate memory for uncompressed buffer");
121 buf_len = len;
122 }
123
124 fread (c_buffer, 1, len, text);
125 DecodeText (cd, c_buffer, len, uc_buffer, &ULen);
126 fwrite (uc_buffer, ULen, sizeof (u_char), stdout);
127 fflush (stdout);
128
129 total_chars += ULen;
130
131 if (++doc > cth.num_of_docs)
132 {
133 if (para)
134 fputc (EOP, stdout);
135 fputc (EOD, stdout);
136 fflush (stdout);
137 break;
138 }
139
140 if (!--count)
141 {
142 /* End of document */
143 fputc (EOD, stdout);
144 if (para)
145 fread ((char *)&count, sizeof (count), 1, para);
146 else
147 count = 1;
148 }
149 else
150 fputc (EOP, stdout);
151 fflush (stdout);
152 start = finish;
153 }
154
155 /* Close files */
156
157 fclose (text);
158 fclose (text_idx);
159 if (para)
160 fclose (para);
161
162#if 0
163 printf ("\n#Total chars output = %u\n", total_chars);
164#endif
165
166 return 0;
167}
Note: See TracBrowser for help on using the repository browser.