source: gsdl/trunk/trunk/mg/src/text/text.h@ 16583

Last change on this file since 16583 was 16583, checked in by davidb, 16 years ago

Undoing change commited in r16582

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 4.5 KB
Line 
1/**************************************************************************
2 *
3 * text.h -- Header file for compression related stuff
4 * Copyright (C) 1994 Neil Sharman
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 * $Id: text.h 16583 2008-07-29 10:20:36Z davidb $
21 *
22 **************************************************************************/
23
24
25
26#ifndef H_TEXT
27#define H_TEXT
28
29#include "huffman.h"
30
31
32
33
34
35/*****************************************************************************
36 *
37 * There are several different methods of compressing the text of the
38 * database. The following defines list the different methods of text
39 * compression
40 *
41 */
42
43
/* Dictionary types: three distinct integer codes (0, 1, 2).
   NOTE(review): presumably the value stored in
   compression_dict_header.dict_type -- confirm against the callers. */
44/* The dictionary contains all the fragments that occur in the collection,
45 i.e. escapes are not possible. */
46#define MG_COMPLETE_DICTIONARY 0
47
48
49/* Certain words have been deleted from the dictionary. The deleted words
50 have been used to create the frequency Huffman codes of the characters.
51 This dictionary has an escape code and may be used to compress novel
52 words. This dictionary may fail if there is a novel character. */
53#define MG_PARTIAL_DICTIONARY 1
54
55
56/* This dictionary has an escape so that novel words and non-words can be
57 coded. The method for coding the novel words and non-words is determined
58 by a dictionary parameter. */
59#define MG_SEED_DICTIONARY 2
60
61
62
63
64
65
66
67
68
69/*****************************************************************************
70 *
71 * With a seed dictionary there are several methods for coding the novel
72 * words and non-words. The following defined values specify the different
73 * methods of coding.
74 *
75 */
76
77
78/* Code novel words and non-words character by character using Huffman codes.
79 The Huffman codes for the word and non-word lengths and characters are
80 generated from the distribution of lengths and characters in the
81 dictionary. */
82#define MG_NOVEL_HUFFMAN_CHARS 0
83
84
85/* This method codes novel words using binary codes. The novel words are stored
86 in an auxiliary dictionary which is built by pass two. */
87#define MG_NOVEL_BINARY 1
88
89
90/* This method codes novel words using delta codes. The novel words are stored
91 in an auxiliary dictionary which is built by pass two. */
92#define MG_NOVEL_DELTA 2
93
94
95/* This method codes novel words using a hybrid version of delta. The novel
96 words are stored in an auxiliary dictionary which is built by pass two. */
97#define MG_NOVEL_HYBRID 3
98
99
100/* This method codes novel words using a hybrid version of delta and an MTF
101 operation (NOTE(review): MTF presumably means move-to-front -- confirm).
102 The novel words are stored in an auxiliary dictionary which is built by
 pass two. */
103#define MG_NOVEL_HYBRID_MTF 4
104
105
106
107
108
109
110/* This specifies the amount of extra space allocated in the
111 compression_dict_header for adding new parameters. As new
112 parameters are added this should be decreased. It is the element
 count of the params[] array in that structure. */
113#define TEXT_PARAMS 15
114
115
116
/* Header record for a compression dictionary. Every member is a u_long or
   an array of u_long; params[] is spare room for parameters added later. */
117typedef struct compression_dict_header
118 {
119 u_long dict_type; /* NOTE(review): presumably one of the MG_*_DICTIONARY values -- confirm */
120 u_long novel_method; /* NOTE(review): presumably one of the MG_NOVEL_* values -- confirm */
121 u_long params[TEXT_PARAMS]; /* reserved slots for new parameters; sized by TEXT_PARAMS */
122 u_long num_words[2]; /* NOTE(review): the two slots presumably hold word / non-word counts -- confirm */
123 u_long num_word_chars[2]; /* NOTE(review): presumably character totals matching num_words[] -- confirm */
124 u_long lookback; /* meaning not visible in this header -- TODO document from the users of this field */
125 }
126compression_dict_header;
127
128
/* Header record combining a huff_data description with size information.
   huff_data and MAX_HUFFCODE_LEN are not declared in this file; they are
   presumably supplied by "huffman.h", which this header includes. */
129typedef struct comp_frags_header
130 {
131 huff_data hd; /* Huffman code description (type declared outside this file) */
132 u_long uncompressed_size; /* NOTE(review): units not stated here, presumably bytes -- confirm */
133 u_long huff_words_size[MAX_HUFFCODE_LEN + 1]; /* MAX_HUFFCODE_LEN + 1 entries; NOTE(review): presumably one per code length 0..MAX_HUFFCODE_LEN -- confirm */
134 }
135comp_frags_header;
136
137
138
/* Header record holding collection-wide totals for the compressed text.
   The byte count is a double rather than a u_long (see the 4G-limit note on
   that member). The trailing dummy member is padding only. */
139typedef struct compressed_text_header
140 {
141 u_long num_of_docs; /* number of documents in collection */
142 u_long num_of_words; /* number of words in collection */
143 double num_of_bytes; /* [RJM 07/97: 4G limit] */
144 double ratio; /* NOTE(review): presumably a compression ratio; definition not visible here -- confirm */
145 u_long length_of_longest_doc; /* (characters) */
146 u_long dummy; /* added to make the structure the same on different architectures */
147 }
148compressed_text_header;
149
150
/* Header record for compression statistics: a document count and a byte
   count. The dummy member sits between them purely as padding, so keep the
   member order as it is. */
151typedef struct compression_stats_header
152 {
153 u_long num_docs; /* number of documents */
154 u_long dummy; /* added to make the structure the same on different architectures */
155 double num_bytes; /* [RJM 07/97: 4G limit] */
156 }
157compression_stats_header;
158
159
/* Header record for fragment statistics: a fragment count and a memory
   figure for those fragments, both u_long. */
160typedef struct frags_stats_header
161 {
162 u_long num_frags; /* number of fragments */
163 u_long mem_for_frags; /* NOTE(review): presumably bytes of memory needed for the fragments -- confirm */
164 }
165frags_stats_header;
166
167
/* Header record for auxiliary fragments: a fragment count and a memory
   figure for those fragments, both u_long.
   NOTE(review): presumably describes the auxiliary dictionary of novel
   words mentioned for the MG_NOVEL_* methods -- confirm. */
168typedef struct aux_frags_header
169 {
170 u_long num_frags; /* number of fragments */
171 u_long mem_for_frags; /* NOTE(review): presumably bytes of memory needed for the fragments -- confirm */
172 }
173aux_frags_header;
174
175
176#endif
Note: See TracBrowser for help on using the repository browser.