source: trunk/gsdl/src/recpt/comtypes.h@ 6584

Last change on this file since 6584 was 6584, checked in by kjdon, 20 years ago

Fiddled around with segmenting for Chinese text. Haven't changed how the
segmentation is done, or what character ranges are used.
But when it's done is now controlled by collect.cfg. There is a new
option, separate_cjk, with values true or false (default false). Segmentation
is only done if this is set to true. The option is passed as a global option
to all plugins by the import.pl script, so the user just needs to add it
once to the config file, not as an option to every plugin.
The queryaction uses this option too, to determine whether or not to segment
the query.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 11.5 KB
Line 
1/**********************************************************************
2 *
3 * comtypes.h --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26
27#ifndef COMTYPES_H
28#define COMTYPES_H
29
30#include "gsdlconf.h"
31#include "text_t.h"
32
// Pull in the STL containers used by the types below.  Which header
// spelling is used depends on the GSDL_USE_* configuration macros.
#if defined(GSDL_USE_OBJECTSPACE)
// Forward slashes are used on purpose: a backslash inside <...> is not
// portable in standard C++ (undefined or implementation-defined depending
// on the language revision), whereas '/' is a valid path separator for
// #include on Windows compilers as well.
#  include <ospace/std/vector>
#  include <ospace/std/list>
#  include <ospace/std/map>
#elif defined(GSDL_USE_STL_H)
#  include <vector.h>
#  include <list.h>
#  include <map.h>
#else
#  include <vector>
#  include <list>
#  include <map>
#endif
46
47
48enum comerror_t {noError, authenticationFailure, protocolError,
49 configurationError, systemProblem, syntaxError};
50text_t get_comerror_string (comerror_t err);
51
52// ShortColInfo ::= SEQUENCE {
53// name GeneralString,
54// host GeneralString,
55// port INTEGER
56// }
57struct ShortColInfo_t {
58 void clear ();
59 ShortColInfo_t () {clear();}
60
61 text_t name;
62 text_t host;
63 int port;
64};
65
66
67// ColInfoResponse ::= SEQUENCE {
68// shortInfo [0] IMPLICIT ShortCollectionInfo,
69// isPublic [2] IMPLICIT BOOLEAN, -- whether has anonymous access
70// isBeta [3] IMPLICIT BOOLEAN, -- beta if still under development
71// buildDate [4] IMPLICIT GeneralizedTime,
72// ccsCols [5] IMPLICIT StringSet, -- collections that form cross-col search
73// languages [6] IMPLICIT StringSet, -- languages in the collection
74// numDocs [7] IMPLICIT INTEGER,
75// numSections [8] IMPLICIT INTEGER OPTIONAL,
76// numWords [9] IMPLICIT INTEGER OPTIONAL,
77// numBytes [10] IMPLICIT INTEGER OPTIONAL
78// collectionmeta [11] IMPLICIT StringSet
79// format [12] IMPLICIT StringSet
80// building [13] IMPLICIT StringSet
81// receptionist [14] IMPLICIT GeneralString
82// buildType [15] IMPLICIT GeneralString
83// searchTypes [16] IMPLICIT StringSet
84// }
85struct ColInfoResponse_t {
86 void clear ();
87 ColInfoResponse_t () {clear();}
88
89 ShortColInfo_t shortInfo;
90 bool isPublic;
91 bool isBeta;
92 bool isSegmented;
93 unsigned long buildDate;
94 text_tarray ccsCols; // empty if collection does not use cross-collection searching
95 text_tarray languages;
96 unsigned long numDocs; // 0 if not known
97 unsigned long numSections; // 0 if not known
98 unsigned long numWords; // 0 if not known
99 unsigned long numBytes; // 0 if not known
100 text_tmap collectionmeta;
101 text_tmap format;
102 text_tmap building;
103 text_t httpdomain; // GRB: could these two http items need removing
104 text_t httpprefix;
105 text_t receptionist;
106 text_t buildType; // 'mg' or 'mgpp'
107 text_t authenticate; // 'document' or 'collection'
108 text_t auth_group; // 'mygroup' 'yourgroup'
109 text_tmap public_documents; // the acl to allow access to listed documents
110 text_tmap private_documents; // the acl to disallow access to listed documents
111 text_tarray searchTypes; // form, plain, empty if collection uses mg, or has no searching facility
112};
113
114
115// -- filter options which might be supported for the QueryFilter
116// --
117// -- onePerQuery StartResults integer
118// -- onePerQuery EndResults integer
119// -- onePerQuery QueryType enumerated (boolean, ranked)
120// -- onePerTerm Term string ???
121// -- onePerTerm Casefold boolean
122// -- onePerTerm Stem boolean
123// -- onePerTerm Index enumerated
124// -- onePerTerm Subcollection enumerated
125// --
126// -- filter options which might be supported for the BrowseFilter
127// --
128// -- onePerQuery StartResults integer
129// -- onePerQuery EndResults integer
130// -- onePerQuery ParentNode string ("" will return the browsing available)
131// --
132// -- The NullFilter always returns the set it was given, it doesn't have
133// -- any options
134
135// InfoFiltersResponse ::= SEQUENCE {
136// filterNames StringSet
137// }
138struct InfoFiltersResponse_t {
139 void clear ();
140
141 text_tset filterNames;
142};
143
144// InfoFilterOptionsRequest ::= SEQUENCE {
145// filterName GeneralString
146// }
147struct InfoFilterOptionsRequest_t {
148 void clear ();
149
150 text_t filterName;
151};
152
153// FilterOption ::= SEQUENCE {
154// name GeneralString,
155// type ENUMERATED {booleant(0), integert(1), enumeratedt(2), stringt(3)},
156// repeatable ENUMERATED {onePerQuery(0), onePerTerm(1), nPerTerm(2)},
157// defaultValue GeneralString,
158// -- the interpretation of the validValues depends on the type
159// -- for boolean: the first value is the false value, the second is true value
160// -- for integer: the first value is the minimum, the second the maximum
161// -- for enumerated: all values a listed
162// -- for string: this value is ignored
163// validValues StringSequence
164// }
165struct FilterOption_t {
166 void clear ();
167 void check_defaultValue ();
168 FilterOption_t () {clear();}
169
170 text_t name;
171
172 enum type_t {booleant=0, integert=1, enumeratedt=2, stringt=3};
173 type_t type;
174
175 enum repeatable_t {onePerQuery=0, onePerTerm=1, nPerTerm=2};
176 repeatable_t repeatable;
177
178 text_t defaultValue;
179 text_tarray validValues;
180};
181
182bool operator==(const FilterOption_t &x, const FilterOption_t &y);
183bool operator<(const FilterOption_t &x, const FilterOption_t &y);
184
185
186typedef map<text_t, FilterOption_t, lttext_t> FilterOption_tmap;
187
188
189// InfoFilterOptionsResponse ::= SEQUENCE {
190// filterOptions SET OF FilterOption
191// }
192struct InfoFilterOptionsResponse_t {
193 void clear ();
194
195 FilterOption_tmap filterOptions;
196};
197
198
199// OptionValue ::= SEQUENCE {
200// name GeneralString,
201// value GeneralString
202// }
203struct OptionValue_t {
204 void clear ();
205
206 text_t name;
207 text_t value;
208};
209
210typedef vector<OptionValue_t> OptionValue_tarray;
211
212
213// -- Terms are presented in the same order that they are requested,
214// -- any information relating to the terms is in reference to the
215// -- index specified for that term.
216//
217// FilterRequest ::= SEQUENCE {
218// filterName [0] GeneralString,
219// filterOptions [1] IMPLICIT SEQUENCE OF OptionValue,
220// docSet [2] IMPLICIT StringSequence, -- the OID "" represents everything
221// filterResultOptions [3] IMPLICIT BIT STRING {termFreq(0), matchTerms(1), OID(2),
222// subCol(3), ranking(4), docFreq(5),
223// metadata(6)}
224//
225// -- the next set of options are for the metadata request,
226// -- they can be left blank if metadata is not wanted
227// requestParams [4] IMPLICIT GeneralString, -- used to negotiate the metadata content
228// refParams [5] IMPLICIT GeneralString, -- used to decide whether to return a
229// -- reference to the data or the actual data
230// fields [6] IMPLICIT StringSet
231// getParents [7] IMPLICIT BOOLEAN -- gets metadata of all parents too
232// }
233#define FRtermFreq 1
234#define FRmatchTerms 2
235#define FROID 4
236#define FRsubCol 8
237#define FRranking 16
238#define FRdocFreq 32
239#define FRmetadata 64
240// used to indicate a full text browse query for mgpp
241#define FRfullTextBrowse 128
242
243struct FilterRequest_t {
244 void clear ();
245 FilterRequest_t () {clear();}
246
247 text_t filterName;
248 OptionValue_tarray filterOptions;
249 text_tarray docSet; // empty if not used
250 int filterResultOptions; // use the FR* defines above
251
252 text_t requestParams; // empty if not used
253 text_t refParams; // empty if not used
254 text_tset fields; // empty if not used
255 bool getParents; // defaults to false
256};
257
258
259// TermInfo ::= SEQUENCE {
260// term [0] GeneralString,
261// freq [1] IMPLICIT INTEGER, -- 0 if not requested
262// matchTerms [2] IMPLICIT StringSequence -- empty if not requested
263// }
264struct TermInfo_t {
265 void clear ();
266 TermInfo_t () {clear();}
267
268 text_t term;
269 int freq; // 0 if not requested
270 text_tarray matchTerms; // empty if not requested
271};
272
273typedef vector<TermInfo_t> TermInfo_tarray;
274
275
276// MetadataInfo ::= SEQUENCE {
277// params [0] IMPLICIT GeneralString,
278// isRef [1] IMPLICIT BOOLEAN,
279// values [3] IMPLICIT SEQUENCE OF GeneralString,
280// name [4] IMPLICIT GeneralString
281// }
282struct MetadataInfo_t {
283 text_t params;
284 bool isRef;
285 text_tarray values;
286 MetadataInfo_t *parent;
287
288 void clear ();
289 MetadataInfo_t ();
290 MetadataInfo_t (const MetadataInfo_t &x); // copy constructor
291 ~MetadataInfo_t ();
292 MetadataInfo_t &operator=(const MetadataInfo_t &x);
293};
294
295typedef map<text_t, MetadataInfo_t, lttext_t> MetadataInfo_tmap;
296
297// ResultDocInfo ::= SEQUENCE {
298// OID [0] IMPLICIT GeneralString,
299// ranking [1] IMPLICIT INTEGER, -- 0 if not requested, range 0-10000
300// docFreq [2] IMPLICIT SEQUENCE OF INTEGER, -- empty if not requested
301// metadata [3] IMPLICIT SEQUENCE OF MetadataInfo, -- no longer a SEQUENCE (SET maybe??)
302// classifier_metadata_type [4] IMPLICIT GeneralString, -- empty if not requested
303// classifier_metadata_offset [5] IMPLICIT INTEGER, -- 0 if not requested
304// }
305struct ResultDocInfo_t {
306 void clear ();
307 ResultDocInfo_t () {clear();}
308
309 text_t OID;
310 int result_num; // place in results list
311 int ranking; // 0 if not requested (real ranking*10000)
312 int num_terms_matched; // not available on all versions of mg
313 int num_phrase_match; // not available on all versions of mg
314 vector<int> docFreq; // empty if not requested
315 MetadataInfo_tmap metadata; // empty if not requested
316 text_t classifier_metadata_type; // empty if not requested
317 int classifier_metadata_offset; // 0 if not requested
318 ResultDocInfo_t &operator=(const ResultDocInfo_t &x);
319};
320
321typedef vector<ResultDocInfo_t> ResultDocInfo_tarray;
322
323
324// FilterResponse ::= SEQUENCE {
325// numDocs [0] IMPLICIT INTEGER,
326// isApprox [1] ENUMERATED {Exact(0), Approximate(1), MoreThan(2)}, -- whether numDocs is approximate
327// termInfo [2] IMPLICIT SEQUENCE OF TermInfo, -- empty if not required
328// docInfo [3] IMPLICIT SEQUENCE OF ResultDocInfo -- empty if not required
329// }
330
331enum isapprox {Exact=0, Approximate=1, MoreThan=2};
332
333struct FilterResponse_t {
334 void clear ();
335 FilterResponse_t () {clear();}
336
337 int numDocs;
338 isapprox isApprox;
339 TermInfo_tarray termInfo; // empty if not requested
340 ResultDocInfo_tarray docInfo; // empty if not requested
341
342 FilterResponse_t &operator=(const FilterResponse_t &x);
343};
344
345
346// DocumentRequest ::= SEQUENCE {
347// OID GeneralString,
348// docType GeneralString,
349// docFormat GeneralString
350// }
351struct DocumentRequest_t {
352 void clear ();
353 DocumentRequest_t () {clear();}
354
355 text_t OID;
356 text_t docType;
357 text_t docFormat;
358};
359
360
361// DocumentResponse ::= SEQUENCE {
362// doc OCTET STRING
363// }
364
365struct DocumentResponse_t {
366 void clear ();
367 DocumentResponse_t () {clear();}
368
369 text_t doc;
370};
371
372
373#endif
374
Note: See TracBrowser for help on using the repository browser.