source: gsdl/trunk/src/recpt/OIDtools.cpp@ 14390

Last change on this file since 14390 was 14390, checked in by mdewsnip, 15 years ago

Efficiency improvement: only get the filter to retrieve metadata in get_children() if some has been requested. Otherwise, the filter makes an unnecessary request to the GDBM database for each child node. This makes a HUGE difference in some cases (eg. the NLNZ Papers Past newspapers collection).

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 10.6 KB
Line 
1/**********************************************************************
2 *
3 * OIDtools.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "OIDtools.h"
27#include <assert.h>
28
29// returns (in top) the top level of OID (i.e. everything
30// up until the first dot)
31void get_top (const text_t &OID, text_t &top) {
32
33 top.clear();
34 if (OID.empty()) return;
35
36 text_t::const_iterator begin = OID.begin();
37 text_t::const_iterator end = OID.end();
38
39 top.appendrange (begin, findchar(begin, end, '.'));
40}
41
42
43// checks if OID is top level (i.e. contains no dots)
44bool is_top (const text_t &OID) {
45
46 if (OID.empty()) return true;
47
48 text_t::const_iterator here = OID.begin();
49 text_t::const_iterator end = OID.end();
50 here = findchar (here, end, '.');
51
52 if (here == end) return true;
53 return false;
54}
55
56
57// get_parents_array loads the parents array with all the parents of the
58// document or classification specified by OID (not including OID itself)
59void get_parents_array (const text_t &OID, text_tarray &parents) {
60
61 text_t::const_iterator here = OID.begin ();
62 text_t::const_iterator end = OID.end ();
63 text_t thisparent;
64
65 while (here != end) {
66 if (*here == '.') parents.push_back(thisparent);
67 thisparent.push_back(*here);
68 ++here;
69 }
70}
71
72
73// get_info does a protocol call and returns (in response) the metadata
74// associated with OID. Metadata should be loaded with whatever
75// metadata fields are to be requested.
76
77bool get_info (const text_t &OID, const text_t &collection, const text_t &lang,
78 const text_tset &metadata, bool getParents,
79 recptproto *collectproto, FilterResponse_t &response,
80 ostream &logout) {
81
82 response.clear();
83
84 comerror_t err = noError;
85 FilterRequest_t request;
86 request.clear();
87
88 request.filterName = "NullFilter";
89 request.filterLang = lang;
90 request.filterResultOptions = FRmetadata;
91 request.getParents = getParents;
92 request.fields = metadata;
93 request.docSet.push_back (OID);
94 assert (collectproto != NULL);
95
96 collectproto->filter (collection, request, response, err, logout);
97 if (err != noError) {
98 outconvertclass text_t2ascii;
99 logout << text_t2ascii
100 << "Error: call to filter failed for " << OID
101 << " in OIDtools::get_info ("
102 << get_comerror_string (err) << ")\n";
103 return false;
104 }
105
106 return true;
107}
108
109// overloaded, to allow "custom" filter options.
110bool get_info (const text_t &OID, const text_t &collection, const text_t &lang,
111 const text_tset &metadata, const OptionValue_tarray &options,
112 bool getParents,
113 recptproto *collectproto, FilterResponse_t &response,
114 ostream &logout) {
115
116 response.clear();
117
118 comerror_t err = noError;
119 FilterRequest_t request;
120
121 request.filterName = "NullFilter";
122 request.filterLang = lang;
123 request.filterResultOptions = FRmetadata;
124 request.getParents = getParents;
125 request.filterOptions = options;
126 request.fields = metadata;
127 request.docSet.push_back (OID);
128
129 assert (collectproto != NULL);
130 collectproto->filter (collection, request, response, err, logout);
131 if (err != noError) {
132 outconvertclass text_t2ascii;
133 logout << text_t2ascii
134 << "Error: call to filter failed for " << OID
135 << " in OIDtools::get_info ("
136 << get_comerror_string (err) << ")\n";
137 return false;
138 }
139
140 return true;
141}
142
143bool get_info (const text_tarray &OIDs, const text_t &collection, const text_t &lang,
144 const text_tset &metadata, bool getParents,
145 recptproto *collectproto, FilterResponse_t &response,
146 ostream &logout) {
147
148 response.clear();
149 if (OIDs.empty()) return true;
150
151 comerror_t err = noError;
152 FilterRequest_t request;
153
154 request.filterName = "NullFilter";
155 request.filterLang = lang;
156 request.filterResultOptions = FRmetadata;
157 request.getParents = getParents;
158 request.fields = metadata;
159
160 request.docSet = OIDs;
161
162 collectproto->filter (collection, request, response, err, logout);
163 if (err != noError) {
164 outconvertclass text_t2ascii;
165 logout << text_t2ascii
166 << "Error: call to filter failed in OIDtools::get_info ("
167 << get_comerror_string (err) << ")\n";
168 return false;
169 }
170
171 return true;
172}
173
174
175
176// has_children returns true if OID has children
177bool has_children (const text_t &OID, const text_t &collection, const text_t &lang,
178 recptproto *collectproto, ostream &logout) {
179
180 FilterResponse_t response;
181 text_tset metadata;
182 metadata.insert ("haschildren");
183
184 if (get_info (OID, collection, lang, metadata, false, collectproto, response, logout)) {
185 if (response.docInfo[0].metadata["haschildren"].values[0] == "1")
186 return true;
187 }
188 return false;
189}
190
191
192// get_children does a protocol call and returns (in response) the OIDs and
193// metadata of all the children of OID. The metadata set should be loaded
194// with whatever metadata fields are to be requested.
195
196bool get_children (const text_t &OID, const text_t &collection, const text_t &lang,
197 const text_tset &metadata, bool getParents,
198 recptproto *collectproto, FilterResponse_t &response,
199 ostream &logout) {
200
201 response.clear();
202
203 comerror_t err = noError;
204 FilterRequest_t request;
205 OptionValue_t option;
206
207 option.name = "ParentNode";
208 option.value = OID;
209 request.filterOptions.push_back (option);
210 request.filterName = "BrowseFilter";
211 request.filterLang = lang;
212 request.filterResultOptions = FROID;
213
214 // Efficiency improvement: only get the filter to retrieve metadata if some has been requested
215 // Otherwise, the filter makes an unnecessary request to the GDBM database for each child node
216 // By Michael Dewsnip, DL Consulting Ltd
217 if (metadata.size() > 0)
218 {
219 request.filterResultOptions |= FRmetadata;
220 }
221
222 request.fields = metadata;
223 request.getParents = getParents;
224
225 collectproto->filter (collection, request, response, err, logout);
226
227 if (err != noError) {
228 outconvertclass text_t2ascii;
229 logout << text_t2ascii
230 << "Error: call to filter failed for " << OID
231 << " in OIDtools::get_children ("
232 << get_comerror_string (err) << ")\n";
233 return false;
234 }
235 return true;
236}
237
238// get_parent returns the parent of the document or classification
239// specified by OID
240text_t get_parent (const text_t& OID)
241{
242 if (OID.empty() || is_top (OID)) return g_EmptyText;
243
244 text_t::const_iterator begin = OID.begin();
245 text_t::const_iterator here = (OID.end() - 1);
246
247 while (here >= begin) {
248 if (*here == '.')
249 break;
250 if (here == begin)
251 break;
252 --here;
253 }
254
255 if (here != begin) {
256 text_t parentOID;
257 parentOID.appendrange(begin, here);
258 return parentOID;
259 }
260
261 return g_EmptyText;
262}
263
264// takes an OID like ".2 and replaces the " with parent
265void translate_parent (text_t &OID, const text_t &parent) {
266
267 text_t::const_iterator here = OID.begin();
268 text_t::const_iterator end = OID.end();
269 text_t temp;
270
271 while (here != end) {
272 if (*here == '"') temp += parent;
273 else temp.push_back (*here);
274 ++here;
275 }
276 OID = temp;
277}
278
279// shrink_parent does the opposite to translate_parent
280void shrink_parent (text_t &OID) {
281
282 text_tarray tmp;
283 splitchar (OID.begin(), OID.end(), '.', tmp);
284 OID = "\"." + tmp.back();
285}
286
287// checks if OID uses ".fc", ".lc", ".pr", "rt", ".ns",
288// or ".ps" syntax (first child, last child, parent, root,
289// next sibling, previous sibling)
290bool needs_translating (const text_t &OID) {
291
292 if (OID.size() < 4) return false;
293
294 text_t tail = substr (OID.end()-3, OID.end());
295 if (tail == ".fc" || tail == ".lc" || tail == ".pr" ||
296 tail == ".rt" || tail == ".ns" || tail == ".ps") return true;
297
298 return false;
299}
300
301// strips the ".fc", ".lc", ".pr", ".ns",
302// or ".ps" suffix from the end of OID
303void strip_suffix (text_t &OID) {
304
305 text_t tail = substr (OID.end()-3, OID.end());
306 while (tail == ".fc" || tail == ".lc" || tail == ".pr" ||
307 tail == ".rt" || tail == ".ns" || tail == ".ps") {
308 OID.erase (OID.end()-3, OID.end());
309 tail = substr (OID.end()-3, OID.end());
310 }
311}
312
313static void recurse_contents (ResultDocInfo_t section, const bool &is_classify,
314 const text_t &collection, const text_t &lang,
315 const text_tset &metadata,
316 recptproto *collectproto, FilterResponse_t &response,
317 ostream &logout) {
318
319 int haschildren = section.metadata["haschildren"].values[0].getint();
320 const text_t &doctype = section.metadata["doctype"].values[0];
321
322 if ((haschildren == 1) && ((!is_classify) || (doctype == "classify"))) {
323 FilterResponse_t tmp;
324 bool getParents = false;
325 get_children (section.OID, collection, lang, metadata, getParents, collectproto, tmp, logout);
326 ResultDocInfo_tarray::iterator thisdoc = tmp.docInfo.begin();
327 ResultDocInfo_tarray::iterator lastdoc = tmp.docInfo.end();
328 while (thisdoc != lastdoc) {
329 response.docInfo.push_back (*thisdoc);
330 recurse_contents (*thisdoc, is_classify, collection, lang, metadata,
331 collectproto, response, logout);
332 ++thisdoc;
333 }
334 }
335}
336
337// get_contents returns OIDs and metadata of all contents
338// below (and including) OID.
339void get_contents (const text_t &topOID, const bool &is_classify,
340 text_tset &metadata, const text_t &collection, const text_t &lang,
341 recptproto *collectproto, FilterResponse_t &response,
342 ostream &logout) {
343
344 if (topOID.empty()) return;
345 response.clear();
346
347 metadata.insert ("haschildren");
348 metadata.insert ("doctype");
349
350 // get topOIDs info
351 if (get_info (topOID, collection, lang, metadata, false, collectproto, response, logout)) {
352 recurse_contents (response.docInfo[0], is_classify, collection, lang,
353 metadata, collectproto, response, logout);
354 }
355}
356
357// is_child_of returns true if OID2 is a child of OID1
358bool is_child_of(const text_t &OID1, const text_t &OID2) {
359
360 text_t parent = get_parent(OID2);
361
362 while (!parent.empty()) {
363 if (parent == OID1) return true;
364 parent = get_parent(parent);
365 }
366 return false;
367}
Note: See TracBrowser for help on using the repository browser.