root/gsdl/trunk/src/recpt/OIDtools.cpp @ 14390

Revision 14390, 10.6 KB (checked in by mdewsnip, 13 years ago)

Efficiency improvement: only get the filter to retrieve metadata in get_children() if some has been requested. Otherwise, the filter makes an unnecessary request to the GDBM database for each child node. This makes a HUGE difference in some cases (eg. the NLNZ Papers Past newspapers collection).

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
Line 
1/**********************************************************************
2 *
3 * OIDtools.cpp --
4 * Copyright (C) 1999  The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "OIDtools.h"
27#include <assert.h>
28
29// returns (in top) the top level of OID (i.e. everything
30// up until the first dot)
31void get_top (const text_t &OID, text_t &top) {
32
33  top.clear();
34  if (OID.empty()) return;
35
36  text_t::const_iterator begin = OID.begin();
37  text_t::const_iterator end = OID.end();
38
39  top.appendrange (begin, findchar(begin, end, '.'));
40}   
41
42
43// checks if OID is top level (i.e. contains no dots)
44bool is_top (const text_t &OID) {
45
46  if (OID.empty()) return true;
47
48  text_t::const_iterator here = OID.begin();
49  text_t::const_iterator end = OID.end();
50  here = findchar (here, end, '.');
51
52  if (here == end) return true;
53  return false;
54}
55
56
57// get_parents_array loads the parents array with all the parents of the
58// document or classification specified by OID (not including OID itself)
59void get_parents_array (const text_t &OID, text_tarray &parents) {
60
61  text_t::const_iterator here = OID.begin ();
62  text_t::const_iterator end = OID.end ();
63  text_t thisparent;
64
65  while (here != end) {
66    if (*here == '.') parents.push_back(thisparent);
67    thisparent.push_back(*here);
68    ++here;
69  }
70}
71
72
73// get_info does a protocol call and returns (in response) the metadata
74// associated with OID. Metadata should be loaded with whatever
75// metadata fields are to be requested.
76
77bool get_info (const text_t &OID, const text_t &collection, const text_t &lang,
78           const text_tset &metadata, bool getParents,
79           recptproto *collectproto, FilterResponse_t &response,
80           ostream &logout) {
81
82  response.clear();
83
84  comerror_t err = noError;
85  FilterRequest_t request;
86  request.clear();
87
88  request.filterName = "NullFilter";
89  request.filterLang = lang;
90  request.filterResultOptions = FRmetadata;
91  request.getParents = getParents;
92  request.fields = metadata;
93  request.docSet.push_back (OID);
94  assert (collectproto != NULL);
95
96  collectproto->filter (collection, request, response, err, logout);
97  if (err != noError) {
98    outconvertclass text_t2ascii;
99    logout << text_t2ascii
100       << "Error: call to filter failed for " << OID
101       << " in OIDtools::get_info ("
102       << get_comerror_string (err) << ")\n";
103    return false;
104  }
105 
106  return true;
107}
108
109// overloaded, to allow "custom" filter options.
110bool get_info (const text_t &OID, const text_t &collection, const text_t &lang,
111           const text_tset &metadata, const OptionValue_tarray &options,
112           bool getParents,
113           recptproto *collectproto, FilterResponse_t &response,
114           ostream &logout) {
115
116  response.clear();
117
118  comerror_t err = noError;
119  FilterRequest_t request;
120
121  request.filterName = "NullFilter";
122  request.filterLang = lang;
123  request.filterResultOptions = FRmetadata;
124  request.getParents = getParents;
125  request.filterOptions = options;
126  request.fields = metadata;
127  request.docSet.push_back (OID);
128
129  assert (collectproto != NULL);
130  collectproto->filter (collection, request, response, err, logout);
131  if (err != noError) {
132    outconvertclass text_t2ascii;
133    logout << text_t2ascii
134       << "Error: call to filter failed for " << OID
135       << " in OIDtools::get_info ("
136       << get_comerror_string (err) << ")\n";
137    return false;
138  }
139 
140  return true;
141}
142
143bool get_info (const text_tarray &OIDs, const text_t &collection, const text_t &lang,
144           const text_tset &metadata, bool getParents,
145           recptproto *collectproto, FilterResponse_t &response,
146           ostream &logout) {
147
148  response.clear();
149  if (OIDs.empty()) return true;
150
151  comerror_t err = noError;
152  FilterRequest_t request;
153
154  request.filterName = "NullFilter";
155  request.filterLang = lang;
156  request.filterResultOptions = FRmetadata;
157  request.getParents = getParents;
158  request.fields = metadata;
159
160  request.docSet = OIDs;
161 
162  collectproto->filter (collection, request, response, err, logout);
163  if (err != noError) {
164    outconvertclass text_t2ascii;
165    logout << text_t2ascii
166       << "Error: call to filter failed in OIDtools::get_info ("
167       << get_comerror_string (err) << ")\n";
168    return false;
169  }
170
171  return true;
172}
173
174
175
176// has_children returns true if OID has children
177bool has_children (const text_t &OID, const text_t &collection, const text_t &lang,
178           recptproto *collectproto, ostream &logout) {
179
180  FilterResponse_t response;
181  text_tset metadata;
182  metadata.insert ("haschildren");
183
184  if (get_info (OID, collection, lang, metadata, false, collectproto,   response, logout)) {
185    if (response.docInfo[0].metadata["haschildren"].values[0] == "1")
186      return true;
187  }
188  return false;
189}
190
191
192// get_children does a protocol call and returns (in response) the OIDs and
193// metadata of all the children of OID. The metadata set should be loaded
194// with whatever metadata fields are to be requested.
195
196bool get_children (const text_t &OID, const text_t &collection, const text_t &lang,
197           const text_tset &metadata, bool getParents,
198           recptproto *collectproto, FilterResponse_t &response,
199           ostream &logout) {
200 
201  response.clear();
202
203  comerror_t err = noError;
204  FilterRequest_t request;
205  OptionValue_t option;
206
207  option.name = "ParentNode";
208  option.value = OID;
209  request.filterOptions.push_back (option);
210  request.filterName = "BrowseFilter";
211  request.filterLang = lang;
212  request.filterResultOptions = FROID;
213
214  // Efficiency improvement: only get the filter to retrieve metadata if some has been requested
215  // Otherwise, the filter makes an unnecessary request to the GDBM database for each child node
216  // By Michael Dewsnip, DL Consulting Ltd
217  if (metadata.size() > 0)
218  {
219    request.filterResultOptions |= FRmetadata;
220  }
221
222  request.fields = metadata;
223  request.getParents = getParents;
224
225  collectproto->filter (collection, request, response, err, logout);
226
227  if (err != noError) {
228    outconvertclass text_t2ascii;
229    logout << text_t2ascii
230       << "Error: call to filter failed for " << OID
231       << " in OIDtools::get_children ("
232       << get_comerror_string (err) << ")\n";
233    return false;
234  }
235  return true;
236}
237
238// get_parent returns the parent of the document or classification
239// specified by OID
240text_t get_parent (const text_t& OID)
241{
242  if (OID.empty() || is_top (OID)) return g_EmptyText;
243
244  text_t::const_iterator begin = OID.begin();
245  text_t::const_iterator here = (OID.end() - 1);
246
247  while (here >= begin) {
248    if (*here == '.')
249      break;
250    if (here == begin)
251      break;
252    --here;
253  }
254
255  if (here != begin) {
256    text_t parentOID;
257    parentOID.appendrange(begin, here);
258    return parentOID;
259  }
260
261  return g_EmptyText;
262}
263
264// takes an OID like ".2 and replaces the " with parent
265void translate_parent (text_t &OID, const text_t &parent) {
266
267  text_t::const_iterator here = OID.begin();
268  text_t::const_iterator end = OID.end();
269  text_t temp;
270
271  while (here != end) {
272    if (*here == '"') temp += parent;
273    else temp.push_back (*here);
274    ++here;
275  }
276  OID = temp;
277}
278
279// shrink_parent does the opposite to translate_parent
280void shrink_parent (text_t &OID) {
281 
282  text_tarray tmp;
283  splitchar (OID.begin(), OID.end(), '.', tmp);
284  OID = "\"." + tmp.back();
285}
286
287// checks if OID uses ".fc", ".lc", ".pr", "rt", ".ns",
288// or ".ps" syntax (first child, last child, parent, root,
289// next sibling, previous sibling)
290bool needs_translating (const text_t &OID) {
291
292  if (OID.size() < 4) return false;
293
294  text_t tail = substr (OID.end()-3, OID.end());
295  if (tail == ".fc" || tail == ".lc" || tail == ".pr" ||
296      tail == ".rt" || tail == ".ns" || tail == ".ps") return true;
297
298  return false;
299}
300
301// strips the ".fc", ".lc", ".pr", ".ns",
302// or ".ps" suffix from the end of OID
303void strip_suffix (text_t &OID) {
304
305  text_t tail = substr (OID.end()-3, OID.end());
306  while (tail == ".fc" || tail == ".lc" || tail == ".pr" ||
307     tail == ".rt" || tail == ".ns" || tail == ".ps") {
308    OID.erase (OID.end()-3, OID.end());
309    tail = substr (OID.end()-3, OID.end());
310  }
311}
312
313static void recurse_contents (ResultDocInfo_t section, const bool &is_classify,
314                  const text_t &collection, const text_t &lang,
315                  const text_tset &metadata,
316                  recptproto *collectproto, FilterResponse_t &response,
317                  ostream &logout) {
318
319  int haschildren = section.metadata["haschildren"].values[0].getint();
320  const text_t &doctype = section.metadata["doctype"].values[0];
321
322  if ((haschildren == 1) && ((!is_classify) || (doctype == "classify"))) {
323    FilterResponse_t tmp;
324    bool getParents = false;
325    get_children (section.OID, collection, lang, metadata, getParents, collectproto, tmp, logout);
326    ResultDocInfo_tarray::iterator thisdoc = tmp.docInfo.begin();
327    ResultDocInfo_tarray::iterator lastdoc = tmp.docInfo.end();
328    while (thisdoc != lastdoc) {
329      response.docInfo.push_back (*thisdoc);
330      recurse_contents (*thisdoc, is_classify, collection, lang, metadata,
331            collectproto, response, logout);
332      ++thisdoc;
333    }
334  }
335}
336
337// get_contents returns OIDs and metadata of all contents
338// below (and including) OID.
339void get_contents (const text_t &topOID, const bool &is_classify,
340           text_tset &metadata, const text_t &collection, const text_t &lang,
341           recptproto *collectproto, FilterResponse_t &response,
342           ostream &logout) {
343
344  if (topOID.empty()) return;
345  response.clear();
346
347  metadata.insert ("haschildren");
348  metadata.insert ("doctype");
349
350  // get topOIDs info
351  if (get_info (topOID, collection, lang, metadata, false, collectproto, response, logout)) {
352    recurse_contents (response.docInfo[0], is_classify, collection, lang,
353              metadata, collectproto, response, logout);
354  }
355}
356
357// is_child_of returns true if OID2 is a child of OID1
358bool is_child_of(const text_t &OID1, const text_t &OID2) {
359
360  text_t parent = get_parent(OID2);
361
362  while (!parent.empty()) {
363    if (parent == OID1) return true;
364    parent = get_parent(parent);
365  }
366  return false;
367}
Note: See TracBrowser for help on using the browser.