source: gsdl/trunk/runtime-src/src/oaiservr/abstractlistaction.cpp@ 16720

Last change on this file since 16720 was 16720, checked in by mdewsnip, 16 years ago

Removed some unused variables.

  • Property svn:keywords set to Author Date Id Revision
File size: 11.9 KB
Line 
1#include "abstractlistaction.h"
2#include "recptprototools.h"
3
4#include "oaitools.h"
5
6//--------------------------------------------------------------------------------------------------
7
8bool abstractlistaction::output_content(ostream &output, recptproto *protocol, oaiargs &params)
9{
10 bool prevDocSeen;
11 ResumptionToken *token = NULL;
12
13 // start the call; clear down the total number of output documents
14 this->outputDocs = 0;
15
16 // We don't actually handle resumptionTokens yet; if we get one, ignore it
17 if (params["resumptionToken"] != "") {
18 token = new ResumptionToken(params["resumptionToken"]);
19 }
20
21 this->replyToken = NULL;
22
23 // if we've been asked for a set, then use it!
24 if (params["set"] != "") {
25 // get the children of this set
26 text_t gsdlSet = params["set"];
27 text_t gsdlCollect = "";
28
29 // given 'demo:CL2', toGSDL returns 'demo' in gsdlCollect and 'CL2' in gsdlSet. If there is no further
30 // set specified after the name of the collection however, then gsdlSet is empty.
31 oaiclassifier::toGSDL(gsdlCollect, gsdlSet);
32
33 // If gsdlSet is empty, then the user is requesting all the identifiers for the collection, so
34 // we simply output all docs via their oai_id tag. But if a specific subset IS requested, then
35 // use recurse_set() to traverse any sub classifiers to find the relevant docs.
36 if(gsdlSet == ""){
37 ColInfoResponse_t cinfo;
38 comerror_t err;
39 this->output_content_for_col(output, protocol, gsdlCollect, cinfo, err, params);
40 }
41 else {
42 if (this->check_classifier(protocol, gsdlCollect, gsdlSet)) {
43 this->recurse_set(output, protocol, gsdlCollect, gsdlSet, params, token);
44 }
45 }
46 }
47 // output all records in all hierarchies
48 else {
49 this->output_content_for_all(output, protocol, params);
50 }
51
52 // If - regardless of set required - no documents have been seen, throw an error.
53 if (this->configuration->getOAIVersion() >= 200 && this->prevDocSeen == false) {
54 errorType = "noRecordsMatch";
55 this->output_error(output, errorType);
56
57 return false;
58 }
59
60 // do a resumption token if required; errors cancel a token...
61 if (this->replyToken != NULL && this->errorType == "") {
62 // Don't add any whitespace around the resumption token as it can confuse harvesters/validators
63 output << " <resumptionToken>" << this->replyToken->getToken() << "</resumptionToken>" << endl;
64 }
65
66 return true;
67}
68
69//--------------------------------------------------------------------------------------------------
70
71void abstractlistaction::output_content_for_col(ostream &output, recptproto *protocol, text_t &gsdlCollect,
72 ColInfoResponse_t &cinfo, comerror_t &err, oaiargs &params)
73{ int startDoc = 0;
74
75 // get the collection information
76 protocol->get_collectinfo(gsdlCollect, cinfo, err, *this->logout);
77
78 // check resumption token
79 if (params["resumptionToken"] != "") {
80 ResumptionToken token(params["resumptionToken"]);
81 if (token.getCollection() == gsdlCollect) {
82 startDoc = token.getPosition() - 1; // first document is said to be 1..
83 }
84 }
85
86 // If numDocs is 0, do nothing - this->prevDocSeen will stay false if this is the only collection
87 // looked at, or will keep whatever value it had prior to this col (ensures that if the flag has
88 // been set to true by a previous collection that this won't overwrite it to be false).
89 if (cinfo.numDocs > 0) {
90 int errorCount = 0; // Count the number of errors found in the given collection
91
92 for (long i = startDoc; i < cinfo.numDocs; ++i) {
93 if (errorCount > 3) { // If num errors reaches the cut-off value, bail.
94 cerr << "Error: too many records(" << errorCount << ") in the " << gsdlCollect
95 << " collection have invalid or non-existant oai_ids - skipping remainder of collection.\n";
96 return;
97 }
98
99 text_t oai_id = "oai.";
100 oai_id += i;
101
102 text_t gsdl_id = oaiclassifier::getGSDL_OID(gsdlCollect, oai_id, protocol, *this->logout);
103
104 if (gsdl_id == "") { // If the string is empty, then the document didn't have an oai_id, so
105 ++errorCount; // increase error count
106 continue;
107 }
108
109
110 // Check that the item with the 0ID 'gsdl_id' has a lastmodified field that fits within
111 // the required date range (if specified).
112 if (this->inDateRange(params["from"], params["until"], gsdlCollect, gsdl_id, protocol, output)) {
113 if (this->output_document(output, protocol, gsdlCollect, gsdl_id, params["metadataPrefix"])) {
114 // this should be an IF statement, where prevDocSeen is only set to true if the above
115 // function call returns true (indicating that the doc supported the metadata prefix) but
116 // for some reason this is always false. This means that if no doc in the requested set supports
117 // the metadata format, the "no records match" error that should be thrown won't be...
118 //
119 // GRB: the above comment is no longer true; proper checks are made
120 this->prevDocSeen = true;
121 ++this->outputDocs;
122 }
123 }
124
125 // if we've output the number of resumption documents; prepare a resumptionToken
126 if (this->outputDocs == this->configuration->resumeAfter()) {
127 this->replyToken = new ResumptionToken(gsdlCollect, "", "");
128 this->replyToken->setPosition("", i+2);
129 break;
130 }
131 }
132
133 cinfo.clear(); // Clear for next collection to use (if there is one).
134 }
135}
136
137//--------------------------------------------------------------------------------------------
138// Returns true if at least one document record is found
139void abstractlistaction::output_content_for_all(ostream &output, recptproto *protocol, oaiargs &params)
140{
141 ColInfoResponse_t cinfo;
142 comerror_t err;
143 text_tarray collections;
144 text_t gsdlCollect = "";
145 ResumptionToken *token = NULL;
146
147 // get a list of the collections available
148 collections = this->configuration->getCollectionsList();
149 // protocol->get_collection_list(collections, err, output);
150
151 if (params["resumptionToken"] != "") {
152 token = new ResumptionToken(params["resumptionToken"]);
153 }
154
155 for(int current_col = 0; current_col < collections.size(); ++current_col){
156 gsdlCollect = collections[current_col];
157
158 // ignore all leading collections before the one that matches the resumptiontoken
159 if (token != NULL &&
160 token->getCollection() != gsdlCollect)
161 { continue;
162 }
163
164 this->output_content_for_col(output, protocol, gsdlCollect, cinfo, err, params);
165
166 // once we've output at least one collection, continue
167 // outputting all others until the resumption total hits
168 token = NULL;
169
170 if (this->outputDocs == this->configuration->resumeAfter()) {
171 break;
172 }
173 }
174}
175
176//-------------------------------------------------------------------------------------------------
177// Check that the requested from/until dates don't include a time, as this would be asking for too
178// fine a level of granularity, one that greenstone doesn't support. An OAI error must be thrown.
179/*
180bool abstractlistaction::granularityTooFine(text_t &from, text_t &until)
181{
182 if (from != "" && from.){
183
184 }
185
186}
187*/
188//-------------------------------------------------------------------------------------------------
189
190bool abstractlistaction::check_classifier(recptproto *protocol, const text_t &collection,
191 const text_t &classifier)
192{ text_t topClass;
193 FilterResponse_t response;
194 text_tset metadata;
195 ofstream logout("oai.log", ios::app);
196
197 // exclude false children of a top-level classifier immediately...
198 if (!get_info(classifier, collection, "", metadata, false, protocol, response, logout)) {
199 return false;
200 }
201
202 // now check the top-level parent
203 metadata.insert("supportsmemberof");
204
205 text_t::const_iterator dot = findchar(classifier.begin(), classifier.end(), '.');
206 if (dot != classifier.end()) {
207 topClass = substr(classifier.begin(), dot);
208 }
209 else {
210 topClass = classifier;
211 }
212
213 if (!get_info(topClass, collection, "", metadata, false, protocol, response, logout)) {
214 return false;
215 }
216
217 if (response.docInfo[0].metadata["supportsmemberof"].values.size() == 0) {
218 return false;
219 }
220
221 if (response.docInfo[0].metadata["supportsmemberof"].values[0] != "true") {
222 return false;
223 }
224
225 return true;
226}
227
228void abstractlistaction::recurse_set(ostream &output, recptproto *protocol, const text_t &collection,
229 const text_t &classifier, oaiargs &params, ResumptionToken *resumptionToken)
230{
231 // metadata for this call
232 FilterResponse_t response;
233 text_tset metadata;
234 ofstream logout("oai.log", ios::app);
235 text_t from = params["from"];
236 text_t until = params["until"];
237 text_t metadataPrefix = params["metadataPrefix"];
238 // ResumptionToken resumptionToken(params["resumptionToken"]);
239 int startPos = 0;
240
241 // This is a recursive function, and so just because the current set is empty doesn't mean we necessarily
242 // want to throw a 'noRecordsMatch' error; another set (parent/sibling/child) may have had documents. It
243 // is therefore not enough to check that the response object in the current iteration has no docs - we
244 // must also verify that NO OTHER set has had any documents. This is done with the 'prevDocSeen' flag.
245 // It is set to FALSE initially, but as soon as we see a set that isn't empty, it is set to TRUE. The
246 // 'noRecordsMatch' error will only be thrown if, after all appropriate sets have been recursed into,
247 // the 'prevDocSeen' flag is still FALSE. The function returns false if no docs were seen, allowing us to
248 // throw the noRecordsMatch error.
249
250 // bool prevDocSeen = false;
251
252 get_children(classifier, collection, "", metadata, false, protocol, response, *this->logout);
253
254 if (params["resumptionToken"] != "") {
255 // if we're at a resumptionToken
256 if (classifier == resumptionToken->getNode()) {
257 startPos = resumptionToken->getPosition();
258 }
259 else {
260 text_t fullNode = resumptionToken->getNode();
261 text_t::iterator leafIter = fullNode.begin() + classifier.size();
262
263 // if the next character isn't a dot, blow up!
264 if (*leafIter != '.') {
265 // fatal error;
266 exit(1);
267 }
268
269 // get the first '.' after the current classifier point;
270 text_t::iterator separator = findchar(leafIter + 1, fullNode.end(), '.');
271
272 // now, create a new subpath
273 text_t nextNode = substr(fullNode.begin(), separator);
274
275 // seek forward; TODO: improve performance of this
276 for (int c = 0; c < response.numDocs; ++c) {
277 if (response.docInfo[c].OID == nextNode) {
278 startPos = c;
279 break;
280 }
281 }
282 }
283
284 // We need to subtract one from the startPos value to turn it into an index value
285 startPos--;
286 }
287
288 for (int c = startPos; c < response.numDocs; ++c) {
289 text_t child = response.docInfo[c].OID;
290
291 // distinguish classifiers and documents by checking whether OID
292 // starts with CL or not
293 text_t childHead;
294 text_t::const_iterator start = child.begin();
295 text_t::const_iterator here = child.begin();
296 here += 2;
297 childHead = substr(start, here);
298
299 // documents we output now
300 if (childHead != "CL") {
301 // Check that the item with the 0ID 'gsdl_id' has a lastmodified field that fits
302 // within the required date range (if specified)
303 if (this->inDateRange(from, until, collection, child, protocol, output)) {
304 // TODO: check that the document can be disseminated in the required metadataPrefix
305
306 if (this->output_document(output, protocol, collection, child, metadataPrefix)) {
307 this->prevDocSeen = true;
308 ++this->outputDocs;
309 }
310 }
311 }
312 // children which are classifiers are recursed
313 else {
314 if (resumptionToken != NULL) {
315 int depth = countchar(classifier.begin(), classifier.end(), '.');
316 resumptionToken->setOffset(depth, c+2);
317 }
318 this->recurse_set(output, protocol, collection, child, params, resumptionToken);
319 }
320
321 if (this->outputDocs == this->configuration->resumeAfter()) {
322 this->replyToken = new ResumptionToken(collection, params["set"], "");
323 this->replyToken->setPosition(classifier, c+2);
324 break;
325 }
326 }
327}
328
329
330
331
Note: See TracBrowser for help on using the repository browser.