source: gsdl/trunk/runtime-src/src/oaiservr/abstractlistaction.cpp@ 16722

Last change on this file since 16722 was 16722, checked in by mdewsnip, 16 years ago

Hacked around a problem with the metadataPrefix being undefined when a resumption token is used with ListRecords.

  • Property svn:keywords set to Author Date Id Revision
File size: 12.0 KB
Line 
1#include "abstractlistaction.h"
2#include "recptprototools.h"
3
4#include "oaitools.h"
5
6//--------------------------------------------------------------------------------------------------
7
8bool abstractlistaction::output_content(ostream &output, recptproto *protocol, oaiargs &params)
9{
10 bool prevDocSeen;
11 ResumptionToken *token = NULL;
12
13 // start the call; clear down the total number of output documents
14 this->outputDocs = 0;
15
16 // We don't actually handle resumptionTokens yet; if we get one, ignore it
17 if (params["resumptionToken"] != "") {
18 token = new ResumptionToken(params["resumptionToken"]);
19 }
20
21 this->replyToken = NULL;
22
23 // if we've been asked for a set, then use it!
24 if (params["set"] != "") {
25 // get the children of this set
26 text_t gsdlSet = params["set"];
27 text_t gsdlCollect = "";
28
29 // given 'demo:CL2', toGSDL returns 'demo' in gsdlCollect and 'CL2' in gsdlSet. If there is no further
30 // set specified after the name of the collection however, then gsdlSet is empty.
31 oaiclassifier::toGSDL(gsdlCollect, gsdlSet);
32
33 // If gsdlSet is empty, then the user is requesting all the identifiers for the collection, so
34 // we simply output all docs via their oai_id tag. But if a specific subset IS requested, then
35 // use recurse_set() to traverse any sub classifiers to find the relevant docs.
36 if(gsdlSet == ""){
37 ColInfoResponse_t cinfo;
38 comerror_t err;
39 this->output_content_for_col(output, protocol, gsdlCollect, cinfo, err, params);
40 }
41 else {
42 if (this->check_classifier(protocol, gsdlCollect, gsdlSet)) {
43 this->recurse_set(output, protocol, gsdlCollect, gsdlSet, params, token);
44 }
45 }
46 }
47 // output all records in all hierarchies
48 else {
49 this->output_content_for_all(output, protocol, params);
50 }
51
52 // If - regardless of set required - no documents have been seen, throw an error.
53 if (this->configuration->getOAIVersion() >= 200 && this->prevDocSeen == false) {
54 errorType = "noRecordsMatch";
55 this->output_error(output, errorType);
56
57 return false;
58 }
59
60 // do a resumption token if required; errors cancel a token...
61 if (this->replyToken != NULL && this->errorType == "") {
62 // Don't add any whitespace around the resumption token as it can confuse harvesters/validators
63 output << " <resumptionToken>" << this->replyToken->getToken() << "</resumptionToken>" << endl;
64 }
65
66 return true;
67}
68
69//--------------------------------------------------------------------------------------------------
70
71void abstractlistaction::output_content_for_col(ostream &output, recptproto *protocol, text_t &gsdlCollect,
72 ColInfoResponse_t &cinfo, comerror_t &err, oaiargs &params)
73{ int startDoc = 0;
74
75 // get the collection information
76 protocol->get_collectinfo(gsdlCollect, cinfo, err, *this->logout);
77
78 text_t metadataPrefix = params["metadataPrefix"];
79
80 // check resumption token
81 if (params["resumptionToken"] != "") {
82 ResumptionToken token(params["resumptionToken"]);
83 if (token.getCollection() == gsdlCollect) {
84 startDoc = token.getPosition() - 1; // first document is said to be 1..
85 metadataPrefix = "oai_dc"; // TO DO: This should come from the resumption token
86 }
87 }
88
89 // If numDocs is 0, do nothing - this->prevDocSeen will stay false if this is the only collection
90 // looked at, or will keep whatever value it had prior to this col (ensures that if the flag has
91 // been set to true by a previous collection that this won't overwrite it to be false).
92 if (cinfo.numDocs > 0) {
93 int errorCount = 0; // Count the number of errors found in the given collection
94
95 for (long i = startDoc; i < cinfo.numDocs; ++i) {
96 if (errorCount > 3) { // If num errors reaches the cut-off value, bail.
97 cerr << "Error: too many records(" << errorCount << ") in the " << gsdlCollect
98 << " collection have invalid or non-existant oai_ids - skipping remainder of collection.\n";
99 return;
100 }
101
102 text_t oai_id = "oai.";
103 oai_id += i;
104
105 text_t gsdl_id = oaiclassifier::getGSDL_OID(gsdlCollect, oai_id, protocol, *this->logout);
106
107 if (gsdl_id == "") { // If the string is empty, then the document didn't have an oai_id, so
108 ++errorCount; // increase error count
109 continue;
110 }
111
112
113 // Check that the item with the 0ID 'gsdl_id' has a lastmodified field that fits within
114 // the required date range (if specified).
115 if (this->inDateRange(params["from"], params["until"], gsdlCollect, gsdl_id, protocol, output)) {
116 if (this->output_document(output, protocol, gsdlCollect, gsdl_id, metadataPrefix)) {
117 // this should be an IF statement, where prevDocSeen is only set to true if the above
118 // function call returns true (indicating that the doc supported the metadata prefix) but
119 // for some reason this is always false. This means that if no doc in the requested set supports
120 // the metadata format, the "no records match" error that should be thrown won't be...
121 //
122 // GRB: the above comment is no longer true; proper checks are made
123 this->prevDocSeen = true;
124 ++this->outputDocs;
125 }
126 }
127
128 // if we've output the number of resumption documents; prepare a resumptionToken
129 if (this->outputDocs == this->configuration->resumeAfter()) {
130 this->replyToken = new ResumptionToken(gsdlCollect, "", "");
131 this->replyToken->setPosition("", i+2);
132 break;
133 }
134 }
135
136 cinfo.clear(); // Clear for next collection to use (if there is one).
137 }
138}
139
140//--------------------------------------------------------------------------------------------
141// Returns true if at least one document record is found
142void abstractlistaction::output_content_for_all(ostream &output, recptproto *protocol, oaiargs &params)
143{
144 ColInfoResponse_t cinfo;
145 comerror_t err;
146 text_tarray collections;
147 text_t gsdlCollect = "";
148 ResumptionToken *token = NULL;
149
150 // get a list of the collections available
151 collections = this->configuration->getCollectionsList();
152 // protocol->get_collection_list(collections, err, output);
153
154 if (params["resumptionToken"] != "") {
155 token = new ResumptionToken(params["resumptionToken"]);
156 }
157
158 for(int current_col = 0; current_col < collections.size(); ++current_col){
159 gsdlCollect = collections[current_col];
160
161 // ignore all leading collections before the one that matches the resumptiontoken
162 if (token != NULL &&
163 token->getCollection() != gsdlCollect)
164 { continue;
165 }
166
167 this->output_content_for_col(output, protocol, gsdlCollect, cinfo, err, params);
168
169 // once we've output at least one collection, continue
170 // outputting all others until the resumption total hits
171 token = NULL;
172
173 if (this->outputDocs == this->configuration->resumeAfter()) {
174 break;
175 }
176 }
177}
178
179//-------------------------------------------------------------------------------------------------
180// Check that the requested from/until dates don't include a time, as this would be asking for too
181// fine a level of granularity, one that greenstone doesn't support. An OAI error must be thrown.
182/*
183bool abstractlistaction::granularityTooFine(text_t &from, text_t &until)
184{
185 if (from != "" && from.){
186
187 }
188
189}
190*/
191//-------------------------------------------------------------------------------------------------
192
193bool abstractlistaction::check_classifier(recptproto *protocol, const text_t &collection,
194 const text_t &classifier)
195{ text_t topClass;
196 FilterResponse_t response;
197 text_tset metadata;
198 ofstream logout("oai.log", ios::app);
199
200 // exclude false children of a top-level classifier immediately...
201 if (!get_info(classifier, collection, "", metadata, false, protocol, response, logout)) {
202 return false;
203 }
204
205 // now check the top-level parent
206 metadata.insert("supportsmemberof");
207
208 text_t::const_iterator dot = findchar(classifier.begin(), classifier.end(), '.');
209 if (dot != classifier.end()) {
210 topClass = substr(classifier.begin(), dot);
211 }
212 else {
213 topClass = classifier;
214 }
215
216 if (!get_info(topClass, collection, "", metadata, false, protocol, response, logout)) {
217 return false;
218 }
219
220 if (response.docInfo[0].metadata["supportsmemberof"].values.size() == 0) {
221 return false;
222 }
223
224 if (response.docInfo[0].metadata["supportsmemberof"].values[0] != "true") {
225 return false;
226 }
227
228 return true;
229}
230
231void abstractlistaction::recurse_set(ostream &output, recptproto *protocol, const text_t &collection,
232 const text_t &classifier, oaiargs &params, ResumptionToken *resumptionToken)
233{
234 // metadata for this call
235 FilterResponse_t response;
236 text_tset metadata;
237 ofstream logout("oai.log", ios::app);
238 text_t from = params["from"];
239 text_t until = params["until"];
240 text_t metadataPrefix = params["metadataPrefix"];
241 // ResumptionToken resumptionToken(params["resumptionToken"]);
242 int startPos = 0;
243
244 // This is a recursive function, and so just because the current set is empty doesn't mean we necessarily
245 // want to throw a 'noRecordsMatch' error; another set (parent/sibling/child) may have had documents. It
246 // is therefore not enough to check that the response object in the current iteration has no docs - we
247 // must also verify that NO OTHER set has had any documents. This is done with the 'prevDocSeen' flag.
248 // It is set to FALSE initially, but as soon as we see a set that isn't empty, it is set to TRUE. The
249 // 'noRecordsMatch' error will only be thrown if, after all appropriate sets have been recursed into,
250 // the 'prevDocSeen' flag is still FALSE. The function returns false if no docs were seen, allowing us to
251 // throw the noRecordsMatch error.
252
253 // bool prevDocSeen = false;
254
255 get_children(classifier, collection, "", metadata, false, protocol, response, *this->logout);
256
257 if (params["resumptionToken"] != "") {
258 // if we're at a resumptionToken
259 if (classifier == resumptionToken->getNode()) {
260 startPos = resumptionToken->getPosition();
261 }
262 else {
263 text_t fullNode = resumptionToken->getNode();
264 text_t::iterator leafIter = fullNode.begin() + classifier.size();
265
266 // if the next character isn't a dot, blow up!
267 if (*leafIter != '.') {
268 // fatal error;
269 exit(1);
270 }
271
272 // get the first '.' after the current classifier point;
273 text_t::iterator separator = findchar(leafIter + 1, fullNode.end(), '.');
274
275 // now, create a new subpath
276 text_t nextNode = substr(fullNode.begin(), separator);
277
278 // seek forward; TODO: improve performance of this
279 for (int c = 0; c < response.numDocs; ++c) {
280 if (response.docInfo[c].OID == nextNode) {
281 startPos = c;
282 break;
283 }
284 }
285 }
286
287 // We need to subtract one from the startPos value to turn it into an index value
288 startPos--;
289 }
290
291 for (int c = startPos; c < response.numDocs; ++c) {
292 text_t child = response.docInfo[c].OID;
293
294 // distinguish classifiers and documents by checking whether OID
295 // starts with CL or not
296 text_t childHead;
297 text_t::const_iterator start = child.begin();
298 text_t::const_iterator here = child.begin();
299 here += 2;
300 childHead = substr(start, here);
301
302 // documents we output now
303 if (childHead != "CL") {
304 // Check that the item with the 0ID 'gsdl_id' has a lastmodified field that fits
305 // within the required date range (if specified)
306 if (this->inDateRange(from, until, collection, child, protocol, output)) {
307 // TODO: check that the document can be disseminated in the required metadataPrefix
308
309 if (this->output_document(output, protocol, collection, child, metadataPrefix)) {
310 this->prevDocSeen = true;
311 ++this->outputDocs;
312 }
313 }
314 }
315 // children which are classifiers are recursed
316 else {
317 if (resumptionToken != NULL) {
318 int depth = countchar(classifier.begin(), classifier.end(), '.');
319 resumptionToken->setOffset(depth, c+2);
320 }
321 this->recurse_set(output, protocol, collection, child, params, resumptionToken);
322 }
323
324 if (this->outputDocs == this->configuration->resumeAfter()) {
325 this->replyToken = new ResumptionToken(collection, params["set"], "");
326 this->replyToken->setPosition(classifier, c+2);
327 break;
328 }
329 }
330}
331
332
333
334
Note: See TracBrowser for help on using the repository browser.