source: gsdl/trunk/runtime-src/src/oaiservr/abstractlistaction.cpp@ 16712

Last change on this file since 16712 was 16712, checked in by mdewsnip, 16 years ago

Removed the abstractlistaction::validateAction() function and duplicated the code in listidsaction::validateAction() and listrecsaction::validateAction(), since the handling of the metadataPrefix is slightly different.

  • Property svn:keywords set to Author Date Id Revision
File size: 12.1 KB
Line 
1#include "abstractlistaction.h"
2#include "recptprototools.h"
3
4#include "oaitools.h"
5
6//--------------------------------------------------------------------------------------------------
7
8bool abstractlistaction::output_content(ostream &output, recptproto *protocol, oaiargs &params)
9{
10 text_t from = params["from"];
11 text_t until = params["until"];
12 text_t metaFormat = params["metadataPrefix"];
13 bool prevDocSeen;
14 ResumptionToken *token = NULL;
15
16 // start the call; clear down the total number of output documents
17 this->outputDocs = 0;
18
19 // We don't actually handle resumptionTokens yet; if we get one, ignore it
20 if (params["resumptionToken"] != "") {
21 token = new ResumptionToken(params["resumptionToken"]);
22 }
23
24 this->replyToken = NULL;
25
26 // if we've been asked for a set, then use it!
27 if (params["set"] != "") {
28 // get the children of this set
29 text_t gsdlSet = params["set"];
30 text_t gsdlCollect = "";
31
32 // given 'demo:CL2', toGSDL returns 'demo' in gsdlCollect and 'CL2' in gsdlSet. If there is no further
33 // set specified after the name of the collection however, then gsdlSet is empty.
34 oaiclassifier::toGSDL(gsdlCollect, gsdlSet);
35
36 // If gsdlSet is empty, then the user is requesting all the identifiers for the collection, so
37 // we simply output all docs via their oai_id tag. But if a specific subset IS requested, then
38 // use recurse_set() to traverse any sub classifiers to find the relevant docs.
39 if(gsdlSet == ""){
40 ColInfoResponse_t cinfo;
41 comerror_t err;
42 this->output_content_for_col(output, protocol, gsdlCollect, cinfo, err, params);
43 }
44 else {
45 if (this->check_classifier(protocol, gsdlCollect, gsdlSet)) {
46 this->recurse_set(output, protocol, gsdlCollect, gsdlSet, params, token);
47 }
48 }
49 }
50 // output all records in all hierarchies
51 else {
52 this->output_content_for_all(output, protocol, params);
53 }
54
55 // If - regardless of set required - no documents have been seen, throw an error.
56 if (this->configuration->getOAIVersion() >= 200 && this->prevDocSeen == false) {
57 errorType = "noRecordsMatch";
58 this->output_error(output, errorType);
59
60 return false;
61 }
62
63 // do a resumption token if required; errors cancel a token...
64 if (this->replyToken != NULL && this->errorType == "") {
65 // Don't add any whitespace around the resumption token as it can confuse harvesters/validators
66 output << " <resumptionToken>" << this->replyToken->getToken() << "</resumptionToken>" << endl;
67 }
68
69 return true;
70}
71
72//--------------------------------------------------------------------------------------------------
73
74void abstractlistaction::output_content_for_col(ostream &output, recptproto *protocol, text_t &gsdlCollect,
75 ColInfoResponse_t &cinfo, comerror_t &err, oaiargs &params)
76{ int startDoc = 0;
77
78 // get the collection information
79 protocol->get_collectinfo(gsdlCollect, cinfo, err, *this->logout);
80
81 // check resumption token
82 if (params["resumptionToken"] != "") {
83 ResumptionToken token(params["resumptionToken"]);
84 if (token.getCollection() == gsdlCollect) {
85 startDoc = token.getPosition() - 1; // first document is said to be 1..
86 }
87 }
88
89 // If numDocs is 0, do nothing - this->prevDocSeen will stay false if this is the only collection
90 // looked at, or will keep whatever value it had prior to this col (ensures that if the flag has
91 // been set to true by a previous collection that this won't overwrite it to be false).
92 if (cinfo.numDocs > 0) {
93 int errorCount = 0; // Count the number of errors found in the given collection
94 text_t from = params["from"];
95 text_t until = params["until"];
96
97 for (long i = startDoc; i < cinfo.numDocs; ++i) {
98 if (errorCount > 3) { // If num errors reaches the cut-off value, bail.
99 cerr << "Error: too many records(" << errorCount << ") in the " << gsdlCollect
100 << " collection have invalid or non-existant oai_ids - skipping remainder of collection.\n";
101 return;
102 }
103
104 text_t oai_id = "oai.";
105 oai_id += i;
106
107 text_t gsdl_id = oaiclassifier::getGSDL_OID(gsdlCollect, oai_id, protocol, *this->logout);
108
109 if (gsdl_id == "") { // If the string is empty, then the document didn't have an oai_id, so
110 ++errorCount; // increase error count
111 continue;
112 }
113
114
115 // Check that the item with the 0ID 'gsdl_id' has a lastmodified field that fits within
116 // the required date range (if specified).
117 if (this->inDateRange(params["from"], params["until"], gsdlCollect, gsdl_id, protocol, output)) {
118 if (this->output_document(output, protocol, gsdlCollect, gsdl_id, params["metadataPrefix"])) {
119 // this should be an IF statement, where prevDocSeen is only set to true if the above
120 // function call returns true (indicating that the doc supported the metadata prefix) but
121 // for some reason this is always false. This means that if no doc in the requested set supports
122 // the metadata format, the "no records match" error that should be thrown won't be...
123 //
124 // GRB: the above comment is no longer true; proper checks are made
125 this->prevDocSeen = true;
126 ++this->outputDocs;
127 }
128 }
129
130 // if we've output the number of resumption documents; prepare a resumptionToken
131 if (this->outputDocs == this->configuration->resumeAfter()) {
132 this->replyToken = new ResumptionToken(gsdlCollect, "", "");
133 this->replyToken->setPosition("", i+2);
134 break;
135 }
136 }
137
138 cinfo.clear(); // Clear for next collection to use (if there is one).
139 }
140}
141
142//--------------------------------------------------------------------------------------------
143// Returns true if at least one document record is found
144void abstractlistaction::output_content_for_all(ostream &output, recptproto *protocol, oaiargs &params)
145{
146 ColInfoResponse_t cinfo;
147 comerror_t err;
148 text_tarray collections;
149 text_t gsdlCollect = "";
150 ResumptionToken *token = NULL;
151
152 // get a list of the collections available
153 collections = this->configuration->getCollectionsList();
154 // protocol->get_collection_list(collections, err, output);
155
156 if (params["resumptionToken"] != "") {
157 token = new ResumptionToken(params["resumptionToken"]);
158 }
159
160 for(int current_col = 0; current_col < collections.size(); ++current_col){
161 gsdlCollect = collections[current_col];
162
163 // ignore all leading collections before the one that matches the resumptiontoken
164 if (token != NULL &&
165 token->getCollection() != gsdlCollect)
166 { continue;
167 }
168
169 this->output_content_for_col(output, protocol, gsdlCollect, cinfo, err, params);
170
171 // once we've output at least one collection, continue
172 // outputting all others until the resumption total hits
173 token = NULL;
174
175 if (this->outputDocs == this->configuration->resumeAfter()) {
176 break;
177 }
178 }
179}
180
181//-------------------------------------------------------------------------------------------------
182// Check that the requested from/until dates don't include a time, as this would be asking for too
183// fine a level of granularity, one that greenstone doesn't support. An OAI error must be thrown.
184/*
185bool abstractlistaction::granularityTooFine(text_t &from, text_t &until)
186{
187 if (from != "" && from.){
188
189 }
190
191}
192*/
193//-------------------------------------------------------------------------------------------------
194
195bool abstractlistaction::check_classifier(recptproto *protocol, const text_t &collection,
196 const text_t &classifier)
197{ text_t topClass;
198 FilterResponse_t response;
199 text_tset metadata;
200 ofstream logout("oai.log", ios::app);
201
202 // exclude false children of a top-level classifier immediately...
203 if (!get_info(classifier, collection, "", metadata, false, protocol, response, logout)) {
204 return false;
205 }
206
207 // now check the top-level parent
208 metadata.insert("supportsmemberof");
209
210 text_t::const_iterator dot = findchar(classifier.begin(), classifier.end(), '.');
211 if (dot != classifier.end()) {
212 topClass = substr(classifier.begin(), dot);
213 }
214 else {
215 topClass = classifier;
216 }
217
218 if (!get_info(topClass, collection, "", metadata, false, protocol, response, logout)) {
219 return false;
220 }
221
222 if (response.docInfo[0].metadata["supportsmemberof"].values.size() == 0) {
223 return false;
224 }
225
226 if (response.docInfo[0].metadata["supportsmemberof"].values[0] != "true") {
227 return false;
228 }
229
230 return true;
231}
232
233void abstractlistaction::recurse_set(ostream &output, recptproto *protocol, const text_t &collection,
234 const text_t &classifier, oaiargs &params, ResumptionToken *resumptionToken)
235{
236 // metadata for this call
237 FilterResponse_t response;
238 text_tset metadata;
239 ofstream logout("oai.log", ios::app);
240 text_t from = params["from"];
241 text_t until = params["until"];
242 text_t metadataPrefix = params["metadataPrefix"];
243 // ResumptionToken resumptionToken(params["resumptionToken"]);
244 int startPos = 0;
245
246 // This is a recursive function, and so just because the current set is empty doesn't mean we necessarily
247 // want to throw a 'noRecordsMatch' error; another set (parent/sibling/child) may have had documents. It
248 // is therefore not enough to check that the response object in the current iteration has no docs - we
249 // must also verify that NO OTHER set has had any documents. This is done with the 'prevDocSeen' flag.
250 // It is set to FALSE initially, but as soon as we see a set that isn't empty, it is set to TRUE. The
251 // 'noRecordsMatch' error will only be thrown if, after all appropriate sets have been recursed into,
252 // the 'prevDocSeen' flag is still FALSE. The function returns false if no docs were seen, allowing us to
253 // throw the noRecordsMatch error.
254
255 // bool prevDocSeen = false;
256
257 get_children(classifier, collection, "", metadata, false, protocol, response, *this->logout);
258
259 if (params["resumptionToken"] != "") {
260 // if we're at a resumptionToken
261 if (classifier == resumptionToken->getNode()) {
262 startPos = resumptionToken->getPosition();
263 }
264 else {
265 text_t fullNode = resumptionToken->getNode();
266 text_t::iterator leafIter = fullNode.begin() + classifier.size();
267
268 // if the next character isn't a dot, blow up!
269 if (*leafIter != '.') {
270 // fatal error;
271 exit(1);
272 }
273
274 // get the first '.' after the current classifier point;
275 text_t::iterator separator = findchar(leafIter + 1, fullNode.end(), '.');
276
277 // now, create a new subpath
278 text_t nextNode = substr(fullNode.begin(), separator);
279
280 // seek forward; TODO: improve performance of this
281 for (int c = 0; c < response.numDocs; ++c) {
282 if (response.docInfo[c].OID == nextNode) {
283 startPos = c;
284 break;
285 }
286 }
287 }
288
289 // We need to subtract one from the startPos value to turn it into an index value
290 startPos--;
291 }
292
293 for (int c = startPos; c < response.numDocs; ++c) {
294 text_t child = response.docInfo[c].OID;
295
296 // distinguish classifiers and documents by checking whether OID
297 // starts with CL or not
298 text_t childHead;
299 text_t::const_iterator start = child.begin();
300 text_t::const_iterator here = child.begin();
301 here += 2;
302 childHead = substr(start, here);
303
304 // documents we output now
305 if (childHead != "CL") {
306 // Check that the item with the 0ID 'gsdl_id' has a lastmodified field that fits
307 // within the required date range (if specified)
308 if (this->inDateRange(from, until, collection, child, protocol, output)) {
309 // TODO: check that the document can be disseminated in the required metadataPrefix
310
311 if (this->output_document(output, protocol, collection, child, metadataPrefix)) {
312 this->prevDocSeen = true;
313 ++this->outputDocs;
314 }
315 }
316 }
317 // children which are classifiers are recursed
318 else {
319 if (resumptionToken != NULL) {
320 int depth = countchar(classifier.begin(), classifier.end(), '.');
321 resumptionToken->setOffset(depth, c+2);
322 }
323 this->recurse_set(output, protocol, collection, child, params, resumptionToken);
324 }
325
326 if (this->outputDocs == this->configuration->resumeAfter()) {
327 this->replyToken = new ResumptionToken(collection, params["set"], "");
328 this->replyToken->setPosition(classifier, c+2);
329 break;
330 }
331 }
332}
333
334
335
336
Note: See TracBrowser for help on using the repository browser.