source: gsdl/trunk/runtime-src/src/oaiservr/abstractlistaction.cpp@ 20590

Last change on this file since 20590 was 20590, checked in by mdewsnip, 15 years ago

Completely rewrote the resumption token support, as its bugginess finally tipped the "I can't stand it any more" scale...

  • Property svn:keywords set to Author Date Id Revision
File size: 6.9 KB
Line 
1#include "abstractlistaction.h"
2#include "OIDtools.h"
3#include "recptprototools.h"
4
5#include "oaitools.h"
6
7
8bool abstractlistaction::check_classifier(recptproto *protocol, const text_t &collection, const text_t &set_name)
9{
10 text_tset metadata;
11 FilterResponse_t response;
12 return get_info(set_name, collection, "", metadata, false, protocol, response, *this->logout);
13}
14
15
16bool abstractlistaction::output_content(ostream &output, recptproto *protocol, oaiargs &params)
17{
18 // Reset variables
19 this->output_docs = 0;
20
21 text_t set_name = params["set"];
22 text_t position = params["position"];
23
24 // Process the resumptionToken if there is one
25 if (params["resumptionToken"] != "")
26 {
27 ResumptionToken resumption_token(params["resumptionToken"]);
28 set_name = resumption_token.getSet();
29 position = resumption_token.getPosition();
30 }
31
32 // Case for "set" argument present -- output just the records in the specified set
33 if (set_name != "")
34 {
35 // Separate the collection name and Greenstone classifier OID from the set name
36 text_t collection_name = "";
37 text_t gsdl_classifier_OID = set_name;
38 oaiclassifier::toGSDL(collection_name, gsdl_classifier_OID);
39
40 // If output_content_for_set() returns false a resumption token has been output, so it's time to stop
41 if (output_content_for_set(output, protocol, params, collection_name, gsdl_classifier_OID, set_name) == false)
42 {
43 return true;
44 }
45 }
46
47 // Case for no "set" argument present -- output all records in all collections
48 else
49 {
50 // Get a list of the collections available
51 text_tarray& collections = this->configuration->getCollectionsList();
52 if (collections.size() == 0)
53 {
54 return false;
55 }
56
57 // Get the current collection from the position value
58 text_t collection_name = "";
59 oaiclassifier::toGSDL(collection_name, position);
60
61 // Find the starting collection
62 text_tarray::iterator collection_iterator = collections.begin();
63 while (collection_iterator != collections.end())
64 {
65 if (collection_name == "" || collection_name == *collection_iterator)
66 {
67 break;
68 }
69
70 collection_iterator++;
71 }
72
73 // Now loop through the remaining collections
74 while (collection_iterator != collections.end())
75 {
76 // If output_content_for_set() returns false a resumption token has been output, so it's time to stop
77 if (output_content_for_set(output, protocol, params, *collection_iterator, "", "") == false)
78 {
79 return true;
80 }
81
82 collection_iterator++;
83 }
84 }
85
86 // If no records were output throw an error
87 if (this->configuration->getOAIVersion() >= 200 && this->output_docs == 0)
88 {
89 errorType = "noRecordsMatch";
90 this->output_error(output, errorType);
91 return false;
92 }
93
94 return true;
95}
96
97
98bool abstractlistaction::output_content_for_set(ostream &output, recptproto *protocol, oaiargs &params, text_t collection_name, text_t gsdl_classifier_OID, text_t set_name)
99{
100 // Check if the set is actually a collection
101 if (gsdl_classifier_OID == "")
102 {
103 gsdl_classifier_OID = "oai";
104 }
105
106 text_t metadata_prefix = params["metadataPrefix"];
107 text_t from = params["from"];
108 text_t until = params["until"];
109 text_t position = "";
110
111 // Process the resumptionToken if there is one
112 if (params["resumptionToken"] != "")
113 {
114 ResumptionToken resumption_token(params["resumptionToken"]);
115 metadata_prefix = resumption_token.getMetadataPrefix();
116 from = resumption_token.getFrom();
117 until = resumption_token.getUntil();
118 position = resumption_token.getPosition();
119 }
120
121 // Get the list of identifiers in this collection
122 // Collections should not contain too many identifiers otherwise this will use a lot of time and memory
123 text_tset metadata; // Must be empty for efficiency
124 FilterResponse_t identifiers_response;
125 get_children(gsdl_classifier_OID, collection_name, "", metadata, false, protocol, identifiers_response, *this->logout);
126
127 // Find the starting position, if necessary
128 ResultDocInfo_tarray::iterator identifier_iterator = identifiers_response.docInfo.begin();
129 if (output_docs == 0)
130 {
131 while (identifier_iterator != identifiers_response.docInfo.end())
132 {
133 if (position == "" || position == (collection_name + ":" + (*identifier_iterator).OID))
134 {
135 break;
136 }
137
138 identifier_iterator++;
139 }
140 }
141
142 // Now loop through displaying the next matching records
143 while (identifier_iterator != identifiers_response.docInfo.end())
144 {
145 position = (*identifier_iterator).OID;
146
147 text_t document_OID = position;
148 if (starts_with(document_OID, "oai."))
149 {
150 document_OID = oaiclassifier::getGSDL_OID(collection_name, document_OID, protocol, *this->logout);
151 }
152
153 // Check this OID is in the (optional) date range specified
154 if (this->in_date_range(output, protocol, params, collection_name, document_OID, from, until))
155 {
156 // If we've output the desired number of records return a resumptionToken and we're done
157 if (this->output_docs == this->configuration->resumeAfter())
158 {
159 // Get the buildDate from the build.cfg file
160 ColInfoResponse_t cinfo;
161 comerror_t err;
162 protocol->get_collectinfo(collection_name, cinfo, err, cerr);
163
164 ResumptionToken resumption_token(cinfo.buildDate, set_name, metadata_prefix, from, until, collection_name + ":" + position);
165
166 // Don't add any whitespace around the resumption token as it can confuse harvesters/validators
167 output << " <resumptionToken>" << resumption_token.getResumptionTokenString() << "</resumptionToken>" << endl;
168 return false;
169 }
170
171 // Otherwise output this record and increment the count
172 this->output_document(output, protocol, collection_name, document_OID, metadata_prefix);
173 this->output_docs++;
174 }
175
176 identifier_iterator++;
177 }
178
179 return true;
180}
181
182
183bool abstractlistaction::in_date_range(ostream &output, recptproto *protocol, oaiargs &params,
184 text_t& collection, text_t oai_OID, text_t from, text_t until)
185{
186 // If no "from" or "until" value is specified every record matches, so we don't need to go any further
187 if (from == "" && until == "")
188 {
189 return true;
190 }
191
192 // Get the datestamp from the document as sections do not have this metadata
193 text_t document_OID;
194 get_top(oai_OID, document_OID);
195
196 // Request the lastmodified value for this document
197 text_tset metadata;
198 metadata.insert("lastmodified");
199 FilterResponse_t response;
200 if (!get_info(document_OID, collection, "", metadata, false, protocol, response, *this->logout))
201 {
202 return false;
203 }
204
205 text_t last_modified_date;
206 this->getLastModifiedDate(response.docInfo[0], last_modified_date);
207
208 // Check this record is not before the "from" value, if it exists
209 if (from != "" && last_modified_date < from)
210 {
211 // Too early
212 return false;
213 }
214
215 // Check this record is not after the "until" value, if it exists
216 if (until != "" && last_modified_date > until)
217 {
218 // Too late
219 return false;
220 }
221
222 // Just right
223 return true;
224}
Note: See TracBrowser for help on using the repository browser.