source: gsdl/trunk/runtime-src/src/oaiservr/abstractlistaction.cpp@ 20628

Last change on this file since 20628 was 20628, checked in by mdewsnip, 15 years ago

Now only uses cinfo.buildDate in the resumption token if a set has been specified (i.e. we're only looking through one collection).

  • Property svn:keywords set to Author Date Id Revision
File size: 7.0 KB
Line 
1#include "abstractlistaction.h"
2#include "OIDtools.h"
3#include "recptprototools.h"
4
5#include "oaitools.h"
6
7
8bool abstractlistaction::check_classifier(recptproto *protocol, const text_t &collection, const text_t &set_name)
9{
10 text_tset metadata;
11 FilterResponse_t response;
12 return get_info(set_name, collection, "", metadata, false, protocol, response, *this->logout);
13}
14
15
16bool abstractlistaction::output_content(ostream &output, recptproto *protocol, oaiargs &params)
17{
18 // Reset variables
19 this->output_docs = 0;
20
21 text_t set_name = params["set"];
22 text_t position = "";
23
24 // Process the resumptionToken if there is one
25 if (params["resumptionToken"] != "")
26 {
27 ResumptionToken resumption_token(params["resumptionToken"]);
28 set_name = resumption_token.getSet();
29 position = resumption_token.getPosition();
30 }
31
32 // Case for "set" argument present -- output just the records in the specified set
33 if (set_name != "")
34 {
35 // Separate the collection name and Greenstone classifier OID from the set name
36 text_t collection_name = "";
37 text_t gsdl_classifier_OID = set_name;
38 oaiclassifier::toGSDL(collection_name, gsdl_classifier_OID);
39
40 // If output_content_for_set() returns false a resumption token has been output, so it's time to stop
41 if (output_content_for_set(output, protocol, params, collection_name, gsdl_classifier_OID, set_name) == false)
42 {
43 return true;
44 }
45 }
46
47 // Case for no "set" argument present -- output all records in all collections
48 else
49 {
50 // Get a list of the collections available
51 text_tarray& collections = this->configuration->getCollectionsList();
52 if (collections.size() == 0)
53 {
54 return false;
55 }
56
57 // Get the current collection from the position value
58 text_t collection_name = "";
59 oaiclassifier::toGSDL(collection_name, position);
60
61 // Find the starting collection
62 text_tarray::iterator collection_iterator = collections.begin();
63 while (collection_iterator != collections.end())
64 {
65 if (collection_name == "" || collection_name == *collection_iterator)
66 {
67 break;
68 }
69
70 collection_iterator++;
71 }
72
73 // Now loop through the remaining collections
74 while (collection_iterator != collections.end())
75 {
76 // If output_content_for_set() returns false a resumption token has been output, so it's time to stop
77 if (output_content_for_set(output, protocol, params, *collection_iterator, "", "") == false)
78 {
79 return true;
80 }
81
82 collection_iterator++;
83 }
84 }
85
86 // If no records were output throw an error
87 if (this->configuration->getOAIVersion() >= 200 && this->output_docs == 0)
88 {
89 errorType = "noRecordsMatch";
90 this->output_error(output, errorType);
91 return false;
92 }
93
94 return true;
95}
96
97
98bool abstractlistaction::output_content_for_set(ostream &output, recptproto *protocol, oaiargs &params, text_t collection_name, text_t gsdl_classifier_OID, text_t set_name)
99{
100 // Check if the set is actually a collection
101 if (gsdl_classifier_OID == "")
102 {
103 gsdl_classifier_OID = "oai";
104 }
105
106 text_t metadata_prefix = params["metadataPrefix"];
107 text_t from = params["from"];
108 text_t until = params["until"];
109 text_t position = "";
110
111 // Process the resumptionToken if there is one
112 if (params["resumptionToken"] != "")
113 {
114 ResumptionToken resumption_token(params["resumptionToken"]);
115 metadata_prefix = resumption_token.getMetadataPrefix();
116 from = resumption_token.getFrom();
117 until = resumption_token.getUntil();
118 position = resumption_token.getPosition();
119 }
120
121 // Get the list of identifiers in this collection
122 // Collections should not contain too many identifiers otherwise this will use a lot of time and memory
123 text_tset metadata; // Must be empty for efficiency
124 FilterResponse_t identifiers_response;
125 get_children(gsdl_classifier_OID, collection_name, "", metadata, false, protocol, identifiers_response, *this->logout);
126
127 // Find the starting position, if necessary
128 ResultDocInfo_tarray::iterator identifier_iterator = identifiers_response.docInfo.begin();
129 if (output_docs == 0)
130 {
131 while (identifier_iterator != identifiers_response.docInfo.end())
132 {
133 if (position == "" || position == (collection_name + ":" + (*identifier_iterator).OID))
134 {
135 break;
136 }
137
138 identifier_iterator++;
139 }
140 }
141
142 // Now loop through displaying the next matching records
143 while (identifier_iterator != identifiers_response.docInfo.end())
144 {
145 position = (*identifier_iterator).OID;
146
147 text_t document_OID = position;
148 if (starts_with(document_OID, "oai."))
149 {
150 document_OID = oaiclassifier::getGSDL_OID(collection_name, document_OID, protocol, *this->logout);
151 }
152
153 // Check this OID is in the (optional) date range specified
154 if (this->in_date_range(output, protocol, params, collection_name, document_OID, from, until))
155 {
156 // If we've output the desired number of records return a resumptionToken and we're done
157 if (this->output_docs == this->configuration->resumeAfter())
158 {
159 // If a set has been specified, we can use the collection's buildDate in the resumption token
160 text_t date_stamp = "";
161 if (set_name != "")
162 {
163 ColInfoResponse_t cinfo;
164 comerror_t err;
165 protocol->get_collectinfo(collection_name, cinfo, err, cerr);
166 date_stamp = cinfo.buildDate;
167 }
168
169 ResumptionToken resumption_token(date_stamp, set_name, metadata_prefix, from, until, collection_name + ":" + position);
170
171 // Don't add any whitespace around the resumption token as it can confuse harvesters/validators
172 output << " <resumptionToken>" << resumption_token.getResumptionTokenString() << "</resumptionToken>" << endl;
173 return false;
174 }
175
176 // Otherwise output this record and increment the count
177 this->output_document(output, protocol, collection_name, document_OID, metadata_prefix);
178 this->output_docs++;
179 }
180
181 identifier_iterator++;
182 }
183
184 return true;
185}
186
187
188bool abstractlistaction::in_date_range(ostream &output, recptproto *protocol, oaiargs &params,
189 text_t& collection, text_t oai_OID, text_t from, text_t until)
190{
191 // If no "from" or "until" value is specified every record matches, so we don't need to go any further
192 if (from == "" && until == "")
193 {
194 return true;
195 }
196
197 // Get the datestamp from the document as sections do not have this metadata
198 text_t document_OID;
199 get_top(oai_OID, document_OID);
200
201 // Request the lastmodified value for this document
202 text_tset metadata;
203 metadata.insert("lastmodified");
204 FilterResponse_t response;
205 if (!get_info(document_OID, collection, "", metadata, false, protocol, response, *this->logout))
206 {
207 return false;
208 }
209
210 text_t last_modified_date;
211 this->getLastModifiedDate(response.docInfo[0], last_modified_date);
212
213 // Check this record is not before the "from" value, if it exists
214 if (from != "" && last_modified_date < from)
215 {
216 // Too early
217 return false;
218 }
219
220 // Check this record is not after the "until" value, if it exists
221 if (until != "" && last_modified_date > until)
222 {
223 // Too late
224 return false;
225 }
226
227 // Just right
228 return true;
229}
Note: See TracBrowser for help on using the repository browser.