source: main/trunk/greenstone2/runtime-src/src/oaiservr/abstractlistaction.cpp@ 27528

Last change on this file since 27528 was 27528, checked in by kjdon, 11 years ago

Implemented oaisupercollection: add it to oai.cfg and the server will create a new set containing the specified collections.

  • Property svn:keywords set to Author Date Id Revision
File size: 8.7 KB
Line 
1/**********************************************************************
2 *
3 * abstractlistaction.cpp --
4 *
5 * Copyright (C) 2004-2010 The New Zealand Digital Library Project
6 *
7 * A component of the Greenstone digital library software
8 * from the New Zealand Digital Library Project at the
9 * University of Waikato, New Zealand.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *
25 *********************************************************************/
26
27#include "abstractlistaction.h"
28#include "OIDtools.h"
29#include "recptprototools.h"
30
31#include "oaitools.h"
32
33
34bool abstractlistaction::check_classifier(recptproto *protocol, const text_t &collection, const text_t &set_name)
35{
36 text_tset metadata;
37 FilterResponse_t response;
38 return get_info(set_name, collection, "", metadata, false, protocol, response, *this->logout);
39}
40
41
42bool abstractlistaction::output_content(ostream &output, recptproto *protocol, oaiargs &params)
43{
44 // Reset variables
45 this->output_docs = 0;
46
47 text_t set_name = params["set"];
48 text_t position = "";
49
50 // Process the resumptionToken if there is one
51 if (params["resumptionToken"] != "")
52 {
53 ResumptionToken resumption_token(params["resumptionToken"]);
54 set_name = resumption_token.getSet();
55 position = resumption_token.getPosition();
56 }
57
58 // is it a super collection??
59 bool is_super_collection = false;
60
61 if (set_name != "")
62 {
63 text_tarray super_colls = this->configuration->getSuperCollectionsList();
64 for (int s = 0; s<super_colls.size(); s++) {
65 if (set_name == super_colls[s]) {
66 is_super_collection = true;
67 break;
68 }
69 }
70 }
71
72 // Case for "set" argument present that is not a super collection -- output just the records in the specified set
73 if (set_name != "" && !is_super_collection) {
74 // Separate the collection name and Greenstone classifier OID from the set name
75 text_t collection_name = "";
76 text_t gsdl_classifier_OID = set_name;
77 oaiclassifier::toGSDL(collection_name, gsdl_classifier_OID);
78
79 // If output_content_for_set() returns false a resumption token has been output, so it's time to stop
80 if (output_content_for_set(output, protocol, params, collection_name, gsdl_classifier_OID, set_name) == false)
81 {
82 return true;
83 }
84 }
85
86 // Case for no "set" argument present -- output all records in all collections
87 // or we are a super collection
88 else
89 {
90 // Get a list of the collections available
91 text_tarray collections;
92
93 if (is_super_collection) {
94 collections = this->configuration->getSuperCollectionCollections(set_name);
95 } else {
96 collections = this->configuration->getCollectionsList();
97 }
98 if (collections.size() == 0)
99 {
100 return false;
101 }
102
103 // Get the current collection from the position value
104 text_t collection_name = "";
105 oaiclassifier::toGSDL(collection_name, position);
106
107 // Find the starting collection
108 text_tarray::iterator collection_iterator = collections.begin();
109 while (collection_iterator != collections.end())
110 {
111 if (collection_name == "" || collection_name == *collection_iterator)
112 {
113 break;
114 }
115
116 collection_iterator++;
117 }
118
119 // Now loop through the remaining collections
120 while (collection_iterator != collections.end())
121 {
122 // If output_content_for_set() returns false a resumption token has been output, so it's time to stop
123 if (output_content_for_set(output, protocol, params, *collection_iterator, "", "") == false)
124 {
125 return true;
126 }
127
128 collection_iterator++;
129 }
130 }
131
132 // If no records were output throw an error
133 if (this->configuration->getOAIVersion() >= 200 && this->output_docs == 0)
134 {
135 errorType = "noRecordsMatch";
136 this->output_error(output, errorType);
137 return false;
138 }
139
140 return true;
141}
142
143bool abstractlistaction::output_content_for_set(ostream &output, recptproto *protocol, oaiargs &params, text_t collection_name, text_t gsdl_classifier_OID, text_t set_name)
144{
145 // Check if the set is actually a collection
146 if (gsdl_classifier_OID == "")
147 {
148 gsdl_classifier_OID = "oai";
149 }
150
151 text_t metadata_prefix = params["metadataPrefix"];
152 text_t from = params["from"];
153 text_t until = params["until"];
154 text_t position = "";
155
156 // Process the resumptionToken if there is one
157 if (params["resumptionToken"] != "")
158 {
159 ResumptionToken resumption_token(params["resumptionToken"]);
160 metadata_prefix = resumption_token.getMetadataPrefix();
161 from = resumption_token.getFrom();
162 until = resumption_token.getUntil();
163 position = resumption_token.getPosition();
164 }
165
166 // Get the list of identifiers in this collection
167 // Collections should not contain too many identifiers otherwise this will use a lot of time and memory
168 text_tset metadata; // Must be empty for efficiency
169 FilterResponse_t identifiers_response;
170 get_children(gsdl_classifier_OID, collection_name, "", metadata, false, protocol, identifiers_response, *this->logout);
171
172 // Find the starting position, if necessary
173 ResultDocInfo_tarray::iterator identifier_iterator = identifiers_response.docInfo.begin();
174 if (output_docs == 0)
175 {
176 while (identifier_iterator != identifiers_response.docInfo.end())
177 {
178 if (position == "" || position == (collection_name + ":" + (*identifier_iterator).OID))
179 {
180 break;
181 }
182
183 identifier_iterator++;
184 }
185 }
186
187 // Now loop through displaying the next matching records
188 while (identifier_iterator != identifiers_response.docInfo.end())
189 {
190 position = (*identifier_iterator).OID;
191
192 text_t document_OID = position;
193 if (starts_with(document_OID, "oai."))
194 {
195 document_OID = oaiclassifier::getGSDL_OID(collection_name, document_OID, protocol, *this->logout);
196 }
197
198 // Check this OID is in the (optional) date range specified
199 if (this->in_date_range(output, protocol, params, collection_name, document_OID, from, until))
200 {
201 // If we've output the desired number of records return a resumptionToken and we're done
202 if (this->output_docs == this->configuration->resumeAfter())
203 {
204 // If a set has been specified, we can use the collection's buildDate in the resumption token
205 text_t date_stamp = "";
206 if (set_name != "")
207 {
208 ColInfoResponse_t cinfo;
209 comerror_t err;
210 protocol->get_collectinfo(collection_name, cinfo, err, cerr);
211 date_stamp = cinfo.buildDate;
212 }
213
214 ResumptionToken resumption_token(date_stamp, set_name, metadata_prefix, from, until, collection_name + ":" + position);
215
216 // Don't add any whitespace around the resumption token as it can confuse harvesters/validators
217 output << " <resumptionToken>" << resumption_token.getResumptionTokenString() << "</resumptionToken>" << endl;
218 return false;
219 }
220
221 // Otherwise output this record and increment the count
222 this->output_document(output, protocol, collection_name, document_OID, metadata_prefix);
223 this->output_docs++;
224 }
225
226 identifier_iterator++;
227 }
228
229 return true;
230}
231
232
233bool abstractlistaction::in_date_range(ostream &output, recptproto *protocol, oaiargs &params,
234 text_t& collection, text_t oai_OID, text_t from, text_t until)
235{
236 // If no "from" or "until" value is specified every record matches, so we don't need to go any further
237 if (from == "" && until == "")
238 {
239 return true;
240 }
241
242 // Get the datestamp from the document as sections do not have this metadata
243 text_t document_OID;
244 get_top(oai_OID, document_OID);
245
246 // Request the oailastmodified value for this document
247 text_tset metadata;
248 metadata.insert("oailastmodified");
249 metadata.insert("gs.OAIDateStamp");
250 FilterResponse_t response;
251 if (!get_info(document_OID, collection, "", metadata, false, protocol, response, *this->logout))
252 {
253 return false;
254 }
255
256 text_t last_modified_date;
257 this->getLastModifiedDate(response.docInfo[0], last_modified_date);
258
259 // Check this record is not before the "from" value, if it exists
260 if (from != "" && last_modified_date < from)
261 {
262 // Too early
263 return false;
264 }
265
266 // Check this record is not after the "until" value, if it exists
267 if (until != "" && last_modified_date > until)
268 {
269 // Too late
270 return false;
271 }
272
273 // Just right
274 return true;
275}
Note: See TracBrowser for help on using the repository browser.