source: main/trunk/greenstone2/runtime-src/src/oaiservr/abstractlistaction.cpp@ 31387

Last change on this file since 31387 was 31387, checked in by ak19, 7 years ago

Round 1 of commits for getting OAI deletion policy to work with GS2 (server end). The perl code writing out the OAI db and the GS3 server code implementing the deletion policy had already been completed earlier (end 2016).

  • Property svn:keywords set to Author Date Id Revision
File size: 8.8 KB
Line 
1/**********************************************************************
2 *
3 * abstractlistaction.cpp --
4 *
5 * Copyright (C) 2004-2010 The New Zealand Digital Library Project
6 *
7 * A component of the Greenstone digital library software
8 * from the New Zealand Digital Library Project at the
9 * University of Waikato, New Zealand.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *
25 *********************************************************************/
26
27#include "abstractlistaction.h"
28#include "OIDtools.h"
29#include "recptprototools.h"
30
31#include "oaitools.h"
32
33
34bool abstractlistaction::check_classifier(recptproto *protocol, const text_t &collection, const text_t &set_name)
35{
36 text_tset metadata;
37 FilterResponse_t response;
38 return get_info(set_name, collection, "", metadata, false, protocol, response, *this->logout);
39}
40
41
42bool abstractlistaction::output_content(ostream &output, recptproto *protocol, oaiargs &params)
43{
44 // Reset variables
45 this->output_docs = 0;
46
47 text_t set_name = params["set"];
48 text_t position = "";
49
50 // Process the resumptionToken if there is one
51 if (params["resumptionToken"] != "")
52 {
53 ResumptionToken resumption_token(params["resumptionToken"]);
54 set_name = resumption_token.getSet();
55 position = resumption_token.getPosition();
56 }
57
58 // is it a super collection??
59 bool is_super_collection = false;
60
61 if (set_name != "")
62 {
63 text_tarray super_colls = this->configuration->getSuperCollectionsList();
64 for (int s = 0; s<super_colls.size(); s++) {
65 if (set_name == super_colls[s]) {
66 is_super_collection = true;
67 break;
68 }
69 }
70 }
71
72 // Case for "set" argument present that is not a super collection -- output just the records in the specified set
73 if (set_name != "" && !is_super_collection) {
74 // Separate the collection name and Greenstone classifier OID from the set name
75 text_t collection_name = "";
76 text_t gsdl_classifier_OID = set_name;
77 oaiclassifier::toGSDL(collection_name, gsdl_classifier_OID);
78
79 // If output_content_for_set() returns false a resumption token has been output, so it's time to stop
80 if (output_content_for_set(output, protocol, params, collection_name, gsdl_classifier_OID, set_name) == false)
81 {
82 return true;
83 }
84 }
85
86 // Case for no "set" argument present - output all records in all collections & super colls
87 // Case for super collection set - output all records in all collections in this super coll
88 else
89 {
90 // Get a list of the collections available
91 text_tarray collections;
92
93 if (is_super_collection) {
94 collections = this->configuration->getSuperCollectionCollections(set_name);
95 } else {
96
97 collections = this->configuration->getAllCollectionsList();//this->configuration->getCollectionsList();
98
99 }
100 if (collections.size() == 0)
101 {
102 return false;
103 }
104
105 // Get the current collection from the position value
106 text_t collection_name = "";
107 oaiclassifier::toGSDL(collection_name, position);
108
109 // Find the starting collection
110 text_tarray::iterator collection_iterator = collections.begin();
111 while (collection_iterator != collections.end())
112 {
113 if (collection_name == "" || collection_name == *collection_iterator)
114 {
115 break;
116 }
117
118 collection_iterator++;
119 }
120
121 // Now loop through the remaining collections
122 while (collection_iterator != collections.end())
123 {
124 // If output_content_for_set() returns false a resumption token has been output, so it's time to stop
125 if (output_content_for_set(output, protocol, params, *collection_iterator, "", set_name) == false)
126 {
127 return true;
128 }
129
130 collection_iterator++;
131 }
132 }
133
134 // If no records were output throw an error
135 if (this->configuration->getOAIVersion() >= 200 && this->output_docs == 0)
136 {
137 errorType = "noRecordsMatch";
138 this->output_error(output, errorType);
139 return false;
140 }
141
142 return true;
143}
144
145bool abstractlistaction::output_content_for_set(ostream &output, recptproto *protocol, oaiargs &params, text_t collection_name, text_t gsdl_classifier_OID, text_t set_name)
146{
147 // Check if the set is actually a collection
148 if (gsdl_classifier_OID == "")
149 {
150 gsdl_classifier_OID = "oai";
151 }
152
153 text_t metadata_prefix = params["metadataPrefix"];
154 text_t from = params["from"];
155 text_t until = params["until"];
156 text_t position = "";
157
158 // Process the resumptionToken if there is one
159 if (params["resumptionToken"] != "")
160 {
161 ResumptionToken resumption_token(params["resumptionToken"]);
162 metadata_prefix = resumption_token.getMetadataPrefix();
163 from = resumption_token.getFrom();
164 until = resumption_token.getUntil();
165 position = resumption_token.getPosition();
166 }
167
168 // Get the list of identifiers in this collection
169 // Collections should not contain too many identifiers otherwise this will use a lot of time and memory
170 text_tset metadata; // Must be empty for efficiency
171 FilterResponse_t identifiers_response;
172 get_children(gsdl_classifier_OID, collection_name, "", metadata, false, protocol, identifiers_response, *this->logout, FROAI);
173
174 // Find the starting position, if necessary
175 ResultDocInfo_tarray::iterator identifier_iterator = identifiers_response.docInfo.begin();
176 if (output_docs == 0)
177 {
178 while (identifier_iterator != identifiers_response.docInfo.end())
179 {
180 if (position == "" || position == (collection_name + ":" + (*identifier_iterator).OID))
181 {
182 break;
183 }
184
185 identifier_iterator++;
186 }
187 }
188
189 // Now loop through displaying the next matching records
190 while (identifier_iterator != identifiers_response.docInfo.end())
191 {
192 position = (*identifier_iterator).OID;
193
194 text_t document_OID = position;
195 if (starts_with(document_OID, "oai."))
196 {
197 document_OID = oaiclassifier::getGSDL_OID(collection_name, document_OID, protocol, *this->logout);
198 }
199
200 // Check this OID is in the (optional) date range specified
201 if (this->in_date_range(output, protocol, params, collection_name, document_OID, from, until))
202 {
203 // If we've output the desired number of records return a resumptionToken and we're done
204 if (this->output_docs == this->configuration->resumeAfter())
205 {
206 // If a set has been specified, we can use the collection's buildDate in the resumption token
207 text_t date_stamp = "";
208 if (set_name != "")
209 {
210 ColInfoResponse_t cinfo;
211 comerror_t err;
212 protocol->get_collectinfo(collection_name, cinfo, err, cerr);
213 date_stamp = cinfo.buildDate;
214 }
215
216 ResumptionToken resumption_token(date_stamp, set_name, metadata_prefix, from, until, collection_name + ":" + position);
217
218 // Don't add any whitespace around the resumption token as it can confuse harvesters/validators
219 output << " <resumptionToken>" << resumption_token.getResumptionTokenString() << "</resumptionToken>" << endl;
220 return false;
221 }
222
223 // Otherwise output this record and increment the count
224 this->output_document(output, protocol, collection_name, document_OID, metadata_prefix);
225 this->output_docs++;
226 }
227
228 identifier_iterator++;
229 }
230
231 return true;
232}
233
234
235bool abstractlistaction::in_date_range(ostream &output, recptproto *protocol, oaiargs &params,
236 text_t& collection, text_t oai_OID, text_t from, text_t until)
237{
238 // If no "from" or "until" value is specified every record matches, so we don't need to go any further
239 if (from == "" && until == "")
240 {
241 return true;
242 }
243
244 // Get the datestamp from the document as sections do not have this metadata
245 text_t document_OID;
246 get_top(oai_OID, document_OID);
247
248 // Request the oailastmodified value for this document
249 text_tset metadata;
250 metadata.insert("oailastmodified");
251 metadata.insert("gs.OAIDateStamp");
252 FilterResponse_t response;
253 if (!get_oai_info(document_OID, collection, "", metadata, false, protocol, response, *this->logout))
254 {
255 return false;
256 }
257
258 text_t last_modified_date;
259 this->getLastModifiedDate(response.docInfo[0], last_modified_date);
260
261 // Check this record is not before the "from" value, if it exists
262 if (from != "" && last_modified_date < from)
263 {
264 // Too early
265 return false;
266 }
267
268 // Check this record is not after the "until" value, if it exists
269 if (until != "" && last_modified_date > until)
270 {
271 // Too late
272 return false;
273 }
274
275 // Just right
276 return true;
277}
Note: See TracBrowser for help on using the repository browser.