source: main/trunk/greenstone2/runtime-src/src/oaiservr/metaformat.cpp@ 24874

Last change on this file since 24874 was 22739, checked in by mdewsnip, 14 years ago

Added copyright header to runtime-src/src/oaiserver/*.cpp and runtime-src/src/oaiserver/*.h.

  • Property svn:keywords set to Author Date Id Revision
File size: 7.7 KB
Line 
1/**********************************************************************
2 *
3 * metaformat.cpp --
4 *
5 * Copyright (C) 2004-2010 The New Zealand Digital Library Project
6 *
7 * A component of the Greenstone digital library software
8 * from the New Zealand Digital Library Project at the
9 * University of Waikato, New Zealand.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *
25 *********************************************************************/
26
27#include <fstream>
28#include "metaformat.h"
29#include "gsdltools.h"
30#include "gsdlunicode.h"
31#include "recptprototools.h"
32
33
34metaformat::metaformat()
35{
36}
37
38text_t metaformat::get_mapping(const text_t &collection, const text_t &collectionField)
39{
40 if (this->oaiConfigure == NULL) {
41 return "";
42 }
43
44 return this->oaiConfigure->getMapping(collection, collectionField, this->formatName());
45}
46
47void metaformat::output_item(ostream &output, outconvertclass &outconvert,
48 bool &headerDone, const text_t &label,
49 const text_tarray &values)
50{
51
52 if (!headerDone && (values.size() > 0)) {
53 this->output_metadata_header(output);
54 headerDone = true;
55 }
56
57 for (int item = 0; item < values.size(); ++item) {
58 if (this->oaiConfigure->getOAIVersion() >= 200) { // TODO: GRB: This code may need to be subclassed by dc for 200 and later...
59 output << outconvert << " <" << this->formatPrefix() << ":" << label << ">" << xml_safe(values[item]) << "</" << this->formatPrefix() << ":" << label << ">\n";
60 }
61 else {
62 output << outconvert << " <" << label << ">" << xml_safe(values[item]) << "</" << label << ">\n";
63 }
64 }
65}
66
67bool metaformat::output_custom_metadata(ostream &output, outconvertclass &outconvert, bool &headerDone, const text_t &collection, ResultDocInfo_t &docInfo) {
68 return false;
69}
70bool metaformat::scan_metadata(ostream &output, const text_t &collection, ResultDocInfo_t &docInfo,
71 bool doOutput)
72{
73 bool headerDone = false;
74 MetadataInfo_tmap::iterator here = docInfo.metadata.begin();
75 MetadataInfo_tmap::iterator end = docInfo.metadata.end();
76
77 utf8outconvertclass utf8convert; // we want to output metadata in utf8
78
79 // metaItem is used initially to identify the rfc1807 (etc) metadata items. It is
80 // then used to hold the name of the metadata item, such as "title" or "subject".
81 text_t metaItem;
82 text_t::const_iterator start, last; // Use two iterators to go through metaItem
83
84 while (here != end) {
85 start = last = here->first.begin();
86
87 if (here->first.size() < this->formatPrefix().size() ||
88 here->first[this->formatPrefix().size()] != '.') {
89 metaItem == "";
90 }
91 else {
92 last += this->formatPrefix().size(); // Move last so that it is at the
93 // '.'
94 metaItem = substr(start, last); // Gets the substring starting at start and going up to (but
95 // not including) last. This should be "dc" (for example)
96 }
97
98 // Map the element using the "oaimapping" specification from the oai.cfg/collect.cfg files, if defined
99 text_t mapTo = this->get_mapping(collection, here->first);
100 if (mapTo != "") {
101 if (doOutput) {
102 if (this->is_valid_element(mapTo)) {
103 this->output_item(output, utf8convert, headerDone, mapTo, here->second.values);
104 }
105 }
106 else {
107 if (here->second.values.size() > 0) {
108 return true;
109 }
110 }
111 }
112
113 // Otherwise try to map the element automatically
114 // For example, dc.X is mapped to oai_dc.X
115 else if (metaItem == this->formatPrefix()) {
116 metaItem = substr(last+1, here->first.end()); // Get the rest of the metadata tag (it's name) but without the '.'
117 // remove xxx^ eg Coverage^Spatial becomes spatial
118 // this is for qualified dublin core. May affect other sets later if they
119 // validly have ^ in them.
120 text_t::iterator hat = findchar(metaItem.begin(), metaItem.end(), '^');
121 if (hat != metaItem.end()) {
122 metaItem = substr(hat+1, metaItem.end());
123 }
124 lc(metaItem.begin(),metaItem.begin()+1); // We want lowercase, but some of the fields in qualified dublin core have internal upper case, eg instructionalMethod. So we assume that lowercasing the first letter is enough
125 if (doOutput) {
126 if (this->is_valid_element(metaItem)) {
127
128 this->output_item(output, utf8convert, headerDone, metaItem, here->second.values);
129 }
130 }
131 else {
132 if (here->second.values.size() > 0) {
133 return true;
134 }
135 }
136 }
137 else {
138 }
139
140 ++here;
141 }
142
143 if (!doOutput) {
144 return false;
145 }
146 // specific metadata formats might need to do some custom metadata that is not just a standard mapping. eg oai_dc outputting an identifier that is a link
147 this->output_custom_metadata(output, utf8convert, headerDone, collection, docInfo);
148 if (headerDone) {
149
150 this->output_metadata_footer(output);
151 }
152
153 return headerDone;
154}
155
156text_t metaformat::get_metadata_value(ResultDocInfo_t &docInfo, const text_t &meta_name) {
157 MetadataInfo_tmap::iterator here = docInfo.metadata.find(meta_name);
158 if (here == docInfo.metadata.end()) {
159 return "";
160 }
161 return here->second.values[0];
162
163}
164
165void metaformat::get_metadata_values(ResultDocInfo_t &docInfo, const text_t &meta_name, text_tarray &values) {
166 MetadataInfo_tmap::iterator here = docInfo.metadata.find(meta_name);
167 if (here != docInfo.metadata.end()) {
168 values = here->second.values;
169 }
170}
171
172bool metaformat::is_available(const text_t &collection, ResultDocInfo_t &docInfo)
173{
174 ofstream o("dummy", ios::out);
175 return this->scan_metadata(o, collection, docInfo, false);
176}
177
178bool metaformat::is_valid_element(text_t &meta_name)
179{
180 if (elementSet.count(meta_name)==1) return true;
181 return false;
182
183}
184
185bool metaformat::output_metadata(ostream &output, const text_t &collection, ResultDocInfo_t &docInfo)
186{
187 return this->scan_metadata(output, collection, docInfo, true);
188}
189
190bool metaformat::output_record(ostream &output, recptproto *protocol, const text_t &collection,
191 const text_t &OID)
192{
193 FilterResponse_t response;
194 text_tset metadata;
195 ofstream logout("oai.log", ios::app);
196
197 // get the document information
198 if (!get_info(OID, collection, "", metadata, false, protocol, response, logout)) {
199 // TODO: error, bad request
200 // cerr << "Bad identifier or protocol " << OID << endl;
201 return false;
202 }
203
204 // check to see if it's a classifier
205 text_t childHead;
206 // int oaiVersion = this->oaiConfigure->getOAIVersion();
207 text_t::const_iterator start = OID.begin();
208 text_t::const_iterator here = OID.begin();
209 here += 2;
210 childHead = substr(start, here);
211
212 // if it isn't a document, kill it now
213 if (childHead == "CL") {
214 // cerr << "Not a document" << endl;
215 return false;
216 }
217
218 // output record header
219 output << "<record>\n";
220
221 // output header part of oai response
222 output << "<header>" << endl;
223 output << " <identifier>" << OID << "</identifier>" << endl;
224 // TODO: add modified date
225
226 output << "</header>" << endl;
227
228 // output metadata part of oai response
229 this->output_metadata(output, collection, response.docInfo[0]);
230
231 // output the description of the document
232 // output << "<about>\n";
233 // output << "</about>\n";
234
235 // close record
236 output << "</record>\n";
237
238 return true;
239}
Note: See TracBrowser for help on using the repository browser.