source: gsdl/trunk/runtime-src/src/oaiservr/metaformat.cpp@ 17546

Last change on this file since 17546 was 17546, checked in by mdewsnip, 16 years ago

OAI server now calls xml_safe() on metadata output, to prevent non-well-formed XML from messing things up.

  • Property svn:keywords set to Author Date Id Revision
File size: 5.0 KB
Line 
1#include <fstream>
2#include "metaformat.h"
3#include "gsdltools.h"
4#include "gsdlunicode.h"
5#include "recptprototools.h"
6
7
8metaformat::metaformat()
9{
10}
11
12text_t metaformat::get_mapping(const text_t &collection, const text_t &collectionField)
13{
14 if (this->oaiConfigure == NULL) {
15 return "";
16 }
17
18 return this->oaiConfigure->getMapping(collection, collectionField, this->formatPrefix());
19}
20
21void metaformat::output_item(ostream &output, outconvertclass &outconvert,
22 bool &headerDone, const text_t &label,
23 const text_tarray &values)
24{
25
26 if (!headerDone && (values.size() > 0)) {
27 this->output_metadata_header(output);
28 headerDone = true;
29 }
30
31 for (int item = 0; item < values.size(); ++item) {
32 if (this->oaiConfigure->getOAIVersion() >= 200) { // TODO: GRB: This code may need to be subclassed by dc for 200 and later...
33 output << outconvert << " <" << this->formatPrefix() << ":" << label << ">" << xml_safe(values[item]) << "</" << this->formatPrefix() << ":" << label << ">\n";
34 }
35 else {
36 output << outconvert << " <" << label << ">" << xml_safe(values[item]) << "</" << label << ">\n";
37 }
38 }
39}
40
41bool metaformat::scan_metadata(ostream &output, const text_t &collection, ResultDocInfo_t &docInfo,
42 bool doOutput)
43{
44 bool headerDone = false;
45 MetadataInfo_tmap::iterator here = docInfo.metadata.begin();
46 MetadataInfo_tmap::iterator end = docInfo.metadata.end();
47
48 utf8outconvertclass utf8convert; // we want to output metadata in utf8
49
50 // metaItem is used initially to identify the rfc1807 (etc) metadata items. It is
51 // then used to hold the name of the metadata item, such as "title" or "subject".
52 text_t metaItem;
53 text_t::const_iterator start, last; // Use two iterators to go through metaItem
54
55 while (here != end) {
56 start = last = here->first.begin();
57
58 if (here->first.size() < this->formatPrefix().size() ||
59 here->first[this->formatPrefix().size()] != '.') {
60 metaItem == "";
61 }
62 else {
63 last += this->formatPrefix().size() + 1; // Move last so that it is one place beyond where the "." should be.
64 metaItem = substr(start, last); // Gets the substring starting at start and going up to (but
65 // not including) last. This should be "dc." (for example)
66 }
67
68 if (metaItem == this->formatPrefix()) {
69 metaItem = substr(last, here->first.end()); // Get the rest of the metadata tag (it's name)
70 lc(metaItem); // Convert it to lowercase for putting in the xml tags
71
72 if (doOutput) {
73 this->output_item(output, utf8convert, headerDone, metaItem, here->second.values);
74 }
75 else {
76 if (here->second.values.size() > 0) {
77 return true;
78 }
79 }
80 }
81 else {
82 text_t mapTo = this->get_mapping(collection, here->first);
83 if (mapTo != "") {
84 // Do we actually want to do anything here? Doesn't getting here imply that this
85 // particular metadata is stuff we don't want?
86 if (doOutput) {
87 this->output_item(output, utf8convert, headerDone, mapTo, here->second.values);
88 }
89 else {
90 if (here->second.values.size() > 0) {
91 return true;
92 }
93 }
94 }
95 }
96
97 ++here;
98 }
99
100 if (!doOutput) {
101 return false;
102 }
103
104 if (headerDone) {
105 this->output_metadata_footer(output);
106 }
107
108 return headerDone;
109}
110
111
112bool metaformat::is_available(const text_t &collection, ResultDocInfo_t &docInfo)
113{
114 ofstream o("dummy", ios::out);
115 return this->scan_metadata(o, collection, docInfo, false);
116}
117
118bool metaformat::output_metadata(ostream &output, const text_t &collection, ResultDocInfo_t &docInfo)
119{
120 return this->scan_metadata(output, collection, docInfo, true);
121}
122
123bool metaformat::output_record(ostream &output, recptproto *protocol, const text_t &collection,
124 const text_t &OID)
125{
126 FilterResponse_t response;
127 text_tset metadata;
128 ofstream logout("oai.log", ios::app);
129
130 // get the document information
131 if (!get_info(OID, collection, "", metadata, false, protocol, response, logout)) {
132 // TODO: error, bad request
133 // cerr << "Bad identifier or protocol " << OID << endl;
134 return false;
135 }
136
137 // check to see if it's a classifier
138 text_t childHead;
139 // int oaiVersion = this->oaiConfigure->getOAIVersion();
140 text_t::const_iterator start = OID.begin();
141 text_t::const_iterator here = OID.begin();
142 here += 2;
143 childHead = substr(start, here);
144
145 // if it isn't a document, kill it now
146 if (childHead == "CL") {
147 // cerr << "Not a document" << endl;
148 return false;
149 }
150
151 // output record header
152 output << "<record>\n";
153
154 // output header part of oai response
155 output << "<header>" << endl;
156 output << " <identifier>" << OID << "</identifier>" << endl;
157 // TODO: add modified date
158
159 output << "</header>" << endl;
160
161 // output metadata part of oai response
162 this->output_metadata(output, collection, response.docInfo[0]);
163
164 // output the description of the document
165 // output << "<about>\n";
166 // output << "</about>\n";
167
168 // close record
169 output << "</record>\n";
170
171 return true;
172}
Note: See TracBrowser for help on using the repository browser.