source: gsdl/trunk/runtime-src/src/oaiservr/metaformat.cpp@ 18895

Last change on this file since 18895 was 18895, checked in by kjdon, 15 years ago

Added is_valid_element to metaformat, which checks elementSet for the element name. Metaformat subclass constructors must set up elementSet; currently they do this in code, though it would be good if it could eventually be read in from a file. When metaformat is outputting the metadata, it checks whether the element is valid before outputting it; otherwise the output would invalidate the response, as it would not conform to the schema. Also changed the place where we lowercased the entire name so that only the first letter is lowercased - some qdc fields have an internal upper case letter.

  • Property svn:keywords set to Author Date Id Revision
File size: 5.3 KB
Line 
1#include <fstream>
2#include "metaformat.h"
3#include "gsdltools.h"
4#include "gsdlunicode.h"
5#include "recptprototools.h"
6
7
8metaformat::metaformat()
9{
10}
11
12text_t metaformat::get_mapping(const text_t &collection, const text_t &collectionField)
13{
14 if (this->oaiConfigure == NULL) {
15 return "";
16 }
17
18 return this->oaiConfigure->getMapping(collection, collectionField, this->formatName());
19}
20
21void metaformat::output_item(ostream &output, outconvertclass &outconvert,
22 bool &headerDone, const text_t &label,
23 const text_tarray &values)
24{
25
26 if (!headerDone && (values.size() > 0)) {
27 this->output_metadata_header(output);
28 headerDone = true;
29 }
30
31 for (int item = 0; item < values.size(); ++item) {
32 if (this->oaiConfigure->getOAIVersion() >= 200) { // TODO: GRB: This code may need to be subclassed by dc for 200 and later...
33 output << outconvert << " <" << this->formatPrefix() << ":" << label << ">" << xml_safe(values[item]) << "</" << this->formatPrefix() << ":" << label << ">\n";
34 }
35 else {
36 output << outconvert << " <" << label << ">" << xml_safe(values[item]) << "</" << label << ">\n";
37 }
38 }
39}
40
41bool metaformat::scan_metadata(ostream &output, const text_t &collection, ResultDocInfo_t &docInfo,
42 bool doOutput)
43{
44 bool headerDone = false;
45 MetadataInfo_tmap::iterator here = docInfo.metadata.begin();
46 MetadataInfo_tmap::iterator end = docInfo.metadata.end();
47
48 utf8outconvertclass utf8convert; // we want to output metadata in utf8
49
50 // metaItem is used initially to identify the rfc1807 (etc) metadata items. It is
51 // then used to hold the name of the metadata item, such as "title" or "subject".
52 text_t metaItem;
53 text_t::const_iterator start, last; // Use two iterators to go through metaItem
54
55 while (here != end) {
56 start = last = here->first.begin();
57
58 if (here->first.size() < this->formatPrefix().size() ||
59 here->first[this->formatPrefix().size()] != '.') {
60 metaItem == "";
61 }
62 else {
63 last += this->formatPrefix().size(); // Move last so that it is at the
64 // '.'
65 metaItem = substr(start, last); // Gets the substring starting at start and going up to (but
66 // not including) last. This should be "dc" (for example)
67 }
68
69 if (metaItem == this->formatPrefix()) {
70 metaItem = substr(last+1, here->first.end()); // Get the rest of the metadata tag (it's name) but without the '.'
71 lc(metaItem.begin(),metaItem.begin()+1); // We want lowercase, but some of the fields in qualified dublin core have internal upper case, eg instructionalMethod. So we assume that lowercasing the first letter is enough
72 if (doOutput) {
73 if (this->is_valid_element(metaItem)) {
74
75 this->output_item(output, utf8convert, headerDone, metaItem, here->second.values);
76 }
77 }
78 else {
79 if (here->second.values.size() > 0) {
80 return true;
81 }
82 }
83 }
84 else {
85 text_t mapTo = this->get_mapping(collection, here->first);
86 if (mapTo != "") {
87 // Do we actually want to do anything here? Doesn't getting here imply that this
88 // particular metadata is stuff we don't want?
89 if (doOutput) {
90 if (this->is_valid_element(mapTo)) {
91 this->output_item(output, utf8convert, headerDone, mapTo, here->second.values);
92 }
93 }
94 else {
95 if (here->second.values.size() > 0) {
96 return true;
97 }
98 }
99 }
100 }
101
102 ++here;
103 }
104
105 if (!doOutput) {
106 return false;
107 }
108
109 if (headerDone) {
110 this->output_metadata_footer(output);
111 }
112
113 return headerDone;
114}
115
116
117bool metaformat::is_available(const text_t &collection, ResultDocInfo_t &docInfo)
118{
119 ofstream o("dummy", ios::out);
120 return this->scan_metadata(o, collection, docInfo, false);
121}
122
123bool metaformat::is_valid_element(text_t &meta_name)
124{
125 if (elementSet.count(meta_name)==1) return true;
126 return false;
127
128}
129
130bool metaformat::output_metadata(ostream &output, const text_t &collection, ResultDocInfo_t &docInfo)
131{
132 return this->scan_metadata(output, collection, docInfo, true);
133}
134
135bool metaformat::output_record(ostream &output, recptproto *protocol, const text_t &collection,
136 const text_t &OID)
137{
138 FilterResponse_t response;
139 text_tset metadata;
140 ofstream logout("oai.log", ios::app);
141
142 // get the document information
143 if (!get_info(OID, collection, "", metadata, false, protocol, response, logout)) {
144 // TODO: error, bad request
145 // cerr << "Bad identifier or protocol " << OID << endl;
146 return false;
147 }
148
149 // check to see if it's a classifier
150 text_t childHead;
151 // int oaiVersion = this->oaiConfigure->getOAIVersion();
152 text_t::const_iterator start = OID.begin();
153 text_t::const_iterator here = OID.begin();
154 here += 2;
155 childHead = substr(start, here);
156
157 // if it isn't a document, kill it now
158 if (childHead == "CL") {
159 // cerr << "Not a document" << endl;
160 return false;
161 }
162
163 // output record header
164 output << "<record>\n";
165
166 // output header part of oai response
167 output << "<header>" << endl;
168 output << " <identifier>" << OID << "</identifier>" << endl;
169 // TODO: add modified date
170
171 output << "</header>" << endl;
172
173 // output metadata part of oai response
174 this->output_metadata(output, collection, response.docInfo[0]);
175
176 // output the description of the document
177 // output << "<about>\n";
178 // output << "</about>\n";
179
180 // close record
181 output << "</record>\n";
182
183 return true;
184}
Note: See TracBrowser for help on using the repository browser.