root/gsdl/trunk/runtime-src/src/oaiservr/metaformat.cpp @ 18895

Revision 18895, 5.3 KB (checked in by kjdon, 11 years ago)

Added is_valid_element to metaformat, which checks elementSet for the element name. metaformat subclass constructors must set up elementSet. Currently they do this in the code; it would be good if it could eventually be read in from a file. When metaformat is outputting the metadata, it checks whether the element is valid before outputting it; otherwise it would invalidate the response, as it would not conform to the schema. Also changed the code that lowercased the entire name so that it lowercases only the first letter, since some qdc fields have an internal upper-case letter.

  • Property svn:keywords set to Author Date Id Revision
Line 
1#include <fstream>
2#include "metaformat.h"
3#include "gsdltools.h"
4#include "gsdlunicode.h"
5#include "recptprototools.h"
6
7
8metaformat::metaformat()
9{
10}
11
12text_t metaformat::get_mapping(const text_t &collection, const text_t &collectionField)
13{
14  if (this->oaiConfigure == NULL) {
15    return "";
16  }
17
18  return this->oaiConfigure->getMapping(collection, collectionField, this->formatName());
19}
20
21void metaformat::output_item(ostream &output, outconvertclass &outconvert,
22                 bool &headerDone, const text_t &label,
23                 const text_tarray &values)
24{
25
26  if (!headerDone && (values.size() > 0)) {
27    this->output_metadata_header(output);
28    headerDone = true;
29  }
30
31  for (int item = 0; item < values.size(); ++item) {
32    if (this->oaiConfigure->getOAIVersion() >= 200) { // TODO: GRB: This code may need to be subclassed by dc for 200 and later...
33      output << outconvert << "        <" << this->formatPrefix() << ":" << label << ">" << xml_safe(values[item]) << "</" << this->formatPrefix() << ":" << label << ">\n";
34    }
35    else {
36      output << outconvert << "        <" << label << ">" << xml_safe(values[item]) << "</" << label << ">\n";
37    }
38  }
39}
40
41bool metaformat::scan_metadata(ostream &output, const text_t &collection, ResultDocInfo_t &docInfo,
42                   bool doOutput)
43{
44  bool headerDone = false;
45  MetadataInfo_tmap::iterator here = docInfo.metadata.begin();
46  MetadataInfo_tmap::iterator end = docInfo.metadata.end();
47
48  utf8outconvertclass utf8convert; // we want to output metadata in utf8
49
50  // metaItem is used initially to identify the rfc1807 (etc) metadata items. It is
51  // then used to hold the name of the metadata item, such as "title" or "subject".
52  text_t metaItem;
53  text_t::const_iterator start, last; // Use two iterators to go through metaItem
54
55  while (here != end) {
56    start = last = here->first.begin();
57
58    if (here->first.size() < this->formatPrefix().size() ||
59    here->first[this->formatPrefix().size()] != '.') {
60      metaItem == "";
61    }
62    else {
63      last += this->formatPrefix().size(); // Move last so that it is at the
64      // '.'
65      metaItem = substr(start, last);          // Gets the substring starting at start and going up to (but
66                                               // not including) last. This should be "dc" (for example)
67    }
68
69    if (metaItem == this->formatPrefix()) {
70      metaItem = substr(last+1, here->first.end()); // Get the rest of the metadata tag (it's name) but without the '.'
71      lc(metaItem.begin(),metaItem.begin()+1); // We want lowercase, but some of the fields in qualified dublin core have internal upper case, eg instructionalMethod. So we assume that lowercasing the first letter is enough
72      if (doOutput) {
73    if (this->is_valid_element(metaItem)) {
74     
75      this->output_item(output, utf8convert, headerDone, metaItem, here->second.values);
76    }
77      }
78      else {
79    if (here->second.values.size() > 0) {
80      return true;
81    }
82      }
83    } 
84    else {
85      text_t mapTo = this->get_mapping(collection, here->first);
86      if (mapTo != "") {
87    // Do we actually want to do anything here? Doesn't getting here imply that this
88    // particular metadata is stuff we don't want?
89    if (doOutput) {
90      if (this->is_valid_element(mapTo)) {
91        this->output_item(output, utf8convert, headerDone, mapTo, here->second.values);
92      }
93    }
94    else {
95      if (here->second.values.size() > 0) {
96        return true;
97      }
98    }
99      }
100    }
101
102    ++here;
103  }
104
105  if (!doOutput) {
106    return false;
107  }
108 
109  if (headerDone) {
110    this->output_metadata_footer(output);
111  }
112 
113  return headerDone;
114}
115
116
117bool metaformat::is_available(const text_t &collection, ResultDocInfo_t &docInfo)
118{
119  ofstream o("dummy", ios::out);
120  return this->scan_metadata(o, collection, docInfo, false);
121}
122
123bool metaformat::is_valid_element(text_t &meta_name)
124{
125  if (elementSet.count(meta_name)==1) return true;
126  return false;
127 
128}
129
130bool metaformat::output_metadata(ostream &output, const text_t &collection, ResultDocInfo_t &docInfo)
131{
132  return this->scan_metadata(output, collection, docInfo, true);
133}
134
135bool metaformat::output_record(ostream &output, recptproto *protocol, const text_t &collection,
136                   const text_t &OID)
137{
138  FilterResponse_t response;
139  text_tset        metadata;
140  ofstream         logout("oai.log", ios::app);
141
142  // get the document information
143  if (!get_info(OID, collection, "", metadata, false, protocol, response, logout)) {
144    // TODO: error, bad request
145    //   cerr << "Bad identifier or protocol " << OID << endl;
146    return false;
147  }
148
149  // check to see if it's a classifier
150  text_t childHead;
151  //  int oaiVersion = this->oaiConfigure->getOAIVersion();
152  text_t::const_iterator start = OID.begin();
153  text_t::const_iterator here  = OID.begin();
154  here += 2;
155  childHead = substr(start, here);
156
157  // if it isn't a document, kill it now
158  if (childHead == "CL") {
159    //    cerr << "Not a document" << endl;
160    return false;
161  }
162
163  // output record header
164  output << "<record>\n";
165
166  // output header part of oai response
167  output << "<header>" << endl;
168  output << "  <identifier>" << OID << "</identifier>" << endl;
169  // TODO: add modified date
170
171  output << "</header>" << endl;
172
173  // output metadata part of oai response
174  this->output_metadata(output, collection, response.docInfo[0]);
175
176  // output the description of the document
177  //  output << "<about>\n";
178  //  output << "</about>\n";
179
180  // close record
181  output << "</record>\n";
182   
183  return true;
184}
Note: See TracBrowser for help on using the browser.