[27087] | 1 | package org.greenstone.gsdl3.service;
|
---|
| 2 |
|
---|
| 3 |
|
---|
// Greenstone classes
import org.greenstone.gsdl3.util.*;
import org.greenstone.gsdl3.collection.Collection;
import org.greenstone.util.GlobalProperties;

// XML classes
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.Attr;
import org.w3c.dom.Text;
import org.w3c.dom.NodeList;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.ProcessingInstruction;

// General Java classes
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Serializable;
import java.util.Vector;
import java.util.HashMap;
import java.util.Date;
import java.util.Locale;
import java.util.TimeZone;
import java.text.SimpleDateFormat;

import org.apache.log4j.*;
|
---|
| 30 |
|
---|
| 31 | public class RSSRetrieve extends ServiceRack {
|
---|
| 32 |
|
---|
| 33 | static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.RSSRetrieve.class.getName());
|
---|
| 34 | protected static final String RSS_SERVICE = "RSSFeed";
|
---|
| 35 |
|
---|
| 36 | public boolean configure(Element info, Element extra_info) {
|
---|
| 37 | if (!super.configure(info, extra_info)){
|
---|
| 38 | return false;
|
---|
| 39 | }
|
---|
| 40 | logger.info("configuring RSSRetrieve...");
|
---|
| 41 |
|
---|
| 42 | // set up short_service_info_ - for now just has name and type
|
---|
[28966] | 43 | Element rss_service = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
|
---|
[27087] | 44 | rss_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
|
---|
| 45 | rss_service.setAttribute(GSXML.NAME_ATT, RSS_SERVICE);
|
---|
| 46 | this.short_service_info.appendChild(rss_service);
|
---|
| 47 |
|
---|
| 48 | return true;
|
---|
| 49 | }
|
---|
| 50 |
|
---|
| 51 | // this may get called but is not useful in the case of retrieve services
|
---|
[28966] | 52 | protected Element getServiceDescription(Document doc, String service_id, String lang, String subset) {
|
---|
[27087] | 53 |
|
---|
[28966] | 54 | Element rss_service = doc.createElement(GSXML.SERVICE_ELEM);
|
---|
[27087] | 55 | rss_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
|
---|
| 56 | rss_service.setAttribute(GSXML.NAME_ATT, service_id);
|
---|
| 57 | return rss_service;
|
---|
| 58 | }
|
---|
| 59 |
|
---|
| 60 | // Sends off a collection 'describe' message and returns the <collection> element of the response.
|
---|
| 61 | // This contains the collection meta from collectionConfig.xml. Used to construct header of RSS feed
|
---|
| 62 | protected Element getCollMetadata(UserContext userContext) {
|
---|
[28966] | 63 |
|
---|
| 64 | Document msg_doc = XMLConverter.newDOM();
|
---|
| 65 | Element mr_request_message = msg_doc.createElement(GSXML.MESSAGE_ELEM);
|
---|
[27087] | 66 | String to = this.cluster_name;
|
---|
[28966] | 67 | Element meta_request = GSXML.createBasicRequest(msg_doc, GSXML.REQUEST_TYPE_DESCRIBE, to, userContext);
|
---|
[27087] | 68 | mr_request_message.appendChild(meta_request);
|
---|
| 69 | Element meta_response = (Element) this.router.process(mr_request_message);
|
---|
| 70 | meta_response = (Element) GSXML.getChildByTagName(meta_response, GSXML.RESPONSE_ELEM);
|
---|
| 71 |
|
---|
| 72 | NodeList nl = meta_response.getElementsByTagName(GSXML.COLLECTION_ELEM);
|
---|
| 73 | Element collectionEl = (Element) nl.item(0);
|
---|
| 74 | return collectionEl;
|
---|
| 75 | }
|
---|
| 76 |
|
---|
| 77 |
|
---|
| 78 | /**
|
---|
| 79 | Generates the RSS feed XML by creating the header and footer with the contents
|
---|
| 80 | of the the collection's index/rss-items.rdf file embedded in the middle.
|
---|
| 81 | @return the RSS feed XML.
|
---|
| 82 | @see http://cyber.law.harvard.edu/rss/rss.html
|
---|
| 83 | */
|
---|
| 84 | protected Element processRSSFeed(Element request) {
|
---|
| 85 |
|
---|
| 86 | // Ask the MessageRouter for this collection's colConfig metadata
|
---|
| 87 | // from which the RSS header values will be constructed
|
---|
| 88 | UserContext userContext = new UserContext(request);
|
---|
| 89 | Element collMeta = getCollMetadata(userContext);
|
---|
| 90 |
|
---|
| 91 | //logger.error("**** collection metadata:");
|
---|
| 92 | //GSXML.elementToLogAsString(collMeta, true);
|
---|
[29979] | 93 |
|
---|
[27087] | 94 | // work out some commonly used variables such as lang and url_prefix
|
---|
| 95 | String lang = request.getAttribute("lang");
|
---|
| 96 | if(lang.equals("")) {
|
---|
| 97 | lang = "en";
|
---|
| 98 | }
|
---|
[29979] | 99 | //Get baseUrl from request
|
---|
| 100 | String baseURL = request.getAttribute("baseURL");
|
---|
| 101 |
|
---|
[27087] | 102 | // url_prefix is of the form http://domain/greenstone3/library/collection/_colname_/
|
---|
[29979] | 103 | //String url_prefix = GlobalProperties.getFullGSDL3WebAddress()+"/"+this.library_name+"/collection/"+this.cluster_name;
|
---|
| 104 | String url_prefix = baseURL+"library"+"/"+this.library_name+"/collection/"+this.cluster_name;
|
---|
[27087] | 105 |
|
---|
| 106 | // generate the header and footer
|
---|
[28966] | 107 | Document rssDoc = XMLConverter.newDOM();
|
---|
[27087] | 108 |
|
---|
| 109 | Element rssNode = rssDoc.createElement("rss"); // rootnode
|
---|
| 110 | rssNode.setAttribute("version", "2.0");
|
---|
| 111 |
|
---|
| 112 | String namespace_url = "http://www.w3.org/2000/xmlns/";
|
---|
| 113 | rssNode.setAttributeNS(namespace_url, "xmlns:content", "http://purl.org/rss/1.0/modules/content/");
|
---|
| 114 | rssNode.setAttributeNS(namespace_url, "xmlns:taxo", "http://purl.org/rss/1.0/modules/taxonomy/");
|
---|
| 115 | rssNode.setAttributeNS(namespace_url, "xmlns:dc", "http://purl.org/dc/elements/1.1/");
|
---|
| 116 | rssNode.setAttributeNS(namespace_url, "xmlns:syn", "http://purl.org/rss/1.0/modules/syndication/");
|
---|
| 117 | rssNode.setAttributeNS(namespace_url, "xmlns:admin", "http://webns.net/mvcb/");
|
---|
| 118 | rssDoc.appendChild(rssNode);
|
---|
| 119 |
|
---|
| 120 | // Setting the preproccessing header line (Utf-8) will be done in web/interfaces' rss.xsl
|
---|
| 121 | //ProcessingInstruction procInstruction = doc.createProcessingInstruction("xml","version=\"1.0\"");
|
---|
| 122 | //rssDoc.appendChild(procInstruction);
|
---|
| 123 |
|
---|
| 124 | Element channelNode = rssDoc.createElement("channel");
|
---|
| 125 | rssNode.appendChild(channelNode);
|
---|
| 126 |
|
---|
| 127 | Element childNode = rssDoc.createElement("title");
|
---|
| 128 | GSXML.setNodeText(childNode, this.cluster_name); //_collectionname_
|
---|
| 129 | channelNode.appendChild(childNode);
|
---|
| 130 |
|
---|
| 131 | // _httppageabout_: of form http://domain/greenstone3/library/collection/_colname_/page/about
|
---|
| 132 | childNode = rssDoc.createElement("link");
|
---|
| 133 | GSXML.setNodeText(childNode, url_prefix+"/page/about"); // _httppageabout_
|
---|
| 134 | channelNode.appendChild(childNode);
|
---|
| 135 |
|
---|
| 136 | // get the description string for the requested language, else fallback on en description if present
|
---|
| 137 | childNode = rssDoc.createElement("description");
|
---|
| 138 | NodeList descriptions = GSXML.getNamedElements(collMeta, GSXML.DISPLAY_TEXT_ELEM, GSXML.NAME_ATT, GSXML.DISPLAY_TEXT_DESCRIPTION);
|
---|
| 139 | //Element descriptEl = GSXML.getNamedElement(collMeta, GSXML.DISPLAY_TEXT_ELEM, GSXML.NAME_ATT, GSXML.DISPLAY_TEXT_DESCRIPTION);
|
---|
| 140 | Element descriptEl = null;
|
---|
| 141 | if(descriptions != null) {
|
---|
| 142 | for (int i = 0; i < descriptions.getLength(); i++) {
|
---|
| 143 | Element e = (Element) descriptions.item(i);
|
---|
| 144 | if(e.getAttribute("lang").equals(lang)) {
|
---|
| 145 | descriptEl = e;
|
---|
| 146 | break; // found the description for the requested language, finish loop
|
---|
| 147 | } else if(e.getAttribute("lang").equals("en")) {
|
---|
| 148 | descriptEl = e; // at least found english fall-back description, continue loop
|
---|
| 149 | }
|
---|
| 150 | }
|
---|
| 151 | }
|
---|
| 152 | String description = (descriptEl == null) ? "none" : GSXML.getNodeText(descriptEl);
|
---|
| 153 | GSXML.setNodeText(childNode, description); //_collectionextra_
|
---|
| 154 | channelNode.appendChild(childNode);
|
---|
| 155 |
|
---|
| 156 | childNode = rssDoc.createElement("language");
|
---|
| 157 | GSXML.setNodeText(childNode, lang); //_cgiargl_
|
---|
| 158 | channelNode.appendChild(childNode);
|
---|
| 159 |
|
---|
| 160 |
|
---|
| 161 | // RSS specification: http://cyber.law.harvard.edu/rss/rss.html
|
---|
| 162 | // pubDate is date of first publication of the item. Use collection.getEarliestDatestamp()
|
---|
| 163 | // lastBuildDate is the date the item was last modified. Use collection.getLastmodified()
|
---|
| 164 |
|
---|
| 165 | //HashMap<String, ModuleInterface> module_map = this.router.getModuleMap();
|
---|
| 166 | //Collection coll = (Collection)module_map.get(this.cluster_name);
|
---|
| 167 | Collection coll = (Collection)serviceCluster;
|
---|
| 168 | SimpleDateFormat dateFormat = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss z");
|
---|
| 169 |
|
---|
| 170 | childNode = rssDoc.createElement("pubDate");
|
---|
| 171 | Date date = new Date(coll.getEarliestDatestamp()); // "Thu, 23 Aug 1999 07:00:00 GMT"
|
---|
| 172 | GSXML.setNodeText(childNode, dateFormat.format(date));
|
---|
| 173 | channelNode.appendChild(childNode);
|
---|
| 174 |
|
---|
| 175 | childNode = rssDoc.createElement("lastBuildDate");
|
---|
| 176 | date = new Date(coll.getLastmodified()); // "Thu, 23 Aug 1999 16:20:26 GMT"
|
---|
| 177 | GSXML.setNodeText(childNode, dateFormat.format(date));
|
---|
| 178 | channelNode.appendChild(childNode);
|
---|
| 179 |
|
---|
| 180 | childNode = rssDoc.createElement("managingEditor");
|
---|
| 181 | Element e = GSXML.getNamedElement(collMeta, GSXML.DISPLAY_TEXT_ELEM, GSXML.NAME_ATT, "creator");
|
---|
| 182 | String value = (e == null) ? "unknown" : GSXML.getNodeText(e);
|
---|
| 183 | GSXML.setNodeText(childNode, value); //_creator_
|
---|
| 184 | channelNode.appendChild(childNode);
|
---|
| 185 |
|
---|
| 186 | childNode = rssDoc.createElement("webMaster");
|
---|
| 187 | e = GSXML.getNamedElement(collMeta, GSXML.DISPLAY_TEXT_ELEM, GSXML.NAME_ATT, "maintainer");
|
---|
| 188 | value = (e == null) ? "unknown" : GSXML.getNodeText(e);
|
---|
| 189 | GSXML.setNodeText(childNode, value); //_maintainer_
|
---|
| 190 | channelNode.appendChild(childNode);
|
---|
| 191 |
|
---|
| 192 |
|
---|
| 193 | // <image> child of <channel> has title, url, link and description children
|
---|
| 194 | Element collIcon = GSXML.getNamedElement(collMeta, GSXML.DISPLAY_TEXT_ELEM, GSXML.NAME_ATT, "icon");
|
---|
| 195 | if(collIcon != null) { // since there is a collection image, create an imageNode
|
---|
| 196 |
|
---|
| 197 | Node imageNode = rssDoc.createElement("image");
|
---|
| 198 | channelNode.appendChild(imageNode);
|
---|
| 199 |
|
---|
| 200 | childNode = rssDoc.createElement("title");
|
---|
| 201 | GSXML.setNodeText(childNode, this.cluster_name); //_collectionname_
|
---|
| 202 | imageNode.appendChild(childNode);
|
---|
| 203 |
|
---|
| 204 | // need full URL for collection icon. Its name is in <displayItem name="icon_name.ext"/>
|
---|
| 205 | // URL is of the form domain/servlet/sites/localsite/collect/lucene-jdbm-demo/images/icon_name.ext
|
---|
| 206 | childNode = rssDoc.createElement("url");
|
---|
| 207 | String domain = GlobalProperties.getFullGSDL3WebAddress(); // remove servlet as it's included in site_http_address
|
---|
| 208 | domain = domain.substring(0, domain.lastIndexOf("/"));
|
---|
| 209 | String image_url = GSXML.getNodeText(collIcon); // name of image file
|
---|
| 210 | image_url = domain+"/"+this.site_http_address+"/"+"/collect/"+this.cluster_name+"/images/"+image_url;
|
---|
| 211 | GSXML.setNodeText(childNode, image_url); // _iconcollection_
|
---|
| 212 | imageNode.appendChild(childNode);
|
---|
| 213 |
|
---|
| 214 | childNode = rssDoc.createElement("link");
|
---|
| 215 | GSXML.setNodeText(childNode, url_prefix+"/page/about"); // _httppageabout_
|
---|
| 216 | imageNode.appendChild(childNode);
|
---|
| 217 |
|
---|
| 218 | childNode = rssDoc.createElement("description");
|
---|
| 219 | GSXML.setNodeText(childNode, description); //_collectionextra_
|
---|
| 220 | imageNode.appendChild(childNode);
|
---|
| 221 | }
|
---|
| 222 |
|
---|
| 223 |
|
---|
| 224 | // now add the contents of rss-items.rdf as a child of channel,
|
---|
| 225 | // passing in url_prefix for url resolution
|
---|
| 226 | Element rss_raw_data = loadDocument("rss-items.rdf", url_prefix);
|
---|
| 227 | if(rss_raw_data != null) {
|
---|
| 228 | NodeList rss_items = rss_raw_data.getElementsByTagName("item");
|
---|
| 229 | for(int i = 0; i < rss_items.getLength(); i++) {
|
---|
| 230 | channelNode.appendChild(rssDoc.importNode(rss_items.item(i), true));
|
---|
| 231 | }
|
---|
| 232 | }
|
---|
| 233 |
|
---|
| 234 | // generate the GS3 response message containing the RSS xml
|
---|
[28966] | 235 | Element result = rssDoc.createElement(GSXML.RESPONSE_ELEM);
|
---|
[27087] | 236 | result.setAttribute(GSXML.FROM_ATT, RSS_SERVICE);
|
---|
| 237 | result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
|
---|
| 238 | result.appendChild(rssNode); // body of <response> is simply the <rss> root element of the RSS feed
|
---|
| 239 | return result;
|
---|
| 240 |
|
---|
| 241 | }
|
---|
| 242 |
|
---|
| 243 | // load contents of rss-items.rdf file into an XML doc in memory, after doing the url_prefix replacements
|
---|
| 244 | protected Element loadDocument(String doc_name, String url_prefix) {
|
---|
| 245 | String document_encoding = "UTF-8";
|
---|
| 246 |
|
---|
| 247 | // try to find the document
|
---|
| 248 | File doc_file = new File(GSFile.collectionIndexDir(this.site_home, this.cluster_name)+File.separator+doc_name);
|
---|
| 249 |
|
---|
| 250 | if (!doc_file.exists()) {
|
---|
| 251 | logger.info("couldn't find file in coll "+this.cluster_name +", file "+doc_name);
|
---|
| 252 | return null;
|
---|
| 253 | }
|
---|
| 254 |
|
---|
| 255 | // the rss-items.rdf file has no root element, only multiple <item> elements (and subelements)
|
---|
| 256 | // Without a root element, it can't be read into a DOM object. So we read it into a regular String,
|
---|
| 257 | // then bookend the contents with temporary <rssrawdata></rssrawdata> elements to provide a root
|
---|
| 258 | // element and can read in that String as a DOM object.
|
---|
| 259 |
|
---|
| 260 | StringBuffer contents = new StringBuffer("<rssrawdata>\n");
|
---|
| 261 | try {
|
---|
| 262 | BufferedReader in = new BufferedReader(new FileReader(doc_file));
|
---|
| 263 | String line = null;
|
---|
| 264 | while((line = in.readLine()) != null) {
|
---|
| 265 | //line = line.replace("_httpcollection_", "/"+this.cluster_name);
|
---|
| 266 | line = line.replace("_httpdomain__httpcollection_", url_prefix);
|
---|
| 267 | contents.append(line);
|
---|
| 268 | }
|
---|
| 269 | contents.append("</rssrawdata>");
|
---|
| 270 | in.close(); // close the fileread handle
|
---|
| 271 |
|
---|
| 272 | } catch (Exception e) {
|
---|
| 273 | e.printStackTrace();
|
---|
| 274 | contents.append("couldn't read ");
|
---|
| 275 | contents.append(doc_file);
|
---|
| 276 | contents.append("\n</rssrawdata>");
|
---|
| 277 | }
|
---|
| 278 |
|
---|
| 279 |
|
---|
| 280 | Document the_doc = null;
|
---|
| 281 | try {
|
---|
| 282 | the_doc = this.converter.getDOM(contents.toString()); // String input converted to DOM
|
---|
| 283 | } catch (Exception e) {
|
---|
| 284 | logger.error("couldn't create a DOM from file "+doc_file.getPath());
|
---|
| 285 | return null;
|
---|
| 286 | }
|
---|
| 287 |
|
---|
| 288 | return the_doc.getDocumentElement();
|
---|
| 289 |
|
---|
| 290 | }
|
---|
| 291 |
|
---|
| 292 |
|
---|
| 293 | }
|
---|
| 294 |
|
---|