source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/RSSRetrieve.java@ 28966

Last change on this file since 28966 was 28966, checked in by kjdon, 10 years ago

Lots of changes. Mainly to do with removing this.doc from everywhere. Document is not thread safe. Now we tend to create a new Document every time we are starting a new page/message etc. In service, this.desc_doc is available as the document to create service info stuff. But it should only be used for this and not for other messages. newDOM is now static for XMLConverter. Method param changes for some GSXML methods.

File size: 11.9 KB
Line 
1package org.greenstone.gsdl3.service;
2
3
4// Greenstone classes
5import org.greenstone.gsdl3.util.*;
6import org.greenstone.gsdl3.collection.Collection;
7import org.greenstone.util.GlobalProperties;
8
9// XML classes
10import org.w3c.dom.Document;
11import org.w3c.dom.Element;
12import org.w3c.dom.Node;
13import org.w3c.dom.Attr;
14import org.w3c.dom.Text;
15import org.w3c.dom.NodeList;
16import org.w3c.dom.NamedNodeMap;
17import org.w3c.dom.ProcessingInstruction;
18
19// General Java classes
20import java.io.BufferedReader;
21import java.io.File;
22import java.io.FileReader;
23import java.io.Serializable;
24import java.util.Vector;
25import java.util.HashMap;
26import java.util.Date;
27import java.text.SimpleDateFormat;
28import org.apache.log4j.*;
29
30public class RSSRetrieve extends ServiceRack {
31
32 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.RSSRetrieve.class.getName());
33 protected static final String RSS_SERVICE = "RSSFeed";
34
35 public boolean configure(Element info, Element extra_info) {
36 if (!super.configure(info, extra_info)){
37 return false;
38 }
39 logger.info("configuring RSSRetrieve...");
40
41 // set up short_service_info_ - for now just has name and type
42 Element rss_service = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
43 rss_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
44 rss_service.setAttribute(GSXML.NAME_ATT, RSS_SERVICE);
45 this.short_service_info.appendChild(rss_service);
46
47 return true;
48 }
49
50 // this may get called but is not useful in the case of retrieve services
51 protected Element getServiceDescription(Document doc, String service_id, String lang, String subset) {
52
53 Element rss_service = doc.createElement(GSXML.SERVICE_ELEM);
54 rss_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
55 rss_service.setAttribute(GSXML.NAME_ATT, service_id);
56 return rss_service;
57 }
58
59 // Sends off a collection 'describe' message and returns the <collection> element of the response.
60 // This contains the collection meta from collectionConfig.xml. Used to construct header of RSS feed
61 protected Element getCollMetadata(UserContext userContext) {
62
63 Document msg_doc = XMLConverter.newDOM();
64 Element mr_request_message = msg_doc.createElement(GSXML.MESSAGE_ELEM);
65 String to = this.cluster_name;
66 Element meta_request = GSXML.createBasicRequest(msg_doc, GSXML.REQUEST_TYPE_DESCRIBE, to, userContext);
67 mr_request_message.appendChild(meta_request);
68 Element meta_response = (Element) this.router.process(mr_request_message);
69 meta_response = (Element) GSXML.getChildByTagName(meta_response, GSXML.RESPONSE_ELEM);
70
71 NodeList nl = meta_response.getElementsByTagName(GSXML.COLLECTION_ELEM);
72 Element collectionEl = (Element) nl.item(0);
73 return collectionEl;
74 }
75
76
77 /**
78 Generates the RSS feed XML by creating the header and footer with the contents
79 of the the collection's index/rss-items.rdf file embedded in the middle.
80 @return the RSS feed XML.
81 @see http://cyber.law.harvard.edu/rss/rss.html
82 */
83 protected Element processRSSFeed(Element request) {
84
85 // Ask the MessageRouter for this collection's colConfig metadata
86 // from which the RSS header values will be constructed
87 UserContext userContext = new UserContext(request);
88 Element collMeta = getCollMetadata(userContext);
89
90 //logger.error("**** collection metadata:");
91 //GSXML.elementToLogAsString(collMeta, true);
92
93 // work out some commonly used variables such as lang and url_prefix
94 String lang = request.getAttribute("lang");
95 if(lang.equals("")) {
96 lang = "en";
97 }
98
99 // url_prefix is of the form http://domain/greenstone3/library/collection/_colname_/
100 String url_prefix = GlobalProperties.getFullGSDL3WebAddress()+"/"+this.library_name+"/collection/"+this.cluster_name;
101
102
103 // generate the header and footer
104 Document rssDoc = XMLConverter.newDOM();
105
106 Element rssNode = rssDoc.createElement("rss"); // rootnode
107 rssNode.setAttribute("version", "2.0");
108
109 String namespace_url = "http://www.w3.org/2000/xmlns/";
110 rssNode.setAttributeNS(namespace_url, "xmlns:content", "http://purl.org/rss/1.0/modules/content/");
111 rssNode.setAttributeNS(namespace_url, "xmlns:taxo", "http://purl.org/rss/1.0/modules/taxonomy/");
112 rssNode.setAttributeNS(namespace_url, "xmlns:dc", "http://purl.org/dc/elements/1.1/");
113 rssNode.setAttributeNS(namespace_url, "xmlns:syn", "http://purl.org/rss/1.0/modules/syndication/");
114 rssNode.setAttributeNS(namespace_url, "xmlns:admin", "http://webns.net/mvcb/");
115 rssDoc.appendChild(rssNode);
116
117 // Setting the preproccessing header line (Utf-8) will be done in web/interfaces' rss.xsl
118 //ProcessingInstruction procInstruction = doc.createProcessingInstruction("xml","version=\"1.0\"");
119 //rssDoc.appendChild(procInstruction);
120
121 Element channelNode = rssDoc.createElement("channel");
122 rssNode.appendChild(channelNode);
123
124 Element childNode = rssDoc.createElement("title");
125 GSXML.setNodeText(childNode, this.cluster_name); //_collectionname_
126 channelNode.appendChild(childNode);
127
128 // _httppageabout_: of form http://domain/greenstone3/library/collection/_colname_/page/about
129 childNode = rssDoc.createElement("link");
130 GSXML.setNodeText(childNode, url_prefix+"/page/about"); // _httppageabout_
131 channelNode.appendChild(childNode);
132
133 // get the description string for the requested language, else fallback on en description if present
134 childNode = rssDoc.createElement("description");
135 NodeList descriptions = GSXML.getNamedElements(collMeta, GSXML.DISPLAY_TEXT_ELEM, GSXML.NAME_ATT, GSXML.DISPLAY_TEXT_DESCRIPTION);
136 //Element descriptEl = GSXML.getNamedElement(collMeta, GSXML.DISPLAY_TEXT_ELEM, GSXML.NAME_ATT, GSXML.DISPLAY_TEXT_DESCRIPTION);
137 Element descriptEl = null;
138 if(descriptions != null) {
139 for (int i = 0; i < descriptions.getLength(); i++) {
140 Element e = (Element) descriptions.item(i);
141 if(e.getAttribute("lang").equals(lang)) {
142 descriptEl = e;
143 break; // found the description for the requested language, finish loop
144 } else if(e.getAttribute("lang").equals("en")) {
145 descriptEl = e; // at least found english fall-back description, continue loop
146 }
147 }
148 }
149 String description = (descriptEl == null) ? "none" : GSXML.getNodeText(descriptEl);
150 GSXML.setNodeText(childNode, description); //_collectionextra_
151 channelNode.appendChild(childNode);
152
153 childNode = rssDoc.createElement("language");
154 GSXML.setNodeText(childNode, lang); //_cgiargl_
155 channelNode.appendChild(childNode);
156
157
158 // RSS specification: http://cyber.law.harvard.edu/rss/rss.html
159 // pubDate is date of first publication of the item. Use collection.getEarliestDatestamp()
160 // lastBuildDate is the date the item was last modified. Use collection.getLastmodified()
161
162 //HashMap<String, ModuleInterface> module_map = this.router.getModuleMap();
163 //Collection coll = (Collection)module_map.get(this.cluster_name);
164 Collection coll = (Collection)serviceCluster;
165 SimpleDateFormat dateFormat = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss z");
166
167 childNode = rssDoc.createElement("pubDate");
168 Date date = new Date(coll.getEarliestDatestamp()); // "Thu, 23 Aug 1999 07:00:00 GMT"
169 GSXML.setNodeText(childNode, dateFormat.format(date));
170 channelNode.appendChild(childNode);
171
172 childNode = rssDoc.createElement("lastBuildDate");
173 date = new Date(coll.getLastmodified()); // "Thu, 23 Aug 1999 16:20:26 GMT"
174 GSXML.setNodeText(childNode, dateFormat.format(date));
175 channelNode.appendChild(childNode);
176
177 childNode = rssDoc.createElement("managingEditor");
178 Element e = GSXML.getNamedElement(collMeta, GSXML.DISPLAY_TEXT_ELEM, GSXML.NAME_ATT, "creator");
179 String value = (e == null) ? "unknown" : GSXML.getNodeText(e);
180 GSXML.setNodeText(childNode, value); //_creator_
181 channelNode.appendChild(childNode);
182
183 childNode = rssDoc.createElement("webMaster");
184 e = GSXML.getNamedElement(collMeta, GSXML.DISPLAY_TEXT_ELEM, GSXML.NAME_ATT, "maintainer");
185 value = (e == null) ? "unknown" : GSXML.getNodeText(e);
186 GSXML.setNodeText(childNode, value); //_maintainer_
187 channelNode.appendChild(childNode);
188
189
190 // <image> child of <channel> has title, url, link and description children
191 Element collIcon = GSXML.getNamedElement(collMeta, GSXML.DISPLAY_TEXT_ELEM, GSXML.NAME_ATT, "icon");
192 if(collIcon != null) { // since there is a collection image, create an imageNode
193
194 Node imageNode = rssDoc.createElement("image");
195 channelNode.appendChild(imageNode);
196
197 childNode = rssDoc.createElement("title");
198 GSXML.setNodeText(childNode, this.cluster_name); //_collectionname_
199 imageNode.appendChild(childNode);
200
201 // need full URL for collection icon. Its name is in <displayItem name="icon_name.ext"/>
202 // URL is of the form domain/servlet/sites/localsite/collect/lucene-jdbm-demo/images/icon_name.ext
203 childNode = rssDoc.createElement("url");
204 String domain = GlobalProperties.getFullGSDL3WebAddress(); // remove servlet as it's included in site_http_address
205 domain = domain.substring(0, domain.lastIndexOf("/"));
206 String image_url = GSXML.getNodeText(collIcon); // name of image file
207 image_url = domain+"/"+this.site_http_address+"/"+"/collect/"+this.cluster_name+"/images/"+image_url;
208 GSXML.setNodeText(childNode, image_url); // _iconcollection_
209 imageNode.appendChild(childNode);
210
211 childNode = rssDoc.createElement("link");
212 GSXML.setNodeText(childNode, url_prefix+"/page/about"); // _httppageabout_
213 imageNode.appendChild(childNode);
214
215 childNode = rssDoc.createElement("description");
216 GSXML.setNodeText(childNode, description); //_collectionextra_
217 imageNode.appendChild(childNode);
218 }
219
220
221 // now add the contents of rss-items.rdf as a child of channel,
222 // passing in url_prefix for url resolution
223 Element rss_raw_data = loadDocument("rss-items.rdf", url_prefix);
224 if(rss_raw_data != null) {
225 NodeList rss_items = rss_raw_data.getElementsByTagName("item");
226 for(int i = 0; i < rss_items.getLength(); i++) {
227 channelNode.appendChild(rssDoc.importNode(rss_items.item(i), true));
228 }
229 }
230
231 // generate the GS3 response message containing the RSS xml
232 Element result = rssDoc.createElement(GSXML.RESPONSE_ELEM);
233 result.setAttribute(GSXML.FROM_ATT, RSS_SERVICE);
234 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
235 result.appendChild(rssNode); // body of <response> is simply the <rss> root element of the RSS feed
236 return result;
237
238 }
239
240 // load contents of rss-items.rdf file into an XML doc in memory, after doing the url_prefix replacements
241 protected Element loadDocument(String doc_name, String url_prefix) {
242 String document_encoding = "UTF-8";
243
244 // try to find the document
245 File doc_file = new File(GSFile.collectionIndexDir(this.site_home, this.cluster_name)+File.separator+doc_name);
246
247 if (!doc_file.exists()) {
248 logger.info("couldn't find file in coll "+this.cluster_name +", file "+doc_name);
249 return null;
250 }
251
252 // the rss-items.rdf file has no root element, only multiple <item> elements (and subelements)
253 // Without a root element, it can't be read into a DOM object. So we read it into a regular String,
254 // then bookend the contents with temporary <rssrawdata></rssrawdata> elements to provide a root
255 // element and can read in that String as a DOM object.
256
257 StringBuffer contents = new StringBuffer("<rssrawdata>\n");
258 try {
259 BufferedReader in = new BufferedReader(new FileReader(doc_file));
260 String line = null;
261 while((line = in.readLine()) != null) {
262 //line = line.replace("_httpcollection_", "/"+this.cluster_name);
263 line = line.replace("_httpdomain__httpcollection_", url_prefix);
264 contents.append(line);
265 }
266 contents.append("</rssrawdata>");
267 in.close(); // close the fileread handle
268
269 } catch (Exception e) {
270 e.printStackTrace();
271 contents.append("couldn't read ");
272 contents.append(doc_file);
273 contents.append("\n</rssrawdata>");
274 }
275
276
277 Document the_doc = null;
278 try {
279 the_doc = this.converter.getDOM(contents.toString()); // String input converted to DOM
280 } catch (Exception e) {
281 logger.error("couldn't create a DOM from file "+doc_file.getPath());
282 return null;
283 }
284
285 return the_doc.getDocumentElement();
286
287 }
288
289
290}
291
Note: See TracBrowser for help on using the repository browser.