source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/RSSRetrieve.java@ 32453

Last change on this file since 32453 was 29979, checked in by Georgiy Litvinov, 9 years ago

RSS feed link URLs now use the baseUrl from the request. This fixes broken links when running behind an AJP proxy.

File size: 12.0 KB
Line 
1package org.greenstone.gsdl3.service;
2
3
4// Greenstone classes
5import org.greenstone.gsdl3.util.*;
6import org.greenstone.gsdl3.collection.Collection;
7import org.greenstone.util.GlobalProperties;
8
9// XML classes
10import org.w3c.dom.Document;
11import org.w3c.dom.Element;
12import org.w3c.dom.Node;
13import org.w3c.dom.Attr;
14import org.w3c.dom.Text;
15import org.w3c.dom.NodeList;
16import org.w3c.dom.NamedNodeMap;
17import org.w3c.dom.ProcessingInstruction;
18
19// General Java classes
20import java.io.BufferedReader;
21import java.io.File;
22import java.io.FileReader;
23import java.io.Serializable;
24import java.util.Vector;
25import java.util.HashMap;
26import java.util.Date;
27import java.text.SimpleDateFormat;
28
29import org.apache.log4j.*;
30
31public class RSSRetrieve extends ServiceRack {
32
33 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.RSSRetrieve.class.getName());
34 protected static final String RSS_SERVICE = "RSSFeed";
35
36 public boolean configure(Element info, Element extra_info) {
37 if (!super.configure(info, extra_info)){
38 return false;
39 }
40 logger.info("configuring RSSRetrieve...");
41
42 // set up short_service_info_ - for now just has name and type
43 Element rss_service = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
44 rss_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
45 rss_service.setAttribute(GSXML.NAME_ATT, RSS_SERVICE);
46 this.short_service_info.appendChild(rss_service);
47
48 return true;
49 }
50
51 // this may get called but is not useful in the case of retrieve services
52 protected Element getServiceDescription(Document doc, String service_id, String lang, String subset) {
53
54 Element rss_service = doc.createElement(GSXML.SERVICE_ELEM);
55 rss_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
56 rss_service.setAttribute(GSXML.NAME_ATT, service_id);
57 return rss_service;
58 }
59
60 // Sends off a collection 'describe' message and returns the <collection> element of the response.
61 // This contains the collection meta from collectionConfig.xml. Used to construct header of RSS feed
62 protected Element getCollMetadata(UserContext userContext) {
63
64 Document msg_doc = XMLConverter.newDOM();
65 Element mr_request_message = msg_doc.createElement(GSXML.MESSAGE_ELEM);
66 String to = this.cluster_name;
67 Element meta_request = GSXML.createBasicRequest(msg_doc, GSXML.REQUEST_TYPE_DESCRIBE, to, userContext);
68 mr_request_message.appendChild(meta_request);
69 Element meta_response = (Element) this.router.process(mr_request_message);
70 meta_response = (Element) GSXML.getChildByTagName(meta_response, GSXML.RESPONSE_ELEM);
71
72 NodeList nl = meta_response.getElementsByTagName(GSXML.COLLECTION_ELEM);
73 Element collectionEl = (Element) nl.item(0);
74 return collectionEl;
75 }
76
77
78 /**
79 Generates the RSS feed XML by creating the header and footer with the contents
80 of the the collection's index/rss-items.rdf file embedded in the middle.
81 @return the RSS feed XML.
82 @see http://cyber.law.harvard.edu/rss/rss.html
83 */
84 protected Element processRSSFeed(Element request) {
85
86 // Ask the MessageRouter for this collection's colConfig metadata
87 // from which the RSS header values will be constructed
88 UserContext userContext = new UserContext(request);
89 Element collMeta = getCollMetadata(userContext);
90
91 //logger.error("**** collection metadata:");
92 //GSXML.elementToLogAsString(collMeta, true);
93
94 // work out some commonly used variables such as lang and url_prefix
95 String lang = request.getAttribute("lang");
96 if(lang.equals("")) {
97 lang = "en";
98 }
99 //Get baseUrl from request
100 String baseURL = request.getAttribute("baseURL");
101
102 // url_prefix is of the form http://domain/greenstone3/library/collection/_colname_/
103 //String url_prefix = GlobalProperties.getFullGSDL3WebAddress()+"/"+this.library_name+"/collection/"+this.cluster_name;
104 String url_prefix = baseURL+"library"+"/"+this.library_name+"/collection/"+this.cluster_name;
105
106 // generate the header and footer
107 Document rssDoc = XMLConverter.newDOM();
108
109 Element rssNode = rssDoc.createElement("rss"); // rootnode
110 rssNode.setAttribute("version", "2.0");
111
112 String namespace_url = "http://www.w3.org/2000/xmlns/";
113 rssNode.setAttributeNS(namespace_url, "xmlns:content", "http://purl.org/rss/1.0/modules/content/");
114 rssNode.setAttributeNS(namespace_url, "xmlns:taxo", "http://purl.org/rss/1.0/modules/taxonomy/");
115 rssNode.setAttributeNS(namespace_url, "xmlns:dc", "http://purl.org/dc/elements/1.1/");
116 rssNode.setAttributeNS(namespace_url, "xmlns:syn", "http://purl.org/rss/1.0/modules/syndication/");
117 rssNode.setAttributeNS(namespace_url, "xmlns:admin", "http://webns.net/mvcb/");
118 rssDoc.appendChild(rssNode);
119
120 // Setting the preproccessing header line (Utf-8) will be done in web/interfaces' rss.xsl
121 //ProcessingInstruction procInstruction = doc.createProcessingInstruction("xml","version=\"1.0\"");
122 //rssDoc.appendChild(procInstruction);
123
124 Element channelNode = rssDoc.createElement("channel");
125 rssNode.appendChild(channelNode);
126
127 Element childNode = rssDoc.createElement("title");
128 GSXML.setNodeText(childNode, this.cluster_name); //_collectionname_
129 channelNode.appendChild(childNode);
130
131 // _httppageabout_: of form http://domain/greenstone3/library/collection/_colname_/page/about
132 childNode = rssDoc.createElement("link");
133 GSXML.setNodeText(childNode, url_prefix+"/page/about"); // _httppageabout_
134 channelNode.appendChild(childNode);
135
136 // get the description string for the requested language, else fallback on en description if present
137 childNode = rssDoc.createElement("description");
138 NodeList descriptions = GSXML.getNamedElements(collMeta, GSXML.DISPLAY_TEXT_ELEM, GSXML.NAME_ATT, GSXML.DISPLAY_TEXT_DESCRIPTION);
139 //Element descriptEl = GSXML.getNamedElement(collMeta, GSXML.DISPLAY_TEXT_ELEM, GSXML.NAME_ATT, GSXML.DISPLAY_TEXT_DESCRIPTION);
140 Element descriptEl = null;
141 if(descriptions != null) {
142 for (int i = 0; i < descriptions.getLength(); i++) {
143 Element e = (Element) descriptions.item(i);
144 if(e.getAttribute("lang").equals(lang)) {
145 descriptEl = e;
146 break; // found the description for the requested language, finish loop
147 } else if(e.getAttribute("lang").equals("en")) {
148 descriptEl = e; // at least found english fall-back description, continue loop
149 }
150 }
151 }
152 String description = (descriptEl == null) ? "none" : GSXML.getNodeText(descriptEl);
153 GSXML.setNodeText(childNode, description); //_collectionextra_
154 channelNode.appendChild(childNode);
155
156 childNode = rssDoc.createElement("language");
157 GSXML.setNodeText(childNode, lang); //_cgiargl_
158 channelNode.appendChild(childNode);
159
160
161 // RSS specification: http://cyber.law.harvard.edu/rss/rss.html
162 // pubDate is date of first publication of the item. Use collection.getEarliestDatestamp()
163 // lastBuildDate is the date the item was last modified. Use collection.getLastmodified()
164
165 //HashMap<String, ModuleInterface> module_map = this.router.getModuleMap();
166 //Collection coll = (Collection)module_map.get(this.cluster_name);
167 Collection coll = (Collection)serviceCluster;
168 SimpleDateFormat dateFormat = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss z");
169
170 childNode = rssDoc.createElement("pubDate");
171 Date date = new Date(coll.getEarliestDatestamp()); // "Thu, 23 Aug 1999 07:00:00 GMT"
172 GSXML.setNodeText(childNode, dateFormat.format(date));
173 channelNode.appendChild(childNode);
174
175 childNode = rssDoc.createElement("lastBuildDate");
176 date = new Date(coll.getLastmodified()); // "Thu, 23 Aug 1999 16:20:26 GMT"
177 GSXML.setNodeText(childNode, dateFormat.format(date));
178 channelNode.appendChild(childNode);
179
180 childNode = rssDoc.createElement("managingEditor");
181 Element e = GSXML.getNamedElement(collMeta, GSXML.DISPLAY_TEXT_ELEM, GSXML.NAME_ATT, "creator");
182 String value = (e == null) ? "unknown" : GSXML.getNodeText(e);
183 GSXML.setNodeText(childNode, value); //_creator_
184 channelNode.appendChild(childNode);
185
186 childNode = rssDoc.createElement("webMaster");
187 e = GSXML.getNamedElement(collMeta, GSXML.DISPLAY_TEXT_ELEM, GSXML.NAME_ATT, "maintainer");
188 value = (e == null) ? "unknown" : GSXML.getNodeText(e);
189 GSXML.setNodeText(childNode, value); //_maintainer_
190 channelNode.appendChild(childNode);
191
192
193 // <image> child of <channel> has title, url, link and description children
194 Element collIcon = GSXML.getNamedElement(collMeta, GSXML.DISPLAY_TEXT_ELEM, GSXML.NAME_ATT, "icon");
195 if(collIcon != null) { // since there is a collection image, create an imageNode
196
197 Node imageNode = rssDoc.createElement("image");
198 channelNode.appendChild(imageNode);
199
200 childNode = rssDoc.createElement("title");
201 GSXML.setNodeText(childNode, this.cluster_name); //_collectionname_
202 imageNode.appendChild(childNode);
203
204 // need full URL for collection icon. Its name is in <displayItem name="icon_name.ext"/>
205 // URL is of the form domain/servlet/sites/localsite/collect/lucene-jdbm-demo/images/icon_name.ext
206 childNode = rssDoc.createElement("url");
207 String domain = GlobalProperties.getFullGSDL3WebAddress(); // remove servlet as it's included in site_http_address
208 domain = domain.substring(0, domain.lastIndexOf("/"));
209 String image_url = GSXML.getNodeText(collIcon); // name of image file
210 image_url = domain+"/"+this.site_http_address+"/"+"/collect/"+this.cluster_name+"/images/"+image_url;
211 GSXML.setNodeText(childNode, image_url); // _iconcollection_
212 imageNode.appendChild(childNode);
213
214 childNode = rssDoc.createElement("link");
215 GSXML.setNodeText(childNode, url_prefix+"/page/about"); // _httppageabout_
216 imageNode.appendChild(childNode);
217
218 childNode = rssDoc.createElement("description");
219 GSXML.setNodeText(childNode, description); //_collectionextra_
220 imageNode.appendChild(childNode);
221 }
222
223
224 // now add the contents of rss-items.rdf as a child of channel,
225 // passing in url_prefix for url resolution
226 Element rss_raw_data = loadDocument("rss-items.rdf", url_prefix);
227 if(rss_raw_data != null) {
228 NodeList rss_items = rss_raw_data.getElementsByTagName("item");
229 for(int i = 0; i < rss_items.getLength(); i++) {
230 channelNode.appendChild(rssDoc.importNode(rss_items.item(i), true));
231 }
232 }
233
234 // generate the GS3 response message containing the RSS xml
235 Element result = rssDoc.createElement(GSXML.RESPONSE_ELEM);
236 result.setAttribute(GSXML.FROM_ATT, RSS_SERVICE);
237 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
238 result.appendChild(rssNode); // body of <response> is simply the <rss> root element of the RSS feed
239 return result;
240
241 }
242
243 // load contents of rss-items.rdf file into an XML doc in memory, after doing the url_prefix replacements
244 protected Element loadDocument(String doc_name, String url_prefix) {
245 String document_encoding = "UTF-8";
246
247 // try to find the document
248 File doc_file = new File(GSFile.collectionIndexDir(this.site_home, this.cluster_name)+File.separator+doc_name);
249
250 if (!doc_file.exists()) {
251 logger.info("couldn't find file in coll "+this.cluster_name +", file "+doc_name);
252 return null;
253 }
254
255 // the rss-items.rdf file has no root element, only multiple <item> elements (and subelements)
256 // Without a root element, it can't be read into a DOM object. So we read it into a regular String,
257 // then bookend the contents with temporary <rssrawdata></rssrawdata> elements to provide a root
258 // element and can read in that String as a DOM object.
259
260 StringBuffer contents = new StringBuffer("<rssrawdata>\n");
261 try {
262 BufferedReader in = new BufferedReader(new FileReader(doc_file));
263 String line = null;
264 while((line = in.readLine()) != null) {
265 //line = line.replace("_httpcollection_", "/"+this.cluster_name);
266 line = line.replace("_httpdomain__httpcollection_", url_prefix);
267 contents.append(line);
268 }
269 contents.append("</rssrawdata>");
270 in.close(); // close the fileread handle
271
272 } catch (Exception e) {
273 e.printStackTrace();
274 contents.append("couldn't read ");
275 contents.append(doc_file);
276 contents.append("\n</rssrawdata>");
277 }
278
279
280 Document the_doc = null;
281 try {
282 the_doc = this.converter.getDOM(contents.toString()); // String input converted to DOM
283 } catch (Exception e) {
284 logger.error("couldn't create a DOM from file "+doc_file.getPath());
285 return null;
286 }
287
288 return the_doc.getDocumentElement();
289
290 }
291
292
293}
294
Note: See TracBrowser for help on using the repository browser.