source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/IViaRetrieve.java@ 25635

Last change on this file since 25635 was 25635, checked in by sjm84, 12 years ago

Fixing Greenstone 3's use (or lack thereof) of generics, this was done automatically so we may want to change it over time. This change will also auto-format any files that have not already been formatted.

  • Property svn:keywords set to Author Date Id Revision
File size: 7.8 KB
Line 
1package org.greenstone.gsdl3.service;
2
3// Greenstone classes
4import org.greenstone.gsdl3.core.GSException;
5import org.greenstone.util.Misc;
6import org.greenstone.gsdl3.util.*;
7
8// XML classes
9import org.w3c.dom.Element;
10import org.w3c.dom.Document;
11import org.w3c.dom.NodeList;
12
13import java.util.HashMap;
14import java.util.ArrayList;
15import java.io.File;
16import java.io.InputStream;
17import java.io.BufferedReader;
18import java.io.InputStreamReader;
19import java.io.IOException;
20import java.net.HttpURLConnection;
21import java.net.URLConnection;
22import java.net.URL;
23import java.net.Authenticator;
24import java.net.MalformedURLException;
25
26import org.apache.log4j.*;
27
28/**
29 *
30 * @author <a href="mailto:[email protected]">Katherine Don</a>
31 * @version $Revision: 25635 $
32 * Modified by <a href="mailto:[email protected]">Chi-Yu Huang</a>
33 */
34
35public class IViaRetrieve
36 extends AbstractDocumentRetrieve {
37
38 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.IViaRetrieve.class.getName());
39
40
41 protected String ivia_server_url = null;
42
43 public IViaRetrieve() {
44 does_structure = false;
45 }
46
47 //Configure IViaRetrieve Service
48 public boolean configure(Element info, Element extra_info)
49 {
50 if (!super.configure(info, extra_info)) {
51 return false;
52 }
53
54 Element server_elem = (Element)GSXML.getChildByTagName(info, "iViaServer");
55 if (server_elem == null) {
56 logger.error("no iViaServer element found");
57 return false;
58 }
59 ivia_server_url = server_elem.getAttribute("url");
60 if (ivia_server_url.equals("")) {
61 logger.error("no url for the iViaServer element");
62 return false;
63 }
64 return true;
65
66 }
67
68 /** gets a document by sending a request to iVia, then processes it and creates a documentNode around the text */
69 protected Element getNodeContent(String doc_id, String lang)
70 throws GSException {
71
72 String url_string = ivia_server_url+"/cgi-bin/view_record?theme=gsdl3&record_id="+doc_id;
73
74 StringBuffer buffer = new StringBuffer();
75 try {
76 BufferedReader reader = Misc.makeHttpConnection(url_string);
77 String line;
78 while((line = reader.readLine())!= null) {
79 buffer.append(line);
80 }
81 } catch (java.net.MalformedURLException e) {
82 throw new GSException("Malformed URL: "+url_string, GSXML.ERROR_TYPE_SYSTEM);
83 } catch (java.io.IOException e) {
84 throw new GSException("IOException during connection to "+url_string+": "+e.toString(),GSXML.ERROR_TYPE_SYSTEM);
85 }
86
87 String node_content = buffer.toString();
88
89 String escaped_content = GSXML.xmlSafe(node_content);
90
91 StringBuffer processed_content = new StringBuffer(escaped_content.length());
92 processed_content.append("<nodeContent>");
93 int pos = 0;
94 int lastpos = 0;
95 while ((pos = escaped_content.indexOf("&lt;a ", lastpos))!= -1) {
96 processed_content.append(escaped_content.substring(lastpos, pos));
97 int endpos = escaped_content.indexOf("&lt;/a&gt;", pos);
98 if (endpos == -1) {
99 break;
100 }
101 String link = escaped_content.substring(pos, endpos+10);
102 link = convertLink(link);
103 processed_content.append(link);
104 lastpos = endpos+10;
105 }
106 processed_content.append(escaped_content.substring(lastpos)); // get the last bit
107 processed_content.append("</nodeContent>");
108
109 Document content_doc = this.converter.getDOM(processed_content.toString());
110 if (content_doc == null) {
111 logger.error("Couldn't parse node content");
112 logger.error(processed_content.toString());
113 return null;
114 }
115
116 Element content_element = content_doc.getDocumentElement();
117
118 return (Element)this.doc.importNode(content_element,true);
119 }
120
121 /** converts a url from an <a> element into a greenstone suitable one */
122 protected String convertLink(String aref) {
123 if (aref.indexOf("href=&quot;http") != -1) {
124 return aref; // an external link
125 }
126 String type = "other";
127 if (aref.indexOf("/cgi-bin/canned_search")!=-1) {
128 type="query";
129 } else if (aref.indexOf("/cgi-bin/click_through") != -1) {
130 type = "external";
131 } else if (aref.indexOf("/cgi-bin/view_record") != -1) {
132 type="document";
133 }
134
135 int href_start = aref.indexOf("href=&quot;")+11;
136 int href_end = aref.indexOf("&gt;", href_start);
137 String href = aref.substring(href_start, href_end);
138 String link_content = aref.substring(href_end+4, aref.length()-10);
139
140 if (type.equals("external")) {
141 // the external link is everything after the http at the end.
142 String address = href.substring(href.lastIndexOf("http"));
143 address = address.replaceAll("%3[aA]", ":");
144 address = address.replaceAll("%2[fF]", "/");
145
146 return "&lt;a href=\""+address+"\"&gt;"+link_content+"&lt;/a&gt;";
147 }
148 if (type.equals("other")) {
149 return "other type of link ("+link_content+")";
150 }
151 StringBuffer result = new StringBuffer();
152 result.append("<link type='");
153 result.append(type);
154 result.append("'");
155 if (type.equals("query")) {
156 result.append(" service='TextQuery'");
157 }
158 result.append(">");
159 // add in the parameters
160 href = href.substring(href.indexOf("?")+1);
161 String [] params = href.split("&amp;");
162 for (int i=0; i<params.length; i++) {
163 String param = params[i];
164 int eq_pos = param.indexOf("=");
165 if (eq_pos != -1) {
166
167 result.append("<param name='"+param.substring(0, eq_pos)+"' value='"+param.substring(eq_pos+1)+"'/>");
168 }
169 }
170 result.append(link_content);
171 result.append("</link>");
172
173 return result.toString();
174 }
175
176 // iVia craps out if we ask for a metadata which is not valid. So need
177 // to make sure we only ask for acceptable fields.
178 protected boolean isAcceptableMetadata(String meta) {
179 String valid_metadata = ",title,url,ivia_description,keywords,subjects,";
180 if (valid_metadata.indexOf(","+meta+",")!=-1) {
181 return true;
182 }
183 return false;
184 }
185
186 protected String translateId(String oid){
187 int p = oid.lastIndexOf('.');
188 if (p != oid.length()-3) {
189 logger.info("translateoid error: '.' is not the third to last char!!");
190 return oid;
191 }
192 String top = oid.substring(0, p);
193 return top;
194 }
195
196 protected String translateExternalId(String id){
197 return id;
198 }
199
200 protected String getDocType(String node_id){
201 return GSXML.DOC_TYPE_SIMPLE;
202 }
203 protected String getRootId(String node_id){
204 return node_id;
205 }
206
207 protected ArrayList<String> getChildrenIds(String node_id){
208 return null;
209 }
210
211 protected String getParentId(String node_id){
212 return null;
213 }
214
215 protected Element getMetadataList (String doc_id,
216 boolean all_metadata,
217 ArrayList<String> metadata_names)
218 throws GSException {
219
220 Element meta_list = this.doc.createElement(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
221
222 // do the query to the iVia server
223 StringBuffer field_list= new StringBuffer();
224 boolean metadata_found = false;
225
226 for (int i=0; i<metadata_names.size();i++){
227 if (isAcceptableMetadata(metadata_names.get(i))){
228 metadata_found = true;
229 field_list.append(metadata_names.get(i));
230 field_list.append(",");
231 }
232 }
233 if (!metadata_found){
234 return meta_list;
235 }
236
237 String url_string = ivia_server_url+"/cgi-bin/view_record_set?theme=gsdl3&record_id_list="+doc_id+"&field_list="+field_list.toString();
238 try {
239 BufferedReader reader = Misc.makeHttpConnection(url_string);
240 String line;
241 while ((line = reader.readLine()) != null) {
242 //metadata entry
243 int col_pos = line.indexOf(':');
244 if (col_pos == -1) {
245 // end of the metadata for this doc
246 break;
247 }
248 String name = line.substring(0,col_pos);
249 String value = line.substring(col_pos+2); // includes a space
250 GSXML.addMetadata(this.doc, meta_list, name, value);
251 }
252 } catch (java.net.MalformedURLException e) {
253 throw new GSException("Malformed URL: "+url_string, GSXML.ERROR_TYPE_SYSTEM);
254 } catch (java.io.IOException e) {
255 throw new GSException("IOException: "+e.toString(), GSXML.ERROR_TYPE_SYSTEM);
256 }
257 return meta_list;
258 }
259
260 protected String getStructureInfo(String doc_id, String info_type){
261 return "";
262 }
263}
Note: See TracBrowser for help on using the repository browser.