source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/IViaRetrieve.java@ 26215

Last change on this file since 26215 was 26215, checked in by ak19, 12 years ago

The lang parameter was introduced into AbstractDocumentRetrieve.getMetadataList(), even though the subclass IViaRetrieve's method doesn't appear to do anything with language.

  • Property svn:keywords set to Author Date Id Revision
File size: 7.8 KB
Line 
1package org.greenstone.gsdl3.service;
2
3// Greenstone classes
4import org.greenstone.gsdl3.core.GSException;
5import org.greenstone.util.Misc;
6import org.greenstone.gsdl3.util.*;
7
8// XML classes
9import org.w3c.dom.Element;
10import org.w3c.dom.Document;
11import org.w3c.dom.NodeList;
12
13import java.util.HashMap;
14import java.util.ArrayList;
15import java.io.File;
16import java.io.InputStream;
17import java.io.BufferedReader;
18import java.io.InputStreamReader;
19import java.io.IOException;
20import java.net.HttpURLConnection;
21import java.net.URLConnection;
22import java.net.URL;
23import java.net.Authenticator;
24import java.net.MalformedURLException;
25
26import org.apache.log4j.*;
27
28/**
29 *
30 * @author Katherine Don
31 * @version $Revision: 26215 $
32 * Modified by <a href="mailto:[email protected]">Chi-Yu Huang</a>
33 */
34
35public class IViaRetrieve
36 extends AbstractDocumentRetrieve {
37
38 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.IViaRetrieve.class.getName());
39
40
41 protected String ivia_server_url = null;
42
43 public IViaRetrieve() {
44 does_structure = false;
45 }
46
47 //Configure IViaRetrieve Service
48 public boolean configure(Element info, Element extra_info)
49 {
50 if (!super.configure(info, extra_info)) {
51 return false;
52 }
53
54 Element server_elem = (Element)GSXML.getChildByTagName(info, "iViaServer");
55 if (server_elem == null) {
56 logger.error("no iViaServer element found");
57 return false;
58 }
59 ivia_server_url = server_elem.getAttribute("url");
60 if (ivia_server_url.equals("")) {
61 logger.error("no url for the iViaServer element");
62 return false;
63 }
64 return true;
65
66 }
67
68 /** gets a document by sending a request to iVia, then processes it and creates a documentNode around the text */
69 protected Element getNodeContent(String doc_id, String lang)
70 throws GSException {
71
72 String url_string = ivia_server_url+"/cgi-bin/view_record?theme=gsdl3&record_id="+doc_id;
73
74 StringBuffer buffer = new StringBuffer();
75 try {
76 BufferedReader reader = Misc.makeHttpConnection(url_string);
77 String line;
78 while((line = reader.readLine())!= null) {
79 buffer.append(line);
80 }
81 } catch (java.net.MalformedURLException e) {
82 throw new GSException("Malformed URL: "+url_string, GSXML.ERROR_TYPE_SYSTEM);
83 } catch (java.io.IOException e) {
84 throw new GSException("IOException during connection to "+url_string+": "+e.toString(),GSXML.ERROR_TYPE_SYSTEM);
85 }
86
87 String node_content = buffer.toString();
88
89 String escaped_content = GSXML.xmlSafe(node_content);
90
91 StringBuffer processed_content = new StringBuffer(escaped_content.length());
92 processed_content.append("<nodeContent>");
93 int pos = 0;
94 int lastpos = 0;
95 while ((pos = escaped_content.indexOf("&lt;a ", lastpos))!= -1) {
96 processed_content.append(escaped_content.substring(lastpos, pos));
97 int endpos = escaped_content.indexOf("&lt;/a&gt;", pos);
98 if (endpos == -1) {
99 break;
100 }
101 String link = escaped_content.substring(pos, endpos+10);
102 link = convertLink(link);
103 processed_content.append(link);
104 lastpos = endpos+10;
105 }
106 processed_content.append(escaped_content.substring(lastpos)); // get the last bit
107 processed_content.append("</nodeContent>");
108
109 Document content_doc = this.converter.getDOM(processed_content.toString());
110 if (content_doc == null) {
111 logger.error("Couldn't parse node content");
112 logger.error(processed_content.toString());
113 return null;
114 }
115
116 Element content_element = content_doc.getDocumentElement();
117
118 return (Element)this.doc.importNode(content_element,true);
119 }
120
121 /** converts a url from an <a> element into a greenstone suitable one */
122 protected String convertLink(String aref) {
123 if (aref.indexOf("href=&quot;http") != -1) {
124 return aref; // an external link
125 }
126 String type = "other";
127 if (aref.indexOf("/cgi-bin/canned_search")!=-1) {
128 type="query";
129 } else if (aref.indexOf("/cgi-bin/click_through") != -1) {
130 type = "external";
131 } else if (aref.indexOf("/cgi-bin/view_record") != -1) {
132 type="document";
133 }
134
135 int href_start = aref.indexOf("href=&quot;")+11;
136 int href_end = aref.indexOf("&gt;", href_start);
137 String href = aref.substring(href_start, href_end);
138 String link_content = aref.substring(href_end+4, aref.length()-10);
139
140 if (type.equals("external")) {
141 // the external link is everything after the http at the end.
142 String address = href.substring(href.lastIndexOf("http"));
143 address = address.replaceAll("%3[aA]", ":");
144 address = address.replaceAll("%2[fF]", "/");
145
146 return "&lt;a href=\""+address+"\"&gt;"+link_content+"&lt;/a&gt;";
147 }
148 if (type.equals("other")) {
149 return "other type of link ("+link_content+")";
150 }
151 StringBuffer result = new StringBuffer();
152 result.append("<link type='");
153 result.append(type);
154 result.append("'");
155 if (type.equals("query")) {
156 result.append(" service='TextQuery'");
157 }
158 result.append(">");
159 // add in the parameters
160 href = href.substring(href.indexOf("?")+1);
161 String [] params = href.split("&amp;");
162 for (int i=0; i<params.length; i++) {
163 String param = params[i];
164 int eq_pos = param.indexOf("=");
165 if (eq_pos != -1) {
166
167 result.append("<param name='"+param.substring(0, eq_pos)+"' value='"+param.substring(eq_pos+1)+"'/>");
168 }
169 }
170 result.append(link_content);
171 result.append("</link>");
172
173 return result.toString();
174 }
175
176 // iVia craps out if we ask for a metadata which is not valid. So need
177 // to make sure we only ask for acceptable fields.
178 protected boolean isAcceptableMetadata(String meta) {
179 String valid_metadata = ",title,url,ivia_description,keywords,subjects,";
180 if (valid_metadata.indexOf(","+meta+",")!=-1) {
181 return true;
182 }
183 return false;
184 }
185
186 protected String translateId(String oid){
187 int p = oid.lastIndexOf('.');
188 if (p != oid.length()-3) {
189 logger.info("translateoid error: '.' is not the third to last char!!");
190 return oid;
191 }
192 String top = oid.substring(0, p);
193 return top;
194 }
195
196 protected String translateExternalId(String id){
197 return id;
198 }
199
200 protected String getDocType(String node_id){
201 return GSXML.DOC_TYPE_SIMPLE;
202 }
203 protected String getRootId(String node_id){
204 return node_id;
205 }
206
207 protected ArrayList<String> getChildrenIds(String node_id){
208 return null;
209 }
210
211 protected String getParentId(String node_id){
212 return null;
213 }
214
215 protected Element getMetadataList (String doc_id,
216 boolean all_metadata,
217 ArrayList<String> metadata_names,
218 String lang)
219 throws GSException {
220
221 Element meta_list = this.doc.createElement(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
222
223 // do the query to the iVia server
224 StringBuffer field_list= new StringBuffer();
225 boolean metadata_found = false;
226
227 for (int i=0; i<metadata_names.size();i++){
228 if (isAcceptableMetadata(metadata_names.get(i))){
229 metadata_found = true;
230 field_list.append(metadata_names.get(i));
231 field_list.append(",");
232 }
233 }
234 if (!metadata_found){
235 return meta_list;
236 }
237
238 String url_string = ivia_server_url+"/cgi-bin/view_record_set?theme=gsdl3&record_id_list="+doc_id+"&field_list="+field_list.toString();
239 try {
240 BufferedReader reader = Misc.makeHttpConnection(url_string);
241 String line;
242 while ((line = reader.readLine()) != null) {
243 //metadata entry
244 int col_pos = line.indexOf(':');
245 if (col_pos == -1) {
246 // end of the metadata for this doc
247 break;
248 }
249 String name = line.substring(0,col_pos);
250 String value = line.substring(col_pos+2); // includes a space
251 GSXML.addMetadata(this.doc, meta_list, name, value);
252 }
253 } catch (java.net.MalformedURLException e) {
254 throw new GSException("Malformed URL: "+url_string, GSXML.ERROR_TYPE_SYSTEM);
255 } catch (java.io.IOException e) {
256 throw new GSException("IOException: "+e.toString(), GSXML.ERROR_TYPE_SYSTEM);
257 }
258 return meta_list;
259 }
260
261 protected String getStructureInfo(String doc_id, String info_type){
262 return "";
263 }
264}
Note: See TracBrowser for help on using the repository browser.