source: trunk/gsdl3/src/java/org/greenstone/gsdl3/service/IViaRetrieve.java@ 9289

Last change on this file since 9289 was 9289, checked in by kjdon, 19 years ago

removed some unnecessary stuff from configure - call the super class configure instead

  • Property svn:keywords set to Author Date Id Revision
File size: 7.8 KB
Line 
1package org.greenstone.gsdl3.service;
2
3// Greenstone classes
4import org.greenstone.gdbm.*;
5import org.greenstone.gsdl3.util.*;
6
7// XML classes
8import org.w3c.dom.Element;
9import org.w3c.dom.Document;
10import org.w3c.dom.NodeList;
11
12import java.util.HashMap;
13import java.util.ArrayList;
14import java.io.File;
15import java.io.InputStream;
16import java.io.BufferedReader;
17import java.io.InputStreamReader;
18import java.io.IOException;
19import java.net.HttpURLConnection;
20import java.net.URLConnection;
21import java.net.URL;
22import java.net.Authenticator;
23import java.net.MalformedURLException;
24
25/**
26 *
27 * @author <a href="mailto:[email protected]">Katherine Don</a>
28 * @version $Revision: 9289 $
29 * Modified by <a href="mailto:[email protected]">Chi-Yu Huang</a>
30 */
31
32public class IViaRetrieve
33 extends AbstractDocumentRetrieve {
34
35 protected String ivia_server_url = null;
36
37 public IViaRetrieve() {
38 does_structure = false;
39 }
40
41 //Configure IViaRetrieve Service
42 public boolean configure(Element info, Element extra_info)
43 {
44 if (!super.configure(info, extra_info)) {
45 return false;
46 }
47
48 Element server_elem = (Element)GSXML.getChildByTagName(info, "iViaServer");
49 if (server_elem == null) {
50 System.err.println("IViaRetrieve.configure error: no iViaServer element found");
51 return false;
52 }
53 ivia_server_url = server_elem.getAttribute("url");
54 if (ivia_server_url.equals("")) {
55 System.err.println("IViaRetrieve.configure error: no url for the iViaServer element");
56 return false;
57 }
58 return true;
59
60 }
61
62 /** gets a document by sending a request to iVia, then processes it and creates a documentNode around the text */
63 protected Element getNodeContent(String doc_id) {
64
65 String url_string = ivia_server_url+"/cgi-bin/view_record?theme=gsdl3&record_id="+doc_id;
66
67 StringBuffer buffer = new StringBuffer();
68 try {
69 BufferedReader reader = makeConnection(url_string);
70 String line;
71 while((line = reader.readLine())!= null) {
72 buffer.append(line);
73 }
74 } catch (Exception e) {
75 System.err.println("IViaRetrieve Error:exception happened");
76 e.printStackTrace();
77 }
78
79 String node_content = buffer.toString();
80
81 String escaped_content = GSXML.xmlSafe(node_content);
82
83 StringBuffer processed_content = new StringBuffer(escaped_content.length());
84 processed_content.append("<nodeContent>");
85 int pos = 0;
86 int lastpos = 0;
87 while ((pos = escaped_content.indexOf("&lt;a ", lastpos))!= -1) {
88 processed_content.append(escaped_content.substring(lastpos, pos));
89 int endpos = escaped_content.indexOf("&lt;/a&gt;", pos);
90 if (endpos == -1) {
91 break;
92 }
93 String link = escaped_content.substring(pos, endpos+10);
94 link = convertLink(link);
95 processed_content.append(link);
96 lastpos = endpos+10;
97 }
98 processed_content.append(escaped_content.substring(lastpos)); // get the last bit
99 processed_content.append("</nodeContent>");
100
101 Element content_element = this.converter.getDOM(processed_content.toString()).getDocumentElement();
102
103 return (Element)this.doc.importNode(content_element,true);
104 }
105
106 /** converts a url from an <a> element into a greenstone suitable one */
107 protected String convertLink(String aref) {
108 if (aref.indexOf("href=&quot;http") != -1) {
109 return aref; // an external link
110 }
111 String type = "other";
112 if (aref.indexOf("/cgi-bin/canned_search")!=-1) {
113 type="query";
114 } else if (aref.indexOf("/cgi-bin/click_through") != -1) {
115 type = "external";
116 } else if (aref.indexOf("/cgi-bin/view_record") != -1) {
117 type="document";
118 }
119
120 int href_start = aref.indexOf("href=&quot;")+11;
121 int href_end = aref.indexOf("&gt;", href_start);
122 String href = aref.substring(href_start, href_end);
123 String link_content = aref.substring(href_end+4, aref.length()-10);
124
125 if (type.equals("external")) {
126 // the external link is everything after the http at the end.
127 String address = href.substring(href.lastIndexOf("http"));
128 address = address.replaceAll("%3[aA]", ":");
129 address = address.replaceAll("%2[fF]", "/");
130
131 return "&lt;a href=\""+address+"\"&gt;"+link_content+"&lt;/a&gt;";
132 }
133 if (type.equals("other")) {
134 return "other type of link ("+link_content+")";
135 }
136 StringBuffer result = new StringBuffer();
137 result.append("<link type='");
138 result.append(type);
139 result.append("'");
140 if (type.equals("query")) {
141 result.append(" service='TextQuery'");
142 }
143 result.append(">");
144 // add in the parameters
145 href = href.substring(href.indexOf("?")+1);
146 String [] params = href.split("&amp;");
147 for (int i=0; i<params.length; i++) {
148 String param = params[i];
149 int eq_pos = param.indexOf("=");
150 if (eq_pos != -1) {
151
152 result.append("<param name='"+param.substring(0, eq_pos)+"' value='"+param.substring(eq_pos+1)+"'/>");
153 }
154 }
155 result.append(link_content);
156 result.append("</link>");
157
158 return result.toString();
159 }
160
161 // iVia craps out if we ask for a metadata which is not valid. So need
162 // to make sure we only ask for acceptable fields.
163 protected boolean isAcceptableMetadata(String meta) {
164 String valid_metadata = ",title,url,ivia_description,keywords,subjects,";
165 if (valid_metadata.indexOf(","+meta+",")!=-1) {
166 return true;
167 }
168 return false;
169 }
170
171 protected BufferedReader makeConnection(String url_string) {
172 BufferedReader reader = null;
173 try {
174 URL url = new URL(url_string);
175 HttpURLConnection connection = (HttpURLConnection)url.openConnection();
176 InputStream input = connection.getInputStream();
177 reader = new BufferedReader(new InputStreamReader(input));
178 } catch (java.net.MalformedURLException e) {
179 System.err.println("IViaRetrieve: Malformed URL: "+url_string);
180 } catch (java.io.IOException e) {
181 System.err.println("IViaRetrieve Error: An error occurred during IO to url "+url_string);
182 }
183 return reader;
184 }
185
186 protected String translateId(String oid){
187 int p = oid.lastIndexOf('.');
188 if (p != oid.length()-3) {
189 System.out.println("translateoid error: '.' is not the third to last char!!");
190 return oid;
191 }
192 String top = oid.substring(0, p);
193 return top;
194 }
195
196 protected String translateExternalId(String id){
197 return id;
198 }
199
200 protected String getDocType(String node_id){
201 return GSXML.DOC_TYPE_SIMPLE;
202 }
203 protected String getRootId(String node_id){
204 return node_id;
205 }
206
207 protected ArrayList getChildrenIds(String node_id){
208 return null;
209 }
210
211 protected String getParentId(String node_id){
212 return null;
213 }
214
215 protected Element getMetadataList (String doc_id,
216 boolean all_metadata,
217 ArrayList metadata_names){
218
219 Element meta_list = this.doc.createElement(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
220
221 // do the query to the iVia server
222 StringBuffer field_list= new StringBuffer();
223 boolean metadata_found = false;
224
225 for (int i=0; i<metadata_names.size();i++){
226 if (isAcceptableMetadata((String)metadata_names.get(i))){
227 metadata_found = true;
228 field_list.append((String)metadata_names.get(i));
229 field_list.append(",");
230 }
231 }
232 if (!metadata_found){
233 return meta_list;
234 }
235
236 String url_string = ivia_server_url+"/cgi-bin/view_record_set?theme=gsdl3&record_id_list="+doc_id+"&field_list="+field_list.toString();
237 try {
238 BufferedReader reader = makeConnection(url_string);
239 String line;
240 while ((line = reader.readLine()) != null) {
241 //metadata entry
242 int col_pos = line.indexOf(':');
243 if (col_pos == -1) {
244 // end of the metadata for this doc
245 break;
246 }
247 String name = line.substring(0,col_pos);
248 String value = line.substring(col_pos+2); // includes a space
249 GSXML.addMetadata(this.doc, meta_list, name, value);
250 }
251 } catch (Exception e) {
252 System.err.println("IViaRetrieve Error:exception happened");
253 e.printStackTrace();
254 }
255 return meta_list;
256 }
257
258 protected String getStructureInfo(String doc_id, String info_type){
259 return "";
260 }
261}
Note: See TracBrowser for help on using the repository browser.