source: trunk/gsdl3/src/java/org/greenstone/gsdl3/service/GATEServices.java@ 4098

Last change on this file since 4098 was 4098, checked in by kjdon, 21 years ago

createParameter for description has new name: createParameterDescription

  • Property svn:keywords set to Author Date Id Revision
File size: 10.2 KB
Line 
1/*
2 * GATEServices.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21
22// Greenstone classes
23import org.greenstone.gsdl3.util.*;
24
25// GATE classes
26import gate.*;
27import gate.creole.*;
28import gate.gui.*;
29import gate.util.persistence.PersistenceManager;
30
31// XML classes
32import org.w3c.dom.Element;
33import org.w3c.dom.Node;
34import org.w3c.dom.NodeList;
35
36// General Java classes
37import java.io.File;
38import java.net.URL;
39import java.util.Collection;
40import java.util.HashSet;
41import java.util.Iterator;
42import java.util.Set;
43
44
45/**
46 * <p>Title: GATE Greenstone3 integration</p>
47 * <p>Description: </p>
48 * <p>Copyright: Copyright (c) 2003</p>
49 * <p>Company: University of Waikato</p>
50 * @author unascribed
51 * @version 1.0
52 */
53
54public class GATEServices
55 extends ServiceRack {
56
57 // the services on offer
58 // these strings must match what is found in the properties file
59 protected static final String GATE_POS_TAG_SERVICE = "GatePOSTag";
60
61 protected static final String ANNOTATION_TYPE_PARAM = "annotationType";
62
63 protected Element config_info_ = null;
64
65 protected CorpusController application_;
66
67 protected Corpus corpus_;
68
69 protected String[] annotation_types_ = { "Date", "Location",
70 "Organization", "Person" };
71 // Address,
72
73 /** constructor */
74 public GATEServices()
75 {
76 }
77
78
79 /** configure this service */
80 public boolean configure(Element info, Element extra_info)
81 {
82 System.out.println("Configuring GATEServices...");
83 config_info_ = info;
84
85 // set up short_service_info_ - for now just has name and type
86 Element tq_service = doc_.createElement(GSXML.SERVICE_ELEM);
87 tq_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_ENRICH);
88 tq_service.setAttribute(GSXML.NAME_ATT, GATE_POS_TAG_SERVICE);
89 short_service_info_.appendChild(tq_service);
90
91 // set up service_info_map_ - for now, just has the same elements as above
92 // should have full details about each service incl params lists etc.
93 Element tq_service_full = (Element) tq_service.cloneNode(true);
94 Element param_list = doc_.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
95 createParameter(ANNOTATION_TYPE_PARAM, param_list, false, null);
96 tq_service_full.appendChild(param_list);
97 service_info_map_.put(GATE_POS_TAG_SERVICE, tq_service_full);
98
99 // add some format info to service map if there is any
100 // Element format = (Element) GSXML.getChildByTagName(info, GSXML.FORMAT_ELEM);
101 // if (format != null) {
102 // format_info_map_.put(GATE_POS_TAG_SERVICE, doc_.importNode(format, true));
103 // }
104
105 // Configure GATE for use
106 try {
107 Gate.init();
108 Gate.getUserConfig().put(GateConstants.DOCUMENT_ADD_SPACE_ON_UNPACK_FEATURE_NAME,
109 new Boolean(false));
110
111 // MainFrame mainFrame = new MainFrame();
112 // mainFrame.setSize(new java.awt.Dimension(800, 600));
113 // mainFrame.setVisible(true);
114
115 // Load the (pre-created) application
116 URL applicationFileURL = ClassLoader.getSystemResource("gate.app");
117 File applicationFile = new File(applicationFileURL.getFile());
118 application_ = (CorpusController) PersistenceManager.loadObjectFromFile(applicationFile);
119
120 /* Collection processing_resources = application.getPRs();
121 Iterator pr_iterator = processing_resources.iterator();
122 while (pr_iterator.hasNext()) {
123 ProcessingResource pr = (ProcessingResource) pr_iterator.next();
124 String pr_name = pr.getName();
125 System.out.println("PR name: " + pr_name);
126 if (pr_name.startsWith("ANNIE POS Tagger")) {
127 String as_name = ((POSTagger) pr).getInputASName();
128 System.out.println("AS name: " + as_name);
129 }
130 } */
131
132 // Create a new corpus
133 corpus_ = Factory.newCorpus("GSDL3 Corpus");
134 application_.setCorpus(corpus_);
135 }
136 catch (Exception e) {
137 e.printStackTrace();
138 }
139
140 return true;
141 }
142
143
144 /** creates a display element containing all the text strings needed to display
145 the service page, in the language specified */
146 protected Element createServiceDisplay(String service, String lang)
147 {
148 // Create a service display for the basic text query service
149 Element display = doc_.createElement(GSXML.DISPLAY_ELEM);
150 display.appendChild(GSXML.createTextElement(doc_, GSXML.DISPLAY_NAME_ELEM, getTextString(service+".name", lang)));
151 display.appendChild(GSXML.createTextElement(doc_, GSXML.DISPLAY_SUBMIT_ELEM, getTextString(service+".submit", lang)));
152
153 // now need to add in the params
154 if (service.equals(GATE_POS_TAG_SERVICE)) {
155 createParameter(ANNOTATION_TYPE_PARAM, display, true, lang);
156 }
157
158 return display;
159 }
160
161
162 /** creates a new param element and adds it to the param list */
163 protected void createParameter(String name, Element param_list,
164 boolean display, String lang)
165 {
166 Element param = null;
167
168 if (name.equals(ANNOTATION_TYPE_PARAM)) {
169 if (display) {
170 int len = annotation_types_.length;
171 String[] annotation_type_names = new String[len];
172 for (int i = 0; i < len; i++) {
173 annotation_type_names[i] = getTextString("param." + name + "." + annotation_types_[i], lang);
174 }
175
176 param = GSXML.createParameterDisplay(doc_, name, getTextString("param." + name, lang), annotation_types_, annotation_type_names);
177 }
178 else {
179 param = GSXML.createParameterDescription(doc_, name, GSXML.PARAM_TYPE_ENUM_MULTI, annotation_types_[0], annotation_types_);
180 }
181 }
182
183 // Add the parameter to the list
184 if (param != null) {
185 param_list.appendChild(param);
186 }
187 }
188
189
190 protected Element processGatePOSTag(Element request)
191 {
192 // System.out.println("(GatePOSTag) Request:\n" + converter_.getPrettyString(request));
193
194 // Create a new (empty) result message
195 Element result = doc_.createElement(GSXML.RESPONSE_ELEM);
196 result.setAttribute(GSXML.FROM_ATT, GATE_POS_TAG_SERVICE);
197 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
198
199 // Get the parameters of the request
200 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
201 if (param_list == null) {
202 System.err.println("Error: GatePOSTag request had no paramList.");
203 return result; // Return the empty result
204 }
205
206 // Process the request parameters
207 Set annTypes = new HashSet();
208 Element param = (Element) param_list.getFirstChild();
209 while (param != null) {
210 // Identify the annotation types desired
211 if (param.getAttribute(GSXML.NAME_ATT).equals("annotationType")) {
212 String annotation_type = GSXML.getValue(param);
213 String [] types = annotation_type.split(",");
214 for (int i=0; i<types.length; i++) {
215 annTypes.add(types[i]);
216 }
217 }
218
219 param = (Element) param.getNextSibling();
220 }
221
222 // Get the request content
223 Element doc_node_list = (Element) GSXML.getChildByTagName(request, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
224 if (doc_node_list == null) {
225 System.err.println("Error: GatePOSTag request specified no doc nodes.");
226 return result; // Return the empty result
227 }
228
229 // Process each document node in the list
230 NodeList doc_nodes = doc_node_list.getChildNodes();
231 for (int i = 0; i < doc_nodes.getLength(); i++) {
232 Element doc_node = (Element) doc_nodes.item(i);
233 Element content = (Element) GSXML.getChildByTagName(doc_node, "nodeContent");
234 Node content_text = (Node) GSXML.getNodeTextNode(content);
235 String text = content_text.getNodeValue();
236
237 // GATE needs the text to be a valid HTML file
238 text = "<html><head></head><body>" + text + "</body></html>";
239 String annotated_text = processText(text, annTypes);
240
241 // Remove the surrounding HTML tags
242 annotated_text = annotated_text.substring(49, annotated_text.length() - 13);
243 // System.out.println("GATE result:\n" + annotated_text);
244
245 annotated_text = "<nodeContent>" + annotated_text + "</nodeContent>";
246 Element annotated_content = converter_.getDOM(annotated_text).getDocumentElement();
247 doc_node.replaceChild(doc_node.getOwnerDocument().importNode(annotated_content, true), content);
248 }
249
250 result.appendChild(doc_.importNode(doc_node_list, true));
251 // System.out.println("GatePOSTag result:\n" + converter_.getPrettyString(result));
252 return result;
253 }
254
255
256 public String processText(String text, Set annotationTypesToExport)
257 {
258 try {
259 // Create a new document containing the text
260 FeatureMap parameters = Factory.newFeatureMap();
261 parameters.put("stringContent", text);
262 parameters.put("markupAware", new Boolean(true));
263 parameters.put("preserveOriginalContent", new Boolean(true));
264 parameters.put("collectRepositioningInfo", new Boolean(true));
265 Document doc = (Document) Factory.createResource("gate.corpora.DocumentImpl",
266 parameters);
267
268 // Add it to the corpus
269 corpus_.clear();
270 corpus_.add(doc);
271
272 // Process the corpus
273 application_.execute();
274
275 // Extract all the annotations
276 AnnotationSet annSet = doc.getAnnotations();
277
278 // Return the desired annotations
279 AnnotationSet outputAnnotations = annSet.get(annotationTypesToExport);
280 String result = doc.toXml(outputAnnotations, false);
281 Factory.deleteResource(doc);
282
283 // 1. Escape the GATE result
284 result = GSXML.xmlSafe(result);
285
286 // 2. Unescape the annotation tags
287 Iterator setIterator = annotationTypesToExport.iterator();
288 while (setIterator.hasNext()) {
289 String annotationType = (String) setIterator.next();
290 result = result.replaceAll("&lt;" + annotationType + "&gt;",
291 "<annotation type=\"" + annotationType + "\">");
292 result = result.replaceAll("&lt;/" + annotationType + "&gt;",
293 "</annotation>");
294 }
295
296 return result;
297 }
298 catch (Exception e) {
299 e.printStackTrace();
300 return null;
301 }
302 }
303}
Note: See TracBrowser for help on using the repository browser.