source: trunk/gsdl3/src/java/org/greenstone/gsdl3/service/GATEServices.java@ 6490

Last change on this file since 6490 was 5126, checked in by kjdon, 21 years ago

Describe requests to services can now take a 'subset' parameter, just like any other describe request. The choices are paramList and displayItem, so you can get just the display text without having to go through and create all the parameters.

  • Property svn:keywords set to Author Date Id Revision
File size: 9.8 KB
Line 
1/*
2 * GATEServices.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21
22// Greenstone classes
23import org.greenstone.gsdl3.util.*;
24
25// GATE classes
26import gate.*;
27import gate.creole.*;
28import gate.gui.*;
29import gate.util.persistence.PersistenceManager;
30
31// XML classes
32import org.w3c.dom.Element;
33import org.w3c.dom.Node;
34import org.w3c.dom.NodeList;
35
36// General Java classes
37import java.io.File;
38import java.net.URL;
39import java.util.Collection;
40import java.util.HashSet;
41import java.util.Iterator;
42import java.util.Set;
43
44
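/**
 * <p>Title: GATE Greenstone3 integration</p>
 * <p>Description: Service rack offering the GatePOSTag enrich service, which
 * runs a pre-created GATE application over the content of document nodes and
 * marks up the requested annotation types.</p>
 * <p>Copyright: Copyright (c) 2003</p>
 * <p>Company: University of Waikato</p>
 * @author unascribed
 * @version 1.0
 */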
53
54public class GATEServices
55 extends ServiceRack {
56
57 // the services on offer
58 // these strings must match what is found in the properties file
59 protected static final String GATE_POS_TAG_SERVICE = "GatePOSTag";
60
61 protected static final String ANNOTATION_TYPE_PARAM = "annotationType";
62
63 protected Element config_info = null;
64
65 protected CorpusController application;
66
67 protected Corpus corpus;
68
69 protected String[] annotation_types = { "Date", "Location",
70 "Organization", "Person" };
71 // Address,
72
73 /** constructor */
74 public GATEServices()
75 {
76 }
77
78
79 /** configure this service */
80 public boolean configure(Element info, Element extra_info)
81 {
82 System.out.println("Configuring GATEServices...");
83 this.config_info = info;
84
85 // set up short_service_info_ - for now just has name and type
86 Element tag_service = this.doc.createElement(GSXML.SERVICE_ELEM);
87 tag_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_ENRICH);
88 tag_service.setAttribute(GSXML.NAME_ATT, GATE_POS_TAG_SERVICE);
89 this.short_service_info.appendChild(tag_service);
90
91
92 // Configure GATE for use
93 try {
94 Gate.init();
95 Gate.getUserConfig().put(GateConstants.DOCUMENT_ADD_SPACE_ON_UNPACK_FEATURE_NAME,
96 new Boolean(false));
97
98 // MainFrame mainFrame = new MainFrame();
99 // mainFrame.setSize(new java.awt.Dimension(800, 600));
100 // mainFrame.setVisible(true);
101
102 // Load the (pre-created) application
103 URL applicationFileURL = ClassLoader.getSystemResource("gate.app");
104 File applicationFile = new File(applicationFileURL.getFile());
105 this.application = (CorpusController) PersistenceManager.loadObjectFromFile(applicationFile);
106
107 /* Collection processing_resources = application.getPRs();
108 Iterator pr_iterator = processing_resources.iterator();
109 while (pr_iterator.hasNext()) {
110 ProcessingResource pr = (ProcessingResource) pr_iterator.next();
111 String pr_name = pr.getName();
112 System.out.println("PR name: " + pr_name);
113 if (pr_name.startsWith("ANNIE POS Tagger")) {
114 String as_name = ((POSTagger) pr).getInputASName();
115 System.out.println("AS name: " + as_name);
116 }
117 } */
118
119 // Create a new corpus
120 this.corpus = Factory.newCorpus("GSDL3 Corpus");
121 this.application.setCorpus(this.corpus);
122 }
123 catch (Exception e) {
124 e.printStackTrace();
125 }
126
127 return true;
128 }
129
130 protected Element getServiceDescription(String service, String lang, String subset) {
131
132 if (!service.equals(GATE_POS_TAG_SERVICE)) {
133 return null;
134 }
135 Element tag_service = this.doc.createElement(GSXML.SERVICE_ELEM);
136 tag_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_ENRICH);
137 tag_service.setAttribute(GSXML.NAME_ATT, GATE_POS_TAG_SERVICE);
138 if (subset==null || subset.equals(GSXML.DISPLAY_TEXT_ELEM)) {
139 tag_service.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_NAME, getTextString(service+".name", lang)));
140 tag_service.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_DESCRIPTION, getTextString(service+".description", lang)));
141 tag_service.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_SUBMIT, getTextString(service+".submit", lang)));
142 }
143 if (subset==null || subset.equals(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER)) {
144 Element param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
145 createParameter(ANNOTATION_TYPE_PARAM, param_list, lang);
146 tag_service.appendChild(param_list);
147 }
148 return tag_service;
149 }
150
151
152 /** creates a new param element and adds it to the param list */
153 protected void createParameter(String name, Element param_list,
154 String lang)
155 {
156 Element param = null;
157
158 if (name.equals(ANNOTATION_TYPE_PARAM)) {
159 int len = this.annotation_types.length;
160 String[] annotation_type_names = new String[len];
161 for (int i = 0; i < len; i++) {
162 annotation_type_names[i] = getTextString("param." + name + "." + this.annotation_types[i], lang);
163 }
164
165 param = GSXML.createParameterDescription(this.doc, name, getTextString("param." + name, lang), GSXML.PARAM_TYPE_ENUM_MULTI, this.annotation_types[0], this.annotation_types, annotation_type_names);
166 param_list.appendChild(param);
167 }
168
169 }
170
171
172 protected Element processGatePOSTag(Element request)
173 {
174 // System.out.println("(GatePOSTag) Request:\n" + converter_.getPrettyString(request));
175
176 // Create a new (empty) result message
177 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
178 result.setAttribute(GSXML.FROM_ATT, GATE_POS_TAG_SERVICE);
179 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
180
181 // Get the parameters of the request
182 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
183 if (param_list == null) {
184 System.err.println("Error: GatePOSTag request had no paramList.");
185 return result; // Return the empty result
186 }
187
188 // Process the request parameters
189 Set annTypes = new HashSet();
190 Element param = (Element) param_list.getFirstChild();
191 while (param != null) {
192 // Identify the annotation types desired
193 if (param.getAttribute(GSXML.NAME_ATT).equals("annotationType")) {
194 String annotation_type = GSXML.getValue(param);
195 String [] types = annotation_type.split(",");
196 for (int i=0; i<types.length; i++) {
197 annTypes.add(types[i]);
198 }
199 }
200
201 param = (Element) param.getNextSibling();
202 }
203
204 // Get the request content
205 Element doc_node_list = (Element) GSXML.getChildByTagName(request, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
206 if (doc_node_list == null) {
207 System.err.println("Error: GatePOSTag request specified no doc nodes.");
208 return result; // Return the empty result
209 }
210
211 // Process each document node in the list
212 NodeList doc_nodes = doc_node_list.getChildNodes();
213 for (int i = 0; i < doc_nodes.getLength(); i++) {
214 Element doc_node = (Element) doc_nodes.item(i);
215 Element content = (Element) GSXML.getChildByTagName(doc_node, "nodeContent");
216 Node content_text = (Node) GSXML.getNodeTextNode(content);
217 String text = content_text.getNodeValue();
218
219 // GATE needs the text to be a valid HTML file
220 text = "<html><head></head><body>" + text + "</body></html>";
221 String annotated_text = processText(text, annTypes);
222
223 // Remove the surrounding HTML tags
224 annotated_text = annotated_text.substring(49, annotated_text.length() - 13);
225 // System.out.println("GATE result:\n" + annotated_text);
226
227 annotated_text = "<nodeContent>" + annotated_text + "</nodeContent>";
228 Element annotated_content = this.converter.getDOM(annotated_text).getDocumentElement();
229 doc_node.replaceChild(doc_node.getOwnerDocument().importNode(annotated_content, true), content);
230 }
231
232 result.appendChild(this.doc.importNode(doc_node_list, true));
233 // System.out.println("GatePOSTag result:\n" + converter_.getPrettyString(result));
234 return result;
235 }
236
237
238 public String processText(String text, Set annotationTypesToExport)
239 {
240 try {
241 // Create a new document containing the text
242 FeatureMap parameters = Factory.newFeatureMap();
243 parameters.put("stringContent", text);
244 parameters.put("markupAware", new Boolean(true));
245 parameters.put("preserveOriginalContent", new Boolean(true));
246 parameters.put("collectRepositioningInfo", new Boolean(true));
247 Document doc = (Document) Factory.createResource("gate.corpora.DocumentImpl",
248 parameters);
249
250 // Add it to the corpus
251 this.corpus.clear();
252 this.corpus.add(doc);
253
254 // Process the corpus
255 this.application.execute();
256
257 // Extract all the annotations
258 AnnotationSet annSet = doc.getAnnotations();
259
260 // Return the desired annotations
261 AnnotationSet outputAnnotations = annSet.get(annotationTypesToExport);
262 String result = doc.toXml(outputAnnotations, false);
263 Factory.deleteResource(doc);
264
265 // 1. Escape the GATE result
266 result = GSXML.xmlSafe(result);
267
268 // 2. Unescape the annotation tags
269 Iterator setIterator = annotationTypesToExport.iterator();
270 while (setIterator.hasNext()) {
271 String annotationType = (String) setIterator.next();
272 result = result.replaceAll("&lt;" + annotationType + "&gt;",
273 "<annotation type=\"" + annotationType + "\">");
274 result = result.replaceAll("&lt;/" + annotationType + "&gt;",
275 "</annotation>");
276 }
277
278 return result;
279 }
280 catch (Exception e) {
281 e.printStackTrace();
282 return null;
283 }
284 }
285}
Note: See TracBrowser for help on using the repository browser.