source: trunk/gsdl3/src/java/org/greenstone/gsdl3/service/GATEServices.java.tmp@ 9939

Last change on this file since 9939 was 9939, checked in by kjdon, 19 years ago

Changed name from GatePOSTag to GateTag because it is not a POS tagger.

  • Property svn:keywords set to Author Date Id Revision
File size: 10.0 KB
Line 
1/*
2 * GATEServices.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21
22// Greenstone classes
23import org.greenstone.gsdl3.util.*;
24
25// GATE classes
26import gate.*;
27import gate.creole.*;
28import gate.gui.*;
29import gate.util.persistence.PersistenceManager;
30
31// XML classes
32import org.w3c.dom.Element;
33import org.w3c.dom.Node;
34import org.w3c.dom.NodeList;
35
36// General Java classes
37import java.io.File;
38import java.net.URL;
39import java.util.Collection;
40import java.util.HashSet;
41import java.util.Iterator;
42import java.util.Set;
43
44
45/**
46 * <p>Title: GATE Greenstone3 integration</p>
47 * <p>Description: </p>
48 * <p>Copyright: Copyright (c) 2003</p>
49 * <p>Company: University of Waikato</p>
50 * @author unascribed
51 * @version 1.0
52 */
53
54public class GATEServices
55 extends ServiceRack {
56
57 // the services on offer
58 // these strings must match what is found in the properties file
59 protected static final String GATE_TAG_SERVICE = "GateTag";
60
61 protected static final String ANNOTATION_TYPE_PARAM = "annotationType";
62
63 protected Element config_info = null;
64
65 protected CorpusController application;
66
67 protected Corpus corpus;
68
69 protected String[] annotation_types = { "Date", "Location",
70 "Organization", "Person" };
71 // Address,
72
73 /** constructor */
74 public GATEServices()
75 {
76 }
77
78
79 /** configure this service */
80 public boolean configure(Element info, Element extra_info)
81 {
82 System.out.println("Configuring GATEServices...");
83 this.config_info = info;
84
85 // set up short_service_info_ - for now just has name and type
86 Element tag_service = this.doc.createElement(GSXML.SERVICE_ELEM);
87 tag_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_ENRICH);
88 tag_service.setAttribute(GSXML.NAME_ATT, GATE_TAG_SERVICE);
89 this.short_service_info.appendChild(tag_service);
90
91
92 // Configure GATE for use
93 try {
94 Gate.init();
95 Gate.getUserConfig().put(GateConstants.DOCUMENT_ADD_SPACE_ON_UNPACK_FEATURE_NAME,
96 new Boolean(false));
97
98 // MainFrame mainFrame = new MainFrame();
99 // mainFrame.setSize(new java.awt.Dimension(800, 600));
100 // mainFrame.setVisible(true);
101
102 // Load the (pre-created) application
103 URL applicationFileURL = ClassLoader.getSystemResource("gate.app");
104 File applicationFile = new File(applicationFileURL.getFile());
105 this.application = (CorpusController) PersistenceManager.loadObjectFromFile(applicationFile);
106
107 /* Collection processing_resources = application.getPRs();
108 Iterator pr_iterator = processing_resources.iterator();
109 while (pr_iterator.hasNext()) {
110 ProcessingResource pr = (ProcessingResource) pr_iterator.next();
111 String pr_name = pr.getName();
112 System.out.println("PR name: " + pr_name);
113 if (pr_name.startsWith("ANNIE POS Tagger")) {
114 String as_name = ((POSTagger) pr).getInputASName();
115 System.out.println("AS name: " + as_name);
116 }
117 } */
118
119 // Create a new corpus
120 this.corpus = Factory.newCorpus("GSDL3 Corpus");
121 this.application.setCorpus(this.corpus);
122 }
123 catch (Exception e) {
124 e.printStackTrace();
125 }
126
127 return true;
128 }
129
130 protected Element getServiceDescription(String service, String lang, String subset) {
131
132 if (!service.equals(GATE_TAG_SERVICE)) {
133 return null;
134 }
135 Element tag_service = this.doc.createElement(GSXML.SERVICE_ELEM);
136 tag_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_ENRICH);
137 tag_service.setAttribute(GSXML.NAME_ATT, GATE_TAG_SERVICE);
138 if (subset==null || subset.equals(GSXML.DISPLAY_TEXT_ELEM)) {
139 tag_service.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_NAME, getTextString(service+".name", lang)));
140 tag_service.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_DESCRIPTION, getTextString(service+".description", lang)));
141 tag_service.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_SUBMIT, getTextString(service+".submit", lang)));
142 }
143 if (subset==null || subset.equals(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER)) {
144 Element param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
145 createParameter(ANNOTATION_TYPE_PARAM, param_list, lang);
146 tag_service.appendChild(param_list);
147 }
148 return tag_service;
149 }
150
151
152 /** creates a new param element and adds it to the param list */
153 protected void createParameter(String name, Element param_list,
154 String lang)
155 {
156 Element param = null;
157
158 if (name.equals(ANNOTATION_TYPE_PARAM)) {
159 int len = this.annotation_types.length;
160 String[] annotation_type_names = new String[len];
161 for (int i = 0; i < len; i++) {
162 annotation_type_names[i] = getTextString("param." + name + "." + this.annotation_types[i], lang);
163 }
164
165 param = GSXML.createParameterDescription(this.doc, name, getTextString("param." + name, lang), GSXML.PARAM_TYPE_ENUM_MULTI, this.annotation_types[0], this.annotation_types, annotation_type_names);
166 param_list.appendChild(param);
167 }
168
169 }
170
171
172 protected Element processGateTag(Element request)
173 {
174 // System.out.println("(GateTag) Request:\n" + converter_.getPrettyString(request));
175
176 // Create a new (empty) result message
177 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
178 result.setAttribute(GSXML.FROM_ATT, GATE_TAG_SERVICE);
179 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
180
181 // Get the parameters of the request
182 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
183 if (param_list == null) {
184 System.err.println("Error: GateTag request had no paramList.");
185 return result; // Return the empty result
186 }
187
188 // Process the request parameters
189 Set annTypes = new HashSet();
190 Element param = (Element) param_list.getFirstChild();
191 while (param != null) {
192 // Identify the annotation types desired
193 if (param.getAttribute(GSXML.NAME_ATT).equals("annotationType")) {
194 String annotation_type = GSXML.getValue(param);
195 String [] types = annotation_type.split(",");
196 for (int i=0; i<types.length; i++) {
197 annTypes.add(types[i]);
198 }
199 }
200
201 param = (Element) param.getNextSibling();
202 }
203
204 // Get the request content
205 Element doc_node_list = (Element) GSXML.getChildByTagName(request, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
206 if (doc_node_list == null) {
207 System.err.println("Error: GateTag request specified no doc nodes.");
208 return result; // Return the empty result
209 }
210
211 // Process each document node in the list
212 NodeList doc_nodes = doc_node_list.getChildNodes();
213 for (int i = 0; i < doc_nodes.getLength(); i++) {
214 Element doc_node = (Element) doc_nodes.item(i);
215 Element content = (Element) GSXML.getChildByTagName(doc_node, "nodeContent");
216 Node content_text = (Node) GSXML.getNodeTextNode(content);
217 String text = content_text.getNodeValue();
218
219 // GATE needs the text to be a valid HTML file
220 text = "<html><head></head><body>" + text + "</body></html>";
221 String annotated_text = processText(text, annTypes);
222
223 // Remove the surrounding HTML tags
224 annotated_text = annotated_text.substring(49, annotated_text.length() - 13);
225 // System.out.println("GATE result:\n" + annotated_text);
226
227 annotated_text = "<nodeContent>" + annotated_text + "</nodeContent>";
228 Document annotated_content_doc = this.converter.getDOM(annotated_text);
229 if (annotated_content_doc != null) {
230 Element annotated_content = annotated_content_doc.getDocumentElement();
231 doc_node.replaceChild(doc_node.getOwnerDocument().importNode(annotated_content, true), content);
232 } else {
233 System.err.println("GATEServices.processGateTag Error: Couldn't parse annotated text for doc node "+i);
234 }
235
236 }
237
238 result.appendChild(this.doc.importNode(doc_node_list, true));
239 // System.out.println("GateTag result:\n" + converter_.getPrettyString(result));
240 return result;
241 }
242
243
244 public String processText(String text, Set annotationTypesToExport)
245 {
246 try {
247 // Create a new document containing the text
248 FeatureMap parameters = Factory.newFeatureMap();
249 parameters.put("stringContent", text);
250 parameters.put("markupAware", new Boolean(true));
251 parameters.put("preserveOriginalContent", new Boolean(true));
252 parameters.put("collectRepositioningInfo", new Boolean(true));
253 Document doc = (Document) Factory.createResource("gate.corpora.DocumentImpl",
254 parameters);
255
256 // Add it to the corpus
257 this.corpus.clear();
258 this.corpus.add(doc);
259
260 // Process the corpus
261 this.application.execute();
262
263 // Extract all the annotations
264 AnnotationSet annSet = doc.getAnnotations();
265
266 // Return the desired annotations
267 AnnotationSet outputAnnotations = annSet.get(annotationTypesToExport);
268 String result = doc.toXml(outputAnnotations, false);
269 Factory.deleteResource(doc);
270
271 // 1. Escape the GATE result
272 result = GSXML.xmlSafe(result);
273
274 // 2. Unescape the annotation tags
275 Iterator setIterator = annotationTypesToExport.iterator();
276 while (setIterator.hasNext()) {
277 String annotationType = (String) setIterator.next();
278 result = result.replaceAll("&lt;" + annotationType + "&gt;",
279 "<annotation type=\"" + annotationType + "\">");
280 result = result.replaceAll("&lt;/" + annotationType + "&gt;",
281 "</annotation>");
282 }
283
284 return result;
285 }
286 catch (Exception e) {
287 e.printStackTrace();
288 return null;
289 }
290 }
291}
Note: See TracBrowser for help on using the repository browser.