source: trunk/gsdl3/src/java/org/greenstone/gsdl3/service/GATEServices.java@ 4903

Last change on this file since 4903 was 4903, checked in by kjdon, 21 years ago

tidied up a lot of stuff, particularly the display text stuff, including how it's formatted, and some of the service rack methods

  • Property svn:keywords set to Author Date Id Revision
File size: 9.4 KB
Line 
1/*
2 * GATEServices.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21
22// Greenstone classes
23import org.greenstone.gsdl3.util.*;
24
25// GATE classes
26import gate.*;
27import gate.creole.*;
28import gate.gui.*;
29import gate.util.persistence.PersistenceManager;
30
31// XML classes
32import org.w3c.dom.Element;
33import org.w3c.dom.Node;
34import org.w3c.dom.NodeList;
35
36// General Java classes
37import java.io.File;
38import java.net.URL;
39import java.util.Collection;
40import java.util.HashSet;
41import java.util.Iterator;
42import java.util.Set;
43
44
45/**
46 * <p>Title: GATE Greenstone3 integration</p>
47 * <p>Description: </p>
48 * <p>Copyright: Copyright (c) 2003</p>
49 * <p>Company: University of Waikato</p>
50 * @author unascribed
51 * @version 1.0
52 */
53
54public class GATEServices
55 extends ServiceRack {
56
57 // the services on offer
58 // these strings must match what is found in the properties file
59 protected static final String GATE_POS_TAG_SERVICE = "GatePOSTag";
60
61 protected static final String ANNOTATION_TYPE_PARAM = "annotationType";
62
63 protected Element config_info_ = null;
64
65 protected CorpusController application_;
66
67 protected Corpus corpus_;
68
69 protected String[] annotation_types_ = { "Date", "Location",
70 "Organization", "Person" };
71 // Address,
72
73 /** constructor */
74 public GATEServices()
75 {
76 }
77
78
79 /** configure this service */
80 public boolean configure(Element info, Element extra_info)
81 {
82 System.out.println("Configuring GATEServices...");
83 config_info_ = info;
84
85 // set up short_service_info_ - for now just has name and type
86 Element tag_service = doc_.createElement(GSXML.SERVICE_ELEM);
87 tag_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_ENRICH);
88 tag_service.setAttribute(GSXML.NAME_ATT, GATE_POS_TAG_SERVICE);
89 short_service_info_.appendChild(tag_service);
90
91
92 // Configure GATE for use
93 try {
94 Gate.init();
95 Gate.getUserConfig().put(GateConstants.DOCUMENT_ADD_SPACE_ON_UNPACK_FEATURE_NAME,
96 new Boolean(false));
97
98 // MainFrame mainFrame = new MainFrame();
99 // mainFrame.setSize(new java.awt.Dimension(800, 600));
100 // mainFrame.setVisible(true);
101
102 // Load the (pre-created) application
103 URL applicationFileURL = ClassLoader.getSystemResource("gate.app");
104 File applicationFile = new File(applicationFileURL.getFile());
105 application_ = (CorpusController) PersistenceManager.loadObjectFromFile(applicationFile);
106
107 /* Collection processing_resources = application.getPRs();
108 Iterator pr_iterator = processing_resources.iterator();
109 while (pr_iterator.hasNext()) {
110 ProcessingResource pr = (ProcessingResource) pr_iterator.next();
111 String pr_name = pr.getName();
112 System.out.println("PR name: " + pr_name);
113 if (pr_name.startsWith("ANNIE POS Tagger")) {
114 String as_name = ((POSTagger) pr).getInputASName();
115 System.out.println("AS name: " + as_name);
116 }
117 } */
118
119 // Create a new corpus
120 corpus_ = Factory.newCorpus("GSDL3 Corpus");
121 application_.setCorpus(corpus_);
122 }
123 catch (Exception e) {
124 e.printStackTrace();
125 }
126
127 return true;
128 }
129
130 protected Element getServiceDescription(String service, String lang) {
131
132 if (!service.equals(GATE_POS_TAG_SERVICE)) {
133 return null;
134 }
135 Element tag_service = doc_.createElement(GSXML.SERVICE_ELEM);
136 tag_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_ENRICH);
137 tag_service.setAttribute(GSXML.NAME_ATT, GATE_POS_TAG_SERVICE);
138 tag_service.appendChild(GSXML.createDisplayTextElement(doc_, GSXML.DISPLAY_TEXT_NAME, getTextString(service+".name", lang)));
139 tag_service.appendChild(GSXML.createDisplayTextElement(doc_, GSXML.DISPLAY_TEXT_SUBMIT, getTextString(service+".submit", lang)));
140 Element param_list = doc_.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
141 createParameter(ANNOTATION_TYPE_PARAM, param_list, lang);
142 tag_service.appendChild(param_list);
143
144 return tag_service;
145 }
146
147
148 /** creates a new param element and adds it to the param list */
149 protected void createParameter(String name, Element param_list,
150 String lang)
151 {
152 Element param = null;
153
154 if (name.equals(ANNOTATION_TYPE_PARAM)) {
155 int len = annotation_types_.length;
156 String[] annotation_type_names = new String[len];
157 for (int i = 0; i < len; i++) {
158 annotation_type_names[i] = getTextString("param." + name + "." + annotation_types_[i], lang);
159 }
160
161 param = GSXML.createParameterDescription(doc_, name, getTextString("param." + name, lang), GSXML.PARAM_TYPE_ENUM_SINGLE, annotation_types_[0], annotation_types_, annotation_type_names);
162 param_list.appendChild(param);
163 }
164
165 }
166
167
168 protected Element processGatePOSTag(Element request)
169 {
170 // System.out.println("(GatePOSTag) Request:\n" + converter_.getPrettyString(request));
171
172 // Create a new (empty) result message
173 Element result = doc_.createElement(GSXML.RESPONSE_ELEM);
174 result.setAttribute(GSXML.FROM_ATT, GATE_POS_TAG_SERVICE);
175 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
176
177 // Get the parameters of the request
178 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
179 if (param_list == null) {
180 System.err.println("Error: GatePOSTag request had no paramList.");
181 return result; // Return the empty result
182 }
183
184 // Process the request parameters
185 Set annTypes = new HashSet();
186 Element param = (Element) param_list.getFirstChild();
187 while (param != null) {
188 // Identify the annotation types desired
189 if (param.getAttribute(GSXML.NAME_ATT).equals("annotationType")) {
190 String annotation_type = GSXML.getValue(param);
191 String [] types = annotation_type.split(",");
192 for (int i=0; i<types.length; i++) {
193 annTypes.add(types[i]);
194 }
195 }
196
197 param = (Element) param.getNextSibling();
198 }
199
200 // Get the request content
201 Element doc_node_list = (Element) GSXML.getChildByTagName(request, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
202 if (doc_node_list == null) {
203 System.err.println("Error: GatePOSTag request specified no doc nodes.");
204 return result; // Return the empty result
205 }
206
207 // Process each document node in the list
208 NodeList doc_nodes = doc_node_list.getChildNodes();
209 for (int i = 0; i < doc_nodes.getLength(); i++) {
210 Element doc_node = (Element) doc_nodes.item(i);
211 Element content = (Element) GSXML.getChildByTagName(doc_node, "nodeContent");
212 Node content_text = (Node) GSXML.getNodeTextNode(content);
213 String text = content_text.getNodeValue();
214
215 // GATE needs the text to be a valid HTML file
216 text = "<html><head></head><body>" + text + "</body></html>";
217 String annotated_text = processText(text, annTypes);
218
219 // Remove the surrounding HTML tags
220 annotated_text = annotated_text.substring(49, annotated_text.length() - 13);
221 // System.out.println("GATE result:\n" + annotated_text);
222
223 annotated_text = "<nodeContent>" + annotated_text + "</nodeContent>";
224 Element annotated_content = converter_.getDOM(annotated_text).getDocumentElement();
225 doc_node.replaceChild(doc_node.getOwnerDocument().importNode(annotated_content, true), content);
226 }
227
228 result.appendChild(doc_.importNode(doc_node_list, true));
229 // System.out.println("GatePOSTag result:\n" + converter_.getPrettyString(result));
230 return result;
231 }
232
233
234 public String processText(String text, Set annotationTypesToExport)
235 {
236 try {
237 // Create a new document containing the text
238 FeatureMap parameters = Factory.newFeatureMap();
239 parameters.put("stringContent", text);
240 parameters.put("markupAware", new Boolean(true));
241 parameters.put("preserveOriginalContent", new Boolean(true));
242 parameters.put("collectRepositioningInfo", new Boolean(true));
243 Document doc = (Document) Factory.createResource("gate.corpora.DocumentImpl",
244 parameters);
245
246 // Add it to the corpus
247 corpus_.clear();
248 corpus_.add(doc);
249
250 // Process the corpus
251 application_.execute();
252
253 // Extract all the annotations
254 AnnotationSet annSet = doc.getAnnotations();
255
256 // Return the desired annotations
257 AnnotationSet outputAnnotations = annSet.get(annotationTypesToExport);
258 String result = doc.toXml(outputAnnotations, false);
259 Factory.deleteResource(doc);
260
261 // 1. Escape the GATE result
262 result = GSXML.xmlSafe(result);
263
264 // 2. Unescape the annotation tags
265 Iterator setIterator = annotationTypesToExport.iterator();
266 while (setIterator.hasNext()) {
267 String annotationType = (String) setIterator.next();
268 result = result.replaceAll("&lt;" + annotationType + "&gt;",
269 "<annotation type=\"" + annotationType + "\">");
270 result = result.replaceAll("&lt;/" + annotationType + "&gt;",
271 "</annotation>");
272 }
273
274 return result;
275 }
276 catch (Exception e) {
277 e.printStackTrace();
278 return null;
279 }
280 }
281}
Note: See TracBrowser for help on using the repository browser.