source: gs3-extensions/atlas-src/trunk/src/org/greenstone/atlas/server/GateScanner.java@ 23934

Last change on this file since 23934 was 23934, checked in by sjm84, 13 years ago

Extensive improvements to the ATLAS code

File size: 3.6 KB
Line 
1package org.greenstone.atlas.server;
2
3import gate.Annotation;
4import gate.AnnotationSet;
5import gate.Corpus;
6import gate.Document;
7import gate.Factory;
8import gate.FeatureMap;
9import gate.Gate;
10import gate.GateConstants;
11import gate.creole.ANNIEConstants;
12import gate.creole.SerialAnalyserController;
13import gate.util.GateException;
14import gate.util.Out;
15import gate.util.persistence.PersistenceManager;
16
17import java.io.File;
18import java.net.URL;
19import java.util.ArrayList;
20import java.util.HashMap;
21
22import org.apache.log4j.BasicConfigurator;
23
24public class GateScanner
25{
26 private SerialAnalyserController annieController;
27
28 public GateScanner()
29 {
30 try
31 {
32 BasicConfigurator.configure();
33 Gate.setGateHome(new File("C:\\Users\\sjm84\\Desktop\\Gate"));
34 Gate.setPluginsHome(new File("C:\\Users\\sjm84\\Desktop\\Gate\\plugins"));
35 Gate.setUserConfigFile(new File("C:\\Users\\sjm84\\Desktop\\Gate\\gate.xml"));
36 Gate.init();
37 Gate.getCreoleRegister().registerDirectories(new File("C:\\Users\\sjm84\\Desktop\\Gate\\plugins\\ANNIE").toURI().toURL());
38 Gate.getUserConfig().put(GateConstants.DOCUMENT_ADD_SPACE_ON_UNPACK_FEATURE_NAME, new Boolean(false));
39
40 Out.prln("Initialising ANNIE...");
41
42 annieController = (SerialAnalyserController) PersistenceManager.loadObjectFromFile(new File(new File(Gate.getPluginsHome(), ANNIEConstants.PLUGIN_DIR), ANNIEConstants.DEFAULT_FILE));
43
44 Out.prln("...ANNIE loaded");
45 }
46 catch (Exception e)
47 {
48 e.printStackTrace();
49 }
50 }
51
52 /** Tell ANNIE's controller about the corpus you want to run on */
53 public void setCorpus(Corpus corpus)
54 {
55 annieController.setCorpus(corpus);
56 } // setCorpus
57
58 /** Run ANNIE */
59 public void execute() throws GateException
60 {
61 Out.prln("Running ANNIE...");
62 annieController.execute();
63 Out.prln("...ANNIE complete");
64 } // execute()
65
66 public HashMap<String, Word> classifyText(String originalText)
67 {
68 HashMap<String, Word> words = new HashMap<String, Word>();
69 try
70 {
71 Corpus corpus = (Corpus) Factory.createResource("gate.corpora.CorpusImpl");
72 FeatureMap params = Factory.newFeatureMap();
73 params.put("stringContent", "<html><head></head><body>" + originalText + "</body></html>");
74 params.put("preserveOriginalContent", new Boolean(true));
75 params.put("collectRepositioningInfo", new Boolean(true));
76 Document doc = (Document) Factory.createResource("gate.corpora.DocumentImpl", params);
77 corpus.add(doc);
78
79 // tell the pipeline about the corpus and run it
80 this.setCorpus(corpus);
81 this.execute();
82
83 AnnotationSet annotations = doc.getAnnotations();
84
85 for(int j = 0; j < annotations.size(); j++)
86 {
87 Annotation currentAnnotation = annotations.get(j);
88
89 if(currentAnnotation == null)
90 {
91 continue;
92 }
93 if(currentAnnotation.getType().equals("Token"))
94 {
95 Word w = null;
96 if(words.containsKey((String)currentAnnotation.getFeatures().get("string")))
97 {
98 w = words.get((String)currentAnnotation.getFeatures().get("string"));
99 w.addClassification((String)currentAnnotation.getFeatures().get("category"));
100 w.setValue((String)currentAnnotation.getFeatures().get("string"));
101 }
102 else
103 {
104 w = new Word();
105 w.addClassification((String)currentAnnotation.getFeatures().get("category"));
106 w.setValue((String)currentAnnotation.getFeatures().get("string"));
107 words.put(w.getValue(), w);
108 }
109 }
110 }
111 }
112 catch(Exception ex)
113 {
114 ex.printStackTrace();
115 }
116
117 return words;
118 }
119}
Note: See TracBrowser for help on using the repository browser.