package org.greenstone.server;

import gate.Annotation;
import gate.AnnotationSet;
import gate.Corpus;
import gate.Document;
import gate.Factory;
import gate.FeatureMap;
import gate.Gate;
import gate.GateConstants;
import gate.creole.ANNIEConstants;
import gate.creole.SerialAnalyserController;
import gate.util.GateException;
import gate.util.Out;
import gate.util.persistence.PersistenceManager;

import java.io.File;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;

import org.apache.log4j.BasicConfigurator;

24 | public class GateScanner
|
---|
25 | {
|
---|
26 | private SerialAnalyserController annieController;
|
---|
27 |
|
---|
28 | public GateScanner()
|
---|
29 | {
|
---|
30 | try
|
---|
31 | {
|
---|
32 | BasicConfigurator.configure();
|
---|
33 | Gate.setGateHome(new File("C:\\Users\\sjm84\\Desktop\\stuff\\Gate5.2"));
|
---|
34 | Gate.setPluginsHome(new File("C:\\Users\\sjm84\\Desktop\\stuff\\Gate5.2\\plugins"));
|
---|
35 | Gate.setUserConfigFile(new File("C:\\Users\\sjm84\\Desktop\\stuff\\Gate5.2\\gate.xml"));
|
---|
36 | Gate.init();
|
---|
37 | Gate.getCreoleRegister().registerDirectories(new File("C:\\Users\\sjm84\\Desktop\\stuff\\Gate5.2\\plugins\\ANNIE").toURI().toURL());
|
---|
38 | Gate.getUserConfig().put(GateConstants.DOCUMENT_ADD_SPACE_ON_UNPACK_FEATURE_NAME, new Boolean(false));
|
---|
39 |
|
---|
40 | Out.prln("Initialising ANNIE...");
|
---|
41 |
|
---|
42 | annieController = (SerialAnalyserController) PersistenceManager.loadObjectFromFile(new File(new File(Gate.getPluginsHome(), ANNIEConstants.PLUGIN_DIR), ANNIEConstants.DEFAULT_FILE));
|
---|
43 |
|
---|
44 | Out.prln("...ANNIE loaded");
|
---|
45 | }
|
---|
46 | catch (Exception e)
|
---|
47 | {
|
---|
48 | e.printStackTrace();
|
---|
49 | }
|
---|
50 | }
|
---|
51 |
|
---|
52 | /** Tell ANNIE's controller about the corpus you want to run on */
|
---|
53 | public void setCorpus(Corpus corpus)
|
---|
54 | {
|
---|
55 | annieController.setCorpus(corpus);
|
---|
56 | } // setCorpus
|
---|
57 |
|
---|
58 | /** Run ANNIE */
|
---|
59 | public void execute() throws GateException
|
---|
60 | {
|
---|
61 | Out.prln("Running ANNIE...");
|
---|
62 | annieController.execute();
|
---|
63 | Out.prln("...ANNIE complete");
|
---|
64 | } // execute()
|
---|
65 |
|
---|
66 | public HashMap<String, Word> classifyText(String originalText)
|
---|
67 | {
|
---|
68 | HashMap<String, Word> words = new HashMap<String, Word>();
|
---|
69 | try
|
---|
70 | {
|
---|
71 | Corpus corpus = (Corpus) Factory.createResource("gate.corpora.CorpusImpl");
|
---|
72 | FeatureMap params = Factory.newFeatureMap();
|
---|
73 | params.put("stringContent", "<html><head></head><body>" + originalText + "</body></html>");
|
---|
74 | params.put("preserveOriginalContent", new Boolean(true));
|
---|
75 | params.put("collectRepositioningInfo", new Boolean(true));
|
---|
76 | Document doc = (Document) Factory.createResource("gate.corpora.DocumentImpl", params);
|
---|
77 | corpus.add(doc);
|
---|
78 |
|
---|
79 | // tell the pipeline about the corpus and run it
|
---|
80 | this.setCorpus(corpus);
|
---|
81 | this.execute();
|
---|
82 |
|
---|
83 | AnnotationSet annotations = doc.getAnnotations();
|
---|
84 |
|
---|
85 | for(int j = 0; j < annotations.size(); j++)
|
---|
86 | {
|
---|
87 | Annotation currentAnnotation = annotations.get(j);
|
---|
88 |
|
---|
89 | if(currentAnnotation == null)
|
---|
90 | {
|
---|
91 | continue;
|
---|
92 | }
|
---|
93 | if(currentAnnotation.getType().equals("Token"))
|
---|
94 | {
|
---|
95 | Word w = null;
|
---|
96 | if(words.containsKey((String)currentAnnotation.getFeatures().get("string")))
|
---|
97 | {
|
---|
98 | w = words.get((String)currentAnnotation.getFeatures().get("string"));
|
---|
99 | w.addClassification((String)currentAnnotation.getFeatures().get("category"));
|
---|
100 | w.setValue((String)currentAnnotation.getFeatures().get("string"));
|
---|
101 | }
|
---|
102 | else
|
---|
103 | {
|
---|
104 | w = new Word();
|
---|
105 | w.addClassification((String)currentAnnotation.getFeatures().get("category"));
|
---|
106 | w.setValue((String)currentAnnotation.getFeatures().get("string"));
|
---|
107 | words.put(w.getValue(), w);
|
---|
108 | }
|
---|
109 | }
|
---|
110 | }
|
---|
111 | }
|
---|
112 | catch(Exception ex)
|
---|
113 | {
|
---|
114 | ex.printStackTrace();
|
---|
115 | }
|
---|
116 |
|
---|
117 | return words;
|
---|
118 | }
|
---|
119 | } |
---|