1 | package org.greenstone.server;
|
---|
2 |
|
---|
3 | import gate.Annotation;
|
---|
4 | import gate.AnnotationSet;
|
---|
5 | import gate.Corpus;
|
---|
6 | import gate.CorpusController;
|
---|
7 | import gate.Document;
|
---|
8 | import gate.Factory;
|
---|
9 | import gate.FeatureMap;
|
---|
10 | import gate.Gate;
|
---|
11 | import gate.GateConstants;
|
---|
12 | import gate.ProcessingResource;
|
---|
13 | import gate.corpora.RepositioningInfo;
|
---|
14 | import gate.creole.ANNIEConstants;
|
---|
15 | import gate.creole.ResourceInstantiationException;
|
---|
16 | import gate.creole.SerialAnalyserController;
|
---|
17 | import gate.util.GateException;
|
---|
18 | import gate.util.Out;
|
---|
19 | import gate.util.persistence.PersistenceManager;
|
---|
20 |
|
---|
21 | import java.io.File;
|
---|
22 | import java.net.URL;
|
---|
23 | import java.util.ArrayList;
|
---|
24 | import java.util.HashSet;
|
---|
25 | import java.util.Iterator;
|
---|
26 | import java.util.Set;
|
---|
27 |
|
---|
28 | import org.apache.log4j.BasicConfigurator;
|
---|
29 |
|
---|
30 | public class GateScanner
|
---|
31 | {
|
---|
32 | private SerialAnalyserController annieController;
|
---|
33 |
|
---|
34 | public GateScanner()
|
---|
35 | {
|
---|
36 | try
|
---|
37 | {
|
---|
38 | BasicConfigurator.configure();
|
---|
39 | Gate.setGateHome(new File("/home/sam/Desktop/Research/Gate5"));
|
---|
40 | Gate.setPluginsHome(new File("/home/sam/Desktop/Research/Gate5/plugins"));
|
---|
41 | Gate.setUserConfigFile(new File("/home/sam/Desktop/Research/Gate5/gate.xml"));
|
---|
42 | Gate.init();
|
---|
43 | Gate.getCreoleRegister().registerDirectories(new URL("file:///home/sam/Desktop/Research/Gate5/plugins/ANNIE"));
|
---|
44 | Gate.getUserConfig().put(GateConstants.DOCUMENT_ADD_SPACE_ON_UNPACK_FEATURE_NAME, new Boolean(false));
|
---|
45 |
|
---|
46 | Out.prln("Initialising ANNIE...");
|
---|
47 |
|
---|
48 | annieController = (SerialAnalyserController) PersistenceManager.loadObjectFromFile(new File(new File(Gate.getPluginsHome(), ANNIEConstants.PLUGIN_DIR), ANNIEConstants.DEFAULT_FILE));
|
---|
49 |
|
---|
50 | Out.prln("...ANNIE loaded");
|
---|
51 | }
|
---|
52 | catch (Exception e)
|
---|
53 | {
|
---|
54 | e.printStackTrace();
|
---|
55 | }
|
---|
56 | }
|
---|
57 |
|
---|
58 | /** Tell ANNIE's controller about the corpus you want to run on */
|
---|
59 | public void setCorpus(Corpus corpus)
|
---|
60 | {
|
---|
61 | annieController.setCorpus(corpus);
|
---|
62 | } // setCorpus
|
---|
63 |
|
---|
64 | /** Run ANNIE */
|
---|
65 | public void execute() throws GateException
|
---|
66 | {
|
---|
67 | Out.prln("Running ANNIE...");
|
---|
68 | annieController.execute();
|
---|
69 | Out.prln("...ANNIE complete");
|
---|
70 | } // execute()
|
---|
71 |
|
---|
72 | public ArrayList<Word> classifyText(String originalText)
|
---|
73 | {
|
---|
74 | ArrayList<Word> words = new ArrayList<Word>();
|
---|
75 | try
|
---|
76 | {
|
---|
77 | // create a GATE corpus and add a document for each command-line
|
---|
78 | // argument
|
---|
79 | Corpus corpus = (Corpus) Factory.createResource("gate.corpora.CorpusImpl");
|
---|
80 | FeatureMap params = Factory.newFeatureMap();
|
---|
81 | params.put("stringContent", "<html><head></head><body>" + originalText + "</body></html>");
|
---|
82 | params.put("preserveOriginalContent", new Boolean(true));
|
---|
83 | params.put("collectRepositioningInfo", new Boolean(true));
|
---|
84 | Document doc = (Document) Factory.createResource("gate.corpora.DocumentImpl", params);
|
---|
85 | corpus.add(doc);
|
---|
86 |
|
---|
87 | // tell the pipeline about the corpus and run it
|
---|
88 | this.setCorpus(corpus);
|
---|
89 | this.execute();
|
---|
90 |
|
---|
91 | AnnotationSet annotations = doc.getAnnotations();
|
---|
92 |
|
---|
93 | int index = 0;
|
---|
94 | for(int j = 0; j < annotations.size(); j++)
|
---|
95 | {
|
---|
96 | Annotation currentAnnotation = annotations.get(j);
|
---|
97 |
|
---|
98 | if(currentAnnotation == null)
|
---|
99 | {
|
---|
100 | continue;
|
---|
101 | }
|
---|
102 | if(currentAnnotation.getType().equals("Token"))
|
---|
103 | {
|
---|
104 | Word w = new Word();
|
---|
105 | w.setIndex(index++);
|
---|
106 | w.setClassification((String)currentAnnotation.getFeatures().get("category"));
|
---|
107 | w.setValue((String)currentAnnotation.getFeatures().get("string"));
|
---|
108 |
|
---|
109 | words.add(w);
|
---|
110 | }
|
---|
111 | }
|
---|
112 | }
|
---|
113 | catch(Exception ex)
|
---|
114 | {
|
---|
115 | ex.printStackTrace();
|
---|
116 | }
|
---|
117 |
|
---|
118 | return words;
|
---|
119 | }
|
---|
120 | } |
---|