source: gs3-extensions/atlas-src/trunk/src/org/greenstone/server/GateScanner.java@ 22272

Last change on this file since 22272 was 22272, checked in by sjm84, 14 years ago

Initial version of ATLAS as an extension

File size: 3.5 KB
Line 
1package org.greenstone.server;
2
import gate.Annotation;
import gate.AnnotationSet;
import gate.Corpus;
import gate.CorpusController;
import gate.Document;
import gate.Factory;
import gate.FeatureMap;
import gate.Gate;
import gate.GateConstants;
import gate.ProcessingResource;
import gate.corpora.RepositioningInfo;
import gate.creole.ANNIEConstants;
import gate.creole.ResourceInstantiationException;
import gate.creole.SerialAnalyserController;
import gate.util.GateException;
import gate.util.Out;
import gate.util.persistence.PersistenceManager;

import java.io.File;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

import org.apache.log4j.BasicConfigurator;
29
30public class GateScanner
31{
32 private SerialAnalyserController annieController;
33
34 public GateScanner()
35 {
36 try
37 {
38 BasicConfigurator.configure();
39 Gate.setGateHome(new File("/home/sam/Desktop/Research/Gate5"));
40 Gate.setPluginsHome(new File("/home/sam/Desktop/Research/Gate5/plugins"));
41 Gate.setUserConfigFile(new File("/home/sam/Desktop/Research/Gate5/gate.xml"));
42 Gate.init();
43 Gate.getCreoleRegister().registerDirectories(new URL("file:///home/sam/Desktop/Research/Gate5/plugins/ANNIE"));
44 Gate.getUserConfig().put(GateConstants.DOCUMENT_ADD_SPACE_ON_UNPACK_FEATURE_NAME, new Boolean(false));
45
46 Out.prln("Initialising ANNIE...");
47
48 annieController = (SerialAnalyserController) PersistenceManager.loadObjectFromFile(new File(new File(Gate.getPluginsHome(), ANNIEConstants.PLUGIN_DIR), ANNIEConstants.DEFAULT_FILE));
49
50 Out.prln("...ANNIE loaded");
51 }
52 catch (Exception e)
53 {
54 e.printStackTrace();
55 }
56 }
57
58 /** Tell ANNIE's controller about the corpus you want to run on */
59 public void setCorpus(Corpus corpus)
60 {
61 annieController.setCorpus(corpus);
62 } // setCorpus
63
64 /** Run ANNIE */
65 public void execute() throws GateException
66 {
67 Out.prln("Running ANNIE...");
68 annieController.execute();
69 Out.prln("...ANNIE complete");
70 } // execute()
71
72 public ArrayList<Word> classifyText(String originalText)
73 {
74 ArrayList<Word> words = new ArrayList<Word>();
75 try
76 {
77 // create a GATE corpus and add a document for each command-line
78 // argument
79 Corpus corpus = (Corpus) Factory.createResource("gate.corpora.CorpusImpl");
80 FeatureMap params = Factory.newFeatureMap();
81 params.put("stringContent", "<html><head></head><body>" + originalText + "</body></html>");
82 params.put("preserveOriginalContent", new Boolean(true));
83 params.put("collectRepositioningInfo", new Boolean(true));
84 Document doc = (Document) Factory.createResource("gate.corpora.DocumentImpl", params);
85 corpus.add(doc);
86
87 // tell the pipeline about the corpus and run it
88 this.setCorpus(corpus);
89 this.execute();
90
91 AnnotationSet annotations = doc.getAnnotations();
92
93 int index = 0;
94 for(int j = 0; j < annotations.size(); j++)
95 {
96 Annotation currentAnnotation = annotations.get(j);
97
98 if(currentAnnotation == null)
99 {
100 continue;
101 }
102 if(currentAnnotation.getType().equals("Token"))
103 {
104 Word w = new Word();
105 w.setIndex(index++);
106 w.setClassification((String)currentAnnotation.getFeatures().get("category"));
107 w.setValue((String)currentAnnotation.getFeatures().get("string"));
108
109 words.add(w);
110 }
111 }
112 }
113 catch(Exception ex)
114 {
115 ex.printStackTrace();
116 }
117
118 return words;
119 }
120}
Note: See TracBrowser for help on using the repository browser.