1 | package org.greenstone.gsdl3.gs3build.extractor;
|
---|
2 |
|
---|
3 | import java.io.FileReader;
|
---|
4 |
|
---|
5 | import java.util.List;
|
---|
6 | import java.util.ArrayList;
|
---|
7 |
|
---|
8 | import org.xml.sax.XMLReader;
|
---|
9 | import org.xml.sax.InputSource;
|
---|
10 | import org.xml.sax.SAXException;
|
---|
11 | import org.xml.sax.Attributes;
|
---|
12 | import org.xml.sax.helpers.XMLReaderFactory;
|
---|
13 | import org.xml.sax.helpers.DefaultHandler;
|
---|
14 |
|
---|
15 | import org.greenstone.gsdl3.gs3build.doctypes.DocumentID;
|
---|
16 | import org.greenstone.gsdl3.gs3build.doctypes.DocumentInterface;
|
---|
17 | import org.greenstone.gsdl3.gs3build.doctypes.DocumentList;
|
---|
18 | import org.greenstone.gsdl3.gs3build.doctypes.GMLDocument;
|
---|
19 |
|
---|
20 | public class GMLExtractor implements ExtractorInterface
|
---|
21 | {
|
---|
22 | public static final String ACCUMULATE_MODE = "accumulate";
|
---|
23 |
|
---|
24 | /**
|
---|
25 | * An inner class to handle GML files
|
---|
26 | */
|
---|
27 | class GMLHandler extends DefaultHandler
|
---|
28 | { List files;
|
---|
29 | String label;
|
---|
30 | StringBuffer value;
|
---|
31 | boolean inElement;
|
---|
32 | boolean accumulate;
|
---|
33 |
|
---|
34 | GMLHandler()
|
---|
35 | { super();
|
---|
36 |
|
---|
37 | this.label = null;
|
---|
38 | this.value = null;
|
---|
39 | }
|
---|
40 |
|
---|
41 | public void startElement(String URI, String localName, String qName, Attributes attributes)
|
---|
42 | { if (localName.equals("FileName"))
|
---|
43 | { this.value = new StringBuffer();
|
---|
44 | }
|
---|
45 | else if (localName.equals("FileSet"))
|
---|
46 | { this.files = new ArrayList();
|
---|
47 | }
|
---|
48 | else if (localName.equals("Description"))
|
---|
49 | {
|
---|
50 | }
|
---|
51 | else if (localName.equals("Metadata"))
|
---|
52 | { this.label = attributes.getValue("name");
|
---|
53 | this.value = new StringBuffer();
|
---|
54 |
|
---|
55 | String mode = attributes.getValue("mode");
|
---|
56 | this.accumulate = mode.equals(ExtractorManager.ACCUMULATE_MODE);
|
---|
57 | }
|
---|
58 | }
|
---|
59 |
|
---|
60 | public void endElement(String URI, String localName, String qName)
|
---|
61 | { if (localName.equals("FileName"))
|
---|
62 | { String file = this.value.toString();
|
---|
63 | this.value = null;
|
---|
64 | this.files.add(file);
|
---|
65 | }
|
---|
66 | else if (localName.equals("FileSet"))
|
---|
67 | { // post the existing files item...
|
---|
68 | }
|
---|
69 | else if (localName.equals("Description"))
|
---|
70 | {
|
---|
71 | }
|
---|
72 | else if (localName.equals("Metadata"))
|
---|
73 | { GMLExtractor.postMetadata(this.files, this.label, this.value.toString());
|
---|
74 | this.value = null;
|
---|
75 | this.label = null;
|
---|
76 | }
|
---|
77 | }
|
---|
78 |
|
---|
79 | public void characters(char c[], int start, int length)
|
---|
80 | { if (this.value != null)
|
---|
81 | { String string = new String(c, start, length);
|
---|
82 | this.value.append(string);
|
---|
83 | }
|
---|
84 | }
|
---|
85 | }
|
---|
86 |
|
---|
87 | /**
|
---|
88 | * Construct of extractor
|
---|
89 | */
|
---|
90 | public GMLExtractor()
|
---|
91 | { // Intentionally left blank
|
---|
92 | }
|
---|
93 |
|
---|
94 | /**
|
---|
95 | * This extractor doesn't need to do any preparation/completion work,
|
---|
96 | * so this member function is empty.
|
---|
97 | */
|
---|
98 | public void configure(String outputDir)
|
---|
99 | { // Intentionally left blank
|
---|
100 | }
|
---|
101 |
|
---|
102 | public void configure(DocumentList documentList)
|
---|
103 | { // Intentionally left blank
|
---|
104 | }
|
---|
105 |
|
---|
106 | /**
|
---|
107 | * This extractor doesn't need to do any preparation/completion work,
|
---|
108 | * so this member function is empty.
|
---|
109 | */
|
---|
110 | public void startPass(int passNo)
|
---|
111 | { // Intentionally left blank
|
---|
112 | }
|
---|
113 |
|
---|
114 | /**
|
---|
115 | * Process the document - for a GML document, this results in the
|
---|
116 | * decoration of other files, for other documents, it does nothing.
|
---|
117 | */
|
---|
118 | public void extractDocument(DocumentID docID, DocumentInterface document)
|
---|
119 | {
|
---|
120 | if (document.getDocumentType().equals(GMLDocument.GML_DOCUMENT_TYPE))
|
---|
121 | { // Extract the content from the GML file
|
---|
122 | try {
|
---|
123 | XMLReader reader = XMLReaderFactory.createXMLReader();
|
---|
124 | GMLHandler handler = new GMLHandler();
|
---|
125 | reader.setContentHandler(handler);
|
---|
126 | reader.setErrorHandler(handler);
|
---|
127 |
|
---|
128 | // A GML document consists of one file only - get it from the 'default'
|
---|
129 | // file group
|
---|
130 | FileReader fileReader = new FileReader(document.getDocumentFiles().getFile(0).toString());
|
---|
131 | reader.parse(new InputSource(fileReader));
|
---|
132 | }
|
---|
133 | catch (SAXException saxException)
|
---|
134 | { // TODO: log error
|
---|
135 | }
|
---|
136 | catch (java.io.FileNotFoundException fileException)
|
---|
137 | {
|
---|
138 | }
|
---|
139 | catch (java.io.IOException ioException)
|
---|
140 | {
|
---|
141 | }
|
---|
142 | // for each document post it to the corresponding document
|
---|
143 | }
|
---|
144 | }
|
---|
145 |
|
---|
146 | protected static void postMetadata(List files, String value, String label)
|
---|
147 | {
|
---|
148 |
|
---|
149 | }
|
---|
150 |
|
---|
151 | /**
|
---|
152 | * This extractor doesn't need to do any preparation/completion work,
|
---|
153 | * so this member function is empty.
|
---|
154 | */
|
---|
155 | public void endPass(int passNo)
|
---|
156 | { // Intentionally left blank
|
---|
157 | }
|
---|
158 |
|
---|
159 | /**
|
---|
160 | * This extractor is a simple, single-pass extractor
|
---|
161 | *
|
---|
162 | * @see: org.greenstone.gsdl3.gs3build.extractor.ExtractorInterface:getNumberOfPasses
|
---|
163 | */
|
---|
164 | public int getNumberOfPasses()
|
---|
165 | { return 1;
|
---|
166 | }
|
---|
167 | }
|
---|