1 | package org.greenstone.gsdl3.gs3build.extractor;
|
---|
2 |
|
---|
3 | import java.io.FileReader;
|
---|
4 |
|
---|
5 | import org.xml.sax.XMLReader;
|
---|
6 | import org.xml.sax.InputSource;
|
---|
7 | import org.xml.sax.SAXException;
|
---|
8 | import org.xml.sax.Attributes;
|
---|
9 | import org.xml.sax.helpers.XMLReaderFactory;
|
---|
10 | import org.xml.sax.helpers.DefaultHandler;
|
---|
11 |
|
---|
12 | import org.greenstone.gsdl3.gs3build.doctypes.DocumentID;
|
---|
13 | import org.greenstone.gsdl3.gs3build.doctypes.DocumentInterface;
|
---|
14 | import org.greenstone.gsdl3.gs3build.doctypes.GMLDocument;
|
---|
15 |
|
---|
16 | public class GMLExtractor implements ExtractorInterface
|
---|
17 | {
|
---|
18 | /**
|
---|
19 | * An inner class to handle GML files
|
---|
20 | */
|
---|
21 | class GMLHandler extends DefaultHandler
|
---|
22 | { String file;
|
---|
23 | String label;
|
---|
24 | StringBuffer value;
|
---|
25 | boolean inElement;
|
---|
26 |
|
---|
27 | GMLHandler()
|
---|
28 | { super();
|
---|
29 |
|
---|
30 | this.file = null;
|
---|
31 | this.label = null;
|
---|
32 | this.value = null;
|
---|
33 | }
|
---|
34 |
|
---|
35 | public void startElement(String URI, String localName, String qName, Attributes attributes)
|
---|
36 | { if (localName.equals("Filename"))
|
---|
37 | { this.value = new StringBuffer();
|
---|
38 | }
|
---|
39 | else if (localName.equals("Metadata"))
|
---|
40 | { this.label = attributes.getValue("name");
|
---|
41 | this.value = new StringBuffer();
|
---|
42 | }
|
---|
43 | }
|
---|
44 |
|
---|
45 | public void endElement(String URI, String localName, String qName)
|
---|
46 | { if (localName.equals("Filename"))
|
---|
47 | { this.file = this.value.toString();
|
---|
48 | this.value = null;
|
---|
49 | }
|
---|
50 | else if (localName.equals("Metadata"))
|
---|
51 | { GMLExtractor.postMetadata(this.file, this.label, this.value.toString());
|
---|
52 | this.value = null;
|
---|
53 | this.label = null;
|
---|
54 | }
|
---|
55 | }
|
---|
56 |
|
---|
57 | public void characters(char c[], int start, int length)
|
---|
58 | { if (this.label != null)
|
---|
59 | { String string = new String(c, start, length);
|
---|
60 | this.value.append(string);
|
---|
61 | }
|
---|
62 | }
|
---|
63 | }
|
---|
64 |
|
---|
65 | /**
|
---|
66 | * Construct of extractor
|
---|
67 | */
|
---|
68 | public GMLExtractor()
|
---|
69 | { // Intentionally left blank
|
---|
70 | }
|
---|
71 |
|
---|
72 | /**
|
---|
73 | * This extractor doesn't need to do any preparation/completion work,
|
---|
74 | * so this member function is empty.
|
---|
75 | */
|
---|
76 | public void configure(String outputDir)
|
---|
77 | { // Intentionally left blank
|
---|
78 | }
|
---|
79 |
|
---|
80 | /**
|
---|
81 | * This extractor doesn't need to do any preparation/completion work,
|
---|
82 | * so this member function is empty.
|
---|
83 | */
|
---|
84 | public void startPass(int passNo)
|
---|
85 | { // Intentionally left blank
|
---|
86 | }
|
---|
87 |
|
---|
88 | /**
|
---|
89 | * Process the document - for a GML document, this results in the
|
---|
90 | * decoration of other files, for other documents, it does nothing.
|
---|
91 | */
|
---|
92 | public void extractDocument(DocumentID docID, DocumentInterface document)
|
---|
93 | {
|
---|
94 | if (document.getDocumentType().equals(GMLDocument.GML_DOCUMENT_TYPE))
|
---|
95 | { // Extract the content from the GML file
|
---|
96 | try {
|
---|
97 | XMLReader reader = XMLReaderFactory.createXMLReader();
|
---|
98 | GMLHandler handler = new GMLHandler();
|
---|
99 | reader.setContentHandler(handler);
|
---|
100 | reader.setErrorHandler(handler);
|
---|
101 |
|
---|
102 | // A GML document consists of one file only - get it from the 'default'
|
---|
103 | // file group
|
---|
104 | FileReader fileReader = new FileReader(document.getDocumentFiles().getFile(0).toString());
|
---|
105 | reader.parse(new InputSource(fileReader));
|
---|
106 | }
|
---|
107 | catch (SAXException saxException)
|
---|
108 | { // TODO: log error
|
---|
109 | }
|
---|
110 | catch (java.io.FileNotFoundException fileException)
|
---|
111 | {
|
---|
112 | }
|
---|
113 | catch (java.io.IOException ioException)
|
---|
114 | {
|
---|
115 | }
|
---|
116 | // for each document post it to the corresponding document
|
---|
117 | }
|
---|
118 | }
|
---|
119 |
|
---|
120 | protected static void postMetadata(String file, String value, String label)
|
---|
121 | {
|
---|
122 | }
|
---|
123 |
|
---|
124 | /**
|
---|
125 | * This extractor doesn't need to do any preparation/completion work,
|
---|
126 | * so this member function is empty.
|
---|
127 | */
|
---|
128 | public void endPass(int passNo)
|
---|
129 | { // Intentionally left blank
|
---|
130 | }
|
---|
131 |
|
---|
132 | /**
|
---|
133 | * This extractor is a simple, single-pass extractor
|
---|
134 | *
|
---|
135 | * @see: org.greenstone.gsdl3.gs3build.extractor.ExtractorInterface:getNumberOfPasses
|
---|
136 | */
|
---|
137 | public int getNumberOfPasses()
|
---|
138 | { return 1;
|
---|
139 | }
|
---|
140 | }
|
---|