1 | package org.greenstone.gsdl3.gs3build.doctypes;
|
---|
2 |
|
---|
3 | import java.io.*;
|
---|
4 |
|
---|
5 | import java.net.URL;
|
---|
6 |
|
---|
7 | import javax.xml.parsers.*;
|
---|
8 |
|
---|
9 | import org.w3c.dom.Document;
|
---|
10 | import org.w3c.dom.Element;
|
---|
11 | import org.w3c.dom.NamedNodeMap;
|
---|
12 | import org.w3c.dom.Node;
|
---|
13 | import org.w3c.dom.NodeList;
|
---|
14 | import org.w3c.dom.Text;
|
---|
15 |
|
---|
16 | import org.xml.sax.SAXException;
|
---|
17 | import org.xml.sax.SAXParseException;
|
---|
18 |
|
---|
19 | import org.greenstone.gsdl3.gs3build.metadata.*;
|
---|
20 |
|
---|
21 | public class METSDocument extends AbstractDocument
|
---|
22 | {
|
---|
23 | public static final String METS_DOCUMENT_TYPE = "METS";
|
---|
24 |
|
---|
25 | public METSDocument(URL url)
|
---|
26 | { super(url);
|
---|
27 |
|
---|
28 | if (url.toString().startsWith("file://"))
|
---|
29 | { this._parseFile(new File(url.toString().substring(7)));
|
---|
30 | }
|
---|
31 | }
|
---|
32 |
|
---|
33 | private void _parseFile(File file)
|
---|
34 | {
|
---|
35 | try {
|
---|
36 | DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
|
---|
37 | DocumentBuilder builder = factory.newDocumentBuilder();
|
---|
38 | Document document = builder.parse(file);
|
---|
39 |
|
---|
40 | // TODO: get all the types in the tree
|
---|
41 |
|
---|
42 | // TODO: do a traverse, and thus cope with elements-within-elements if needsbe, but
|
---|
43 | // this shouldn't happen except in a directly defined situation - actually doing the
|
---|
44 | // parsing in part inside each node would work well provided one checked for a node
|
---|
45 | // having already been done...
|
---|
46 | System.out.println("Read");
|
---|
47 |
|
---|
48 | NodeList fileSecs = document.getElementsByTagName("mets:fileSec");
|
---|
49 |
|
---|
50 | for (int g = 0; g < fileSecs.getLength(); g ++) {
|
---|
51 | // Schema schema = new Schema(schemas.item(s));
|
---|
52 | this._parseFileSec(fileSecs.item(g));
|
---|
53 | }
|
---|
54 |
|
---|
55 | // Get document metadata sections
|
---|
56 | NodeList dmdSecs = document.getElementsByTagName("mets:dmdSec");
|
---|
57 |
|
---|
58 | // Schema schema = new Schema(schemas.item(s));
|
---|
59 | this.metadata = METSDescriptiveSet.parseXML(fileSecs);
|
---|
60 | }
|
---|
61 | catch (FactoryConfigurationError e) {
|
---|
62 | System.out.println(e);
|
---|
63 | }
|
---|
64 | catch (ParserConfigurationException ex) {
|
---|
65 | System.out.println(ex);
|
---|
66 | }
|
---|
67 | catch (SAXException ex) {
|
---|
68 | System.out.println(ex);
|
---|
69 | }
|
---|
70 | catch (IOException ex) {
|
---|
71 | System.out.println(ex);
|
---|
72 | }
|
---|
73 | }
|
---|
74 |
|
---|
75 | private void _parseFileSec(Node fileSec)
|
---|
76 | { // this is in effect a group without a sense of 'self'...
|
---|
77 | this._parseFileGroup((Element) fileSec, null);
|
---|
78 | }
|
---|
79 |
|
---|
80 | private void _parseFileGroup(Element groupTag, METSFileGroup group)
|
---|
81 | { NodeList children = groupTag.getChildNodes();
|
---|
82 |
|
---|
83 | for (int c = 0; c < children.getLength(); c ++)
|
---|
84 | { if (children.item(c).getNodeType() != org.w3c.dom.Node.ELEMENT_NODE) {
|
---|
85 | continue;
|
---|
86 | }
|
---|
87 |
|
---|
88 | System.out.println(children.item(c));
|
---|
89 |
|
---|
90 | Element element = (Element) children.item(c);
|
---|
91 |
|
---|
92 | if (element.getNodeName().equals("mets:File"))
|
---|
93 | { if (group != null)
|
---|
94 | { METSFile file = METSFile.parseXML(element, group);
|
---|
95 | }
|
---|
96 | else
|
---|
97 | { // TODO: error
|
---|
98 | }
|
---|
99 | }
|
---|
100 | else if (element.getNodeName().equals("mets:fileGrp"))
|
---|
101 | { // recurse
|
---|
102 | METSFileGroup childGroup = new METSFileGroup(element.getAttribute("ID"));
|
---|
103 |
|
---|
104 | this._parseFileGroup(element, childGroup);
|
---|
105 | if (group != null)
|
---|
106 | { group.addGroup(childGroup);
|
---|
107 | }
|
---|
108 | else
|
---|
109 | { this.fileSet.addGroup(childGroup);
|
---|
110 | }
|
---|
111 | }
|
---|
112 | }
|
---|
113 | }
|
---|
114 |
|
---|
115 | public String getDocumentType()
|
---|
116 | { return METS_DOCUMENT_TYPE;
|
---|
117 | }
|
---|
118 |
|
---|
119 | public String getDocumentText()
|
---|
120 | { // TODO: make this more than a dummy function!
|
---|
121 | return null;
|
---|
122 | }
|
---|
123 |
|
---|
124 | public String getSectionText(String sectionId)
|
---|
125 | { return null;
|
---|
126 | }
|
---|
127 |
|
---|
128 | }
|
---|