source: trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/METSDocument.java@ 6897

Last change on this file since 6897 was 6101, checked in by cs025, 21 years ago

Added getSectionText member function to documents

  • Property svn:keywords set to Author Date Id Revision
File size: 3.3 KB
Line 
1package org.greenstone.gsdl3.gs3build.doctypes;
2
3import java.io.*;
4
5import java.net.URL;
6
7import javax.xml.parsers.*;
8
9import org.w3c.dom.Document;
10import org.w3c.dom.Element;
11import org.w3c.dom.NamedNodeMap;
12import org.w3c.dom.Node;
13import org.w3c.dom.NodeList;
14import org.w3c.dom.Text;
15
16import org.xml.sax.SAXException;
17import org.xml.sax.SAXParseException;
18
19import org.greenstone.gsdl3.gs3build.metadata.*;
20
21public class METSDocument extends AbstractDocument
22{
23 public static final String METS_DOCUMENT_TYPE = "METS";
24
25 public METSDocument(URL url)
26 { super(url);
27
28 if (url.toString().startsWith("file://"))
29 { this._parseFile(new File(url.toString().substring(7)));
30 }
31 }
32
33 private void _parseFile(File file)
34 {
35 try {
36 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
37 DocumentBuilder builder = factory.newDocumentBuilder();
38 Document document = builder.parse(file);
39
40 // TODO: get all the types in the tree
41
42 // TODO: do a traverse, and thus cope with elements-within-elements if needsbe, but
43 // this shouldn't happen except in a directly defined situation - actually doing the
44 // parsing in part inside each node would work well provided one checked for a node
45 // having already been done...
46 System.out.println("Read");
47
48 NodeList fileSecs = document.getElementsByTagName("mets:fileSec");
49
50 for (int g = 0; g < fileSecs.getLength(); g ++) {
51 // Schema schema = new Schema(schemas.item(s));
52 this._parseFileSec(fileSecs.item(g));
53 }
54
55 // Get document metadata sections
56 NodeList dmdSecs = document.getElementsByTagName("mets:dmdSec");
57
58 // Schema schema = new Schema(schemas.item(s));
59 this.metadata = METSDescriptiveSet.parseXML(fileSecs);
60 }
61 catch (FactoryConfigurationError e) {
62 System.out.println(e);
63 }
64 catch (ParserConfigurationException ex) {
65 System.out.println(ex);
66 }
67 catch (SAXException ex) {
68 System.out.println(ex);
69 }
70 catch (IOException ex) {
71 System.out.println(ex);
72 }
73 }
74
75 private void _parseFileSec(Node fileSec)
76 { // this is in effect a group without a sense of 'self'...
77 this._parseFileGroup((Element) fileSec, null);
78 }
79
80 private void _parseFileGroup(Element groupTag, METSFileGroup group)
81 { NodeList children = groupTag.getChildNodes();
82
83 for (int c = 0; c < children.getLength(); c ++)
84 { if (children.item(c).getNodeType() != org.w3c.dom.Node.ELEMENT_NODE) {
85 continue;
86 }
87
88 System.out.println(children.item(c));
89
90 Element element = (Element) children.item(c);
91
92 if (element.getNodeName().equals("mets:File"))
93 { if (group != null)
94 { METSFile file = METSFile.parseXML(element, group);
95 }
96 else
97 { // TODO: error
98 }
99 }
100 else if (element.getNodeName().equals("mets:fileGrp"))
101 { // recurse
102 METSFileGroup childGroup = new METSFileGroup(element.getAttribute("ID"));
103
104 this._parseFileGroup(element, childGroup);
105 if (group != null)
106 { group.addGroup(childGroup);
107 }
108 else
109 { this.fileSet.addGroup(childGroup);
110 }
111 }
112 }
113 }
114
115 public String getDocumentType()
116 { return METS_DOCUMENT_TYPE;
117 }
118
119 public String getDocumentText()
120 { // TODO: make this more than a dummy function!
121 return null;
122 }
123
124 public String getSectionText(String sectionId)
125 { return null;
126 }
127
128}
Note: See TracBrowser for help on using the repository browser.