1 | /**
|
---|
2 | *
|
---|
3 | * @author [email protected]
|
---|
4 | * @version
|
---|
5 | */
|
---|
6 |
|
---|
7 |
|
---|
8 | // gsdl3 classes
|
---|
9 | import org.greenstone.gsdl3.util.XMLConverter;
|
---|
10 | import org.greenstone.gsdl3.util.GSFile;
|
---|
11 | // XML classes
|
---|
12 | import org.w3c.dom.Document;
|
---|
13 | import org.w3c.dom.Element;
|
---|
14 | import org.w3c.dom.Node;
|
---|
15 | import org.w3c.dom.traversal.NodeIterator;
|
---|
16 | import org.w3c.dom.traversal.NodeFilter;
|
---|
17 | import org.w3c.dom.traversal.DocumentTraversal;
|
---|
18 | import org.apache.xml.serialize.XMLSerializer;
|
---|
19 | import org.xml.sax.InputSource;
|
---|
20 | import org.xml.sax.EntityResolver;
|
---|
21 | // java classes
|
---|
22 | import java.io.File;
|
---|
23 | import java.io.FileOutputStream;
|
---|
24 |
|
---|
25 | public class ImportXML
|
---|
26 | implements EntityResolver {
|
---|
27 | File out_dir = null;
|
---|
28 | XMLConverter converter = null;
|
---|
29 |
|
---|
30 | String base_path = null;
|
---|
31 | public ImportXML() {
|
---|
32 | converter = new XMLConverter();
|
---|
33 | converter.setEntityResolver(this);
|
---|
34 |
|
---|
35 | }
|
---|
36 | public void setOutDir(File out_dir) {
|
---|
37 | this.out_dir = out_dir;
|
---|
38 | }
|
---|
39 | public void init() {
|
---|
40 |
|
---|
41 | }
|
---|
42 | public void importFile(File file) throws Exception {
|
---|
43 | importFile(file, "");
|
---|
44 | }
|
---|
45 | protected void importFile(File file, String local_path) throws Exception {
|
---|
46 |
|
---|
47 | if (file.isDirectory()) {
|
---|
48 | File files [] = file.listFiles();
|
---|
49 | for (int i=0; i<files.length; i++) {
|
---|
50 | //if (files[i].getName().endsWith(".xml")) {
|
---|
51 | importFile(files[i], local_path+File.separator+files[i].getName());
|
---|
52 | //}
|
---|
53 | }
|
---|
54 | return;
|
---|
55 | }
|
---|
56 |
|
---|
57 | base_path = file.getPath();
|
---|
58 | base_path = base_path.substring(0, base_path.lastIndexOf(File.separatorChar));
|
---|
59 | System.out.println("base path = "+base_path);
|
---|
60 | // now we have an actual file
|
---|
61 | System.out.println("processing file "+file.getPath());
|
---|
62 | File out_file = new File (out_dir, local_path);
|
---|
63 | String name = file.getName();
|
---|
64 | if (name.endsWith(".dtd")) {
|
---|
65 | if (!GSFile.copyFile(file, out_file)) {
|
---|
66 | System.err.println("couldn't copy dtd file "+file.getPath()+" to "+out_file.getPath()+"- please do the copy yourself");
|
---|
67 | }
|
---|
68 | //copy the file
|
---|
69 | return;
|
---|
70 | }
|
---|
71 | if (!name.endsWith(".xml")) {
|
---|
72 | // now we ignore any that don't end in .xml
|
---|
73 | return;
|
---|
74 | }
|
---|
75 | // now do the importing
|
---|
76 | Document doc = converter.getDOM(file);
|
---|
77 |
|
---|
78 | String gs3NS = "http://www.greenstone.org/gs3";
|
---|
79 |
|
---|
80 | Element rootNode = doc.getDocumentElement();
|
---|
81 |
|
---|
82 | rootNode.setAttribute("xmlns:gs3", gs3NS);
|
---|
83 |
|
---|
84 | DocumentTraversal traversal = (DocumentTraversal)doc;
|
---|
85 | NodeIterator i = traversal.createNodeIterator(doc, NodeFilter.SHOW_ELEMENT, null, true);
|
---|
86 |
|
---|
87 | Element element = null;
|
---|
88 | Node node = null;
|
---|
89 | int id = 0;
|
---|
90 | while ((node = i.nextNode()) != null) {
|
---|
91 | element = (Element)node;
|
---|
92 | if (XMLTagInfo.isIndexable(element.getNodeName())) {
|
---|
93 | element.setAttribute("gs3:id", Integer.toString(id++));
|
---|
94 | }
|
---|
95 | }
|
---|
96 |
|
---|
97 | XMLSerializer gs3Serializer = new XMLSerializer(new FileOutputStream(out_file), null);
|
---|
98 | gs3Serializer.asDOMSerializer().serialize(doc);
|
---|
99 |
|
---|
100 | }
|
---|
101 |
|
---|
102 | public void finish() {
|
---|
103 | }
|
---|
104 |
|
---|
105 | public InputSource resolveEntity (String public_id, String system_id) {
|
---|
106 |
|
---|
107 | if (system_id.startsWith("file://")) {
|
---|
108 | return new InputSource(system_id);
|
---|
109 | }
|
---|
110 | if (!system_id.startsWith(File.separator)) {
|
---|
111 | system_id = base_path+File.separatorChar+system_id;
|
---|
112 | }
|
---|
113 | return new InputSource("file://"+system_id);
|
---|
114 | }
|
---|
115 |
|
---|
116 | }
|
---|