source: main/trunk/greenstone3/web/sites/localsite/collect/gberg/java/ImportXML.java@ 32484

Last change on this file since 32484 was 32484, checked in by kjdon, 6 years ago

XMLConverter no longer has a setEntityResolver method; the entity resolver is now passed in to getDOM instead.

  • Property svn:keywords set to Author Date Id Revision
File size: 3.2 KB
Line 
1/**
2 *
3 * @author [email protected]
4 * @version
5 */
6
7
8// gsdl3 classes
9import org.greenstone.gsdl3.util.XMLConverter;
10import org.greenstone.gsdl3.util.GSFile;
11// XML classes
12import org.w3c.dom.Document;
13import org.w3c.dom.Element;
14import org.w3c.dom.Node;
15import org.w3c.dom.traversal.NodeIterator;
16import org.w3c.dom.traversal.NodeFilter;
17import org.w3c.dom.traversal.DocumentTraversal;
18import org.apache.xml.serialize.XMLSerializer;
19import org.xml.sax.InputSource;
20import org.xml.sax.EntityResolver;
21// java classes
22import java.io.File;
23import java.io.FileOutputStream;
24
25public class ImportXML
26 implements EntityResolver {
27 File out_dir = null;
28 XMLConverter converter = null;
29
30 String base_path = null;
31 public ImportXML() {
32 converter = new XMLConverter();
33 }
34 public void setOutDir(File out_dir) {
35 this.out_dir = out_dir;
36 }
37 public void init() {
38
39 }
40 public void importFile(File file) throws Exception {
41 importFile(file, "");
42 }
43 protected void importFile(File file, String local_path) throws Exception {
44
45 if (file.isDirectory()) {
46 File files [] = file.listFiles();
47 for (int i=0; i<files.length; i++) {
48 //if (files[i].getName().endsWith(".xml")) {
49 importFile(files[i], local_path+File.separator+files[i].getName());
50 //}
51 }
52 return;
53 }
54
55 base_path = file.getPath();
56 base_path = base_path.substring(0, base_path.lastIndexOf(File.separatorChar));
57 System.out.println("base path = "+base_path);
58 // now we have an actual file
59 System.out.println("processing file "+file.getPath());
60 File out_file = new File (out_dir, local_path);
61 String name = file.getName();
62 if (name.endsWith(".dtd")) {
63 if (!GSFile.copyFile(file, out_file)) {
64 System.err.println("couldn't copy dtd file "+file.getPath()+" to "+out_file.getPath()+"- please do the copy yourself");
65 }
66 //copy the file
67 return;
68 }
69 if (!name.endsWith(".xml")) {
70 // now we ignore any that don't end in .xml
71 return;
72 }
73 // now do the importing
74 Document doc = converter.getDOM(file, this);
75
76 String gs3NS = "http://www.greenstone.org/gs3";
77
78 Element rootNode = doc.getDocumentElement();
79
80 rootNode.setAttribute("xmlns:gs3", gs3NS);
81
82 DocumentTraversal traversal = (DocumentTraversal)doc;
83 NodeIterator i = traversal.createNodeIterator(doc, NodeFilter.SHOW_ELEMENT, null, true);
84
85 Element element = null;
86 Node node = null;
87 int id = 0;
88 while ((node = i.nextNode()) != null) {
89 element = (Element)node;
90 if (XMLTagInfo.isIndexable(element.getNodeName())) {
91 element.setAttribute("gs3:id", Integer.toString(id++));
92 }
93 }
94
95 XMLSerializer gs3Serializer = new XMLSerializer(new FileOutputStream(out_file), null);
96 gs3Serializer.asDOMSerializer().serialize(doc);
97
98 }
99
100 public void finish() {
101 }
102
103 public InputSource resolveEntity (String public_id, String system_id) {
104
105 if (system_id.startsWith("file://")) {
106 return new InputSource(system_id);
107 }
108 if (!system_id.startsWith(File.separator)) {
109 system_id = base_path+File.separatorChar+system_id;
110 }
111 return new InputSource("file://"+system_id);
112 }
113
114}
Note: See TracBrowser for help on using the repository browser.