Changeset 6101
- Timestamp:
- 2003-12-03T09:38:39+13:00 (20 years ago)
- Location:
- trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes
- Files:
-
- 10 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/AbstractDocument.java
r6018 r6101 143 143 public abstract String getDocumentText(); 144 144 145 public abstract String getSectionText(String sectionId); 146 145 147 public String getMETSType() 146 148 { return "document"; -
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/DocumentFactory.java
r6018 r6101 24 24 public static DocumentInterface readSQLDocument(GS3SQLConnection connection, DocumentID id) 25 25 { 26 String query = "SELECT * FROM document WHERE DocID= "+id.toString()+";";26 String query = "SELECT * FROM document WHERE DocID=\""+id.toString()+"\";"; 27 27 connection.execute(query); 28 28 -
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/DocumentInterface.java
r5944 r6101 57 57 */ 58 58 public String getDocumentText(); 59 60 /** 61 * Get the text of a section of this document 62 * 63 * @return <code>String</code> the text as a string - for sections 64 * that have no textual components, this value may be 65 * <code>null</code> 66 */ 67 public String getSectionText(String sectionId); 59 68 60 69 /** -
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/DocumentList.java
r6018 r6101 101 101 } 102 102 103 /** 104 * Get a list of documents that match a 105 */ 106 public List findDocumentIdsUsingFiles(List fileRefs, String withinNode) 107 { 108 StringBuffer queryBuffer = new StringBuffer("SELECT FileGroupRef FROM files WHERE FileLocation "); 109 110 Iterator files = fileRefs.iterator(); 111 while (files.hasNext()) { 112 String file = files.next().toString(); 113 114 if (withinNode != null) { 115 queryBuffer.append("REGEXP \"^"); 116 queryBuffer.append(withinNode); 117 queryBuffer.append(".*"); 118 } 119 else { 120 queryBuffer.append("REGEXP \""); 121 } 122 queryBuffer.append(file); 123 queryBuffer.append("\""); 124 125 if (files.hasNext()) { 126 queryBuffer.append(" OR "); 127 } 128 } 129 queryBuffer.append(";"); 130 return this.findDocumentIdsUsingFileQuery(queryBuffer.toString()); 131 } 132 133 public List findDocumentIdsUsingFile(String fileRef, String withinNode) 134 { String query = "SELECT FileGroupRef FROM files WHERE FileLocation REGEXP \"^"+withinNode+".*"+fileRef+"\";"; 135 return this.findDocumentIdsUsingFileQuery(query); 136 } 137 103 138 public List findDocumentIdsUsingFile(String fileRef) 104 139 { 105 140 // Get the simple list of file objects & their file group reference 106 141 String query = "SELECT FileGroupRef FROM files WHERE FileLocation REGEXP \"" + fileRef +"\";"; 107 108 this.connection.execute(query); 142 143 return this.findDocumentIdsUsingFileQuery(query); 144 } 145 146 private List findDocumentIdsUsingFileQuery(String query) 147 { this.connection.execute(query); 109 148 110 149 try { -
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/GMLDocument.java
r5800 r6101 21 21 public class GMLDocument extends AbstractDocument 22 22 { 23 23 public static final String GML_DOCUMENT_TYPE = "GML"; 24 24 25 26 {super(url);27 25 public GMLDocument(URL url) 26 { super(url); 27 } 28 28 29 30 {return GML_DOCUMENT_TYPE;31 29 public String getDocumentType() 30 { return GML_DOCUMENT_TYPE; 31 } 32 32 33 34 35 36 37 38 33 /** 34 * A pretty minimal and lazy document text extraction process. 35 */ 36 public String getDocumentText() 37 { return ""; 38 } 39 39 40 public METSDescriptiveSet getDocumentMetadata() 41 { return null; 42 } 40 public String getSectionText(String sectionId) 41 { return ""; 42 } 43 44 public METSDescriptiveSet getDocumentMetadata() 45 { return null; 46 } 43 47 } -
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/HTMLDocument.java
r5944 r6101 143 143 return htmlDoc.getContent(); 144 144 } 145 146 public String getSectionText(String document) 147 { return ""; 148 } 145 149 } -
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/IndexDocument.java
r6010 r6101 42 42 } 43 43 44 public String getSectionText(String sectionId) 45 { return ""; 46 } 47 44 48 /** 45 49 * Indicate whether this document is indexed. -
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/JPEGDocument.java
r5800 r6101 13 13 public class JPEGDocument extends AbstractDocument 14 14 { 15 public static final String DOCUMENT_JPEG_TYPE = "JPEG"; 15 public static final String DOCUMENT_JPEG_TYPE = "JPEG"; 16 17 public JPEGDocument(URL url) 18 { super(url); 19 } 16 20 17 public JPEGDocument(URL url)18 { super(url);19 21 public String getDocumentType() 22 { return DOCUMENT_JPEG_TYPE; 23 } 20 24 21 public String getDocumentType() 22 { return DOCUMENT_JPEG_TYPE; 23 } 25 public String getDocumentText() 26 { // Just a dummy function for JPEGs 27 return ""; 28 } 24 29 25 public String getDocumentText()26 27 28 30 public String getSectionText(String sectionId) 31 { // Just a dummy function for JPEGs 32 return ""; 33 } 29 34 30 31 {return true;32 33 34 35 {return null;36 35 public boolean isMETSCompatible() 36 { return true; 37 } 38 39 public DocumentWriter getMETSWriter() 40 { return null; 41 } 37 42 } -
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/METSDocument.java
r5800 r6101 21 21 public class METSDocument extends AbstractDocument 22 22 { 23 23 public static final String METS_DOCUMENT_TYPE = "METS"; 24 24 25 public METSDocument(URL url) 26 { super(url); 25 public METSDocument(URL url) 26 { super(url); 27 28 if (url.toString().startsWith("file://")) 29 { this._parseFile(new File(url.toString().substring(7))); 30 } 31 } 32 33 private void _parseFile(File file) 34 { 35 try { 36 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); 37 DocumentBuilder builder = factory.newDocumentBuilder(); 38 Document document = builder.parse(file); 27 39 28 if (url.toString().startsWith("file://")) 29 { this._parseFile(new File(url.toString().substring(7))); 30 } 40 // TODO: get all the types in the tree 41 42 // TODO: do a traverse, and thus cope with elements-within-elements if needsbe, but 43 // this shouldn't happen except in a directly defined situation - actually doing the 44 // parsing in part inside each node would work well provided one checked for a node 45 // having already been done... 46 System.out.println("Read"); 47 48 NodeList fileSecs = document.getElementsByTagName("mets:fileSec"); 49 50 for (int g = 0; g < fileSecs.getLength(); g ++) { 51 // Schema schema = new Schema(schemas.item(s)); 52 this._parseFileSec(fileSecs.item(g)); 53 } 54 55 // Get document metadata sections 56 NodeList dmdSecs = document.getElementsByTagName("mets:dmdSec"); 57 58 // Schema schema = new Schema(schemas.item(s)); 59 this.metadata = METSDescriptiveSet.parseXML(fileSecs); 60 } 61 catch (FactoryConfigurationError e) { 62 System.out.println(e); 63 } 64 catch (ParserConfigurationException ex) { 65 System.out.println(ex); 66 } 67 catch (SAXException ex) { 68 System.out.println(ex); 69 } 70 catch (IOException ex) { 71 System.out.println(ex); 72 } 73 } 74 75 private void _parseFileSec(Node fileSec) 76 { // this is in effect a group without a sense of 'self'... 77 this._parseFileGroup((Element) fileSec, null); 78 } 79 80 private void _parseFileGroup(Element groupTag, METSFileGroup group) 81 { NodeList children = groupTag.getChildNodes(); 82 83 for (int c = 0; c < children.getLength(); c ++) 84 { if (children.item(c).getNodeType() != org.w3c.dom.Node.ELEMENT_NODE) { 85 continue; 86 } 87 88 System.out.println(children.item(c)); 89 90 Element element = (Element) children.item(c); 91 92 if (element.getNodeName().equals("mets:File")) 93 { if (group != null) 94 { METSFile file = METSFile.parseXML(element, group); 31 95 } 96 else 97 { // TODO: error 98 } 99 } 100 else if (element.getNodeName().equals("mets:fileGrp")) 101 { // recurse 102 METSFileGroup childGroup = new METSFileGroup(element.getAttribute("ID")); 32 103 33 private void _parseFile(File file) 34 { 35 try { 36 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); 37 DocumentBuilder builder = factory.newDocumentBuilder(); 38 Document document = builder.parse(file); 104 this._parseFileGroup(element, childGroup); 105 if (group != null) 106 { group.addGroup(childGroup); 107 } 108 else 109 { this.fileSet.addGroup(childGroup); 110 } 111 } 112 } 113 } 39 114 40 // TODO: get all the types in the tree 115 public String getDocumentType() 116 { return METS_DOCUMENT_TYPE; 117 } 41 118 42 // TODO: do a traverse, and thus cope with elements-within-elements if needsbe, but 43 // this shouldn't happen except in a directly defined situation - actually doing the 44 // parsing in part inside each node would work well provided one checked for a node 45 // having already been done... 46 System.out.println("Read"); 119 public String getDocumentText() 120 { // TODO: make this more than a dummy function! 121 return null; 122 } 47 123 48 NodeList fileSecs = document.getElementsByTagName("mets:fileSec"); 124 public String getSectionText(String sectionId) 125 { return null; 126 } 49 127 50 for (int g = 0; g < fileSecs.getLength(); g ++) {51 // Schema schema = new Schema(schemas.item(s));52 this._parseFileSec(fileSecs.item(g));53 }54 55 // Get document metadata sections56 NodeList dmdSecs = document.getElementsByTagName("mets:dmdSec");57 58 // Schema schema = new Schema(schemas.item(s));59 this.metadata = METSDescriptiveSet.parseXML(fileSecs);60 }61 catch (FactoryConfigurationError e) {62 System.out.println(e);63 }64 catch (ParserConfigurationException ex) {65 System.out.println(ex);66 }67 catch (SAXException ex) {68 System.out.println(ex);69 }70 catch (IOException ex) {71 System.out.println(ex);72 }73 }74 75 private void _parseFileSec(Node fileSec)76 { // this is in effect a group without a sense of 'self'...77 this._parseFileGroup((Element) fileSec, null);78 }79 80 private void _parseFileGroup(Element groupTag, METSFileGroup group)81 { NodeList children = groupTag.getChildNodes();82 83 for (int c = 0; c < children.getLength(); c ++)84 { if (children.item(c).getNodeType() != org.w3c.dom.Node.ELEMENT_NODE) {85 continue;86 }87 88 System.out.println(children.item(c));89 90 Element element = (Element) children.item(c);91 92 if (element.getNodeName().equals("mets:File"))93 { if (group != null)94 { METSFile file = METSFile.parseXML(element, group);95 }96 else97 { // TODO: error98 }99 }100 else if (element.getNodeName().equals("mets:fileGrp"))101 { // recurse102 METSFileGroup childGroup = new METSFileGroup(element.getAttribute("ID"));103 104 this._parseFileGroup(element, childGroup);105 if (group != null)106 { group.addGroup(childGroup);107 }108 else109 { this.fileSet.addGroup(childGroup);110 }111 }112 }113 }114 115 public String getDocumentType()116 { return METS_DOCUMENT_TYPE;117 }118 119 public String getDocumentText()120 { // TODO: make this more than a dummy function!121 return null;122 }123 128 } -
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/TextDocument.java
r5944 r6101 35 35 { return DocumentLoader.getAsString((URL) this.fileSet.getFile(0).getLocation()); 36 36 } 37 38 /** 39 * Text documents don't actually have more than one section (at present), so 40 * this is a dummy function... 41 */ 42 public String getSectionText(String sectionId) 43 { return null; 44 } 37 45 }
Note:
See TracChangeset
for help on using the changeset viewer.