Changeset 6102
- Timestamp:
- 2003-12-03T09:39:47+13:00 (20 years ago)
- Location:
- trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/extractor
- Files:
-
- 1 added
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/extractor/GMLExtractor.java
r6013 r6102 17 17 public class GMLExtractor implements ExtractorInterface 18 18 { 19 public static final String ACCUMULATE_MODE = "accumulate"; 20 19 21 /** 20 22 * An inner class to handle GML files 21 23 */ 22 24 class GMLHandler extends DefaultHandler 23 { String file;25 { List files; 24 26 String label; 25 27 StringBuffer value; 26 28 boolean inElement; 29 boolean accumulate; 27 30 28 31 GMLHandler() … … 35 38 36 39 public void startElement(String URI, String localName, String qName, Attributes attributes) 37 { if (localName.equals("File name"))40 { if (localName.equals("FileName")) 38 41 { this.value = new StringBuffer(); 42 } 43 else if (localName.equals("FileSet")) 44 { this.files = new ArrayList(); 45 } 46 else if (localName.equals("Description")) 47 { 39 48 } 40 49 else if (localName.equals("Metadata")) 41 50 { this.label = attributes.getValue("name"); 42 51 this.value = new StringBuffer(); 52 53 String mode = attributes.getValue("mode"); 54 this.accumulate = mode.equals(ACCUMULATE_MODE); 43 55 } 44 56 } 45 57 46 58 public void endElement(String URI, String localName, String qName) 47 { if (localName.equals("File name"))48 { this.file = this.value.toString();59 { if (localName.equals("FileName")) 60 { String file = this.value.toString(); 49 61 this.value = null; 62 this.files.add(file); 63 } 64 else if (localName.equals("FileSet")) 65 { // post the existing files item... 66 } 67 else if (localName.equals("Description")) 68 { 50 69 } 51 70 else if (localName.equals("Metadata")) 52 { GMLExtractor.postMetadata(this.file , this.label, this.value.toString());71 { GMLExtractor.postMetadata(this.files, this.label, this.value.toString()); 53 72 this.value = null; 54 73 this.label = null; … … 57 76 58 77 public void characters(char c[], int start, int length) 59 { if (this. 
label!= null)78 { if (this.value != null) 60 79 { String string = new String(c, start, length); 61 80 this.value.append(string); … … 123 142 } 124 143 125 protected static void postMetadata( String file, String value, String label)144 protected static void postMetadata(List files, String value, String label) 126 145 { 146 127 147 } 128 148 -
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/extractor/IndexExtractor.java
r6013 r6102 8 8 import java.util.ArrayList; 9 9 import java.util.Iterator; 10 import org.greenstone.gsdl3.gs3build.util.GS2TextFileHandler; 10 11 11 12 import org.xml.sax.XMLReader; … … 33 34 * An inner class to handle GML files 34 35 */ 35 class IndexHandler 36 { String content; 37 String line; 38 int pos; 39 boolean doneRow; 40 List labels; 36 class IndexHandler extends GS2TextFileHandler 37 { List labels; 41 38 URL base; 42 39 43 40 IndexHandler(String content, URL url, DocumentList documentList) throws IndexHandlerException 44 { this.content = content; 45 this.doneRow = false; 41 { 42 super(content); 43 46 44 this.labels = new ArrayList(); 47 45 this.base = url; … … 69 67 while (this.hasMore()) 70 68 { String label = this.getEntry(true); 71 if (label == null) 72 continue; 73 label.trim(); 74 75 if (label.length() == 0) { 69 if (label == null || label.length() == 0) { 76 70 continue; 77 71 } … … 86 80 // Get the file pattern itself 87 81 String filePattern = this.getEntry(true); 88 if (filePattern == null ) {82 if (filePattern == null || filePattern.length() == 0) { 89 83 continue; 90 84 } 91 85 92 filePattern.trim();93 if (filePattern.length() == 0) {94 continue;95 }96 97 86 // get a list of documents that match the file pattern 98 List files = documentList.findDocumentIdsUsingFile(filePattern);99 if ( files != null) {100 Iterator iterator = files.iterator();87 List documentIds = documentList.findDocumentIdsUsingFile(filePattern); 88 if (documentIds != null) { 89 Iterator iterator = documentIds.iterator(); 101 90 while (iterator.hasNext()) { 102 91 System.out.println("Matches file " + iterator.next().toString()); … … 106 95 // if no files match this data, then skip this row 107 96 // TODO: raise a quality error message 108 if ( files == null || files.size() == 0) {97 if (documentIds == null || documentIds.size() == 0) { 109 98 continue; 110 99 } 111 100 112 // TODO: cache up the documents that match for speed? 
101 // cache up the documents that match for speed improvements... 102 List documents = new ArrayList(); 103 Iterator idIterator = documentIds.iterator(); 104 while (idIterator.hasNext()) { 105 String docIdString = idIterator.next().toString(); 106 System.out.println(docIdString); 107 DocumentID docId = new DocumentID(docIdString); 108 DocumentInterface document = documentList.getDocument(docId); 109 if (document != null) { 110 documents.add(document); 111 } 112 } 113 113 114 114 // Next, split the row into the separate metadata items … … 116 116 while (this.hasMore()) { 117 117 String item = this.getEntry(true); 118 if (item == null) { 119 entryNo ++; 120 continue; 121 } 122 123 item.trim(); 124 if (item.length() == 0) { 118 if (item == null || item.length() == 0) { 125 119 entryNo ++; 126 120 continue; … … 154 148 // in order to minimise rewrites... 155 149 if (label != null) { 156 System.out.println("Posting " + label + "=" + item + " to " + parentDir + filePattern); 157 // Post to document 150 Iterator docIterator = documents.iterator(); 151 while (docIterator.hasNext()) { 152 DocumentInterface document = (DocumentInterface) docIterator.next(); 153 154 // Post to document 155 // TODO: tailor this to posting documents to *sections* as required... 
156 document.addDocumentMetadata(label, item); 157 System.out.println("Posting " + label + "=" + item + " to " + parentDir + filePattern); 158 } 158 159 } 159 160 entryNo ++; 160 161 } 162 163 // write out the modified documents 164 // TODO: nicer/more generalised interface for this and related activity in 165 // extractor manager (actually, enricher manager); 166 Iterator docIterator = documents.iterator(); 167 while (docIterator.hasNext()) { 168 DocumentInterface document = (DocumentInterface) docIterator.next(); 169 170 documentList.modifiedDocument(document); 171 } 161 172 } 162 173 } 163 174 164 private boolean hasMore()165 { return this.line != null;166 }167 168 private boolean hasMoreLines()169 { return this.content != null;170 }171 172 private String getEntry()173 { return this.getEntry(false);174 }175 176 private String getEntry(boolean breakSpace)177 { String reply;178 int start, tab = 0;179 boolean quoted = false;180 181 start = 0;182 while (start < this.line.length() &&183 this.line.charAt(start) == ' ') {184 start ++;185 }186 187 if (start == this.line.length()) {188 this.line = null;189 return null;190 }191 192 if (this.line.charAt(start) == '"') {193 quoted = true;194 breakSpace = false;195 start ++;196 }197 tab = start;198 199 while (tab != this.line.length() &&200 this.line.charAt(tab) != '\t' &&201 !(quoted && this.line.charAt(tab) == '"') &&202 !(this.line.charAt(tab) == ' ' && breakSpace))203 { tab ++;204 }205 206 if (start > 0) {207 this.line = this.line.substring(start);208 tab -= start;209 }210 211 if (tab == this.line.length()) {212 reply = this.line;213 this.line = null;214 }215 else {216 reply = this.line.substring(0, tab);217 this.line = this.line.substring(tab+1);218 }219 220 return reply;221 }222 223 private String getLine()224 { if (this.content == null) {225 this.line = null;226 return null;227 }228 229 do {230 int eol = this.content.indexOf('\n');231 if (eol < 0) {232 this.line = this.content;233 this.content = null;234 }235 else 
{236 this.line = this.content.substring(0, eol);237 this.content = this.content.substring(eol+1);238 while (this.content.length() > 0 &&239 this.content.charAt(0) < ' ')240 { this.content = this.content.substring(1);241 }242 }243 244 if (this.line != null) {245 this.line.trim();246 }247 } while (this.content != null && this.line != null && this.line.length() == 0);248 249 return this.line;250 }251 175 } 252 176
Note: See TracChangeset for help on using the changeset viewer.