- Timestamp:
- 2019-11-05T21:04:09+13:00 (4 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gs3-extensions/maori-lang-detection/src/org/greenstone/atea/TextDumpPage.java
r33615 r33623 84 84 String k = line.substring(0, endIndex); 85 85 String v = line.substring(endIndex+1); 86 if(k.startsWith("metadata")) { 87 k = k.substring("metadata".length()); 88 } 89 86 90 tuples.put(k.trim(), v.trim()); 87 91 } else { … … 134 138 } 135 139 140 /* Dr Nichols suggested storing timestamp and char encoding. Not sure which timestamp 141 or encoding he meant, but storing 2 of several timestamps and selecting 142 original character encoding (presumably the char encoding of the page) out of 2 143 pieces of char encoding metadata to store. */ 144 public String getModifiedTime() { 145 // is this the webpage's last mod time? 146 String time = tuples.get("modifiedTime"); 147 time = time.equals("0") ? "" : time; // zero will be assumed to be epoch, rather than unset 148 return time; 149 } 150 public String getFetchTime() { 151 // is this the nutch crawl time 152 String time = tuples.get("fetchTime"); 153 time = time.equals("0") ? "" : time; // zero will be assumed to be epoch, rather than unset 154 return time; 155 156 } 157 public String getOriginalCharEncoding() { 158 // is this the web page's char-encoding? 159 return tuples.get("OriginalCharEncoding"); 160 } 161 136 162 public String get(String key) { 137 163 return tuples.get(key);
Note:
See TracChangeset
for help on using the changeset viewer.