- Timestamp: 2019-10-31T20:03:55+13:00 (4 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gs3-extensions/maori-lang-detection/src/org/greenstone/atea/WETProcessor.java
r33573 r33615 77 77 78 78 String fileID = inFile.getName(); 79 // debug("*** Processing wetfile: " + fileID);79 //logger.debug("*** Processing wetfile: " + fileID); 80 80 fileID = fileID.substring(fileID.lastIndexOf("0")+1); 81 81 if(fileID.startsWith(".")) { // took off too many zeroes, as happens with *000000.warc.wet … … 195 195 String recordURI, String record) 196 196 { 197 info("CrawlID: CC-MAIN-" + this.crawlID197 logger.info("CrawlID: CC-MAIN-" + this.crawlID 198 198 + " WET #" + this.WETFileID 199 199 + " record #" + recordID 200 200 + " - contentLength: " + contentLength 201 201 + " - lineCount: " + lineCount); 202 info("URI: " + recordURI);203 // debug(record);204 // info("--------------------------");202 logger.info("URI: " + recordURI); 203 //logger.debug(record); 204 //logger.info("--------------------------"); 205 205 206 206 File parentFolder = null; … … 215 215 else if(batchProcessor.isGreylisted(recordURI)) { 216 216 parentFolder = batchProcessor.greyListedFolder; 217 debug("@@@GREYLISTED");217 logger.debug("@@@GREYLISTED"); 218 218 } 219 219 else { // url was only blacklisted 220 220 parentFolder = batchProcessor.discardFolder; 221 debug("@@@DISCARDING - blacklisted");221 logger.debug("@@@DISCARDING - blacklisted"); 222 222 } 223 223 } … … 229 229 else { 230 230 parentFolder = batchProcessor.greyListedFolder; 231 debug("@@@GREYLISTED");231 logger.debug("@@@GREYLISTED"); 232 232 } 233 233 } … … 274 274 if(numCamelCaseWords >= batchProcessor.MAX_WORDS_CAMELCASE) { 275 275 parentFolder = batchProcessor.discardFolder; 276 debug("@@@DISCARDING - CAMELCASED CONTENTS");276 logger.debug("@@@DISCARDING - CAMELCASED CONTENTS"); 277 277 } 278 278 else*/ … … 282 282 if(validWordCount >= batchProcessor.MIN_NUM_WORDS) { // otherwise, keep anything with a sufficient number of valid words 283 283 parentFolder = batchProcessor.keepFolder; 284 debug("@@@KEEPING");284 logger.debug("@@@KEEPING"); 285 285 } 286 286 } … … 289 289 if(parentFolder == null) { 290 290 
parentFolder = batchProcessor.discardFolder; 291 debug("@@@DISCARDING");291 logger.debug("@@@DISCARDING"); 292 292 } 293 293 … … 301 301 } 302 302 } catch(Exception e) { 303 debug("Unable to write URL");303 logger.debug("Unable to write URL"); 304 304 e.printStackTrace(); 305 305 } 306 306 307 debug("--------------------------");307 logger.debug("--------------------------"); 308 308 309 309 // outFilename will look something like YYYY-##-#### … … 319 319 } catch(IOException ioe) { 320 320 ioe.printStackTrace(); 321 error("@@@@@@@@@ Error writing to file " + outFile, ioe);321 logger.error("@@@@@@@@@ Error writing to file " + outFile, ioe); 322 322 } 323 323 } 324 324 325 325 326 public void info(String msg) {327 System.err.println(msg);328 logger.info(msg);329 }330 public void debug(String msg) {331 System.err.println(msg);332 logger.debug(msg);333 }334 public void warn(String msg) {335 System.err.println(msg);336 logger.warn(msg);337 }338 public void error(String msg) {339 System.err.println(msg);340 logger.error(msg);341 }342 public void error(String msg, Exception e) {343 logger.error(msg, e);344 System.err.println("\n"+msg);345 e.printStackTrace();346 }347 326 }
Note: See TracChangeset for help on using the changeset viewer.