Changeset 33615 for gs3-extensions/maori-lang-detection/src/org/greenstone/atea/NutchTextDumpProcessor.java
- Timestamp: 2019-10-31T20:03:55+13:00 (4 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gs3-extensions/maori-lang-detection/src/org/greenstone/atea/NutchTextDumpProcessor.java
r33602 r33615 49 49 */ 50 50 public class NutchTextDumpProcessor { 51 privatestatic Logger logger = Logger.getLogger(org.greenstone.atea.NutchTextDumpProcessor.class.getName());51 static Logger logger = Logger.getLogger(org.greenstone.atea.NutchTextDumpProcessor.class.getName()); 52 52 53 53 static boolean DEBUG_MODE = true; … … 91 91 if(DEBUG_MODE) { 92 92 // START DEBUG 93 debug("__________________________________________");94 debug("@@@ Found page entry: ");95 debug("__________________________________________");96 debug(pageDump.toString());97 debug("------------------------------------------");93 logger.debug("__________________________________________"); 94 logger.debug("@@@ Found page entry: "); 95 logger.debug("__________________________________________"); 96 logger.debug(pageDump.toString()); 97 logger.debug("------------------------------------------"); 98 98 // END DEBUG 99 99 } … … 174 174 175 175 } catch (IOException ioe) { 176 error("@@@@@@@@@ Error reading in nutch txtdump file " + txtDumpFile, ioe);176 logger.error("@@@@@@@@@ Error reading in nutch txtdump file " + txtDumpFile, ioe); 177 177 } 178 178 … … 302 302 303 303 304 info("------------- " + this.siteID + " SITE STATS -----------");305 306 info("SITE DOMAIN: " + this.domainOfSite);307 info("Total number of web pages in site: " + pages.size());308 info("Of these, the number of pages in MÄori (mri) were: " + this.pagesInMRI.size());304 logger.info("------------- " + this.siteID + " SITE STATS -----------"); 305 306 logger.info("SITE DOMAIN: " + this.domainOfSite); 307 logger.info("Total number of web pages in site: " + pages.size()); 308 logger.info("Of these, the number of pages in MÄori (mri) were: " + this.pagesInMRI.size()); 309 309 310 310 if(pagesInMRI.size() > 0) { 311 info("The following were the pages detected by OpenNLP as being in MÄori with " + maoriTxtDetector.MINIMUM_CONFIDENCE + " confidence");311 logger.info("The following were the pages detected by OpenNLP as being in MÄori with 
" + maoriTxtDetector.MINIMUM_CONFIDENCE + " confidence"); 312 312 for(MRIWebPageStats mriWebPageInfo : pagesInMRI) { 313 info(mriWebPageInfo.toString());314 } 315 } 316 317 info(" ----------- ");313 logger.info(mriWebPageInfo.toString()); 314 } 315 } 316 317 logger.info(" ----------- "); 318 318 if(pagesContainingMRI.size() > 0) { 319 info("The following pages weren't detected as primarily being in MÄori");320 info("But still contained sentences detected as MÄori");319 logger.info("The following pages weren't detected as primarily being in MÄori"); 320 logger.info("But still contained sentences detected as MÄori"); 321 321 for(MRIWebPageStats mriWebPageInfo : pagesContainingMRI) { 322 info(mriWebPageInfo.toString());322 logger.info(mriWebPageInfo.toString()); 323 323 } 324 324 325 325 } else { 326 info("No further pages detected as containing any sentences in MRI");327 } 328 info(" ----------- ");326 logger.info("No further pages detected as containing any sentences in MRI"); 327 } 328 logger.info(" ----------- "); 329 329 } 330 330 … … 358 358 359 359 // --------------- STATIC METHODS AND INNER CLASSED USED BY MAIN -------------- // 360 public static void info(String msg) { 361 System.err.println(msg); 362 logger.info(msg); 363 } 364 public static void debug(String msg) { 365 System.err.println(msg); 366 logger.debug(msg); 367 } 368 public static void warn(String msg) { 369 System.err.println(msg); 370 logger.warn(msg); 371 } 372 public static void error(String msg) { 373 System.err.println(msg); 374 logger.error(msg); 375 } 376 public static void error(String msg, Exception e) { 377 logger.error(msg, e); 378 System.err.println("\n"+msg); 379 e.printStackTrace(); 380 } 381 360 382 361 public static void printUsage() { 383 info("Run this program as:");384 info("\tNutchTextDumpProcessor <path to 'crawled' folder>");362 System.err.println("Run this program as:"); 363 System.err.println("\tNutchTextDumpProcessor <path to 'crawled' folder>"); 385 364 } 386 365 … … 393 
372 File sitesDir = new File(args[0]); 394 373 if(!sitesDir.exists() || !sitesDir.isDirectory()) { 395 error("Error: " + args[0] + " does not exist or is not a directory");374 logger.error("Error: " + args[0] + " does not exist or is not a directory"); 396 375 return; 397 376 } … … 430 409 File txtDumpFile = new File(siteDir, "dump.txt"); 431 410 if(!txtDumpFile.exists()) { 432 error("Text dump file " + txtDumpFile + " did not exist");411 logger.error("Text dump file " + txtDumpFile + " did not exist"); 433 412 continue; 434 413 } … … 439 418 String siteID = siteDir.getName(); 440 419 long lastModified = siteDir.lastModified(); 441 debug("Found siteID: " + siteID);420 logger.debug("Found siteID: " + siteID); 442 421 NutchTextDumpProcessor nutchTxtDump = new NutchTextDumpProcessor( 443 422 webpagesCSVPrinter, mriSentencesCSVPrinter, mriTxtDetector, … … 458 437 // can get an exception when instantiating CCWETProcessor instance 459 438 // or with CSV file 460 error(e.getMessage(), e);439 logger.error(e.getMessage(), e); 461 440 } 462 441 }
Note: See TracChangeset for help on using the changeset viewer.