Ignore:
Timestamp:
2019-10-31T20:03:55+13:00 (4 years ago)
Author:
ak19
Message:
  1. Worked out how to configure log4j to log to both the console and a logfile, so modified the existing laboured code to use this better approach. 2. Added some MongoDB links under MoreReading.
File:
1 edited

Legend:

Unmodified
Added
Removed
  • gs3-extensions/maori-lang-detection/src/org/greenstone/atea/NutchTextDumpProcessor.java

    r33602 r33615  
    4949*/
    5050public class NutchTextDumpProcessor {
    51     private static Logger logger = Logger.getLogger(org.greenstone.atea.NutchTextDumpProcessor.class.getName());
     51    static Logger logger = Logger.getLogger(org.greenstone.atea.NutchTextDumpProcessor.class.getName());
    5252
    5353    static boolean DEBUG_MODE = true;
     
    9191    if(DEBUG_MODE) {
    9292        // START DEBUG
    93         debug("__________________________________________");
    94         debug("@@@ Found page entry: ");
    95         debug("__________________________________________");
    96         debug(pageDump.toString());
    97         debug("------------------------------------------");
     93        logger.debug("__________________________________________");
     94        logger.debug("@@@ Found page entry: ");
     95        logger.debug("__________________________________________");
     96        logger.debug(pageDump.toString());
     97        logger.debug("------------------------------------------");
    9898        // END DEBUG
    9999    }
     
    174174       
    175175    } catch (IOException ioe) {
    176         error("@@@@@@@@@ Error reading in nutch txtdump file " + txtDumpFile, ioe);
     176        logger.error("@@@@@@@@@ Error reading in nutch txtdump file " + txtDumpFile, ioe);
    177177    }
    178178
     
    302302   
    303303   
    304     info("------------- " + this.siteID + " SITE STATS -----------");
    305 
    306     info("SITE DOMAIN: " + this.domainOfSite);
    307     info("Total number of web pages in site: " + pages.size());
    308     info("Of these, the number of pages in Māori (mri) were: " + this.pagesInMRI.size());
     304    logger.info("------------- " + this.siteID + " SITE STATS -----------");
     305
     306    logger.info("SITE DOMAIN: " + this.domainOfSite);
     307    logger.info("Total number of web pages in site: " + pages.size());
     308    logger.info("Of these, the number of pages in Māori (mri) were: " + this.pagesInMRI.size());
    309309   
    310310    if(pagesInMRI.size() > 0) {
    311         info("The following were the pages detected by OpenNLP as being in Māori with " + maoriTxtDetector.MINIMUM_CONFIDENCE + " confidence");
     311        logger.info("The following were the pages detected by OpenNLP as being in Māori with " + maoriTxtDetector.MINIMUM_CONFIDENCE + " confidence");
    312312        for(MRIWebPageStats mriWebPageInfo : pagesInMRI) {
    313         info(mriWebPageInfo.toString());
    314         }
    315     }
    316 
    317     info("                      -----------                   ");
     313        logger.info(mriWebPageInfo.toString());
     314        }
     315    }
     316
     317    logger.info("                      -----------                   ");
    318318    if(pagesContainingMRI.size() > 0) {     
    319         info("The following pages weren't detected as primarily being in Māori");
    320         info("But still contained sentences detected as Māori");
     319        logger.info("The following pages weren't detected as primarily being in Māori");
     320        logger.info("But still contained sentences detected as Māori");
    321321        for(MRIWebPageStats mriWebPageInfo : pagesContainingMRI) {
    322         info(mriWebPageInfo.toString());
     322        logger.info(mriWebPageInfo.toString());
    323323        }
    324324       
    325325    } else {
    326         info("No further pages detected as containing any sentences in MRI");     
    327     }
    328     info("                      -----------                   ");
     326        logger.info("No further pages detected as containing any sentences in MRI");       
     327    }
     328    logger.info("                      -----------                   ");
    329329    }
    330330
     
    358358   
    359359    // --------------- STATIC METHODS AND INNER CLASSES USED BY MAIN -------------- //
    360     public static void info(String msg) {
    361     System.err.println(msg);
    362     logger.info(msg);
    363     }
    364     public static void debug(String msg) {
    365     System.err.println(msg);
    366     logger.debug(msg);
    367     }
    368     public static void warn(String msg) {
    369     System.err.println(msg);
    370     logger.warn(msg);
    371     }
    372     public static void error(String msg) {
    373     System.err.println(msg);
    374     logger.error(msg);
    375     }
    376     public static void error(String msg, Exception e) {
    377     logger.error(msg, e);
    378     System.err.println("\n"+msg);
    379     e.printStackTrace();
    380     }
    381    
     360   
    382361    public static void printUsage() {
    383     info("Run this program as:");
    384     info("\tNutchTextDumpProcessor <path to 'crawled' folder>");
     362    System.err.println("Run this program as:");
     363    System.err.println("\tNutchTextDumpProcessor <path to 'crawled' folder>");
    385364    }
    386365   
     
    393372    File sitesDir = new File(args[0]);
    394373    if(!sitesDir.exists() || !sitesDir.isDirectory()) {
    395         error("Error: " + args[0] + " does not exist or is not a directory");
     374        logger.error("Error: " + args[0] + " does not exist or is not a directory");
    396375        return;
    397376    }
     
    430409            File txtDumpFile = new File(siteDir, "dump.txt");
    431410            if(!txtDumpFile.exists()) {
    432             error("Text dump file " + txtDumpFile + " did not exist");
     411            logger.error("Text dump file " + txtDumpFile + " did not exist");
    433412            continue;
    434413            }
     
    439418            String siteID = siteDir.getName();
    440419            long lastModified = siteDir.lastModified();
    441             debug("Found siteID: " + siteID);           
     420            logger.debug("Found siteID: " + siteID);           
    442421            NutchTextDumpProcessor nutchTxtDump = new NutchTextDumpProcessor(
    443422                 webpagesCSVPrinter, mriSentencesCSVPrinter, mriTxtDetector,
     
    458437        // can get an exception when instantiating CCWETProcessor instance
    459438        // or with CSV file
    460         error(e.getMessage(), e);
     439        logger.error(e.getMessage(), e);
    461440    }
    462441    }
Note: See TracChangeset for help on using the changeset viewer.