Changeset 33656 for other-projects


Ignore:
Timestamp:
2019-11-12T21:11:05+13:00 (4 years ago)
Author:
ak19
Message:

Final minor changes before I start processing the crawls of node2.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • other-projects/maori-lang-detection/src/org/greenstone/atea/NutchTextDumpToMongoDB.java

    r33655 r33656  
    374 374             
    375 375              else {
    376     -            File UNFINISHED_FILE = new File(siteDir, "UNFINISHED");         
        376 +            File UNFINISHED_FILE = new File(siteDir, "UNFINISHED");
    377 377             
    378 378              String siteID = siteDir.getName();
        379 +            if(siteID.contains("_")) {
        380 +                logger.warn("*** Skipping site " + siteID + " as its dir name indicates it wasn't crawled properly.");
        381 +                continue;
        382 +            }
        383 +           
    379 384              long lastModified = siteDir.lastModified();
    380     -            logger.debug("Processing siteID: " + siteID);           
        385 +            logger.debug("@@@ Processing siteID: " + siteID);           
    381 386              NutchTextDumpToMongoDB nutchTxtDump = new NutchTextDumpToMongoDB(
    382 387                   mongodb, mriTxtDetector,
Note: See TracChangeset for help on using the changeset viewer.