Ignore:
Timestamp:
2019-10-16T21:39:56+13:00 (5 years ago)
Author:
ak19
Message:

Forgot to document that spaces were also allowed as a separator in the input of crawl-site IDs to the batchcrawl.sh script.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gs3-extensions/maori-lang-detection/src/org/greenstone/atea/CCWETProcessor.java

    r33569 r33573  
    761761        // Finally, we can process this WETFile's records into the keep and discard pile
    762762        wetFileCount++;
    763         logger.debug("Off to process " + WETFile);
     763        debug("Off to process " + WETFile);
    764764        String crawlID = ccrawlWETFileDir.getName(); // something like CC-MAIN-YYYY-##-wet-files
    765765        crawlID = crawlID.substring("CC-MAIN-".length(), crawlID.indexOf("-wet-files")); // YYYY-##     
     
    794794    public static void error(String msg, Exception e) {
    795795    logger.error(msg, e);
    796     System.err.println(msg);
     796    System.err.println("\n"+msg);
    797797    e.printStackTrace();
    798798    }
     
    810810    public boolean accept(File dir, String name) {
    811811        if(name.endsWith(".warc.wet")) {
    812         logger.debug("Will include " + name + " for processing.");
     812        debug("Will include " + name + " for processing.");
    813813        return true;
    814814        }
     
    818818        File unzippedVersion = new File(dir, nameWithoutGZext);
    819819        if(unzippedVersion.exists()) {
    820             logger.debug("--- Unzipped version " + unzippedVersion + " exists.");
    821             logger.debug("Skipping " + name);
     820            debug("--- Unzipped version " + unzippedVersion + " exists.");
     821            debug("Skipping " + name);
    822822            return false; // don't count gzipped version if unzipped version exists.
    823823        }
    824824        else {
    825             logger.debug("Only zipped version " + name + " exists.");
     825            debug("Only zipped version " + name + " exists.");
    826826            return true; // No unzipped version, so have to work with gzipped version
    827827        }
     
    829829
    830830        // we're not even interested in any other file extensions
    831         logger.debug("Not a WET file. Skipping " + name);
     831        debug("Not a WET file. Skipping " + name);
    832832        return false;
    833833    }
Note: See TracChangeset for help on using the changeset viewer.