- Timestamp:
- 2019-10-16T21:39:56+13:00 (5 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gs3-extensions/maori-lang-detection/src/org/greenstone/atea/CCWETProcessor.java
r33569 r33573
761 761   // Finally, we can process this WETFile's records into the keep and discard pile
762 762   wetFileCount++;
763     - logger.debug("Off to process " + WETFile);
    763 + debug("Off to process " + WETFile);
764 764   String crawlID = ccrawlWETFileDir.getName(); // something like CC-MAIN-YYYY-##-wet-files
765 765   crawlID = crawlID.substring("CC-MAIN-".length(), crawlID.indexOf("-wet-files")); // YYYY-##
… …
794 794   public static void error(String msg, Exception e) {
795 795   logger.error(msg, e);
796     - System.err.println( msg);
    796 + System.err.println("\n"+msg);
797 797   e.printStackTrace();
798 798   }
… …
810 810   public boolean accept(File dir, String name) {
811 811   if(name.endsWith(".warc.wet")) {
812     - logger.debug("Will include " + name + " for processing.");
    812 + debug("Will include " + name + " for processing.");
813 813   return true;
814 814   }
… …
818 818   File unzippedVersion = new File(dir, nameWithoutGZext);
819 819   if(unzippedVersion.exists()) {
820     - logger.debug("--- Unzipped version " + unzippedVersion + " exists.");
821     - logger.debug("Skipping " + name);
    820 + debug("--- Unzipped version " + unzippedVersion + " exists.");
    821 + debug("Skipping " + name);
822 822   return false; // don't count gzipped version if unzipped version exists.
823 823   }
824 824   else {
825     - logger.debug("Only zipped version " + name + " exists.");
    825 + debug("Only zipped version " + name + " exists.");
826 826   return true; // No unzipped version, so have to work with gzipped version
827 827   }
… …
829 829
830 830   // we're not even interested in any other file extensions
831     - logger.debug("Not a WET file. Skipping " + name);
    831 + debug("Not a WET file. Skipping " + name);
832 832   return false;
833 833   }
Note:
See TracChangeset
for help on using the changeset viewer.