Changeset 32101

Show
Ignore:
Timestamp:
12.01.2018 18:16:31 (10 months ago)
Author:
davidb
Message:

Tweaks to allow serial ingest to run

Location:
other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures
Files:
1 added
2 modified

Legend:

Unmodified
Added
Removed
  • other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/ClusterFileIO.java

    r31310 r32101  
    55import java.io.BufferedReader; 
    66import java.io.BufferedWriter; 
     7import java.io.FileInputStream; 
    78import java.io.FileOutputStream; 
    89import java.io.IOException; 
     
    135136        } 
    136137        else { 
     138             
     139             
     140            // Trim 'file://' off the front 
    137141            /* 
    138             // Trim 'file://' off the front 
    139142            String local_file_in = fileIn; 
    140143            if (local_file_in.startsWith("file://")) { 
    141144                local_file_in = fileIn.substring("file://".length()); 
    142145            } 
     146            else if (local_file_in.startsWith("file:/")) { 
     147                local_file_in = fileIn.substring("file:/".length()); 
     148            } 
    143149            FileInputStream fin = new FileInputStream(local_file_in); 
    144150            bis = new BufferedInputStream(fin); 
    145151            */ 
     152             
     153             
    146154            URI uri = URI.create (fileIn); 
    147155            Path path = new Path(uri); 
    148156             
    149157            FSDataInputStream fin = fs.open(path); 
    150              
    151158            bis = new BufferedInputStream(fin); 
     159             
     160             
    152161        } 
    153162     
  • other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/UniversalPOSLangMap.java

    r31509 r32101  
    3838            e.printStackTrace(); 
    3939        } 
    40         try (Stream<Path> stream_paths = Files.walk(Paths.get(langmap_directory_uri))) { 
     40             
     41        Path langmap_directory_path = null; 
     42        try { 
     43            // Spark/Hadoop friendly 
     44            langmap_directory_path = Paths.get(langmap_directory_uri); 
     45        } 
     46        catch (Exception e) { 
     47            // Relative local file-system friendly 
     48            langmap_directory_path = Paths.get(langmap_directory_uri.getRawPath()); 
     49        } 
     50         
     51         
     52        try (Stream<Path> stream_paths = Files.walk(langmap_directory_path)) { 
    4153            langmap_paths = stream_paths 
    4254                    .filter(Files::isRegularFile)