Changeset 32101


Ignore:
Timestamp:
2018-01-12T18:16:31+13:00 (4 years ago)
Author:
davidb
Message:

Tweaks to allow serial ingest to run

Location:
other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures
Files:
1 added
2 edited

Legend:

Unmodified
Added
Removed
  • other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/ClusterFileIO.java

    r31310 r32101  
    55import java.io.BufferedReader;
    66import java.io.BufferedWriter;
     7import java.io.FileInputStream;
    78import java.io.FileOutputStream;
    89import java.io.IOException;
     
    135136        }
    136137        else {
     138           
     139           
     140            // Trim 'file://' off the front
    137141            /*
    138             // Trim 'file://' off the front
    139142            String local_file_in = fileIn;
    140143            if (local_file_in.startsWith("file://")) {
    141144                local_file_in = fileIn.substring("file://".length());
    142145            }
     146            else if (local_file_in.startsWith("file:/")) {
     147                local_file_in = fileIn.substring("file:/".length());
     148            }
    143149            FileInputStream fin = new FileInputStream(local_file_in);
    144150            bis = new BufferedInputStream(fin);
    145151            */
     152           
     153           
    146154            URI uri = URI.create (fileIn);
    147155            Path path = new Path(uri);
    148156           
    149157            FSDataInputStream fin = fs.open(path);
    150            
    151158            bis = new BufferedInputStream(fin);
     159           
     160           
    152161        }
    153162   
  • other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/UniversalPOSLangMap.java

    r31509 r32101  
    3838            e.printStackTrace();
    3939        }
    40         try (Stream<Path> stream_paths = Files.walk(Paths.get(langmap_directory_uri))) {
     40           
     41        Path langmap_directory_path = null;
     42        try {
     43            // Spark/Hadoop friendly
     44            langmap_directory_path = Paths.get(langmap_directory_uri);
     45        }
     46        catch (Exception e) {
     47            // Relative local file-system friendly
     48            langmap_directory_path = Paths.get(langmap_directory_uri.getRawPath());
     49        }
     50       
     51       
     52        try (Stream<Path> stream_paths = Files.walk(langmap_directory_path)) {
    4153            langmap_paths = stream_paths
    4254                    .filter(Files::isRegularFile)
Note: See TracChangeset for help on using the changeset viewer.