Changeset 30945 for other-projects/hathitrust/solr-extracted-features/trunk/src/main/java/org/hathitrust/PagedJSON.java
- Timestamp: 2016-10-26T15:37:24+13:00 (7 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/hathitrust/solr-extracted-features/trunk/src/main/java/org/hathitrust/PagedJSON.java
r30942 r30945
   24    24
   25    25     protected String _input_dir;
         26  +  protected String _output_dir;
   26    27     protected int _verbosity;
   27    28
   28        -  public PagedJSON(String input_dir, int verbosity)
         29  +  public PagedJSON(String input_dir, String output_dir, int verbosity)
   29    30     {
   30        -      _input_dir = input_dir;
   31        -      _verbosity = verbosity;
         31  +      _input_dir = input_dir;
         32  +      _output_dir = output_dir;
         33  +      _verbosity = verbosity;
   32    34     }
   33    35
    …     …
   44    46         while ((str = br.readLine()) != null) {
   45    47             sb.append(str);
   46        -          //System.out.println(str);
   47    48         }
   48    49
   49    50         br.close();
   50        -
   51        -      //System.err.println("*****" + sb.toString());
   52        -
   53        -      /*
   54        -      List<String> lines = Files.readAllLines(path,StandardCharsets.UTF_8);
   55        -
   56        -
   57        -      for (String line : lines) {
   58        -          sb.append(line);
   59        -
   60        -      }
   61        -      */
   62        -
   63    51     }
   64    52     catch (Exception e) {
    …     …
   68    56         JSONObject json_obj = new JSONObject(sb.toString());
   69    57
   70        -
   71    58         return json_obj;
   72        -
   73        -      //return sb.toString();
   74    59     }
   75    60
    …     …
   83    68     String id = extracted_feature_record.getString("id");
   84    69
   85        -  JSONObject ef_metadata = extracted_feature_record.getJSONObject("metadata");
         70  +  //JSONObject ef_metadata = extracted_feature_record.getJSONObject("metadata");
   86    71     JSONObject ef_features = extracted_feature_record.getJSONObject("features");
   87    72
    …     …
   97    82     int ef_num_pages = ef_pages.length();
   98    83
         84  +  // Make directory for page-level JSON output
         85  +  String json_dir = ClusterFileIO.removeSuffix(json_file_in,".json.bz2");
         86  +  String page_json_dir = json_dir + "/pages";
         87  +  //ClusterFileIO.createDirectoryAll(_output_dir + "/" + page_json_dir);
         88  +  System.out.println("mkdir: " + _output_dir + "/" + page_json_dir);
         89  +
   99    90     ArrayList<String> ids = new ArrayList<String>(ef_num_pages);
  100    91     for (int i = 0; i < ef_page_count; i++) {
  101        -      ids.add(id + "." + i);
         92  +      String formatted_i = String.format("page-%06d", i);
         93  +      String page_id = id + "." + formatted_i;
         94  +
         95  +      if (_verbosity >= 2) {
         96  +          System.out.println(" Page: " + page_id);
         97  +      }
         98  +
         99  +      // create JSON obj of just the page (for now)
        100  +      // write it out
        101  +
        102  +      ids.add(page_json_dir +"/" + page_id + ".json.bz2");
        103  +
        104  +      if (i==0) {
        105  +          System.out.println("Sample output JSON page file: " + page_json_dir +"/" + page_id + ".json.bz2");
        106  +      }
  102   107     }
  103   108
Note: See TracChangeset for help on using the changeset viewer.