Context Navigation

PerVolumeJSONList.java@ 32106

Last change on this file since 32106 was 32106, checked in by davidb, 6 years ago
Rekindle ability to process a json-filelist.txt using Spark
Property svn:executable set to `*`
File size: 1.4 KB

Line
1	package org.hathitrust.extractedfeatures;
2
3	import java.io.IOException;
4	import java.util.ArrayList;
5	import java.util.HashMap;
6	import java.util.Iterator;
7
8	import org.apache.hadoop.io.Text;
9	import org.apache.spark.api.java.function.FlatMapFunction;
10	import org.apache.spark.api.java.function.Function;
11	import org.apache.spark.api.java.function.VoidFunction;
12	import org.apache.spark.util.DoubleAccumulator;
13	import org.json.JSONArray;
14	import org.json.JSONObject;
15
16	public class PerVolumeJSONList implements Function<String,Integer>
17	{
18	private static final long serialVersionUID = 1L;
19	protected PerVolumeUtil _per_vol_util;
20
21	public PerVolumeJSONList(String input_dir, String whitelist_filename, String langmap_directory,
22	ArrayList<String> solr_endpoints, String output_dir, int verbosity,
23	boolean icu_tokenize, boolean strict_file_io)
24	{
25	_per_vol_util = new PerVolumeUtil(input_dir, whitelist_filename, langmap_directory,
26	solr_endpoints, output_dir, verbosity,
27	icu_tokenize, strict_file_io);
28
29	}
30
31	public Integer call(String json_file_in) throws IOException
32	{
33	// Read in JSON file as Text
34	String full_json_file_in = _per_vol_util.getInputDir() + "/" + json_file_in;
35	String json_content_string = ClusterFileIO.readTextFile(full_json_file_in);
36
37	Text json_content_text = new Text(json_content_string);
38
39	return _per_vol_util.call(json_content_text);
40	}
41	}
42

Note: See TracBrowser for help on using the repository browser.