package org.hathitrust.extractedfeatures;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;

import org.apache.hadoop.io.Text;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.util.DoubleAccumulator;
import org.json.JSONArray;
import org.json.JSONObject;

/*
class PagedJSON implements Function<String, Boolean> {

	private static final long serialVersionUID = 1L;

	public Boolean call(String s) { return s.contains("a"); }
}
*/


//public class PerVolumeJSON implements VoidFunction<String>
27 | public class PerVolumeJSON implements Function<Text,Integer>
|
---|
28 | {
|
---|
29 | private static final long serialVersionUID = 1L;
|
---|
30 | protected PerVolumeUtil _per_vol_util;
|
---|
31 |
|
---|
32 | public PerVolumeJSON(String input_dir, String whitelist_filename, String langmap_directory,
|
---|
33 | ArrayList<String> solr_endpoints, String output_dir, int verbosity,
|
---|
34 | boolean icu_tokenize, boolean strict_file_io)
|
---|
35 | {
|
---|
36 |
|
---|
37 | // Had issues with class not found in Spark when set up with inheritance
|
---|
38 | _per_vol_util = new PerVolumeUtil(input_dir, whitelist_filename, langmap_directory,
|
---|
39 | solr_endpoints, output_dir, verbosity,
|
---|
40 | icu_tokenize, strict_file_io);
|
---|
41 |
|
---|
42 | }
|
---|
43 |
|
---|
44 |
|
---|
45 | public Integer call(Text json_text) throws IOException
|
---|
46 |
|
---|
47 | {
|
---|
48 | return _per_vol_util.call(json_text);
|
---|
49 | }
|
---|
50 | }
|
---|
51 |
|
---|