Timestamp:
2016-10-29T15:45:38+13:00
Author:
davidb
Message:

Introduction of a Spark accumulator to measure progress. The output of the POST is now read in and its status checked.

File:
1 edited

  • other-projects/hathitrust/solr-extracted-features/trunk/src/main/java/org/hathitrust/PrepareForIngest.java

--- PrepareForIngest.java (r30979)
+++ PrepareForIngest.java (r30984)
@@ -5,4 +5,5 @@
 
 import org.apache.spark.api.java.*;
+import org.apache.spark.util.DoubleAccumulator;
 import org.apache.spark.SparkConf;
 
@@ -46,5 +47,16 @@
         JavaRDD<String> json_list_data = jsc.textFile(_json_list_filename,NUM_PARTITIONS).cache();
 
-        PagedJSON paged_json = new PagedJSON(_input_dir, _solr_url,_output_dir,_verbosity);
+        long num_volumes = json_list_data.count();
+        double per_vol = 100.0/(double)num_volumes;
+
+        DoubleAccumulator progress_accum = jsc.sc().doubleAccumulator("ProgressPercent");
+
+        //sc.parallelize(Arrays.asList(1, 2, 3, 4)).foreach(x -> accum.add(x));
+        // ...
+        // 10/09/29 18:41:08 INFO SparkContext: Tasks finished in 0.317106 s
+
+        //accum.value();
+
+        PagedJSON paged_json = new PagedJSON(_input_dir,_solr_url,_output_dir,_verbosity, progress_accum,per_vol);
         JavaRDD<String> json_ids = json_list_data.flatMap(paged_json).cache();
 
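The hunk above only shows the driver-side setup: the DoubleAccumulator is registered on the SparkContext, each volume is worth per_vol = 100/num_volumes percent of the job, and both values are passed into PagedJSON. How PagedJSON advances the accumulator is not part of this changeset, so the following is only a sketch of the worker-side increment; everything except DoubleAccumulator and FlatMapFunction (class, field, and variable names) is assumed for illustration.

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.util.DoubleAccumulator;

// Sketch only: a FlatMapFunction that bumps the shared accumulator once per
// processed volume, mirroring the progress_accum/per_vol pair passed to PagedJSON.
public class PagedJSONSketch implements FlatMapFunction<String, String> {
    private final DoubleAccumulator _progress_accum;
    private final double _per_vol;

    public PagedJSONSketch(DoubleAccumulator progress_accum, double per_vol) {
        _progress_accum = progress_accum;
        _per_vol = per_vol;
    }

    @Override
    public Iterator<String> call(String json_filename) {
        List<String> page_ids = new ArrayList<>();
        // ... open json_filename, extract the per-page records, POST them to Solr,
        //     and collect the resulting page ids into page_ids (not shown) ...

        // Each volume accounts for per_vol = 100/num_volumes percent of the job,
        // so the driver can read progress_accum.value() to report overall progress.
        _progress_accum.add(_per_vol);

        return page_ids.iterator();
    }
}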
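The changeset message also says the output of the POST is read in and its status checked; that code is not in the diff shown here, so the following is only a sketch of the general pattern, assuming java.net.HttpURLConnection and a Solr-style update URL. The class, method, and variable names are illustrative, not the project's actual code.

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;

public class SolrPostSketch {
    // POST a JSON document and read the response back, checking its HTTP status
    // instead of discarding it (sketch only; names are assumptions).
    public static String postAndCheck(String solr_url, String json_doc) throws Exception {
        HttpURLConnection conn = (HttpURLConnection) new URL(solr_url).openConnection();
        conn.setRequestMethod("POST");
        conn.setDoOutput(true);
        conn.setRequestProperty("Content-Type", "application/json");

        try (OutputStream os = conn.getOutputStream()) {
            os.write(json_doc.getBytes(StandardCharsets.UTF_8));
        }

        // Read the output of the POST back in ...
        int status = conn.getResponseCode();
        InputStream in = (status < HttpURLConnection.HTTP_BAD_REQUEST)
                ? conn.getInputStream() : conn.getErrorStream();
        StringBuilder response = new StringBuilder();
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(in, StandardCharsets.UTF_8))) {
            String line;
            while ((line = br.readLine()) != null) {
                response.append(line);
            }
        }

        // ... and check the status so a failed ingest is not silently ignored.
        if (status != HttpURLConnection.HTTP_OK) {
            System.err.println("Solr POST failed (HTTP " + status + "): " + response);
        }
        return response.toString();
    }
}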