Ignore:
Timestamp:
2016-10-29T15:45:38+13:00 (7 years ago)
Author:
davidb
Message:

Introduction of Spark accumulator to measure progress. Output of POST read in and its status checked for errors (non-zero Solr response status)

Location:
other-projects/hathitrust/solr-extracted-features/trunk/src/main/java/org/hathitrust
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • other-projects/hathitrust/solr-extracted-features/trunk/src/main/java/org/hathitrust/PagedJSON.java

    r30980 r30984  
    1414import org.apache.commons.compress.compressors.CompressorException;
    1515import org.apache.spark.api.java.function.FlatMapFunction;
     16import org.apache.spark.util.DoubleAccumulator;
    1617import org.json.JSONArray;
    1718import org.json.JSONObject;
     
    3637    protected int    _verbosity;
    3738   
    38     public PagedJSON(String input_dir, String solr_url, String output_dir, int verbosity)
     39    DoubleAccumulator _progress_accum;
     40    double            _progress_step;
     41   
     42    public PagedJSON(String input_dir, String solr_url, String output_dir, int verbosity,
     43                     DoubleAccumulator progress_accum, double progress_step)
    3944    {
    4045        _input_dir  = input_dir;
     
    4247        _output_dir = output_dir;
    4348        _verbosity  = verbosity;
     49       
     50        _progress_accum = progress_accum;
     51        _progress_step  = progress_step;
    4452    }
    4553   
     
    214222           
    215223            // Read response
    216             BufferedReader in = new BufferedReader(new InputStreamReader(
    217                     httpcon.getInputStream()));
     224            StringBuilder sb = new StringBuilder();
     225            BufferedReader in = new BufferedReader(new InputStreamReader(httpcon.getInputStream()));
    218226            String decodedString;
    219227            while ((decodedString = in.readLine()) != null) {
    220                 System.out.println(decodedString);
     228                //System.out.println(decodedString);
     229                sb.append(decodedString);
    221230            }
    222231            in.close();
    223232
     233            JSONObject solr_status_json = new JSONObject(sb.toString());
     234            JSONObject response_header_json = solr_status_json.getJSONObject("responseHeader");
     235            if (response_header_json != null) {
     236                int status = response_header_json.getInt("status");
     237                if (status != 0) {
     238                    System.err.println("Warning: POST request to " + post_url + " returned status " + status);
     239                    System.err.println("Full response was: " + sb);
     240                }
     241            }
     242            else {
     243                System.err.println("Failed response to Solr POST: " + sb);
     244            }
     245           
     246           
     247           
    224248        }
    225249        catch (Exception e) {
     
    341365       
    342366        ids.add(volume_id);
    343        
     367        _progress_accum.add(_progress_step);
    344368        return ids.iterator();
    345369    }
  • other-projects/hathitrust/solr-extracted-features/trunk/src/main/java/org/hathitrust/PrepareForIngest.java

    r30979 r30984  
    55
    66import org.apache.spark.api.java.*;
     7import org.apache.spark.util.DoubleAccumulator;
    78import org.apache.spark.SparkConf;
    89
     
    4647        JavaRDD<String> json_list_data = jsc.textFile(_json_list_filename,NUM_PARTITIONS).cache();
    4748
    48         PagedJSON paged_json = new PagedJSON(_input_dir, _solr_url,_output_dir,_verbosity);
     49        long num_volumes = json_list_data.count();
     50        double per_vol = 100.0/(double)num_volumes;
     51       
     52        DoubleAccumulator progress_accum = jsc.sc().doubleAccumulator("ProgressPercent");
     53
     54        //sc.parallelize(Arrays.asList(1, 2, 3, 4)).foreach(x -> accum.add(x));
     55        // ...
     56        // 10/09/29 18:41:08 INFO SparkContext: Tasks finished in 0.317106 s
     57
     58        //accum.value();
     59       
     60        PagedJSON paged_json = new PagedJSON(_input_dir,_solr_url,_output_dir,_verbosity, progress_accum,per_vol);
    4961        JavaRDD<String> json_ids = json_list_data.flatMap(paged_json).cache();
    5062
Note: See TracChangeset for help on using the changeset viewer.