source: other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/SolrDocJSON.java@31677

Last change on this file since 31677 was 31677, checked in by davidb, 7 years ago

Suppress processing of governmentDocument for now in the JSON metadata record, as it is a bool field, not a string

  • Property svn:executable set to *
File size: 22.6 KB
package org.hathitrust.extractedfeatures;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.Reader;
import java.io.StringReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Set;

import org.apache.commons.compress.compressors.CompressorException;
import org.json.JSONArray;
import org.json.JSONObject;

import scala.Tuple2;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.icu.segmentation.ICUTokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.core.LowerCaseFilter;

public class SolrDocJSON {

	protected static JSONObject generateToplevelMetadataSolrDocJSON(String volume_id, JSONObject ef_metadata)
	{
		JSONObject solr_update_json = null;
		/*
		  Example JSON for id: "gri.ark:/13960/t0003qw46"
		  metadata: {

		    "accessProfile": "open",
		    "bibliographicFormat": "BK",
		    "classification": {
		      "lcc": [
		        "ND646 .B8 1900"
		      ]
		    },
		    "dateCreated": "2016-06-19T08:30:16.11199Z",
		    "enumerationChronology": " ",
		    "genre": [
		      "not fiction"
		    ],
		    "governmentDocument": false,
		    "handleUrl": "http://hdl.handle.net/2027/gri.ark:/13960/t0003qw46",
		    "hathitrustRecordNumber": "100789562",
		    "htBibUrl": "http://catalog.hathitrust.org/api/volumes/full/htid/gri.ark:/13960/t0003qw46.json",
		    "imprint": "Burlington Fine Arts Club, 1900.",
		    "isbn": [],
		    "issn": [],
		    "issuance": "monographic",
		    "language": "eng",
		    "lastUpdateDate": "2015-09-14 13:25:03",
		    "lccn": [],
		    "names": [
		      "Burlington Fine Arts Club "
		    ],
		    "oclc": [
		      "25259734"
		    ],
		    "pubDate": "1900",
		    "pubPlace": "enk",
		    "rightsAttributes": "pd",
		    "schemaVersion": "1.3",
		    "sourceInstitution": "CMALG",
		    "sourceInstitutionRecordNumber": "9928077890001551",
		    "title": "Exhibition of pictures by Dutch masters of the seventeenth century.",
		    "typeOfResource": "text",
		    "volumeIdentifier": "gri.ark:/13960/t0003qw46"
		  }
		*/

		String[] metadata_single = new String[] {
			"accessProfile",
			"bibliographicFormat",
			"dateCreated", // date
			//"enumerationChronology", // What is this?
			//"governmentDocument", // bool: true/false -- suppressed for now (see commit message)
			"handleUrl",
			"hathitrustRecordNumber", // int?
			"htBibUrl",
			"imprint",
			"issuance",
			"language",
			"lastUpdateDate",
			"pubDate",
			"pubPlace",
			"rightsAttributes",
			"schemaVersion",
			"sourceInstitution",
			"sourceInstitutionRecordNumber",
			"title",
			"typeOfResource",
			"volumeIdentifier"
		};

		String[] metadata_multiple = new String[] {
			"oclc",
			"isbn",
			"issn",
			"lccn",
			"genre",
			"names"
		};

		String[] metadata_hashmap_multiple = new String[] {
			"classification"
		};

		if (ef_metadata != null) {

			// For the JSON Solr update format see:
			//   https://cwiki.apache.org/confluence/display/solr/Uploading+Data+with+Index+Handlers

			//String title = ef_metadata.getString("title");
			JSONObject solr_add_json = new JSONObject();

			JSONObject solr_doc_json = new JSONObject();
			solr_doc_json.put("id", volume_id);

			// Use the opt* accessors here: org.json's get* methods throw JSONException
			// on a missing key, so the null guards below would otherwise never be reached
			for (String metaname: metadata_single) {
				String metavalue = ef_metadata.optString(metaname, null);
				if (metavalue != null) {
					solr_doc_json.put(metaname+"_t", metavalue);
					solr_doc_json.put(metaname+"_s", metavalue);
				}
			}

			for (String metaname: metadata_multiple) {
				JSONArray metavalues = ef_metadata.optJSONArray(metaname);
				if (metavalues != null) {
					solr_doc_json.put(metaname+"_t", metavalues);
					solr_doc_json.put(metaname+"_ss", metavalues);
				}
			}

			for (String metaname: metadata_hashmap_multiple) {
				JSONObject metakeys = ef_metadata.optJSONObject(metaname);

				if (metakeys != null) {
					Iterator<String> metakey_iter = metakeys.keys();
					while (metakey_iter.hasNext()) {
						String metakey = metakey_iter.next();

						JSONArray metavalues = metakeys.optJSONArray(metakey);
						if (metavalues != null) {
							String combined_metaname = metaname + "_" + metakey;
							solr_doc_json.put(combined_metaname+"_t", metavalues);
							solr_doc_json.put(combined_metaname+"_ss", metavalues);
						}
					}
				}
			}

			solr_add_json.put("commitWithin", 60000); // used to be 5000
			solr_add_json.put("doc", solr_doc_json);

			solr_update_json = new JSONObject();
			solr_update_json.put("add", solr_add_json);
		}
		else {
			System.err.println("Warning: null metadata for '" + volume_id + "'");
		}

		return solr_update_json;
	}
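
	// Sketch (not part of the original code): the commit suppressed "governmentDocument"
	// because it is a boolean, and the string-oriented loop above would fail on it. Once
	// boolean metadata is handled, something like the following could index it into a
	// Solr boolean dynamic field:
	//
	//   if (ef_metadata.has("governmentDocument")) {
	//       solr_doc_json.put("governmentDocument_b", ef_metadata.optBoolean("governmentDocument"));
	//   }
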
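	// Shape of the per-page tokenPosCount object consumed below (illustrative values):
	//   { "Dutch": { "JJ": 1 }, "masters": { "NNS": 2 } }
	// i.e. token -> { POS tag -> occurrence count }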
	protected static ArrayList<String> getTokenPosCountWords(JSONObject ef_token_pos_count, String page_id,
															 boolean icu_tokenize)
	{
		boolean lowercase_filter = true;

		ArrayList<String> words = new ArrayList<String>();

		if (ef_token_pos_count != null) {

			Iterator<String> word_token_iter = ef_token_pos_count.keys();
			while (word_token_iter.hasNext()) {
				String word_token = word_token_iter.next();

				if (icu_tokenize) {
					Reader reader = new StringReader(word_token);

					ICUTokenizer icu_tokenizer = new ICUTokenizer();
					icu_tokenizer.setReader(reader);

					CharTermAttribute charTermAttribute = icu_tokenizer.addAttribute(CharTermAttribute.class);

					TokenStream token_stream = null;

					if (lowercase_filter) {
						token_stream = new LowerCaseFilter(icu_tokenizer);
					}
					else {
						token_stream = icu_tokenizer;
					}

					try {
						token_stream.reset();

						while (token_stream.incrementToken()) {
							String term = charTermAttribute.toString();
							words.add(term);
						}

						token_stream.end();
						token_stream.close();
					}
					catch (IOException e) {
						e.printStackTrace();
					}
				}
				else {
					words.add(word_token);
				}
			}
		}
		else {
			System.err.println("Warning: empty tokenPosCount field for '" + page_id + "'");
		}

		/* Alternative way to get at the keys:
		Set<String> token_keys = ef_token_pos_count.keySet();
		for (String token : token_keys) {
			sb.append(token + " ");
		}
		*/
		return words;
	}

	protected static ArrayList<POSString> getTokenPosCountWordsArrayList(JSONObject ef_token_pos_count, String page_id,
																		 boolean icu_tokenize)
	{
		ArrayList<POSString> words = new ArrayList<POSString>();

		if (ef_token_pos_count != null) {

			Iterator<String> word_token_iter = ef_token_pos_count.keys();
			while (word_token_iter.hasNext()) {
				String word_token = word_token_iter.next();

				JSONObject pos_json_object = ef_token_pos_count.getJSONObject(word_token);

				Set<String> pos_keys = pos_json_object.keySet();
				int pos_keys_len = pos_keys.size();
				String[] pos_tags = (pos_keys_len>0) ? pos_keys.toArray(new String[pos_keys_len]) : null;

				if (icu_tokenize) {
					Reader reader = new StringReader(word_token);

					ICUTokenizer icu_tokenizer = new ICUTokenizer();
					icu_tokenizer.setReader(reader);

					CharTermAttribute charTermAttribute = icu_tokenizer.addAttribute(CharTermAttribute.class);

					TokenStream token_stream = icu_tokenizer;

					try {
						token_stream.reset();

						while (token_stream.incrementToken()) {
							String term = charTermAttribute.toString();

							// Each ICU sub-token inherits the POS tags of the original token
							POSString pos_string = new POSString(term, pos_tags);

							words.add(pos_string);
						}

						token_stream.end();
						token_stream.close();
					}
					catch (IOException e) {
						e.printStackTrace();
					}
				}
				else {
					POSString pos_word_token = new POSString(word_token, pos_tags);

					words.add(pos_word_token);
				}
			}
		}
		else {
			System.err.println("Warning: empty tokenPosCount field for '" + page_id + "'");
		}

		return words;
	}

	protected static ArrayList<POSString> getTokenPosCountWordsMapCaseInsensitive(ArrayList<POSString> words_in)
	{
		ArrayList<POSString> words_out = new ArrayList<POSString>();

		for (POSString pos_word: words_in) {
			String word = pos_word.getString();
			String[] pos_tags = pos_word.getPOSTags();

			Reader reader = new StringReader(word);

			Tokenizer tokenizer = new StandardTokenizer();
			tokenizer.setReader(reader);
			CharTermAttribute charTermAttribute = tokenizer.addAttribute(CharTermAttribute.class);

			TokenStream token_stream = new LowerCaseFilter(tokenizer);

			try {
				token_stream.reset();

				while (token_stream.incrementToken()) {
					String term = charTermAttribute.toString();

					POSString pos_term = new POSString(term, pos_tags);
					words_out.add(pos_term);
				}

				token_stream.end();
				token_stream.close();
			}
			catch (IOException e) {
				e.printStackTrace();
			}
		}

		return words_out;
	}

	protected static ArrayList<POSString> getTokenPosCountWordsMapWhitelist(ArrayList<POSString> words_in,
																			WhitelistBloomFilter whitelist_bloomfilter)
	{
		ArrayList<POSString> words_out = new ArrayList<POSString>();

		for (POSString pos_word: words_in) {
			String word = pos_word.getString();
			if (whitelist_bloomfilter.contains(word)) {
				words_out.add(pos_word);
			}
		}

		return words_out;
	}

	protected static ArrayList<String> getTokenPosCountPOSLabels(JSONObject ef_token_pos_count, String page_id)
	{
		ArrayList<String> pos_labels = new ArrayList<String>();

		if (ef_token_pos_count != null) {

			Iterator<String> word_token_iter = ef_token_pos_count.keys();
			while (word_token_iter.hasNext()) {
				String word_token = word_token_iter.next();

				JSONObject word_pos_labels = ef_token_pos_count.getJSONObject(word_token);

				Iterator<String> pos_token_iter = word_pos_labels.keys();
				while (pos_token_iter.hasNext()) {
					String pos_token = pos_token_iter.next();

					pos_labels.add(pos_token);
				}
			}
		}
		else {
			System.err.println("Warning: empty tokenPosCount field for '" + page_id + "'");
		}

		return pos_labels;
	}

	protected static String generateSolrText(JSONObject ef_token_pos_count, String page_id,
											 WhitelistBloomFilter whitelist_bloomfilter, boolean icu_tokenize)
	{
		ArrayList<String> tokens = getTokenPosCountWords(ef_token_pos_count, page_id, icu_tokenize);

		StringBuilder sb = new StringBuilder();

		if (whitelist_bloomfilter == null) {

			boolean first_append = true;

			for (int i=0; i<tokens.size(); i++) {
				String token = tokens.get(i);

				if (!first_append) {
					sb.append(" ");
				}
				else {
					first_append = false;
				}
				sb.append(token);
			}
		}
		else {
			boolean first_append = true;

			for (int i=0; i<tokens.size(); i++) {
				String token = tokens.get(i);

				if (whitelist_bloomfilter.contains(token)) {
					if (!first_append) {
						sb.append(" ");
					}
					else {
						first_append = false;
					}
					sb.append(token);
				}
			}
		}

		return sb.toString();
	}

	// Note: universal_langmap is currently unused here; it is applied later in
	// addSolrLanguageTextFields()
	protected static ArrayList<POSString> filterSolrTextFields(JSONObject ef_token_pos_count, String page_id,
															   WhitelistBloomFilter whitelist_bloomfilter,
															   UniversalPOSLangMap universal_langmap,
															   boolean icu_tokenize)
	{
		ArrayList<POSString> cs_tokens = getTokenPosCountWordsArrayList(ef_token_pos_count, page_id, icu_tokenize);
		ArrayList<POSString> lc_tokens = getTokenPosCountWordsMapCaseInsensitive(cs_tokens);

		ArrayList<POSString> tokens = null;
		if (whitelist_bloomfilter != null) {
			tokens = getTokenPosCountWordsMapWhitelist(lc_tokens, whitelist_bloomfilter);
		}
		else {
			tokens = lc_tokens;
		}

		return tokens;
	}

	protected static void addSolrLanguageTextFields(JSONObject ef_page, ArrayList<POSString> text_al,
													UniversalPOSLangMap universal_langmap,
													JSONObject solr_doc_json)
	{
		// e.g. ... "languages":[{"ko":"0.71"},{"ja":"0.29"}]
		// (optJSONArray returns null rather than throwing when the field is absent)
		JSONArray ef_languages = ef_page.optJSONArray("languages");
		if ((ef_languages != null) && (ef_languages.length()>0)) {

			int lang_len = ef_languages.length();
			String[] lang_list = new String[lang_len];

			for (int i=0; i<lang_len; i++) {
				JSONObject lang_rec = ef_languages.getJSONObject(i);

				Iterator<String> lang_key_iter = lang_rec.keys();
				while (lang_key_iter.hasNext()) {
					String lang_label = lang_key_iter.next();

					lang_list[i] = lang_label;
				}
			}

			int text_len = text_al.size();

			/*
			for (int li=0; li<lang_len; li++) {
				String lang_key = lang_list[li];

				if (universal_langmap.containsLanguage(lang_key))
				{
			*/
			HashMap<String,JSONArray> pos_lang_text_field_map = new HashMap<String,JSONArray>();

			for (int ti=0; ti<text_len; ti++) {
				POSString pos_text_value = text_al.get(ti);
				String text_value = pos_text_value.getString();

				String[] pos_tags = pos_text_value.getPOSTags();
				int pos_tags_len = pos_tags.length;

				for (int pti=0; pti<pos_tags_len; pti++) {
					String opennlp_pos_key = pos_tags[pti];

					Tuple2<String,String> lang_pos_pair = universal_langmap.getUniversalLanguagePOSPair(lang_list, opennlp_pos_key);
					String selected_lang = lang_pos_pair._1;
					String upos = lang_pos_pair._2;

					// Field name pattern: <lang>[_<universal-POS>]_htrctoken
					String pos_lang_text_field = selected_lang;
					if (upos != null) {
						pos_lang_text_field += "_" + upos;
					}
					pos_lang_text_field += "_htrctoken";

					if (!pos_lang_text_field_map.containsKey(pos_lang_text_field)) {
						JSONArray empty_json_values = new JSONArray();
						pos_lang_text_field_map.put(pos_lang_text_field, empty_json_values);
					}
					pos_lang_text_field_map.get(pos_lang_text_field).put(text_value);
				}
			}

			// Now add each of the POS language fields into solr_doc_json
			Set<String> pos_lang_field_keys = pos_lang_text_field_map.keySet();
			for (String plf_key : pos_lang_field_keys) {
				String lang_text_field = plf_key;
				JSONArray json_values = pos_lang_text_field_map.get(plf_key);

				solr_doc_json.put(lang_text_field, json_values);
			}
			/*
				}
				else {
					String lang_text_field = lang_key + "_htrctoken";

					JSONArray json_values = new JSONArray();
					for (int ti=0; ti<text_len; ti++) {
						POSString pos_text_value = text_al.get(ti);
						String text_value = pos_text_value.getString();
						json_values.put(text_value);
					}
					solr_doc_json.put(lang_text_field, json_values);
				}
			}
			*/
		}
	}
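
	// For illustration: if a page's detected language is "en" and a token carries the
	// OpenNLP tag "VBD", and UniversalPOSLangMap maps ("en","VBD") to the universal tag
	// "VERB" (an assumption about that mapping), then addSolrLanguageTextFields() above
	// adds the token to the multi-valued field "en_VERB_htrctoken".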

	protected static JSONObject generateSolrDocJSON(String volume_id, String page_id, JSONObject ef_page,
													WhitelistBloomFilter whitelist_bloomfilter,
													UniversalPOSLangMap universal_langmap,
													boolean icu_tokenize)
	{
		JSONObject solr_update_json = null;

		if (ef_page != null) {
			// opt* accessors so missing fields fall through to the warnings below
			// rather than throwing JSONException
			JSONObject ef_body = ef_page.optJSONObject("body");
			if (ef_body != null) {
				JSONObject ef_token_pos_count = ef_body.optJSONObject("tokenPosCount");
				if (ef_token_pos_count != null) {

					JSONObject solr_add_json = new JSONObject();

					ArrayList<POSString> text_al = filterSolrTextFields(ef_token_pos_count, page_id, whitelist_bloomfilter, universal_langmap, icu_tokenize);

					JSONObject solr_doc_json = new JSONObject();
					solr_doc_json.put("id", page_id);
					solr_doc_json.put("volumeid_s", volume_id);
					if (text_al.size()>0) {
						addSolrLanguageTextFields(ef_page, text_al, universal_langmap, solr_doc_json);
						//solr_doc_json.put("eftext_txt", text_al.toString()); // ****
					}
					else {
						solr_doc_json.put("efnotext_b", true);
					}
					solr_add_json.put("commitWithin", 5000);
					solr_add_json.put("doc", solr_doc_json);

					solr_update_json = new JSONObject();
					solr_update_json.put("add", solr_add_json);
				}
				else {
					System.err.println("Warning: empty tokenPosCount field for '" + page_id + "'");
				}
			}
			else {
				System.err.println("Warning: empty body field for '" + page_id + "'");
			}
		}
		else {
			System.err.println("Warning: null page for '" + page_id + "'");
		}

		// For reference: the relevant update handler is /update/json/docs.
		// Example documentation on the Solr JSON syntax:
		//   https://cwiki.apache.org/confluence/display/solr/Uploading+Data+with+Index+Handlers
		//   #UploadingDatawithIndexHandlers-JSONFormattedIndexUpdates

		/*
		curl -X POST -H 'Content-Type: application/json' 'http://localhost:8983/solr/my_collection/update' --data-binary '
		{
		  "add": {
		    "doc": {
		      "id": "DOC1",
		      "my_boosted_field": {          use a map with boost/value for a boosted field
		        "boost": 2.3,
		        "value": "test"
		      },
		      "my_multivalued_field": [ "aaa", "bbb" ]   Can use an array for a multi-valued field
		    }
		  },
		  "add": {
		    "commitWithin": 5000,            commit this document within 5 seconds
		    "overwrite": false,              don't check for existing documents with the same uniqueKey
		    "boost": 3.45,                   a document boost
		    "doc": {
		      "f1": "v1",                    Can use repeated keys for a multi-valued field
		      "f1": "v2"
		    }
		  },

		  "commit": {},
		  "optimize": { "waitSearcher":false },

		  "delete": { "id":"ID" },           delete by ID
		  "delete": { "query":"QUERY" }      delete by query
		}'
		*/

		return solr_update_json;
	}
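
	// For illustration, a successful call above yields update JSON shaped like
	// (hypothetical field values):
	//   { "add": { "commitWithin": 5000,
	//              "doc": { "id": "<page-id>",
	//                       "volumeid_s": "<volume-id>",
	//                       "en_NOUN_htrctoken": [ "picture", "century" ] } } }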

	public static ArrayList<String> generateTokenPosCountWhitelistText(String volume_id, String page_id, JSONObject ef_page,
																	   boolean icu_tokenize)
	{
		ArrayList<String> word_list = null;

		if (ef_page != null) {
			JSONObject ef_body = ef_page.optJSONObject("body");
			if (ef_body != null) {
				JSONObject ef_token_pos_count = ef_body.optJSONObject("tokenPosCount");
				word_list = getTokenPosCountWords(ef_token_pos_count, page_id, icu_tokenize);
			}
			else {
				System.err.println("Warning: empty body field for '" + page_id + "'");
			}
		}
		else {
			System.err.println("Warning: null page for '" + page_id + "'");
		}

		return word_list;
	}

	public static ArrayList<String> generateTokenPosCountPOSLabels(String volume_id, String page_id, JSONObject ef_page)
	{
		ArrayList<String> word_list = null;

		if (ef_page != null) {
			JSONObject ef_body = ef_page.optJSONObject("body");
			if (ef_body != null) {
				JSONObject ef_token_pos_count = ef_body.optJSONObject("tokenPosCount");
				word_list = getTokenPosCountPOSLabels(ef_token_pos_count, page_id);
			}
			else {
				System.err.println("Warning: empty body field for '" + page_id + "'");
			}
		}
		else {
			System.err.println("Warning: null page for '" + page_id + "'");
		}

		return word_list;
	}

	public static ArrayList<String> generateTokenPosCountLangLabels(String volume_id, String page_id, JSONObject ef_page)
	{
		ArrayList<String> lang_list = new ArrayList<String>();

		if (ef_page != null) {
			JSONArray ef_languages = ef_page.optJSONArray("languages");
			if (ef_languages != null) {

				int lang_len = ef_languages.length();
				for (int i=0; i<lang_len; i++) {
					JSONObject lang_rec = ef_languages.getJSONObject(i);

					Iterator<String> lang_key_iter = lang_rec.keys();
					while (lang_key_iter.hasNext()) {
						String lang_label = lang_key_iter.next();

						lang_list.add(lang_label);
					}
				}
			}
			else {
				System.err.println("Warning: empty languages field for '" + page_id + "'");
			}
		}
		else {
			System.err.println("Warning: null page for '" + page_id + "'");
		}

		return lang_list;
	}

	public static void saveSolrDoc(JSONObject solr_add_doc_json, String output_file_json_bz2)
	{
		// try-with-resources ensures the writer is closed even if write() throws
		try (BufferedWriter bw = ClusterFileIO.getBufferedWriterForCompressedFile(output_file_json_bz2)) {
			bw.write(solr_add_doc_json.toString());
		} catch (IOException e) {
			e.printStackTrace();
		} catch (CompressorException e) {
			e.printStackTrace();
		}
	}

	public static void postSolrDoc(String post_url, JSONObject solr_add_doc_json,
								   String volume_id, String page_id)
	{
		//String curl_popen = "curl -X POST -H 'Content-Type: application/json'";
		//curl_popen += " 'http://10.11.0.53:8983/solr/htrc-pd-ef/update'";
		//curl_popen += " --data-binary '";
		//curl_popen += "'"

		// System.out.println("Post URL: " + post_url);

		try {
			HttpURLConnection httpcon = (HttpURLConnection) ((new URL(post_url).openConnection()));
			httpcon.setDoOutput(true);
			httpcon.setRequestProperty("Content-Type", "application/json");
			httpcon.setRequestProperty("Accept", "application/json");
			httpcon.setRequestMethod("POST");
			httpcon.connect();

			byte[] outputBytes = solr_add_doc_json.toString().getBytes("UTF-8");
			OutputStream os = httpcon.getOutputStream();
			os.write(outputBytes);
			os.close();

			// Read the response (decode it as UTF-8 rather than the platform default)
			StringBuilder sb = new StringBuilder();
			InputStream is = httpcon.getInputStream();
			BufferedReader in = new BufferedReader(new InputStreamReader(is, "UTF-8"));
			String decodedString;
			while ((decodedString = in.readLine()) != null) {
				sb.append(decodedString);
			}
			in.close();

			JSONObject solr_status_json = new JSONObject(sb.toString());
			// optJSONObject so a malformed response reaches the else branch below
			JSONObject response_header_json = solr_status_json.optJSONObject("responseHeader");
			if (response_header_json != null) {
				int status = response_header_json.getInt("status");
				if (status != 0) {
					System.err.println("Warning: POST request to " + post_url + " returned status " + status);
					System.err.println("Full response was: " + sb);
				}
			}
			else {
				System.err.println("Failed response to Solr POST: " + sb);
			}
		}
		catch (IOException e) {
			System.err.println("Solr core update failed when processing id: " + volume_id + "." + page_id);
			e.printStackTrace();
		}
		catch (Exception e) {
			e.printStackTrace();
		}
	}
}
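
// Typical usage from an ingest driver in the same package (hypothetical sketch, not in
// the original file):
//
//   JSONObject update_json = SolrDocJSON.generateSolrDocJSON(volume_id, page_id, ef_page,
//                                                            whitelist_bloomfilter,
//                                                            universal_langmap, false);
//   if (update_json != null) {
//       SolrDocJSON.postSolrDoc(solr_update_url, update_json, volume_id, page_id);
//   }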