Changeset 31509 for other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/SolrDocJSON.java
- Timestamp: 2017-03-13T20:50:06+13:00 (7 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/SolrDocJSON.java
r31505 r31509 19 19 import org.json.JSONArray; 20 20 import org.json.JSONObject; 21 22 import scala.Tuple2; 23 21 24 import org.apache.lucene.analysis.TokenStream; 22 25 import org.apache.lucene.analysis.Tokenizer; … … 388 391 int text_len = text_al.size(); 389 392 393 /* 390 394 for (int li=0; li<lang_len; li++) { 391 395 String lang_key = lang_list[li]; … … 393 397 if (universal_langmap.containsLanguage(lang_key)) 394 398 { 399 */ 395 400 HashMap<String,JSONArray> pos_lang_text_field_map = new HashMap<String,JSONArray>(); 396 401 … … 405 410 String opennlp_pos_key = pos_tags[pti]; 406 411 407 String upos = universal_langmap.getUniversalLanguagePOS(lang_key, opennlp_pos_key); 408 String pos_lang_text_field = lang_key + "_" + upos + "_htrctoken"; 412 Tuple2<String,String> lang_pos_pair = universal_langmap.getUniversalLanguagePOSPair(lang_list, opennlp_pos_key); 413 String selected_lang = lang_pos_pair._1; 414 String upos = lang_pos_pair._2; 415 416 String pos_lang_text_field = selected_lang; 417 if (upos != null) { 418 pos_lang_text_field += "_" + upos; 419 } 420 pos_lang_text_field += "_htrctoken"; 409 421 410 422 if (!pos_lang_text_field_map.containsKey(pos_lang_text_field)) { … … 424 436 solr_doc_json.put(lang_text_field, json_values); 425 437 } 438 /* 426 439 } 427 440 else { … … 440 453 441 454 } 442 455 */ 443 456 } 444 457 }
Note: See TracChangeset for help on using the changeset viewer.