Changeset 31509 for other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/UniversalPOSLangMap.java
- Timestamp:
- 2017-03-13T20:50:06+13:00 (7 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/UniversalPOSLangMap.java
r31506 r31509 11 11 import java.util.stream.Collectors; 12 12 import java.util.stream.Stream; 13 14 import scala.Tuple2; 13 15 14 16 public class UniversalPOSLangMap … … 83 85 } 84 86 85 public String getUniversalLanguagePOS (String lang_key,String opennlp_pos_key)87 public String getUniversalLanguagePOSUnchecked(String lang_key,String opennlp_pos_key) 86 88 { 87 89 String universal_pos = null; … … 90 92 if (langmap != null) { 91 93 universal_pos = langmap.get(opennlp_pos_key); 92 if (universal_pos == null) {93 String missing_lang_pos = lang_key + ":" + opennlp_pos_key;94 95 Integer mpos_freq = 0;96 if (_missing_pos.containsKey(missing_lang_pos)) {97 mpos_freq = _missing_pos.get(missing_lang_pos);98 }99 else {100 System.err.println("Warning: for language key '"+lang_key101 +"' failed to find POS '" + opennlp_pos_key + "'");102 System.err.println("Defaulting to POS 'X' (i.e., 'other')");103 }104 mpos_freq++;105 _missing_pos.put(lang_key,mpos_freq);106 107 universal_pos = "X";108 }109 94 } 110 95 … … 112 97 } 113 98 99 public String getUniversalLanguagePOSChecked(String lang_key,String opennlp_pos_key) 100 { 101 if (!_all_langmaps.containsKey(lang_key)) { 102 // Not a language with a POS map 103 return ""; 104 } 105 106 String universal_pos = null; 107 108 HashMap<String,String> langmap = _all_langmaps.get(lang_key); 109 universal_pos = langmap.get(opennlp_pos_key); 110 111 if (universal_pos == null) { 112 String missing_lang_pos = lang_key + ":" + opennlp_pos_key; 113 114 Integer mpos_freq = 0; 115 if (_missing_pos.containsKey(missing_lang_pos)) { 116 mpos_freq = _missing_pos.get(missing_lang_pos); 117 } 118 else { 119 System.err.println("Warning: for language key '"+lang_key 120 +"' failed to find POS '" + opennlp_pos_key + "'"); 121 System.err.println("Defaulting to POS 'X' (i.e., 'other')"); 122 } 123 mpos_freq++; 124 _missing_pos.put(missing_lang_pos,mpos_freq); 125 126 universal_pos = "X"; 127 } 128 129 return universal_pos; 130 } 131 132 public Tuple2<String,String> getUniversalLanguagePOSPair(String[] lang_keys,String opennlp_pos_key) 133 { 134 String universal_pos = null; 135 String selected_lang = null; 136 137 for (int li=0; li<lang_keys.length; li++) { 138 String lang_key = lang_keys[li]; 139 140 universal_pos = getUniversalLanguagePOSUnchecked(lang_key,opennlp_pos_key); 141 if (universal_pos != null) { 142 selected_lang = lang_key; 143 break; 144 } 145 } 146 147 if (universal_pos == null) { 148 // Failed to any match in any of the given languages 149 // => Lock onto the first language (highest probability when modeled) 150 selected_lang = lang_keys[0]; 151 152 if (!_all_langmaps.containsKey(selected_lang)) { 153 // Not a language with a POS map 154 return new Tuple2<String,String>(selected_lang,null); 155 } 156 157 // If here, then is a POS language => default to "X" 158 159 String missing_lang_pos = selected_lang + ":" + opennlp_pos_key; 160 161 Integer mpos_freq = 0; 162 if (_missing_pos.containsKey(missing_lang_pos)) { 163 mpos_freq = _missing_pos.get(missing_lang_pos); 164 } 165 else { 166 System.err.println("Warning: for language key '"+selected_lang 167 +"' failed to find POS '" + opennlp_pos_key + "'"); 168 System.err.println("Defaulting to POS 'X' (i.e., 'other')"); 169 } 170 mpos_freq++; 171 _missing_pos.put(missing_lang_pos,mpos_freq); 172 173 universal_pos = "X"; 174 } 175 176 return new Tuple2<String,String>(selected_lang,universal_pos); 177 } 114 178 }
Note:
See TracChangeset
for help on using the changeset viewer.