source: other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/UniversalPOSLangMap.java@ 31377

Last change on this file since 31377 was 31377, checked in by davidb, 7 years ago

Switch to using URI not string

File size: 2.2 KB
Line 
1package org.hathitrust.extractedfeatures;
2
3import java.io.File;
4import java.io.IOException;
5import java.net.URI;
6import java.nio.file.Files;
7import java.nio.file.Path;
8import java.nio.file.Paths;
9import java.util.HashMap;
10import java.util.List;
11import java.util.stream.Collectors;
12import java.util.stream.Stream;
13
/**
 * Collection of per-language part-of-speech lookup tables, loaded from a
 * directory of tab-separated mapping files.
 *
 * Every regular file found beneath the directory is treated as one language
 * map.  The first two characters of the file name are used as the language
 * key, and every line consisting of exactly two tab-separated fields
 * contributes one (POS tag, mapped POS value) entry.  Lines with any other
 * number of fields are ignored.  When several files share the same
 * two-character prefix, the file processed last replaces the earlier table.
 */
public class UniversalPOSLangMap
{

    /** Language key -> (language-specific POS tag -> mapped POS value). */
    protected HashMap<String,HashMap<String,String>> _all_langmaps;

    /**
     * Loads every language map file found beneath the given directory.
     *
     * @param langmap_directory the directory holding the language map files,
     *        expressed as a URI string that {@link Paths#get(URI)} can
     *        resolve (for example a {@code file:} URI)
     * @throws NullPointerException if {@code langmap_directory} is null
     * @throws IllegalArgumentException if {@code langmap_directory} is not a
     *         valid URI, or the directory it names cannot be traversed
     */
    public UniversalPOSLangMap(String langmap_directory) {
        System.out.println("Constructing: UniversalPOS Language Map");

        if (langmap_directory == null) {
            throw new NullPointerException("langmap_directory must not be null");
        }

        _all_langmaps = new HashMap<String,HashMap<String,String>>();

        URI langmap_directory_uri;
        try {
            langmap_directory_uri = URI.create(langmap_directory);
        }
        catch (IllegalArgumentException e) {
            // Fail here with the offending value, rather than carrying on
            // with a null URI and failing later with a bare NullPointerException
            throw new IllegalArgumentException(
                "Language map directory is not a valid URI: " + langmap_directory, e);
        }

        // For-each language file
        for (Path langmap_path : listLangmapFiles(langmap_directory_uri)) {
            String langmap_filename = langmap_path.toFile().getName();
            if (langmap_filename.length() < 2) {
                // Too short to carry a two-character language key; skip it
                // instead of letting substring() abort the whole load
                System.err.println("Skipping language map file with too short a name: "
                                   + langmap_path);
                continue;
            }

            String lang_key = langmap_filename.substring(0,2);
            _all_langmaps.put(lang_key, loadPosLookup(langmap_path));
        }
    }

    /**
     * Collects every regular file found beneath the given directory.
     *
     * @throws IllegalArgumentException if the directory cannot be traversed
     */
    private static List<Path> listLangmapFiles(URI langmap_directory_uri) {
        // The stream returned by Files.walk holds open directory handles, so
        // it must be closed once the paths have been collected
        try (Stream<Path> stream_paths = Files.walk(Paths.get(langmap_directory_uri))) {
            return stream_paths
                .filter(Files::isRegularFile)
                .collect(Collectors.toList());
        }
        catch (IOException e) {
            throw new IllegalArgumentException(
                "Unable to read language map directory: " + langmap_directory_uri, e);
        }
    }

    /**
     * Reads one language map file into a POS lookup table.
     *
     * Reading is best-effort: if the file cannot be read the problem is
     * reported on standard error and an empty table is returned, so that the
     * remaining language files are still loaded.
     */
    private static HashMap<String,String> loadPosLookup(Path langmap_path) {
        HashMap<String,String> pos_lookup = new HashMap<String,String>();

        try {
            // readAllLines reports every read problem (including undecodable
            // bytes) as an IOException, so nothing escapes the handler below
            List<String> lang_lines = Files.readAllLines(langmap_path);

            // For-each line within that language file
            for (String line : lang_lines) {
                String[] line_parts = line.split("\\t");
                if (line_parts.length == 2) {
                    pos_lookup.put(line_parts[0], line_parts[1]);
                }
            }
        }
        catch (IOException e) {
            System.err.println("Unable to read language map file: " + langmap_path);
            e.printStackTrace();
        }

        return pos_lookup;
    }

    /**
     * Reports whether a lookup table was loaded for the given language key.
     *
     * @param lang_key two-character language key (the file name prefix)
     * @return true if a table is registered under {@code lang_key}
     */
    public boolean containsLanguage(String lang_key)
    {
        return _all_langmaps.containsKey(lang_key);
    }

    /**
     * Looks up the mapped POS value for a language-specific POS tag.
     *
     * @param lang_key two-character language key (the file name prefix)
     * @param opennlp_pos_key the POS tag to translate
     * @return the mapped value, or null if no table is registered for the
     *         language or the table has no entry for the tag
     */
    public String getUniversalLanguagePOS(String lang_key,String opennlp_pos_key)
    {
        HashMap<String,String> langmap = _all_langmaps.get(lang_key);
        if (langmap == null) {
            return null;
        }

        return langmap.get(opennlp_pos_key);
    }

}
Note: See TracBrowser for help on using the repository browser.