1 | package org.greenstone.server;
|
---|
2 |
|
---|
3 |
|
---|
4 | import java.io.Serializable;
|
---|
5 | import java.util.HashMap;
|
---|
6 |
|
---|
7 | public class GazetteerTrieNode implements Serializable
|
---|
8 | {
|
---|
9 | private static final long serialVersionUID = 8121262849365540679L;
|
---|
10 | protected boolean _nameEnd = false;
|
---|
11 | protected GazetteerTrieNode[] _children = new GazetteerTrieNode[26];
|
---|
12 | protected HashMap<Character,GazetteerTrieNode> _unicodeCharacters = null;
|
---|
13 |
|
---|
14 | /**
|
---|
15 | * Basic constructor for a non-unicode trie node
|
---|
16 | * @param nameEnd defines whether the new node represents the end of a place name
|
---|
17 | */
|
---|
18 | protected GazetteerTrieNode(boolean nameEnd)
|
---|
19 | {
|
---|
20 | for(int i = 0; i < 26; i++)
|
---|
21 | {
|
---|
22 | _children[i] = null;
|
---|
23 | }
|
---|
24 | _nameEnd = nameEnd;
|
---|
25 | }
|
---|
26 |
|
---|
27 | /**
|
---|
28 | * Add a child to the current node
|
---|
29 | * @param c is the character used to decide where to add the child node
|
---|
30 | * @param nameEnd is whether or not this character represents the end of a place name
|
---|
31 | * @return true if a child was added and false if one already existed
|
---|
32 | */
|
---|
33 | protected boolean addChild(char c, boolean nameEnd)
|
---|
34 | {
|
---|
35 | //If the characters is an ASCII letter
|
---|
36 | int index;
|
---|
37 | if(c >= 'A' && c <= 'Z')
|
---|
38 | {
|
---|
39 | index = c - 65;
|
---|
40 | }
|
---|
41 | else if(c >= 'a' && c <= 'z')
|
---|
42 | {
|
---|
43 | index = c - 97;
|
---|
44 | }
|
---|
45 | //If the character is not an ASCII letter
|
---|
46 | else
|
---|
47 | {
|
---|
48 | //If the node does not currently have a unicode character map then make one
|
---|
49 | if(_unicodeCharacters == null)
|
---|
50 | {
|
---|
51 | _unicodeCharacters = new HashMap<Character, GazetteerTrieNode>();
|
---|
52 | _unicodeCharacters.put(c, new GazetteerTrieNode(nameEnd));
|
---|
53 | return true;
|
---|
54 | }
|
---|
55 |
|
---|
56 | //Add the unicode character to the map only if it either does not exist already or signifies the end of a place name
|
---|
57 | GazetteerTrieNode value;
|
---|
58 | if((value = _unicodeCharacters.get(c)) != null)
|
---|
59 | {
|
---|
60 | if(value.isNameEnd())
|
---|
61 | {
|
---|
62 | return false;
|
---|
63 | }
|
---|
64 | else
|
---|
65 | {
|
---|
66 | value.setNameEnd(nameEnd);
|
---|
67 | }
|
---|
68 | }
|
---|
69 | else
|
---|
70 | {
|
---|
71 | _unicodeCharacters.put(c, new GazetteerTrieNode(nameEnd));
|
---|
72 | }
|
---|
73 |
|
---|
74 | return true;
|
---|
75 | }
|
---|
76 |
|
---|
77 | //Add the ascii character if it either does not already exist or it signifies the end of a place name
|
---|
78 | if(_children[index] != null)
|
---|
79 | {
|
---|
80 | if(_children[index].isNameEnd() || nameEnd == false)
|
---|
81 | {
|
---|
82 | return false;
|
---|
83 | }
|
---|
84 | else
|
---|
85 | {
|
---|
86 | _children[index].setNameEnd(true);
|
---|
87 | return true;
|
---|
88 | }
|
---|
89 | }
|
---|
90 | else
|
---|
91 | {
|
---|
92 | _children[index] = new GazetteerTrieNode(nameEnd);
|
---|
93 | return true;
|
---|
94 | }
|
---|
95 | }
|
---|
96 |
|
---|
97 | /**
|
---|
98 | * @return whether or not this node represents the end of a place name
|
---|
99 | */
|
---|
100 | protected boolean isNameEnd()
|
---|
101 | {
|
---|
102 | return _nameEnd;
|
---|
103 | }
|
---|
104 |
|
---|
105 | /**
|
---|
106 | * Set whether or not this node represents the end of a place name
|
---|
107 | * @param nameEnd is the value to set it to
|
---|
108 | */
|
---|
109 | protected void setNameEnd(boolean nameEnd)
|
---|
110 | {
|
---|
111 | _nameEnd = nameEnd;
|
---|
112 | }
|
---|
113 |
|
---|
114 | /**
|
---|
115 | * Returns the child at the given index
|
---|
116 | * @param index is the index of the child to return
|
---|
117 | * @return the child at the given index
|
---|
118 | */
|
---|
119 | protected GazetteerTrieNode getChild(char c)
|
---|
120 | {
|
---|
121 | int index = -1;
|
---|
122 | if(c >= 'A' && c <= 'Z')
|
---|
123 | {
|
---|
124 | index = c - 65;
|
---|
125 | }
|
---|
126 | else if(c >= 'a' && c <= 'z')
|
---|
127 | {
|
---|
128 | index = c - 97;
|
---|
129 | }
|
---|
130 | if(index == -1)
|
---|
131 | {
|
---|
132 | if(_unicodeCharacters == null)
|
---|
133 | {
|
---|
134 | return null;
|
---|
135 | }
|
---|
136 | return _unicodeCharacters.get(c);
|
---|
137 | }
|
---|
138 | return _children[index];
|
---|
139 | }
|
---|
140 |
|
---|
141 | public void output()
|
---|
142 | {
|
---|
143 | if(_unicodeCharacters != null && _unicodeCharacters.size() >= 10)
|
---|
144 | {
|
---|
145 | System.out.println(_unicodeCharacters.size());
|
---|
146 | }
|
---|
147 |
|
---|
148 | for(GazetteerTrieNode g : _children)
|
---|
149 | {
|
---|
150 | if(g != null)
|
---|
151 | {
|
---|
152 | g.output();
|
---|
153 | }
|
---|
154 | }
|
---|
155 | }
|
---|
156 | }
|
---|