[17156] | 1 | package org.greenstone3.ms;
|
---|
| 2 |
|
---|
| 3 | import java.util.ArrayList;
|
---|
| 4 | import java.util.Arrays;
|
---|
| 5 | import java.util.Collection;
|
---|
| 6 | import java.util.Comparator;
|
---|
| 7 | import java.util.HashMap;
|
---|
| 8 | import java.util.Iterator;
|
---|
| 9 | import java.util.Map;
|
---|
| 10 | import java.util.Set;
|
---|
| 11 | import javax.xml.parsers.DocumentBuilder;
|
---|
| 12 | import javax.xml.parsers.DocumentBuilderFactory;
|
---|
| 13 | import java.io.File;
|
---|
| 14 | import java.math.BigDecimal;
|
---|
| 15 |
|
---|
| 16 | import org.w3c.dom.Document;
|
---|
| 17 | import org.w3c.dom.Element;
|
---|
| 18 | import org.w3c.dom.NamedNodeMap;
|
---|
| 19 | import org.w3c.dom.Node;
|
---|
| 20 | import org.w3c.dom.NodeList;
|
---|
| 21 | import org.greenstone.gsdl3.core.MessageRouter;
|
---|
| 22 | import org.greenstone.gsdl3.util.XMLConverter;
|
---|
| 23 |
|
---|
| 24 | public class DataMaker {
|
---|
| 25 |
|
---|
| 26 | MetadataStats ms;
|
---|
| 27 | ArrayList nameList;
|
---|
| 28 | private int Mode;
|
---|
| 29 | private int TotalDoc;
|
---|
| 30 | private String path;
|
---|
| 31 |
|
---|
| 32 | protected Document doc=null;
|
---|
| 33 | protected MessageRouter mr = null;
|
---|
| 34 | protected XMLConverter converter=null;
|
---|
| 35 | private ArrayList removedID = new ArrayList();
|
---|
| 36 | private HashMap elementMap = new HashMap();
|
---|
| 37 | private static final int DEF_DIV_SCALE = 10;
|
---|
| 38 |
|
---|
/**
 * Binds this data maker to a statistics object, copies its element-name list
 * and data directory, and immediately counts the documents in the archive.
 *
 * @param arg1 statistics object providing metadataNameList and myNewDir
 */
public DataMaker(MetadataStats arg1){
    this.ms = arg1;
    this.nameList = this.ms.metadataNameList;
    this.path = this.ms.myNewDir;
    // Needs path to be set: it reads path/archivedir.xml.
    setTotalDocNumber();
}
| 45 |
|
---|
| 46 |
|
---|
| 47 | private Element getRootNode(String core_element){
|
---|
| 48 |
|
---|
| 49 | if(elementMap.containsKey(core_element)){
|
---|
| 50 | return (Element)elementMap.get(core_element);
|
---|
| 51 | }
|
---|
| 52 | try{
|
---|
| 53 | DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
|
---|
| 54 | DocumentBuilder docBuilder = docBuilderFactory.newDocumentBuilder();
|
---|
| 55 | Document doc = docBuilder.newDocument();
|
---|
| 56 | doc = docBuilder.parse (new File(path+"/"+core_element+".xml"));
|
---|
| 57 | Element rootNode = doc.getDocumentElement();
|
---|
| 58 | //elementMap.put(core_element, rootNode);
|
---|
| 59 | return rootNode;
|
---|
| 60 | }catch (Exception e) {
|
---|
| 61 | e.printStackTrace();
|
---|
| 62 | return null;
|
---|
| 63 | }
|
---|
| 64 | }
|
---|
| 65 |
|
---|
| 66 |
|
---|
| 67 | public void setTotalDocNumber(){
|
---|
| 68 | Element ex = getRootNode("archivedir");
|
---|
| 69 | NodeList listOfFrequency = ex.getElementsByTagName("Document");
|
---|
| 70 | TotalDoc = listOfFrequency.getLength();
|
---|
| 71 | }
|
---|
| 72 |
|
---|
| 73 | public int getTotalElementUsed(){
|
---|
| 74 |
|
---|
| 75 | int totalNumber = 0;
|
---|
| 76 |
|
---|
| 77 | for(int i = 0 ; i<nameList.size(); i++){
|
---|
| 78 |
|
---|
| 79 | Element ex = getRootNode((String)nameList.get(i));
|
---|
| 80 | NodeList listOfFrequency = ex.getElementsByTagName("Frequency");
|
---|
| 81 |
|
---|
| 82 | for(int s=0; s<listOfFrequency.getLength() ; s++){
|
---|
| 83 | Node FrequencyNode = listOfFrequency.item(s);
|
---|
| 84 | NodeList textFNList = FrequencyNode.getChildNodes();
|
---|
| 85 | String TextNode = textFNList.item(0).getNodeValue();
|
---|
| 86 | totalNumber = totalNumber + Integer.parseInt(TextNode);
|
---|
| 87 | }
|
---|
| 88 | }
|
---|
| 89 | return totalNumber;
|
---|
| 90 | }
|
---|
| 91 |
|
---|
| 92 | public int getFrequency(String name){
|
---|
| 93 |
|
---|
| 94 | int totalNumber = 0;
|
---|
| 95 |
|
---|
| 96 | Element ex = getRootNode(name);
|
---|
| 97 | NodeList listOfFrequency = ex.getElementsByTagName("Frequency");
|
---|
| 98 | if(listOfFrequency.getLength()==0){return 0;}
|
---|
| 99 |
|
---|
| 100 | for(int s=0; s<listOfFrequency.getLength() ; s++){
|
---|
| 101 | Node FrequencyNode = listOfFrequency.item(s);
|
---|
| 102 | NodeList textFNList = FrequencyNode.getChildNodes();
|
---|
| 103 | String TextNode = textFNList.item(0).getNodeValue();
|
---|
| 104 | int x = Integer.parseInt(TextNode);
|
---|
| 105 | totalNumber = totalNumber + x;
|
---|
| 106 | }
|
---|
| 107 | return totalNumber;
|
---|
| 108 | }
|
---|
| 109 |
|
---|
| 110 | public int getDistinctNumber(String name){
|
---|
| 111 |
|
---|
| 112 | ArrayList alist = new ArrayList();
|
---|
| 113 | Element ex = getRootNode(name);
|
---|
| 114 | NodeList listOfFrequency = ex.getElementsByTagName("ActualValue");
|
---|
| 115 |
|
---|
| 116 | if(listOfFrequency.getLength()==0){
|
---|
| 117 | return 0;
|
---|
| 118 | }
|
---|
| 119 |
|
---|
| 120 | for(int s=0; s<listOfFrequency.getLength() ; s++){
|
---|
| 121 | Node ActualValueNode = listOfFrequency.item(s);
|
---|
| 122 | NodeList textFNList = ActualValueNode.getChildNodes();
|
---|
| 123 | String TextNode = textFNList.item(0).getNodeValue();
|
---|
| 124 |
|
---|
| 125 | if(!alist.contains(TextNode) && !TextNode.equals(" ")){
|
---|
| 126 | alist.add(TextNode);
|
---|
| 127 | }
|
---|
| 128 | }
|
---|
| 129 | return alist.size();
|
---|
| 130 | }
|
---|
| 131 |
|
---|
| 132 | public int getDocumentUsedElement(String core_element){
|
---|
| 133 |
|
---|
| 134 | int totalNumber = 0;
|
---|
| 135 | Element ex = getRootNode(core_element);
|
---|
| 136 | NodeList listOfFrequency = ex.getElementsByTagName("Frequency");
|
---|
| 137 | for(int s=0; s<listOfFrequency.getLength() ; s++){
|
---|
| 138 | totalNumber++;
|
---|
| 139 | }
|
---|
| 140 | return totalNumber;
|
---|
| 141 | }
|
---|
| 142 |
|
---|
| 143 | // for all elements
|
---|
| 144 | public double Mean(String core_element){
|
---|
| 145 |
|
---|
| 146 | int due = getDocumentUsedElement(core_element);
|
---|
| 147 |
|
---|
| 148 | if(due==0 || TotalDoc==0){return 0;}
|
---|
| 149 | Double d1 = new Double(due);
|
---|
| 150 | Double d2 = new Double(TotalDoc);
|
---|
| 151 | Double result = div(d1,d2);
|
---|
| 152 | result = mul(result, new Double(100));
|
---|
| 153 | return round(result.doubleValue(),1);
|
---|
| 154 | }
|
---|
| 155 |
|
---|
| 156 | public int getMinRange(String core_element){
|
---|
| 157 |
|
---|
| 158 | Element ex = getRootNode(core_element);
|
---|
| 159 | NodeList listOfFrequency = ex.getElementsByTagName("Frequency");
|
---|
| 160 |
|
---|
| 161 | if(listOfFrequency.getLength()==0){return 0;}
|
---|
| 162 |
|
---|
| 163 | Node FrequencyNode = listOfFrequency.item(0);
|
---|
| 164 | NodeList textFNList = FrequencyNode.getChildNodes();
|
---|
| 165 | String TextNode = textFNList.item(0).getNodeValue();
|
---|
| 166 | int minNumber = 0;
|
---|
| 167 |
|
---|
| 168 | if(listOfFrequency.getLength()==TotalDoc){
|
---|
| 169 | minNumber = Integer.parseInt(TextNode);
|
---|
| 170 | }
|
---|
| 171 |
|
---|
| 172 | else {
|
---|
| 173 | minNumber = 0;
|
---|
| 174 | }
|
---|
| 175 |
|
---|
| 176 | for(int s=0; s<listOfFrequency.getLength() ; s++){
|
---|
| 177 | FrequencyNode = listOfFrequency.item(s);
|
---|
| 178 | textFNList = FrequencyNode.getChildNodes();
|
---|
| 179 | TextNode = textFNList.item(0).getNodeValue();
|
---|
| 180 | int x = Integer.parseInt(TextNode);
|
---|
| 181 | if(x<minNumber){minNumber=x;}
|
---|
| 182 | }
|
---|
| 183 | return minNumber;
|
---|
| 184 | }
|
---|
| 185 |
|
---|
| 186 | public int getMaxRange(String core_element){
|
---|
| 187 |
|
---|
| 188 | Element ex = getRootNode(core_element);
|
---|
| 189 | NodeList listOfFrequency = ex.getElementsByTagName("Frequency");
|
---|
| 190 | if(listOfFrequency.getLength()==0){return 0;}
|
---|
| 191 | Node FrequencyNode = listOfFrequency.item(0);
|
---|
| 192 | NodeList textFNList = FrequencyNode.getChildNodes();
|
---|
| 193 | String TextNode = textFNList.item(0).getNodeValue();
|
---|
| 194 | int maxNumber = 0;
|
---|
| 195 |
|
---|
| 196 | for(int s=0; s<listOfFrequency.getLength() ; s++){
|
---|
| 197 | FrequencyNode = listOfFrequency.item(s);
|
---|
| 198 | textFNList = FrequencyNode.getChildNodes();
|
---|
| 199 | TextNode = textFNList.item(0).getNodeValue();
|
---|
| 200 | int x = Integer.parseInt(TextNode);
|
---|
| 201 | if(x>maxNumber){maxNumber=x;}
|
---|
| 202 | }
|
---|
| 203 | return maxNumber;
|
---|
| 204 | }
|
---|
| 205 |
|
---|
/**
 * Returns the most common per-document frequency of an element and stores
 * it in the Mode field.
 *
 * One slot is kept per archive document (TotalDoc slots, initially 0). For
 * each "Document" element in the element file, the slot whose archive ID
 * equals the value of that element's first attribute receives the value of
 * the document's first "Frequency" descendant. When several values are
 * equally common the smallest one is returned.
 *
 * NOTE(review): the first attribute is presumed to be the document ID, and
 * an ID missing from archivedir.xml makes indexOf return -1 and the array
 * store fail - confirm both against the XML writer.
 *
 * @param core_element element name (file name without ".xml")
 * @return the mode, or 0 when the file has no Document element
 */
public int getMode(String core_element){
    Element root = getRootNode(core_element);
    NodeList documents = root.getElementsByTagName("Document");
    if(documents.getLength()==0){
        Mode = 0;
        return 0;
    }

    ArrayList archiveIds = new ArrayList(Arrays.asList(getDocumentIDs(core_element)));
    int[] frequencies = new int[TotalDoc]; // zero-filled by the JVM

    for(int s = 0; s < documents.getLength(); s++){
        Element document = (Element)documents.item(s);
        String docId = document.getAttributes().item(0).getNodeValue();
        int location = archiveIds.indexOf(docId);
        NodeList perDoc = document.getElementsByTagName("Frequency");
        frequencies[location] = Integer.parseInt(perDoc.item(0).getChildNodes().item(0).getNodeValue());
    }

    Arrays.sort(frequencies);

    // Equal values are adjacent after sorting, so one pass over the runs
    // finds the longest; '>' keeps the first (smallest) value on ties.
    int best = 0;
    int bestRun = 0;
    int start = 0;
    while(start < frequencies.length){
        int end = start;
        while(end < frequencies.length && frequencies[end] == frequencies[start]){
            end++;
        }
        if(end - start > bestRun){
            bestRun = end - start;
            best = frequencies[start];
        }
        start = end;
    }

    Mode = best;
    return best;
}
| 257 |
|
---|
| 258 | public double ModeFrequency(String core_element){
|
---|
| 259 |
|
---|
| 260 | Element ex = getRootNode(core_element);
|
---|
| 261 | NodeList listOfFrequency = ex.getElementsByTagName("Document");
|
---|
| 262 | if(listOfFrequency.getLength()==0){ return 100;}
|
---|
| 263 | ArrayList alist = new ArrayList();
|
---|
| 264 | String[] idsx = getDocumentIDs(core_element);
|
---|
| 265 |
|
---|
| 266 | for(int i = 0; i<idsx.length; i++){
|
---|
| 267 | alist.add(idsx[i]);
|
---|
| 268 | }
|
---|
| 269 |
|
---|
| 270 | int[] list = new int[TotalDoc];
|
---|
| 271 |
|
---|
| 272 | for(int i = 0; i<list.length; i++){
|
---|
| 273 | list[i] = 0;
|
---|
| 274 | }
|
---|
| 275 |
|
---|
| 276 | int length = alist.size();
|
---|
| 277 | int counter = 0;
|
---|
| 278 |
|
---|
| 279 | for(int s=0; s< listOfFrequency.getLength() ; s++){
|
---|
| 280 | Node docNode = listOfFrequency.item(s);
|
---|
| 281 | NamedNodeMap NodeIDMap = docNode.getAttributes();
|
---|
| 282 | Node DocNodeID = NodeIDMap.item(0);
|
---|
| 283 | String DocID = DocNodeID.getNodeValue();
|
---|
| 284 | Element xNode = (Element)docNode;
|
---|
| 285 | int location = alist.indexOf(DocID);
|
---|
| 286 | NodeList xList = xNode.getElementsByTagName("Frequency");
|
---|
| 287 | int fre = Integer.parseInt(xList.item(0).getChildNodes().item(0).getNodeValue());
|
---|
| 288 | list[location] = fre;
|
---|
| 289 | }
|
---|
| 290 |
|
---|
| 291 | for(int i =0; i<list.length; i++){
|
---|
| 292 | if(list[i]==Mode){counter++;}
|
---|
| 293 | }
|
---|
| 294 |
|
---|
| 295 | Double result = div(new Double(counter), new Double(length));
|
---|
| 296 | result = mul(result ,new Double (100));
|
---|
| 297 | return round(result.doubleValue(),1);
|
---|
| 298 | }
|
---|
| 299 |
|
---|
| 300 | public double Median(String core_element){
|
---|
| 301 |
|
---|
| 302 | Element ex = getRootNode(core_element);
|
---|
| 303 | NodeList listOfFrequency = ex.getElementsByTagName("Document");
|
---|
| 304 |
|
---|
| 305 | ArrayList alist = new ArrayList();
|
---|
| 306 | String[] idsx = getDocumentIDs(core_element);
|
---|
| 307 |
|
---|
| 308 | for(int i = 0; i<idsx.length; i++){
|
---|
| 309 | alist.add(idsx[i]);
|
---|
| 310 | }
|
---|
| 311 |
|
---|
| 312 | int[] list = new int[TotalDoc];
|
---|
| 313 |
|
---|
| 314 | for(int i = 0; i<list.length; i++){
|
---|
| 315 | list[i] = 0;
|
---|
| 316 | }
|
---|
| 317 |
|
---|
| 318 | for(int s=0; s< listOfFrequency.getLength() ; s++){
|
---|
| 319 | Node docNode = listOfFrequency.item(s);
|
---|
| 320 | NamedNodeMap NodeIDMap = docNode.getAttributes();
|
---|
| 321 | Node DocNodeID = NodeIDMap.item(0);
|
---|
| 322 | String DocID = DocNodeID.getNodeValue();
|
---|
| 323 | Element xNode = (Element)docNode;
|
---|
| 324 | int location = alist.indexOf(DocID);
|
---|
| 325 | NodeList xList = xNode.getElementsByTagName("Frequency");
|
---|
| 326 | int fre = Integer.parseInt(xList.item(0).getChildNodes().item(0).getNodeValue());
|
---|
| 327 | list[location] = fre;
|
---|
| 328 | }
|
---|
| 329 |
|
---|
| 330 | int length = alist.size();
|
---|
| 331 | int middle = length/2 -1;
|
---|
| 332 |
|
---|
| 333 | Arrays.sort(list);
|
---|
| 334 |
|
---|
| 335 | if(length % 2 == 1){
|
---|
| 336 | middle = middle + 1;
|
---|
| 337 | return list[middle];
|
---|
| 338 | }
|
---|
| 339 |
|
---|
| 340 | else{
|
---|
| 341 | return round((double)(list[middle]+list[middle+1])/2,1);
|
---|
| 342 | }
|
---|
| 343 | }
|
---|
| 344 |
|
---|
| 345 | // for all elements
|
---|
| 346 | public double Average(String core_element){
|
---|
| 347 |
|
---|
| 348 | int t1 = getFrequency(core_element);
|
---|
| 349 | int t2 = getDocumentUsedElement(core_element);
|
---|
| 350 | if(t1==0 || t2==0){return 0;}
|
---|
| 351 | Double result = div(new Double(t1),new Double(t2));
|
---|
| 352 | return round(result.doubleValue(),1);
|
---|
| 353 | }
|
---|
| 354 |
|
---|
| 355 | public Object[][] AllInformation(){
|
---|
| 356 |
|
---|
| 357 | int rows = nameList.size();
|
---|
| 358 | int cols = 11;
|
---|
| 359 | int y = 0;
|
---|
| 360 |
|
---|
| 361 | Object[][] info = new Object[rows][cols];
|
---|
| 362 | String[] list = new String[rows];
|
---|
| 363 |
|
---|
| 364 | for(int i = 0 ; i < list.length; i++){
|
---|
| 365 | list[i] = nameList.get(i).toString();
|
---|
| 366 | }
|
---|
| 367 |
|
---|
| 368 | Arrays.sort(list);
|
---|
| 369 |
|
---|
| 370 | for(int iu = 0; iu<list.length; iu++){
|
---|
| 371 | String xi = list[iu];
|
---|
| 372 | info[y][0] = xi ;
|
---|
| 373 | info[y][1] = new Integer(getFrequency(xi));
|
---|
| 374 | info[y][2] = new Integer(getDocumentUsedElement(xi));
|
---|
| 375 | info[y][3] = new Double(Mean(xi));
|
---|
| 376 | info[y][4] = new Double(Median(xi));
|
---|
| 377 | info[y][5] = new Integer(getDistinctNumber(xi));
|
---|
| 378 | info[y][6] = new Integer(getMinRange(xi));
|
---|
| 379 | info[y][7] = new Integer(getMaxRange(xi));
|
---|
| 380 | info[y][8] = new Double(Average(xi));
|
---|
| 381 | info[y][9] = new Integer(getMode(xi));
|
---|
| 382 | info[y][10] = ModeFrequency(xi)+"%";
|
---|
| 383 | y++;
|
---|
| 384 | }
|
---|
| 385 | return info;
|
---|
| 386 | }
|
---|
| 387 |
|
---|
| 388 | public String[] getSortList(String core_element,String sort){
|
---|
| 389 |
|
---|
| 390 | if(sort.equals("ASCII")){
|
---|
| 391 | HashMap hp = getDistinctValueMap (core_element);
|
---|
| 392 | String[] temp = new String[hp.size()];
|
---|
| 393 | int counter = 0;
|
---|
| 394 | Set s = hp.keySet();
|
---|
| 395 | Iterator i = s.iterator();
|
---|
| 396 |
|
---|
| 397 | while(i.hasNext()){
|
---|
| 398 | temp[counter] = (String)i.next();
|
---|
| 399 | counter++;
|
---|
| 400 | }
|
---|
| 401 |
|
---|
| 402 | Arrays.sort(temp);
|
---|
| 403 | return temp;
|
---|
| 404 | }
|
---|
| 405 |
|
---|
| 406 | else{
|
---|
| 407 | Map m = getDistinctValueMap (core_element);
|
---|
| 408 | ArrayList outputList = sortMap(m);
|
---|
| 409 | String[] temp = new String[outputList.size()];
|
---|
| 410 |
|
---|
| 411 | for(int i = 0; i< outputList.size(); i++){
|
---|
| 412 | Map.Entry entry = (Map.Entry) outputList.get(i);
|
---|
| 413 | temp[i] = (String) entry.getKey();
|
---|
| 414 | }
|
---|
| 415 |
|
---|
| 416 | return temp;
|
---|
| 417 | }
|
---|
| 418 |
|
---|
| 419 | }
|
---|
| 420 |
|
---|
| 421 |
|
---|
| 422 | /*
|
---|
| 423 | * Actually, we can directly access to "ActualValue" node instead of document node
|
---|
| 424 | */
|
---|
| 425 |
|
---|
| 426 | public HashMap getDistinctValueMap(String core_element){
|
---|
| 427 |
|
---|
| 428 | Element ex = getRootNode(core_element);
|
---|
| 429 | HashMap hp = new HashMap();
|
---|
| 430 | NodeList listOfFrequency = ex.getElementsByTagName("Document");
|
---|
| 431 |
|
---|
| 432 | for(int s=0; s<listOfFrequency.getLength() ; s++){
|
---|
| 433 | Node docNode = listOfFrequency.item(s);
|
---|
| 434 | Element docElement = (Element)docNode;
|
---|
| 435 | NodeList valueList = docElement.getElementsByTagName("ActualValue");
|
---|
| 436 |
|
---|
| 437 | for(int y = 0; y<valueList.getLength(); y++){
|
---|
| 438 | Element valueElement = (Element)valueList.item(y);
|
---|
| 439 | NodeList textFNList = valueElement.getChildNodes();
|
---|
| 440 | String text = ((Node)textFNList.item(0)).getNodeValue();
|
---|
| 441 |
|
---|
| 442 | if(!text.equals(" ")){
|
---|
| 443 | if(hp.containsKey(text)){
|
---|
| 444 | Integer i = (Integer)hp.get(text);
|
---|
| 445 | int number = i.intValue();
|
---|
| 446 | number++;
|
---|
| 447 | hp.put(text,new Integer(number));
|
---|
| 448 | }
|
---|
| 449 | else{
|
---|
| 450 | Integer i = new Integer(1);
|
---|
| 451 | hp.put(text, i);
|
---|
| 452 | }
|
---|
| 453 | }
|
---|
| 454 | }
|
---|
| 455 | }
|
---|
| 456 | return hp;
|
---|
| 457 | }
|
---|
| 458 |
|
---|
| 459 | public HashMap getDocFrequencyMap(String core_element){
|
---|
| 460 |
|
---|
| 461 | Element ex = getRootNode(core_element);
|
---|
| 462 | HashMap hp = new HashMap();
|
---|
| 463 | NodeList listOfFrequency = ex.getElementsByTagName("Document");
|
---|
| 464 |
|
---|
| 465 | for(int s=0; s<listOfFrequency.getLength() ; s++){
|
---|
| 466 |
|
---|
| 467 | Node docNode = listOfFrequency.item(s);
|
---|
| 468 | NamedNodeMap NodeMap = docNode.getAttributes();
|
---|
| 469 | Node AttributeNode = NodeMap.item(0);
|
---|
| 470 | String att_name = AttributeNode.getNodeValue();
|
---|
| 471 |
|
---|
| 472 | Element docElement = (Element)docNode;
|
---|
| 473 | NodeList valueList = docElement.getElementsByTagName("Frequency");
|
---|
| 474 | Element frequencyElement = (Element)valueList.item(0);
|
---|
| 475 | NodeList textFNList = frequencyElement.getChildNodes();
|
---|
| 476 | String text = ((Node)textFNList.item(0)).getNodeValue();
|
---|
| 477 | Integer i = new Integer(Integer.parseInt(text));
|
---|
| 478 | hp.put(att_name, i);
|
---|
| 479 | }
|
---|
| 480 | return hp;
|
---|
| 481 | }
|
---|
| 482 |
|
---|
| 483 |
|
---|
| 484 | public String[] getDocumentIDs(String core_element){
|
---|
| 485 |
|
---|
| 486 | Element ex = getRootNode("archivedir");
|
---|
| 487 | NodeList listOfFrequency = ex.getElementsByTagName("Document");
|
---|
| 488 | String[] ids = new String[listOfFrequency.getLength()];
|
---|
| 489 |
|
---|
| 490 | for(int s=0; s<listOfFrequency.getLength() ; s++){
|
---|
| 491 | Node docNode = listOfFrequency.item(s);
|
---|
| 492 | NamedNodeMap NodeMap = docNode.getAttributes();
|
---|
| 493 | Node AttributeNode = NodeMap.item(0);
|
---|
| 494 | String att_name = AttributeNode.getNodeValue();
|
---|
| 495 | ids[s] = att_name;
|
---|
| 496 | }
|
---|
| 497 | return (String[])ids.clone();
|
---|
| 498 | }
|
---|
| 499 |
|
---|
| 500 | public int[] getMetadataRows(String core_element){
|
---|
| 501 | Element ex = getRootNode(core_element);
|
---|
| 502 | //ArrayList alist = ms.getIDArray();
|
---|
| 503 | ArrayList alist = new ArrayList();
|
---|
| 504 | String[] idsx = getDocumentIDs(core_element);
|
---|
| 505 |
|
---|
| 506 | for(int i = 0; i<idsx.length; i++){
|
---|
| 507 | alist.add(idsx[i]);
|
---|
| 508 | }
|
---|
| 509 |
|
---|
| 510 | NodeList listOfFrequency = ex.getElementsByTagName("Document");
|
---|
| 511 | int[] row = new int[TotalDoc];
|
---|
| 512 |
|
---|
| 513 | for(int i = 0; i<row.length; i++){
|
---|
| 514 | row [i] = 0;
|
---|
| 515 | }
|
---|
| 516 |
|
---|
| 517 | for(int s=0; s<listOfFrequency.getLength() ; s++){
|
---|
| 518 | Node docNode = listOfFrequency.item(s);
|
---|
| 519 | int location = alist.indexOf(docNode.getAttributes().item(0).getNodeValue());
|
---|
| 520 | row[location] =1;
|
---|
| 521 |
|
---|
| 522 | }
|
---|
| 523 | return row;
|
---|
| 524 | }
|
---|
| 525 |
|
---|
| 526 | /**
|
---|
| 527 | * This method will use Arrays.sort for sorting Map
|
---|
| 528 | * @param map
|
---|
| 529 | * @return outputList of Map.Entries
|
---|
| 530 | */
|
---|
| 531 |
|
---|
| 532 | public ArrayList sortMap(Map map) {
|
---|
| 533 | ArrayList outputList = null;
|
---|
| 534 | int count = 0;
|
---|
| 535 | Set set = null;
|
---|
| 536 | Map.Entry[] entries = null;
|
---|
| 537 | // Logic:
|
---|
| 538 | // get a set from Map
|
---|
| 539 | // Build a Map.Entry[] from set
|
---|
| 540 | // Sort the list using Arrays.sort
|
---|
| 541 | // Add the sorted Map.Entries into arrayList and return
|
---|
| 542 |
|
---|
| 543 | set = (Set) map.entrySet();
|
---|
| 544 | Iterator iterator = set.iterator();
|
---|
| 545 | entries = new Map.Entry[set.size()];
|
---|
| 546 | while(iterator.hasNext()) {
|
---|
| 547 | entries[count++] = (Map.Entry) iterator.next();
|
---|
| 548 | }
|
---|
| 549 |
|
---|
| 550 | // Sort the entries with your own comparator for the values:
|
---|
| 551 | Arrays.sort(entries, new Comparator() {
|
---|
| 552 | public int compareTo(Object lhs, Object rhs) {
|
---|
| 553 | Map.Entry le = (Map.Entry)lhs;
|
---|
| 554 | Map.Entry re = (Map.Entry)rhs;
|
---|
| 555 | return ((Comparable)le.getValue()).compareTo((Comparable)re.getValue());
|
---|
| 556 | }
|
---|
| 557 |
|
---|
| 558 | public int compare(Object lhs, Object rhs) {
|
---|
| 559 | Map.Entry le = (Map.Entry)lhs;
|
---|
| 560 | Map.Entry re = (Map.Entry)rhs;
|
---|
| 561 | return ((Comparable)le.getValue()).compareTo((Comparable)re.getValue());
|
---|
| 562 | }
|
---|
| 563 | });
|
---|
| 564 |
|
---|
| 565 | outputList = new ArrayList();
|
---|
| 566 | for(int i = 0; i < entries.length; i++) {
|
---|
| 567 | outputList.add(entries[i]);
|
---|
| 568 | }
|
---|
| 569 | return outputList;
|
---|
| 570 | }//End of sortMap
|
---|
| 571 |
|
---|
| 572 | private Double div(Double d1, Double d2){
|
---|
| 573 | BigDecimal b1 = new BigDecimal(d1.toString());
|
---|
| 574 | BigDecimal b2 = new BigDecimal(d2.toString());
|
---|
| 575 | return new Double(b1.divide(b2,DEF_DIV_SCALE,BigDecimal.ROUND_HALF_UP).doubleValue());
|
---|
| 576 | }
|
---|
| 577 |
|
---|
| 578 | private Double mul(Double d1,Double d2){
|
---|
| 579 | BigDecimal b1 = new BigDecimal(d1.toString());
|
---|
| 580 | BigDecimal b2 = new BigDecimal(d2.toString());
|
---|
| 581 | return new Double(b1.multiply(b2).doubleValue());
|
---|
| 582 | }
|
---|
| 583 |
|
---|
| 584 | public double round(double v,int scale){
|
---|
| 585 | if(scale<0){
|
---|
| 586 | throw new IllegalArgumentException(
|
---|
| 587 | "The scale must be a positive integer or zero");
|
---|
| 588 | }
|
---|
| 589 | BigDecimal b = new BigDecimal(Double.toString(v));
|
---|
| 590 | BigDecimal one = new BigDecimal("1");
|
---|
| 591 | return b.divide(one,scale,BigDecimal.ROUND_HALF_UP).doubleValue();
|
---|
| 592 | }
|
---|
| 593 |
|
---|
| 594 | public double getSingleMetadataSetCompleteness(ArrayList mds_list){
|
---|
| 595 |
|
---|
| 596 | int totalElement = 0;
|
---|
| 597 | int totalElementUsed = 0;
|
---|
| 598 |
|
---|
| 599 | for(int a = 0; a<mds_list.size(); a++){
|
---|
| 600 | MetadataSet mds = (MetadataSet)mds_list.get(a);
|
---|
| 601 |
|
---|
| 602 | ArrayList alist = mds.getIndexsList();
|
---|
| 603 |
|
---|
| 604 | int length = alist.size();
|
---|
| 605 |
|
---|
| 606 | totalElement = totalElement + length * TotalDoc;
|
---|
| 607 | for(int i = 0; i<alist.size(); i++){
|
---|
| 608 | String name = (String)alist.get(i);
|
---|
| 609 |
|
---|
| 610 | totalElementUsed = totalElementUsed + getDocumentUsedElement(name);
|
---|
| 611 |
|
---|
| 612 | }
|
---|
| 613 | }
|
---|
| 614 | double x = (double)totalElementUsed/totalElement;
|
---|
| 615 | Double d1 = new Double(x);
|
---|
| 616 | Double d2 = new Double(100);
|
---|
| 617 | Double result = mul(d1,d2);
|
---|
| 618 |
|
---|
| 619 | return round(result.doubleValue(),1);
|
---|
| 620 | }
|
---|
| 621 |
|
---|
| 622 | public Object[][] getMetadataInfo(MetadataSet mds){
|
---|
| 623 |
|
---|
| 624 | ArrayList alist = mds.getIndexsList();
|
---|
| 625 | int rows = alist.size();
|
---|
| 626 | int cols = 11;
|
---|
| 627 | int y = 0;
|
---|
| 628 | Object[][] dataset = new Object[rows][cols];
|
---|
| 629 |
|
---|
| 630 | String[] list = new String[rows];
|
---|
| 631 |
|
---|
| 632 | for(int i = 0 ; i < list.length; i++){
|
---|
| 633 | list[i] = alist.get(i).toString();
|
---|
| 634 |
|
---|
| 635 | }
|
---|
| 636 |
|
---|
| 637 | Arrays.sort(list);
|
---|
| 638 |
|
---|
| 639 | for(int iu = 0; iu<list.length; iu++){
|
---|
| 640 | String xi = list[iu];
|
---|
| 641 | dataset[y][0] = xi ;
|
---|
| 642 | dataset[y][1] = new Integer(getFrequency(xi));
|
---|
| 643 | dataset[y][2] = new Integer(getDocumentUsedElement(xi));
|
---|
| 644 | dataset[y][3] = new Double (Mean(xi));
|
---|
| 645 | dataset[y][4] = new Double (Median(xi));
|
---|
| 646 | dataset[y][5] = new Integer(getDistinctNumber(xi));
|
---|
| 647 | dataset[y][6] = new Integer(getMinRange(xi));
|
---|
| 648 | dataset[y][7] = new Integer(getMaxRange(xi));
|
---|
| 649 | dataset[y][8] = new Double (Average(xi));
|
---|
| 650 | dataset[y][9] = new Integer(getMode(xi));
|
---|
| 651 | dataset[y][10] = ModeFrequency(xi)+"%";
|
---|
| 652 | y++;
|
---|
| 653 |
|
---|
| 654 | }
|
---|
| 655 | return dataset;
|
---|
| 656 | }
|
---|
| 657 |
|
---|
| 658 | public boolean IsElementEmpty(String core_element){
|
---|
| 659 |
|
---|
| 660 | int[] list = getMetadataRows(core_element);
|
---|
| 661 | boolean status = true;
|
---|
| 662 |
|
---|
| 663 | for(int i = 0; i< list.length; i++){
|
---|
| 664 | if(list[i]==1){status = false;}
|
---|
| 665 | }
|
---|
| 666 | return status;
|
---|
| 667 | }
|
---|
| 668 |
|
---|
| 669 | public boolean IsElementFull(String core_element){
|
---|
| 670 |
|
---|
| 671 | int[] list = getMetadataRows(core_element);
|
---|
| 672 | boolean status = true;
|
---|
| 673 |
|
---|
| 674 | for(int i = 0; i< list.length; i++){
|
---|
| 675 | if(list[i]==0){status = false;}
|
---|
| 676 | }
|
---|
| 677 | return status;
|
---|
| 678 | }
|
---|
| 679 |
|
---|
| 680 | public ArrayList removeDocument(ArrayList dataset, String[] ids, int number){
|
---|
| 681 | removedID = new ArrayList();
|
---|
| 682 |
|
---|
| 683 | int[] metadataLevel =(int[])dataset.get(0);
|
---|
| 684 | int docIDslength = metadataLevel.length;
|
---|
| 685 | int[][] valueMap = new int[dataset.size()][docIDslength];
|
---|
| 686 |
|
---|
| 687 | for(int i = 0; i< docIDslength; i++){
|
---|
| 688 |
|
---|
| 689 | boolean status = true;
|
---|
| 690 |
|
---|
| 691 | for(int j = 0; j<dataset.size(); j++){
|
---|
| 692 | int[] metadataLevelArray = (int[])dataset.get(j);
|
---|
| 693 | valueMap[j][i] = metadataLevelArray[i];
|
---|
| 694 | if(metadataLevelArray[i]!=number){status = false;}
|
---|
| 695 |
|
---|
| 696 | }
|
---|
| 697 | if(status == true){
|
---|
| 698 |
|
---|
| 699 | for(int j = 0; j<dataset.size(); j++){
|
---|
| 700 |
|
---|
| 701 | valueMap[j][i]=-1;
|
---|
| 702 |
|
---|
| 703 | }
|
---|
| 704 | removedID.add(ids[i]);
|
---|
| 705 | }
|
---|
| 706 | }
|
---|
| 707 | ArrayList wholeList = new ArrayList();
|
---|
| 708 |
|
---|
| 709 | for(int i = 0; i<valueMap.length; i++){
|
---|
| 710 | ArrayList numberList = new ArrayList();
|
---|
| 711 |
|
---|
| 712 | for(int j = 0; j<valueMap[i].length; j++){
|
---|
| 713 | numberList.add(new Integer(valueMap[i][j]));
|
---|
| 714 | }
|
---|
| 715 | wholeList.add(numberList);
|
---|
| 716 | }
|
---|
| 717 |
|
---|
| 718 | for(int i =0; i< wholeList.size(); i++){
|
---|
| 719 | ArrayList numberList = (ArrayList)wholeList.get(i);
|
---|
| 720 | Integer value = new Integer(-1);
|
---|
| 721 | while(numberList.contains(value)){
|
---|
| 722 | numberList.remove(value);
|
---|
| 723 | }
|
---|
| 724 | int[] valueList = new int [numberList.size()];
|
---|
| 725 |
|
---|
| 726 | for(int j = 0; j< valueList.length; j++){
|
---|
| 727 | valueList[j] = ((Integer)numberList.get(j)).intValue();
|
---|
| 728 | }
|
---|
| 729 | wholeList.remove(i);
|
---|
| 730 | wholeList.add(i,valueList);
|
---|
| 731 | }
|
---|
| 732 |
|
---|
| 733 | return wholeList;
|
---|
| 734 | }
|
---|
| 735 |
|
---|
| 736 | public ArrayList getRemovedID(){
|
---|
| 737 | return removedID;
|
---|
| 738 | }
|
---|
| 739 |
|
---|
| 740 | public int getDocNum(){
|
---|
| 741 | return TotalDoc;
|
---|
| 742 | }
|
---|
| 743 |
|
---|
| 744 | public HashMap getInternalIdentifier(String core_element){
|
---|
| 745 |
|
---|
| 746 | Element ex = getRootNode(core_element);
|
---|
| 747 | HashMap hp = new HashMap();
|
---|
| 748 | NodeList listOfFrequency = ex.getElementsByTagName("Document");
|
---|
| 749 |
|
---|
| 750 | for(int s=0; s<listOfFrequency.getLength() ; s++){
|
---|
| 751 | Node docNode = listOfFrequency.item(s);
|
---|
| 752 | String HashID = docNode.getAttributes().item(0).getNodeValue();
|
---|
| 753 | //System.out.println(HashID);
|
---|
| 754 | Element docElement = (Element)docNode;
|
---|
| 755 | NodeList valueList = docElement.getElementsByTagName("ActualValue");
|
---|
| 756 |
|
---|
| 757 | for(int y = 0; y<valueList.getLength(); y++){
|
---|
| 758 | Element valueElement = (Element)valueList.item(y);
|
---|
| 759 | NodeList textFNList = valueElement.getChildNodes();
|
---|
| 760 | String text = ((Node)textFNList.item(0)).getNodeValue();
|
---|
| 761 |
|
---|
| 762 | if(!text.equals(" ")){
|
---|
| 763 | if(hp.containsKey(text)){
|
---|
| 764 | InternalLink il = (InternalLink)hp.get(text);
|
---|
| 765 | il.increaseElement(HashID);
|
---|
| 766 | hp.put(text,il);
|
---|
| 767 | }
|
---|
| 768 | else{
|
---|
| 769 | InternalLink il = new InternalLink();
|
---|
| 770 | il.setValue(text);
|
---|
| 771 | il.increaseElement(HashID);
|
---|
| 772 | hp.put(text, il);
|
---|
| 773 | }
|
---|
| 774 | }
|
---|
| 775 | }
|
---|
| 776 | }
|
---|
| 777 | return hp;
|
---|
| 778 | }
|
---|
| 779 |
|
---|
| 780 |
|
---|
| 781 | public HashMap getIdentifierLink(String core_element){
|
---|
| 782 |
|
---|
| 783 | Element ex = getRootNode(core_element);
|
---|
| 784 | HashMap hp = new HashMap();
|
---|
| 785 | NodeList listOfFrequency = ex.getElementsByTagName("Document");
|
---|
| 786 |
|
---|
| 787 | for(int s=0; s<listOfFrequency.getLength() ; s++){
|
---|
| 788 | Node docNode = listOfFrequency.item(s);
|
---|
| 789 | String HashID = docNode.getAttributes().item(0).getNodeValue();
|
---|
| 790 | Element docElement = (Element)docNode;
|
---|
| 791 | NodeList valueList = docElement.getElementsByTagName("ActualValue");
|
---|
| 792 |
|
---|
| 793 | for(int y = 0; y<valueList.getLength(); y++){
|
---|
| 794 | Element valueElement = (Element)valueList.item(y);
|
---|
| 795 | NodeList textFNList = valueElement.getChildNodes();
|
---|
| 796 | String text = ((Node)textFNList.item(0)).getNodeValue();
|
---|
| 797 |
|
---|
| 798 | if(!text.equals(" ") && text.startsWith("http:")){
|
---|
| 799 | if(hp.containsKey(HashID)){
|
---|
| 800 | InternalLink il = (InternalLink)hp.get(HashID);
|
---|
| 801 | il.increaseElement(text);
|
---|
| 802 | hp.put(HashID,il);
|
---|
| 803 | }
|
---|
| 804 | else{
|
---|
| 805 | InternalLink il = new InternalLink();
|
---|
| 806 | il.setValue(HashID);
|
---|
| 807 | il.increaseElement(text);
|
---|
| 808 | hp.put(HashID, il);
|
---|
| 809 | }
|
---|
| 810 | }
|
---|
| 811 | }
|
---|
| 812 | }
|
---|
| 813 | return hp;
|
---|
| 814 | }
|
---|
| 815 |
|
---|
| 816 |
|
---|
| 817 | public String[] getDocumentIDList(String core_element){
|
---|
| 818 |
|
---|
| 819 | Element ex = getRootNode(core_element);
|
---|
| 820 | NodeList listOfFrequency = ex.getElementsByTagName("Document");
|
---|
| 821 | String[] ids = new String[listOfFrequency.getLength()];
|
---|
| 822 |
|
---|
| 823 | for(int s=0; s<listOfFrequency.getLength() ; s++){
|
---|
| 824 | Node docNode = listOfFrequency.item(s);
|
---|
| 825 | NamedNodeMap NodeMap = docNode.getAttributes();
|
---|
| 826 | Node AttributeNode = NodeMap.item(0);
|
---|
| 827 | String att_name = AttributeNode.getNodeValue();
|
---|
| 828 | ids[s] = att_name;
|
---|
| 829 | }
|
---|
| 830 | return (String[])ids.clone();
|
---|
| 831 | }
|
---|
| 832 |
|
---|
| 833 | }
|
---|
| 834 |
|
---|