[21927] | 1 | package org.greenstone.mat;
|
---|
[18093] | 2 |
|
---|
| 3 | import java.util.ArrayList;
|
---|
| 4 | import java.util.Arrays;
|
---|
| 5 | import java.util.Collection;
|
---|
| 6 | import java.util.Comparator;
|
---|
| 7 | import java.util.HashMap;
|
---|
| 8 | import java.util.Iterator;
|
---|
| 9 | import java.util.Map;
|
---|
| 10 | import java.util.Set;
|
---|
| 11 |
|
---|
| 12 | import javax.xml.parsers.DocumentBuilder;
|
---|
| 13 | import javax.xml.parsers.DocumentBuilderFactory;
|
---|
| 14 |
|
---|
| 15 | import java.io.File;
|
---|
| 16 | import java.io.PrintWriter;
|
---|
| 17 | import java.math.BigDecimal;
|
---|
| 18 |
|
---|
| 19 | import org.w3c.dom.Document;
|
---|
| 20 | import org.w3c.dom.Element;
|
---|
| 21 | import org.w3c.dom.NamedNodeMap;
|
---|
| 22 | import org.w3c.dom.Node;
|
---|
| 23 | import org.w3c.dom.NodeList;
|
---|
| 24 |
|
---|
| 25 | import org.greenstone.gsdl3.core.MessageRouter;
|
---|
| 26 | import org.greenstone.gsdl3.util.XMLConverter;
|
---|
| 27 |
|
---|
| 28 | public class DataMaker {
|
---|
| 29 |
|
---|
| 30 | MetadataStats ms;
|
---|
| 31 | ArrayList nameList = new ArrayList();
|
---|
| 32 |
|
---|
| 33 | private int Mode = 0;
|
---|
| 34 | private int TotalDoc = 0;
|
---|
| 35 | private String path = null;
|
---|
| 36 |
|
---|
| 37 | protected Document doc=null;
|
---|
| 38 | protected MessageRouter mr = null;
|
---|
| 39 | protected XMLConverter converter=null;
|
---|
| 40 | private ArrayList removedID = new ArrayList();
|
---|
| 41 |
|
---|
| 42 | private final String rootDocument = "archivedir";
|
---|
| 43 | private final String documentTag = "Document";
|
---|
| 44 | private final String frequencyTag ="Frequency";
|
---|
| 45 | private final String valueTag = "ActualValue";
|
---|
| 46 | private final String ASCII_sort = "ASCII";
|
---|
| 47 | private final String urlFile = "dc.Identifier";
|
---|
| 48 |
|
---|
| 49 | private static final int DEF_DIV_SCALE = 10;
|
---|
| 50 |
|
---|
/**
 * Binds this instance to the given statistics holder: keeps a reference to it,
 * takes over its element-name list and statistics directory, and then counts
 * the documents via {@link #setTotalDocNumber()}.
 *
 * @param stats supplier of {@code metadataNameList} and {@code StatsDirectory}
 */
public DataMaker(MetadataStats stats){
    this.ms = stats;
    this.path = stats.StatsDirectory;
    this.nameList = stats.metadataNameList;
    setTotalDocNumber();
}
|
---|
| 57 |
|
---|
| 58 | private Element getRootNode(String core_element){
|
---|
| 59 |
|
---|
| 60 | try{
|
---|
| 61 | DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
|
---|
| 62 | DocumentBuilder docBuilder = docBuilderFactory.newDocumentBuilder();
|
---|
| 63 | Document doc = docBuilder.newDocument();
|
---|
| 64 | doc = docBuilder.parse (new File(path+core_element+".xml"));
|
---|
| 65 | Element rootNode = doc.getDocumentElement();
|
---|
| 66 | return rootNode;
|
---|
| 67 | }catch (Exception e) {
|
---|
| 68 | e.printStackTrace();
|
---|
| 69 | return null;
|
---|
| 70 | }
|
---|
| 71 | }
|
---|
| 72 |
|
---|
| 73 | public void setTotalDocNumber(){
|
---|
| 74 | Element ex = getRootNode(rootDocument);
|
---|
| 75 | NodeList listOfFrequency = ex.getElementsByTagName(documentTag);
|
---|
| 76 | TotalDoc = listOfFrequency.getLength();
|
---|
| 77 | }
|
---|
| 78 |
|
---|
| 79 | public int getTotalDocNumber(){
|
---|
| 80 | return TotalDoc;
|
---|
| 81 | }
|
---|
| 82 |
|
---|
| 83 | public int getTotalElementUsed(){
|
---|
| 84 |
|
---|
| 85 | int totalNumber = 0;
|
---|
| 86 |
|
---|
| 87 | for(int i = 0 ; i<nameList.size(); i++){
|
---|
| 88 | Element rootElement = getRootNode((String)nameList.get(i));
|
---|
| 89 | NodeList listOfFrequency = rootElement.getElementsByTagName(frequencyTag);
|
---|
| 90 |
|
---|
| 91 | for(int a=0; a<listOfFrequency.getLength() ; a++){
|
---|
| 92 | Node FrequencyNode = listOfFrequency.item(a);
|
---|
| 93 | NodeList textFNList = FrequencyNode.getChildNodes();
|
---|
| 94 | String TextNode = textFNList.item(0).getNodeValue();
|
---|
| 95 | totalNumber = totalNumber + Integer.parseInt(TextNode);
|
---|
| 96 | }
|
---|
| 97 | }
|
---|
| 98 | return totalNumber;
|
---|
| 99 | }
|
---|
| 100 |
|
---|
| 101 | public int getFrequency(String core_element){
|
---|
| 102 |
|
---|
| 103 | int totalNumber = 0;
|
---|
| 104 | Element rootElement = getRootNode(core_element);
|
---|
| 105 | NodeList listOfFrequency = rootElement.getElementsByTagName(frequencyTag);
|
---|
| 106 |
|
---|
| 107 | if(listOfFrequency.getLength()==0){
|
---|
| 108 | return 0;
|
---|
| 109 | }
|
---|
| 110 |
|
---|
| 111 | for(int a=0; a<listOfFrequency.getLength(); a++){
|
---|
| 112 | Node FrequencyNode = listOfFrequency.item(a);
|
---|
| 113 | NodeList textFNList = FrequencyNode.getChildNodes();
|
---|
| 114 | String TextNode = textFNList.item(0).getNodeValue();
|
---|
| 115 | int count = Integer.parseInt(TextNode);
|
---|
| 116 | totalNumber = totalNumber + count;
|
---|
| 117 | }
|
---|
| 118 | return totalNumber;
|
---|
| 119 | }
|
---|
| 120 |
|
---|
| 121 | public int getDistinctNumber(String core_element){
|
---|
| 122 |
|
---|
| 123 | ArrayList alist = new ArrayList();
|
---|
| 124 | Element rootElement = getRootNode(core_element);
|
---|
| 125 | NodeList listOfFrequency = rootElement.getElementsByTagName(valueTag);
|
---|
| 126 |
|
---|
| 127 | if(listOfFrequency.getLength()==0){
|
---|
| 128 | return 0;
|
---|
| 129 | }
|
---|
| 130 |
|
---|
| 131 | for(int a=0; a<listOfFrequency.getLength(); a++){
|
---|
| 132 | Node ActualValueNode = listOfFrequency.item(a);
|
---|
| 133 | NodeList textFNList = ActualValueNode.getChildNodes();
|
---|
| 134 | String TextNode = textFNList.item(0).getNodeValue();
|
---|
| 135 |
|
---|
| 136 | if(!alist.contains(TextNode) && !TextNode.equals(" ")){
|
---|
| 137 | alist.add(TextNode);
|
---|
| 138 | }
|
---|
| 139 | }
|
---|
| 140 | return alist.size();
|
---|
| 141 | }
|
---|
| 142 |
|
---|
| 143 | public int getDocumentUsedElement(String core_element){
|
---|
| 144 |
|
---|
| 145 | int totalNumber = 0;
|
---|
| 146 | Element rootElement = getRootNode(core_element);
|
---|
| 147 | NodeList listOfFrequency = rootElement.getElementsByTagName(frequencyTag);
|
---|
| 148 |
|
---|
| 149 | for(int a=0; a<listOfFrequency.getLength(); a++){
|
---|
| 150 | totalNumber++;
|
---|
| 151 | }
|
---|
| 152 | return totalNumber;
|
---|
| 153 | }
|
---|
| 154 |
|
---|
| 155 | public double Mean(String core_element){
|
---|
| 156 |
|
---|
| 157 | int due = getDocumentUsedElement(core_element);
|
---|
| 158 |
|
---|
| 159 | if(due==0 || TotalDoc==0){
|
---|
| 160 | return 0;
|
---|
| 161 | }
|
---|
| 162 |
|
---|
| 163 | Double d1 = new Double(due);
|
---|
| 164 | Double d2 = new Double(TotalDoc);
|
---|
| 165 | Double result = div(d1,d2);
|
---|
| 166 | result = mul(result, new Double(100));
|
---|
| 167 | return round(result.doubleValue(),1);
|
---|
| 168 | }
|
---|
| 169 |
|
---|
| 170 | public int getMinRange(String core_element){
|
---|
| 171 |
|
---|
| 172 | Element rootElement = getRootNode(core_element);
|
---|
| 173 | NodeList listOfFrequency = rootElement.getElementsByTagName(frequencyTag);
|
---|
| 174 |
|
---|
| 175 | if(listOfFrequency.getLength()==0){
|
---|
| 176 | return 0;
|
---|
| 177 | }
|
---|
| 178 |
|
---|
| 179 | Node FrequencyNode = listOfFrequency.item(0);
|
---|
| 180 | NodeList textFNList = FrequencyNode.getChildNodes();
|
---|
| 181 | String TextNode = textFNList.item(0).getNodeValue();
|
---|
| 182 |
|
---|
| 183 | int minNumber = 0;
|
---|
| 184 |
|
---|
| 185 | if(listOfFrequency.getLength()==TotalDoc){
|
---|
| 186 | minNumber = Integer.parseInt(TextNode);
|
---|
| 187 | }
|
---|
| 188 |
|
---|
| 189 | else {
|
---|
| 190 | minNumber = 0;
|
---|
| 191 | }
|
---|
| 192 |
|
---|
| 193 | for(int a=0; a<listOfFrequency.getLength(); a++){
|
---|
| 194 | FrequencyNode = listOfFrequency.item(a);
|
---|
| 195 | textFNList = FrequencyNode.getChildNodes();
|
---|
| 196 | TextNode = textFNList.item(0).getNodeValue();
|
---|
| 197 | int x = Integer.parseInt(TextNode);
|
---|
| 198 | if(x<minNumber){minNumber=x;}
|
---|
| 199 | }
|
---|
| 200 | return minNumber;
|
---|
| 201 | }
|
---|
| 202 |
|
---|
| 203 | public int getMaxRange(String core_element){
|
---|
| 204 |
|
---|
| 205 | Element rootElement = getRootNode(core_element);
|
---|
| 206 | NodeList listOfFrequency = rootElement.getElementsByTagName(frequencyTag);
|
---|
| 207 |
|
---|
| 208 | if(listOfFrequency.getLength()==0){
|
---|
| 209 | return 0;
|
---|
| 210 | }
|
---|
| 211 |
|
---|
| 212 | Node FrequencyNode = listOfFrequency.item(0);
|
---|
| 213 | NodeList textFNList = FrequencyNode.getChildNodes();
|
---|
| 214 | String TextNode = textFNList.item(0).getNodeValue();
|
---|
| 215 | int maxNumber = 0;
|
---|
| 216 |
|
---|
| 217 | for(int a=0; a<listOfFrequency.getLength(); a++){
|
---|
| 218 | FrequencyNode = listOfFrequency.item(a);
|
---|
| 219 | textFNList = FrequencyNode.getChildNodes();
|
---|
| 220 | TextNode = textFNList.item(0).getNodeValue();
|
---|
| 221 | int x = Integer.parseInt(TextNode);
|
---|
| 222 | if(x>maxNumber){maxNumber=x;}
|
---|
| 223 | }
|
---|
| 224 | return maxNumber;
|
---|
| 225 | }
|
---|
| 226 |
|
---|
| 227 | public int getMode(String core_element){
|
---|
| 228 |
|
---|
| 229 | Element rootElement = getRootNode(core_element);
|
---|
| 230 | NodeList listOfDocument = rootElement.getElementsByTagName(documentTag);
|
---|
| 231 |
|
---|
| 232 | if(listOfDocument.getLength()==0){
|
---|
| 233 | Mode = 0;
|
---|
| 234 | return 0;
|
---|
| 235 | }
|
---|
| 236 |
|
---|
| 237 | ArrayList alist = new ArrayList();
|
---|
| 238 | String[] idList = getDocumentIDs(core_element);
|
---|
| 239 |
|
---|
| 240 | for(int i = 0; i<idList.length; i++){
|
---|
| 241 | alist.add(idList[i]);
|
---|
| 242 | }
|
---|
| 243 |
|
---|
| 244 | int[] list = new int[TotalDoc];
|
---|
| 245 |
|
---|
| 246 | for(int i = 0; i<list.length; i++){
|
---|
| 247 | list[i] = 0;
|
---|
| 248 | }
|
---|
| 249 |
|
---|
| 250 | for(int a=0; a<listOfDocument.getLength(); a++){
|
---|
| 251 | Node docNode = listOfDocument.item(a);
|
---|
| 252 | NamedNodeMap NodeIDMap = docNode.getAttributes();
|
---|
| 253 | Node DocNodeID = NodeIDMap.item(0);
|
---|
| 254 | String DocID = DocNodeID.getNodeValue();
|
---|
| 255 | Element xNode = (Element)docNode;
|
---|
| 256 | int location = alist.indexOf(DocID);
|
---|
| 257 | NodeList xList = xNode.getElementsByTagName(frequencyTag);
|
---|
| 258 | int frequency = Integer.parseInt(xList.item(0).getChildNodes().item(0).getNodeValue());
|
---|
| 259 | list[location] = frequency;
|
---|
| 260 | }
|
---|
| 261 |
|
---|
| 262 | Arrays.sort(list);
|
---|
| 263 |
|
---|
| 264 | int max_idx = 0; // Index of the maximum count
|
---|
| 265 | int max_cnt = 0;
|
---|
| 266 | int count = 0;
|
---|
| 267 |
|
---|
| 268 | for ( int i = 0; i <list.length; i++) {
|
---|
| 269 | count = 0;
|
---|
| 270 | for ( int j = 0; j < list.length; j++) {
|
---|
| 271 | if (list[i] == list[j]) {
|
---|
| 272 | count++;
|
---|
| 273 | }
|
---|
| 274 | }
|
---|
| 275 | if (count > max_cnt) {
|
---|
| 276 | max_cnt = count;
|
---|
| 277 | max_idx = i;
|
---|
| 278 | }
|
---|
| 279 | }
|
---|
| 280 | Mode = list [max_idx];
|
---|
| 281 | return list [max_idx];
|
---|
| 282 | }
|
---|
| 283 |
|
---|
| 284 | public double ModeFrequency(String core_element){
|
---|
| 285 |
|
---|
| 286 | Element rootElement = getRootNode(core_element);
|
---|
| 287 | NodeList listOfDocument = rootElement.getElementsByTagName(documentTag);
|
---|
| 288 |
|
---|
| 289 | if(listOfDocument.getLength()==0){
|
---|
| 290 | return 100;
|
---|
| 291 | }
|
---|
| 292 |
|
---|
| 293 | ArrayList alist = new ArrayList();
|
---|
| 294 | String[] idList = getDocumentIDs(core_element);
|
---|
| 295 |
|
---|
| 296 | for(int i = 0; i<idList.length; i++){
|
---|
| 297 | alist.add(idList[i]);
|
---|
| 298 | }
|
---|
| 299 |
|
---|
| 300 | int[] list = new int[TotalDoc];
|
---|
| 301 |
|
---|
| 302 | for(int i = 0; i<list.length; i++){
|
---|
| 303 | list[i] = 0;
|
---|
| 304 | }
|
---|
| 305 |
|
---|
| 306 | int length = alist.size();
|
---|
| 307 | int counter = 0;
|
---|
| 308 |
|
---|
| 309 | for(int a=0; a<listOfDocument.getLength(); a++){
|
---|
| 310 | Node docNode = listOfDocument.item(a);
|
---|
| 311 | NamedNodeMap NodeIDMap = docNode.getAttributes();
|
---|
| 312 | Node DocNodeID = NodeIDMap.item(0);
|
---|
| 313 | String DocID = DocNodeID.getNodeValue();
|
---|
| 314 | Element xNode = (Element)docNode;
|
---|
| 315 | int location = alist.indexOf(DocID);
|
---|
| 316 | NodeList xList = xNode.getElementsByTagName(frequencyTag);
|
---|
| 317 | int frequency = Integer.parseInt(xList.item(0).getChildNodes().item(0).getNodeValue());
|
---|
| 318 | list[location] = frequency;
|
---|
| 319 | }
|
---|
| 320 |
|
---|
| 321 | for(int i =0; i<list.length; i++){
|
---|
| 322 | if(list[i]==Mode){
|
---|
| 323 | counter++;
|
---|
| 324 | }
|
---|
| 325 | }
|
---|
| 326 |
|
---|
| 327 | Double result = div(new Double(counter), new Double(length));
|
---|
| 328 | result = mul(result ,new Double (100));
|
---|
| 329 | return round(result.doubleValue(),1);
|
---|
| 330 | }
|
---|
| 331 |
|
---|
| 332 | public double Median(String core_element){
|
---|
| 333 |
|
---|
| 334 | Element rootElement = getRootNode(core_element);
|
---|
| 335 | NodeList listOfDocument = rootElement.getElementsByTagName(documentTag);
|
---|
| 336 |
|
---|
| 337 | ArrayList alist = new ArrayList();
|
---|
| 338 | String[] idList = getDocumentIDs(core_element);
|
---|
| 339 |
|
---|
| 340 | for(int i = 0; i<idList.length; i++){
|
---|
| 341 | alist.add(idList[i]);
|
---|
| 342 | }
|
---|
| 343 |
|
---|
| 344 | int[] list = new int[TotalDoc];
|
---|
| 345 |
|
---|
| 346 | for(int i = 0; i<list.length; i++){
|
---|
| 347 | list[i] = 0;
|
---|
| 348 | }
|
---|
| 349 |
|
---|
| 350 | for(int a=0; a<listOfDocument.getLength(); a++){
|
---|
| 351 | Node docNode = listOfDocument.item(a);
|
---|
| 352 | NamedNodeMap NodeIDMap = docNode.getAttributes();
|
---|
| 353 | Node DocNodeID = NodeIDMap.item(0);
|
---|
| 354 | String DocID = DocNodeID.getNodeValue();
|
---|
| 355 | Element xNode = (Element)docNode;
|
---|
| 356 | int location = alist.indexOf(DocID);
|
---|
| 357 | NodeList xList = xNode.getElementsByTagName(frequencyTag);
|
---|
| 358 | int frequency = Integer.parseInt(xList.item(0).getChildNodes().item(0).getNodeValue());
|
---|
| 359 | list[location] = frequency;
|
---|
| 360 | }
|
---|
| 361 |
|
---|
| 362 | int length = alist.size();
|
---|
| 363 | int middle = length/2 -1;
|
---|
| 364 |
|
---|
| 365 | Arrays.sort(list);
|
---|
| 366 |
|
---|
| 367 | if(length % 2 == 1){
|
---|
| 368 | middle = middle + 1;
|
---|
| 369 | return list[middle];
|
---|
| 370 | }
|
---|
| 371 |
|
---|
| 372 | else{
|
---|
| 373 | return round((double)(list[middle]+list[middle+1])/2,1);
|
---|
| 374 | }
|
---|
| 375 | }
|
---|
| 376 |
|
---|
| 377 | public double Average(String core_element){
|
---|
| 378 |
|
---|
| 379 | int t1 = getFrequency(core_element);
|
---|
| 380 | int t2 = getDocumentUsedElement(core_element);
|
---|
| 381 |
|
---|
| 382 | if(t1==0 || t2==0){
|
---|
| 383 | return 0;
|
---|
| 384 | }
|
---|
| 385 |
|
---|
| 386 | Double result = div(new Double(t1),new Double(t2));
|
---|
| 387 | return round(result.doubleValue(),1);
|
---|
| 388 | }
|
---|
| 389 |
|
---|
| 390 | public Object[][] AllInformation(){
|
---|
| 391 |
|
---|
| 392 | int rows = nameList.size();
|
---|
| 393 | int cols = 11;
|
---|
| 394 | int y = 0;
|
---|
| 395 |
|
---|
| 396 | Object[][] info = new Object[rows][cols];
|
---|
| 397 | String[] list = new String[rows];
|
---|
| 398 |
|
---|
| 399 | for(int i = 0 ; i < list.length; i++){
|
---|
| 400 | list[i] = nameList.get(i).toString();
|
---|
| 401 | }
|
---|
| 402 |
|
---|
| 403 | Arrays.sort(list);
|
---|
| 404 |
|
---|
| 405 | for(int iu = 0; iu<list.length; iu++){
|
---|
| 406 | String xi = list[iu];
|
---|
| 407 | info[y][0] = xi ;
|
---|
| 408 | info[y][1] = new Integer(getFrequency(xi));
|
---|
| 409 | info[y][2] = new Integer(getDocumentUsedElement(xi));
|
---|
| 410 | info[y][3] = new Double(Mean(xi));
|
---|
| 411 | info[y][4] = new Double(Median(xi));
|
---|
| 412 | info[y][5] = new Integer(getDistinctNumber(xi));
|
---|
| 413 | info[y][6] = new Integer(getMinRange(xi));
|
---|
| 414 | info[y][7] = new Integer(getMaxRange(xi));
|
---|
| 415 | info[y][8] = new Double(Average(xi));
|
---|
| 416 | info[y][9] = new Integer(getMode(xi));
|
---|
| 417 | info[y][10] = ModeFrequency(xi)+"%";
|
---|
| 418 | y++;
|
---|
| 419 | }
|
---|
| 420 | return info;
|
---|
| 421 | }
|
---|
| 422 |
|
---|
| 423 | public String[] getSortList(String core_element,String sort){
|
---|
| 424 |
|
---|
| 425 | if(sort.equals(ASCII_sort)){
|
---|
| 426 | HashMap hp = getDistinctValueMap (core_element);
|
---|
| 427 | String[] temp = new String[hp.size()];
|
---|
| 428 | int counter = 0;
|
---|
| 429 | Set s = hp.keySet();
|
---|
| 430 | Iterator i = s.iterator();
|
---|
| 431 |
|
---|
| 432 | while(i.hasNext()){
|
---|
| 433 | temp[counter] = (String)i.next();
|
---|
| 434 | counter++;
|
---|
| 435 | }
|
---|
| 436 |
|
---|
| 437 | Arrays.sort(temp);
|
---|
| 438 | return temp;
|
---|
| 439 | }
|
---|
| 440 |
|
---|
| 441 | else{
|
---|
| 442 | Map m = getDistinctValueMap (core_element);
|
---|
| 443 | ArrayList outputList = sortMap(m);
|
---|
| 444 | String[] temp = new String[outputList.size()];
|
---|
| 445 |
|
---|
| 446 | for(int i = 0; i< outputList.size(); i++){
|
---|
| 447 | Map.Entry entry = (Map.Entry) outputList.get(i);
|
---|
| 448 | temp[i] = (String) entry.getKey();
|
---|
| 449 | }
|
---|
| 450 |
|
---|
| 451 | return temp;
|
---|
| 452 | }
|
---|
| 453 | }
|
---|
| 454 |
|
---|
| 455 |
|
---|
| 456 | public HashMap getDistinctValueMap(String core_element){
|
---|
| 457 |
|
---|
| 458 | Element rootElement = getRootNode(core_element);
|
---|
| 459 | HashMap hp = new HashMap();
|
---|
| 460 | NodeList listOfDocument = rootElement.getElementsByTagName(documentTag);
|
---|
| 461 |
|
---|
| 462 | for(int a=0; a<listOfDocument.getLength(); a++){
|
---|
| 463 |
|
---|
| 464 | Node docNode = listOfDocument.item(a);
|
---|
| 465 | Element docElement = (Element)docNode;
|
---|
| 466 | NodeList valueList = docElement.getElementsByTagName(valueTag);
|
---|
| 467 |
|
---|
| 468 | for(int b= 0; b<valueList.getLength(); b++){
|
---|
| 469 |
|
---|
| 470 | Element valueElement = (Element)valueList.item(b);
|
---|
| 471 | NodeList textFNList = valueElement.getChildNodes();
|
---|
| 472 | String text = ((Node)textFNList.item(0)).getNodeValue();
|
---|
| 473 |
|
---|
| 474 | if(!text.equals(" ")){
|
---|
| 475 | if(hp.containsKey(text)){
|
---|
| 476 | Integer i = (Integer)hp.get(text);
|
---|
| 477 | int number = i.intValue();
|
---|
| 478 | number++;
|
---|
| 479 | hp.put(text,new Integer(number));
|
---|
| 480 | }
|
---|
| 481 | else{
|
---|
| 482 | Integer i = new Integer(1);
|
---|
| 483 | hp.put(text, i);
|
---|
| 484 | }
|
---|
| 485 | }
|
---|
| 486 | }
|
---|
| 487 | }
|
---|
| 488 | return hp;
|
---|
| 489 | }
|
---|
| 490 |
|
---|
| 491 |
|
---|
| 492 | public HashMap getDocFrequencyMap(String core_element){
|
---|
| 493 |
|
---|
| 494 | Element rootElement = getRootNode(core_element);
|
---|
| 495 | HashMap hp = new HashMap();
|
---|
| 496 | NodeList listOfDocument= rootElement.getElementsByTagName(documentTag);
|
---|
| 497 |
|
---|
| 498 | for(int a=0; a<listOfDocument.getLength(); a++){
|
---|
| 499 |
|
---|
| 500 | Node docNode = listOfDocument.item(a);
|
---|
| 501 | NamedNodeMap NodeMap = docNode.getAttributes();
|
---|
| 502 | Node AttributeNode = NodeMap.item(0);
|
---|
| 503 | String att_name = AttributeNode.getNodeValue();
|
---|
| 504 |
|
---|
| 505 | Element docElement = (Element)docNode;
|
---|
| 506 | NodeList valueList = docElement.getElementsByTagName(frequencyTag);
|
---|
| 507 | Element frequencyElement = (Element)valueList.item(0);
|
---|
| 508 | NodeList textFNList = frequencyElement.getChildNodes();
|
---|
| 509 | String text = ((Node)textFNList.item(0)).getNodeValue();
|
---|
| 510 | Integer i = new Integer(Integer.parseInt(text));
|
---|
| 511 | hp.put(att_name, i);
|
---|
| 512 | }
|
---|
| 513 | return hp;
|
---|
| 514 | }
|
---|
| 515 |
|
---|
| 516 |
|
---|
| 517 | public String[] getDocumentIDs(String core_element){
|
---|
| 518 |
|
---|
| 519 | Element rootElement = getRootNode(rootDocument);
|
---|
| 520 | NodeList listOfDocument = rootElement.getElementsByTagName(documentTag);
|
---|
| 521 | String[] ids = new String[listOfDocument.getLength()];
|
---|
| 522 |
|
---|
| 523 | for(int a=0; a<listOfDocument.getLength(); a++){
|
---|
| 524 | Node docNode = listOfDocument.item(a);
|
---|
| 525 | NamedNodeMap NodeMap = docNode.getAttributes();
|
---|
| 526 | Node AttributeNode = NodeMap.item(0);
|
---|
| 527 | String att_name = AttributeNode.getNodeValue();
|
---|
| 528 | ids[a] = att_name;
|
---|
| 529 | }
|
---|
| 530 | return (String[])ids.clone();
|
---|
| 531 | }
|
---|
| 532 |
|
---|
| 533 | public int[] getMetadataRows(String core_element){
|
---|
| 534 | Element rootElement = getRootNode(core_element);
|
---|
| 535 | ArrayList alist = new ArrayList();
|
---|
| 536 | String[] idsx = getDocumentIDs(core_element);
|
---|
| 537 |
|
---|
| 538 | for(int i = 0; i<idsx.length; i++){
|
---|
| 539 | alist.add(idsx[i]);
|
---|
| 540 | }
|
---|
| 541 |
|
---|
| 542 | NodeList listOfDocument = rootElement.getElementsByTagName(documentTag);
|
---|
| 543 | int[] row = new int[TotalDoc];
|
---|
| 544 |
|
---|
| 545 | for(int i = 0; i<row.length; i++){
|
---|
| 546 | row [i] = 0;
|
---|
| 547 | }
|
---|
| 548 |
|
---|
| 549 | for(int a=0; a<listOfDocument.getLength(); a++){
|
---|
| 550 | Node docNode = listOfDocument.item(a);
|
---|
| 551 | int location = alist.indexOf(docNode.getAttributes().item(0).getNodeValue());
|
---|
| 552 | row[location] = 1;
|
---|
| 553 | }
|
---|
| 554 | return row;
|
---|
| 555 | }
|
---|
[18147] | 556 |
|
---|
| 557 | /**
|
---|
| 558 | * This method will use Arrays.sort for sorting Map
|
---|
| 559 | * @param map
|
---|
| 560 | * @return outputList of Map.Entries
|
---|
| 561 | */
|
---|
| 562 |
|
---|
[18093] | 563 | public ArrayList sortMap(Map map) {
|
---|
| 564 | ArrayList outputList = null;
|
---|
| 565 | int count = 0;
|
---|
| 566 | Set set = null;
|
---|
| 567 | Map.Entry[] entries = null;
|
---|
[18147] | 568 | // Logic:
|
---|
| 569 | // get a set from Map
|
---|
| 570 | // Build a Map.Entry[] from set
|
---|
| 571 | // Sort the list using Arrays.sort
|
---|
| 572 | // Add the sorted Map.Entries into arrayList and return
|
---|
[18093] | 573 |
|
---|
| 574 | set = (Set) map.entrySet();
|
---|
| 575 | Iterator iterator = set.iterator();
|
---|
| 576 | entries = new Map.Entry[set.size()];
|
---|
| 577 | while(iterator.hasNext()) {
|
---|
| 578 | entries[count++] = (Map.Entry) iterator.next();
|
---|
| 579 | }
|
---|
| 580 |
|
---|
[18147] | 581 | // Sort the entries with your own comparator for the values:
|
---|
[18093] | 582 | Arrays.sort(entries, new Comparator() {
|
---|
| 583 | public int compareTo(Object lhs, Object rhs) {
|
---|
| 584 | Map.Entry le = (Map.Entry)lhs;
|
---|
| 585 | Map.Entry re = (Map.Entry)rhs;
|
---|
| 586 | return ((Comparable)le.getValue()).compareTo((Comparable)re.getValue());
|
---|
| 587 | }
|
---|
| 588 |
|
---|
| 589 | public int compare(Object lhs, Object rhs) {
|
---|
| 590 | Map.Entry le = (Map.Entry)lhs;
|
---|
| 591 | Map.Entry re = (Map.Entry)rhs;
|
---|
| 592 | return ((Comparable)le.getValue()).compareTo((Comparable)re.getValue());
|
---|
| 593 | }
|
---|
| 594 | });
|
---|
| 595 |
|
---|
| 596 | outputList = new ArrayList();
|
---|
| 597 | for(int i = 0; i < entries.length; i++) {
|
---|
| 598 | outputList.add(entries[i]);
|
---|
| 599 | }
|
---|
| 600 | return outputList;
|
---|
[18147] | 601 | }//End of sortMap
|
---|
[18093] | 602 |
|
---|
| 603 | private Double div(Double d1, Double d2){
|
---|
| 604 | BigDecimal b1 = new BigDecimal(d1.toString());
|
---|
| 605 | BigDecimal b2 = new BigDecimal(d2.toString());
|
---|
| 606 | return new Double(b1.divide(b2,DEF_DIV_SCALE,BigDecimal.ROUND_HALF_UP).doubleValue());
|
---|
| 607 | }
|
---|
| 608 |
|
---|
| 609 | private Double mul(Double d1,Double d2){
|
---|
[18147] | 610 | //System.out.println(d1);
|
---|
| 611 | BigDecimal b1 = new BigDecimal(d1.toString());
|
---|
[18093] | 612 | BigDecimal b2 = new BigDecimal(d2.toString());
|
---|
| 613 | return new Double(b1.multiply(b2).doubleValue());
|
---|
| 614 | }
|
---|
| 615 |
|
---|
| 616 | public double round(double v,int scale){
|
---|
| 617 | if(scale<0){
|
---|
| 618 | throw new IllegalArgumentException(
|
---|
| 619 | "The scale must be a positive integer or zero");
|
---|
| 620 | }
|
---|
| 621 | BigDecimal b = new BigDecimal(Double.toString(v));
|
---|
| 622 | BigDecimal one = new BigDecimal("1");
|
---|
| 623 | return b.divide(one,scale,BigDecimal.ROUND_HALF_UP).doubleValue();
|
---|
| 624 | }
|
---|
| 625 |
|
---|
| 626 | public double getSingleMetadataSetCompleteness(ArrayList mds_list){
|
---|
| 627 |
|
---|
| 628 | int totalElement = 0;
|
---|
| 629 | int totalElementUsed = 0;
|
---|
| 630 |
|
---|
| 631 | for(int a = 0; a<mds_list.size(); a++){
|
---|
| 632 | MetadataSet mds = (MetadataSet)mds_list.get(a);
|
---|
| 633 | ArrayList alist = mds.getIndexsList();
|
---|
| 634 | int length = alist.size();
|
---|
| 635 | totalElement = totalElement + length * ms.getDocNum();
|
---|
| 636 |
|
---|
| 637 | for(int i = 0; i<alist.size(); i++){
|
---|
| 638 | String name = (String)alist.get(i);
|
---|
| 639 | totalElementUsed = totalElementUsed + getDocumentUsedElement(name);
|
---|
| 640 | }
|
---|
| 641 | }
|
---|
[18147] | 642 | //System.out.println(totalElementUsed +" "+ totalElement);
|
---|
| 643 | double x = (double)totalElementUsed/totalElement;
|
---|
| 644 | //System.out.println(x);
|
---|
| 645 | Double d1 = new Double(x);
|
---|
| 646 | //System.out.println(d1);
|
---|
| 647 | Double d2 = new Double(100);
|
---|
[18093] | 648 | Double result = mul(d1,d2);
|
---|
| 649 | return round(result.doubleValue(),1);
|
---|
| 650 | }
|
---|
| 651 |
|
---|
| 652 | public Object[][] getMetadataInfo(MetadataSet mds){
|
---|
| 653 |
|
---|
| 654 | ArrayList alist = mds.getIndexsList();
|
---|
| 655 | int rows = alist.size();
|
---|
| 656 | int cols = 11;
|
---|
| 657 | int y = 0;
|
---|
| 658 | Object[][] dataset = new Object[rows][cols];
|
---|
| 659 |
|
---|
| 660 | String[] list = new String[rows];
|
---|
| 661 |
|
---|
| 662 | for(int i = 0 ; i < list.length; i++){
|
---|
| 663 | list[i] = alist.get(i).toString();
|
---|
| 664 | }
|
---|
| 665 |
|
---|
| 666 | Arrays.sort(list);
|
---|
| 667 |
|
---|
| 668 | for(int iu = 0; iu<list.length; iu++){
|
---|
| 669 | String xi = list[iu];
|
---|
| 670 | dataset[y][0] = xi ;
|
---|
| 671 | dataset[y][1] = new Integer(getFrequency(xi));
|
---|
| 672 | dataset[y][2] = new Integer(getDocumentUsedElement(xi));
|
---|
| 673 | dataset[y][3] = new Double (Mean(xi));
|
---|
| 674 | dataset[y][4] = new Double (Median(xi));
|
---|
| 675 | dataset[y][5] = new Integer(getDistinctNumber(xi));
|
---|
| 676 | dataset[y][6] = new Integer(getMinRange(xi));
|
---|
| 677 | dataset[y][7] = new Integer(getMaxRange(xi));
|
---|
| 678 | dataset[y][8] = new Double (Average(xi));
|
---|
| 679 | dataset[y][9] = new Integer(getMode(xi));
|
---|
| 680 | dataset[y][10] = ModeFrequency(xi)+"%";
|
---|
| 681 | y++;
|
---|
| 682 |
|
---|
| 683 | }
|
---|
| 684 | return dataset;
|
---|
| 685 | }
|
---|
| 686 |
|
---|
| 687 | public boolean IsElementEmpty(String core_element){
|
---|
| 688 |
|
---|
| 689 | int[] list = getMetadataRows(core_element);
|
---|
| 690 | boolean status = true;
|
---|
| 691 |
|
---|
| 692 | for(int i=0; i<list.length; i++){
|
---|
| 693 | if(list[i]==1){status = false;}
|
---|
| 694 | }
|
---|
| 695 | return status;
|
---|
| 696 | }
|
---|
| 697 |
|
---|
| 698 | public boolean IsElementFull(String core_element){
|
---|
| 699 |
|
---|
| 700 | int[] list = getMetadataRows(core_element);
|
---|
| 701 | boolean status = true;
|
---|
| 702 |
|
---|
| 703 | for(int i=0; i<list.length; i++){
|
---|
| 704 | if(list[i]==0){status = false;}
|
---|
| 705 | }
|
---|
| 706 | return status;
|
---|
| 707 | }
|
---|
| 708 |
|
---|
| 709 | public ArrayList removeDocument(ArrayList dataset, String[] ids, int number){
|
---|
| 710 | removedID = new ArrayList();
|
---|
| 711 |
|
---|
| 712 | int[] metadataLevel =(int[])dataset.get(0);
|
---|
| 713 | int docIDslength = metadataLevel.length;
|
---|
| 714 | int[][] valueMap = new int[dataset.size()][docIDslength];
|
---|
| 715 |
|
---|
| 716 | for(int i = 0; i< docIDslength; i++){
|
---|
| 717 |
|
---|
| 718 | boolean status = true;
|
---|
| 719 |
|
---|
| 720 | for(int j = 0; j<dataset.size(); j++){
|
---|
| 721 | int[] metadataLevelArray = (int[])dataset.get(j);
|
---|
| 722 | valueMap[j][i] = metadataLevelArray[i];
|
---|
| 723 | if(metadataLevelArray[i]!=number){status = false;}
|
---|
| 724 | }
|
---|
| 725 | if(status == true){
|
---|
| 726 | for(int j = 0; j<dataset.size(); j++){
|
---|
| 727 | valueMap[j][i]=-1;
|
---|
| 728 |
|
---|
| 729 | }
|
---|
| 730 | removedID.add(ids[i]);
|
---|
| 731 | }
|
---|
| 732 | }
|
---|
| 733 | ArrayList wholeList = new ArrayList();
|
---|
| 734 |
|
---|
| 735 | for(int i = 0; i<valueMap.length; i++){
|
---|
| 736 | ArrayList numberList = new ArrayList();
|
---|
| 737 |
|
---|
| 738 | for(int j = 0; j<valueMap[i].length; j++){
|
---|
| 739 | numberList.add(new Integer(valueMap[i][j]));
|
---|
| 740 | }
|
---|
| 741 | wholeList.add(numberList);
|
---|
| 742 | }
|
---|
| 743 |
|
---|
| 744 | for(int i =0; i< wholeList.size(); i++){
|
---|
| 745 | ArrayList numberList = (ArrayList)wholeList.get(i);
|
---|
| 746 | Integer value = new Integer(-1);
|
---|
| 747 | while(numberList.contains(value)){
|
---|
| 748 | numberList.remove(value);
|
---|
| 749 | }
|
---|
| 750 | int[] valueList = new int [numberList.size()];
|
---|
| 751 |
|
---|
| 752 | for(int j = 0; j< valueList.length; j++){
|
---|
| 753 | valueList[j] = ((Integer)numberList.get(j)).intValue();
|
---|
| 754 | }
|
---|
| 755 | wholeList.remove(i);
|
---|
| 756 | wholeList.add(i,valueList);
|
---|
| 757 | }
|
---|
| 758 | return wholeList;
|
---|
| 759 | }
|
---|
| 760 |
|
---|
| 761 | public ArrayList getRemovedID(){
|
---|
| 762 | return removedID;
|
---|
| 763 | }
|
---|
| 764 |
|
---|
| 765 |
|
---|
| 766 | public HashMap getLinks(String[] args, String core_element){
|
---|
[18147] | 767 | Element rootElement = getRootNode(core_element);
|
---|
| 768 | HashMap hp = new HashMap();
|
---|
| 769 | NodeList listOfDocument = rootElement.getElementsByTagName("Document");
|
---|
| 770 | ArrayList tempList = new ArrayList();
|
---|
| 771 |
|
---|
| 772 |
|
---|
| 773 | for(int i = 0; i<args.length; i++){
|
---|
[18093] | 774 | tempList.add(args[i]);
|
---|
[18147] | 775 | }
|
---|
[18093] | 776 |
|
---|
[18147] | 777 | for(int s=0; s<listOfDocument.getLength() ; s++){
|
---|
[18093] | 778 | Node docNode = listOfDocument.item(s);
|
---|
| 779 | Element docElement = (Element)docNode;
|
---|
| 780 | NodeList valueList = docElement.getElementsByTagName(valueTag);
|
---|
| 781 |
|
---|
| 782 | if(valueList.getLength()==1){
|
---|
[18147] | 783 | String id = docNode.getAttributes().item(0).getNodeValue();
|
---|
| 784 |
|
---|
| 785 | Element valueElement = (Element)valueList.item(0);
|
---|
| 786 | NodeList textFNList = valueElement.getChildNodes();
|
---|
| 787 | String text = ((Node)textFNList.item(0)).getNodeValue();
|
---|
| 788 |
|
---|
| 789 | if(tempList.contains(text)){
|
---|
| 790 | hp.put(id, text);
|
---|
| 791 | }
|
---|
[18093] | 792 | }
|
---|
[18147] | 793 | }
|
---|
[18093] | 794 |
|
---|
[18147] | 795 | rootElement = getRootNode(urlFile);
|
---|
| 796 | listOfDocument = rootElement.getElementsByTagName(documentTag);
|
---|
| 797 | HashMap newHp = new HashMap();
|
---|
[18093] | 798 |
|
---|
[18147] | 799 | for(int a=0; a<listOfDocument.getLength(); a++){
|
---|
[18093] | 800 | Node docNode = listOfDocument.item(a);
|
---|
| 801 | Element docElement = (Element)docNode;
|
---|
| 802 | NodeList valueList = docElement.getElementsByTagName(valueTag);
|
---|
| 803 |
|
---|
| 804 | if(valueList.getLength()==1){
|
---|
[18147] | 805 | String id = docNode.getAttributes().item(0).getNodeValue();
|
---|
| 806 |
|
---|
| 807 | Element valueElement = (Element)valueList.item(0);
|
---|
| 808 | NodeList textFNList = valueElement.getChildNodes();
|
---|
| 809 | String text = ((Node)textFNList.item(0)).getNodeValue();
|
---|
| 810 | newHp.put(text,id);
|
---|
[18093] | 811 | }
|
---|
[18147] | 812 | }
|
---|
[18093] | 813 |
|
---|
[18147] | 814 | HashMap tempMap = new HashMap();
|
---|
| 815 | Collection c = hp.values();
|
---|
| 816 | Iterator i = c.iterator();
|
---|
| 817 | while(i.hasNext()){
|
---|
[18093] | 818 | String id = (String)i.next();
|
---|
| 819 | if(newHp.containsKey(id)){
|
---|
[18147] | 820 | String text = (String)newHp.get(id);
|
---|
| 821 | if(text.indexOf("http")!=-1){
|
---|
| 822 | tempMap.put((String)tempMap.get(id),text);
|
---|
| 823 | }
|
---|
[18093] | 824 | }
|
---|
[18147] | 825 | }
|
---|
| 826 |
|
---|
| 827 | return tempMap;
|
---|
[18093] | 828 | }
|
---|
| 829 |
|
---|
[18147] | 830 |
|
---|
[18093] | 831 | public ArrayList getURLMap(String elementName){
|
---|
| 832 |
|
---|
| 833 | String core_element =elementName;
|
---|
| 834 | Element rootElement = getRootNode(core_element);
|
---|
| 835 | ArrayList alist = new ArrayList();
|
---|
| 836 |
|
---|
| 837 | if(rootElement.equals(null)){
|
---|
[18147] | 838 | return new ArrayList();
|
---|
[18093] | 839 | }
|
---|
| 840 |
|
---|
| 841 | NodeList listOfDocument = rootElement.getElementsByTagName(documentTag);
|
---|
| 842 |
|
---|
| 843 | if(listOfDocument.getLength()==0){return new ArrayList();}
|
---|
| 844 |
|
---|
[18147] | 845 | //System.out.println(listOfDocument.getLength());
|
---|
| 846 | for(int a=0; a<listOfDocument.getLength(); a++){
|
---|
[18093] | 847 | Node docNode = listOfDocument.item(a);
|
---|
| 848 | Element docElement = (Element)docNode;
|
---|
| 849 | NodeList valueList = docElement.getElementsByTagName(valueTag);
|
---|
| 850 |
|
---|
[18147] | 851 | //System.out.println(valueList.getLength());
|
---|
| 852 | for(int b=0; b<valueList.getLength(); b++){
|
---|
[18093] | 853 | Element valueElement = (Element)valueList.item(b);
|
---|
| 854 | NodeList textFNList = valueElement.getChildNodes();
|
---|
| 855 | String text = ((Node)textFNList.item(0)).getNodeValue();
|
---|
| 856 |
|
---|
| 857 | if(!text.equals(" ")){
|
---|
[18147] | 858 | NamedNodeMap NodeIDMap = docNode .getAttributes();
|
---|
| 859 | Node DocNodeID = NodeIDMap.item(0);
|
---|
| 860 | String DocID = DocNodeID.getNodeValue();
|
---|
| 861 | if(alist.contains(DocID)){}
|
---|
| 862 | else{
|
---|
| 863 | alist.add(DocID);
|
---|
| 864 | }
|
---|
[18093] | 865 | }
|
---|
| 866 | }
|
---|
[18147] | 867 | }
|
---|
[18093] | 868 |
|
---|
[18147] | 869 | return alist;
|
---|
| 870 | }
|
---|
| 871 |
|
---|
[18093] | 872 | public HashMap getIdentifierLink(String core_element){
|
---|
| 873 |
|
---|
| 874 | Element rootElement = getRootNode(core_element);
|
---|
| 875 | HashMap hp = new HashMap();
|
---|
| 876 | NodeList listOfDocument = rootElement.getElementsByTagName(documentTag);
|
---|
| 877 |
|
---|
| 878 | for(int a=0; a<listOfDocument.getLength(); a++){
|
---|
| 879 | Node docNode = listOfDocument.item(a);
|
---|
| 880 | String HashID = docNode.getAttributes().item(0).getNodeValue();
|
---|
| 881 | Element docElement = (Element)docNode;
|
---|
| 882 | NodeList valueList = docElement.getElementsByTagName(valueTag);
|
---|
| 883 |
|
---|
| 884 | for(int y = 0; y<valueList.getLength(); y++){
|
---|
| 885 | Element valueElement = (Element)valueList.item(y);
|
---|
| 886 | NodeList textFNList = valueElement.getChildNodes();
|
---|
| 887 | String text = ((Node)textFNList.item(0)).getNodeValue();
|
---|
| 888 |
|
---|
| 889 | if(!text.equals(" ") && text.startsWith("http:")){
|
---|
| 890 | if(hp.containsKey(HashID)){
|
---|
| 891 | InternalLink il = (InternalLink)hp.get(HashID);
|
---|
| 892 | il.increaseElement(text);
|
---|
| 893 | hp.put(HashID,il);
|
---|
| 894 | }
|
---|
| 895 | else{
|
---|
| 896 | InternalLink il = new InternalLink();
|
---|
| 897 | il.setValue(HashID);
|
---|
| 898 | il.increaseElement(text);
|
---|
| 899 | hp.put(HashID, il);
|
---|
| 900 | }
|
---|
| 901 | }
|
---|
| 902 | }
|
---|
[18147] | 903 | }
|
---|
| 904 | return hp;
|
---|
| 905 | }
|
---|
| 906 |
|
---|
[18093] | 907 |
|
---|
| 908 | public String[] getDocumentIDList(String core_element){
|
---|
[18147] | 909 |
|
---|
[18093] | 910 | Element rootElement = getRootNode(core_element);
|
---|
| 911 | NodeList listOfDocument = rootElement.getElementsByTagName(documentTag);
|
---|
| 912 | String[] ids = new String[listOfDocument.getLength()];
|
---|
| 913 |
|
---|
| 914 | for(int a=0; a<listOfDocument.getLength(); a++){
|
---|
| 915 | Node docNode = listOfDocument .item(a);
|
---|
| 916 | NamedNodeMap NodeMap = docNode.getAttributes();
|
---|
| 917 | Node AttributeNode = NodeMap.item(0);
|
---|
| 918 | String att_name = AttributeNode.getNodeValue();
|
---|
| 919 | ids[a] = att_name;
|
---|
| 920 | }
|
---|
| 921 | return (String[])ids.clone();
|
---|
| 922 | }
|
---|
| 923 |
|
---|
| 924 |
|
---|
| 925 | public HashMap getInternalIdentifier(String core_element){
|
---|
| 926 |
|
---|
| 927 | Element rootElement = getRootNode(core_element);
|
---|
| 928 | HashMap hp = new HashMap();
|
---|
| 929 | NodeList listOfDocument = rootElement.getElementsByTagName(documentTag);
|
---|
| 930 |
|
---|
| 931 | for(int a=0; a<listOfDocument.getLength(); a++){
|
---|
| 932 | Node docNode = listOfDocument.item(a);
|
---|
| 933 | String HashID = docNode.getAttributes().item(0).getNodeValue();
|
---|
| 934 |
|
---|
| 935 | Element docElement = (Element)docNode;
|
---|
| 936 | NodeList valueList = docElement.getElementsByTagName(valueTag);
|
---|
| 937 |
|
---|
| 938 | for(int y = 0; y<valueList.getLength(); y++){
|
---|
| 939 | Element valueElement = (Element)valueList.item(y);
|
---|
| 940 | NodeList textFNList = valueElement.getChildNodes();
|
---|
| 941 | String text = ((Node)textFNList.item(0)).getNodeValue();
|
---|
| 942 |
|
---|
| 943 | if(!text.equals(" ")){
|
---|
| 944 | if(hp.containsKey(text)){
|
---|
| 945 | InternalLink il = (InternalLink)hp.get(text);
|
---|
| 946 | il.increaseElement(HashID);
|
---|
| 947 | hp.put(text,il);
|
---|
| 948 | }
|
---|
| 949 | else{
|
---|
| 950 | InternalLink il = new InternalLink();
|
---|
| 951 | il.setValue(text);
|
---|
| 952 | il.increaseElement(HashID);
|
---|
| 953 | hp.put(text, il);
|
---|
| 954 | }
|
---|
| 955 | }
|
---|
| 956 | }
|
---|
[18147] | 957 | }
|
---|
| 958 | return hp;
|
---|
| 959 | }
|
---|
| 960 |
|
---|
[18093] | 961 | public HashMap getIdentifierLinkNoIdentifier(){
|
---|
[18147] | 962 |
|
---|
[18093] | 963 | Element rootElement = getRootNode(rootDocument);
|
---|
| 964 | HashMap hp = new HashMap();
|
---|
| 965 | NodeList listOfDocument = rootElement.getElementsByTagName(documentTag);
|
---|
| 966 | String url ="No Source Available";
|
---|
[18147] | 967 | for(int s=0; s<listOfDocument.getLength() ; s++){
|
---|
[18093] | 968 | Node docNode = listOfDocument.item(s);
|
---|
| 969 | String HashID = docNode.getAttributes().item(0).getNodeValue();
|
---|
| 970 | InternalLink il = new InternalLink();
|
---|
[18147] | 971 | il.increaseElement(HashID);
|
---|
| 972 | hp.put(HashID, il);
|
---|
| 973 | }
|
---|
| 974 | return hp;
|
---|
| 975 | }
|
---|
[18093] | 976 | }
|
---|