source: other-projects/trunk/greenstone3-extension/mat/Greenstone3Project/src/org/greenstone3/ms/DataMaker.java@ 17156

Last change on this file since 17156 was 17156, checked in by cc108, 16 years ago

Adding the project Metadata Quality for Digital Libraries into the repository

File size: 24.3 KB
RevLine 
[17156]1package org.greenstone3.ms;
2
3import java.util.ArrayList;
4import java.util.Arrays;
5import java.util.Collection;
6import java.util.Comparator;
7import java.util.HashMap;
8import java.util.Iterator;
9import java.util.Map;
10import java.util.Set;
11import javax.xml.parsers.DocumentBuilder;
12import javax.xml.parsers.DocumentBuilderFactory;
13import java.io.File;
14import java.math.BigDecimal;
15
16import org.w3c.dom.Document;
17import org.w3c.dom.Element;
18import org.w3c.dom.NamedNodeMap;
19import org.w3c.dom.Node;
20import org.w3c.dom.NodeList;
21import org.greenstone.gsdl3.core.MessageRouter;
22import org.greenstone.gsdl3.util.XMLConverter;
23
24public class DataMaker {
25
// Statistics object handed to the constructor.
26 MetadataStats ms;
// Taken from ms.metadataNameList; entries are cast to String and used as XML file base names.
27 ArrayList nameList;
// Value computed by the most recent getMode() call; ModeFrequency() reads it.
28 private int Mode;
// Number of Document elements in archivedir.xml, set by setTotalDocNumber().
29 private int TotalDoc;
// Directory holding the <name>.xml files (ms.myNewDir).
30 private String path;
31
// NOTE(review): doc, mr and converter are never assigned or read by the code
// visible in this class; presumably kept for subclasses - confirm before removing.
32 protected Document doc=null;
33 protected MessageRouter mr = null;
34 protected XMLConverter converter=null;
// Document ids dropped by the most recent removeDocument() call.
35 private ArrayList removedID = new ArrayList();
// Cache consulted by getRootNode(); the only put is commented out there, so it stays empty.
36 private HashMap elementMap = new HashMap();
// Number of decimal places kept by div().
37 private static final int DEF_DIV_SCALE = 10;
38
/**
 * Creates a data maker backed by the given statistics object and immediately
 * counts the documents listed in archivedir.xml.
 *
 * @param arg1 supplies the metadata element names (metadataNameList) and the
 *             directory containing the XML files (myNewDir)
 */
public DataMaker(MetadataStats arg1){
    this.ms = arg1;
    this.path = arg1.myNewDir;
    this.nameList = arg1.metadataNameList;
    // path has to be set before this call: it parses <path>/archivedir.xml
    setTotalDocNumber();
}
45
46
47 private Element getRootNode(String core_element){
48
49 if(elementMap.containsKey(core_element)){
50 return (Element)elementMap.get(core_element);
51 }
52 try{
53 DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
54 DocumentBuilder docBuilder = docBuilderFactory.newDocumentBuilder();
55 Document doc = docBuilder.newDocument();
56 doc = docBuilder.parse (new File(path+"/"+core_element+".xml"));
57 Element rootNode = doc.getDocumentElement();
58 //elementMap.put(core_element, rootNode);
59 return rootNode;
60 }catch (Exception e) {
61 e.printStackTrace();
62 return null;
63 }
64 }
65
66
67 public void setTotalDocNumber(){
68 Element ex = getRootNode("archivedir");
69 NodeList listOfFrequency = ex.getElementsByTagName("Document");
70 TotalDoc = listOfFrequency.getLength();
71 }
72
73 public int getTotalElementUsed(){
74
75 int totalNumber = 0;
76
77 for(int i = 0 ; i<nameList.size(); i++){
78
79 Element ex = getRootNode((String)nameList.get(i));
80 NodeList listOfFrequency = ex.getElementsByTagName("Frequency");
81
82 for(int s=0; s<listOfFrequency.getLength() ; s++){
83 Node FrequencyNode = listOfFrequency.item(s);
84 NodeList textFNList = FrequencyNode.getChildNodes();
85 String TextNode = textFNList.item(0).getNodeValue();
86 totalNumber = totalNumber + Integer.parseInt(TextNode);
87 }
88 }
89 return totalNumber;
90 }
91
92 public int getFrequency(String name){
93
94 int totalNumber = 0;
95
96 Element ex = getRootNode(name);
97 NodeList listOfFrequency = ex.getElementsByTagName("Frequency");
98 if(listOfFrequency.getLength()==0){return 0;}
99
100 for(int s=0; s<listOfFrequency.getLength() ; s++){
101 Node FrequencyNode = listOfFrequency.item(s);
102 NodeList textFNList = FrequencyNode.getChildNodes();
103 String TextNode = textFNList.item(0).getNodeValue();
104 int x = Integer.parseInt(TextNode);
105 totalNumber = totalNumber + x;
106 }
107 return totalNumber;
108 }
109
110 public int getDistinctNumber(String name){
111
112 ArrayList alist = new ArrayList();
113 Element ex = getRootNode(name);
114 NodeList listOfFrequency = ex.getElementsByTagName("ActualValue");
115
116 if(listOfFrequency.getLength()==0){
117 return 0;
118 }
119
120 for(int s=0; s<listOfFrequency.getLength() ; s++){
121 Node ActualValueNode = listOfFrequency.item(s);
122 NodeList textFNList = ActualValueNode.getChildNodes();
123 String TextNode = textFNList.item(0).getNodeValue();
124
125 if(!alist.contains(TextNode) && !TextNode.equals(" ")){
126 alist.add(TextNode);
127 }
128 }
129 return alist.size();
130 }
131
132 public int getDocumentUsedElement(String core_element){
133
134 int totalNumber = 0;
135 Element ex = getRootNode(core_element);
136 NodeList listOfFrequency = ex.getElementsByTagName("Frequency");
137 for(int s=0; s<listOfFrequency.getLength() ; s++){
138 totalNumber++;
139 }
140 return totalNumber;
141 }
142
143 // for all elements
144 public double Mean(String core_element){
145
146 int due = getDocumentUsedElement(core_element);
147
148 if(due==0 || TotalDoc==0){return 0;}
149 Double d1 = new Double(due);
150 Double d2 = new Double(TotalDoc);
151 Double result = div(d1,d2);
152 result = mul(result, new Double(100));
153 return round(result.doubleValue(),1);
154 }
155
156 public int getMinRange(String core_element){
157
158 Element ex = getRootNode(core_element);
159 NodeList listOfFrequency = ex.getElementsByTagName("Frequency");
160
161 if(listOfFrequency.getLength()==0){return 0;}
162
163 Node FrequencyNode = listOfFrequency.item(0);
164 NodeList textFNList = FrequencyNode.getChildNodes();
165 String TextNode = textFNList.item(0).getNodeValue();
166 int minNumber = 0;
167
168 if(listOfFrequency.getLength()==TotalDoc){
169 minNumber = Integer.parseInt(TextNode);
170 }
171
172 else {
173 minNumber = 0;
174 }
175
176 for(int s=0; s<listOfFrequency.getLength() ; s++){
177 FrequencyNode = listOfFrequency.item(s);
178 textFNList = FrequencyNode.getChildNodes();
179 TextNode = textFNList.item(0).getNodeValue();
180 int x = Integer.parseInt(TextNode);
181 if(x<minNumber){minNumber=x;}
182 }
183 return minNumber;
184 }
185
186 public int getMaxRange(String core_element){
187
188 Element ex = getRootNode(core_element);
189 NodeList listOfFrequency = ex.getElementsByTagName("Frequency");
190 if(listOfFrequency.getLength()==0){return 0;}
191 Node FrequencyNode = listOfFrequency.item(0);
192 NodeList textFNList = FrequencyNode.getChildNodes();
193 String TextNode = textFNList.item(0).getNodeValue();
194 int maxNumber = 0;
195
196 for(int s=0; s<listOfFrequency.getLength() ; s++){
197 FrequencyNode = listOfFrequency.item(s);
198 textFNList = FrequencyNode.getChildNodes();
199 TextNode = textFNList.item(0).getNodeValue();
200 int x = Integer.parseInt(TextNode);
201 if(x>maxNumber){maxNumber=x;}
202 }
203 return maxNumber;
204 }
205
206 public int getMode(String core_element){
207
208 Element ex = getRootNode(core_element);
209 NodeList listOfFrequency = ex.getElementsByTagName("Document");
210 if(listOfFrequency.getLength()==0){Mode = 0; return 0;}
211 ArrayList alist = new ArrayList();
212 String[] idsx = getDocumentIDs(core_element);
213
214 for(int i = 0; i<idsx.length; i++){
215 alist.add(idsx[i]);
216 }
217 int[] list = new int[TotalDoc];
218
219 for(int i = 0; i<list.length; i++){
220 list[i] = 0;
221 }
222
223 for(int s=0; s< listOfFrequency.getLength() ; s++){
224 Node docNode = listOfFrequency.item(s);
225 NamedNodeMap NodeIDMap = docNode.getAttributes();
226 Node DocNodeID = NodeIDMap.item(0);
227 String DocID = DocNodeID.getNodeValue();
228 Element xNode = (Element)docNode;
229 int location = alist.indexOf(DocID);
230 NodeList xList = xNode.getElementsByTagName("Frequency");
231 int fre = Integer.parseInt(xList.item(0).getChildNodes().item(0).getNodeValue());
232 list[location] = fre;
233 }
234
235 Arrays.sort(list);
236
237 int max_idx = 0; // Index of the maximum count
238 int max_cnt = 0;
239 int count = 0;
240
241 for ( int i = 0; i <list.length; i++) {
242 count = 0;
243 for ( int j = 0; j < list.length; j++) {
244 if (list[i] == list[j]) {
245 count++;
246 }
247 }
248 if (count > max_cnt) {
249 max_cnt = count;
250 max_idx = i;
251 }
252 }
253 Mode = list [max_idx];
254 return list [max_idx];
255
256 }
257
258 public double ModeFrequency(String core_element){
259
260 Element ex = getRootNode(core_element);
261 NodeList listOfFrequency = ex.getElementsByTagName("Document");
262 if(listOfFrequency.getLength()==0){ return 100;}
263 ArrayList alist = new ArrayList();
264 String[] idsx = getDocumentIDs(core_element);
265
266 for(int i = 0; i<idsx.length; i++){
267 alist.add(idsx[i]);
268 }
269
270 int[] list = new int[TotalDoc];
271
272 for(int i = 0; i<list.length; i++){
273 list[i] = 0;
274 }
275
276 int length = alist.size();
277 int counter = 0;
278
279 for(int s=0; s< listOfFrequency.getLength() ; s++){
280 Node docNode = listOfFrequency.item(s);
281 NamedNodeMap NodeIDMap = docNode.getAttributes();
282 Node DocNodeID = NodeIDMap.item(0);
283 String DocID = DocNodeID.getNodeValue();
284 Element xNode = (Element)docNode;
285 int location = alist.indexOf(DocID);
286 NodeList xList = xNode.getElementsByTagName("Frequency");
287 int fre = Integer.parseInt(xList.item(0).getChildNodes().item(0).getNodeValue());
288 list[location] = fre;
289 }
290
291 for(int i =0; i<list.length; i++){
292 if(list[i]==Mode){counter++;}
293 }
294
295 Double result = div(new Double(counter), new Double(length));
296 result = mul(result ,new Double (100));
297 return round(result.doubleValue(),1);
298 }
299
300 public double Median(String core_element){
301
302 Element ex = getRootNode(core_element);
303 NodeList listOfFrequency = ex.getElementsByTagName("Document");
304
305 ArrayList alist = new ArrayList();
306 String[] idsx = getDocumentIDs(core_element);
307
308 for(int i = 0; i<idsx.length; i++){
309 alist.add(idsx[i]);
310 }
311
312 int[] list = new int[TotalDoc];
313
314 for(int i = 0; i<list.length; i++){
315 list[i] = 0;
316 }
317
318 for(int s=0; s< listOfFrequency.getLength() ; s++){
319 Node docNode = listOfFrequency.item(s);
320 NamedNodeMap NodeIDMap = docNode.getAttributes();
321 Node DocNodeID = NodeIDMap.item(0);
322 String DocID = DocNodeID.getNodeValue();
323 Element xNode = (Element)docNode;
324 int location = alist.indexOf(DocID);
325 NodeList xList = xNode.getElementsByTagName("Frequency");
326 int fre = Integer.parseInt(xList.item(0).getChildNodes().item(0).getNodeValue());
327 list[location] = fre;
328 }
329
330 int length = alist.size();
331 int middle = length/2 -1;
332
333 Arrays.sort(list);
334
335 if(length % 2 == 1){
336 middle = middle + 1;
337 return list[middle];
338 }
339
340 else{
341 return round((double)(list[middle]+list[middle+1])/2,1);
342 }
343 }
344
345 // for all elements
346 public double Average(String core_element){
347
348 int t1 = getFrequency(core_element);
349 int t2 = getDocumentUsedElement(core_element);
350 if(t1==0 || t2==0){return 0;}
351 Double result = div(new Double(t1),new Double(t2));
352 return round(result.doubleValue(),1);
353 }
354
355 public Object[][] AllInformation(){
356
357 int rows = nameList.size();
358 int cols = 11;
359 int y = 0;
360
361 Object[][] info = new Object[rows][cols];
362 String[] list = new String[rows];
363
364 for(int i = 0 ; i < list.length; i++){
365 list[i] = nameList.get(i).toString();
366 }
367
368 Arrays.sort(list);
369
370 for(int iu = 0; iu<list.length; iu++){
371 String xi = list[iu];
372 info[y][0] = xi ;
373 info[y][1] = new Integer(getFrequency(xi));
374 info[y][2] = new Integer(getDocumentUsedElement(xi));
375 info[y][3] = new Double(Mean(xi));
376 info[y][4] = new Double(Median(xi));
377 info[y][5] = new Integer(getDistinctNumber(xi));
378 info[y][6] = new Integer(getMinRange(xi));
379 info[y][7] = new Integer(getMaxRange(xi));
380 info[y][8] = new Double(Average(xi));
381 info[y][9] = new Integer(getMode(xi));
382 info[y][10] = ModeFrequency(xi)+"%";
383 y++;
384 }
385 return info;
386 }
387
388 public String[] getSortList(String core_element,String sort){
389
390 if(sort.equals("ASCII")){
391 HashMap hp = getDistinctValueMap (core_element);
392 String[] temp = new String[hp.size()];
393 int counter = 0;
394 Set s = hp.keySet();
395 Iterator i = s.iterator();
396
397 while(i.hasNext()){
398 temp[counter] = (String)i.next();
399 counter++;
400 }
401
402 Arrays.sort(temp);
403 return temp;
404 }
405
406 else{
407 Map m = getDistinctValueMap (core_element);
408 ArrayList outputList = sortMap(m);
409 String[] temp = new String[outputList.size()];
410
411 for(int i = 0; i< outputList.size(); i++){
412 Map.Entry entry = (Map.Entry) outputList.get(i);
413 temp[i] = (String) entry.getKey();
414 }
415
416 return temp;
417 }
418
419 }
420
421
422 /*
423 * Actually, we can directly access to "ActualValue" node instead of document node
424 */
425
426 public HashMap getDistinctValueMap(String core_element){
427
428 Element ex = getRootNode(core_element);
429 HashMap hp = new HashMap();
430 NodeList listOfFrequency = ex.getElementsByTagName("Document");
431
432 for(int s=0; s<listOfFrequency.getLength() ; s++){
433 Node docNode = listOfFrequency.item(s);
434 Element docElement = (Element)docNode;
435 NodeList valueList = docElement.getElementsByTagName("ActualValue");
436
437 for(int y = 0; y<valueList.getLength(); y++){
438 Element valueElement = (Element)valueList.item(y);
439 NodeList textFNList = valueElement.getChildNodes();
440 String text = ((Node)textFNList.item(0)).getNodeValue();
441
442 if(!text.equals(" ")){
443 if(hp.containsKey(text)){
444 Integer i = (Integer)hp.get(text);
445 int number = i.intValue();
446 number++;
447 hp.put(text,new Integer(number));
448 }
449 else{
450 Integer i = new Integer(1);
451 hp.put(text, i);
452 }
453 }
454 }
455 }
456 return hp;
457 }
458
459 public HashMap getDocFrequencyMap(String core_element){
460
461 Element ex = getRootNode(core_element);
462 HashMap hp = new HashMap();
463 NodeList listOfFrequency = ex.getElementsByTagName("Document");
464
465 for(int s=0; s<listOfFrequency.getLength() ; s++){
466
467 Node docNode = listOfFrequency.item(s);
468 NamedNodeMap NodeMap = docNode.getAttributes();
469 Node AttributeNode = NodeMap.item(0);
470 String att_name = AttributeNode.getNodeValue();
471
472 Element docElement = (Element)docNode;
473 NodeList valueList = docElement.getElementsByTagName("Frequency");
474 Element frequencyElement = (Element)valueList.item(0);
475 NodeList textFNList = frequencyElement.getChildNodes();
476 String text = ((Node)textFNList.item(0)).getNodeValue();
477 Integer i = new Integer(Integer.parseInt(text));
478 hp.put(att_name, i);
479 }
480 return hp;
481 }
482
483
484 public String[] getDocumentIDs(String core_element){
485
486 Element ex = getRootNode("archivedir");
487 NodeList listOfFrequency = ex.getElementsByTagName("Document");
488 String[] ids = new String[listOfFrequency.getLength()];
489
490 for(int s=0; s<listOfFrequency.getLength() ; s++){
491 Node docNode = listOfFrequency.item(s);
492 NamedNodeMap NodeMap = docNode.getAttributes();
493 Node AttributeNode = NodeMap.item(0);
494 String att_name = AttributeNode.getNodeValue();
495 ids[s] = att_name;
496 }
497 return (String[])ids.clone();
498 }
499
500 public int[] getMetadataRows(String core_element){
501 Element ex = getRootNode(core_element);
502 //ArrayList alist = ms.getIDArray();
503 ArrayList alist = new ArrayList();
504 String[] idsx = getDocumentIDs(core_element);
505
506 for(int i = 0; i<idsx.length; i++){
507 alist.add(idsx[i]);
508 }
509
510 NodeList listOfFrequency = ex.getElementsByTagName("Document");
511 int[] row = new int[TotalDoc];
512
513 for(int i = 0; i<row.length; i++){
514 row [i] = 0;
515 }
516
517 for(int s=0; s<listOfFrequency.getLength() ; s++){
518 Node docNode = listOfFrequency.item(s);
519 int location = alist.indexOf(docNode.getAttributes().item(0).getNodeValue());
520 row[location] =1;
521
522 }
523 return row;
524 }
525
526 /**
527 * This method will use Arrays.sort for sorting Map
528 * @param map
529 * @return outputList of Map.Entries
530 */
531
532 public ArrayList sortMap(Map map) {
533 ArrayList outputList = null;
534 int count = 0;
535 Set set = null;
536 Map.Entry[] entries = null;
537 // Logic:
538 // get a set from Map
539 // Build a Map.Entry[] from set
540 // Sort the list using Arrays.sort
541 // Add the sorted Map.Entries into arrayList and return
542
543 set = (Set) map.entrySet();
544 Iterator iterator = set.iterator();
545 entries = new Map.Entry[set.size()];
546 while(iterator.hasNext()) {
547 entries[count++] = (Map.Entry) iterator.next();
548 }
549
550 // Sort the entries with your own comparator for the values:
551 Arrays.sort(entries, new Comparator() {
552 public int compareTo(Object lhs, Object rhs) {
553 Map.Entry le = (Map.Entry)lhs;
554 Map.Entry re = (Map.Entry)rhs;
555 return ((Comparable)le.getValue()).compareTo((Comparable)re.getValue());
556 }
557
558 public int compare(Object lhs, Object rhs) {
559 Map.Entry le = (Map.Entry)lhs;
560 Map.Entry re = (Map.Entry)rhs;
561 return ((Comparable)le.getValue()).compareTo((Comparable)re.getValue());
562 }
563 });
564
565 outputList = new ArrayList();
566 for(int i = 0; i < entries.length; i++) {
567 outputList.add(entries[i]);
568 }
569 return outputList;
570 }//End of sortMap
571
572 private Double div(Double d1, Double d2){
573 BigDecimal b1 = new BigDecimal(d1.toString());
574 BigDecimal b2 = new BigDecimal(d2.toString());
575 return new Double(b1.divide(b2,DEF_DIV_SCALE,BigDecimal.ROUND_HALF_UP).doubleValue());
576 }
577
578 private Double mul(Double d1,Double d2){
579 BigDecimal b1 = new BigDecimal(d1.toString());
580 BigDecimal b2 = new BigDecimal(d2.toString());
581 return new Double(b1.multiply(b2).doubleValue());
582 }
583
584 public double round(double v,int scale){
585 if(scale<0){
586 throw new IllegalArgumentException(
587 "The scale must be a positive integer or zero");
588 }
589 BigDecimal b = new BigDecimal(Double.toString(v));
590 BigDecimal one = new BigDecimal("1");
591 return b.divide(one,scale,BigDecimal.ROUND_HALF_UP).doubleValue();
592 }
593
594 public double getSingleMetadataSetCompleteness(ArrayList mds_list){
595
596 int totalElement = 0;
597 int totalElementUsed = 0;
598
599 for(int a = 0; a<mds_list.size(); a++){
600 MetadataSet mds = (MetadataSet)mds_list.get(a);
601
602 ArrayList alist = mds.getIndexsList();
603
604 int length = alist.size();
605
606 totalElement = totalElement + length * TotalDoc;
607 for(int i = 0; i<alist.size(); i++){
608 String name = (String)alist.get(i);
609
610 totalElementUsed = totalElementUsed + getDocumentUsedElement(name);
611
612 }
613 }
614 double x = (double)totalElementUsed/totalElement;
615 Double d1 = new Double(x);
616 Double d2 = new Double(100);
617 Double result = mul(d1,d2);
618
619 return round(result.doubleValue(),1);
620 }
621
622 public Object[][] getMetadataInfo(MetadataSet mds){
623
624 ArrayList alist = mds.getIndexsList();
625 int rows = alist.size();
626 int cols = 11;
627 int y = 0;
628 Object[][] dataset = new Object[rows][cols];
629
630 String[] list = new String[rows];
631
632 for(int i = 0 ; i < list.length; i++){
633 list[i] = alist.get(i).toString();
634
635 }
636
637 Arrays.sort(list);
638
639 for(int iu = 0; iu<list.length; iu++){
640 String xi = list[iu];
641 dataset[y][0] = xi ;
642 dataset[y][1] = new Integer(getFrequency(xi));
643 dataset[y][2] = new Integer(getDocumentUsedElement(xi));
644 dataset[y][3] = new Double (Mean(xi));
645 dataset[y][4] = new Double (Median(xi));
646 dataset[y][5] = new Integer(getDistinctNumber(xi));
647 dataset[y][6] = new Integer(getMinRange(xi));
648 dataset[y][7] = new Integer(getMaxRange(xi));
649 dataset[y][8] = new Double (Average(xi));
650 dataset[y][9] = new Integer(getMode(xi));
651 dataset[y][10] = ModeFrequency(xi)+"%";
652 y++;
653
654 }
655 return dataset;
656 }
657
658 public boolean IsElementEmpty(String core_element){
659
660 int[] list = getMetadataRows(core_element);
661 boolean status = true;
662
663 for(int i = 0; i< list.length; i++){
664 if(list[i]==1){status = false;}
665 }
666 return status;
667 }
668
669 public boolean IsElementFull(String core_element){
670
671 int[] list = getMetadataRows(core_element);
672 boolean status = true;
673
674 for(int i = 0; i< list.length; i++){
675 if(list[i]==0){status = false;}
676 }
677 return status;
678 }
679
680 public ArrayList removeDocument(ArrayList dataset, String[] ids, int number){
681 removedID = new ArrayList();
682
683 int[] metadataLevel =(int[])dataset.get(0);
684 int docIDslength = metadataLevel.length;
685 int[][] valueMap = new int[dataset.size()][docIDslength];
686
687 for(int i = 0; i< docIDslength; i++){
688
689 boolean status = true;
690
691 for(int j = 0; j<dataset.size(); j++){
692 int[] metadataLevelArray = (int[])dataset.get(j);
693 valueMap[j][i] = metadataLevelArray[i];
694 if(metadataLevelArray[i]!=number){status = false;}
695
696 }
697 if(status == true){
698
699 for(int j = 0; j<dataset.size(); j++){
700
701 valueMap[j][i]=-1;
702
703 }
704 removedID.add(ids[i]);
705 }
706 }
707 ArrayList wholeList = new ArrayList();
708
709 for(int i = 0; i<valueMap.length; i++){
710 ArrayList numberList = new ArrayList();
711
712 for(int j = 0; j<valueMap[i].length; j++){
713 numberList.add(new Integer(valueMap[i][j]));
714 }
715 wholeList.add(numberList);
716 }
717
718 for(int i =0; i< wholeList.size(); i++){
719 ArrayList numberList = (ArrayList)wholeList.get(i);
720 Integer value = new Integer(-1);
721 while(numberList.contains(value)){
722 numberList.remove(value);
723 }
724 int[] valueList = new int [numberList.size()];
725
726 for(int j = 0; j< valueList.length; j++){
727 valueList[j] = ((Integer)numberList.get(j)).intValue();
728 }
729 wholeList.remove(i);
730 wholeList.add(i,valueList);
731 }
732
733 return wholeList;
734 }
735
736 public ArrayList getRemovedID(){
737 return removedID;
738 }
739
740 public int getDocNum(){
741 return TotalDoc;
742 }
743
744 public HashMap getInternalIdentifier(String core_element){
745
746 Element ex = getRootNode(core_element);
747 HashMap hp = new HashMap();
748 NodeList listOfFrequency = ex.getElementsByTagName("Document");
749
750 for(int s=0; s<listOfFrequency.getLength() ; s++){
751 Node docNode = listOfFrequency.item(s);
752 String HashID = docNode.getAttributes().item(0).getNodeValue();
753 //System.out.println(HashID);
754 Element docElement = (Element)docNode;
755 NodeList valueList = docElement.getElementsByTagName("ActualValue");
756
757 for(int y = 0; y<valueList.getLength(); y++){
758 Element valueElement = (Element)valueList.item(y);
759 NodeList textFNList = valueElement.getChildNodes();
760 String text = ((Node)textFNList.item(0)).getNodeValue();
761
762 if(!text.equals(" ")){
763 if(hp.containsKey(text)){
764 InternalLink il = (InternalLink)hp.get(text);
765 il.increaseElement(HashID);
766 hp.put(text,il);
767 }
768 else{
769 InternalLink il = new InternalLink();
770 il.setValue(text);
771 il.increaseElement(HashID);
772 hp.put(text, il);
773 }
774 }
775 }
776 }
777 return hp;
778 }
779
780
781 public HashMap getIdentifierLink(String core_element){
782
783 Element ex = getRootNode(core_element);
784 HashMap hp = new HashMap();
785 NodeList listOfFrequency = ex.getElementsByTagName("Document");
786
787 for(int s=0; s<listOfFrequency.getLength() ; s++){
788 Node docNode = listOfFrequency.item(s);
789 String HashID = docNode.getAttributes().item(0).getNodeValue();
790 Element docElement = (Element)docNode;
791 NodeList valueList = docElement.getElementsByTagName("ActualValue");
792
793 for(int y = 0; y<valueList.getLength(); y++){
794 Element valueElement = (Element)valueList.item(y);
795 NodeList textFNList = valueElement.getChildNodes();
796 String text = ((Node)textFNList.item(0)).getNodeValue();
797
798 if(!text.equals(" ") && text.startsWith("http:")){
799 if(hp.containsKey(HashID)){
800 InternalLink il = (InternalLink)hp.get(HashID);
801 il.increaseElement(text);
802 hp.put(HashID,il);
803 }
804 else{
805 InternalLink il = new InternalLink();
806 il.setValue(HashID);
807 il.increaseElement(text);
808 hp.put(HashID, il);
809 }
810 }
811 }
812 }
813 return hp;
814 }
815
816
817 public String[] getDocumentIDList(String core_element){
818
819 Element ex = getRootNode(core_element);
820 NodeList listOfFrequency = ex.getElementsByTagName("Document");
821 String[] ids = new String[listOfFrequency.getLength()];
822
823 for(int s=0; s<listOfFrequency.getLength() ; s++){
824 Node docNode = listOfFrequency.item(s);
825 NamedNodeMap NodeMap = docNode.getAttributes();
826 Node AttributeNode = NodeMap.item(0);
827 String att_name = AttributeNode.getNodeValue();
828 ids[s] = att_name;
829 }
830 return (String[])ids.clone();
831 }
832
833}
834
Note: See TracBrowser for help on using the repository browser.