source: other-projects/trunk/greenstone3-extension/mat/src/java/org/greenstone/gsdl3_extension/mat/DataMaker.java@ 17365

Last change on this file since 17365 was 17365, checked in by cc108, 16 years ago

Updating Mat Source Code

File size: 28.8 KB
Line 
package org.greenstone.gsdl3_extension.mat;

import java.io.File;
import java.io.PrintWriter;
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import org.greenstone.gsdl3.core.MessageRouter;
import org.greenstone.gsdl3.util.XMLConverter;
27
28public class DataMaker {
29
30 MetadataStats ms;
31 ArrayList nameList = new ArrayList();
32
33 private int Mode = 0;
34 private int TotalDoc = 0;
35 private String path = null;
36
37 protected Document doc=null;
38 protected MessageRouter mr = null;
39 protected XMLConverter converter=null;
40 private ArrayList removedID = new ArrayList();
41
42 private final String rootDocument = "archivedir";
43 private final String documentTag = "Document";
44 //private final String elementTag ="Element";
45 private final String frequencyTag ="Frequency";
46 private final String valueTag = "ActualValue";
47 private final String ASCII_sort = "ASCII";
48 //private final String Frequency_sort ="frequency";
49 private final String urlFile = "dc.Identifier";
50
51 private static final int DEF_DIV_SCALE = 10;
52
53
54 public DataMaker(MetadataStats stats){
55 ms = stats;
56 nameList = stats.metadataNameList;
57 path = stats.StatsDirectory;
58 setTotalDocNumber();
59 }
60
61 private Element getRootNode(String core_element){
62
63 try{
64 DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
65 DocumentBuilder docBuilder = docBuilderFactory.newDocumentBuilder();
66 Document doc = docBuilder.newDocument();
67 doc = docBuilder.parse (new File(path+"/"+core_element+".xml"));
68 Element rootNode = doc.getDocumentElement();
69 return rootNode;
70 }catch (Exception e) {
71 e.printStackTrace();
72 return null;
73 }
74 }
75
76 public void setTotalDocNumber(){
77 Element ex = getRootNode(rootDocument);
78 NodeList listOfFrequency = ex.getElementsByTagName(documentTag);
79 TotalDoc = listOfFrequency.getLength();
80 }
81
82 public int getTotalDocNumber(){
83 return TotalDoc;
84 }
85
86 public int getTotalElementUsed(){
87
88 int totalNumber = 0;
89
90 for(int i = 0 ; i<nameList.size(); i++){
91 Element rootElement = getRootNode((String)nameList.get(i));
92 NodeList listOfFrequency = rootElement.getElementsByTagName(frequencyTag);
93
94 for(int a=0; a<listOfFrequency.getLength() ; a++){
95 Node FrequencyNode = listOfFrequency.item(a);
96 NodeList textFNList = FrequencyNode.getChildNodes();
97 String TextNode = textFNList.item(0).getNodeValue();
98 totalNumber = totalNumber + Integer.parseInt(TextNode);
99 }
100 }
101 return totalNumber;
102 }
103
104 public int getFrequency(String core_element){
105
106 int totalNumber = 0;
107 Element rootElement = getRootNode(core_element);
108 NodeList listOfFrequency = rootElement.getElementsByTagName(frequencyTag);
109
110 if(listOfFrequency.getLength()==0){
111 return 0;
112 }
113
114 for(int a=0; a<listOfFrequency.getLength(); a++){
115 Node FrequencyNode = listOfFrequency.item(a);
116 NodeList textFNList = FrequencyNode.getChildNodes();
117 String TextNode = textFNList.item(0).getNodeValue();
118 int count = Integer.parseInt(TextNode);
119 totalNumber = totalNumber + count;
120 }
121 return totalNumber;
122 }
123
124 public int getDistinctNumber(String core_element){
125
126 ArrayList alist = new ArrayList();
127 Element rootElement = getRootNode(core_element);
128 NodeList listOfFrequency = rootElement.getElementsByTagName(valueTag);
129
130 if(listOfFrequency.getLength()==0){
131 return 0;
132 }
133
134 for(int a=0; a<listOfFrequency.getLength(); a++){
135 Node ActualValueNode = listOfFrequency.item(a);
136 NodeList textFNList = ActualValueNode.getChildNodes();
137 String TextNode = textFNList.item(0).getNodeValue();
138
139 if(!alist.contains(TextNode) && !TextNode.equals(" ")){
140 alist.add(TextNode);
141 }
142 }
143 return alist.size();
144 }
145
146 public int getDocumentUsedElement(String core_element){
147
148 int totalNumber = 0;
149 Element rootElement = getRootNode(core_element);
150 NodeList listOfFrequency = rootElement.getElementsByTagName(frequencyTag);
151
152 for(int a=0; a<listOfFrequency.getLength(); a++){
153 totalNumber++;
154 }
155 return totalNumber;
156 }
157
158 public double Mean(String core_element){
159
160 int due = getDocumentUsedElement(core_element);
161
162 if(due==0 || TotalDoc==0){
163 return 0;
164 }
165
166 Double d1 = new Double(due);
167 Double d2 = new Double(TotalDoc);
168 Double result = div(d1,d2);
169 result = mul(result, new Double(100));
170 return round(result.doubleValue(),1);
171 }
172
173 public int getMinRange(String core_element){
174
175 Element rootElement = getRootNode(core_element);
176 NodeList listOfFrequency = rootElement.getElementsByTagName(frequencyTag);
177
178 if(listOfFrequency.getLength()==0){
179 return 0;
180 }
181
182 Node FrequencyNode = listOfFrequency.item(0);
183 NodeList textFNList = FrequencyNode.getChildNodes();
184 String TextNode = textFNList.item(0).getNodeValue();
185
186 int minNumber = 0;
187
188 if(listOfFrequency.getLength()==TotalDoc){
189 minNumber = Integer.parseInt(TextNode);
190 }
191
192 else {
193 minNumber = 0;
194 }
195
196 for(int a=0; a<listOfFrequency.getLength(); a++){
197 FrequencyNode = listOfFrequency.item(a);
198 textFNList = FrequencyNode.getChildNodes();
199 TextNode = textFNList.item(0).getNodeValue();
200 int x = Integer.parseInt(TextNode);
201 if(x<minNumber){minNumber=x;}
202 }
203 return minNumber;
204 }
205
206 public int getMaxRange(String core_element){
207
208 Element rootElement = getRootNode(core_element);
209 NodeList listOfFrequency = rootElement.getElementsByTagName(frequencyTag);
210
211 if(listOfFrequency.getLength()==0){
212 return 0;
213 }
214
215 Node FrequencyNode = listOfFrequency.item(0);
216 NodeList textFNList = FrequencyNode.getChildNodes();
217 String TextNode = textFNList.item(0).getNodeValue();
218 int maxNumber = 0;
219
220 for(int a=0; a<listOfFrequency.getLength(); a++){
221 FrequencyNode = listOfFrequency.item(a);
222 textFNList = FrequencyNode.getChildNodes();
223 TextNode = textFNList.item(0).getNodeValue();
224 int x = Integer.parseInt(TextNode);
225 if(x>maxNumber){maxNumber=x;}
226 }
227 return maxNumber;
228 }
229
230 public int getMode(String core_element){
231
232 Element rootElement = getRootNode(core_element);
233 NodeList listOfDocument = rootElement.getElementsByTagName(documentTag);
234
235 if(listOfDocument.getLength()==0){
236 Mode = 0;
237 return 0;
238 }
239
240 ArrayList alist = new ArrayList();
241 String[] idList = getDocumentIDs(core_element);
242
243 for(int i = 0; i<idList.length; i++){
244 alist.add(idList[i]);
245 }
246
247 int[] list = new int[TotalDoc];
248
249 for(int i = 0; i<list.length; i++){
250 list[i] = 0;
251 }
252
253 for(int a=0; a<listOfDocument.getLength(); a++){
254 Node docNode = listOfDocument.item(a);
255 NamedNodeMap NodeIDMap = docNode.getAttributes();
256 Node DocNodeID = NodeIDMap.item(0);
257 String DocID = DocNodeID.getNodeValue();
258 Element xNode = (Element)docNode;
259 int location = alist.indexOf(DocID);
260 NodeList xList = xNode.getElementsByTagName(frequencyTag);
261 int frequency = Integer.parseInt(xList.item(0).getChildNodes().item(0).getNodeValue());
262 list[location] = frequency;
263 }
264
265 Arrays.sort(list);
266
267 int max_idx = 0; // Index of the maximum count
268 int max_cnt = 0;
269 int count = 0;
270
271 for ( int i = 0; i <list.length; i++) {
272 count = 0;
273 for ( int j = 0; j < list.length; j++) {
274 if (list[i] == list[j]) {
275 count++;
276 }
277 }
278 if (count > max_cnt) {
279 max_cnt = count;
280 max_idx = i;
281 }
282 }
283 Mode = list [max_idx];
284 return list [max_idx];
285 }
286
287 public double ModeFrequency(String core_element){
288
289 Element rootElement = getRootNode(core_element);
290 NodeList listOfDocument = rootElement.getElementsByTagName(documentTag);
291
292 if(listOfDocument.getLength()==0){
293 return 100;
294 }
295
296 ArrayList alist = new ArrayList();
297 String[] idList = getDocumentIDs(core_element);
298
299 for(int i = 0; i<idList.length; i++){
300 alist.add(idList[i]);
301 }
302
303 int[] list = new int[TotalDoc];
304
305 for(int i = 0; i<list.length; i++){
306 list[i] = 0;
307 }
308
309 int length = alist.size();
310 int counter = 0;
311
312 for(int a=0; a<listOfDocument.getLength(); a++){
313 Node docNode = listOfDocument.item(a);
314 NamedNodeMap NodeIDMap = docNode.getAttributes();
315 Node DocNodeID = NodeIDMap.item(0);
316 String DocID = DocNodeID.getNodeValue();
317 Element xNode = (Element)docNode;
318 int location = alist.indexOf(DocID);
319 NodeList xList = xNode.getElementsByTagName(frequencyTag);
320 int frequency = Integer.parseInt(xList.item(0).getChildNodes().item(0).getNodeValue());
321 list[location] = frequency;
322 }
323
324 for(int i =0; i<list.length; i++){
325 if(list[i]==Mode){
326 counter++;
327 }
328 }
329
330 Double result = div(new Double(counter), new Double(length));
331 result = mul(result ,new Double (100));
332 return round(result.doubleValue(),1);
333 }
334
335 public double Median(String core_element){
336
337 Element rootElement = getRootNode(core_element);
338 NodeList listOfDocument = rootElement.getElementsByTagName(documentTag);
339
340 ArrayList alist = new ArrayList();
341 String[] idList = getDocumentIDs(core_element);
342
343 for(int i = 0; i<idList.length; i++){
344 alist.add(idList[i]);
345 }
346
347 int[] list = new int[TotalDoc];
348
349 for(int i = 0; i<list.length; i++){
350 list[i] = 0;
351 }
352
353 for(int a=0; a<listOfDocument.getLength(); a++){
354 Node docNode = listOfDocument.item(a);
355 NamedNodeMap NodeIDMap = docNode.getAttributes();
356 Node DocNodeID = NodeIDMap.item(0);
357 String DocID = DocNodeID.getNodeValue();
358 Element xNode = (Element)docNode;
359 int location = alist.indexOf(DocID);
360 NodeList xList = xNode.getElementsByTagName(frequencyTag);
361 int frequency = Integer.parseInt(xList.item(0).getChildNodes().item(0).getNodeValue());
362 list[location] = frequency;
363 }
364
365 int length = alist.size();
366 int middle = length/2 -1;
367
368 Arrays.sort(list);
369
370 if(length % 2 == 1){
371 middle = middle + 1;
372 return list[middle];
373 }
374
375 else{
376 return round((double)(list[middle]+list[middle+1])/2,1);
377 }
378 }
379
380 public double Average(String core_element){
381
382 int t1 = getFrequency(core_element);
383 int t2 = getDocumentUsedElement(core_element);
384
385 if(t1==0 || t2==0){
386 return 0;
387 }
388
389 Double result = div(new Double(t1),new Double(t2));
390 return round(result.doubleValue(),1);
391 }
392
393 public Object[][] AllInformation(){
394
395 int rows = nameList.size();
396 int cols = 11;
397 int y = 0;
398
399 Object[][] info = new Object[rows][cols];
400 String[] list = new String[rows];
401
402 for(int i = 0 ; i < list.length; i++){
403 list[i] = nameList.get(i).toString();
404 }
405
406 Arrays.sort(list);
407
408 for(int iu = 0; iu<list.length; iu++){
409 String xi = list[iu];
410 info[y][0] = xi ;
411 info[y][1] = new Integer(getFrequency(xi));
412 info[y][2] = new Integer(getDocumentUsedElement(xi));
413 info[y][3] = new Double(Mean(xi));
414 info[y][4] = new Double(Median(xi));
415 info[y][5] = new Integer(getDistinctNumber(xi));
416 info[y][6] = new Integer(getMinRange(xi));
417 info[y][7] = new Integer(getMaxRange(xi));
418 info[y][8] = new Double(Average(xi));
419 info[y][9] = new Integer(getMode(xi));
420 info[y][10] = ModeFrequency(xi)+"%";
421 y++;
422 }
423 return info;
424 }
425
426 public String[] getSortList(String core_element,String sort){
427
428 if(sort.equals(ASCII_sort)){
429 HashMap hp = getDistinctValueMap (core_element);
430 String[] temp = new String[hp.size()];
431 int counter = 0;
432 Set s = hp.keySet();
433 Iterator i = s.iterator();
434
435 while(i.hasNext()){
436 temp[counter] = (String)i.next();
437 counter++;
438 }
439
440 Arrays.sort(temp);
441 return temp;
442 }
443
444 else{
445 Map m = getDistinctValueMap (core_element);
446 ArrayList outputList = sortMap(m);
447 String[] temp = new String[outputList.size()];
448
449 for(int i = 0; i< outputList.size(); i++){
450 Map.Entry entry = (Map.Entry) outputList.get(i);
451 temp[i] = (String) entry.getKey();
452 }
453
454 return temp;
455 }
456 }
457
458
459 public HashMap getDistinctValueMap(String core_element){
460
461 Element rootElement = getRootNode(core_element);
462 HashMap hp = new HashMap();
463 NodeList listOfDocument = rootElement.getElementsByTagName(documentTag);
464
465 for(int a=0; a<listOfDocument.getLength(); a++){
466
467 Node docNode = listOfDocument.item(a);
468 Element docElement = (Element)docNode;
469 NodeList valueList = docElement.getElementsByTagName(valueTag);
470
471 for(int b= 0; b<valueList.getLength(); b++){
472
473 Element valueElement = (Element)valueList.item(b);
474 NodeList textFNList = valueElement.getChildNodes();
475 String text = ((Node)textFNList.item(0)).getNodeValue();
476
477 if(!text.equals(" ")){
478 if(hp.containsKey(text)){
479 Integer i = (Integer)hp.get(text);
480 int number = i.intValue();
481 number++;
482 hp.put(text,new Integer(number));
483 }
484 else{
485 Integer i = new Integer(1);
486 hp.put(text, i);
487 }
488 }
489 }
490 }
491 return hp;
492 }
493
494
495 public HashMap getDocFrequencyMap(String core_element){
496
497 Element rootElement = getRootNode(core_element);
498 HashMap hp = new HashMap();
499 NodeList listOfDocument= rootElement.getElementsByTagName(documentTag);
500
501 for(int a=0; a<listOfDocument.getLength(); a++){
502
503 Node docNode = listOfDocument.item(a);
504 NamedNodeMap NodeMap = docNode.getAttributes();
505 Node AttributeNode = NodeMap.item(0);
506 String att_name = AttributeNode.getNodeValue();
507
508 Element docElement = (Element)docNode;
509 NodeList valueList = docElement.getElementsByTagName(frequencyTag);
510 Element frequencyElement = (Element)valueList.item(0);
511 NodeList textFNList = frequencyElement.getChildNodes();
512 String text = ((Node)textFNList.item(0)).getNodeValue();
513 Integer i = new Integer(Integer.parseInt(text));
514 hp.put(att_name, i);
515 }
516 return hp;
517 }
518
519
520 public String[] getDocumentIDs(String core_element){
521
522 Element rootElement = getRootNode(rootDocument);
523 NodeList listOfDocument = rootElement.getElementsByTagName(documentTag);
524 String[] ids = new String[listOfDocument.getLength()];
525
526 for(int a=0; a<listOfDocument.getLength(); a++){
527 Node docNode = listOfDocument.item(a);
528 NamedNodeMap NodeMap = docNode.getAttributes();
529 Node AttributeNode = NodeMap.item(0);
530 String att_name = AttributeNode.getNodeValue();
531 ids[a] = att_name;
532 }
533 return (String[])ids.clone();
534 }
535
536 public int[] getMetadataRows(String core_element){
537 Element rootElement = getRootNode(core_element);
538 ArrayList alist = new ArrayList();
539 String[] idsx = getDocumentIDs(core_element);
540
541 for(int i = 0; i<idsx.length; i++){
542 alist.add(idsx[i]);
543 }
544
545 NodeList listOfDocument = rootElement.getElementsByTagName(documentTag);
546 int[] row = new int[TotalDoc];
547
548 for(int i = 0; i<row.length; i++){
549 row [i] = 0;
550 }
551
552 for(int a=0; a<listOfDocument.getLength(); a++){
553 Node docNode = listOfDocument.item(a);
554 int location = alist.indexOf(docNode.getAttributes().item(0).getNodeValue());
555 row[location] = 1;
556 }
557 return row;
558 }
559
560 /**
561 * This method will use Arrays.sort for sorting Map
562 * @param map
563 * @return outputList of Map.Entries
564 */
565
566 public ArrayList sortMap(Map map) {
567 ArrayList outputList = null;
568 int count = 0;
569 Set set = null;
570 Map.Entry[] entries = null;
571 // Logic:
572 // get a set from Map
573 // Build a Map.Entry[] from set
574 // Sort the list using Arrays.sort
575 // Add the sorted Map.Entries into arrayList and return
576
577 set = (Set) map.entrySet();
578 Iterator iterator = set.iterator();
579 entries = new Map.Entry[set.size()];
580 while(iterator.hasNext()) {
581 entries[count++] = (Map.Entry) iterator.next();
582 }
583
584 // Sort the entries with your own comparator for the values:
585 Arrays.sort(entries, new Comparator() {
586 public int compareTo(Object lhs, Object rhs) {
587 Map.Entry le = (Map.Entry)lhs;
588 Map.Entry re = (Map.Entry)rhs;
589 return ((Comparable)le.getValue()).compareTo((Comparable)re.getValue());
590 }
591
592 public int compare(Object lhs, Object rhs) {
593 Map.Entry le = (Map.Entry)lhs;
594 Map.Entry re = (Map.Entry)rhs;
595 return ((Comparable)le.getValue()).compareTo((Comparable)re.getValue());
596 }
597 });
598
599 outputList = new ArrayList();
600 for(int i = 0; i < entries.length; i++) {
601 outputList.add(entries[i]);
602 }
603 return outputList;
604 }//End of sortMap
605
606 private Double div(Double d1, Double d2){
607 BigDecimal b1 = new BigDecimal(d1.toString());
608 BigDecimal b2 = new BigDecimal(d2.toString());
609 return new Double(b1.divide(b2,DEF_DIV_SCALE,BigDecimal.ROUND_HALF_UP).doubleValue());
610 }
611
612 private Double mul(Double d1,Double d2){
613 //System.out.println(d1);
614 BigDecimal b1 = new BigDecimal(d1.toString());
615 BigDecimal b2 = new BigDecimal(d2.toString());
616 return new Double(b1.multiply(b2).doubleValue());
617 }
618
619 public double round(double v,int scale){
620 if(scale<0){
621 throw new IllegalArgumentException(
622 "The scale must be a positive integer or zero");
623 }
624 BigDecimal b = new BigDecimal(Double.toString(v));
625 BigDecimal one = new BigDecimal("1");
626 return b.divide(one,scale,BigDecimal.ROUND_HALF_UP).doubleValue();
627 }
628
629 public double getSingleMetadataSetCompleteness(ArrayList mds_list){
630
631 int totalElement = 0;
632 int totalElementUsed = 0;
633
634 for(int a = 0; a<mds_list.size(); a++){
635 MetadataSet mds = (MetadataSet)mds_list.get(a);
636 ArrayList alist = mds.getIndexsList();
637 int length = alist.size();
638 totalElement = totalElement + length * ms.getDocNum();
639
640 for(int i = 0; i<alist.size(); i++){
641 String name = (String)alist.get(i);
642 totalElementUsed = totalElementUsed + getDocumentUsedElement(name);
643 }
644 }
645 System.out.println(totalElementUsed +" "+ totalElement);
646 double x = (double)totalElementUsed/totalElement;
647 System.out.println(x);
648 Double d1 = new Double(x);
649 System.out.println(d1);
650 Double d2 = new Double(100);
651 Double result = mul(d1,d2);
652 return round(result.doubleValue(),1);
653 }
654
655 public Object[][] getMetadataInfo(MetadataSet mds){
656
657 ArrayList alist = mds.getIndexsList();
658 int rows = alist.size();
659 int cols = 11;
660 int y = 0;
661 Object[][] dataset = new Object[rows][cols];
662
663 String[] list = new String[rows];
664
665 for(int i = 0 ; i < list.length; i++){
666 list[i] = alist.get(i).toString();
667 }
668
669 Arrays.sort(list);
670
671 for(int iu = 0; iu<list.length; iu++){
672 String xi = list[iu];
673 dataset[y][0] = xi ;
674 dataset[y][1] = new Integer(getFrequency(xi));
675 dataset[y][2] = new Integer(getDocumentUsedElement(xi));
676 dataset[y][3] = new Double (Mean(xi));
677 dataset[y][4] = new Double (Median(xi));
678 dataset[y][5] = new Integer(getDistinctNumber(xi));
679 dataset[y][6] = new Integer(getMinRange(xi));
680 dataset[y][7] = new Integer(getMaxRange(xi));
681 dataset[y][8] = new Double (Average(xi));
682 dataset[y][9] = new Integer(getMode(xi));
683 dataset[y][10] = ModeFrequency(xi)+"%";
684 y++;
685
686 }
687 return dataset;
688 }
689
690 public boolean IsElementEmpty(String core_element){
691
692 int[] list = getMetadataRows(core_element);
693 boolean status = true;
694
695 for(int i=0; i<list.length; i++){
696 if(list[i]==1){status = false;}
697 }
698 return status;
699 }
700
701 public boolean IsElementFull(String core_element){
702
703 int[] list = getMetadataRows(core_element);
704 boolean status = true;
705
706 for(int i=0; i<list.length; i++){
707 if(list[i]==0){status = false;}
708 }
709 return status;
710 }
711
712 public ArrayList removeDocument(ArrayList dataset, String[] ids, int number){
713 removedID = new ArrayList();
714
715 int[] metadataLevel =(int[])dataset.get(0);
716 int docIDslength = metadataLevel.length;
717 int[][] valueMap = new int[dataset.size()][docIDslength];
718
719 for(int i = 0; i< docIDslength; i++){
720
721 boolean status = true;
722
723 for(int j = 0; j<dataset.size(); j++){
724 int[] metadataLevelArray = (int[])dataset.get(j);
725 valueMap[j][i] = metadataLevelArray[i];
726 if(metadataLevelArray[i]!=number){status = false;}
727 }
728 if(status == true){
729 for(int j = 0; j<dataset.size(); j++){
730 valueMap[j][i]=-1;
731
732 }
733 removedID.add(ids[i]);
734 }
735 }
736 ArrayList wholeList = new ArrayList();
737
738 for(int i = 0; i<valueMap.length; i++){
739 ArrayList numberList = new ArrayList();
740
741 for(int j = 0; j<valueMap[i].length; j++){
742 numberList.add(new Integer(valueMap[i][j]));
743 }
744 wholeList.add(numberList);
745 }
746
747 for(int i =0; i< wholeList.size(); i++){
748 ArrayList numberList = (ArrayList)wholeList.get(i);
749 Integer value = new Integer(-1);
750 while(numberList.contains(value)){
751 numberList.remove(value);
752 }
753 int[] valueList = new int [numberList.size()];
754
755 for(int j = 0; j< valueList.length; j++){
756 valueList[j] = ((Integer)numberList.get(j)).intValue();
757 }
758 wholeList.remove(i);
759 wholeList.add(i,valueList);
760 }
761 return wholeList;
762 }
763
764 public ArrayList getRemovedID(){
765 return removedID;
766 }
767
768
769 public HashMap getLinks(String[] args, String core_element){
770 Element rootElement = getRootNode(core_element);
771 HashMap hp = new HashMap();
772 NodeList listOfDocument = rootElement.getElementsByTagName("Document");
773 ArrayList tempList = new ArrayList();
774
775
776 for(int i = 0; i<args.length; i++){
777 tempList.add(args[i]);
778 }
779
780 for(int s=0; s<listOfDocument.getLength() ; s++){
781 Node docNode = listOfDocument.item(s);
782 Element docElement = (Element)docNode;
783 NodeList valueList = docElement.getElementsByTagName(valueTag);
784
785 if(valueList.getLength()==1){
786 String id = docNode.getAttributes().item(0).getNodeValue();
787
788 Element valueElement = (Element)valueList.item(0);
789 NodeList textFNList = valueElement.getChildNodes();
790 String text = ((Node)textFNList.item(0)).getNodeValue();
791
792 if(tempList.contains(text)){
793 hp.put(id, text);
794 }
795 }
796 }
797
798 rootElement = getRootNode(urlFile);
799 listOfDocument = rootElement.getElementsByTagName(documentTag);
800 HashMap newHp = new HashMap();
801
802 for(int a=0; a<listOfDocument.getLength(); a++){
803 Node docNode = listOfDocument.item(a);
804 Element docElement = (Element)docNode;
805 NodeList valueList = docElement.getElementsByTagName(valueTag);
806
807 if(valueList.getLength()==1){
808 String id = docNode.getAttributes().item(0).getNodeValue();
809
810 Element valueElement = (Element)valueList.item(0);
811 NodeList textFNList = valueElement.getChildNodes();
812 String text = ((Node)textFNList.item(0)).getNodeValue();
813 newHp.put(text,id);
814 }
815 }
816
817 HashMap tempMap = new HashMap();
818 Collection c = hp.values();
819 Iterator i = c.iterator();
820 while(i.hasNext()){
821 String id = (String)i.next();
822 if(newHp.containsKey(id)){
823 String text = (String)newHp.get(id);
824 if(text.contains("http")){
825 tempMap.put((String)tempMap.get(id),text);
826 }
827 }
828 }
829
830 return tempMap;
831 }
832
833
834 public ArrayList getURLMap(String elementName){
835
836 String core_element =elementName;
837 Element rootElement = getRootNode(core_element);
838 ArrayList alist = new ArrayList();
839
840 if(rootElement.equals(null)){
841 return new ArrayList();
842 }
843
844 NodeList listOfDocument = rootElement.getElementsByTagName(documentTag);
845
846 if(listOfDocument.getLength()==0){return new ArrayList();}
847
848 //System.out.println(listOfDocument.getLength());
849 for(int a=0; a<listOfDocument.getLength(); a++){
850 Node docNode = listOfDocument.item(a);
851 Element docElement = (Element)docNode;
852 NodeList valueList = docElement.getElementsByTagName(valueTag);
853
854 //System.out.println(valueList.getLength());
855 for(int b=0; b<valueList.getLength(); b++){
856 Element valueElement = (Element)valueList.item(b);
857 NodeList textFNList = valueElement.getChildNodes();
858 String text = ((Node)textFNList.item(0)).getNodeValue();
859
860 if(!text.equals(" ")){
861 NamedNodeMap NodeIDMap = docNode .getAttributes();
862 Node DocNodeID = NodeIDMap.item(0);
863 String DocID = DocNodeID.getNodeValue();
864 if(alist.contains(DocID)){}
865 else{
866 alist.add(DocID);
867 }
868 }
869 }
870 }
871
872 return alist;
873}
874
875 public HashMap getIdentifierLink(String core_element){
876
877 Element rootElement = getRootNode(core_element);
878 HashMap hp = new HashMap();
879 NodeList listOfDocument = rootElement.getElementsByTagName(documentTag);
880
881 for(int a=0; a<listOfDocument.getLength(); a++){
882 Node docNode = listOfDocument.item(a);
883 String HashID = docNode.getAttributes().item(0).getNodeValue();
884 Element docElement = (Element)docNode;
885 NodeList valueList = docElement.getElementsByTagName(valueTag);
886
887 for(int y = 0; y<valueList.getLength(); y++){
888 Element valueElement = (Element)valueList.item(y);
889 NodeList textFNList = valueElement.getChildNodes();
890 String text = ((Node)textFNList.item(0)).getNodeValue();
891
892 if(!text.equals(" ") && text.startsWith("http:")){
893 if(hp.containsKey(HashID)){
894 InternalLink il = (InternalLink)hp.get(HashID);
895 il.increaseElement(text);
896 hp.put(HashID,il);
897 }
898 else{
899 InternalLink il = new InternalLink();
900 il.setValue(HashID);
901 il.increaseElement(text);
902 hp.put(HashID, il);
903 }
904 }
905 }
906 }
907 return hp;
908 }
909
910
911 public String[] getDocumentIDList(String core_element){
912
913 Element rootElement = getRootNode(core_element);
914 NodeList listOfDocument = rootElement.getElementsByTagName(documentTag);
915 String[] ids = new String[listOfDocument.getLength()];
916
917 for(int a=0; a<listOfDocument.getLength(); a++){
918 Node docNode = listOfDocument .item(a);
919 NamedNodeMap NodeMap = docNode.getAttributes();
920 Node AttributeNode = NodeMap.item(0);
921 String att_name = AttributeNode.getNodeValue();
922 ids[a] = att_name;
923 }
924 return (String[])ids.clone();
925 }
926
927
928 public HashMap getInternalIdentifier(String core_element){
929
930 Element rootElement = getRootNode(core_element);
931 HashMap hp = new HashMap();
932 NodeList listOfDocument = rootElement.getElementsByTagName(documentTag);
933
934 for(int a=0; a<listOfDocument.getLength(); a++){
935 Node docNode = listOfDocument.item(a);
936 String HashID = docNode.getAttributes().item(0).getNodeValue();
937
938 Element docElement = (Element)docNode;
939 NodeList valueList = docElement.getElementsByTagName(valueTag);
940
941 for(int y = 0; y<valueList.getLength(); y++){
942 Element valueElement = (Element)valueList.item(y);
943 NodeList textFNList = valueElement.getChildNodes();
944 String text = ((Node)textFNList.item(0)).getNodeValue();
945
946 if(!text.equals(" ")){
947 if(hp.containsKey(text)){
948 InternalLink il = (InternalLink)hp.get(text);
949 il.increaseElement(HashID);
950 hp.put(text,il);
951 }
952 else{
953 InternalLink il = new InternalLink();
954 il.setValue(text);
955 il.increaseElement(HashID);
956 hp.put(text, il);
957 }
958 }
959 }
960 }
961 return hp;
962 }
963
964 public HashMap getIdentifierLinkNoIdentifier(){
965
966 Element rootElement = getRootNode(rootDocument);
967 HashMap hp = new HashMap();
968 NodeList listOfDocument = rootElement.getElementsByTagName(documentTag);
969 String url ="No Source Available";
970 for(int s=0; s<listOfDocument.getLength() ; s++){
971 Node docNode = listOfDocument.item(s);
972 String HashID = docNode.getAttributes().item(0).getNodeValue();
973 InternalLink il = new InternalLink();
974 il.increaseElement(HashID);
975 hp.put(HashID, il);
976 }
977 return hp;
978 }
979}
Note: See TracBrowser for help on using the repository browser.