source: gs3-extensions/mat/trunk/src/org/greenstone/mat/DataMaker.java@ 21927

Last change on this file since 21927 was 21927, checked in by sjm84, 14 years ago

Renamed package to org.greenstone.mat from org.greenstone.gsdl3_extension.mat

File size: 28.7 KB
Line 
1package org.greenstone.mat;
2
3import java.util.ArrayList;
4import java.util.Arrays;
5import java.util.Collection;
6import java.util.Comparator;
7import java.util.HashMap;
8import java.util.Iterator;
9import java.util.Map;
10import java.util.Set;
11
12import javax.xml.parsers.DocumentBuilder;
13import javax.xml.parsers.DocumentBuilderFactory;
14
15import java.io.File;
16import java.io.PrintWriter;
17import java.math.BigDecimal;
18
19import org.w3c.dom.Document;
20import org.w3c.dom.Element;
21import org.w3c.dom.NamedNodeMap;
22import org.w3c.dom.Node;
23import org.w3c.dom.NodeList;
24
25import org.greenstone.gsdl3.core.MessageRouter;
26import org.greenstone.gsdl3.util.XMLConverter;
27
28public class DataMaker {
29
30 MetadataStats ms;
31 ArrayList nameList = new ArrayList();
32
33 private int Mode = 0;
34 private int TotalDoc = 0;
35 private String path = null;
36
37 protected Document doc=null;
38 protected MessageRouter mr = null;
39 protected XMLConverter converter=null;
40 private ArrayList removedID = new ArrayList();
41
42 private final String rootDocument = "archivedir";
43 private final String documentTag = "Document";
44 private final String frequencyTag ="Frequency";
45 private final String valueTag = "ActualValue";
46 private final String ASCII_sort = "ASCII";
47 private final String urlFile = "dc.Identifier";
48
49 private static final int DEF_DIV_SCALE = 10;
50
/**
 * Creates a DataMaker backed by the given statistics holder.
 * Takes over its metadata name list and statistics directory, then
 * counts the documents listed in the root document file.
 *
 * @param stats provides <code>metadataNameList</code> and <code>StatsDirectory</code>
 */
public DataMaker(MetadataStats stats){
    this.ms = stats;
    this.path = stats.StatsDirectory;
    this.nameList = stats.metadataNameList;
    // Needs 'path' to be set already: it parses a file under that directory.
    setTotalDocNumber();
}
57
58 private Element getRootNode(String core_element){
59
60 try{
61 DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
62 DocumentBuilder docBuilder = docBuilderFactory.newDocumentBuilder();
63 Document doc = docBuilder.newDocument();
64 doc = docBuilder.parse (new File(path+core_element+".xml"));
65 Element rootNode = doc.getDocumentElement();
66 return rootNode;
67 }catch (Exception e) {
68 e.printStackTrace();
69 return null;
70 }
71 }
72
73 public void setTotalDocNumber(){
74 Element ex = getRootNode(rootDocument);
75 NodeList listOfFrequency = ex.getElementsByTagName(documentTag);
76 TotalDoc = listOfFrequency.getLength();
77 }
78
79 public int getTotalDocNumber(){
80 return TotalDoc;
81 }
82
83 public int getTotalElementUsed(){
84
85 int totalNumber = 0;
86
87 for(int i = 0 ; i<nameList.size(); i++){
88 Element rootElement = getRootNode((String)nameList.get(i));
89 NodeList listOfFrequency = rootElement.getElementsByTagName(frequencyTag);
90
91 for(int a=0; a<listOfFrequency.getLength() ; a++){
92 Node FrequencyNode = listOfFrequency.item(a);
93 NodeList textFNList = FrequencyNode.getChildNodes();
94 String TextNode = textFNList.item(0).getNodeValue();
95 totalNumber = totalNumber + Integer.parseInt(TextNode);
96 }
97 }
98 return totalNumber;
99 }
100
101 public int getFrequency(String core_element){
102
103 int totalNumber = 0;
104 Element rootElement = getRootNode(core_element);
105 NodeList listOfFrequency = rootElement.getElementsByTagName(frequencyTag);
106
107 if(listOfFrequency.getLength()==0){
108 return 0;
109 }
110
111 for(int a=0; a<listOfFrequency.getLength(); a++){
112 Node FrequencyNode = listOfFrequency.item(a);
113 NodeList textFNList = FrequencyNode.getChildNodes();
114 String TextNode = textFNList.item(0).getNodeValue();
115 int count = Integer.parseInt(TextNode);
116 totalNumber = totalNumber + count;
117 }
118 return totalNumber;
119 }
120
121 public int getDistinctNumber(String core_element){
122
123 ArrayList alist = new ArrayList();
124 Element rootElement = getRootNode(core_element);
125 NodeList listOfFrequency = rootElement.getElementsByTagName(valueTag);
126
127 if(listOfFrequency.getLength()==0){
128 return 0;
129 }
130
131 for(int a=0; a<listOfFrequency.getLength(); a++){
132 Node ActualValueNode = listOfFrequency.item(a);
133 NodeList textFNList = ActualValueNode.getChildNodes();
134 String TextNode = textFNList.item(0).getNodeValue();
135
136 if(!alist.contains(TextNode) && !TextNode.equals(" ")){
137 alist.add(TextNode);
138 }
139 }
140 return alist.size();
141 }
142
143 public int getDocumentUsedElement(String core_element){
144
145 int totalNumber = 0;
146 Element rootElement = getRootNode(core_element);
147 NodeList listOfFrequency = rootElement.getElementsByTagName(frequencyTag);
148
149 for(int a=0; a<listOfFrequency.getLength(); a++){
150 totalNumber++;
151 }
152 return totalNumber;
153 }
154
155 public double Mean(String core_element){
156
157 int due = getDocumentUsedElement(core_element);
158
159 if(due==0 || TotalDoc==0){
160 return 0;
161 }
162
163 Double d1 = new Double(due);
164 Double d2 = new Double(TotalDoc);
165 Double result = div(d1,d2);
166 result = mul(result, new Double(100));
167 return round(result.doubleValue(),1);
168 }
169
170 public int getMinRange(String core_element){
171
172 Element rootElement = getRootNode(core_element);
173 NodeList listOfFrequency = rootElement.getElementsByTagName(frequencyTag);
174
175 if(listOfFrequency.getLength()==0){
176 return 0;
177 }
178
179 Node FrequencyNode = listOfFrequency.item(0);
180 NodeList textFNList = FrequencyNode.getChildNodes();
181 String TextNode = textFNList.item(0).getNodeValue();
182
183 int minNumber = 0;
184
185 if(listOfFrequency.getLength()==TotalDoc){
186 minNumber = Integer.parseInt(TextNode);
187 }
188
189 else {
190 minNumber = 0;
191 }
192
193 for(int a=0; a<listOfFrequency.getLength(); a++){
194 FrequencyNode = listOfFrequency.item(a);
195 textFNList = FrequencyNode.getChildNodes();
196 TextNode = textFNList.item(0).getNodeValue();
197 int x = Integer.parseInt(TextNode);
198 if(x<minNumber){minNumber=x;}
199 }
200 return minNumber;
201 }
202
203 public int getMaxRange(String core_element){
204
205 Element rootElement = getRootNode(core_element);
206 NodeList listOfFrequency = rootElement.getElementsByTagName(frequencyTag);
207
208 if(listOfFrequency.getLength()==0){
209 return 0;
210 }
211
212 Node FrequencyNode = listOfFrequency.item(0);
213 NodeList textFNList = FrequencyNode.getChildNodes();
214 String TextNode = textFNList.item(0).getNodeValue();
215 int maxNumber = 0;
216
217 for(int a=0; a<listOfFrequency.getLength(); a++){
218 FrequencyNode = listOfFrequency.item(a);
219 textFNList = FrequencyNode.getChildNodes();
220 TextNode = textFNList.item(0).getNodeValue();
221 int x = Integer.parseInt(TextNode);
222 if(x>maxNumber){maxNumber=x;}
223 }
224 return maxNumber;
225 }
226
227 public int getMode(String core_element){
228
229 Element rootElement = getRootNode(core_element);
230 NodeList listOfDocument = rootElement.getElementsByTagName(documentTag);
231
232 if(listOfDocument.getLength()==0){
233 Mode = 0;
234 return 0;
235 }
236
237 ArrayList alist = new ArrayList();
238 String[] idList = getDocumentIDs(core_element);
239
240 for(int i = 0; i<idList.length; i++){
241 alist.add(idList[i]);
242 }
243
244 int[] list = new int[TotalDoc];
245
246 for(int i = 0; i<list.length; i++){
247 list[i] = 0;
248 }
249
250 for(int a=0; a<listOfDocument.getLength(); a++){
251 Node docNode = listOfDocument.item(a);
252 NamedNodeMap NodeIDMap = docNode.getAttributes();
253 Node DocNodeID = NodeIDMap.item(0);
254 String DocID = DocNodeID.getNodeValue();
255 Element xNode = (Element)docNode;
256 int location = alist.indexOf(DocID);
257 NodeList xList = xNode.getElementsByTagName(frequencyTag);
258 int frequency = Integer.parseInt(xList.item(0).getChildNodes().item(0).getNodeValue());
259 list[location] = frequency;
260 }
261
262 Arrays.sort(list);
263
264 int max_idx = 0; // Index of the maximum count
265 int max_cnt = 0;
266 int count = 0;
267
268 for ( int i = 0; i <list.length; i++) {
269 count = 0;
270 for ( int j = 0; j < list.length; j++) {
271 if (list[i] == list[j]) {
272 count++;
273 }
274 }
275 if (count > max_cnt) {
276 max_cnt = count;
277 max_idx = i;
278 }
279 }
280 Mode = list [max_idx];
281 return list [max_idx];
282 }
283
284 public double ModeFrequency(String core_element){
285
286 Element rootElement = getRootNode(core_element);
287 NodeList listOfDocument = rootElement.getElementsByTagName(documentTag);
288
289 if(listOfDocument.getLength()==0){
290 return 100;
291 }
292
293 ArrayList alist = new ArrayList();
294 String[] idList = getDocumentIDs(core_element);
295
296 for(int i = 0; i<idList.length; i++){
297 alist.add(idList[i]);
298 }
299
300 int[] list = new int[TotalDoc];
301
302 for(int i = 0; i<list.length; i++){
303 list[i] = 0;
304 }
305
306 int length = alist.size();
307 int counter = 0;
308
309 for(int a=0; a<listOfDocument.getLength(); a++){
310 Node docNode = listOfDocument.item(a);
311 NamedNodeMap NodeIDMap = docNode.getAttributes();
312 Node DocNodeID = NodeIDMap.item(0);
313 String DocID = DocNodeID.getNodeValue();
314 Element xNode = (Element)docNode;
315 int location = alist.indexOf(DocID);
316 NodeList xList = xNode.getElementsByTagName(frequencyTag);
317 int frequency = Integer.parseInt(xList.item(0).getChildNodes().item(0).getNodeValue());
318 list[location] = frequency;
319 }
320
321 for(int i =0; i<list.length; i++){
322 if(list[i]==Mode){
323 counter++;
324 }
325 }
326
327 Double result = div(new Double(counter), new Double(length));
328 result = mul(result ,new Double (100));
329 return round(result.doubleValue(),1);
330 }
331
332 public double Median(String core_element){
333
334 Element rootElement = getRootNode(core_element);
335 NodeList listOfDocument = rootElement.getElementsByTagName(documentTag);
336
337 ArrayList alist = new ArrayList();
338 String[] idList = getDocumentIDs(core_element);
339
340 for(int i = 0; i<idList.length; i++){
341 alist.add(idList[i]);
342 }
343
344 int[] list = new int[TotalDoc];
345
346 for(int i = 0; i<list.length; i++){
347 list[i] = 0;
348 }
349
350 for(int a=0; a<listOfDocument.getLength(); a++){
351 Node docNode = listOfDocument.item(a);
352 NamedNodeMap NodeIDMap = docNode.getAttributes();
353 Node DocNodeID = NodeIDMap.item(0);
354 String DocID = DocNodeID.getNodeValue();
355 Element xNode = (Element)docNode;
356 int location = alist.indexOf(DocID);
357 NodeList xList = xNode.getElementsByTagName(frequencyTag);
358 int frequency = Integer.parseInt(xList.item(0).getChildNodes().item(0).getNodeValue());
359 list[location] = frequency;
360 }
361
362 int length = alist.size();
363 int middle = length/2 -1;
364
365 Arrays.sort(list);
366
367 if(length % 2 == 1){
368 middle = middle + 1;
369 return list[middle];
370 }
371
372 else{
373 return round((double)(list[middle]+list[middle+1])/2,1);
374 }
375 }
376
377 public double Average(String core_element){
378
379 int t1 = getFrequency(core_element);
380 int t2 = getDocumentUsedElement(core_element);
381
382 if(t1==0 || t2==0){
383 return 0;
384 }
385
386 Double result = div(new Double(t1),new Double(t2));
387 return round(result.doubleValue(),1);
388 }
389
390 public Object[][] AllInformation(){
391
392 int rows = nameList.size();
393 int cols = 11;
394 int y = 0;
395
396 Object[][] info = new Object[rows][cols];
397 String[] list = new String[rows];
398
399 for(int i = 0 ; i < list.length; i++){
400 list[i] = nameList.get(i).toString();
401 }
402
403 Arrays.sort(list);
404
405 for(int iu = 0; iu<list.length; iu++){
406 String xi = list[iu];
407 info[y][0] = xi ;
408 info[y][1] = new Integer(getFrequency(xi));
409 info[y][2] = new Integer(getDocumentUsedElement(xi));
410 info[y][3] = new Double(Mean(xi));
411 info[y][4] = new Double(Median(xi));
412 info[y][5] = new Integer(getDistinctNumber(xi));
413 info[y][6] = new Integer(getMinRange(xi));
414 info[y][7] = new Integer(getMaxRange(xi));
415 info[y][8] = new Double(Average(xi));
416 info[y][9] = new Integer(getMode(xi));
417 info[y][10] = ModeFrequency(xi)+"%";
418 y++;
419 }
420 return info;
421 }
422
423 public String[] getSortList(String core_element,String sort){
424
425 if(sort.equals(ASCII_sort)){
426 HashMap hp = getDistinctValueMap (core_element);
427 String[] temp = new String[hp.size()];
428 int counter = 0;
429 Set s = hp.keySet();
430 Iterator i = s.iterator();
431
432 while(i.hasNext()){
433 temp[counter] = (String)i.next();
434 counter++;
435 }
436
437 Arrays.sort(temp);
438 return temp;
439 }
440
441 else{
442 Map m = getDistinctValueMap (core_element);
443 ArrayList outputList = sortMap(m);
444 String[] temp = new String[outputList.size()];
445
446 for(int i = 0; i< outputList.size(); i++){
447 Map.Entry entry = (Map.Entry) outputList.get(i);
448 temp[i] = (String) entry.getKey();
449 }
450
451 return temp;
452 }
453 }
454
455
456 public HashMap getDistinctValueMap(String core_element){
457
458 Element rootElement = getRootNode(core_element);
459 HashMap hp = new HashMap();
460 NodeList listOfDocument = rootElement.getElementsByTagName(documentTag);
461
462 for(int a=0; a<listOfDocument.getLength(); a++){
463
464 Node docNode = listOfDocument.item(a);
465 Element docElement = (Element)docNode;
466 NodeList valueList = docElement.getElementsByTagName(valueTag);
467
468 for(int b= 0; b<valueList.getLength(); b++){
469
470 Element valueElement = (Element)valueList.item(b);
471 NodeList textFNList = valueElement.getChildNodes();
472 String text = ((Node)textFNList.item(0)).getNodeValue();
473
474 if(!text.equals(" ")){
475 if(hp.containsKey(text)){
476 Integer i = (Integer)hp.get(text);
477 int number = i.intValue();
478 number++;
479 hp.put(text,new Integer(number));
480 }
481 else{
482 Integer i = new Integer(1);
483 hp.put(text, i);
484 }
485 }
486 }
487 }
488 return hp;
489 }
490
491
492 public HashMap getDocFrequencyMap(String core_element){
493
494 Element rootElement = getRootNode(core_element);
495 HashMap hp = new HashMap();
496 NodeList listOfDocument= rootElement.getElementsByTagName(documentTag);
497
498 for(int a=0; a<listOfDocument.getLength(); a++){
499
500 Node docNode = listOfDocument.item(a);
501 NamedNodeMap NodeMap = docNode.getAttributes();
502 Node AttributeNode = NodeMap.item(0);
503 String att_name = AttributeNode.getNodeValue();
504
505 Element docElement = (Element)docNode;
506 NodeList valueList = docElement.getElementsByTagName(frequencyTag);
507 Element frequencyElement = (Element)valueList.item(0);
508 NodeList textFNList = frequencyElement.getChildNodes();
509 String text = ((Node)textFNList.item(0)).getNodeValue();
510 Integer i = new Integer(Integer.parseInt(text));
511 hp.put(att_name, i);
512 }
513 return hp;
514 }
515
516
517 public String[] getDocumentIDs(String core_element){
518
519 Element rootElement = getRootNode(rootDocument);
520 NodeList listOfDocument = rootElement.getElementsByTagName(documentTag);
521 String[] ids = new String[listOfDocument.getLength()];
522
523 for(int a=0; a<listOfDocument.getLength(); a++){
524 Node docNode = listOfDocument.item(a);
525 NamedNodeMap NodeMap = docNode.getAttributes();
526 Node AttributeNode = NodeMap.item(0);
527 String att_name = AttributeNode.getNodeValue();
528 ids[a] = att_name;
529 }
530 return (String[])ids.clone();
531 }
532
533 public int[] getMetadataRows(String core_element){
534 Element rootElement = getRootNode(core_element);
535 ArrayList alist = new ArrayList();
536 String[] idsx = getDocumentIDs(core_element);
537
538 for(int i = 0; i<idsx.length; i++){
539 alist.add(idsx[i]);
540 }
541
542 NodeList listOfDocument = rootElement.getElementsByTagName(documentTag);
543 int[] row = new int[TotalDoc];
544
545 for(int i = 0; i<row.length; i++){
546 row [i] = 0;
547 }
548
549 for(int a=0; a<listOfDocument.getLength(); a++){
550 Node docNode = listOfDocument.item(a);
551 int location = alist.indexOf(docNode.getAttributes().item(0).getNodeValue());
552 row[location] = 1;
553 }
554 return row;
555 }
556
557 /**
558 * This method will use Arrays.sort for sorting Map
559 * @param map
560 * @return outputList of Map.Entries
561 */
562
563 public ArrayList sortMap(Map map) {
564 ArrayList outputList = null;
565 int count = 0;
566 Set set = null;
567 Map.Entry[] entries = null;
568 // Logic:
569 // get a set from Map
570 // Build a Map.Entry[] from set
571 // Sort the list using Arrays.sort
572 // Add the sorted Map.Entries into arrayList and return
573
574 set = (Set) map.entrySet();
575 Iterator iterator = set.iterator();
576 entries = new Map.Entry[set.size()];
577 while(iterator.hasNext()) {
578 entries[count++] = (Map.Entry) iterator.next();
579 }
580
581 // Sort the entries with your own comparator for the values:
582 Arrays.sort(entries, new Comparator() {
583 public int compareTo(Object lhs, Object rhs) {
584 Map.Entry le = (Map.Entry)lhs;
585 Map.Entry re = (Map.Entry)rhs;
586 return ((Comparable)le.getValue()).compareTo((Comparable)re.getValue());
587 }
588
589 public int compare(Object lhs, Object rhs) {
590 Map.Entry le = (Map.Entry)lhs;
591 Map.Entry re = (Map.Entry)rhs;
592 return ((Comparable)le.getValue()).compareTo((Comparable)re.getValue());
593 }
594 });
595
596 outputList = new ArrayList();
597 for(int i = 0; i < entries.length; i++) {
598 outputList.add(entries[i]);
599 }
600 return outputList;
601 }//End of sortMap
602
603 private Double div(Double d1, Double d2){
604 BigDecimal b1 = new BigDecimal(d1.toString());
605 BigDecimal b2 = new BigDecimal(d2.toString());
606 return new Double(b1.divide(b2,DEF_DIV_SCALE,BigDecimal.ROUND_HALF_UP).doubleValue());
607 }
608
609 private Double mul(Double d1,Double d2){
610 //System.out.println(d1);
611 BigDecimal b1 = new BigDecimal(d1.toString());
612 BigDecimal b2 = new BigDecimal(d2.toString());
613 return new Double(b1.multiply(b2).doubleValue());
614 }
615
616 public double round(double v,int scale){
617 if(scale<0){
618 throw new IllegalArgumentException(
619 "The scale must be a positive integer or zero");
620 }
621 BigDecimal b = new BigDecimal(Double.toString(v));
622 BigDecimal one = new BigDecimal("1");
623 return b.divide(one,scale,BigDecimal.ROUND_HALF_UP).doubleValue();
624 }
625
626 public double getSingleMetadataSetCompleteness(ArrayList mds_list){
627
628 int totalElement = 0;
629 int totalElementUsed = 0;
630
631 for(int a = 0; a<mds_list.size(); a++){
632 MetadataSet mds = (MetadataSet)mds_list.get(a);
633 ArrayList alist = mds.getIndexsList();
634 int length = alist.size();
635 totalElement = totalElement + length * ms.getDocNum();
636
637 for(int i = 0; i<alist.size(); i++){
638 String name = (String)alist.get(i);
639 totalElementUsed = totalElementUsed + getDocumentUsedElement(name);
640 }
641 }
642 //System.out.println(totalElementUsed +" "+ totalElement);
643 double x = (double)totalElementUsed/totalElement;
644 //System.out.println(x);
645 Double d1 = new Double(x);
646 //System.out.println(d1);
647 Double d2 = new Double(100);
648 Double result = mul(d1,d2);
649 return round(result.doubleValue(),1);
650 }
651
652 public Object[][] getMetadataInfo(MetadataSet mds){
653
654 ArrayList alist = mds.getIndexsList();
655 int rows = alist.size();
656 int cols = 11;
657 int y = 0;
658 Object[][] dataset = new Object[rows][cols];
659
660 String[] list = new String[rows];
661
662 for(int i = 0 ; i < list.length; i++){
663 list[i] = alist.get(i).toString();
664 }
665
666 Arrays.sort(list);
667
668 for(int iu = 0; iu<list.length; iu++){
669 String xi = list[iu];
670 dataset[y][0] = xi ;
671 dataset[y][1] = new Integer(getFrequency(xi));
672 dataset[y][2] = new Integer(getDocumentUsedElement(xi));
673 dataset[y][3] = new Double (Mean(xi));
674 dataset[y][4] = new Double (Median(xi));
675 dataset[y][5] = new Integer(getDistinctNumber(xi));
676 dataset[y][6] = new Integer(getMinRange(xi));
677 dataset[y][7] = new Integer(getMaxRange(xi));
678 dataset[y][8] = new Double (Average(xi));
679 dataset[y][9] = new Integer(getMode(xi));
680 dataset[y][10] = ModeFrequency(xi)+"%";
681 y++;
682
683 }
684 return dataset;
685 }
686
687 public boolean IsElementEmpty(String core_element){
688
689 int[] list = getMetadataRows(core_element);
690 boolean status = true;
691
692 for(int i=0; i<list.length; i++){
693 if(list[i]==1){status = false;}
694 }
695 return status;
696 }
697
698 public boolean IsElementFull(String core_element){
699
700 int[] list = getMetadataRows(core_element);
701 boolean status = true;
702
703 for(int i=0; i<list.length; i++){
704 if(list[i]==0){status = false;}
705 }
706 return status;
707 }
708
709 public ArrayList removeDocument(ArrayList dataset, String[] ids, int number){
710 removedID = new ArrayList();
711
712 int[] metadataLevel =(int[])dataset.get(0);
713 int docIDslength = metadataLevel.length;
714 int[][] valueMap = new int[dataset.size()][docIDslength];
715
716 for(int i = 0; i< docIDslength; i++){
717
718 boolean status = true;
719
720 for(int j = 0; j<dataset.size(); j++){
721 int[] metadataLevelArray = (int[])dataset.get(j);
722 valueMap[j][i] = metadataLevelArray[i];
723 if(metadataLevelArray[i]!=number){status = false;}
724 }
725 if(status == true){
726 for(int j = 0; j<dataset.size(); j++){
727 valueMap[j][i]=-1;
728
729 }
730 removedID.add(ids[i]);
731 }
732 }
733 ArrayList wholeList = new ArrayList();
734
735 for(int i = 0; i<valueMap.length; i++){
736 ArrayList numberList = new ArrayList();
737
738 for(int j = 0; j<valueMap[i].length; j++){
739 numberList.add(new Integer(valueMap[i][j]));
740 }
741 wholeList.add(numberList);
742 }
743
744 for(int i =0; i< wholeList.size(); i++){
745 ArrayList numberList = (ArrayList)wholeList.get(i);
746 Integer value = new Integer(-1);
747 while(numberList.contains(value)){
748 numberList.remove(value);
749 }
750 int[] valueList = new int [numberList.size()];
751
752 for(int j = 0; j< valueList.length; j++){
753 valueList[j] = ((Integer)numberList.get(j)).intValue();
754 }
755 wholeList.remove(i);
756 wholeList.add(i,valueList);
757 }
758 return wholeList;
759 }
760
761 public ArrayList getRemovedID(){
762 return removedID;
763 }
764
765
766 public HashMap getLinks(String[] args, String core_element){
767 Element rootElement = getRootNode(core_element);
768 HashMap hp = new HashMap();
769 NodeList listOfDocument = rootElement.getElementsByTagName("Document");
770 ArrayList tempList = new ArrayList();
771
772
773 for(int i = 0; i<args.length; i++){
774 tempList.add(args[i]);
775 }
776
777 for(int s=0; s<listOfDocument.getLength() ; s++){
778 Node docNode = listOfDocument.item(s);
779 Element docElement = (Element)docNode;
780 NodeList valueList = docElement.getElementsByTagName(valueTag);
781
782 if(valueList.getLength()==1){
783 String id = docNode.getAttributes().item(0).getNodeValue();
784
785 Element valueElement = (Element)valueList.item(0);
786 NodeList textFNList = valueElement.getChildNodes();
787 String text = ((Node)textFNList.item(0)).getNodeValue();
788
789 if(tempList.contains(text)){
790 hp.put(id, text);
791 }
792 }
793 }
794
795 rootElement = getRootNode(urlFile);
796 listOfDocument = rootElement.getElementsByTagName(documentTag);
797 HashMap newHp = new HashMap();
798
799 for(int a=0; a<listOfDocument.getLength(); a++){
800 Node docNode = listOfDocument.item(a);
801 Element docElement = (Element)docNode;
802 NodeList valueList = docElement.getElementsByTagName(valueTag);
803
804 if(valueList.getLength()==1){
805 String id = docNode.getAttributes().item(0).getNodeValue();
806
807 Element valueElement = (Element)valueList.item(0);
808 NodeList textFNList = valueElement.getChildNodes();
809 String text = ((Node)textFNList.item(0)).getNodeValue();
810 newHp.put(text,id);
811 }
812 }
813
814 HashMap tempMap = new HashMap();
815 Collection c = hp.values();
816 Iterator i = c.iterator();
817 while(i.hasNext()){
818 String id = (String)i.next();
819 if(newHp.containsKey(id)){
820 String text = (String)newHp.get(id);
821 if(text.indexOf("http")!=-1){
822 tempMap.put((String)tempMap.get(id),text);
823 }
824 }
825 }
826
827 return tempMap;
828 }
829
830
831 public ArrayList getURLMap(String elementName){
832
833 String core_element =elementName;
834 Element rootElement = getRootNode(core_element);
835 ArrayList alist = new ArrayList();
836
837 if(rootElement.equals(null)){
838 return new ArrayList();
839 }
840
841 NodeList listOfDocument = rootElement.getElementsByTagName(documentTag);
842
843 if(listOfDocument.getLength()==0){return new ArrayList();}
844
845 //System.out.println(listOfDocument.getLength());
846 for(int a=0; a<listOfDocument.getLength(); a++){
847 Node docNode = listOfDocument.item(a);
848 Element docElement = (Element)docNode;
849 NodeList valueList = docElement.getElementsByTagName(valueTag);
850
851 //System.out.println(valueList.getLength());
852 for(int b=0; b<valueList.getLength(); b++){
853 Element valueElement = (Element)valueList.item(b);
854 NodeList textFNList = valueElement.getChildNodes();
855 String text = ((Node)textFNList.item(0)).getNodeValue();
856
857 if(!text.equals(" ")){
858 NamedNodeMap NodeIDMap = docNode .getAttributes();
859 Node DocNodeID = NodeIDMap.item(0);
860 String DocID = DocNodeID.getNodeValue();
861 if(alist.contains(DocID)){}
862 else{
863 alist.add(DocID);
864 }
865 }
866 }
867 }
868
869 return alist;
870}
871
872 public HashMap getIdentifierLink(String core_element){
873
874 Element rootElement = getRootNode(core_element);
875 HashMap hp = new HashMap();
876 NodeList listOfDocument = rootElement.getElementsByTagName(documentTag);
877
878 for(int a=0; a<listOfDocument.getLength(); a++){
879 Node docNode = listOfDocument.item(a);
880 String HashID = docNode.getAttributes().item(0).getNodeValue();
881 Element docElement = (Element)docNode;
882 NodeList valueList = docElement.getElementsByTagName(valueTag);
883
884 for(int y = 0; y<valueList.getLength(); y++){
885 Element valueElement = (Element)valueList.item(y);
886 NodeList textFNList = valueElement.getChildNodes();
887 String text = ((Node)textFNList.item(0)).getNodeValue();
888
889 if(!text.equals(" ") && text.startsWith("http:")){
890 if(hp.containsKey(HashID)){
891 InternalLink il = (InternalLink)hp.get(HashID);
892 il.increaseElement(text);
893 hp.put(HashID,il);
894 }
895 else{
896 InternalLink il = new InternalLink();
897 il.setValue(HashID);
898 il.increaseElement(text);
899 hp.put(HashID, il);
900 }
901 }
902 }
903 }
904 return hp;
905 }
906
907
908 public String[] getDocumentIDList(String core_element){
909
910 Element rootElement = getRootNode(core_element);
911 NodeList listOfDocument = rootElement.getElementsByTagName(documentTag);
912 String[] ids = new String[listOfDocument.getLength()];
913
914 for(int a=0; a<listOfDocument.getLength(); a++){
915 Node docNode = listOfDocument .item(a);
916 NamedNodeMap NodeMap = docNode.getAttributes();
917 Node AttributeNode = NodeMap.item(0);
918 String att_name = AttributeNode.getNodeValue();
919 ids[a] = att_name;
920 }
921 return (String[])ids.clone();
922 }
923
924
925 public HashMap getInternalIdentifier(String core_element){
926
927 Element rootElement = getRootNode(core_element);
928 HashMap hp = new HashMap();
929 NodeList listOfDocument = rootElement.getElementsByTagName(documentTag);
930
931 for(int a=0; a<listOfDocument.getLength(); a++){
932 Node docNode = listOfDocument.item(a);
933 String HashID = docNode.getAttributes().item(0).getNodeValue();
934
935 Element docElement = (Element)docNode;
936 NodeList valueList = docElement.getElementsByTagName(valueTag);
937
938 for(int y = 0; y<valueList.getLength(); y++){
939 Element valueElement = (Element)valueList.item(y);
940 NodeList textFNList = valueElement.getChildNodes();
941 String text = ((Node)textFNList.item(0)).getNodeValue();
942
943 if(!text.equals(" ")){
944 if(hp.containsKey(text)){
945 InternalLink il = (InternalLink)hp.get(text);
946 il.increaseElement(HashID);
947 hp.put(text,il);
948 }
949 else{
950 InternalLink il = new InternalLink();
951 il.setValue(text);
952 il.increaseElement(HashID);
953 hp.put(text, il);
954 }
955 }
956 }
957 }
958 return hp;
959 }
960
961 public HashMap getIdentifierLinkNoIdentifier(){
962
963 Element rootElement = getRootNode(rootDocument);
964 HashMap hp = new HashMap();
965 NodeList listOfDocument = rootElement.getElementsByTagName(documentTag);
966 String url ="No Source Available";
967 for(int s=0; s<listOfDocument.getLength() ; s++){
968 Node docNode = listOfDocument.item(s);
969 String HashID = docNode.getAttributes().item(0).getNodeValue();
970 InternalLink il = new InternalLink();
971 il.increaseElement(HashID);
972 hp.put(HashID, il);
973 }
974 return hp;
975 }
976}
Note: See TracBrowser for help on using the repository browser.