source: other-projects/trunk/greenstone3-extension/mat/src/org/greenstone/gsdl3_extension/mat/DataMaker.java@ 18093

Last change on this file since 18093 was 18093, checked in by cc108, 15 years ago

new Mat source code

File size: 27.9 KB
Line 
1package org.greenstone.gsdl3_extension.mat;
2
3import java.util.ArrayList;
4import java.util.Arrays;
5import java.util.Collection;
6import java.util.Comparator;
7import java.util.HashMap;
8import java.util.Iterator;
9import java.util.Map;
10import java.util.Set;
11
12import javax.xml.parsers.DocumentBuilder;
13import javax.xml.parsers.DocumentBuilderFactory;
14
15import java.io.File;
16import java.io.PrintWriter;
17import java.math.BigDecimal;
18
19import org.w3c.dom.Document;
20import org.w3c.dom.Element;
21import org.w3c.dom.NamedNodeMap;
22import org.w3c.dom.Node;
23import org.w3c.dom.NodeList;
24
25import org.greenstone.gsdl3.core.MessageRouter;
26import org.greenstone.gsdl3.util.XMLConverter;
27
28public class DataMaker {
29
30 MetadataStats ms;
31 ArrayList nameList = new ArrayList();
32
33 private int Mode = 0;
34 private int TotalDoc = 0;
35 private String path = null;
36
37 protected Document doc=null;
38 protected MessageRouter mr = null;
39 protected XMLConverter converter=null;
40 private ArrayList removedID = new ArrayList();
41
42 private final String rootDocument = "archivedir";
43 private final String documentTag = "Document";
44 private final String frequencyTag ="Frequency";
45 private final String valueTag = "ActualValue";
46 private final String ASCII_sort = "ASCII";
47 private final String urlFile = "dc.Identifier";
48
49 private static final int DEF_DIV_SCALE = 10;
50
/**
 * Creates a data maker on top of the given statistics holder.
 *
 * Copies the element name list and the statistics directory out of
 * {@code stats}, then counts the documents listed in the archive index.
 *
 * @param stats holder of the metadata name list and statistics directory
 */
public DataMaker(MetadataStats stats){
    this.ms = stats;
    this.path = stats.StatsDirectory;
    this.nameList = stats.metadataNameList;
    // The count is read from a file under "path", which is set above.
    this.setTotalDocNumber();
}
57
58 private Element getRootNode(String core_element){
59
60 try{
61 DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
62 DocumentBuilder docBuilder = docBuilderFactory.newDocumentBuilder();
63 Document doc = docBuilder.newDocument();
64 doc = docBuilder.parse (new File(path+core_element+".xml"));
65 Element rootNode = doc.getDocumentElement();
66 return rootNode;
67 }catch (Exception e) {
68 e.printStackTrace();
69 return null;
70 }
71 }
72
73 public void setTotalDocNumber(){
74 Element ex = getRootNode(rootDocument);
75 NodeList listOfFrequency = ex.getElementsByTagName(documentTag);
76 TotalDoc = listOfFrequency.getLength();
77 }
78
79 public int getTotalDocNumber(){
80 return TotalDoc;
81 }
82
83 public int getTotalElementUsed(){
84
85 int totalNumber = 0;
86
87 for(int i = 0 ; i<nameList.size(); i++){
88 Element rootElement = getRootNode((String)nameList.get(i));
89 NodeList listOfFrequency = rootElement.getElementsByTagName(frequencyTag);
90
91 for(int a=0; a<listOfFrequency.getLength() ; a++){
92 Node FrequencyNode = listOfFrequency.item(a);
93 NodeList textFNList = FrequencyNode.getChildNodes();
94 String TextNode = textFNList.item(0).getNodeValue();
95 totalNumber = totalNumber + Integer.parseInt(TextNode);
96 }
97 }
98 return totalNumber;
99 }
100
101 public int getFrequency(String core_element){
102
103 int totalNumber = 0;
104 Element rootElement = getRootNode(core_element);
105 NodeList listOfFrequency = rootElement.getElementsByTagName(frequencyTag);
106
107 if(listOfFrequency.getLength()==0){
108 return 0;
109 }
110
111 for(int a=0; a<listOfFrequency.getLength(); a++){
112 Node FrequencyNode = listOfFrequency.item(a);
113 NodeList textFNList = FrequencyNode.getChildNodes();
114 String TextNode = textFNList.item(0).getNodeValue();
115 int count = Integer.parseInt(TextNode);
116 totalNumber = totalNumber + count;
117 }
118 return totalNumber;
119 }
120
121 public int getDistinctNumber(String core_element){
122
123 ArrayList alist = new ArrayList();
124 Element rootElement = getRootNode(core_element);
125 NodeList listOfFrequency = rootElement.getElementsByTagName(valueTag);
126
127 if(listOfFrequency.getLength()==0){
128 return 0;
129 }
130
131 for(int a=0; a<listOfFrequency.getLength(); a++){
132 Node ActualValueNode = listOfFrequency.item(a);
133 NodeList textFNList = ActualValueNode.getChildNodes();
134 String TextNode = textFNList.item(0).getNodeValue();
135
136 if(!alist.contains(TextNode) && !TextNode.equals(" ")){
137 alist.add(TextNode);
138 }
139 }
140 return alist.size();
141 }
142
143 public int getDocumentUsedElement(String core_element){
144
145 int totalNumber = 0;
146 Element rootElement = getRootNode(core_element);
147 NodeList listOfFrequency = rootElement.getElementsByTagName(frequencyTag);
148
149 for(int a=0; a<listOfFrequency.getLength(); a++){
150 totalNumber++;
151 }
152 return totalNumber;
153 }
154
155 public double Mean(String core_element){
156
157 int due = getDocumentUsedElement(core_element);
158
159 if(due==0 || TotalDoc==0){
160 return 0;
161 }
162
163 Double d1 = new Double(due);
164 Double d2 = new Double(TotalDoc);
165 Double result = div(d1,d2);
166 result = mul(result, new Double(100));
167 return round(result.doubleValue(),1);
168 }
169
170 public int getMinRange(String core_element){
171
172 Element rootElement = getRootNode(core_element);
173 NodeList listOfFrequency = rootElement.getElementsByTagName(frequencyTag);
174
175 if(listOfFrequency.getLength()==0){
176 return 0;
177 }
178
179 Node FrequencyNode = listOfFrequency.item(0);
180 NodeList textFNList = FrequencyNode.getChildNodes();
181 String TextNode = textFNList.item(0).getNodeValue();
182
183 int minNumber = 0;
184
185 if(listOfFrequency.getLength()==TotalDoc){
186 minNumber = Integer.parseInt(TextNode);
187 }
188
189 else {
190 minNumber = 0;
191 }
192
193 for(int a=0; a<listOfFrequency.getLength(); a++){
194 FrequencyNode = listOfFrequency.item(a);
195 textFNList = FrequencyNode.getChildNodes();
196 TextNode = textFNList.item(0).getNodeValue();
197 int x = Integer.parseInt(TextNode);
198 if(x<minNumber){minNumber=x;}
199 }
200 return minNumber;
201 }
202
203 public int getMaxRange(String core_element){
204
205 Element rootElement = getRootNode(core_element);
206 NodeList listOfFrequency = rootElement.getElementsByTagName(frequencyTag);
207
208 if(listOfFrequency.getLength()==0){
209 return 0;
210 }
211
212 Node FrequencyNode = listOfFrequency.item(0);
213 NodeList textFNList = FrequencyNode.getChildNodes();
214 String TextNode = textFNList.item(0).getNodeValue();
215 int maxNumber = 0;
216
217 for(int a=0; a<listOfFrequency.getLength(); a++){
218 FrequencyNode = listOfFrequency.item(a);
219 textFNList = FrequencyNode.getChildNodes();
220 TextNode = textFNList.item(0).getNodeValue();
221 int x = Integer.parseInt(TextNode);
222 if(x>maxNumber){maxNumber=x;}
223 }
224 return maxNumber;
225 }
226
227 public int getMode(String core_element){
228
229 Element rootElement = getRootNode(core_element);
230 NodeList listOfDocument = rootElement.getElementsByTagName(documentTag);
231
232 if(listOfDocument.getLength()==0){
233 Mode = 0;
234 return 0;
235 }
236
237 ArrayList alist = new ArrayList();
238 String[] idList = getDocumentIDs(core_element);
239
240 for(int i = 0; i<idList.length; i++){
241 alist.add(idList[i]);
242 }
243
244 int[] list = new int[TotalDoc];
245
246 for(int i = 0; i<list.length; i++){
247 list[i] = 0;
248 }
249
250 for(int a=0; a<listOfDocument.getLength(); a++){
251 Node docNode = listOfDocument.item(a);
252 NamedNodeMap NodeIDMap = docNode.getAttributes();
253 Node DocNodeID = NodeIDMap.item(0);
254 String DocID = DocNodeID.getNodeValue();
255 Element xNode = (Element)docNode;
256 int location = alist.indexOf(DocID);
257 NodeList xList = xNode.getElementsByTagName(frequencyTag);
258 int frequency = Integer.parseInt(xList.item(0).getChildNodes().item(0).getNodeValue());
259 list[location] = frequency;
260 }
261
262 Arrays.sort(list);
263
264 int max_idx = 0; // Index of the maximum count
265 int max_cnt = 0;
266 int count = 0;
267
268 for ( int i = 0; i <list.length; i++) {
269 count = 0;
270 for ( int j = 0; j < list.length; j++) {
271 if (list[i] == list[j]) {
272 count++;
273 }
274 }
275 if (count > max_cnt) {
276 max_cnt = count;
277 max_idx = i;
278 }
279 }
280 Mode = list [max_idx];
281 return list [max_idx];
282 }
283
284 public double ModeFrequency(String core_element){
285
286 Element rootElement = getRootNode(core_element);
287 NodeList listOfDocument = rootElement.getElementsByTagName(documentTag);
288
289 if(listOfDocument.getLength()==0){
290 return 100;
291 }
292
293 ArrayList alist = new ArrayList();
294 String[] idList = getDocumentIDs(core_element);
295
296 for(int i = 0; i<idList.length; i++){
297 alist.add(idList[i]);
298 }
299
300 int[] list = new int[TotalDoc];
301
302 for(int i = 0; i<list.length; i++){
303 list[i] = 0;
304 }
305
306 int length = alist.size();
307 int counter = 0;
308
309 for(int a=0; a<listOfDocument.getLength(); a++){
310 Node docNode = listOfDocument.item(a);
311 NamedNodeMap NodeIDMap = docNode.getAttributes();
312 Node DocNodeID = NodeIDMap.item(0);
313 String DocID = DocNodeID.getNodeValue();
314 Element xNode = (Element)docNode;
315 int location = alist.indexOf(DocID);
316 NodeList xList = xNode.getElementsByTagName(frequencyTag);
317 int frequency = Integer.parseInt(xList.item(0).getChildNodes().item(0).getNodeValue());
318 list[location] = frequency;
319 }
320
321 for(int i =0; i<list.length; i++){
322 if(list[i]==Mode){
323 counter++;
324 }
325 }
326
327 Double result = div(new Double(counter), new Double(length));
328 result = mul(result ,new Double (100));
329 return round(result.doubleValue(),1);
330 }
331
332 public double Median(String core_element){
333
334 Element rootElement = getRootNode(core_element);
335 NodeList listOfDocument = rootElement.getElementsByTagName(documentTag);
336
337 ArrayList alist = new ArrayList();
338 String[] idList = getDocumentIDs(core_element);
339
340 for(int i = 0; i<idList.length; i++){
341 alist.add(idList[i]);
342 }
343
344 int[] list = new int[TotalDoc];
345
346 for(int i = 0; i<list.length; i++){
347 list[i] = 0;
348 }
349
350 for(int a=0; a<listOfDocument.getLength(); a++){
351 Node docNode = listOfDocument.item(a);
352 NamedNodeMap NodeIDMap = docNode.getAttributes();
353 Node DocNodeID = NodeIDMap.item(0);
354 String DocID = DocNodeID.getNodeValue();
355 Element xNode = (Element)docNode;
356 int location = alist.indexOf(DocID);
357 NodeList xList = xNode.getElementsByTagName(frequencyTag);
358 int frequency = Integer.parseInt(xList.item(0).getChildNodes().item(0).getNodeValue());
359 list[location] = frequency;
360 }
361
362 int length = alist.size();
363 int middle = length/2 -1;
364
365 Arrays.sort(list);
366
367 if(length % 2 == 1){
368 middle = middle + 1;
369 return list[middle];
370 }
371
372 else{
373 return round((double)(list[middle]+list[middle+1])/2,1);
374 }
375 }
376
377 public double Average(String core_element){
378
379 int t1 = getFrequency(core_element);
380 int t2 = getDocumentUsedElement(core_element);
381
382 if(t1==0 || t2==0){
383 return 0;
384 }
385
386 Double result = div(new Double(t1),new Double(t2));
387 return round(result.doubleValue(),1);
388 }
389
390 public Object[][] AllInformation(){
391
392 int rows = nameList.size();
393 int cols = 11;
394 int y = 0;
395
396 Object[][] info = new Object[rows][cols];
397 String[] list = new String[rows];
398
399 for(int i = 0 ; i < list.length; i++){
400 list[i] = nameList.get(i).toString();
401 }
402
403 Arrays.sort(list);
404
405 for(int iu = 0; iu<list.length; iu++){
406 String xi = list[iu];
407 info[y][0] = xi ;
408 info[y][1] = new Integer(getFrequency(xi));
409 info[y][2] = new Integer(getDocumentUsedElement(xi));
410 info[y][3] = new Double(Mean(xi));
411 info[y][4] = new Double(Median(xi));
412 info[y][5] = new Integer(getDistinctNumber(xi));
413 info[y][6] = new Integer(getMinRange(xi));
414 info[y][7] = new Integer(getMaxRange(xi));
415 info[y][8] = new Double(Average(xi));
416 info[y][9] = new Integer(getMode(xi));
417 info[y][10] = ModeFrequency(xi)+"%";
418 y++;
419 }
420 return info;
421 }
422
423 public String[] getSortList(String core_element,String sort){
424
425 if(sort.equals(ASCII_sort)){
426 HashMap hp = getDistinctValueMap (core_element);
427 String[] temp = new String[hp.size()];
428 int counter = 0;
429 Set s = hp.keySet();
430 Iterator i = s.iterator();
431
432 while(i.hasNext()){
433 temp[counter] = (String)i.next();
434 counter++;
435 }
436
437 Arrays.sort(temp);
438 return temp;
439 }
440
441 else{
442 Map m = getDistinctValueMap (core_element);
443 ArrayList outputList = sortMap(m);
444 String[] temp = new String[outputList.size()];
445
446 for(int i = 0; i< outputList.size(); i++){
447 Map.Entry entry = (Map.Entry) outputList.get(i);
448 temp[i] = (String) entry.getKey();
449 }
450
451 return temp;
452 }
453 }
454
455
456 public HashMap getDistinctValueMap(String core_element){
457
458 Element rootElement = getRootNode(core_element);
459 HashMap hp = new HashMap();
460 NodeList listOfDocument = rootElement.getElementsByTagName(documentTag);
461
462 for(int a=0; a<listOfDocument.getLength(); a++){
463
464 Node docNode = listOfDocument.item(a);
465 Element docElement = (Element)docNode;
466 NodeList valueList = docElement.getElementsByTagName(valueTag);
467
468 for(int b= 0; b<valueList.getLength(); b++){
469
470 Element valueElement = (Element)valueList.item(b);
471 NodeList textFNList = valueElement.getChildNodes();
472 String text = ((Node)textFNList.item(0)).getNodeValue();
473
474 if(!text.equals(" ")){
475 if(hp.containsKey(text)){
476 Integer i = (Integer)hp.get(text);
477 int number = i.intValue();
478 number++;
479 hp.put(text,new Integer(number));
480 }
481 else{
482 Integer i = new Integer(1);
483 hp.put(text, i);
484 }
485 }
486 }
487 }
488 return hp;
489 }
490
491
492 public HashMap getDocFrequencyMap(String core_element){
493
494 Element rootElement = getRootNode(core_element);
495 HashMap hp = new HashMap();
496 NodeList listOfDocument= rootElement.getElementsByTagName(documentTag);
497
498 for(int a=0; a<listOfDocument.getLength(); a++){
499
500 Node docNode = listOfDocument.item(a);
501 NamedNodeMap NodeMap = docNode.getAttributes();
502 Node AttributeNode = NodeMap.item(0);
503 String att_name = AttributeNode.getNodeValue();
504
505 Element docElement = (Element)docNode;
506 NodeList valueList = docElement.getElementsByTagName(frequencyTag);
507 Element frequencyElement = (Element)valueList.item(0);
508 NodeList textFNList = frequencyElement.getChildNodes();
509 String text = ((Node)textFNList.item(0)).getNodeValue();
510 Integer i = new Integer(Integer.parseInt(text));
511 hp.put(att_name, i);
512 }
513 return hp;
514 }
515
516
517 public String[] getDocumentIDs(String core_element){
518
519 Element rootElement = getRootNode(rootDocument);
520 NodeList listOfDocument = rootElement.getElementsByTagName(documentTag);
521 String[] ids = new String[listOfDocument.getLength()];
522
523 for(int a=0; a<listOfDocument.getLength(); a++){
524 Node docNode = listOfDocument.item(a);
525 NamedNodeMap NodeMap = docNode.getAttributes();
526 Node AttributeNode = NodeMap.item(0);
527 String att_name = AttributeNode.getNodeValue();
528 ids[a] = att_name;
529 }
530 return (String[])ids.clone();
531 }
532
533 public int[] getMetadataRows(String core_element){
534 Element rootElement = getRootNode(core_element);
535 ArrayList alist = new ArrayList();
536 String[] idsx = getDocumentIDs(core_element);
537
538 for(int i = 0; i<idsx.length; i++){
539 alist.add(idsx[i]);
540 }
541
542 NodeList listOfDocument = rootElement.getElementsByTagName(documentTag);
543 int[] row = new int[TotalDoc];
544
545 for(int i = 0; i<row.length; i++){
546 row [i] = 0;
547 }
548
549 for(int a=0; a<listOfDocument.getLength(); a++){
550 Node docNode = listOfDocument.item(a);
551 int location = alist.indexOf(docNode.getAttributes().item(0).getNodeValue());
552 row[location] = 1;
553 }
554 return row;
555 }
556
557 public ArrayList sortMap(Map map) {
558 ArrayList outputList = null;
559 int count = 0;
560 Set set = null;
561 Map.Entry[] entries = null;
562
563 set = (Set) map.entrySet();
564 Iterator iterator = set.iterator();
565 entries = new Map.Entry[set.size()];
566 while(iterator.hasNext()) {
567 entries[count++] = (Map.Entry) iterator.next();
568 }
569
570 Arrays.sort(entries, new Comparator() {
571 public int compareTo(Object lhs, Object rhs) {
572 Map.Entry le = (Map.Entry)lhs;
573 Map.Entry re = (Map.Entry)rhs;
574 return ((Comparable)le.getValue()).compareTo((Comparable)re.getValue());
575 }
576
577 public int compare(Object lhs, Object rhs) {
578 Map.Entry le = (Map.Entry)lhs;
579 Map.Entry re = (Map.Entry)rhs;
580 return ((Comparable)le.getValue()).compareTo((Comparable)re.getValue());
581 }
582 });
583
584 outputList = new ArrayList();
585 for(int i = 0; i < entries.length; i++) {
586 outputList.add(entries[i]);
587 }
588 return outputList;
589 }
590
591 private Double div(Double d1, Double d2){
592 BigDecimal b1 = new BigDecimal(d1.toString());
593 BigDecimal b2 = new BigDecimal(d2.toString());
594 return new Double(b1.divide(b2,DEF_DIV_SCALE,BigDecimal.ROUND_HALF_UP).doubleValue());
595 }
596
597 private Double mul(Double d1,Double d2){
598 BigDecimal b1 = new BigDecimal(d1.toString());
599 BigDecimal b2 = new BigDecimal(d2.toString());
600 return new Double(b1.multiply(b2).doubleValue());
601 }
602
603 public double round(double v,int scale){
604 if(scale<0){
605 throw new IllegalArgumentException(
606 "The scale must be a positive integer or zero");
607 }
608 BigDecimal b = new BigDecimal(Double.toString(v));
609 BigDecimal one = new BigDecimal("1");
610 return b.divide(one,scale,BigDecimal.ROUND_HALF_UP).doubleValue();
611 }
612
613 public double getSingleMetadataSetCompleteness(ArrayList mds_list){
614
615 int totalElement = 0;
616 int totalElementUsed = 0;
617
618 for(int a = 0; a<mds_list.size(); a++){
619 MetadataSet mds = (MetadataSet)mds_list.get(a);
620 ArrayList alist = mds.getIndexsList();
621 int length = alist.size();
622 totalElement = totalElement + length * ms.getDocNum();
623
624 for(int i = 0; i<alist.size(); i++){
625 String name = (String)alist.get(i);
626 totalElementUsed = totalElementUsed + getDocumentUsedElement(name);
627 }
628 }
629
630 double x = (double)totalElementUsed/totalElement;
631 Double d1 = new Double(x);
632 Double d2 = new Double(100);
633 Double result = mul(d1,d2);
634 return round(result.doubleValue(),1);
635 }
636
637 public Object[][] getMetadataInfo(MetadataSet mds){
638
639 ArrayList alist = mds.getIndexsList();
640 int rows = alist.size();
641 int cols = 11;
642 int y = 0;
643 Object[][] dataset = new Object[rows][cols];
644
645 String[] list = new String[rows];
646
647 for(int i = 0 ; i < list.length; i++){
648 list[i] = alist.get(i).toString();
649 }
650
651 Arrays.sort(list);
652
653 for(int iu = 0; iu<list.length; iu++){
654 String xi = list[iu];
655 dataset[y][0] = xi ;
656 dataset[y][1] = new Integer(getFrequency(xi));
657 dataset[y][2] = new Integer(getDocumentUsedElement(xi));
658 dataset[y][3] = new Double (Mean(xi));
659 dataset[y][4] = new Double (Median(xi));
660 dataset[y][5] = new Integer(getDistinctNumber(xi));
661 dataset[y][6] = new Integer(getMinRange(xi));
662 dataset[y][7] = new Integer(getMaxRange(xi));
663 dataset[y][8] = new Double (Average(xi));
664 dataset[y][9] = new Integer(getMode(xi));
665 dataset[y][10] = ModeFrequency(xi)+"%";
666 y++;
667
668 }
669 return dataset;
670 }
671
672 public boolean IsElementEmpty(String core_element){
673
674 int[] list = getMetadataRows(core_element);
675 boolean status = true;
676
677 for(int i=0; i<list.length; i++){
678 if(list[i]==1){status = false;}
679 }
680 return status;
681 }
682
683 public boolean IsElementFull(String core_element){
684
685 int[] list = getMetadataRows(core_element);
686 boolean status = true;
687
688 for(int i=0; i<list.length; i++){
689 if(list[i]==0){status = false;}
690 }
691 return status;
692 }
693
694 public ArrayList removeDocument(ArrayList dataset, String[] ids, int number){
695 removedID = new ArrayList();
696
697 int[] metadataLevel =(int[])dataset.get(0);
698 int docIDslength = metadataLevel.length;
699 int[][] valueMap = new int[dataset.size()][docIDslength];
700
701 for(int i = 0; i< docIDslength; i++){
702
703 boolean status = true;
704
705 for(int j = 0; j<dataset.size(); j++){
706 int[] metadataLevelArray = (int[])dataset.get(j);
707 valueMap[j][i] = metadataLevelArray[i];
708 if(metadataLevelArray[i]!=number){status = false;}
709 }
710 if(status == true){
711 for(int j = 0; j<dataset.size(); j++){
712 valueMap[j][i]=-1;
713
714 }
715 removedID.add(ids[i]);
716 }
717 }
718 ArrayList wholeList = new ArrayList();
719
720 for(int i = 0; i<valueMap.length; i++){
721 ArrayList numberList = new ArrayList();
722
723 for(int j = 0; j<valueMap[i].length; j++){
724 numberList.add(new Integer(valueMap[i][j]));
725 }
726 wholeList.add(numberList);
727 }
728
729 for(int i =0; i< wholeList.size(); i++){
730 ArrayList numberList = (ArrayList)wholeList.get(i);
731 Integer value = new Integer(-1);
732 while(numberList.contains(value)){
733 numberList.remove(value);
734 }
735 int[] valueList = new int [numberList.size()];
736
737 for(int j = 0; j< valueList.length; j++){
738 valueList[j] = ((Integer)numberList.get(j)).intValue();
739 }
740 wholeList.remove(i);
741 wholeList.add(i,valueList);
742 }
743 return wholeList;
744 }
745
746 public ArrayList getRemovedID(){
747 return removedID;
748 }
749
750
751 public HashMap getLinks(String[] args, String core_element){
752 Element rootElement = getRootNode(core_element);
753 HashMap hp = new HashMap();
754 NodeList listOfDocument = rootElement.getElementsByTagName("Document");
755 ArrayList tempList = new ArrayList();
756
757
758 for(int i = 0; i<args.length; i++){
759 tempList.add(args[i]);
760 }
761
762 for(int s=0; s<listOfDocument.getLength() ; s++){
763 Node docNode = listOfDocument.item(s);
764 Element docElement = (Element)docNode;
765 NodeList valueList = docElement.getElementsByTagName(valueTag);
766
767 if(valueList.getLength()==1){
768 String id = docNode.getAttributes().item(0).getNodeValue();
769 Element valueElement = (Element)valueList.item(0);
770 NodeList textFNList = valueElement.getChildNodes();
771 String text = ((Node)textFNList.item(0)).getNodeValue();
772
773 if(tempList.contains(text)){
774 hp.put(id, text);
775 }
776 }
777 }
778
779 rootElement = getRootNode(urlFile);
780 listOfDocument = rootElement.getElementsByTagName(documentTag);
781 HashMap newHp = new HashMap();
782
783 for(int a=0; a<listOfDocument.getLength(); a++){
784 Node docNode = listOfDocument.item(a);
785 Element docElement = (Element)docNode;
786 NodeList valueList = docElement.getElementsByTagName(valueTag);
787
788 if(valueList.getLength()==1){
789 String id = docNode.getAttributes().item(0).getNodeValue();
790 Element valueElement = (Element)valueList.item(0);
791 NodeList textFNList = valueElement.getChildNodes();
792 String text = ((Node)textFNList.item(0)).getNodeValue();
793 newHp.put(text,id);
794 }
795 }
796
797 HashMap tempMap = new HashMap();
798 Collection c = hp.values();
799 Iterator i = c.iterator();
800
801 while(i.hasNext()){
802 String id = (String)i.next();
803 if(newHp.containsKey(id)){
804 String text = (String)newHp.get(id);
805 if(text.contains("http")){
806 tempMap.put((String)tempMap.get(id),text);
807 }
808 }
809 }
810
811 return tempMap;
812 }
813
814
815 public ArrayList getURLMap(String elementName){
816
817 String core_element =elementName;
818 Element rootElement = getRootNode(core_element);
819 ArrayList alist = new ArrayList();
820
821 if(rootElement.equals(null)){
822 return new ArrayList();
823 }
824
825 NodeList listOfDocument = rootElement.getElementsByTagName(documentTag);
826
827 if(listOfDocument.getLength()==0){return new ArrayList();}
828
829 for(int a=0; a<listOfDocument.getLength(); a++){
830 Node docNode = listOfDocument.item(a);
831 Element docElement = (Element)docNode;
832 NodeList valueList = docElement.getElementsByTagName(valueTag);
833
834 for(int b=0; b<valueList.getLength(); b++){
835 Element valueElement = (Element)valueList.item(b);
836 NodeList textFNList = valueElement.getChildNodes();
837 String text = ((Node)textFNList.item(0)).getNodeValue();
838
839 if(!text.equals(" ")){
840 NamedNodeMap NodeIDMap = docNode .getAttributes();
841 Node DocNodeID = NodeIDMap.item(0);
842 String DocID = DocNodeID.getNodeValue();
843 if(alist.contains(DocID)){}
844 else{
845 alist.add(DocID);
846 }
847 }
848 }
849 }
850 return alist;
851 }
852
853 public HashMap getIdentifierLink(String core_element){
854
855 Element rootElement = getRootNode(core_element);
856 HashMap hp = new HashMap();
857 NodeList listOfDocument = rootElement.getElementsByTagName(documentTag);
858
859 for(int a=0; a<listOfDocument.getLength(); a++){
860 Node docNode = listOfDocument.item(a);
861 String HashID = docNode.getAttributes().item(0).getNodeValue();
862 Element docElement = (Element)docNode;
863 NodeList valueList = docElement.getElementsByTagName(valueTag);
864
865 for(int y = 0; y<valueList.getLength(); y++){
866 Element valueElement = (Element)valueList.item(y);
867 NodeList textFNList = valueElement.getChildNodes();
868 String text = ((Node)textFNList.item(0)).getNodeValue();
869
870 if(!text.equals(" ") && text.startsWith("http:")){
871 if(hp.containsKey(HashID)){
872 InternalLink il = (InternalLink)hp.get(HashID);
873 il.increaseElement(text);
874 hp.put(HashID,il);
875 }
876 else{
877 InternalLink il = new InternalLink();
878 il.setValue(HashID);
879 il.increaseElement(text);
880 hp.put(HashID, il);
881 }
882 }
883 }
884 }
885 return hp;
886 }
887
888 public String[] getDocumentIDList(String core_element){
889
890 Element rootElement = getRootNode(core_element);
891 NodeList listOfDocument = rootElement.getElementsByTagName(documentTag);
892 String[] ids = new String[listOfDocument.getLength()];
893
894 for(int a=0; a<listOfDocument.getLength(); a++){
895 Node docNode = listOfDocument .item(a);
896 NamedNodeMap NodeMap = docNode.getAttributes();
897 Node AttributeNode = NodeMap.item(0);
898 String att_name = AttributeNode.getNodeValue();
899 ids[a] = att_name;
900 }
901 return (String[])ids.clone();
902 }
903
904
905 public HashMap getInternalIdentifier(String core_element){
906
907 Element rootElement = getRootNode(core_element);
908 HashMap hp = new HashMap();
909 NodeList listOfDocument = rootElement.getElementsByTagName(documentTag);
910
911 for(int a=0; a<listOfDocument.getLength(); a++){
912 Node docNode = listOfDocument.item(a);
913 String HashID = docNode.getAttributes().item(0).getNodeValue();
914
915 Element docElement = (Element)docNode;
916 NodeList valueList = docElement.getElementsByTagName(valueTag);
917
918 for(int y = 0; y<valueList.getLength(); y++){
919 Element valueElement = (Element)valueList.item(y);
920 NodeList textFNList = valueElement.getChildNodes();
921 String text = ((Node)textFNList.item(0)).getNodeValue();
922
923 if(!text.equals(" ")){
924 if(hp.containsKey(text)){
925 InternalLink il = (InternalLink)hp.get(text);
926 il.increaseElement(HashID);
927 hp.put(text,il);
928 }
929 else{
930 InternalLink il = new InternalLink();
931 il.setValue(text);
932 il.increaseElement(HashID);
933 hp.put(text, il);
934 }
935 }
936 }
937 }
938 return hp;
939 }
940
941 public HashMap getIdentifierLinkNoIdentifier(){
942
943 Element rootElement = getRootNode(rootDocument);
944 HashMap hp = new HashMap();
945 NodeList listOfDocument = rootElement.getElementsByTagName(documentTag);
946 String url ="No Source Available";
947
948 for(int s=0; s<listOfDocument.getLength() ; s++){
949 Node docNode = listOfDocument.item(s);
950 String HashID = docNode.getAttributes().item(0).getNodeValue();
951 InternalLink il = new InternalLink();
952 il.increaseElement(HashID);
953 hp.put(HashID, il);
954 }
955 return hp;
956 }
957}
Note: See TracBrowser for help on using the repository browser.