source: other-projects/trunk/greenstone3-extension/mat/src/org/greenstone/gsdl3_extension/mat/servlet/DataMaker.java@ 18104

Last change on this file since 18104 was 18104, checked in by cc108, 15 years ago

new MatServlet source code

File size: 27.6 KB
Line 
1package org.greenstone.gsdl3_extension.mat.servlet;
2
3import java.util.Map;
4import java.util.Set;
5import java.util.Arrays;
6import java.util.HashMap;
7import java.util.Iterator;
8import java.util.ArrayList;
9import java.util.Collection;
10import java.util.Comparator;
11
12import java.io.File;
13import java.io.PrintWriter;
14import java.math.BigDecimal;
15
16import org.w3c.dom.Node;
17import org.w3c.dom.Element;
18import org.w3c.dom.NodeList;
19import org.w3c.dom.Document;
20import org.w3c.dom.NamedNodeMap;
21
22import org.greenstone.gsdl3.util.XMLConverter;
23import org.greenstone.gsdl3.core.MessageRouter;
24
25import javax.xml.parsers.DocumentBuilder;
26import javax.xml.parsers.DocumentBuilderFactory;
27
28public class DataMaker {
29
30 MetadataStats ms;
31 ArrayList nameList;
32 PrintWriter out;
33
34 private int Mode;
35 private int TotalDoc;
36 private String path;
37
38 protected Document doc=null;
39 protected MessageRouter mr = null;
40 protected XMLConverter converter=null;
41
42 private ArrayList removedID = new ArrayList();
43 private static final int DEF_DIV_SCALE = 10;
44
45 public DataMaker(MetadataStats arg){
46
47 ms = arg;
48 nameList = arg.metadataNameList;
49 path = arg.StatsDirectory;
50 setTotalDocNumber();
51 }
52
53 private Element getRootNode(String core_element){
54
55 try{
56 DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
57 DocumentBuilder docBuilder = docBuilderFactory.newDocumentBuilder();
58 Document doc = docBuilder.newDocument();
59 doc = docBuilder.parse (new File(path+ms.fileSeparator+core_element+".xml"));
60 Element rootNode = doc.getDocumentElement();
61
62 return rootNode;
63 }catch (Exception e) {
64 e.printStackTrace();
65 return null;
66 }
67 }
68
69 public void setTotalDocNumber(){
70
71 Element ex = getRootNode(ms.RootDirectory);
72 NodeList listOfFrequency = ex.getElementsByTagName(ms.DocumentNode);
73 TotalDoc = listOfFrequency.getLength();
74 }
75
76 public int getTotalElementUsed(){
77
78 int totalNumber = 0;
79
80 for(int i=0 ; i<nameList.size(); i++){
81
82 Element ex = getRootNode((String)nameList.get(i));
83 NodeList listOfFrequency = ex.getElementsByTagName(ms.FrequencyNode);
84
85 for(int s=0; s<listOfFrequency.getLength() ; s++){
86 Node FrequencyNode = listOfFrequency.item(s);
87 NodeList textFNList = FrequencyNode.getChildNodes();
88 String TextNode = textFNList.item(0).getNodeValue();
89 totalNumber = totalNumber + Integer.parseInt(TextNode);
90 }
91 }
92
93 return totalNumber;
94 }
95
96 public int getFrequency(String name){
97
98 int totalNumber = 0;
99 Element ex = getRootNode(name);
100 NodeList listOfFrequency = ex.getElementsByTagName(ms.FrequencyNode);
101
102 if(listOfFrequency.getLength()==0){
103 return 0;
104 }
105
106 for(int s=0; s<listOfFrequency.getLength(); s++){
107 Node FrequencyNode = listOfFrequency.item(s);
108 NodeList textFNList = FrequencyNode.getChildNodes();
109 String TextNode = textFNList.item(0).getNodeValue();
110 int x = Integer.parseInt(TextNode);
111 totalNumber = totalNumber + x;
112 }
113
114 return totalNumber;
115 }
116
117 public int getDistinctNumber(String name){
118
119 ArrayList alist = new ArrayList();
120 Element ex = getRootNode(name);
121 NodeList listOfFrequency = ex.getElementsByTagName(ms.ActualValueNode);
122 if(listOfFrequency.getLength()==0){return 0;}
123 for(int s=0; s<listOfFrequency.getLength() ; s++){
124 Node ActualValueNode = listOfFrequency.item(s);
125 NodeList textFNList = ActualValueNode.getChildNodes();
126 String TextNode = textFNList.item(0).getNodeValue();
127 if(!alist.contains(TextNode) && !TextNode.equals(" ")){alist.add(TextNode);}
128 }
129 return alist.size();
130 }
131
132 public int getDocumentUsedElement(String core_element){
133
134 int totalNumber = 0;
135 Element ex = getRootNode(core_element);
136 NodeList listOfFrequency = ex.getElementsByTagName(ms.FrequencyNode);
137
138 for(int s=0; s<listOfFrequency.getLength() ; s++){
139 totalNumber++;
140 }
141 return totalNumber;
142 }
143
144 public double Mean(String core_element){
145
146 int due = getDocumentUsedElement(core_element);
147
148 if(due==0 || TotalDoc==0){
149 return 0;
150 }
151
152 Double d1 = new Double(due);
153 Double d2 = new Double(TotalDoc);
154 Double result = div(d1,d2);
155 result = mul(result, new Double(100));
156
157 return round(result.doubleValue(),1);
158 }
159
160 public int getMinRange(String core_element){
161
162 Element ex = getRootNode(core_element);
163 NodeList listOfFrequency = ex.getElementsByTagName(ms.FrequencyNode);
164
165 if(listOfFrequency.getLength()==0){
166 return 0;
167 }
168
169 Node FrequencyNode = listOfFrequency.item(0);
170 NodeList textFNList = FrequencyNode.getChildNodes();
171 String TextNode = textFNList.item(0).getNodeValue();
172 int minNumber = 0;
173
174 if(listOfFrequency.getLength()==TotalDoc){
175 minNumber = Integer.parseInt(TextNode);
176 }
177 else {
178 minNumber = 0;
179 }
180
181 for(int s=0; s<listOfFrequency.getLength() ; s++){
182 FrequencyNode = listOfFrequency.item(s);
183 textFNList = FrequencyNode.getChildNodes();
184 TextNode = textFNList.item(0).getNodeValue();
185 int x = Integer.parseInt(TextNode);
186
187 if(x<minNumber){
188 minNumber=x;
189 }
190 }
191
192 return minNumber;
193 }
194
195 public int getMaxRange(String core_element){
196
197 Element ex = getRootNode(core_element);
198 NodeList listOfFrequency = ex.getElementsByTagName(ms.FrequencyNode);
199
200 if(listOfFrequency.getLength()==0){
201 return 0;
202 }
203
204 Node FrequencyNode = listOfFrequency.item(0);
205 NodeList textFNList = FrequencyNode.getChildNodes();
206 String TextNode = textFNList.item(0).getNodeValue();
207 int maxNumber = 0;
208
209 for(int s=0; s<listOfFrequency.getLength() ; s++){
210 FrequencyNode = listOfFrequency.item(s);
211 textFNList = FrequencyNode.getChildNodes();
212 TextNode = textFNList.item(0).getNodeValue();
213 int x = Integer.parseInt(TextNode);
214
215 if(x>maxNumber){
216 maxNumber=x;
217 }
218 }
219
220 return maxNumber;
221 }
222
223 public int getMode(String core_element){
224
225 Element ex = getRootNode(core_element);
226 NodeList listOfFrequency = ex.getElementsByTagName(ms.DocumentNode);
227
228 if(listOfFrequency.getLength()==0){
229 Mode = 0;
230 return 0;
231 }
232
233 ArrayList alist = new ArrayList();
234 String[] idsx = getDocumentIDs(core_element);
235
236 for(int i = 0; i<idsx.length; i++){
237 alist.add(idsx[i]);
238 }
239
240 int[] list = new int[TotalDoc];
241
242 for(int i = 0; i<list.length; i++){
243 list[i] = 0;
244 }
245
246 for(int s=0; s< listOfFrequency.getLength() ; s++){
247 Node docNode = listOfFrequency.item(s);
248 NamedNodeMap NodeIDMap = docNode.getAttributes();
249 Node DocNodeID = NodeIDMap.item(0);
250 String DocID = DocNodeID.getNodeValue();
251 Element xNode = (Element)docNode;
252 int location = alist.indexOf(DocID);
253 NodeList xList = xNode.getElementsByTagName(ms.FrequencyNode);
254 int fre = Integer.parseInt(xList.item(0).getChildNodes().item(0).getNodeValue());
255 list[location] = fre;
256 }
257
258 Arrays.sort(list);
259
260 int max_idx = 0; // Index of the maximum count
261 int max_cnt = 0;
262 int count = 0;
263
264 for ( int i = 0; i <list.length; i++) {
265 count = 0;
266 for ( int j = 0; j < list.length; j++) {
267 if (list[i] == list[j]) {
268 count++;
269 }
270 }
271 if (count > max_cnt) {
272 max_cnt = count;
273 max_idx = i;
274 }
275 }
276 Mode = list [max_idx];
277
278 return list [max_idx];
279 }
280
281 public double ModeFrequency(String core_element){
282
283 Element ex = getRootNode(core_element);
284 NodeList listOfFrequency = ex.getElementsByTagName(ms.DocumentNode);
285
286 if(listOfFrequency.getLength()==0){
287 return 100;
288 }
289
290 ArrayList alist = new ArrayList();
291 String[] idsx = getDocumentIDs(core_element);
292
293 for(int i = 0; i<idsx.length; i++){
294 alist.add(idsx[i]);
295 }
296
297 int[] list = new int[TotalDoc];
298
299 for(int i = 0; i<list.length; i++){
300 list[i] = 0;
301 }
302
303 int length = alist.size();
304 int counter = 0;
305
306 for(int s=0; s< listOfFrequency.getLength() ; s++){
307 Node docNode = listOfFrequency.item(s);
308 NamedNodeMap NodeIDMap = docNode.getAttributes();
309 Node DocNodeID = NodeIDMap.item(0);
310 String DocID = DocNodeID.getNodeValue();
311 Element xNode = (Element)docNode;
312 int location = alist.indexOf(DocID);
313 NodeList xList = xNode.getElementsByTagName(ms.FrequencyNode);
314 int fre = Integer.parseInt(xList.item(0).getChildNodes().item(0).getNodeValue());
315 list[location] = fre;
316 }
317
318 for(int i =0; i<list.length; i++){
319 if(list[i]==Mode){counter++;}
320 }
321
322 Double result = div(new Double(counter), new Double(length));
323 result = mul(result ,new Double (100));
324
325 return round(result.doubleValue(),1);
326 }
327
328 public double Median(String core_element){
329
330 Element ex = getRootNode(core_element);
331 NodeList listOfFrequency = ex.getElementsByTagName(ms.DocumentNode);
332
333 ArrayList alist = new ArrayList();
334 String[] idsx = getDocumentIDs(core_element);
335
336 for(int i = 0; i<idsx.length; i++){
337 alist.add(idsx[i]);
338 }
339
340 int[] list = new int[TotalDoc];
341
342 for(int i = 0; i<list.length; i++){
343 list[i] = 0;
344 }
345
346 for(int s=0; s< listOfFrequency.getLength() ; s++){
347 Node docNode = listOfFrequency.item(s);
348 NamedNodeMap NodeIDMap = docNode.getAttributes();
349 Node DocNodeID = NodeIDMap.item(0);
350 String DocID = DocNodeID.getNodeValue();
351 Element xNode = (Element)docNode;
352 int location = alist.indexOf(DocID);
353 NodeList xList = xNode.getElementsByTagName(ms.FrequencyNode);
354 int fre = Integer.parseInt(xList.item(0).getChildNodes().item(0).getNodeValue());
355 list[location] = fre;
356 }
357
358 int length = alist.size();
359 int middle = length/2 -1;
360
361 Arrays.sort(list);
362
363 if(length % 2 == 1){
364 middle = middle + 1;
365 return list[middle];
366 }
367
368 else{
369 return round((double)(list[middle]+list[middle+1])/2,1);
370 }
371 }
372
373 public double Average(String core_element){
374
375 int t1 = getFrequency(core_element);
376 int t2 = getDocumentUsedElement(core_element);
377
378 if(t1==0 || t2==0){
379 return 0;
380 }
381
382 Double result = div(new Double(t1),new Double(t2));
383
384 return round(result.doubleValue(),1);
385 }
386
387 public Object[][] AllInformation(){
388
389 int rows = nameList.size();
390 int cols = 11;
391 int y = 0;
392
393 Object[][] info = new Object[rows][cols];
394 String[] list = new String[rows];
395
396 for(int i = 0 ; i < list.length; i++){
397 list[i] = nameList.get(i).toString();
398 }
399
400 Arrays.sort(list);
401
402 for(int iu = 0; iu<list.length; iu++){
403 String xi = list[iu];
404 info[y][0] = xi ;
405 info[y][1] = new Integer(getFrequency(xi));
406 info[y][2] = new Integer(getDocumentUsedElement(xi));
407 info[y][3] = new Double(Mean(xi));
408 info[y][4] = new Double(Median(xi));
409 info[y][5] = new Integer(getDistinctNumber(xi));
410 info[y][6] = new Integer(getMinRange(xi));
411 info[y][7] = new Integer(getMaxRange(xi));
412 info[y][8] = new Double(Average(xi));
413 info[y][9] = new Integer(getMode(xi));
414 info[y][10] = ModeFrequency(xi)+"%";
415 y++;
416 }
417
418 return info;
419 }
420
421 public String[] getSortList(String core_element,String sort){
422
423 if(sort.equals("ASCII")){
424 HashMap hp = getDistinctValueMap (core_element);
425 String[] temp = new String[hp.size()];
426 int counter = 0;
427 Set s = hp.keySet();
428 Iterator i = s.iterator();
429
430 while(i.hasNext()){
431 temp[counter] = (String)i.next();
432 counter++;
433 }
434
435 Arrays.sort(temp);
436
437 return temp;
438 }
439
440 else{
441 Map m = getDistinctValueMap (core_element);
442 ArrayList outputList = sortMap(m);
443 String[] temp = new String[outputList.size()];
444
445 for(int i = 0; i< outputList.size(); i++){
446 Map.Entry entry = (Map.Entry) outputList.get(i);
447 temp[i] = (String) entry.getKey();
448 }
449
450 return temp;
451 }
452 }
453
454 public HashMap getDistinctValueMap(String core_element){
455
456 Element ex = getRootNode(core_element);
457 HashMap hp = new HashMap();
458 NodeList listOfFrequency = ex.getElementsByTagName(ms.DocumentNode);
459
460 for(int s=0; s<listOfFrequency.getLength() ; s++){
461 Node docNode = listOfFrequency.item(s);
462 Element docElement = (Element)docNode;
463 NodeList valueList = docElement.getElementsByTagName(ms.ActualValueNode);
464
465 for(int y = 0; y<valueList.getLength(); y++){
466 Element valueElement = (Element)valueList.item(y);
467 NodeList textFNList = valueElement.getChildNodes();
468 String text = ((Node)textFNList.item(0)).getNodeValue();
469
470 if(!text.equals(" ")){
471 if(hp.containsKey(text)){
472 Integer i = (Integer)hp.get(text);
473 int number = i.intValue();
474 number++;
475 hp.put(text,new Integer(number));
476 }
477 else{
478 Integer i = new Integer(1);
479 hp.put(text, i);
480 }
481 }
482 }
483 }
484
485 return hp;
486 }
487
488
489 public HashMap getDocFrequencyMap(String core_element){
490
491 Element ex = getRootNode(core_element);
492 HashMap hp = new HashMap();
493 NodeList listOfFrequency = ex.getElementsByTagName(ms.DocumentNode);
494
495 for(int s=0; s<listOfFrequency.getLength() ; s++){
496
497 Node docNode = listOfFrequency.item(s);
498 NamedNodeMap NodeMap = docNode.getAttributes();
499 Node AttributeNode = NodeMap.item(0);
500 String att_name = AttributeNode.getNodeValue();
501
502 Element docElement = (Element)docNode;
503 NodeList valueList = docElement.getElementsByTagName(ms.FrequencyNode);
504 Element frequencyElement = (Element)valueList.item(0);
505 NodeList textFNList = frequencyElement.getChildNodes();
506 String text = ((Node)textFNList.item(0)).getNodeValue();
507 Integer i = new Integer(Integer.parseInt(text));
508 hp.put(att_name, i);
509 }
510
511 return hp;
512 }
513
514
515 public String[] getDocumentIDs(String core_element){
516
517 Element ex = getRootNode(ms.RootDirectory);
518 NodeList listOfFrequency = ex.getElementsByTagName(ms.DocumentNode);
519 String[] ids = new String[listOfFrequency.getLength()];
520
521 for(int s=0; s<listOfFrequency.getLength() ; s++){
522 Node docNode = listOfFrequency.item(s);
523 NamedNodeMap NodeMap = docNode.getAttributes();
524 Node AttributeNode = NodeMap.item(0);
525 String att_name = AttributeNode.getNodeValue();
526 ids[s] = att_name;
527 }
528
529 return (String[])ids.clone();
530 }
531
532 public int[] getMetadataRows(String core_element){
533
534 Element ex = getRootNode(core_element);
535 ArrayList alist = new ArrayList();
536 String[] idsx = getDocumentIDs(core_element);
537
538 for(int i = 0; i<idsx.length; i++){
539 alist.add(idsx[i]);
540 }
541
542 NodeList listOfFrequency = ex.getElementsByTagName(ms.DocumentNode);
543 int[] row = new int[TotalDoc];
544
545 for(int i = 0; i<row.length; i++){
546 row [i] = 0;
547 }
548
549 for(int s=0; s<listOfFrequency.getLength() ; s++){
550 Node docNode = listOfFrequency.item(s);
551 int location = alist.indexOf(docNode.getAttributes().item(0).getNodeValue());
552 row[location] =1;
553 }
554
555 return row;
556 }
557
558 public ArrayList sortMap(Map map) {
559
560 ArrayList outputList = null;
561 int count = 0;
562 Set set = null;
563 Map.Entry[] entries = null;
564 // Logic:
565 // get a set from Map
566 // Build a Map.Entry[] from set
567 // Sort the list using Arrays.sort
568 // Add the sorted Map.Entries into arrayList and return
569
570 set = (Set) map.entrySet();
571 Iterator iterator = set.iterator();
572 entries = new Map.Entry[set.size()];
573 while(iterator.hasNext()) {
574 entries[count++] = (Map.Entry) iterator.next();
575 }
576
577 // Sort the entries with your own comparator for the values:
578 Arrays.sort(entries, new Comparator() {
579 public int compareTo(Object lhs, Object rhs) {
580 Map.Entry le = (Map.Entry)lhs;
581 Map.Entry re = (Map.Entry)rhs;
582 return ((Comparable)le.getValue()).compareTo((Comparable)re.getValue());
583 }
584
585 public int compare(Object lhs, Object rhs) {
586 Map.Entry le = (Map.Entry)lhs;
587 Map.Entry re = (Map.Entry)rhs;
588 return ((Comparable)le.getValue()).compareTo((Comparable)re.getValue());
589 }
590 });
591
592 outputList = new ArrayList();
593
594 for(int i = 0; i < entries.length; i++) {
595 outputList.add(entries[i]);
596 }
597 return outputList;
598 }
599
600 private Double div(Double d1, Double d2){
601
602 BigDecimal b1 = new BigDecimal(d1.toString());
603 BigDecimal b2 = new BigDecimal(d2.toString());
604
605 return new Double(b1.divide(b2,DEF_DIV_SCALE,BigDecimal.ROUND_HALF_UP).doubleValue());
606 }
607
608 private Double mul(Double d1,Double d2){
609
610 BigDecimal b1 = new BigDecimal(d1.toString());
611 BigDecimal b2 = new BigDecimal(d2.toString());
612
613 return new Double(b1.multiply(b2).doubleValue());
614 }
615
616 public double round(double v,int scale){
617
618 if(scale<0){
619 throw new IllegalArgumentException("The scale must be a positive integer or zero");
620 }
621
622 BigDecimal b = new BigDecimal(Double.toString(v));
623 BigDecimal one = new BigDecimal("1");
624
625 return b.divide(one,scale,BigDecimal.ROUND_HALF_UP).doubleValue();
626 }
627
628 public double getSingleMetadataSetCompleteness(ArrayList mds_list){
629
630 int totalElement = 0;
631 int totalElementUsed = 0;
632
633 for(int a = 0; a<mds_list.size(); a++){
634 MetadataSet mds = (MetadataSet)mds_list.get(a);
635 ArrayList alist = mds.getIndexsList();
636 int length = alist.size();
637 totalElement = totalElement + length * ms.getDocNum();
638
639 for(int i = 0; i<alist.size(); i++){
640 String name = (String)alist.get(i);
641 totalElementUsed = totalElementUsed + getDocumentUsedElement(name);
642 }
643 }
644
645 double x = (double)totalElementUsed/totalElement;
646 Double d1 = new Double(x);
647 Double d2 = new Double(100);
648 Double result = mul(d1,d2);
649
650 return round(result.doubleValue(),1);
651 }
652
653 public Object[][] getMetadataInfo(MetadataSet mds){
654
655 ArrayList alist = mds.getIndexsList();
656 int rows = alist.size();
657 int cols = 11;
658 int y = 0;
659 Object[][] dataset = new Object[rows][cols];
660
661 String[] list = new String[rows];
662
663 for(int i = 0 ; i < list.length; i++){
664 list[i] = alist.get(i).toString();
665 }
666
667 Arrays.sort(list);
668
669 for(int iu = 0; iu<list.length; iu++){
670 String xi = list[iu];
671 dataset[y][0] = xi ;
672 dataset[y][1] = new Integer(getFrequency(xi));
673 dataset[y][2] = new Integer(getDocumentUsedElement(xi));
674 dataset[y][3] = new Double (Mean(xi));
675 dataset[y][4] = new Double (Median(xi));
676 dataset[y][5] = new Integer(getDistinctNumber(xi));
677 dataset[y][6] = new Integer(getMinRange(xi));
678 dataset[y][7] = new Integer(getMaxRange(xi));
679 dataset[y][8] = new Double (Average(xi));
680 dataset[y][9] = new Integer(getMode(xi));
681 dataset[y][10] = ModeFrequency(xi)+"%";
682 y++;
683 }
684
685 return dataset;
686 }
687
688 public boolean IsElementEmpty(String core_element){
689
690 int[] list = getMetadataRows(core_element);
691 boolean status = true;
692
693 for(int i = 0; i< list.length; i++){
694 if(list[i]==1){status = false;}
695 }
696
697 return status;
698 }
699
700 public boolean IsElementFull(String core_element){
701
702 int[] list = getMetadataRows(core_element);
703 boolean status = true;
704
705 for(int i = 0; i< list.length; i++){
706 if(list[i]==0){status = false;}
707 }
708 return status;
709 }
710
711 public ArrayList removeDocument(ArrayList dataset, String[] ids, int number){
712
713 removedID = new ArrayList();
714 int[] metadataLevel =(int[])dataset.get(0);
715 int docIDslength = metadataLevel.length;
716 int[][] valueMap = new int[dataset.size()][docIDslength];
717
718 for(int i = 0; i< docIDslength; i++){
719
720 boolean status = true;
721
722 for(int j = 0; j<dataset.size(); j++){
723 int[] metadataLevelArray = (int[])dataset.get(j);
724 valueMap[j][i] = metadataLevelArray[i];
725 if(metadataLevelArray[i]!=number){status = false;}
726
727 }
728 if(status == true){
729
730 for(int j = 0; j<dataset.size(); j++){
731
732 valueMap[j][i]=-1;
733
734 }
735 removedID.add(ids[i]);
736 }
737 }
738
739 ArrayList wholeList = new ArrayList();
740
741 for(int i = 0; i<valueMap.length; i++){
742 ArrayList numberList = new ArrayList();
743
744 for(int j = 0; j<valueMap[i].length; j++){
745 numberList.add(new Integer(valueMap[i][j]));
746 }
747 wholeList.add(numberList);
748 }
749
750 for(int i =0; i< wholeList.size(); i++){
751 ArrayList numberList = (ArrayList)wholeList.get(i);
752 Integer value = new Integer(-1);
753 while(numberList.contains(value)){
754 numberList.remove(value);
755 }
756 int[] valueList = new int [numberList.size()];
757
758 for(int j = 0; j< valueList.length; j++){
759 valueList[j] = ((Integer)numberList.get(j)).intValue();
760 }
761 wholeList.remove(i);
762 wholeList.add(i,valueList);
763 }
764
765 return wholeList;
766 }
767
768 public ArrayList getRemovedID(){
769 return removedID;
770 }
771
772
773 public HashMap getLinks(String[] args, String core_element){
774
775 Element ex = getRootNode(core_element);
776 HashMap hp = new HashMap();
777 NodeList listOfFrequency = ex.getElementsByTagName(ms.DocumentNode);
778 ArrayList tempList = new ArrayList();
779 String urlFile = "dc.Identifier";
780
781 for(int i = 0; i<args.length; i++){
782 tempList.add(args[i]);
783 }
784
785 for(int s=0; s<listOfFrequency.getLength() ; s++){
786 Node docNode = listOfFrequency.item(s);
787 Element docElement = (Element)docNode;
788 NodeList valueList = docElement.getElementsByTagName(ms.ActualValueNode);
789
790 if(valueList.getLength()==1){
791 String id = docNode.getAttributes().item(0).getNodeValue();
792 Element valueElement = (Element)valueList.item(0);
793 NodeList textFNList = valueElement.getChildNodes();
794 String text = ((Node)textFNList.item(0)).getNodeValue();
795
796 if(tempList.contains(text)){
797 hp.put(id, text);
798 }
799 }
800 }
801
802 ex = getRootNode(urlFile);
803 listOfFrequency = ex.getElementsByTagName(ms.DocumentNode);
804 HashMap newHp = new HashMap();
805
806 for(int s=0; s<listOfFrequency.getLength() ; s++){
807 Node docNode = listOfFrequency.item(s);
808 Element docElement = (Element)docNode;
809 NodeList valueList = docElement.getElementsByTagName(ms.ActualValueNode);
810
811 if(valueList.getLength()==1){
812 String id = docNode.getAttributes().item(0).getNodeValue();
813 Element valueElement = (Element)valueList.item(0);
814 NodeList textFNList = valueElement.getChildNodes();
815 String text = ((Node)textFNList.item(0)).getNodeValue();
816 newHp.put(text, id);
817 }
818 }
819
820 HashMap tempMap = new HashMap();
821 Collection c = hp.values();
822 Iterator i = c.iterator();
823
824 while(i.hasNext()){
825 String id = (String)i.next();
826 if(newHp.containsKey(id)){
827 String text = (String)newHp.get(id);
828 if(text.contains("http")){
829 tempMap.put((String)tempMap.get(id),text);
830 }
831 }
832 }
833
834 return tempMap;
835 }
836
837 public ArrayList getURLMap(String elementName){
838
839 String core_element =elementName;
840 Element ex = getRootNode(core_element);
841 ArrayList alist = new ArrayList();
842 if(ex.equals(null)){
843 return new ArrayList();
844 }
845
846 NodeList listOfFrequency = ex.getElementsByTagName(ms.DocumentNode);
847
848 if(listOfFrequency.getLength()==0){
849 return new ArrayList();
850 }
851
852 for(int s=0; s<listOfFrequency.getLength() ; s++){
853 Node docNode = listOfFrequency.item(s);
854 Element docElement = (Element)docNode;
855 NodeList valueList = docElement.getElementsByTagName(ms.ActualValueNode);
856
857 for(int y = 0; y<valueList.getLength(); y++){
858 Element valueElement = (Element)valueList.item(y);
859 NodeList textFNList = valueElement.getChildNodes();
860 String text = ((Node)textFNList.item(0)).getNodeValue();
861
862 if(!text.equals(" ")){
863 NamedNodeMap NodeIDMap = docNode .getAttributes();
864 Node DocNodeID = NodeIDMap.item(0);
865 String DocID = DocNodeID.getNodeValue();
866 if(alist.contains(DocID)){}
867 else{
868 alist.add(DocID);
869 }
870 }
871 }
872 }
873
874 return alist;
875}
876
877 public HashMap getIdentifierLink(String core_element){
878
879 Element ex = getRootNode(core_element);
880 HashMap hp = new HashMap();
881 NodeList listOfFrequency = ex.getElementsByTagName(ms.DocumentNode);
882
883 for(int s=0; s<listOfFrequency.getLength() ; s++){
884 Node docNode = listOfFrequency.item(s);
885 String HashID = docNode.getAttributes().item(0).getNodeValue();
886 Element docElement = (Element)docNode;
887 NodeList valueList = docElement.getElementsByTagName(ms.ActualValueNode);
888
889 for(int y = 0; y<valueList.getLength(); y++){
890 Element valueElement = (Element)valueList.item(y);
891 NodeList textFNList = valueElement.getChildNodes();
892 String text = ((Node)textFNList.item(0)).getNodeValue();
893
894 if(!text.equals(" ") && text.startsWith("http:")){
895 if(hp.containsKey(HashID)){
896 InternalLink il = (InternalLink)hp.get(HashID);
897 il.increaseElement(text);
898 hp.put(HashID,il);
899 }
900 else{
901 InternalLink il = new InternalLink();
902 il.setValue(HashID);
903 il.increaseElement(text);
904 hp.put(HashID, il);
905 }
906 }
907 }
908 }
909
910 return hp;
911 }
912
913 public String[] getDocumentIDList(String core_element){
914
915 Element ex = getRootNode(core_element);
916 NodeList listOfFrequency = ex.getElementsByTagName(ms.DocumentNode);
917 String[] ids = new String[listOfFrequency.getLength()];
918
919 for(int s=0; s<listOfFrequency.getLength() ; s++){
920 Node docNode = listOfFrequency.item(s);
921 NamedNodeMap NodeMap = docNode.getAttributes();
922 Node AttributeNode = NodeMap.item(0);
923 String att_name = AttributeNode.getNodeValue();
924 ids[s] = att_name;
925 }
926
927 return (String[])ids.clone();
928 }
929
930 public HashMap getInternalIdentifier(String core_element){
931
932 Element ex = getRootNode(core_element);
933 HashMap hp = new HashMap();
934 NodeList listOfFrequency = ex.getElementsByTagName(ms.DocumentNode);
935
936 for(int s=0; s<listOfFrequency.getLength() ; s++){
937 Node docNode = listOfFrequency.item(s);
938 String HashID = docNode.getAttributes().item(0).getNodeValue();
939 Element docElement = (Element)docNode;
940 NodeList valueList = docElement.getElementsByTagName(ms.ActualValueNode);
941
942 for(int y = 0; y<valueList.getLength(); y++){
943 Element valueElement = (Element)valueList.item(y);
944 NodeList textFNList = valueElement.getChildNodes();
945 String text = ((Node)textFNList.item(0)).getNodeValue();
946
947 if(!text.equals(" ")){
948 if(hp.containsKey(text)){
949 InternalLink il = (InternalLink)hp.get(text);
950 il.increaseElement(HashID);
951 hp.put(text,il);
952 }
953 else{
954 InternalLink il = new InternalLink();
955 il.setValue(text);
956 il.increaseElement(HashID);
957 hp.put(text, il);
958 }
959 }
960 }
961 }
962
963 return hp;
964 }
965
966 public HashMap getIdentifierLinkNoIdentifier(){
967
968 Element ex = getRootNode(ms.RootDirectory);
969 HashMap hp = new HashMap();
970 NodeList listOfFrequency = ex.getElementsByTagName(ms.DocumentNode);
971 String url ="No Source Available";
972
973 for(int s=0; s<listOfFrequency.getLength() ; s++){
974 Node docNode = listOfFrequency.item(s);
975 String HashID = docNode.getAttributes().item(0).getNodeValue();
976 InternalLink il = new InternalLink();
977 il.increaseElement(HashID);
978 hp.put(HashID, il);
979 }
980 return hp;
981 }
982}
Note: See TracBrowser for help on using the repository browser.