source: other-projects/trunk/greenstone3-extension/mat/src/java/org/greenstone/gsdl3_extension/mat/servlet/DataMaker.java@ 17365

Last change on this file since 17365 was 17365, checked in by cc108, 16 years ago

Updating Mat Source Code

File size: 30.3 KB
Line 
package org.greenstone.gsdl3_extension.mat.servlet;

import java.io.File;
import java.io.PrintWriter;
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.greenstone.gsdl3.core.MessageRouter;
import org.greenstone.gsdl3.util.XMLConverter;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

25public class DataMaker {
26
27 MetadataStats ms;
28 ArrayList nameList;
29 private int Mode;
30 private int TotalDoc;
31 private String path;
32
33 protected Document doc=null;
34 protected MessageRouter mr = null;
35 protected XMLConverter converter=null;
36 private ArrayList removedID = new ArrayList();
37 PrintWriter out;
38 private static final int DEF_DIV_SCALE = 10;
39
40 public DataMaker(MetadataStats arg1){
41 ms = arg1;
42 nameList = arg1.metadataNameList;
43 path = arg1.StatsDirectory;
44 setTotalDocNumber();
45 }
46
47 private Element getRootNode(String core_element){
48
49 try{
50 //File x = new File(path+"/"+core_element+".xml");
51 //out.write(x.exists()+ x.getAbsolutePath());
52 //if(!x.exists()){return null;}
53 DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
54 DocumentBuilder docBuilder = docBuilderFactory.newDocumentBuilder();
55 Document doc = docBuilder.newDocument();
56 doc = docBuilder.parse (new File(path+"/"+core_element+".xml"));
57 Element rootNode = doc.getDocumentElement();
58 //System.out.println("rootNode"+ core_element);
59 return rootNode;
60 }catch (Exception e) {
61 e.printStackTrace();
62 //System.out.println("return null");
63 return null;
64 }
65 }
66
67 public void setTotalDocNumber(){
68 //TotalDoc = ms.getDocNum();
69 Element ex = getRootNode("archivedir");
70 NodeList listOfFrequency = ex.getElementsByTagName("Document");
71 TotalDoc = listOfFrequency.getLength();
72 }
73
74 public int getTotalElementUsed(){
75
76 int totalNumber = 0;
77
78 for(int i = 0 ; i<nameList.size(); i++){
79 Element ex = getRootNode((String)nameList.get(i));
80 NodeList listOfFrequency = ex.getElementsByTagName("Frequency");
81
82 for(int s=0; s<listOfFrequency.getLength() ; s++){
83 Node FrequencyNode = listOfFrequency.item(s);
84 NodeList textFNList = FrequencyNode.getChildNodes();
85 String TextNode = textFNList.item(0).getNodeValue();
86 totalNumber = totalNumber + Integer.parseInt(TextNode);
87 }
88 }
89 return totalNumber;
90 }
91
92 public int getFrequency(String name){
93
94 int totalNumber = 0;
95 Element ex = getRootNode(name);
96 NodeList listOfFrequency = ex.getElementsByTagName("Frequency");
97 if(listOfFrequency.getLength()==0){return 0;}
98 for(int s=0; s<listOfFrequency.getLength() ; s++){
99 Node FrequencyNode = listOfFrequency.item(s);
100 NodeList textFNList = FrequencyNode.getChildNodes();
101 String TextNode = textFNList.item(0).getNodeValue();
102 int x = Integer.parseInt(TextNode);
103 totalNumber = totalNumber + x;
104 }
105 return totalNumber;
106 }
107
108 public int getDistinctNumber(String name){
109
110 ArrayList alist = new ArrayList();
111 Element ex = getRootNode(name);
112 NodeList listOfFrequency = ex.getElementsByTagName("ActualValue");
113 if(listOfFrequency.getLength()==0){return 0;}
114 for(int s=0; s<listOfFrequency.getLength() ; s++){
115 Node ActualValueNode = listOfFrequency.item(s);
116 NodeList textFNList = ActualValueNode.getChildNodes();
117 String TextNode = textFNList.item(0).getNodeValue();
118 if(!alist.contains(TextNode) && !TextNode.equals(" ")){alist.add(TextNode);}
119 }
120 return alist.size();
121 }
122
123 public int getDocumentUsedElement(String core_element){
124
125 int totalNumber = 0;
126 Element ex = getRootNode(core_element);
127 NodeList listOfFrequency = ex.getElementsByTagName("Frequency");
128
129 for(int s=0; s<listOfFrequency.getLength() ; s++){
130 totalNumber++;
131 }
132 return totalNumber;
133 }
134
135 // for all elements
136 public double Mean(String core_element){
137
138 int due = getDocumentUsedElement(core_element);
139 if(due==0 || TotalDoc==0){return 0;}
140 Double d1 = new Double(due);
141 Double d2 = new Double(TotalDoc);
142 Double result = div(d1,d2);
143 result = mul(result, new Double(100));
144 return round(result.doubleValue(),1);
145 }
146
147 public int getMinRange(String core_element){
148
149 Element ex = getRootNode(core_element);
150 NodeList listOfFrequency = ex.getElementsByTagName("Frequency");
151
152 if(listOfFrequency.getLength()==0){return 0;}
153
154 Node FrequencyNode = listOfFrequency.item(0);
155 NodeList textFNList = FrequencyNode.getChildNodes();
156 String TextNode = textFNList.item(0).getNodeValue();
157 int minNumber = 0;
158
159 if(listOfFrequency.getLength()==TotalDoc){
160 minNumber = Integer.parseInt(TextNode);
161 }
162
163 else {
164 minNumber = 0;
165 }
166
167 for(int s=0; s<listOfFrequency.getLength() ; s++){
168 FrequencyNode = listOfFrequency.item(s);
169 textFNList = FrequencyNode.getChildNodes();
170 TextNode = textFNList.item(0).getNodeValue();
171 int x = Integer.parseInt(TextNode);
172 if(x<minNumber){minNumber=x;}
173 }
174 return minNumber;
175 }
176
177 public int getMaxRange(String core_element){
178
179 Element ex = getRootNode(core_element);
180 NodeList listOfFrequency = ex.getElementsByTagName("Frequency");
181 if(listOfFrequency.getLength()==0){return 0;}
182 Node FrequencyNode = listOfFrequency.item(0);
183 NodeList textFNList = FrequencyNode.getChildNodes();
184 String TextNode = textFNList.item(0).getNodeValue();
185 int maxNumber = 0;
186
187 for(int s=0; s<listOfFrequency.getLength() ; s++){
188 FrequencyNode = listOfFrequency.item(s);
189 textFNList = FrequencyNode.getChildNodes();
190 TextNode = textFNList.item(0).getNodeValue();
191 int x = Integer.parseInt(TextNode);
192 if(x>maxNumber){maxNumber=x;}
193 }
194 return maxNumber;
195 }
196
197 public int getMode(String core_element){
198
199 Element ex = getRootNode(core_element);
200 NodeList listOfFrequency = ex.getElementsByTagName("Document");
201 if(listOfFrequency.getLength()==0){Mode = 0; return 0;}
202 ArrayList alist = new ArrayList();
203 String[] idsx = getDocumentIDs(core_element);
204
205 for(int i = 0; i<idsx.length; i++){
206 alist.add(idsx[i]);
207 }
208 int[] list = new int[TotalDoc];
209
210 for(int i = 0; i<list.length; i++){
211 list[i] = 0;
212 }
213
214 for(int s=0; s< listOfFrequency.getLength() ; s++){
215 Node docNode = listOfFrequency.item(s);
216 NamedNodeMap NodeIDMap = docNode.getAttributes();
217 Node DocNodeID = NodeIDMap.item(0);
218 String DocID = DocNodeID.getNodeValue();
219 Element xNode = (Element)docNode;
220 int location = alist.indexOf(DocID);
221 NodeList xList = xNode.getElementsByTagName("Frequency");
222 int fre = Integer.parseInt(xList.item(0).getChildNodes().item(0).getNodeValue());
223 list[location] = fre;
224 }
225
226 Arrays.sort(list);
227
228 int max_idx = 0; // Index of the maximum count
229 int max_cnt = 0;
230 int count = 0;
231
232 for ( int i = 0; i <list.length; i++) {
233 count = 0;
234 for ( int j = 0; j < list.length; j++) {
235 if (list[i] == list[j]) {
236 count++;
237 }
238 }
239 if (count > max_cnt) {
240 max_cnt = count;
241 max_idx = i;
242 }
243 }
244 Mode = list [max_idx];
245 return list [max_idx];
246
247 }
248
249 public double ModeFrequency(String core_element){
250
251 Element ex = getRootNode(core_element);
252 NodeList listOfFrequency = ex.getElementsByTagName("Document");
253 if(listOfFrequency.getLength()==0){ return 100;}
254 ArrayList alist = new ArrayList();
255 String[] idsx = getDocumentIDs(core_element);
256
257 for(int i = 0; i<idsx.length; i++){
258 alist.add(idsx[i]);
259 }
260
261 int[] list = new int[TotalDoc];
262
263 for(int i = 0; i<list.length; i++){
264 list[i] = 0;
265 }
266
267 int length = alist.size();
268 int counter = 0;
269
270 for(int s=0; s< listOfFrequency.getLength() ; s++){
271 Node docNode = listOfFrequency.item(s);
272 NamedNodeMap NodeIDMap = docNode.getAttributes();
273 Node DocNodeID = NodeIDMap.item(0);
274 String DocID = DocNodeID.getNodeValue();
275 Element xNode = (Element)docNode;
276 int location = alist.indexOf(DocID);
277 NodeList xList = xNode.getElementsByTagName("Frequency");
278 int fre = Integer.parseInt(xList.item(0).getChildNodes().item(0).getNodeValue());
279 list[location] = fre;
280 }
281
282 for(int i =0; i<list.length; i++){
283 if(list[i]==Mode){counter++;}
284 }
285
286 Double result = div(new Double(counter), new Double(length));
287 result = mul(result ,new Double (100));
288 return round(result.doubleValue(),1);
289 }
290
291 public double Median(String core_element){
292
293 Element ex = getRootNode(core_element);
294 NodeList listOfFrequency = ex.getElementsByTagName("Document");
295
296 ArrayList alist = new ArrayList();
297 String[] idsx = getDocumentIDs(core_element);
298
299 for(int i = 0; i<idsx.length; i++){
300 alist.add(idsx[i]);
301 }
302
303 int[] list = new int[TotalDoc];
304
305 for(int i = 0; i<list.length; i++){
306 list[i] = 0;
307 }
308
309 for(int s=0; s< listOfFrequency.getLength() ; s++){
310 Node docNode = listOfFrequency.item(s);
311 NamedNodeMap NodeIDMap = docNode.getAttributes();
312 Node DocNodeID = NodeIDMap.item(0);
313 String DocID = DocNodeID.getNodeValue();
314 Element xNode = (Element)docNode;
315 int location = alist.indexOf(DocID);
316 NodeList xList = xNode.getElementsByTagName("Frequency");
317 int fre = Integer.parseInt(xList.item(0).getChildNodes().item(0).getNodeValue());
318 list[location] = fre;
319 }
320
321 int length = alist.size();
322 int middle = length/2 -1;
323
324 Arrays.sort(list);
325
326 if(length % 2 == 1){
327 middle = middle + 1;
328 return list[middle];
329 }
330
331 else{
332 return round((double)(list[middle]+list[middle+1])/2,1);
333 }
334 }
335
336 // for all elements
337 public double Average(String core_element){
338
339 int t1 = getFrequency(core_element);
340 int t2 = getDocumentUsedElement(core_element);
341 if(t1==0 || t2==0){return 0;}
342 //System.out.println(core_element+" "+ t1+ " "+t2+" "+ RoundOff((float)t1/t2)+" "+(float)t1/t2);
343 Double result = div(new Double(t1),new Double(t2));
344 return round(result.doubleValue(),1);
345 }
346
347 public Object[][] AllInformation(){
348
349 int rows = nameList.size();
350 int cols = 11;
351 int y = 0;
352
353 Object[][] info = new Object[rows][cols];
354 String[] list = new String[rows];
355
356 for(int i = 0 ; i < list.length; i++){
357 list[i] = nameList.get(i).toString();
358 }
359
360 Arrays.sort(list);
361
362 for(int iu = 0; iu<list.length; iu++){
363 String xi = list[iu];
364 info[y][0] = xi ;
365 info[y][1] = new Integer(getFrequency(xi));
366 info[y][2] = new Integer(getDocumentUsedElement(xi));
367 info[y][3] = new Double(Mean(xi));
368 info[y][4] = new Double(Median(xi));
369 info[y][5] = new Integer(getDistinctNumber(xi));
370 info[y][6] = new Integer(getMinRange(xi));
371 info[y][7] = new Integer(getMaxRange(xi));
372 info[y][8] = new Double(Average(xi));
373 info[y][9] = new Integer(getMode(xi));
374 info[y][10] = ModeFrequency(xi)+"%";
375 y++;
376 }
377 return info;
378 }
379
380 public String[] getSortList(String core_element,String sort){
381
382 if(sort.equals("ASCII")){
383 HashMap hp = getDistinctValueMap (core_element);
384 String[] temp = new String[hp.size()];
385 int counter = 0;
386 Set s = hp.keySet();
387 Iterator i = s.iterator();
388
389 while(i.hasNext()){
390 temp[counter] = (String)i.next();
391 counter++;
392 }
393
394 Arrays.sort(temp);
395 return temp;
396 }
397
398 else{
399 Map m = getDistinctValueMap (core_element);
400 ArrayList outputList = sortMap(m);
401 String[] temp = new String[outputList.size()];
402
403 for(int i = 0; i< outputList.size(); i++){
404 Map.Entry entry = (Map.Entry) outputList.get(i);
405 temp[i] = (String) entry.getKey();
406 }
407
408 return temp;
409 }
410
411 }
412
413
414 /*
415 * Actually, we can directly access to "ActualValue" node instead of document node
416 */
417
418 public HashMap getDistinctValueMap(String core_element){
419
420 Element ex = getRootNode(core_element);
421 HashMap hp = new HashMap();
422 NodeList listOfFrequency = ex.getElementsByTagName("Document");
423
424 for(int s=0; s<listOfFrequency.getLength() ; s++){
425 Node docNode = listOfFrequency.item(s);
426 Element docElement = (Element)docNode;
427 NodeList valueList = docElement.getElementsByTagName("ActualValue");
428
429 for(int y = 0; y<valueList.getLength(); y++){
430 Element valueElement = (Element)valueList.item(y);
431 NodeList textFNList = valueElement.getChildNodes();
432 String text = ((Node)textFNList.item(0)).getNodeValue();
433
434 if(!text.equals(" ")){
435 if(hp.containsKey(text)){
436 Integer i = (Integer)hp.get(text);
437 int number = i.intValue();
438 number++;
439 hp.put(text,new Integer(number));
440 }
441 else{
442 Integer i = new Integer(1);
443 hp.put(text, i);
444 }
445 }
446 }
447 }
448 return hp;
449 }
450
451
452 public HashMap getDocFrequencyMap(String core_element){
453
454 Element ex = getRootNode(core_element);
455 HashMap hp = new HashMap();
456 NodeList listOfFrequency = ex.getElementsByTagName("Document");
457
458 for(int s=0; s<listOfFrequency.getLength() ; s++){
459
460 Node docNode = listOfFrequency.item(s);
461 NamedNodeMap NodeMap = docNode.getAttributes();
462 Node AttributeNode = NodeMap.item(0);
463 String att_name = AttributeNode.getNodeValue();
464
465 Element docElement = (Element)docNode;
466 NodeList valueList = docElement.getElementsByTagName("Frequency");
467 Element frequencyElement = (Element)valueList.item(0);
468 NodeList textFNList = frequencyElement.getChildNodes();
469 String text = ((Node)textFNList.item(0)).getNodeValue();
470 Integer i = new Integer(Integer.parseInt(text));
471 hp.put(att_name, i);
472 }
473 return hp;
474 }
475
476
477 public String[] getDocumentIDs(String core_element){
478
479 Element ex = getRootNode("archivedir");
480 NodeList listOfFrequency = ex.getElementsByTagName("Document");
481 String[] ids = new String[listOfFrequency.getLength()];
482
483 for(int s=0; s<listOfFrequency.getLength() ; s++){
484 Node docNode = listOfFrequency.item(s);
485 NamedNodeMap NodeMap = docNode.getAttributes();
486 Node AttributeNode = NodeMap.item(0);
487 String att_name = AttributeNode.getNodeValue();
488 ids[s] = att_name;
489 }
490 return (String[])ids.clone();
491 }
492
493 public int[] getMetadataRows(String core_element){
494 Element ex = getRootNode(core_element);
495 //ArrayList alist = ms.getIDArray();
496 ArrayList alist = new ArrayList();
497 String[] idsx = getDocumentIDs(core_element);
498
499 for(int i = 0; i<idsx.length; i++){
500 alist.add(idsx[i]);
501 }
502
503 NodeList listOfFrequency = ex.getElementsByTagName("Document");
504 int[] row = new int[TotalDoc];
505
506 for(int i = 0; i<row.length; i++){
507 row [i] = 0;
508 }
509
510 for(int s=0; s<listOfFrequency.getLength() ; s++){
511 Node docNode = listOfFrequency.item(s);
512 int location = alist.indexOf(docNode.getAttributes().item(0).getNodeValue());
513 row[location] =1;
514
515 }
516 return row;
517 }
518
519 /**
520 * This method will use Arrays.sort for sorting Map
521 * @param map
522 * @return outputList of Map.Entries
523 */
524
525 public ArrayList sortMap(Map map) {
526 ArrayList outputList = null;
527 int count = 0;
528 Set set = null;
529 Map.Entry[] entries = null;
530 // Logic:
531 // get a set from Map
532 // Build a Map.Entry[] from set
533 // Sort the list using Arrays.sort
534 // Add the sorted Map.Entries into arrayList and return
535
536 set = (Set) map.entrySet();
537 Iterator iterator = set.iterator();
538 entries = new Map.Entry[set.size()];
539 while(iterator.hasNext()) {
540 entries[count++] = (Map.Entry) iterator.next();
541 }
542
543 // Sort the entries with your own comparator for the values:
544 Arrays.sort(entries, new Comparator() {
545 public int compareTo(Object lhs, Object rhs) {
546 Map.Entry le = (Map.Entry)lhs;
547 Map.Entry re = (Map.Entry)rhs;
548 return ((Comparable)le.getValue()).compareTo((Comparable)re.getValue());
549 }
550
551 public int compare(Object lhs, Object rhs) {
552 Map.Entry le = (Map.Entry)lhs;
553 Map.Entry re = (Map.Entry)rhs;
554 return ((Comparable)le.getValue()).compareTo((Comparable)re.getValue());
555 }
556 });
557
558 outputList = new ArrayList();
559 for(int i = 0; i < entries.length; i++) {
560 outputList.add(entries[i]);
561 }
562 return outputList;
563 }//End of sortMap
564
565 private Double div(Double d1, Double d2){
566 BigDecimal b1 = new BigDecimal(d1.toString());
567 BigDecimal b2 = new BigDecimal(d2.toString());
568 return new Double(b1.divide(b2,DEF_DIV_SCALE,BigDecimal.ROUND_HALF_UP).doubleValue());
569 }
570
571 private Double mul(Double d1,Double d2){
572 BigDecimal b1 = new BigDecimal(d1.toString());
573 BigDecimal b2 = new BigDecimal(d2.toString());
574 return new Double(b1.multiply(b2).doubleValue());
575 }
576
577 public double round(double v,int scale){
578 if(scale<0){
579 throw new IllegalArgumentException(
580 "The scale must be a positive integer or zero");
581 }
582 BigDecimal b = new BigDecimal(Double.toString(v));
583 BigDecimal one = new BigDecimal("1");
584 return b.divide(one,scale,BigDecimal.ROUND_HALF_UP).doubleValue();
585 }
586
587 public double getSingleMetadataSetCompleteness(ArrayList mds_list){
588
589 int totalElement = 0;
590 int totalElementUsed = 0;
591 //System.out.println(mds_list.size()+"here1");
592 for(int a = 0; a<mds_list.size(); a++){
593 MetadataSet mds = (MetadataSet)mds_list.get(a);
594 ArrayList alist = mds.getIndexsList();
595 int length = alist.size();
596 //System.out.println(length+"???");
597 /*
598 String name = (String)alist.get(0);
599 Element ex = getRootNode(name);
600 NodeList nList = ex.getElementsByTagName("Document");
601 System.out.println(ms.getDocNum()+" total number");
602 */
603 totalElement = totalElement + length * ms.getDocNum();
604
605 for(int i = 0; i<alist.size(); i++){
606 String name = (String)alist.get(i);
607 totalElementUsed = totalElementUsed + getDocumentUsedElement(name);
608 }
609 }
610 double x = (double)totalElementUsed/totalElement;
611 Double d1 = new Double(x);
612 Double d2 = new Double(100);
613 Double result = mul(d1,d2);
614 return round(result.doubleValue(),1);
615 }
616
617 public Object[][] getMetadataInfo(MetadataSet mds){
618
619 ArrayList alist = mds.getIndexsList();
620 int rows = alist.size();
621 int cols = 11;
622 int y = 0;
623 Object[][] dataset = new Object[rows][cols];
624
625 String[] list = new String[rows];
626
627 for(int i = 0 ; i < list.length; i++){
628 list[i] = alist.get(i).toString();
629 //System.out.println(list[i]);
630 }
631
632 Arrays.sort(list);
633
634 for(int iu = 0; iu<list.length; iu++){
635 String xi = list[iu];
636 dataset[y][0] = xi ;
637 dataset[y][1] = new Integer(getFrequency(xi));
638 dataset[y][2] = new Integer(getDocumentUsedElement(xi));
639 dataset[y][3] = new Double (Mean(xi));
640 dataset[y][4] = new Double (Median(xi));
641 dataset[y][5] = new Integer(getDistinctNumber(xi));
642 dataset[y][6] = new Integer(getMinRange(xi));
643 dataset[y][7] = new Integer(getMaxRange(xi));
644 dataset[y][8] = new Double (Average(xi));
645 dataset[y][9] = new Integer(getMode(xi));
646 dataset[y][10] = ModeFrequency(xi)+"%";
647 y++;
648
649 }
650 return dataset;
651 }
652
653 public boolean IsElementEmpty(String core_element){
654
655 int[] list = getMetadataRows(core_element);
656 boolean status = true;
657
658 for(int i = 0; i< list.length; i++){
659 if(list[i]==1){status = false;}
660 }
661 return status;
662 }
663
664 public boolean IsElementFull(String core_element){
665
666 int[] list = getMetadataRows(core_element);
667 boolean status = true;
668
669 for(int i = 0; i< list.length; i++){
670 if(list[i]==0){status = false;}
671 }
672 return status;
673 }
674
675 public ArrayList removeDocument(ArrayList dataset, String[] ids, int number){
676 removedID = new ArrayList();
677 //System.out.println(dataset.size()+"ffff "+ ids.length+" "+number);
678 int[] metadataLevel =(int[])dataset.get(0);
679 int docIDslength = metadataLevel.length;
680 int[][] valueMap = new int[dataset.size()][docIDslength];
681 //System.out.println(ids.length+ " <ids docLength>"+docIDslength+ " "+ dataset.size());
682 for(int i = 0; i< docIDslength; i++){
683
684 boolean status = true;
685
686 for(int j = 0; j<dataset.size(); j++){
687 int[] metadataLevelArray = (int[])dataset.get(j);
688 valueMap[j][i] = metadataLevelArray[i];
689 if(metadataLevelArray[i]!=number){status = false;}
690 //System.out.println(j+" jjj ");
691 }
692 if(status == true){
693 //System.out.println(i + " docID rm");
694 for(int j = 0; j<dataset.size(); j++){
695 //int[] metadataLevelArray = (int[])dataset.get(j);
696 //System.out.print(metadataLevelArray[i]+" uid ");
697 valueMap[j][i]=-1;
698 //System.out.println(" removed id " + ids[i]);
699 }
700 removedID.add(ids[i]);
701 }
702 }
703 ArrayList wholeList = new ArrayList();
704
705 for(int i = 0; i<valueMap.length; i++){
706 ArrayList numberList = new ArrayList();
707
708 for(int j = 0; j<valueMap[i].length; j++){
709 numberList.add(new Integer(valueMap[i][j]));
710 }
711 wholeList.add(numberList);
712 }
713
714 for(int i =0; i< wholeList.size(); i++){
715 ArrayList numberList = (ArrayList)wholeList.get(i);
716 Integer value = new Integer(-1);
717 while(numberList.contains(value)){
718 numberList.remove(value);
719 }
720 int[] valueList = new int [numberList.size()];
721
722 for(int j = 0; j< valueList.length; j++){
723 valueList[j] = ((Integer)numberList.get(j)).intValue();
724 }
725 wholeList.remove(i);
726 wholeList.add(i,valueList);
727 }
728 //System.out.println(removedID.size()+" remove id size");
729 return wholeList;
730 }
731
732 public ArrayList getRemovedID(){
733 return removedID;
734 }
735
736/////////////////////////////////////////////////////////////////////////////////////
737
738 public HashMap getLinks(String[] args, String core_element){
739 Element ex = getRootNode(core_element);
740 HashMap hp = new HashMap();
741 NodeList listOfFrequency = ex.getElementsByTagName("Document");
742 ArrayList tempList = new ArrayList();
743 String urlFile = "dc.Identifier";
744
745 for(int i = 0; i<args.length; i++){
746 tempList.add(args[i]);
747 }
748
749 for(int s=0; s<listOfFrequency.getLength() ; s++){
750 Node docNode = listOfFrequency.item(s);
751 Element docElement = (Element)docNode;
752 NodeList valueList = docElement.getElementsByTagName("ActualValue");
753
754 if(valueList.getLength()==1){
755 String id = docNode.getAttributes().item(0).getNodeValue();
756
757 Element valueElement = (Element)valueList.item(0);
758 NodeList textFNList = valueElement.getChildNodes();
759 String text = ((Node)textFNList.item(0)).getNodeValue();
760
761 if(tempList.contains(text)){
762 hp.put(id, text);
763 }
764 }
765 }
766
767 ex = getRootNode(urlFile);
768 listOfFrequency = ex.getElementsByTagName("Document");
769 HashMap newHp = new HashMap();
770
771 for(int s=0; s<listOfFrequency.getLength() ; s++){
772 Node docNode = listOfFrequency.item(s);
773 Element docElement = (Element)docNode;
774 NodeList valueList = docElement.getElementsByTagName("ActualValue");
775
776 if(valueList.getLength()==1){
777 String id = docNode.getAttributes().item(0).getNodeValue();
778
779 Element valueElement = (Element)valueList.item(0);
780 NodeList textFNList = valueElement.getChildNodes();
781 String text = ((Node)textFNList.item(0)).getNodeValue();
782 newHp.put(text, id);
783 }
784 }
785
786 HashMap tempMap = new HashMap();
787 Collection c = hp.values();
788 Iterator i = c.iterator();
789 while(i.hasNext()){
790 String id = (String)i.next();
791 if(newHp.containsKey(id)){
792 String text = (String)newHp.get(id);
793 if(text.contains("http")){
794 tempMap.put((String)tempMap.get(id),text);
795 }
796 }
797 }
798
799 return tempMap;
800 }
801
802 //////////////////////////////////////////////////////////////////
803
804 /////////////////////////////////////////////////////////////////////////////////////
805
806 public ArrayList getURLMap(String elementName){
807
808
809 //String core_element ="dc.Identifier";
810 String core_element =elementName;
811 //printWriter.write("before opening doc");
812 Element ex = getRootNode(core_element);
813 //printWriter.write("after opening doc");
814 //printWriter.write(ex.equals(null)+"");
815 ArrayList alist = new ArrayList();
816 //printWriter.write(core_element + ex.getChildNodes().getLength());
817 //System.out.println("document lenght "+ex.getChildNodes().getLength());
818 if(ex.equals(null)){return new ArrayList();}
819 NodeList listOfFrequency = ex.getElementsByTagName("Document");
820
821 if(listOfFrequency.getLength()==0){return new ArrayList();}
822 for(int s=0; s<listOfFrequency.getLength() ; s++){
823 Node docNode = listOfFrequency.item(s);
824 Element docElement = (Element)docNode;
825 NodeList valueList = docElement.getElementsByTagName("ActualValue");
826
827 for(int y = 0; y<valueList.getLength(); y++){
828 Element valueElement = (Element)valueList.item(y);
829 NodeList textFNList = valueElement.getChildNodes();
830 String text = ((Node)textFNList.item(0)).getNodeValue();
831
832 if(!text.equals(" ")){
833 NamedNodeMap NodeIDMap = docNode .getAttributes();
834 Node DocNodeID = NodeIDMap.item(0);
835 String DocID = DocNodeID.getNodeValue();
836 if(alist.contains(DocID)){}
837 else{
838 alist.add(DocID);
839 }
840 }
841 }
842 }
843 return alist;
844}
845
846 //////////////////////////////////////////////////////////////////////////////////////
847
848
849 /////////////////////////////////////////////////////////////////////////////
850 public HashMap getIdentifierLink(String core_element){
851
852 Element ex = getRootNode(core_element);
853 HashMap hp = new HashMap();
854 NodeList listOfFrequency = ex.getElementsByTagName("Document");
855
856 for(int s=0; s<listOfFrequency.getLength() ; s++){
857 Node docNode = listOfFrequency.item(s);
858 String HashID = docNode.getAttributes().item(0).getNodeValue();
859 //System.out.println(HashID);
860 Element docElement = (Element)docNode;
861 NodeList valueList = docElement.getElementsByTagName("ActualValue");
862
863 for(int y = 0; y<valueList.getLength(); y++){
864 Element valueElement = (Element)valueList.item(y);
865 NodeList textFNList = valueElement.getChildNodes();
866 String text = ((Node)textFNList.item(0)).getNodeValue();
867
868 if(!text.equals(" ") && text.startsWith("http:")){
869 if(hp.containsKey(HashID)){
870 InternalLink il = (InternalLink)hp.get(HashID);
871 il.increaseElement(text);
872 hp.put(HashID,il);
873 }
874 else{
875 InternalLink il = new InternalLink();
876 il.setValue(HashID);
877 il.increaseElement(text);
878 hp.put(HashID, il);
879 }
880 }
881 }
882 }
883 return hp;
884 }
885 ///////////////////////////////////////////////////////////////////////////////
886 public String[] getDocumentIDList(String core_element){
887
888 Element ex = getRootNode(core_element);
889 NodeList listOfFrequency = ex.getElementsByTagName("Document");
890 String[] ids = new String[listOfFrequency.getLength()];
891
892 for(int s=0; s<listOfFrequency.getLength() ; s++){
893 Node docNode = listOfFrequency.item(s);
894 NamedNodeMap NodeMap = docNode.getAttributes();
895 Node AttributeNode = NodeMap.item(0);
896 String att_name = AttributeNode.getNodeValue();
897 ids[s] = att_name;
898 }
899 return (String[])ids.clone();
900 }
901 ///////////////////////////////////////////////////////////////////////////////////
902 public HashMap getInternalIdentifier(String core_element){
903
904 Element ex = getRootNode(core_element);
905 HashMap hp = new HashMap();
906 NodeList listOfFrequency = ex.getElementsByTagName("Document");
907
908 for(int s=0; s<listOfFrequency.getLength() ; s++){
909 Node docNode = listOfFrequency.item(s);
910 String HashID = docNode.getAttributes().item(0).getNodeValue();
911 //System.out.println(HashID);
912 Element docElement = (Element)docNode;
913 NodeList valueList = docElement.getElementsByTagName("ActualValue");
914
915 for(int y = 0; y<valueList.getLength(); y++){
916 Element valueElement = (Element)valueList.item(y);
917 NodeList textFNList = valueElement.getChildNodes();
918 String text = ((Node)textFNList.item(0)).getNodeValue();
919
920 if(!text.equals(" ")){
921 if(hp.containsKey(text)){
922 InternalLink il = (InternalLink)hp.get(text);
923 il.increaseElement(HashID);
924 hp.put(text,il);
925 }
926 else{
927 InternalLink il = new InternalLink();
928 il.setValue(text);
929 il.increaseElement(HashID);
930 hp.put(text, il);
931 }
932 }
933 }
934 }
935 return hp;
936 }
937 ///////////////////////////////////////////////////////////////////////////////////
938
939
940 //////////////////////////////////////////////////////////////////////extra
941 public HashMap getIdentifierLinkNoIdentifier(){
942
943 Element ex = getRootNode("archivedir");
944 HashMap hp = new HashMap();
945 NodeList listOfFrequency = ex.getElementsByTagName("Document");
946 String url ="No Source Available";
947 for(int s=0; s<listOfFrequency.getLength() ; s++){
948 Node docNode = listOfFrequency.item(s);
949 String HashID = docNode.getAttributes().item(0).getNodeValue();
950 //hp.put(HashID,url);
951 InternalLink il = new InternalLink();
952 il.increaseElement(HashID);
953 hp.put(HashID, il);
954 }
955 return hp;
956 }
957 /////////////////////////////////////////////////////////////////////////////
958
959}
Note: See TracBrowser for help on using the repository browser.