source: gs3-extensions/mat/trunk/src/org/greenstone/mat/PrintHTML.java@ 21927

Last change on this file since 21927 was 21927, checked in by sjm84, 14 years ago

Renamed package to org.greenstone.mat from org.greenstone.gsdl3_extension.mat

File size: 100.5 KB
Line 
1package org.greenstone.mat;
2
3import java.io.*;
4import java.text.SimpleDateFormat;
5import java.util.ArrayList;
6import java.util.Calendar;
7import java.util.Collection;
8import java.util.HashMap;
9import java.util.Iterator;
10import java.util.Map;
11import java.util.Set;
12
13import javax.xml.parsers.DocumentBuilder;
14import javax.xml.parsers.DocumentBuilderFactory;
15
16import org.w3c.dom.Document;
17import org.w3c.dom.Element;
18import org.w3c.dom.Node;
19import org.w3c.dom.NodeList;
20
21import org.greenstone.gsdl3.util.GlobalProperties;
22
23public class PrintHTML {
24
25 final String NORMAL = "normal";
26 final String WORST = "worst";
27 final String BEST = "best";
28
29 final String title1 ="Hide Empty Metadata ,";
30 final String title2 ="Hide Completed Metadata ,";
31 final String title3 ="Hide Documents with Empty Metadata ,";
32 final String title4 ="Hide Documents with Completed Metadata ,";
33 final String title5 ="No Available Graph";
34
35 final String metadataSet1 = "Dublin Core";
36 final String metadataSet2 = "Extracted";
37
38 final String spaceLeft = "<I>&laquo;";
39 final String spaceRight = "&raquo;</I>";
40 final String htmlSpace = "&nbsp;";
41 final String oddChar ="odd Character";
42
43 String linkIdentifier = "";
44 //String cssString = "<link rel=\"stylesheet\" href=\"http://www.nzdl.org/greenstone3/mat/script/mat.css\" type=\"text/css\" >";
45
46 final String space ="space";
47 private HashMap url;
48
49 String destination ="";
50 MetadataStats mds;
51 DataMaker dm;
52 Calendar cl;
53 SimpleDateFormat sdf;
54
/**
 * Creates a page generator for the given statistics object.
 *
 * The output location is taken from ms.HTMLDirectory, a DataMaker is built
 * on top of ms, "dc.Identifier" is selected as the link element and the
 * timestamp format used on generated pages is prepared.
 *
 * @param ms statistics of the collection to be reported on
 */
public PrintHTML(MetadataStats ms){

    mds = ms;

    // Best effort: if the directory cannot be read (e.g. ms is null) the
    // problem is reported and destination keeps its default value "".
    try{
        destination = mds.HTMLDirectory;
    }catch(Exception ex){
        ex.printStackTrace();
    }

    dm = new DataMaker(mds);
    cl = Calendar.getInstance();
    linkIdentifier = "dc.Identifier";

    sdf = new SimpleDateFormat(" dd MMM yyyy 'at' HH:mm:ss z 'GMT'Z");
    setupMetadataLink("metadataElementURL");
}
73
74 private ArrayList getHideEmptyMetadataDetail(ArrayList dataset, String[] id, String[] metadataName){
75
76 if(id.length==0 || metadataName.length==0){
77 return new ArrayList();
78 }
79
80 ArrayList detailList = new ArrayList();
81 ArrayList alist = new ArrayList();
82 ArrayList nameList = new ArrayList();
83
84 for(int i = 0; i< metadataName.length; i++){
85 if(!dm.IsElementEmpty(metadataName[i])){
86 alist.add(dm.getMetadataRows(metadataName[i]));
87 nameList.add(metadataName[i]);
88 }
89 }
90
91 String[] metaDataElementName = new String[nameList.size()];
92
93 for(int i = 0; i< nameList.size(); i++){
94 metaDataElementName[i] = nameList.get(i).toString();
95 }
96
97 if(id.length==0 || metaDataElementName.length==0){
98 return new ArrayList();
99 }
100
101 detailList.add(alist);
102 detailList.add(id);
103 detailList.add(metaDataElementName);
104 return detailList;
105 }
106
107 private ArrayList getHideFullMetadataDetail(ArrayList dataset, String[] id, String[] metadataName){
108
109 if(id.length==0 || metadataName.length==0){
110 return new ArrayList();
111 }
112
113 ArrayList detailList = new ArrayList();
114 ArrayList alist = new ArrayList();
115 ArrayList nameList = new ArrayList();
116
117 for(int i = 0; i< metadataName.length; i++){
118 if(!dm.IsElementFull(metadataName[i])){
119 alist.add(dm.getMetadataRows(metadataName[i]));
120 nameList.add(metadataName[i]);
121 }
122 }
123
124 String[] metaDataElementName = new String[nameList.size()];
125
126 for(int i = 0; i< nameList.size(); i++){
127 metaDataElementName[i] = nameList.get(i).toString();
128 }
129
130 if(id.length==0 || metaDataElementName.length==0){
131 return new ArrayList();
132 }
133
134 detailList.add(alist);
135 detailList.add(id);
136 detailList.add(metaDataElementName);
137
138 return detailList;
139 }
140
141 private ArrayList getHideEmptyDocumentDetail(ArrayList dataset, String[] id, String[] metadataName){
142
143 if(id.length==0 || metadataName.length==0){
144 return new ArrayList();
145 }
146
147 ArrayList detailList = new ArrayList();
148 ArrayList alist = dm.removeDocument(dataset, id, 0);
149 ArrayList idList = dm.getRemovedID();
150 ArrayList newIDList = new ArrayList();
151
152 for(int i = 0; i< id.length; i++){
153 if(!idList.contains(id[i])){
154 newIDList.add(id[i]);
155 }
156 }
157
158 id = new String[newIDList.size()];
159
160 for(int i = 0; i<newIDList.size(); i++){
161 id[i] = newIDList.get(i).toString();
162 }
163
164 if(id.length==0 || metadataName.length==0){
165 return new ArrayList();
166 }
167
168 detailList.add(alist);
169 detailList.add(id);
170 detailList.add(metadataName);
171
172 return detailList;
173 }
174
175 private ArrayList getHideFullDocumentDetail(ArrayList dataset, String[] id, String[] metadataName){
176
177 if(id.length==0 || metadataName.length==0){
178 return new ArrayList();
179 }
180
181 ArrayList detailList = new ArrayList();
182 ArrayList alist = dm.removeDocument(dataset, id ,1);
183 ArrayList idList = dm.getRemovedID();
184 ArrayList newIDList = new ArrayList();
185
186 for(int i = 0; i< id.length; i++){
187 if(!idList.contains(id[i])){
188 newIDList.add(id[i]);
189 }
190 }
191
192 id = new String[newIDList.size()];
193
194 for(int i = 0; i<newIDList.size(); i++){
195 id[i] = newIDList.get(i).toString();
196 }
197
198 if(id.length==0 || metadataName.length==0){
199 return new ArrayList();
200 }
201
202 detailList.add(alist);
203 detailList.add(id);
204 detailList.add(metadataName);
205
206 return detailList;
207 }
208
209
210 public void generateAllPossibleGraph(ArrayList Namelist,String[]ids,String[] names,String condition){
211
212 ArrayList dataList= Namelist;
213 String[] id = ids;
214 String[] metadataName = names;
215
216 ArrayList alist = new ArrayList();
217 ArrayList detailList = new ArrayList();
218 ArrayList new_dataList = new ArrayList();
219 String[] idList;
220 String[] metadataNameList;
221
222 //-------------------SS
223 //-----generate SSSS
224 if(id.length==0 || metadataName.length==0){
225 generateEmptyGraph("SSSS",WORST,condition);
226 generateEmptyGraph("SSSS",BEST,condition);
227 }
228 else{
229 new_dataList = transformDataList(dataList,id);
230
231 WriteHTML("SSSS",new_dataList,id,metadataName,WORST,"Show completed graph",condition);
232 WriteHTML("SSSS",new_dataList,id,metadataName,BEST,"Show completed graph",condition);
233 }
234
235 //-----SSSH
236 detailList = getHideFullDocumentDetail(dataList,id,metadataName);
237
238 if(detailList.size()==0){
239 generateEmptyGraph("SSSH",WORST,condition);
240 generateEmptyGraph("SSSH",BEST,condition);
241 }
242 else{
243 alist = (ArrayList)detailList.get(0);
244 idList = (String[])detailList.get(1);
245 metadataNameList = (String[])detailList.get(2);
246 new_dataList = transformDataList(alist,idList);
247
248 WriteHTML("SSSH",new_dataList,idList,metadataNameList,WORST,title4,condition);
249 WriteHTML("SSSH",new_dataList,idList,metadataNameList,BEST,title4,condition);
250 }
251
252 //-----SSHS
253 detailList = getHideEmptyDocumentDetail(dataList,id,metadataName);
254 if(detailList.size()==0){
255 generateEmptyGraph("SSHS",WORST,condition);
256 generateEmptyGraph("SSHS",BEST,condition);
257 generateEmptyGraph("SSHH",WORST,condition);
258 generateEmptyGraph("SSHH",BEST,condition);
259 }
260 else{
261 alist = (ArrayList)detailList.get(0);
262 idList = (String[])detailList.get(1);
263 metadataNameList = (String[])detailList.get(2);
264 new_dataList = transformDataList(alist,idList);
265
266 WriteHTML("SSHS",new_dataList,idList,metadataNameList,WORST,title3,condition);
267 WriteHTML("SSHS",new_dataList,idList,metadataNameList,BEST,title3,condition);
268
269 //-----SSHH
270 detailList = getHideFullDocumentDetail(alist,idList,metadataNameList);
271 if(detailList.size()==0){
272 generateEmptyGraph("SSHH",WORST,condition);
273 generateEmptyGraph("SSHH",BEST,condition);
274 }
275 else{
276 alist = (ArrayList)detailList.get(0);
277 idList = (String[])detailList.get(1);
278 metadataNameList = (String[])detailList.get(2);
279 new_dataList = transformDataList(alist,idList);
280
281 WriteHTML("SSHH",new_dataList,idList,metadataNameList,WORST,title3+title4,condition);
282 WriteHTML("SSHH",new_dataList,idList,metadataNameList,BEST,title3+title4,condition);
283 }
284 }
285
286
287 //-------------------SH
288 ArrayList xList = new ArrayList();
289 String[] idListCopy;
290 String[] metadataNameListCopy;
291
292 //-----SHSS
293 detailList = getHideFullMetadataDetail(dataList,id,metadataName);
294
295 if(detailList.size()==0){
296 generateEmptyGraph("SHSS",WORST,condition);
297 generateEmptyGraph("SHSS",BEST,condition);
298
299 generateEmptyGraph("SHSH",WORST,condition);
300 generateEmptyGraph("SHSH",BEST,condition);
301
302 generateEmptyGraph("SHHS",WORST,condition);
303 generateEmptyGraph("SHHS",BEST,condition);
304
305 generateEmptyGraph("SHHH",WORST,condition);
306 generateEmptyGraph("SHHH",BEST,condition);
307 }
308 else{
309 alist = (ArrayList)detailList.get(0);
310 idList = (String[])detailList.get(1);
311 metadataNameList = (String[])detailList.get(2);
312
313 xList = (ArrayList)alist.clone();
314 idListCopy = (String[]) idList.clone();
315 metadataNameListCopy = (String[])metadataNameList.clone();
316 new_dataList = transformDataList(alist,idList);
317
318 WriteHTML("SHSS",new_dataList,idList,metadataNameList,WORST,title2,condition);
319 WriteHTML("SHSS",new_dataList,idList,metadataNameList,BEST,title2,condition);
320
321 //-----SHHS
322 detailList = getHideEmptyDocumentDetail((ArrayList)xList.clone(),(String[])idListCopy.clone(),(String[])metadataNameListCopy.clone());
323
324 if(detailList.size()==0){
325 generateEmptyGraph("SHHS",WORST,condition);
326 generateEmptyGraph("SHHS",BEST,condition);
327
328 generateEmptyGraph("SHHH",WORST,condition);
329 generateEmptyGraph("SHHH",BEST,condition);
330 }
331 else{
332 alist = (ArrayList)detailList.get(0);
333 idList = (String[])detailList.get(1);
334 metadataNameList = (String[])detailList.get(2);
335 new_dataList = transformDataList(alist,idList);
336
337 WriteHTML("SHHS",new_dataList,idList,metadataNameList,WORST,title2+title3,condition);
338 WriteHTML("SHHS",new_dataList,idList,metadataNameList,BEST,title2+title3,condition);
339
340 //-----SHHH
341 detailList = getHideFullDocumentDetail(alist,idList,metadataNameList);
342
343 if(detailList.size()==0){
344 generateEmptyGraph("SHHH",WORST,condition);
345 generateEmptyGraph("SHHH",BEST,condition);
346 }
347 else{
348 alist = (ArrayList)detailList.get(0);
349 idList = (String[])detailList.get(1);
350 metadataNameList = (String[])detailList.get(2);
351 new_dataList = transformDataList(alist,idList);
352
353 WriteHTML("SHHH",new_dataList,idList,metadataNameList,WORST,title2+title3+title4,condition);
354 WriteHTML("SHHH",new_dataList,idList,metadataNameList,BEST,title2+title3+title4,condition);
355 }
356 }
357
358 //-----SHSH
359 detailList = getHideFullDocumentDetail((ArrayList)xList.clone(),(String[])idListCopy.clone(),(String[])metadataNameListCopy.clone());
360
361 if(detailList.size()==0){
362 generateEmptyGraph("SHSH",WORST,condition);
363 generateEmptyGraph("SHSH",BEST,condition);
364 }
365 else{
366 alist = (ArrayList)detailList.get(0);
367 idList = (String[])detailList.get(1);
368 metadataNameList = (String[])detailList.get(2);
369 new_dataList = transformDataList(alist,idList);
370
371 WriteHTML("SHSH",new_dataList,idList,metadataNameList,WORST,title2+title4,condition);
372 WriteHTML("SHSH",new_dataList,idList,metadataNameList,BEST,title2+title4,condition);
373 }
374 }
375
376 //-------------------HS
377 ArrayList xList1 = new ArrayList();
378 String[] idListCopy1;
379 String[] metadataNameListCopy1;
380
381 //-----HSSS
382 detailList = getHideEmptyMetadataDetail(dataList,id,metadataName);
383
384 if(detailList.size()==0){
385 generateEmptyGraph("HSSS",WORST,condition);
386 generateEmptyGraph("HSSS",BEST,condition);
387
388 generateEmptyGraph("HSHS",WORST,condition);
389 generateEmptyGraph("HSHS",BEST,condition);
390
391 generateEmptyGraph("HSHH",WORST,condition);
392 generateEmptyGraph("HSHH",BEST,condition);
393
394 generateEmptyGraph("HSSH",WORST,condition);
395 generateEmptyGraph("HSSH",BEST,condition);
396 }
397 else{
398 alist = (ArrayList)detailList.get(0);
399 idList = (String[])detailList.get(1);
400 metadataNameList = (String[])detailList.get(2);
401
402 xList1 = (ArrayList)alist.clone();
403 idListCopy1 = (String[]) idList.clone();
404 metadataNameListCopy1 = (String[])metadataNameList.clone();
405 new_dataList = transformDataList(alist,idList);
406
407 WriteHTML("HSSS",new_dataList,idList,metadataNameList,WORST,title1,condition);
408 WriteHTML("HSSS",new_dataList,idList,metadataNameList,BEST,title1,condition);
409
410 //-----HSHS
411 detailList = getHideEmptyDocumentDetail((ArrayList)xList1.clone(),(String[])idListCopy1.clone(),(String[])metadataNameListCopy1.clone());
412
413 if(detailList.size()==0){
414 generateEmptyGraph("HSHS",WORST,condition);
415 generateEmptyGraph("HSHS",BEST,condition);
416
417 generateEmptyGraph("HSHH",WORST,condition);
418 generateEmptyGraph("HSHH",BEST,condition);
419 }
420 else{
421 alist = (ArrayList)detailList.get(0);
422 idList = (String[])detailList.get(1);
423 metadataNameList = (String[])detailList.get(2);
424 new_dataList = transformDataList(alist,idList);
425
426 WriteHTML("HSHS",new_dataList,idList,metadataNameList,WORST,title1+title3,condition);
427 WriteHTML("HSHS",new_dataList,idList,metadataNameList,BEST,title1+title3,condition);
428
429 //-----HSHH
430 detailList = getHideFullDocumentDetail(alist,idList,metadataNameList);
431 if(detailList.size()==0){
432 generateEmptyGraph("HSHH",WORST,condition);
433 generateEmptyGraph("HSHH",BEST,condition);
434 }
435 else{
436 alist = (ArrayList)detailList.get(0);
437 idList = (String[])detailList.get(1);
438 metadataNameList = (String[])detailList.get(2);
439 new_dataList = transformDataList(alist,idList);
440
441 WriteHTML("HSHH",new_dataList,idList,metadataNameList,WORST,title1+title3+title4,condition);
442 WriteHTML("HSHH",new_dataList,idList,metadataNameList,BEST,title1+title3+title4,condition);
443 }
444 }
445
446 //-----HSSH
447 detailList = getHideFullDocumentDetail((ArrayList)xList1.clone(),(String[])idListCopy1.clone(),(String[])metadataNameListCopy1.clone());
448
449 if(detailList.size()==0){
450 generateEmptyGraph("HSSH",WORST,condition);
451 generateEmptyGraph("HSSH",BEST,condition);
452 }
453 else{
454 alist = (ArrayList)detailList.get(0);
455 idList = (String[])detailList.get(1);
456 metadataNameList = (String[])detailList.get(2);
457 new_dataList = transformDataList(alist,idList);
458
459 WriteHTML("HSSH",new_dataList,idList,metadataNameList,WORST,title1+title4,condition);
460 WriteHTML("HSSH",new_dataList,idList,metadataNameList,BEST,title1+title4,condition);
461 }
462 }
463
464 //-------------------HH
465 ArrayList xList2 = new ArrayList();
466 String[] idListCopy2;
467 String[] metadataNameListCopy2;
468
469 //-----HHSS
470 detailList = getHideEmptyMetadataDetail(dataList,id,metadataName);
471
472 if(detailList.size()==0){
473 generateEmptyGraph("HHSS",WORST,condition);
474 generateEmptyGraph("HHSS",BEST,condition);
475
476 generateEmptyGraph("HHHS",WORST,condition);
477 generateEmptyGraph("HHHS",BEST,condition);
478
479 generateEmptyGraph("HHHH",WORST,condition);
480 generateEmptyGraph("HHHH",BEST,condition);
481
482 generateEmptyGraph("HHSH",WORST,condition);
483 generateEmptyGraph("HHSH",BEST,condition);
484 }
485 else{
486 alist = (ArrayList)detailList.get(0);
487 idList = (String[])detailList.get(1);
488 metadataNameList = (String[])detailList.get(2);
489 detailList = getHideFullMetadataDetail(alist,idList,metadataNameList);
490
491 if(detailList.size()==0){
492 generateEmptyGraph("HHSS",WORST,condition);
493 generateEmptyGraph("HHSS",BEST,condition);
494
495 generateEmptyGraph("HHHS",WORST,condition);
496 generateEmptyGraph("HHHS",BEST,condition);
497
498 generateEmptyGraph("HHHH",WORST,condition);
499 generateEmptyGraph("HHHH",BEST,condition);
500
501 generateEmptyGraph("HHSH",WORST,condition);
502 generateEmptyGraph("HHSH",BEST,condition);
503 }
504 else{
505 alist = (ArrayList)detailList.get(0);
506 idList = (String[])detailList.get(1);
507 metadataNameList = (String[])detailList.get(2);
508
509 xList2 = (ArrayList)alist.clone();
510 idListCopy2 = (String[]) idList.clone();
511 metadataNameListCopy2 = (String[])metadataNameList.clone();
512 new_dataList = transformDataList(alist,idList);
513
514 WriteHTML("HHSS",new_dataList,idList,metadataNameList,WORST,title1+title2,condition);
515 WriteHTML("HHSS",new_dataList,idList,metadataNameList,BEST,title1+title2,condition);
516
517 //----- HHSH
518 detailList = getHideFullDocumentDetail((ArrayList)xList2.clone(),(String[])idListCopy2.clone(),(String[])metadataNameListCopy2.clone());
519
520 if(detailList.size()==0){
521 generateEmptyGraph("HHSH",WORST,condition);
522 generateEmptyGraph("HHSH",BEST,condition);
523 }
524 else{
525 alist = (ArrayList)detailList.get(0);
526 idList = (String[])detailList.get(1);
527 metadataNameList = (String[])detailList.get(2);
528 new_dataList = transformDataList(alist,idList);
529
530 WriteHTML("HHSH",new_dataList,idList,metadataNameList,WORST,title1+title2+title4,condition);
531 WriteHTML("HHSH",new_dataList,idList,metadataNameList,BEST,title1+title2+title4,condition);
532 }
533 //-----HHHS
534 detailList = getHideEmptyDocumentDetail((ArrayList)xList2.clone(),(String[])idListCopy2.clone(),(String[])metadataNameListCopy2.clone());
535
536 if(detailList.size()==0){
537 generateEmptyGraph("HHHS",WORST,condition);
538 generateEmptyGraph("HHHS",BEST,condition);
539 generateEmptyGraph("HHHH",WORST,condition);
540 generateEmptyGraph("HHHH",BEST,condition);
541 }
542 else{
543 alist = (ArrayList)detailList.get(0);
544 idList = (String[])detailList.get(1);
545 metadataNameList = (String[])detailList.get(2);
546 new_dataList = transformDataList(alist,idList);
547
548 WriteHTML("HHHS",new_dataList,idList,metadataNameList,WORST,title1+title2+title3,condition);
549 WriteHTML("HHHS",new_dataList,idList,metadataNameList,BEST,title1+title2+title3,condition);
550
551 //-----HHHH
552 detailList = getHideFullDocumentDetail((ArrayList)alist.clone(),(String[])idList.clone(),(String[])metadataNameList.clone());
553
554 if(detailList.size()==0){
555 generateEmptyGraph("HHHH",WORST,condition);
556 generateEmptyGraph("HHHH",BEST,condition);
557 }
558 else{
559 alist = (ArrayList)detailList.get(0);
560 idList = (String[])detailList.get(1);
561 metadataNameList = (String[])detailList.get(2);
562 new_dataList = transformDataList(alist,idList);
563
564 WriteHTML("HHHH",new_dataList,idList,metadataNameList,WORST,title1+title2+title3+title4,condition);
565 WriteHTML("HHHH",new_dataList,idList,metadataNameList,BEST,title1+title2+title3+title4,condition);
566 }
567 }
568 }
569 }
570 }
571
572 private ArrayList transformDataList(ArrayList list, String[] ids){
573
574 ArrayList wholeList = new ArrayList();
575
576 for(int i = 0; i< ids.length; i++){
577 ArrayList idList = new ArrayList();
578 for(int j = 0; j<list.size();j++){
579 int[] datarows = (int[]) list.get(j);
580 idList.add(new Integer (datarows[i]));
581 }
582 wholeList.add(idList);
583 }
584
585 for(int i = 0; i< wholeList.size(); i++){
586 ArrayList alist = (ArrayList)wholeList.get(i);
587 int[] rows = new int[alist.size()];
588 for(int j = 0; j< alist.size(); j++){
589 rows[j] = ((Integer)alist.get(j)).intValue();
590 }
591 wholeList.remove(i);
592 wholeList.add(i,rows);
593 }
594 return wholeList;
595 }
596
597 private void generateEmptyGraph(String fileName,String condition,String suffix){
598
599 String suf = suffix;
600
601 if(!suffix.equalsIgnoreCase("dublin")){
602 suf = "other";
603 }
604
605 String cases = condition;
606 String collectionFolder = mds.getCollectionName();
607
608 try{
609 FileWriter fstream = new FileWriter(destination+suf+"_"+fileName+"_"+cases+".html");
610 BufferedWriter out = new BufferedWriter(fstream);
611
612 out.write("<!-- This comment keeps IE6/7 in the reliable quirks mode -->\r\n");
613 out.write("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\r\n");
614 out.write("<html>\r\n");
615 out.write("<head>\r\n<title> No Available Chart</title>\r\n");
616 out.write("<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\">\r\n");
617 out.write("<link rel=\"stylesheet\" href=\"../script/doby2.css\" type=\"text/css\"/>\r\n");
618 out.write("</head>\r\n");
619 out.write("<p><a href=\" Overall.html \">Summary</a></p>");
620 out.write("<body>\r\n");
621 out.write("<p>No data available to render chart.</p>\r\n");
622 out.write("<p>Reason: Document number is zero or Metadata element number is zero </p>\r\n");
623 out.write("</body></html>\r\n");
624 out.close();
625 fstream.close();
626
627 }catch(Exception ex){
628 ex.printStackTrace();
629 }
630 }
631
632 private void WriteHTML(String fileName, ArrayList dataset, String[] ids, String[] metadataName, String condition, String title, String suffix){
633
634 int blueDot = 0;
635 String cases = condition;
636 String suf = suffix;
637
638 if(!suffix.equalsIgnoreCase("dublin")){
639 suf = "other";
640 }
641
642 try{
643 ArrayList tempList = mds.getMetadataNameList();
644 ArrayList urlIDList;
645
646 if(tempList.contains("dc.Identifier") && linkIdentifier.equals("dc.Identifier")){
647 urlIDList = dm.getURLMap("dc.Identifier");
648 }
649 else if(tempList.contains("nzir_internal.Link") && linkIdentifier.equals("nzir_internal.Link")){
650 urlIDList = dm.getURLMap("nzir_internal.Link");
651 }
652 else{
653 urlIDList = new ArrayList();
654 }
655
656 String collectionFolder = mds.getCollectionName();
657 FileWriter fstream = new FileWriter(destination+suf+"_"+fileName+"_"+cases+".html");
658 BufferedWriter out = new BufferedWriter(fstream);
659
660 out.write("<!-- This comment keeps IE6/7 in the reliable quirks mode -->\r\n");
661 out.write("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\r\n");
662 out.write("<html>\r\n");
663 out.write("<head>\r\n<title>"+title+"</title>\r\n");
664 out.write("<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\">\r\n");
665 out.write("<link rel=\"stylesheet\" href=\"../script/doby2.css\" type=\"text/css\"/>\r\n");
666 out.write("<script type=\"text/javascript\" src=\"../script/getInfomation.js\"></script>\r\n");
667 out.write("<script type=\"text/javascript\" src=\"http://yui.yahooapis.com/2.4.1/build/yahoo/yahoo-min.js\"></script>\r\n");
668 out.write("<script type=\"text/javascript\" src=\"http://yui.yahooapis.com/2.4.1/build/event/event-min.js\"></script>\r\n");
669 out.write("<script type=\"text/javascript\" src=\"http://yui.yahooapis.com/2.4.1/build/connection/connection-min.js\"></script>\r\n");
670 out.write("</head>\r\n");
671 out.write ("<p><a href=\" Overall.html \">Summary</a></p>");
672 out.write("<body id=\""+mds.getCollectionName()+"\" onLoad=\"reconfig()\">\r\n");
673 out.write("<div id=\"container\">\r\n");
674 out.write("<div class=\"tableContainer\">\r\n");
675 out.write ("<table cellspacing=\"0\">\r\n");
676 out.write ("<thead>\r\n");
677 out.write ("<tr>\r\n");
678 out.write("<td class=\"qh\">Info\r\n");
679 out.write("<td class=\"qh\">URL\r\n");
680
681 for(int a = 0; a< metadataName.length; a++){
682 out.write("<td>"+ metadataName[a]+"\r\n");
683 }
684
685 out.write ("</thead>\r\n<tfoot>\r\n<tr>\r\n");
686 out.write("<td class=\"qh\">&nbsp;\r\n");
687 out.write("<td class=\"qh\">&nbsp;\r\n");
688
689 for(int a = 0; a< metadataName.length; a++){
690 out.write("<td>"+ dm.Mean(metadataName[a])+"%\r\n");
691 }
692
693 out.write ("</tfoot>\r\n<tbody>\r\n");
694
695 if(cases.equals("normal")){
696 for(int i = ids.length; i>0; i--){
697 out.write ("<tr>");
698 int[] datarows = (int[])dataset.get(i-1);
699
700 for(int j = 0; j<datarows.length; j++){
701 if(datarows[j]==1){
702 out.write ("<td class=\"b\">");
703 blueDot++;
704 }
705 else {
706 out.write ("<td>&nbsp;");
707 }
708 }
709 }
710 }
711 else{
712 ArrayList idList = new ArrayList();
713
714 for(int i = 0; i<ids.length;i++){
715 idList.add(ids[i]);
716 }
717
718 HashMap hp = new HashMap();
719
720 for(int i = ids.length; i>0; i--){
721 String idValue = ids[i-1];
722 int dots = 0;
723 int[] datarows = (int[])dataset.get(i-1);
724 for(int j = 0; j<datarows.length; j++){
725 if(datarows[j]==1){
726 dots++;
727 }
728 }
729 hp.put(idValue,new Integer(dots));
730 }
731
732 ArrayList alist = dm.sortMap(hp);
733 String[] idIntValue = new String[alist.size()];
734
735 for(int i = 0; i< alist.size(); i++){
736 Map.Entry entry = (Map.Entry) alist.get(i);
737 String idElement = ((String) entry.getKey());
738 idIntValue[i] = idElement;
739 }
740
741 if(cases.equals("worst")){
742
743 for(int i = 0; i<idIntValue.length; i++){
744 int value = idList.indexOf(idIntValue[i]);
745 int[] datarows = (int[])dataset.get(value);
746
747 out.write ("<tr id=\""+idIntValue[i].substring(4)+"\">\r\n");
748 out.write("<td class=\"E\" onclick=\"GD(this)\">&nbsp;\r\n");
749
750 if(urlIDList.contains(idIntValue[i])){
751 out.write("<td class=\"qh\" onclick=\"GX(this)\"><span title=\"open URL in new window\">open</span>\r\n");
752 }
753 else{
754 out.write("<td>\r\n");
755 }
756
757 for(int j = 0; j<datarows.length; j++){
758 if(datarows[j]==1){
759 out.write ("<td class=\"b\">\r\n");
760 blueDot++;
761 }
762 else {
763 out.write ("<td class=\"w\">\r\n");
764 }
765 }
766 }
767 }
768
769 if(cases.equals("best")){
770
771 for(int i = idIntValue.length; i>0; i--){
772 int value = idList.indexOf(idIntValue[i-1]);
773 int[] datarows = (int[])dataset.get(value);
774
775 out.write ("<tr id=\""+idIntValue[i-1].substring(4)+"\">\r\n");
776 out.write("<td class=\"E\" onclick=\"GD(this)\">&nbsp;\r\n");
777
778 if(urlIDList.contains(idIntValue[i-1])){
779 out.write("<td class=\"qh\" onclick=\"GX(this)\"><span title=\"open URL in new window\">open</span>\r\n");
780 }
781 else{
782 out.write("<td>\r\n");
783 }
784
785 for(int j = 0; j<datarows.length; j++){
786 if(datarows[j]==1){
787 out.write ("<td class=\"b\">\r\n");
788 blueDot++;
789 }
790 else {
791 out.write ("<td class=\"w\">\r\n");
792 }
793 }
794 }
795 }
796 }
797
798 out.write ("</tbody>\r\n");
799 out.write ("</table>\r\n</div></div>\r\n");
800
801 int t1 = ids.length;
802 int t3 = metadataName.length;
803 int t4 = t1*t3;
804 HashMap tempMap = mds.getMetadataSetMap();
805 MetadataSet ms = (MetadataSet)tempMap.get(suffix);
806
807 out.write("<table>\r\n<tbody class=\"table1\">");
808 out.write("<tr>\r\n");
809 out.write("<td class=\"bfont\">This subset shows "+t1+" out of "+mds.getDocNum()+" documents");
810 out.write("<td class=\"bfont\">"+blueDot+" out of "+(mds.getDocNum()*t3)+" metadata items are defined");
811 out.write("<tr>\r\n");
812 out.write("<td class=\"bfont\">This subset shows "+t3+" out of "+(ms.getIndexsList().size())+" metadata elements");
813 out.write("<td class=\"bfont\">Subset completeness: "+dm.round((double)(blueDot*100/t4),5)+"%");
814 out.write("</table>\r\n");
815 out.write ("<p> <a href=\"Overall.html \">Summary</a></p>");
816 out.write ("</body></html>");
817 out.close();
818 fstream.close();
819
820 }catch (Exception e){//Catch exception if any
821 e.printStackTrace();
822 }
823 }
824
825 public void generateOverallStatisticsPage(HashMap MetadataSetMap){
826
827 String fileName = "Overall";
828 ArrayList wholeList = new ArrayList();
829 HashMap hp = mds.getMetadataSetMap();
830 Collection c = hp.values();
831 Iterator i = c.iterator();
832 int counter = 0;
833 String[][] MetadataData = new String[c.size()][2];
834
835 while(i.hasNext()){
836 MetadataSet mds = (MetadataSet)i.next();
837 wholeList.add(mds);
838 ArrayList newMDS = new ArrayList();
839 newMDS.add(mds);
840 MetadataData[counter][0] = mds.getName();
841 MetadataData[counter][1] = dm.getSingleMetadataSetCompleteness(newMDS)+"%";
842 counter++;
843 }
844
845 try{
846 String str = sdf.format(cl.getTime());
847 String collectionFolder = mds.getCollectionName();
848 FileWriter fstream = new FileWriter(destination+fileName+".html");
849 BufferedWriter out = new BufferedWriter(fstream);
850
851 out.write("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">");
852 out.write("<html>\r\n");
853 out.write("<head><title>Summary</title>\r\n");
854 out.write("<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\">\r\n");
855 out.write("<style type=\"text/css\">\r\n");
856 out.write(".tam{height:20; text-align:right}\r\n");
857 out.write("th{height:20; text-align:left}\r\n");
858 out.write("body{font-family: Arial;}\r\n");
859 out.write("</style>\r\n");
860 out.write("<script type=\"text/javascript\" src=\"../script/status3.js\"></script>\r\n");
861 out.write("</head>\r\n");
862 out.write("<body>\r\n");
863 out.write ("<td> <div style=\"float:left;\"> <a href=\"http://www.nzdl.org/greenstone3/mat\">Mat Home</a></div>");
864 out.write("<div style=\"float:right;\"><a href=\"http://chnm.gmu.edu/tools/surveys/4386/\">Please send feedback about the Mat tool</a></div></td><br>");
865 out.write("<h3 align=\"center\">Summary</h3>\r\n");
866 out.write("<table border=1 align=\"center\">\r\n");
867 out.write("<colgroup width=\"300\" span=\"2\">\r\n");
868 out.write("<tr>\r\n");
869 out.write("<th>OAI URL: </th>\r\n");
870
871 if(mds.getOAIURL().length()>50){
872 out.write("<td class=\"tam\"><font size=\"2px\"><a href=\""+mds.getOAIURL()+"\">"+mds.getOAIURL().substring(0,47)+"....</a></font>\r\n");
873 }
874 else{
875 out.write("<td class=\"tam\"><font size=\"2px\"><a href=\""+mds.getOAIURL()+"\">"+mds.getOAIURL()+"</a></font>\r\n");
876 }
877 out.write("</tr>\r\n");
878 out.write("<tr>\r\n");
879 out.write("<th>Number of Records:\r\n");
880 out.write("<td class=\"tam\">"+mds.getDocNum()+"\r\n");
881 out.write("</tr>\r\n");
882 out.write("<table border=1 align=\"center\">\r\n");
883 out.write("<colgroup width=\"300\" span=\"2\">\r\n");
884 out.write("<tr>\r\n");
885 out.write("<th>Metadata:\r\n");
886 out.write("<td class=\"tam\"><b>Completeness</b>\r\n");
887 out.write("</tr><br>\r\n");
888
889 for(int a = 0; a<MetadataData.length; a++){
890 out.write ("<tr>\r\n");
891
892 if(MetadataData[a][0].equalsIgnoreCase("dublin")){
893 out.write ("<th><a href =\""+MetadataData[a][0]+".html\">"+metadataSet1+"</a>");
894 }
895
896 else if (MetadataData[a][0].equalsIgnoreCase("extracted")){
897 out.write ("<th><a href =\""+MetadataData[a][0]+".html\">"+metadataSet2+"</a>");
898 }
899 else{
900 out.write ("<th><a href =\""+MetadataData[a][0]+".html\">"+MetadataData[a][0]+"</a>");
901 }
902
903 out.write ("<td class=\"tam\"> "+MetadataData[a][1]);
904 out.write ("</tr>\r\n");
905 }
906
907 out.write ("</table>");
908 out.write("<br>\r\n");
909 out.write("<FORM name=\"test\" onsubmit=\"checkStatus()\" action=\"\">");
910 out.write("<table border=1 align=\"center\">\r\n");
911 out.write ("<colgroup width=\"606\" span=\"1\">\r\n");
912 out.write("<tr>\r\n<th>Customize Visualization");
913 out.write("<tr>\r\n<td><LABEL FOR=\"H1\"><INPUT align=\"left\" TYPE=\"checkbox\" ID=\"H1\">Hide Empty Metadata Elements</LABEL>");
914 out.write("<tr>\r\n<td><LABEL FOR=\"H2\"><INPUT align=\"left\" TYPE=\"checkbox\" ID=\"H2\">Hide Completed Metadata Elements</LABEL>");
915 out.write("<tr>\r\n<td><LABEL FOR=\"H3\"><INPUT align=\"left\" TYPE=\"checkbox\" ID=\"H3\">Hide Documents with Empty Metadata Elements</LABEL>");
916 out.write("<tr>\r\n<td><LABEL FOR=\"H4\"><INPUT align=\"left\" TYPE=\"checkbox\" ID=\"H4\">Hide Documents with Completed Metadata Elements</LABEL>");
917 out.write("<tr>\r\n<th>Metadata: ");
918
919 if(MetadataData.length==1){
920 if(MetadataData[0][0].equalsIgnoreCase("dublin")){
921 out.write("<tr>\r\n<td><LABEL FOR=\"C1\"><INPUT align=\"left\" TYPE=\"radio\" name = \"col\" ID=\"C1\" checked> Dublin Core </LABEL>");
922 }
923 else {
924 out.write("<tr>\r\n<td><LABEL FOR=\"C4\"><INPUT align=\"left\" TYPE=\"radio\" name = \"col\" ID=\"C4\" checked>"+MetadataData[0][0]+"</LABEL>");
925 }
926 }
927 else{
928 for(int a = 0; a<MetadataData.length; a++){
929 if(a==0){
930 if(MetadataData[a][0].equalsIgnoreCase("dublin")){
931 out.write("<tr>\r\n<td><LABEL FOR=\"C1\"><INPUT align=\"left\" TYPE=\"radio\" name = \"col\" ID=\"C1\" checked>Dublin Core</LABEL>");
932 }
933 else {
934 out.write("<tr>\r\n<td><LABEL FOR=\"C4\"><INPUT align=\"left\" TYPE=\"radio\" name = \"col\" ID=\"C4\" checked>"+MetadataData[a][0]+"</LABEL>");
935 }
936 }
937 else{
938 if(MetadataData[a][0].equalsIgnoreCase("dublin")){
939 out.write("<tr>\r\n<td><LABEL FOR=\"C1\"><INPUT align=\"left\" TYPE=\"radio\" name = \"col\" ID=\"C1\">"+MetadataData[a][0]+"</LABEL>");
940 }
941 else{
942 out.write("<tr>\r\n<td><LABEL FOR=\"C4\"><INPUT align=\"left\" TYPE=\"radio\" name = \"col\" ID=\"C4\">"+MetadataData[a][0]+"</LABEL>");
943 }
944 }
945 }
946 }
947
948 out.write("<tr>\r\n<th>Order By Completeness : ");
949 out.write("<tr>\r\n<td><LABEL FOR=\"R1\"><INPUT align=\"left\" TYPE=\"radio\" name = \"order\" ID=\"R1\">Best Case to Worst Case</LABEL>");
950 out.write("<tr>\r\n<td><LABEL FOR=\"R2\"><INPUT align=\"left\" TYPE=\"radio\" name = \"order\" ID=\"R2\" checked >Worst Case to Best Case</LABEL>");
951 out.write("</table><p align=\"center\"><INPUT TYPE=\"button\" VALUE=\"Show Visualization\" onClick=\"checkStatus()\"> </FORM>");
952 out.write("<p align=\"center\">"+str+"</p>");
953 out.write("</body></html>");
954
955 out.close();
956 fstream.close();
957 }catch (Exception e){//Catch exception if any
958 e.printStackTrace();
959 }
960 }
961
962
963 public void WriteMetadataSetDetailHTML(MetadataSet mdset){
964
965 String fileName = mdset.getName();
966
967 try{
968 String str = sdf.format(cl.getTime());
969 String collectionFolder = mds.getCollectionName();
970 FileWriter fstream = new FileWriter(destination+fileName+".html");
971 BufferedWriter out = new BufferedWriter(fstream);
972
973 out.write("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">");
974 out.write("<html>\r\n");
975 out.write("<head>\r\n<title>Metadata Detail</title>\r\n");
976 out.write("<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\">\r\n");
977 out.write("<style type=\"text/css\">\r\n");
978 out.write("td{height:20;text-align:right;}\r\n");
979 out.write("th{height:20;text-align:left;}\r\n");
980 out.write("body{font-family: Arial;}\r\n");
981 out.write("</style>\r\n");
982 out.write( "</head>\r\n");
983 out.write ("<body>\r\n");
984 out.write ("<td> <div style=\"float:left;\"> <a href=\" Overall.html \">Summary</a></div>");
985 out.write("<div style=\"float:right;\"><a href=\"http://chnm.gmu.edu/tools/surveys/4386/\">Please send feedback about the Mat tool</a></div></td><br>");
986
987 if(fileName.equalsIgnoreCase("dublin")){
988 out.write ("<h3 align=\"center\">Metadata Detail: "+metadataSet1+"</h3>\r\n");
989 }
990 else if (fileName.equalsIgnoreCase("extracted")){
991 out.write ("<h3 align=\"center\">Metadata Detail: "+metadataSet2+"</h3>\r\n");
992 }
993 else{
994 out.write ("<h3 align=\"center\">Metadata Detail: "+fileName+"</h3>\r\n");
995 }
996
997 out.write ("<table border=\"1\" align=\"center\">\r\n");
998 out.write ("<colgroup width=\"200\" span=\"2\">\r\n");
999 out.write ("<tr>\r\n");
1000 out.write ("<th>Elements:\r\n");
1001 out.write ("<td><b>Completeness</b>\r\n");
1002 out.write ("</tr>\r\n");
1003
1004 ArrayList elementList = mdset.getIndexsList();
1005 HashMap hp = new HashMap();
1006 int num = elementList.size();
1007
1008 for(int i = 0; i<num; i++){
1009 String elementName = (String)elementList.get(i);
1010 Double elementValue = new Double(dm.Mean(elementName));
1011 hp.put(elementName, elementValue);
1012 }
1013
1014 elementList = new ArrayList();
1015 elementList = dm.sortMap(hp);
1016 num = elementList.size();
1017
1018 for(int i = 0; i<num; i++){
1019 Map.Entry entry = (Map.Entry) elementList.get(i);
1020 String elementName = (String) entry.getKey();
1021
1022 out.write ("<tr>\r\n");
1023 out.write ("<th><a href=\""+ elementName +".html\">"+elementName+"</a>\r\n");
1024 out.write ("<td>"+ dm.Mean(elementName)+"%\r\n");
1025 out.write ("</tr>\r\n");
1026 }
1027
1028 out.write("</table>\r\n");
1029 out.write ("<p align=\"center\"> <a href=\" Overall.html \">Summary</a></p>");
1030 out.write("<p align=\"center\">"+str+"</p>");
1031 out.write ("</body></html>\r\n");
1032
1033 out.close();
1034 fstream.close();
1035 }catch (Exception e){//Catch exception if any
1036 e.printStackTrace();
1037 }
1038 }
1039
1040 public void generateMetadataElementDetailPage(MetadataSet mds){
1041
1042 ArrayList nameList = mds.getIndexsList();
1043
1044 for(int i = 0; i<nameList.size();i++){
1045 WriteMetadataElementDetailHTML((String)nameList.get(i),mds.getName());
1046 }
1047 }
1048
1049 public void WriteMetadataElementDetailHTML(String name, String linkName){
1050
1051 String fileName = name;
1052
1053 try{
1054 String collectionFolder = mds.getCollectionName();
1055
1056 FileWriter fstream = new FileWriter(destination+fileName+".html");
1057 BufferedWriter out = new BufferedWriter(fstream);
1058
1059 out.write("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">");
1060 out.write ("<html>\r\n");
1061 out.write("<head>\r\n<title> "+ name +" </title>\r\n");
1062 out.write("<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\">\r\n");
1063 out.write("<style type=\"text/css\">\r\n");
1064 out.write("td{height:20; text-align:left;}\r\n");
1065 out.write(".tam{height:20; text-align:center}\r\n");
1066 out.write("th{height:20; text-align:left;}\r\n");
1067 out.write("body{font-family: Arial;}\r\n");
1068 out.write("</style>\r\n");
1069 out.write("<script type=\"text/javascript\" src=\"http://www.nzdl.org/greenstone3/mat/script/status3.js\"></script>");
1070 out.write("</head>\r\n");
1071 out.write("<body>\r\n");
1072 out.write ("<td> <div style=\"float:left;\"> <a href=\" Overall.html \">Summary</a></div>");
1073 out.write("<div style=\"float:right;\"><a href=\"http://chnm.gmu.edu/tools/surveys/4386/\">Please send feedback about the Mat tool</a></div></td>");
1074 out.write("&raquo;");
1075
1076 if(linkName.equalsIgnoreCase("dublin")){
1077 out.write ("<a href=\""+linkName+".html\">Metadata Detail ("+metadataSet1+")</a>");
1078 }
1079 else if (linkName.equalsIgnoreCase("extracted")){
1080 out.write ("<a href=\""+linkName+".html\">Metadata Detail ("+metadataSet2+")</a>");
1081 }
1082 else{
1083 out.write ("<a href=\""+linkName+".html\">Metadata Detail ("+linkName+")</a>");
1084 }
1085
1086 if(name.startsWith("dc.")){
1087 int dot = name.lastIndexOf('.');
1088 dot++;
1089 String nameReplace = name.substring(dot);
1090
1091 if(url.containsKey(nameReplace)){
1092 out.write ("<h3 align=\"center\">Metadata Element Detail:<a href=\""+ url.get(nameReplace) +"\">"+ name +"</a></h3>\r\n");
1093 }
1094 else{
1095 out.write ("<h3 align=\"center\">Metadata Element Detail: "+ name +"</a></h3>\r\n");
1096 }
1097 }
1098
1099 else{
1100 out.write ("<h3 align=\"center\">Metadata Element Detail: "+ name +"</h3>\r\n");
1101 }
1102
1103 out.write ("<table border=\"1\" align=\"center\" width=1000>\r\n");
1104 out.write ("<colgroup width=\"500\" span=\"2\">\r\n");
1105 out.write ("<tr>\r\n");
1106 out.write ("<td><b>Total Number of Records</b>\r\n");
1107 out.write ("<td class=\"tam\">"+mds.getDocNum()+"\r\n");
1108 out.write ("</tr>\r\n");
1109 out.write ("<tr>\r\n");
1110 out.write ("<td><b>Unique Values</b>\r\n");
1111 out.write ("<td class=\"tam\">"+dm.getDistinctNumber(name)+"\r\n");
1112 out.write ("</tr>\r\n");
1113 out.write ("<tr>\r\n");
1114 out.write ("<td><b>Total times element used</b>\r\n");
1115 out.write ("<td class=\"tam\">"+dm.getFrequency(name) +"\r\n");
1116 out.write ("</tr>\r\n");
1117 out.write ("<tr>\r\n");
1118 out.write ("<td><b>No. of records containing element</b>\r\n");
1119 out.write ("<td class=\"tam\"> "+dm.getDocumentUsedElement(name)+"\r\n");
1120 out.write ("</tr>\r\n");
1121 out.write ("<tr>\r\n");
1122
1123 double percentage = dm.Mean(name);
1124
1125 out.write ("<th title = \"The completeness means the arithmetic average \"> <div style=\"float:left;\"> Completeness</div>");
1126 out.write ("<td class=\"tam\"> "+dm.Mean(name) +"%\r\n");
1127 out.write ("</tr>\r\n");
1128 out.write ("<tr>\r\n");
1129 out.write ("<td><b><div style=\"float:left;\">Minimum "+name +" usage in any record</div></b><div style=\"float:right;\"> <a href=\"#\" onClick=\"helpWindow('Minimum','"+name+"')\" style=\"font-size:small;\">What's this?</a></div>\r\n");
1130 out.write ("<td class=\"tam\"> "+dm.getMinRange(name) +"\r\n");
1131 out.write ("</tr>\r\n");
1132 out.write ("<tr>\r\n");
1133 out.write ("<td><b><div style=\"float:left;\">Maximum "+name +" usage in any record</div></b><div style=\"float:right;\"> <a href=\"#\" onClick=\"helpWindow('Maximum','"+name+"')\" style=\"font-size:small;\">What's this?</a></div>\r\n");
1134 out.write ("<td class=\"tam\"> "+dm.getMaxRange(name) +"\r\n");
1135 out.write ("</tr>\r\n");
1136 out.write ("<tr>\r\n");
1137 out.write ("<td><b><div style=\"float:left;\">Average "+name +" usage/record</div></b><div style=\"float:right;\"> <a href=\"#\" onClick=\"helpWindow('Average','"+name+"')\" style=\"font-size:small;\">What's this?</a></div>\r\n");
1138 out.write ("<td class=\"tam\"> "+dm.Average(name) +"\r\n");
1139 out.write ("</tr>\r\n");
1140 out.write ("<tr>\r\n");
1141 out.write ("<td><b><div style=\"float:left;\">Mode of "+name +" usage/record</div></b><div style=\"float:right;\"><a href=\"#\" onClick=\"helpWindow('Mode','"+name+"')\" style=\"font-size:small;\">What's this?</a></div>\r\n");
1142 out.write ("<td class=\"tam\"> "+dm.getMode(name) +"\r\n");
1143 out.write ("</tr>\r\n");
1144 out.write ("<tr>\r\n");
1145 out.write ("<td><b><div style=\"float:left;\">Coverage of the mode of "+name +" usage/record</div></b><div style=\"float:right;\"><a href=\"#\" onClick=\"helpWindow('Mode Frequency','"+name+"')\" style=\"font-size:small;\">What's this?</a></div>\r\n");
1146 out.write ("<td class=\"tam\">"+dm.ModeFrequency(name) +"%\r\n");
1147 out.write ("</tr>\r\n");
1148
1149 if(mds.getOaiPrefix().equalsIgnoreCase("oai_dc")){
1150
1151 HashMap suggestionMap = generateMetadataElementSortList(fileName,"ASCII",linkName);
1152
1153 if(suggestionMap.size()>0){
1154 boolean status = compareElement(fileName,collectionFolder,suggestionMap,linkName);
1155 if(status){
1156 out.write("<tr><td class=\"tam\"><a href =\""+fileName+"_Suggestion.html\">View Potential Duplicate List</a>");
1157 if(percentage<100){
1158 out.write ("<td class=\"tam\"><a href =\""+fileName+"_IncompletedList.html\">Records missing "+fileName+"</a></td>");
1159 createIncompletedList(fileName,linkName,collectionFolder);
1160 }
1161 else{
1162 out.write("<td class=\"tam\">No Records Missing "+ fileName);
1163 }
1164 }
1165 else{
1166 out.write("<tr><td class=\"tam\">No Potential Duplicates");
1167 if(percentage<100){
1168 out.write ("<td class=\"tam\"><a href =\""+fileName+"_IncompletedList.html\">Records missing "+fileName+"</a></td>");
1169 createIncompletedList(fileName,linkName,collectionFolder);
1170 }
1171 else{
1172 out.write("<td class=\"tam\">No Records Missing "+ fileName);
1173 }
1174 }
1175 }
1176 else{
1177 out.write("<tr><td class=\"tam\">No Potential Duplicates");
1178 if(percentage<100){
1179 out.write ("<td class=\"tam\"><a href =\""+fileName+"_IncompletedList.html\">Records missing "+fileName+"</a></td>");
1180 createIncompletedList(fileName,linkName,collectionFolder);
1181 }
1182 else{
1183 out.write("<td class=\"tam\">No Records Missing "+ fileName);
1184 }
1185 }
1186 generateMetadataElementSortList(fileName,"Frequency-based",linkName);
1187 }
1188 else{
1189
1190 HashMap suggestionMap = generateMetadataElementSortList(fileName,"ASCII",linkName);
1191
1192 if(suggestionMap.size()>0){
1193 boolean status = compareElement(fileName,collectionFolder,suggestionMap,linkName);
1194 if(status){
1195 out.write("<tr><td class=\"tam\"><a href =\""+fileName+"_Suggestion.html\">View Potential Duplicate List</a>");
1196 }
1197 else{
1198 out.write("<tr><td class=\"tam\">No Potential Duplicates");
1199 }
1200 }
1201 else{
1202 out.write("<tr><td class=\"tam\">No Potential Duplicates");
1203 }
1204
1205 if(percentage<100){
1206 out.write ("<td class=\"tam\"><a href =\""+fileName+"_IncompletedList.html\">Records missing "+fileName+"</a></td>");
1207 createIncompletedList(fileName,linkName,collectionFolder);
1208 }
1209 else{
1210 out.write("<td class=\"tam\">No Records Missing "+ fileName);
1211 }
1212
1213 generateMetadataElementSortList(fileName,"Frequency-based",linkName);
1214 }
1215
1216 out.write ("<tr>\r\n");
1217 out.write ("<td class=\"tam\"><a href =\""+fileName+"_Frequency-based.html\">"+"View Full Frequency Sorted list</a>" +
1218 " <td class=\"tam\"><a href =\""+fileName+"_ASCII.html\">"+"View Full ASCII Sorted list</a></td>");
1219 out.write ("</tr>\r\n");
1220 out.write ("</table><br>\r\n");
1221 out.write ("<table border=\"1\" align=\"center\" width=1000>\r\n");
1222 out.write ("<colgroup width=\"500\" span=\"2\">\r\n");
1223 out.write ("<tr>\r\n");
1224 out.write ("<th>ASCII-Based\r\n");
1225 out.write ("<th>First Five\r\n");
1226 out.write ("</tr>\r\n");
1227
1228 String[] temp = dm.getSortList(name,"ASCII");
1229 String[] temp2 = {"&nbsp;","&nbsp;","&nbsp;","&nbsp;","&nbsp;"};
1230 int length = 0;
1231
1232 if(temp.length>=5){
1233 length=5;
1234 }
1235 else if(temp.length<5){
1236 length = temp.length;
1237 }
1238
1239 for(int i =0; i<length; i++){
1240 temp2[i] = temp[i];
1241 }
1242
1243 int x = temp2.length;
1244 for(int a = 0; a<x; a++){
1245 out.write ("<tr>\r\n");
1246 if(!temp2[a].equals("&nbsp;")){
1247 out.write ("<th>"+(a+1)+"\r\n");
1248 }
1249 else{
1250 out.write ("<th>&nbsp;\r\n");
1251 }
1252
1253 if(temp2[a].startsWith("http://")){
1254 if(temp2[a].length()>60){
1255 out.write ("<td><a href = \""+temp2[a]+"\">"+temp2[a].substring(0, 60)+"...</a>");
1256 }
1257 else {
1258 out.write ("<td><a href = \""+temp2[a]+"\">"+temp2[a]+"</a>");
1259 }
1260 }
1261 else{
1262 char singleChar = temp2[a].charAt(0);
1263 if(temp2[a].length()>61 ){
1264 if(temp2[a].startsWith(" ") && ((int)singleChar!=65279)){
1265 temp2[a] = temp2[a].substring(1);
1266 out.write ("<td>"+spaceLeft+space+spaceRight+temp2[a].substring(0, 60)+"<a href=\""+fileName+"_ASCII.html#topFive\"> ... </a>\r\n");
1267 }
1268 else if (((int)singleChar==65279)){
1269 temp2[a] = temp2[a].substring(1);
1270 out.write ("<td>"+spaceLeft+oddChar+spaceRight+temp2[a].substring(0, 60)+"<a href=\""+fileName+"_ASCII.html#topFive\"> ... </a>\r\n");
1271 }
1272 else{
1273 out.write ("<td>"+temp2[a].substring(0, 60)+"<a href=\""+fileName+"_ASCII.html#topFive\"> ... </a>\r\n");
1274 }
1275 }
1276 else {
1277 if(temp2[a].startsWith(" ")&& ((int)singleChar!=65279)){
1278 temp2[a] = temp2[a].substring(1);
1279 out.write ("<td>"+spaceLeft+space+spaceRight+temp2[a]+"\r\n");
1280 }
1281 else if (((int)singleChar==65279)){
1282 temp2[a] = temp2[a].substring(1);
1283 out.write ("<td>"+spaceLeft+oddChar+spaceRight+temp2[a]+"\r\n");
1284 }
1285 else{
1286 out.write ("<td>"+temp2[a]+"\r\n");
1287 }
1288 }
1289 }
1290 out.write ("</tr>\r\n");
1291 }
1292
1293 String[] temp3 ={"&nbsp;","&nbsp;","&nbsp;","&nbsp;","&nbsp;"};
1294 length = 0;
1295 int start = temp.length;
1296
1297 if(temp.length>=5){
1298 length= 5;
1299 }
1300 else if(temp.length<5){
1301 length = temp.length;
1302 }
1303
1304 for(int i = length; i>0; i--){
1305 temp3[i-1] = temp[start-1];
1306 start--;
1307 }
1308
1309 out.write ("<tr>\r\n");
1310 out.write ("<th>......\r\n");
1311 out.write ("<th>Last Five\r\n");
1312 out.write ("</tr>\r\n");
1313
1314 int counter = temp.length;
1315 start = temp.length;
1316 x = temp3.length;
1317
1318 for(int a = 0; a<x; a++){
1319 out.write ("<tr>\r\n");
1320 if(!temp3[a].equals("&nbsp;")){
1321 out.write ("<th>"+(start-length+1+a));
1322 }
1323 else{
1324 out.write ("<th>&nbsp;");
1325 }
1326 if(temp3[a].startsWith("http://")){
1327 if(temp3[a].length()>60){
1328 out.write ("<td><a href = \""+temp3[a]+"\">"+temp3[a].substring(0, 60)+"...</a>");
1329 }
1330 else {
1331 out.write ("<td><a href = \""+temp3[a]+"\">"+temp3[a]+"</a>");
1332 }
1333 }
1334 else{
1335 char singleChar = temp3[a].charAt(0);
1336 if(temp3[a].length()>61){
1337 if(temp3[a].startsWith(" ") && (int)singleChar!=65279){
1338 temp3[a] = temp3[a].substring(1);
1339 out.write ("<td>"+spaceLeft+space+spaceRight+temp3[a].substring(0, 60)+"<a href=\""+fileName+"_ASCII.html#lastFive\"> ... </a>\r\n");
1340 }
1341 else if((int)singleChar==65279){
1342 temp3[a] = temp3[a].substring(1);
1343 out.write ("<td>"+spaceLeft+oddChar+spaceRight+temp3[a].substring(0, 60)+"<a href=\""+fileName+"_ASCII.html#lastFive\"> ... </a>\r\n");
1344 }
1345 else{
1346 out.write ("<td>"+temp3[a].substring(0, 60)+"<a href=\""+fileName+"_ASCII.html#lastFive\"> ... </a>\r\n");
1347 }
1348 }
1349 else{
1350 if(temp3[a].startsWith(" ")&& (int)singleChar!=65279){
1351 temp3[a] = temp3[a].substring(1);
1352 out.write ("<td>"+spaceLeft+space+spaceRight+temp3[a]+"\r\n");
1353 }
1354 else if((int)singleChar==65279){
1355 out.write ("<td>"+spaceLeft+oddChar+spaceRight+temp3[a]+"\r\n");
1356 }
1357 else{
1358 out.write ("<td>"+temp3[a]+"\r\n");
1359 }
1360 }
1361 out.write ("</tr>\r\n");
1362 }
1363 }
1364 out.write ("</table><br>\r\n");
1365 out.write ("<table border=\"1\" align=\"center\" width=1000>\r\n");
1366 out.write ("<colgroup width=\"500\" span=\"2\">\r\n");
1367 out.write ("<tr>\r\n");
1368 out.write ("<th>Frequency-Based:\r\n");
1369 out.write ("<th>First Five\r\n");
1370 out.write ("</tr>\r\n");
1371
1372 HashMap xMap = dm.getDistinctValueMap(name);
1373 temp = dm.getSortList(name,"Frequency-based");
1374 temp2 = new String[] {"&nbsp;","&nbsp;","&nbsp;","&nbsp;","&nbsp;"};
1375 length = 0;
1376
1377 if(temp.length>=5){
1378 length=5;
1379 }
1380 else if(temp.length<5){
1381 length = temp.length;
1382 }
1383
1384 for(int i =0; i<length; i++){
1385 temp2[i] = temp[i];
1386 }
1387
1388 ArrayList tempList = new ArrayList();
1389 for(int i =0; i<length; i++){
1390 if(((Integer)xMap.get(temp2[i])).intValue()==1){
1391 tempList.add(temp2[i]);
1392 }
1393 }
1394
1395 x = temp2.length;
1396 for(int a = 0; a<x; a++){
1397 out.write ("<tr>\r\n");
1398 if(!temp2[a].equals("&nbsp;")){
1399 char singleChar = temp2[a].charAt(0);
1400 out.write ("<th>"+(a+1)+". (No. of occurrences: "+((Integer)xMap.get(temp2[a])).toString()+")\r\n");
1401 if(temp2[a].startsWith("http://") && (int)singleChar != 65279){
1402 if(temp2[a].length()>61){
1403 out.write ("<td><a href = \""+temp2[a]+"\">"+temp2[a].substring(0, 60)+"...</a>");
1404 }
1405 else {
1406 out.write ("<td><a href = \""+temp2[a]+"\">"+temp2[a]+"</a>");
1407 }
1408 }
1409
1410 else{
1411 if(temp2[a].length()>61){
1412 if(temp2[a].startsWith(" ")){
1413 temp2[a] = temp2[a].substring(1);
1414 out.write ("<td>"+spaceLeft+space+spaceRight+temp2[a].substring(0, 60)+"<a href=\""+fileName+"_Frequency-based.html#topFive\"> ... </a>\r\n");
1415 }
1416 else if((int)singleChar == 65279){
1417 temp2[a] = temp2[a].substring(1);
1418 out.write ("<td>"+spaceLeft+oddChar+spaceRight+temp2[a].substring(0, 60)+"<a href=\""+fileName+"_Frequency-based.html#topFive\"> ... </a>\r\n");
1419 }
1420 else{
1421 out.write ("<td>"+temp2[a].substring(0, 60)+"<a href=\""+fileName+"_Frequency-based.html#topFive\"> ... </a>\r\n");
1422 }
1423 }
1424 else{
1425 if(temp2[a].startsWith(" ")){
1426 temp2[a] = temp2[a].substring(1);
1427 out.write ("<td>"+spaceLeft+space+spaceRight+temp2[a]+"\r\n");
1428 }
1429 else if((int)singleChar == 65279){
1430 temp2[a] = temp2[a].substring(1);
1431 out.write ("<td>"+spaceLeft+oddChar+spaceRight+temp2[a]+"\r\n");
1432 }
1433 else{
1434 out.write ("<td>"+temp2[a]+"\r\n");
1435 }
1436 }
1437 }
1438 }
1439 else{
1440 out.write ("<th>&nbsp;\r\n");
1441 out.write ("<td>\r\n");
1442 }
1443 out.write ("</tr>\r\n");
1444 }
1445
1446 temp3 = new String[]{"&nbsp;","&nbsp;","&nbsp;","&nbsp;","&nbsp;"};
1447 length = 0;
1448 start = temp.length;
1449
1450 if(temp.length>=5){
1451 length= 5;
1452 }
1453 else if(temp.length<5){length = temp.length;}
1454
1455 for(int i = length; i>0; i--){
1456 temp3[i-1] = temp[start-1];
1457 start--;
1458 }
1459
1460 out.write ("<tr>\r\n");
1461 out.write ("<th>......\r\n");
1462 out.write ("<th>Last Five\r\n");
1463 out.write ("</tr>\r\n");
1464
1465 x = temp3.length;
1466 start = temp.length;
1467 for(int a = 0; a<x; a++){
1468 out.write ("<tr>\r\n");
1469 if(!temp3[a].equals("&nbsp;")){
1470 out.write ("<th>"+(start-length+1+a)+". (No. of occurrences: "+((Integer)xMap.get(temp3[a])).toString()+")\r\n");
1471 char singleChar = temp3[a].charAt(0);
1472 if(temp3[a].startsWith("http://")){
1473 if(temp3[a].length()>60){
1474 out.write ("<td><a href = \""+temp3[a]+"\">"+temp3[a].substring(0, 60)+"...</a>");
1475 }
1476 else {
1477 out.write ("<td><a href = \""+temp3[a]+"\">"+temp3[a]+"</a>");
1478 }
1479 }
1480 else{
1481 if(temp3[a].length()>61){
1482 if(temp3[a].startsWith(" ")){
1483 temp3[a] = temp3[a].substring(1);
1484 out.write ("<td>"+spaceLeft+space+spaceRight+temp3[a].substring(0, 60)+"<a href=\""+fileName+"_Frequency-based.html#lastFive\"> ... </a>\r\n");
1485 }
1486 else if((int)singleChar == 65279){
1487 temp3[a] = temp3[a].substring(1);
1488 out.write ("<td>"+spaceLeft+oddChar+spaceRight+temp3[a].substring(0, 60)+"<a href=\""+fileName+"_Frequency-based.html#lastFive\"> ... </a>\r\n");
1489 }
1490 else{
1491 out.write ("<td>"+temp3[a].substring(0, 60)+"<a href=\""+fileName+"_Frequency-based.html#lastFive\"> ... </a>\r\n");
1492 }
1493 }
1494 else{
1495 if(temp3[a].startsWith(" ")){
1496 out.write ("<td>"+spaceLeft+space+spaceRight+temp3[a]+"\r\n");
1497 }
1498 else if((int)singleChar == 65279){
1499 temp3[a] = temp3[a].substring(1);
1500 out.write ("<td>"+spaceLeft+oddChar+spaceRight+temp3[a]+"\r\n");
1501 }
1502 else{
1503 out.write ("<td>"+temp3[a]+"\r\n");
1504 }
1505 }
1506 }
1507 }
1508 else{
1509 out.write ("<th>&nbsp;\r\n");
1510 out.write ("<th>\r\n");
1511 }
1512 out.write ("</tr>\r\n");
1513 }
1514
1515 out.write ("</table>\r\n");
1516 out.write ("<p> <a href=\" Overall.html \">Summary</a>");
1517 out.write ("&raquo;");
1518
1519 if(linkName.equalsIgnoreCase("dublin")){
1520 out.write ("<a href=\""+linkName+".html\">Metadata Detail ("+metadataSet1+")</a>");
1521 }
1522 else if (linkName.equalsIgnoreCase("extracted")){
1523 out.write ("<a href=\""+linkName+".html\">Metadata Detail ("+metadataSet2+")</a>");
1524 }
1525 else{
1526 out.write ("<a href=\""+linkName+".html\">Metadata Detail ("+linkName+")</a>");
1527 }
1528
1529 out.write ("</body></html>\r\n");
1530 out.close();
1531 fstream.close();
1532 }catch (Exception e){//Catch exception if any
1533 e.printStackTrace();
1534 }
1535 }
1536
1537 private HashMap generateMetadataElementSortList(String title, String sort, String metadataSetName){
1538
1539 String fileName = title+"_"+sort;
1540 String collectionFolder = mds.getCollectionName();
1541 String IDENTIFIER = linkIdentifier;
1542 SearchLink sl = new SearchLink(mds.StatsDirectory);
1543 HashMap suggestionMap = new HashMap();
1544 HashMap valueMap = sl.createValueMap(title,collectionFolder);
1545 HashMap linkMap = sl.createLinkMap(IDENTIFIER,collectionFolder);
1546 HashMap internalIDMap = dm.getInternalIdentifier(title);
1547 boolean status = false;
1548
1549 try{
1550 FileWriter fstream = new FileWriter(destination+fileName+".html");
1551 BufferedWriter out = new BufferedWriter(fstream);
1552
1553 out.write("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">");
1554 out.write ("<html>\r\n");
1555 out.write("<head>\r\n<title>Metadata Element Sort List</title>\r\n");
1556 out.write("<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\">\r\n");
1557 out.write("<script type=\"text/javascript\" src=\"../script/getInfomation.js\"></script>\r\n");
1558 out.write("<script type=\"text/javascript\" src=\"http://yui.yahooapis.com/2.4.1/build/yahoo/yahoo-min.js\"></script>\r\n");
1559 out.write("<script type=\"text/javascript\" src=\"http://yui.yahooapis.com/2.4.1/build/event/event-min.js\"></script>\r\n");
1560 out.write("<script type=\"text/javascript\" src=\"http://yui.yahooapis.com/2.4.1/build/connection/connection-min.js\"></script>\r\n");
1561 out.write("<style type=\"text/css\">\r\n");
1562 out.write("td{height:20; text-align:left;padding-left:5px;}\r\n");
1563 out.write(".tam{height:20; text-align:center}\r\n");
1564 out.write("th{height:20; text-align:center;}\r\n");
1565 out.write("body{font-family: Arial;}\r\n");
1566 out.write("</style>\r\n");
1567 out.write("</head><body id=\""+collectionFolder+"\">\r\n");
1568 out.write("<td><div style=\"float:left;\"><a href=\" Overall.html \">Summary</a>");
1569 out.write("&raquo;");
1570
1571 if(metadataSetName.equalsIgnoreCase("dublin")){
1572 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet1+")</a>");
1573 }
1574 else if (metadataSetName.equalsIgnoreCase("extracted")){
1575 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet2+")</a>");
1576 }
1577 else{
1578 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSetName+")</a>");
1579 }
1580
1581 out.write("&raquo;");
1582 out.write("<a href=\""+title+".html\">"+title+"</a></div>");
1583 out.write("<div style=\"float:right;\"><a href=\"http://chnm.gmu.edu/tools/surveys/4386/\">Please send feedback about the Mat tool</a></div></td><br>");
1584
1585 String[] list = dm.getSortList(title, sort);
1586
1587 if(list.length>=1){
1588 if(sort.equals("ASCII")){
1589 out.write("<table border=\"1\" align=\"center\" width=\"800\">\r\n");
1590 out.write ("<h2 align=\"center\">"+ title+"</h2>\r\n");
1591 out.write ("<th class=\"tam\">ASCII Sort\r\n");
1592 out.write ("<th class=\"tam\">Element Values\r\n");
1593 out.write ("<th class=\"tam\">Source Documents\r\n");
1594 out.write ("<th class=\"tam\">Internal Link\r\n");
1595 out.write ("<a name='topFive'>\r\n");
1596
1597 int counter = 0;
1598
1599 for(int i = 0; i<list.length; i++){
1600 if(list.length<=5 && i==0){
1601 out.write ("<a name='lastFive'>\r\n");
1602 }
1603 else if((list.length>5) && (list.length-5==i)){
1604 out.write ("<a name='lastFive'>\r\n");
1605 }
1606
1607 InternalLink il= (InternalLink)internalIDMap.get(list[i]);
1608 ArrayList alist2 = il.retrieveList();
1609 String id = (String)alist2.get(0);
1610 id = id.substring(4);
1611 out.write("<tr id=\""+id+"\" >\r\n");
1612
1613 if(list[i].length()>=201){
1614 if(list[i].startsWith("http://")){
1615 if(title.equals(IDENTIFIER)){
1616 if(list[i].length()>=100){
1617 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"...</a>");
1618 }
1619 else{
1620 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
1621 }
1622 out.write("<td><a href=\""+list[i]+"\">Source</a>\r\n");
1623 }
1624 else{
1625 ArrayList alist= sl.CreateIndentifierLinkPage(title, list[i],IDENTIFIER, collectionFolder,valueMap,linkMap);
1626 if(list[i].length()>=100){
1627 String url = (String)alist.get(0);
1628 if(alist.size()==1){
1629 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"...</a>");
1630 if(url.startsWith("http://")){
1631 out.write("<td><a href=\""+(String)alist.get(0)+"\">Source</a>\r\n");
1632 }else{
1633 out.write("<td>Source\r\n");
1634 }
1635 }
1636 else if(alist.size()>1){
1637 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"...</a>");
1638 if(url.startsWith("http://")){
1639 out.write("<td><a href=\""+(String)alist.get(0)+"\">Source</a>...\r\n");
1640 }else{
1641 out.write("<td>Source\r\n");
1642 }
1643 }
1644 else{
1645 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"</a>");
1646 out.write("<td>No Source Available\r\n");
1647 }
1648 suggestionMap.put(list[i], url);
1649 }
1650 else{
1651 String url = (String)alist.get(0);
1652 if(alist.size()==1){
1653 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
1654 if(url.startsWith("http://")){
1655 out.write("<td><a href=\""+(String)alist.get(0)+"\">Source</a>\r\n");
1656 }else{
1657 out.write("<td>Source\r\n");
1658 }
1659 }
1660 else if(alist.size()>1){
1661 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
1662 if(url.startsWith("http://")){
1663 out.write("<td><a href=\""+(String)alist.get(0)+"\">Source</a>...\r\n");
1664 }else{
1665 out.write("<td>Source\r\n");
1666 }
1667 }
1668 else{
1669 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
1670 out.write("<td>No Source Available\r\n");
1671 }
1672 suggestionMap.put(list[i], url);
1673 }
1674 }
1675 }
1676 else{
1677 ArrayList alist= sl.CreateIndentifierLinkPage(title, list[i],IDENTIFIER, collectionFolder,valueMap,linkMap);
1678
1679
1680 if(list[i].startsWith(" ")){
1681 String elements = list[i];
1682 list[i] = list[i].substring(1);
1683
1684 if(alist.size()==1){
1685 out.write("<td>"+(counter+1)+"<td>"+spaceLeft+space+spaceRight+list[i].substring(0, 200)+"...<td>"+
1686 "<a href=\""+(String)alist.get(0)+"\">Source</a>\r\n");
1687 suggestionMap.put(elements, (String)alist.get(0));
1688 }
1689 else if(alist.size()>1){
1690 out.write("<td>"+(counter+1)+"<td>"+spaceLeft+space+spaceRight+list[i].substring(0, 200)+"...<td>"+
1691 "<a href=\""+(String)alist.get(0)+"\">Source</a>...\r\n");
1692 suggestionMap.put(elements, (String)alist.get(0));
1693 }
1694 else{
1695 out.write("<td>"+(counter+1)+"<td>"+spaceLeft+space+spaceRight+list[i].substring(0, 200)+"...<td>No Source Available \r\n");
1696 suggestionMap.put(elements, "No Source Available");
1697 }
1698 }
1699 else{
1700 if(alist.size()==1){
1701 out.write("<td>"+(counter+1)+"<td>"+list[i].substring(0, 200)+"...<td>" +
1702 "<a href=\""+(String)alist.get(0)+"\">Source</a>\r\n");
1703 suggestionMap.put(list[i], (String)alist.get(0));
1704 }
1705 else if(alist.size()>1){
1706 out.write("<td>"+(counter+1)+"<td>"+list[i].substring(0, 200)+"...<td>" +
1707 "<a href=\""+(String)alist.get(0)+"\">Source</a>...\r\n");
1708 suggestionMap.put(list[i], (String)alist.get(0));
1709 }
1710 else{
1711 out.write("<td>"+(counter+1)+"<td>"+list[i].substring(0, 200)+"... <td>No Source Available\r\n");
1712 suggestionMap.put(list[i],"No Source Available");
1713 }
1714 }
1715 }
1716
1717 }
1718 else{
1719 if(list[i].startsWith("http://")){
1720 if(title.equals(IDENTIFIER)){
1721 if(list[i].length()>=100){
1722 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"...</a>");
1723 }
1724 else{
1725 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
1726 }
1727 out.write("<td><a href=\""+list[i]+"\">Source</a>\r\n");
1728 }
1729 else{
1730 ArrayList alist= sl.CreateIndentifierLinkPage(title, list[i],IDENTIFIER, collectionFolder,valueMap,linkMap);
1731
1732 if(list[i].length()>=100){
1733 if(alist.size()==1){
1734 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"...</a>");
1735 out.write("<td><a href=\""+(String)alist.get(0)+"\">Source</a>\r\n");
1736 }
1737 else if(alist.size()>1){
1738 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"...</a>");
1739 out.write("<td><a href=\""+(String)alist.get(0)+"\">Source</a>...\r\n");
1740 }
1741 else{
1742 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
1743 out.write("<td>No Source Available\r\n");
1744 }
1745 }
1746 else{
1747 if(alist.size()==1){
1748 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
1749 out.write("<td><a href=\""+(String)alist.get(0)+"\">Source</a>\r\n");
1750 suggestionMap.put(list[i], (String)alist.get(0));
1751 }
1752 else if(alist.size()>1){
1753 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
1754 out.write("<td><a href=\""+(String)alist.get(0)+"\">Source</a>...\r\n");
1755 suggestionMap.put(list[i], (String)alist.get(0));
1756 }
1757 else{
1758 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
1759 out.write("<td>No Source Available\r\n");
1760 suggestionMap.put(list[i],"No Source Available");
1761 }
1762 }
1763 }
1764 }
1765 else{
1766 ArrayList alist= sl.CreateIndentifierLinkPage(title, list[i],IDENTIFIER, collectionFolder,valueMap,linkMap);
1767
1768 if(list[i].startsWith(" ")){
1769 String elements = list[i];
1770 list[i] = list[i].substring(1);
1771
1772 if(alist.size()==1){
1773 out.write("<td>"+(counter+1)+"<td>"+spaceLeft+space+spaceRight+list[i]+
1774 "<td><a href=\""+(String)alist.get(0)+"\">Source</a>\r\n");
1775 suggestionMap.put(list[i], (String)alist.get(0));
1776 }
1777 else if(alist.size()>1){
1778 out.write("<td>"+(counter+1)+"<td>"+spaceLeft+space+spaceRight+list[i]+
1779 "<td><a href=\""+(String)alist.get(0)+"\">Source</a>...\r\n");
1780 suggestionMap.put(list[i], (String)alist.get(0));
1781 }
1782 else{
1783 out.write("<td>"+(counter+1)+"<td>"+spaceLeft+space+spaceRight+list[i]+"<td>No Source Available\r\n");
1784 suggestionMap.put(elements,"No Source Available");
1785 }
1786 }
1787 else{
1788 if(alist.size()==1){
1789 out.write("<td>"+(counter+1)+"<td>"+list[i]+"<td><a href=\""+alist.get(0)+"\">Source</a>\r\n");
1790 suggestionMap.put(list[i],(String)alist.get(0));
1791 }
1792 else if(alist.size()>1){
1793 out.write("<td>"+(counter+1)+"<td>"+list[i]+"<td><a href=\""+alist.get(0)+"\">Source</a>...\r\n");
1794 suggestionMap.put(list[i],(String)alist.get(0));
1795 }
1796 else{
1797 out.write("<td>"+(counter+1)+"<td>"+list[i]+"<td>No Source Available\r\n");
1798 suggestionMap.put(list[i],"No Source Available");
1799 }
1800 }
1801 }
1802 }
1803 counter++;
1804 out.write("<td onclick=\"GD(this)\">View");
1805 }
1806
1807 if(suggestionMap.size()>1 && !title.equals(IDENTIFIER)){
1808 status = true;
1809 }
1810 }
1811 else{
1812 out.write("<table border=\"1\" align=\"center\" width=\"800\">\r\n");
1813 out.write ("<h2 align=\"center\">"+ title+"</h2>\r\n");
1814 out.write ("<th class=\"tam\">&nbsp;\r\n");
1815 out.write ("<th class=\"tam\">Frequency\r\n");
1816 out.write ("<th class=\"tam\">Element Values\r\n");
1817 out.write ("<th class=\"tam\">Source Documents\r\n");
1818 out.write ("<th class=\"tam\">Internal Link\r\n");
1819 out.write ("<a name='topFive'>\r\n");
1820
1821 HashMap xMap = dm.getDistinctValueMap(title);
1822 int counter = 0;
1823
1824 for(int i = 0; i<list.length; i++){
1825 if(list.length<=5 && i==0){
1826 out.write ("<a name='lastFive'>\r\n");
1827 }
1828 else if((list.length>5) && (list.length-5==i)){
1829 out.write ("<a name='lastFive'>\r\n");
1830 }
1831
1832 InternalLink il= (InternalLink)internalIDMap.get(list[i]);
1833 ArrayList alist2 = il.retrieveList();
1834 String id = (String)alist2.get(0);
1835 id = id.substring(4);
1836
1837 out.write("<tr id=\""+id+"\" >\r\n");
1838 out.write("<td>"+(counter+1)+"<th> "+((Integer)xMap.get(list[i])).toString()+"\r\n");
1839
1840 if(list[i].length()>=201){
1841 if(list[i].startsWith("http://")){
1842 if(title.equals(IDENTIFIER)){
1843 if(list[i].length()>=100){
1844 out.write("<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"</a>");
1845 }
1846 else{
1847 out.write("<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
1848 }
1849 out.write("<td><a href=\""+list[i]+"\">Source</a>\r\n");
1850 }
1851 else{
1852 ArrayList alist= sl.CreateIndentifierLinkPage(title, list[i],IDENTIFIER, collectionFolder,valueMap,linkMap);
1853 if(list[i].length()>=100){
1854 if(alist.size()==1){
1855 out.write("<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"</a>");
1856 out.write("<td><a href=\""+alist.get(0)+"\">Source</a>\r\n");
1857 }
1858 else if (alist.size()>1){
1859 out.write("<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"</a>");
1860 out.write("<td><a href=\""+alist.get(0)+"\">Source</a>...\r\n");
1861 }
1862 else{
1863 out.write("<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"</a>");
1864 out.write("<td>No Source Available\r\n");
1865 }
1866 }
1867 else{
1868 out.write("<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
1869 }
1870 }
1871 }
1872 else{
1873 ArrayList alist= sl.CreateIndentifierLinkPage(title, list[i],IDENTIFIER, collectionFolder,valueMap,linkMap);
1874
1875 if(list[i].startsWith(" ")){
1876 list[i]=list[i].substring(1);
1877 if(alist.size()==1){
1878 out.write("<td>"+spaceLeft+space+spaceRight+list[i].substring(0,200)+"...<td>"+
1879 "<a href=\""+(String)alist.get(0)+"\">Source</a>\r\n");
1880 }
1881 else if(alist.size()>1){
1882 out.write("<td>"+spaceLeft+space+spaceRight+list[i].substring(0,200)+"...<td>"+
1883 "<a href=\""+(String)alist.get(0)+"\">Source</a>...\r\n");
1884 }
1885 else{
1886 out.write("<td>"+spaceLeft+space+spaceRight+list[i].substring(0,200)+"...<td>No Source Available\r\n");
1887 }
1888 }
1889 else{
1890 if(alist.size()==1){
1891 out.write("<td>"+list[i].substring(0,200)+"...<td>" +
1892 "<a href=\""+(String)alist.get(0)+"\">Source</a>\r\n");
1893 }
1894 else if(alist.size()>1){
1895 out.write("<td>"+list[i].substring(0,200)+"...<td>"+
1896 "<a href=\""+(String)alist.get(0)+"\">Source</a>...\r\n");
1897 }
1898 else{
1899 out.write("<td>"+list[i].substring(0,200)+"...<td>No Source Available\r\n");
1900 }
1901 }
1902 }
1903 }
1904 else{
1905 if(list[i].startsWith("http://")){
1906 if(title.equals(IDENTIFIER)){
1907 if(list[i].length()>=100){
1908 out.write("<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"</a>");
1909 }
1910 else{
1911 out.write("<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
1912 }
1913 out.write("<td><a href=\""+list[i]+"\">Source</a>\r\n");
1914 }
1915 else{
1916 ArrayList alist= sl.CreateIndentifierLinkPage(title, list[i],IDENTIFIER, collectionFolder,valueMap,linkMap);
1917
1918 if(list[i].length()>=100){
1919 if(alist.size()==1){
1920 out.write("<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"</a>");
1921 out.write("<td><a href=\""+(String)alist.get(0)+"\">Source</a>\r\n");
1922 }
1923 else if(alist.size()>1){
1924 out.write("<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"</a>");
1925 out.write("<td><a href=\""+(String)alist.get(0)+"\">Source</a>...\r\n");
1926 }
1927 else{
1928 out.write("<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"</a>");
1929 out.write("<td>No Source Available\r\n");
1930 }
1931 }
1932 else{
1933 if(alist.size()==1){
1934 out.write("<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
1935 out.write("<td><a href=\""+(String)alist.get(0)+"\">Source</a>\r\n");
1936 }
1937 else if(alist.size()>1){
1938 out.write("<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
1939 out.write("<td><a href=\""+(String)alist.get(0)+"\">Source</a>...\r\n");
1940 }
1941 else{
1942 out.write("<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
1943 out.write("<td>No Source Available\r\n");
1944 }
1945 }
1946 }
1947 }
1948 else{
1949
1950 ArrayList alist= sl.CreateIndentifierLinkPage(title, list[i],IDENTIFIER, collectionFolder,valueMap,linkMap);
1951
1952 if(list[i].startsWith(" ")){
1953 list[i]=list[i].substring(1);
1954 if(alist.size()==1){
1955 out.write("<td>"+spaceLeft+space+spaceRight+list[i]+
1956 "<td><a href=\""+(String)alist.get(0)+"\">Source</a>\r\n");
1957 }
1958 else if(alist.size()>1){
1959 out.write("<td>"+spaceLeft+space+spaceRight+list[i]+
1960 "<td><a href=\""+(String)alist.get(0)+"\">Source</a>...\r\n");
1961 }
1962 else{
1963 out.write("<td>"+spaceLeft+space+spaceRight+list[i]+"<td>No Source Available\r\n");
1964 }
1965 }
1966 else{
1967 if(alist.size()==1){
1968 out.write("<td>"+list[i]+"<td><a href=\""+(String)alist.get(0)+"\">Source</a>\r\n");
1969 }
1970 else if(alist.size()>1){
1971 out.write("<td>"+list[i]+"<td><a href=\""+(String)alist.get(0)+"\">Source</a>...\r\n");
1972 }
1973 else{
1974 out.write("<td>"+list[i]+"<td>No Source Available\r\n");
1975 }
1976 }
1977 }
1978 }
1979 counter++;
1980 out.write("<td onclick=\"GD(this)\">View");
1981 }
1982 }
1983 out.write("</table>\r\n");
1984 }
1985 else{
1986 out.write("<p>Sorry! The list is empty</p>");
1987 }
1988 out.write("<tr>\r\n");
1989 out.write("<p><a href=\" Overall.html \">Summary</a>");
1990 out.write("&raquo;");
1991
1992 if(metadataSetName.equalsIgnoreCase("dublin")){
1993 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet1+")</a>");
1994 }
1995 else if (metadataSetName.equalsIgnoreCase("extracted")){
1996 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet2+")</a>");
1997 }
1998 else{
1999 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSetName+")</a>");
2000 }
2001
2002 out.write("&raquo;");
2003 out.write("<a href=\""+title+".html\">"+title+"</a></p>");
2004 out.write("</body></html>\r\n");
2005 out.close();
2006
2007 fstream.close();
2008 }catch(Exception e){
2009 e.printStackTrace();
2010 }
2011
2012 if(status){
2013 return suggestionMap;
2014 }
2015 else{
2016 return new HashMap();
2017 }
2018 }
2019
2020 private void setupMetadataLink(String fileName){
2021
2022 try{
2023
2024 DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
2025 DocumentBuilder docBuilder = docBuilderFactory.newDocumentBuilder();
2026 Document doc = docBuilder.newDocument();
2027 //doc = docBuilder.parse (new File("/research/cc108/greenstone3/web/mat/script/"+fileName+".xml"));
2028 String gsdl3Home = GlobalProperties.getGSDL3Home();
2029 String fileSeparator = File.separator;
2030 doc = docBuilder.parse (new File(gsdl3Home+ fileSeparator + "mat"+ fileSeparator +"script"+ fileSeparator + fileName+".xml"));
2031 Element rootNode = doc.getDocumentElement();
2032 NodeList listOfName = rootNode.getElementsByTagName("metadataElement");
2033 url = new HashMap();
2034
2035 for(int i = 0; i<listOfName.getLength(); i++){
2036 Node NameNode = listOfName.item(i);
2037 Element docElement = (Element)NameNode;
2038 NodeList valueList = docElement.getElementsByTagName("URL");
2039 Node urlParentNode = valueList.item(0);
2040 String urlText = urlParentNode.getChildNodes().item(0).getNodeValue();
2041
2042 valueList = docElement.getElementsByTagName("name");
2043 Node urlNameParentNode = valueList.item(0);
2044 String urlNameText = urlNameParentNode.getChildNodes().item(0).getNodeValue();
2045
2046 url.put(urlNameText, urlText);
2047 }
2048 }catch(Exception e){
2049 e.printStackTrace();
2050 }
2051 }
2052
2053 private void createIncompletedList(String fileName, String metadataSetName, String collectionFolder){
2054
2055 HashMap hp = dm.getIdentifierLink(linkIdentifier);
2056 String[] ids = dm.getDocumentIDList(fileName);
2057
2058 for(int i = 0; i<ids.length; i++){
2059 hp.remove(ids[i]);
2060 }
2061
2062 try{
2063 FileWriter fstream = new FileWriter(destination+fileName+"_IncompletedList.html");
2064 BufferedWriter out = new BufferedWriter(fstream);
2065
2066 out.write("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">");
2067 out.write ("<html>\r\n");
2068 out.write("<head>\r\n<title>Incompleted Document List</title>\r\n");
2069 out.write("<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\">\r\n");
2070 out.write("<style type=\"text/css\">\r\n");
2071 out.write("td{height:20; text-align:left;padding-left:5px;}\r\n");
2072 out.write(".tam{height:20; text-align:center}\r\n");
2073 out.write("th{height:20; text-align:center;}\r\n");
2074 out.write("body{font-family: Arial;}\r\n");
2075 out.write("</style>\r\n");
2076 out.write("</head><body>\r\n");
2077 out.write("<p><a href=\" Overall.html \">Summary</a>");
2078 out.write("&raquo;");
2079
2080 if(metadataSetName.equalsIgnoreCase("dublin")){
2081 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet1+")</a>");
2082 }
2083 else if (metadataSetName.equalsIgnoreCase("extracted")){
2084 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet2+")</a>");
2085 }
2086 else{
2087 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSetName+")</a>");
2088 }
2089
2090 out.write("&raquo;");
2091 out.write("<a href=\""+fileName+".html \">"+fileName+"</a>");
2092 out.write("<h3 align=center>"+fileName+" does not appear in the following documents</h3>");
2093 out.write("<table border=1 align=center><tr><th>Document ID<th>Source Link\r\n");
2094
2095 int counter = 0;
2096 Set s = hp.keySet();
2097 Iterator i = s.iterator();
2098
2099 while(i.hasNext()){
2100 counter++;
2101 String keys = (String)i.next();
2102 InternalLink il = (InternalLink) hp.get(keys);
2103 ArrayList alist = il.retrieveList();
2104 String url = (String)alist.get(0);
2105 out.write("<tr><td>"+counter+"<td><a href=\""+url+"\">"+url+"</a>\r\n");
2106 }
2107
2108 out.write("</table></body></html>\r\n");
2109 out.write("<p><a href=\" Overall.html \">Summary</a>");
2110 out.write("&raquo;");
2111
2112 if(metadataSetName.equalsIgnoreCase("dublin")){
2113 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet1+")</a>");
2114 }
2115 else if (metadataSetName.equalsIgnoreCase("extracted")){
2116 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet2+")</a>");
2117 }
2118 else{
2119 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSetName+")</a>");
2120 }
2121
2122 out.write("&raquo;");
2123 out.write("<a href=\""+fileName+".html \">"+fileName+"</a>");
2124
2125 out.close();
2126 fstream.close();
2127 }catch(IOException ex){
2128 ex.printStackTrace();
2129 }
2130 }
2131
2132 public boolean compareElement(String fileName,String collectionFolder,HashMap suggestionMap, String metadataSetName){
2133
2134 boolean status = false;
2135 Set kset = suggestionMap.keySet();
2136 ArrayList arrayList = new ArrayList();
2137 Iterator is = kset.iterator();
2138
2139 while(is.hasNext()){
2140 arrayList.add((String)is.next());
2141 }
2142
2143 HashMap distanceMap = new HashMap();
2144 int totalLength = 0;
2145 int arrayListLength = arrayList.size();
2146 int counter = 0;
2147 double distance ;
2148
2149 try{
2150 for(int i = 0; i<arrayListLength; i++){
2151 String keyword = (String)arrayList.get(i);
2152 int spaceCounterFront1 = 0;
2153 int spaceCounterEnd1 = 0;
2154 String keywordClone = keyword;
2155 String _keywordClone = keyword;
2156 String bkKeyWord = keyword;
2157 keywordClone = removeUnusedCharacter(keywordClone);
2158
2159 while(true){
2160 if(_keywordClone.length()>1){
2161 if(_keywordClone.charAt(0)==' '){
2162 _keywordClone = _keywordClone.substring(1);
2163 spaceCounterFront1++;
2164 }
2165 else{
2166 if(_keywordClone.charAt(_keywordClone.length()-1)==' '){
2167 _keywordClone = _keywordClone.substring(0,_keywordClone.length()-1);
2168 spaceCounterEnd1++;
2169 }
2170 else{
2171 break;
2172 }
2173 }
2174 }
2175 else{
2176 break;
2177 }
2178 }
2179
2180 int keywordLength = keywordClone.length();
2181 totalLength = totalLength + keywordLength;
2182
2183 for(int j = i+1; j<arrayListLength; j++){
2184 String keyword2 = (String)arrayList.get(j);
2185 counter ++;
2186 double pre_cost = 0;
2187 String keywordClone2 = keyword2;
2188 String _keywordClone2 = keyword2;
2189 String bkKeyWord2 = keyword2;
2190 CostModel cm = new CostModel();
2191 cm = removeUnusedCharacter(keywordClone2,pre_cost);
2192 keywordClone2 = cm.getString();
2193 pre_cost = cm.getCost();
2194
2195 int spaceCounterFront2 = 0;
2196 int spaceCounterEnd2 = 0;
2197
2198 while(true){
2199 if(_keywordClone2.length()>1){
2200 if(_keywordClone2.charAt(0)==' '){
2201 _keywordClone2 = _keywordClone2.substring(1);
2202 spaceCounterFront2++;
2203 }
2204 else{
2205 if(_keywordClone2.charAt(_keywordClone2.length()-1)==' '){
2206 _keywordClone2 = _keywordClone2.substring(0,_keywordClone2.length()-1);
2207 spaceCounterEnd2++;
2208 }
2209 else{
2210 break;
2211 }
2212 }
2213 }
2214 else{
2215 break;
2216 }
2217 }
2218
2219 if(counter == 50000){
2220 counter = 0;
2221 }
2222
2223 int keyword2Length = keywordClone2.length();
2224
2225 if(keywordLength>(keyword2Length+2) || (keywordLength+2)<keyword2Length ){}
2226 else{
2227 distance = calculateEditDistance(keywordClone.toLowerCase().toCharArray(),keywordClone2.toLowerCase().toCharArray());
2228 distance = distance + pre_cost;
2229
2230 if(distance<=2){
2231 if(distanceMap.containsKey(_keywordClone)){
2232 InternalLink il = (InternalLink)distanceMap.get(_keywordClone);
2233 String keywordHolder = _keywordClone2;
2234
2235 while(true){
2236 if(keywordHolder.indexOf(" ")!=-1){
2237 keywordHolder = keywordHolder.replaceFirst(" ",spaceLeft+space+spaceRight+" ");
2238 }
2239 else{
2240 break;
2241 }
2242 }
2243
2244 for(int a = 0; a<spaceCounterFront2; a++){
2245 keywordHolder = spaceLeft+space+spaceRight+keywordHolder;
2246 }
2247
2248 for(int a = 0; a<spaceCounterEnd2; a++){
2249 keywordHolder = keywordHolder+spaceLeft+space+spaceRight;
2250 }
2251
2252 InternalLink il2 = new InternalLink();
2253 il2.setValue(keywordHolder);
2254
2255 if(suggestionMap.containsKey(bkKeyWord2)){
2256 il2.increaseElement((String)suggestionMap.get(bkKeyWord2));
2257 }
2258 else{
2259 il2.increaseElement("&nbsp;");
2260 }
2261
2262 il.increaseNode(il2);
2263 distanceMap.put(_keywordClone, il);
2264 }
2265
2266 else{
2267 InternalLink il = new InternalLink();
2268 String keywordHolder = _keywordClone;
2269
2270 while(true){
2271 if(keywordHolder.indexOf(" ")!=-1){
2272 keywordHolder = keywordHolder.replaceFirst(" ",spaceLeft+space+spaceRight+" ");
2273 }
2274 else{
2275 break;
2276 }
2277 }
2278
2279 for(int a = 0; a<spaceCounterFront1; a++){
2280 keywordHolder = spaceLeft+space+spaceRight+keywordHolder;
2281 }
2282
2283 for(int a = 0; a<spaceCounterEnd1; a++){
2284 keywordHolder = keywordHolder+spaceLeft+space+spaceRight;
2285 }
2286
2287 InternalLink il2 = new InternalLink();
2288 il2.setValue(keywordHolder);
2289 if(suggestionMap.containsKey(bkKeyWord)){
2290 il2.increaseElement((String)suggestionMap.get(bkKeyWord));
2291 }
2292 else{
2293 il2.increaseElement("&nbsp;");
2294 }
2295
2296 il.increaseNode(il2);
2297 keywordHolder = _keywordClone2;
2298
2299 while(true){
2300 if(keywordHolder.indexOf(" ")!=-1){
2301 keywordHolder = keywordHolder.replaceFirst(" ",spaceLeft+space+spaceRight+" ");
2302 }
2303 else{
2304 break;
2305 }
2306 }
2307
2308 for(int a = 0; a<spaceCounterFront2; a++){
2309 keywordHolder = spaceLeft+space+spaceRight+keywordHolder;
2310 }
2311
2312 for(int a = 0; a<spaceCounterEnd2; a++){
2313 keywordHolder = keywordHolder+spaceLeft+space+spaceRight;
2314 }
2315
2316 InternalLink il3 = new InternalLink();
2317 il3.setValue(keywordHolder);
2318 if(suggestionMap.containsKey(bkKeyWord2)){
2319 il3.increaseElement((String)suggestionMap.get(bkKeyWord2));
2320 }
2321 else{
2322 il3.increaseElement("&nbsp;");
2323 }
2324
2325 il.increaseNode(il3);
2326 distanceMap.put(_keywordClone, il);
2327 }
2328 }
2329 }
2330 }
2331 }
2332
2333 if(distanceMap.size()!=0){
2334 generateHTML(distanceMap,fileName,metadataSetName);
2335 status = true;
2336 }
2337 }catch(Exception ex){
2338 ex.printStackTrace();
2339 }
2340 return status;
2341 }
2342
2343 private void generateHTML(HashMap distanceMap, String fileName,String metadataSetName){
2344
2345 String collectionFolder = mds.getCollectionName();
2346
2347 try{
2348 FileWriter fstream = new FileWriter(destination+fileName+"_Suggestion.html");
2349 BufferedWriter out = new BufferedWriter(fstream);
2350
2351 out.write("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">");
2352 out.write ("<html>\r\n");
2353 out.write("<head>\r\n<title>Potential Duplicate List</title>\r\n");
2354 out.write("<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\">\r\n");
2355 out.write("<style type=\"text/css\">\r\n");
2356 out.write("td{height:20; text-align:left;padding-left:5px;}\r\n");
2357 out.write(".tam{height:20; text-align:center}\r\n");
2358 out.write("th{height:20; text-align:left;}\r\n");
2359 out.write("body{font-family: Arial;}\r\n");
2360 out.write("</style>\r\n");
2361 out.write("</head><body>\r\n");
2362 out.write("<p><a href=\" Overall.html \">Summary</a>");
2363 out.write("&raquo;");
2364
2365 if(metadataSetName.equalsIgnoreCase("dublin")){
2366 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet1+")</a>");
2367 }
2368 else if (metadataSetName.equalsIgnoreCase("extracted")){
2369 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet2+")</a>");
2370 }
2371 else{
2372 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSetName+")</a>");
2373 }
2374
2375 out.write("&raquo;");
2376 out.write("<a href=\""+fileName+".html \">"+fileName+"</a>");
2377 out.write("<h3 align=center>Potential Duplicate List</h3>");
2378 out.write("<table border=1 align=center>\r\n");
2379
2380 Set s = distanceMap.keySet();
2381 Iterator i = s.iterator();
2382
2383 while(i.hasNext()){
2384 String keyword = (String)i.next();
2385 InternalLink il = (InternalLink)distanceMap.get(keyword);
2386 ArrayList alist = il.retrieveNodeList();
2387
2388 out.write("<tr><th>Original Text<th>Source Link\r\n");
2389
2390 for(int a = 0; a<alist.size(); a++){
2391 InternalLink il2 = new InternalLink();
2392 il2 = (InternalLink)alist.get(a);
2393 String url = (String)il2.retrieveList().get(0);
2394 out.write("<tr><td>"+il2.getValue()+"<td><a href=\""+url+"\">"+url+"</a>");
2395 }
2396 out.write("</tr>");
2397 }
2398 out.write("</table>\r\n");
2399 out.write("<p><a href=\" Overall.html \">Summary</a>");
2400 out.write("&raquo;");
2401
2402 if(metadataSetName.equalsIgnoreCase("dublin")){
2403 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet1+")</a>");
2404 }
2405 else if (metadataSetName.equalsIgnoreCase("extracted")){
2406 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet2+")</a>");
2407 }
2408 else{
2409 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSetName+")</a>");
2410 }
2411
2412 out.write("&raquo;");
2413 out.write("<a href=\""+fileName+".html \">"+fileName+"</a>");
2414 out.write("</body></html>\r\n");
2415 out.close();
2416
2417 fstream.close();
2418 }catch(IOException ex){
2419 ex.printStackTrace();
2420 }
2421 }
2422
2423 public int calculateEditDistance(char[] args1, char[] args2){
2424
2425 int n = args1.length;
2426 int m = args2.length;
2427
2428 if (n == 0) {
2429 return m;
2430 }
2431 else if (m == 0) {
2432 return n;
2433 }
2434
2435 int[] p = new int[n + 1];
2436 int[] d = new int[n + 1];
2437 int[] _d;
2438 int i;
2439 int j;
2440 int cost; // cost
2441
2442 for (i = 0; i <= n; i++) {
2443 p[i] = i;
2444 }
2445
2446 for (j = 1; j <= m; j++) {
2447 d[0] = j;
2448 for (i = 1; i <= n; i++) {
2449 cost = (args1[i-1] == args2[j-1]) ? 0 : 1;
2450 d[i] = Math.min(Math.min(d[i - 1] + 1, p[i] + 1),
2451 p[i - 1] + cost);
2452 }
2453
2454 // copy current distance counts to 'previous row' distance counts
2455 _d = p;
2456 p = d;
2457 d = _d;
2458 }
2459 // our last action in the above loop was to switch d and p, so p now
2460 // actually has the most recent cost counts
2461 return p[n];
2462 }
2463
2464 private String removeUnusedCharacter(String target){
2465
2466 while(true){
2467 if(target.indexOf("\\n")!=-1){
2468 target = target.replaceFirst("\\\\n","");
2469 }
2470 else{
2471 break;
2472 }
2473 }
2474
2475 //remove spaces at the end of string
2476 if(target.length()>1){
2477 while(true){
2478 if(target.length()>1){
2479 if(target.charAt(target.length()-1)==' '){
2480 target = target.substring(0,target.length()-1);
2481 }
2482 else{
2483 break;
2484 }
2485 }
2486 else{
2487 break;
2488 }
2489 }
2490 }
2491
2492 //remove leading spaces
2493 if(target.length()>1){
2494 while(true){
2495 if(target.length()>1){
2496 if(target.charAt(0)==' '){
2497 target = target.substring(1,target.length());
2498 }
2499 else{
2500 break;
2501 }
2502 }
2503 else{
2504 break;
2505 }
2506 }
2507 }
2508
2509 //remove multiple spaces between words
2510 while(true){
2511 if(target.indexOf(" ")!=-1){
2512 target = target.replaceFirst(" "," ");
2513 }
2514 else{
2515 break;
2516 }
2517 }
2518 return target;
2519 }
2520
2521 private CostModel removeUnusedCharacter(String target, double cost){
2522
2523 CostModel cm = new CostModel();
2524
2525 while(true){
2526 if(target.indexOf("\\n")!=-1){
2527 target = target.replaceFirst("\\\\n","");
2528 cost = cost + 0.2;
2529 }
2530 else{
2531 break;
2532 }
2533 }
2534
2535 //remove spaces at the end of string
2536 if(target.length()>1){
2537 while(true){
2538 if(target.length()>1){
2539 if(target.charAt(target.length()-1)==' '){
2540 target = target.substring(0,target.length()-1);
2541 cost = cost + 0.2;
2542 }
2543 else{
2544 break;
2545 }
2546 }
2547 else{
2548 break;
2549 }
2550 }
2551 }
2552
2553 //remove leading spaces
2554 if(target.length()>1){
2555 while(true){
2556 if(target.length()>1){
2557 if(target.charAt(0)==' '){
2558 target = target.substring(1,target.length());
2559 cost = cost + 0.2;
2560 }
2561 else{
2562 break;
2563 }
2564 }
2565 else{
2566 break;
2567 }
2568 }
2569 }
2570
2571 //remove multiple spaces between words
2572 while(true){
2573 if(target.indexOf(" ")!=-1){
2574 target = target.replaceFirst(" "," ");
2575 cost = cost + 0.2;
2576 }
2577 else{
2578 break;
2579 }
2580 }
2581
2582 cm.setCost(cost);
2583 cm.setString(target);
2584 return cm;
2585 }
2586
2587 private HashMap generateMetadataElementSortListNoIdentifier(String title, String sort, String metadataSetName){
2588
2589 String fileName = title+"_"+sort;
2590 String collectionFolder = mds.getCollectionName();
2591 SearchLink sl = new SearchLink(mds.StatsDirectory);
2592 HashMap suggestionMap = new HashMap();
2593
2594 HashMap internalIDMap = dm.getInternalIdentifier(title);
2595 boolean status = false;
2596
2597 try{
2598
2599 FileWriter fstream = new FileWriter(destination+fileName+".html");
2600 BufferedWriter out = new BufferedWriter(fstream);
2601
2602 out.write("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">");
2603 out.write ("<html>\r\n");
2604 out.write("<head>\r\n<title>Metadata Element Sort List</title>\r\n");
2605 out.write("<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\">\r\n");
2606 out.write("<script type=\"text/javascript\" src=\"../script/getInfomation.js\"></script>\r\n");
2607 out.write("<script type=\"text/javascript\" src=\"http://yui.yahooapis.com/2.4.1/build/yahoo/yahoo-min.js\"></script>\r\n");
2608 out.write("<script type=\"text/javascript\" src=\"http://yui.yahooapis.com/2.4.1/build/event/event-min.js\"></script>\r\n");
2609 out.write("<script type=\"text/javascript\" src=\"http://yui.yahooapis.com/2.4.1/build/connection/connection-min.js\"></script>\r\n");
2610 out.write("<style type=\"text/css\">\r\n");
2611 out.write("td{height:20; text-align:left;padding-left:5px;}\r\n");
2612 out.write(".tam{height:20; text-align:center}\r\n");
2613 out.write("th{height:20; text-align:center;}\r\n");
2614 out.write("body{font-family: Arial;}\r\n");
2615 out.write("</style>\r\n");
2616 out.write("</head><body id=\""+collectionFolder+"\">\r\n");
2617 out.write("<td><div style=\"float:left;\"><a href=\" Overall.html \">Summary</a>");
2618 out.write("&raquo;");
2619
2620 if(metadataSetName.equalsIgnoreCase("dublin")){
2621 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet1+")</a>");
2622 }
2623 else if (metadataSetName.equalsIgnoreCase("extracted")){
2624 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet2+")</a>");
2625 }
2626 else{
2627 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSetName+")</a>");
2628 }
2629 out.write("&raquo;");
2630 out.write("<a href=\""+title+".html\">"+title+"</a></div>");
2631 out.write("<div style=\"float:right;\"><a href=\"http://chnm.gmu.edu/tools/surveys/4386/\">Please send feedback about the Mat tool</a></div></td><br>");
2632
2633 String[] list = dm.getSortList(title, sort);
2634
2635 if(list.length>=1){
2636 if(sort.equals("ASCII")){
2637
2638 out.write("<table border=\"1\" align=\"center\" width=\"800\">\r\n");
2639 out.write ("<h2 align=\"center\">"+ title+"</h2>\r\n");
2640 out.write ("<th class=\"tam\">ASCII Sort\r\n");
2641 out.write ("<th class=\"tam\">Element Values\r\n");
2642 out.write ("<th class=\"tam\">Source Documents\r\n");
2643 out.write ("<th class=\"tam\">Internal Link\r\n");
2644 out.write ("<a name='topFive'>\r\n");
2645
2646 int counter = 0;
2647
2648 for(int i = 0; i<list.length; i++){
2649 if(list.length<=5 && i==0){
2650 out.write ("<a name='lastFive'>\r\n");
2651 }
2652 else if((list.length>5) && (list.length-5==i)){
2653 out.write ("<a name='lastFive'>\r\n");
2654 }
2655
2656
2657 InternalLink il= (InternalLink)internalIDMap.get(list[i]);
2658 ArrayList alist2 = il.retrieveList();
2659 String id = (String)alist2.get(0);
2660 id = id.substring(4);
2661
2662 out.write("<tr id=\""+id+"\" >\r\n");
2663
2664 if(list[i].length()>=201){
2665 if(list[i].startsWith("http://")){
2666 if(list[i].length()>=100){
2667 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"</a>");
2668 out.write("<td><a href=\""+list[i]+"\">Source</a>\r\n");
2669 }
2670 else{
2671 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
2672 out.write("<td><a href=\""+list[i]+"\">Source</a>\r\n");
2673 }
2674 }
2675 else{
2676
2677 if(list[i].startsWith(" ")){
2678 String elements = list[i];
2679 list[i] = list[i].substring(1);
2680 out.write("<td>"+(counter+1)+"<td>"+spaceLeft+space+spaceRight+list[i].substring(0, 200)+"...<td>No Source Available \r\n");
2681 }
2682 else{
2683 out.write("<td>"+(counter+1)+"<td>"+list[i].substring(0, 200)+"... <td>No Source Available\r\n");
2684 }
2685 }
2686 }
2687 else{
2688 if(list[i].startsWith("http://")){
2689 if(list[i].length()>=100){
2690 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"...</a>");
2691 }
2692 else{
2693 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
2694 }
2695 out.write("<td><a href=\""+list[i]+"\">Source</a>\r\n");
2696 }
2697 else{
2698 if(list[i].startsWith(" ")){
2699 String elements = list[i];
2700 list[i] = list[i].substring(1);
2701 out.write("<td>"+(counter+1)+"<td>"+spaceLeft+space+spaceRight+list[i]+"<td>No Source Available\r\n");
2702 suggestionMap.put(elements,"No Source Available");
2703 }
2704 else{
2705 out.write("<td>"+(counter+1)+"<td>"+list[i]+"<td>No Source Available\r\n");
2706 }
2707 }
2708 }
2709 counter++;
2710 out.write("<td onclick=\"GD(this)\">View");
2711 }
2712 }
2713 else{
2714 out.write("<table border=\"1\" align=\"center\" width=\"800\">\r\n");
2715 out.write ("<h2 align=\"center\">"+ title+"</h2>\r\n");
2716 out.write ("<th class=\"tam\">&nbsp;\r\n");
2717 out.write ("<th class=\"tam\">Frequency\r\n");
2718 out.write ("<th class=\"tam\">Element Values\r\n");
2719 out.write ("<th class=\"tam\">Source Documents\r\n");
2720 out.write ("<th class=\"tam\">Internal Link\r\n");
2721 out.write ("<a name='topFive'>\r\n");
2722
2723 HashMap xMap = dm.getDistinctValueMap(title);
2724 int counter = 0;
2725
2726 for(int i = 0; i<list.length; i++){
2727 if(list.length<=5 && i==0){
2728 out.write ("<a name='lastFive'>\r\n");
2729 }
2730 else if((list.length>5) && (list.length-5==i)){
2731 out.write ("<a name='lastFive'>\r\n");
2732 }
2733
2734 InternalLink il= (InternalLink)internalIDMap.get(list[i]);
2735 ArrayList alist2 = il.retrieveList();
2736 String id = (String)alist2.get(0);
2737 id = id.substring(4);
2738
2739 out.write("<tr id=\""+id+"\" >\r\n");
2740 out.write("<td>"+(counter+1)+"<th> "+((Integer)xMap.get(list[i])).toString()+"\r\n");
2741
2742 if(list[i].length()>=201){
2743 if(list[i].startsWith("http://")){
2744 if(list[i].length()>=100){
2745 out.write("<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"</a>");
2746 }
2747 else{
2748 out.write("<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
2749 }
2750 out.write("<td><a href=\""+list[i]+"\">Source</a>\r\n");
2751 }
2752 else{
2753 if(list[i].startsWith(" ")){
2754 list[i]=list[i].substring(1);
2755 out.write("<td>"+spaceLeft+space+spaceRight+list[i].substring(0,200)+"...<td>No Source Available\r\n");
2756 }
2757 else{
2758 out.write("<td>"+list[i].substring(0,200)+"...<td>No Source Available\r\n");
2759 }
2760 }
2761 }
2762 else{
2763 if(list[i].startsWith("http://")){
2764 if(list[i].length()>=100){
2765 out.write("<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"</a>");
2766 }
2767 else{
2768 out.write("<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
2769 }
2770 out.write("<td><a href=\""+list[i]+"\">Source</a>\r\n");
2771 }
2772 else{
2773 if(list[i].startsWith(" ")){
2774 list[i]=list[i].substring(1);
2775 out.write("<td>"+spaceLeft+space+spaceRight+list[i]+"<td>No Source Available\r\n");
2776 }
2777 else{
2778 out.write("<td>"+list[i]+"<td>No Source Available\r\n");
2779 }
2780 }
2781 }
2782 counter++;
2783 out.write("<td onclick=\"GD(this)\">View");
2784 }
2785 }
2786 out.write("</table>\r\n");
2787 }
2788 else{
2789 out.write("<p>Sorry! The list is empty</p>");
2790 }
2791
2792 out.write("<tr>\r\n");
2793 out.write("<p><a href=\" Overall.html \">Summary</a>");
2794 out.write("&raquo;");
2795
2796 if(metadataSetName.equalsIgnoreCase("dublin")){
2797 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet1+")</a>");
2798 }
2799 else if (metadataSetName.equalsIgnoreCase("extracted")){
2800 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet2+")</a>");
2801 }
2802 else{
2803 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSetName+")</a>");
2804 }
2805
2806 out.write("&raquo;");
2807 out.write("<a href=\""+title+".html\">"+title+"</a></p>");
2808 out.write("</body></html>\r\n");
2809 out.close();
2810
2811 fstream.close();
2812 }catch(Exception e){
2813 e.printStackTrace();
2814 }
2815 return new HashMap();
2816 }
2817
2818 private void createIncompletedListNoIdentifier(String fileName, String metadataSetName, String collectionFolder){
2819
2820 HashMap hp = dm.getIdentifierLinkNoIdentifier();
2821 String[] ids = dm.getDocumentIDList(fileName);
2822
2823 Set st = hp.keySet();
2824 Iterator it = st.iterator();
2825
2826 while(it.hasNext()){
2827 System.out.println(it.next());
2828 }
2829 for(int i = 0; i<ids.length; i++){
2830 System.out.println("IDS:" +ids[i]);
2831 hp.remove(ids[i]);
2832 }
2833
2834 //System.out.println("Length: "+ids.length+" "+fileName+ " "+hp.size());
2835
2836 try{
2837 FileWriter fstream = new FileWriter(destination+fileName+"_IncompletedList.html");
2838 BufferedWriter out = new BufferedWriter(fstream);
2839
2840 out.write("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">");
2841 out.write ("<html>\r\n");
2842 out.write("<head>\r\n<title>Incompleted Document List</title>\r\n");
2843 out.write("<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\">\r\n");
2844 out.write("<style type=\"text/css\">\r\n");
2845 out.write("td{height:20; text-align:left;padding-left:5px;}\r\n");
2846 out.write(".tam{height:20; text-align:center}\r\n");
2847 out.write("th{height:20; text-align:center;}\r\n");
2848 out.write("body{font-family: Arial;}\r\n");
2849 out.write("</style>\r\n");
2850 out.write("</head><body>\r\n");
2851 out.write("<p><a href=\" Overall.html \">Summary</a>");
2852 out.write("&raquo;");
2853
2854 if(metadataSetName.equalsIgnoreCase("dublin")){
2855 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet1+")</a>");
2856 }
2857 else if (metadataSetName.equalsIgnoreCase("extracted")){
2858 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet2+")</a>");
2859 }
2860 else{
2861 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSetName+")</a>");
2862 }
2863
2864 out.write("&raquo;");
2865 out.write("<a href=\""+fileName+".html \">"+fileName+"</a>");
2866 out.write("<h3 align=center>"+fileName+" does not appear in the following documents</h3>");
2867 int counter = 0;
2868 Set s = hp.keySet();
2869 Iterator i = s.iterator();
2870 out.write("<table border=1 align=center><tr><th>Document ID<th>Source Link\r\n");
2871
2872 while(i.hasNext()){
2873 counter++;
2874 String keys = (String)i.next();
2875 InternalLink il = (InternalLink) hp.get(keys);
2876 ArrayList alist = il.retrieveList();
2877 String url = (String)alist.get(0);
2878 out.write("<tr><td>"+counter+"<td><a href=\""+url+"\">"+url+"</a>\r\n");
2879 }
2880 out.write("</table></body></html>\r\n");
2881 out.write("<p><a href=\" Overall.html \">Summary</a>");
2882 out.write("&raquo;");
2883
2884 if(metadataSetName.equalsIgnoreCase("dublin")){
2885 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet1+")</a>");
2886 }
2887 else if (metadataSetName.equalsIgnoreCase("extracted")){
2888 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet2+")</a>");
2889 }
2890 else{
2891 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSetName+")</a>");
2892 }
2893 out.write("&raquo;");
2894 out.write("<a href=\""+fileName+".html \">"+fileName+"</a>");
2895
2896 out.close();
2897 fstream.close();
2898 }catch(IOException ex){
2899 ex.printStackTrace();
2900 }
2901 }
2902}
Note: See TracBrowser for help on using the repository browser.