source: other-projects/trunk/greenstone3-extension/mat/org/greenstone/gsdl3_extension/mat/PrintHTML.java@ 17358

Last change on this file since 17358 was 17358, checked in by cc108, 16 years ago

Updating Mat Source Code

File size: 100.5 KB
Line 
1package org.greenstone.gsdl3_extension.mat;
2
3import java.io.*;
4import java.text.SimpleDateFormat;
5import java.util.ArrayList;
6import java.util.Calendar;
7import java.util.Collection;
8import java.util.HashMap;
9import java.util.Iterator;
10import java.util.Map;
11import java.util.Set;
12
13import javax.xml.parsers.DocumentBuilder;
14import javax.xml.parsers.DocumentBuilderFactory;
15
16import org.w3c.dom.Document;
17import org.w3c.dom.Element;
18import org.w3c.dom.Node;
19import org.w3c.dom.NodeList;
20
21public class PrintHTML {
22
// Row-ordering keys. They are passed as the "condition" argument of WriteHTML and
// generateEmptyGraph, where they select the row order and form the last part of the
// generated file name.
23 final String NORMAL = "normal";
24 final String WORST = "worst";
25 final String BEST = "best";
26
// Page-title fragments for the four filters; generateAllPossibleGraph concatenates
// them according to which filters a page applies.
27 final String title1 ="Hide Empty Metadata ,";
28 final String title2 ="Hide Completed Metadata ,";
29 final String title3 ="Hide Documents with Empty Metadata ,";
30 final String title4 ="Hide Documents with Completed Metadata ,";
31 final String title5 ="No Available Graph";
32
// Display labels generateOverallStatisticsPage uses for the metadata sets named
// "dublin" and "extracted".
33 final String metadataSet1 = "Dublin Core";
34 final String metadataSet2 = "Extracted";
35
// HTML fragments and marker strings.
// NOTE(review): not referenced by the methods visible in this part of the file.
36 final String spaceLeft = "<I>&laquo;";
37 final String spaceRight = "&raquo;</I>";
38 final String htmlSpace = "&nbsp;";
39 final String oddChar ="odd Character";
40
// Name of the metadata element whose values provide document URLs; the constructor
// sets it to "dc.Identifier" and WriteHTML compares against it.
41 String linkIdentifier = "";
42 String cssString = "<link rel=\"stylesheet\" href=\"http://www.nzdl.org/greenstone3/mat/script/mat.css\" type=\"text/css\" >";
43
44 final String space ="space";
45 private HashMap url;
46
// Output directory for every generated page (taken from MetadataStats.HTMLDirectory).
47 String destination ="";
// Source of collection statistics and the helper that derives per-element data from it.
48 MetadataStats mds;
49 DataMaker dm;
// Calendar captured at construction time and the formatter used to print it on the
// Overall page.
50 Calendar cl;
51 SimpleDateFormat sdf;
52
/**
 * Creates a page generator backed by the given statistics object.
 *
 * Takes the output directory from {@code ms.HTMLDirectory}, builds the
 * {@link DataMaker} used by the page writers, captures the current time (printed
 * later on the Overall page) and selects "dc.Identifier" as the metadata element
 * that provides document links.
 *
 * @param ms statistics for the collection being reported on
 */
public PrintHTML(MetadataStats ms){

    mds = ms;

    try{
        destination = mds.HTMLDirectory;
        System.out.println(destination+" printhtml");
    }catch(Exception ex){
        // Best effort: keep the default (empty) destination and carry on.
        ex.printStackTrace();
    }

    dm = new DataMaker(mds);
    cl = Calendar.getInstance();

    // WriteHTML also understands "nzir_internal.Link" as a link identifier.
    linkIdentifier = "dc.Identifier";

    sdf = new SimpleDateFormat(" dd MMM yyyy 'at' HH:mm:ss z 'GMT'Z");
    setupMetadataLink("metadataElementURL");
}
76
77 private ArrayList getHideEmptyMetadataDetail(ArrayList dataset, String[] id, String[] metadataName){
78
79 if(id.length==0 || metadataName.length==0){
80 return new ArrayList();
81 }
82
83 ArrayList detailList = new ArrayList();
84 ArrayList alist = new ArrayList();
85 ArrayList nameList = new ArrayList();
86
87 for(int i = 0; i< metadataName.length; i++){
88 if(!dm.IsElementEmpty(metadataName[i])){
89 alist.add(dm.getMetadataRows(metadataName[i]));
90 nameList.add(metadataName[i]);
91 }
92 }
93
94 String[] metaDataElementName = new String[nameList.size()];
95
96 for(int i = 0; i< nameList.size(); i++){
97 metaDataElementName[i] = nameList.get(i).toString();
98 }
99
100 if(id.length==0 || metaDataElementName.length==0){
101 return new ArrayList();
102 }
103
104 detailList.add(alist);
105 detailList.add(id);
106 detailList.add(metaDataElementName);
107 return detailList;
108 }
109
110 private ArrayList getHideFullMetadataDetail(ArrayList dataset, String[] id, String[] metadataName){
111
112 if(id.length==0 || metadataName.length==0){
113 return new ArrayList();
114 }
115
116 ArrayList detailList = new ArrayList();
117 ArrayList alist = new ArrayList();
118 ArrayList nameList = new ArrayList();
119
120 for(int i = 0; i< metadataName.length; i++){
121 if(!dm.IsElementFull(metadataName[i])){
122 alist.add(dm.getMetadataRows(metadataName[i]));
123 nameList.add(metadataName[i]);
124 }
125 }
126
127 String[] metaDataElementName = new String[nameList.size()];
128
129 for(int i = 0; i< nameList.size(); i++){
130 metaDataElementName[i] = nameList.get(i).toString();
131 }
132
133 if(id.length==0 || metaDataElementName.length==0){
134 return new ArrayList();
135 }
136
137 detailList.add(alist);
138 detailList.add(id);
139 detailList.add(metaDataElementName);
140
141 return detailList;
142 }
143
144 private ArrayList getHideEmptyDocumentDetail(ArrayList dataset, String[] id, String[] metadataName){
145
146 if(id.length==0 || metadataName.length==0){
147 return new ArrayList();
148 }
149
150 ArrayList detailList = new ArrayList();
151 ArrayList alist = dm.removeDocument(dataset, id, 0);
152 ArrayList idList = dm.getRemovedID();
153 ArrayList newIDList = new ArrayList();
154
155 for(int i = 0; i< id.length; i++){
156 if(!idList.contains(id[i])){
157 newIDList.add(id[i]);
158 }
159 }
160
161 id = new String[newIDList.size()];
162
163 for(int i = 0; i<newIDList.size(); i++){
164 id[i] = newIDList.get(i).toString();
165 }
166
167 if(id.length==0 || metadataName.length==0){
168 return new ArrayList();
169 }
170
171 detailList.add(alist);
172 detailList.add(id);
173 detailList.add(metadataName);
174
175 return detailList;
176 }
177
178 private ArrayList getHideFullDocumentDetail(ArrayList dataset, String[] id, String[] metadataName){
179
180 if(id.length==0 || metadataName.length==0){
181 return new ArrayList();
182 }
183
184 ArrayList detailList = new ArrayList();
185 ArrayList alist = dm.removeDocument(dataset, id ,1);
186 ArrayList idList = dm.getRemovedID();
187 ArrayList newIDList = new ArrayList();
188
189 for(int i = 0; i< id.length; i++){
190 if(!idList.contains(id[i])){
191 newIDList.add(id[i]);
192 }
193 }
194
195 id = new String[newIDList.size()];
196
197 for(int i = 0; i<newIDList.size(); i++){
198 id[i] = newIDList.get(i).toString();
199 }
200
201 if(id.length==0 || metadataName.length==0){
202 return new ArrayList();
203 }
204
205 detailList.add(alist);
206 detailList.add(id);
207 detailList.add(metadataName);
208
209 return detailList;
210 }
211
212
213 public void generateAllPossibleGraph(ArrayList Namelist,String[]ids,String[] names,String condition){
214
215 ArrayList dataList= Namelist;
216 String[] id = ids;
217 String[] metadataName = names;
218
219 ArrayList alist = new ArrayList();
220 ArrayList detailList = new ArrayList();
221 ArrayList new_dataList = new ArrayList();
222 String[] idList;
223 String[] metadataNameList;
224
225 //-------------------SS
226 //-----generate SSSS
227 if(id.length==0 || metadataName.length==0){
228 generateEmptyGraph("SSSS",WORST,condition);
229 generateEmptyGraph("SSSS",BEST,condition);
230 }
231 else{
232 new_dataList = transformDataList(dataList,id);
233
234 WriteHTML("SSSS",new_dataList,id,metadataName,WORST,"Show completed graph",condition);
235 WriteHTML("SSSS",new_dataList,id,metadataName,BEST,"Show completed graph",condition);
236 }
237
238 //-----SSSH
239 detailList = getHideFullDocumentDetail(dataList,id,metadataName);
240
241 if(detailList.size()==0){
242 generateEmptyGraph("SSSH",WORST,condition);
243 generateEmptyGraph("SSSH",BEST,condition);
244 }
245 else{
246 alist = (ArrayList)detailList.get(0);
247 idList = (String[])detailList.get(1);
248 metadataNameList = (String[])detailList.get(2);
249 new_dataList = transformDataList(alist,idList);
250
251 WriteHTML("SSSH",new_dataList,idList,metadataNameList,WORST,title4,condition);
252 WriteHTML("SSSH",new_dataList,idList,metadataNameList,BEST,title4,condition);
253 }
254
255 //-----SSHS
256 detailList = getHideEmptyDocumentDetail(dataList,id,metadataName);
257 if(detailList.size()==0){
258 generateEmptyGraph("SSHS",WORST,condition);
259 generateEmptyGraph("SSHS",BEST,condition);
260 generateEmptyGraph("SSHH",WORST,condition);
261 generateEmptyGraph("SSHH",BEST,condition);
262 }
263 else{
264 alist = (ArrayList)detailList.get(0);
265 idList = (String[])detailList.get(1);
266 metadataNameList = (String[])detailList.get(2);
267 new_dataList = transformDataList(alist,idList);
268
269 WriteHTML("SSHS",new_dataList,idList,metadataNameList,WORST,title3,condition);
270 WriteHTML("SSHS",new_dataList,idList,metadataNameList,BEST,title3,condition);
271
272 //-----SSHH
273 detailList = getHideFullDocumentDetail(alist,idList,metadataNameList);
274 if(detailList.size()==0){
275 generateEmptyGraph("SSHH",WORST,condition);
276 generateEmptyGraph("SSHH",BEST,condition);
277 }
278 else{
279 alist = (ArrayList)detailList.get(0);
280 idList = (String[])detailList.get(1);
281 metadataNameList = (String[])detailList.get(2);
282 new_dataList = transformDataList(alist,idList);
283
284 WriteHTML("SSHH",new_dataList,idList,metadataNameList,WORST,title3+title4,condition);
285 WriteHTML("SSHH",new_dataList,idList,metadataNameList,BEST,title3+title4,condition);
286 }
287 }
288
289
290 //-------------------SH
291 ArrayList xList = new ArrayList();
292 String[] idListCopy;
293 String[] metadataNameListCopy;
294
295 //-----SHSS
296 detailList = getHideFullMetadataDetail(dataList,id,metadataName);
297
298 if(detailList.size()==0){
299 generateEmptyGraph("SHSS",WORST,condition);
300 generateEmptyGraph("SHSS",BEST,condition);
301
302 generateEmptyGraph("SHSH",WORST,condition);
303 generateEmptyGraph("SHSH",BEST,condition);
304
305 generateEmptyGraph("SHHS",WORST,condition);
306 generateEmptyGraph("SHHS",BEST,condition);
307
308 generateEmptyGraph("SHHH",WORST,condition);
309 generateEmptyGraph("SHHH",BEST,condition);
310 }
311 else{
312 alist = (ArrayList)detailList.get(0);
313 idList = (String[])detailList.get(1);
314 metadataNameList = (String[])detailList.get(2);
315
316 xList = (ArrayList)alist.clone();
317 idListCopy = (String[]) idList.clone();
318 metadataNameListCopy = (String[])metadataNameList.clone();
319 new_dataList = transformDataList(alist,idList);
320
321 WriteHTML("SHSS",new_dataList,idList,metadataNameList,WORST,title2,condition);
322 WriteHTML("SHSS",new_dataList,idList,metadataNameList,BEST,title2,condition);
323
324 //-----SHHS
325 detailList = getHideEmptyDocumentDetail((ArrayList)xList.clone(),(String[])idListCopy.clone(),(String[])metadataNameListCopy.clone());
326
327 if(detailList.size()==0){
328 generateEmptyGraph("SHHS",WORST,condition);
329 generateEmptyGraph("SHHS",BEST,condition);
330
331 generateEmptyGraph("SHHH",WORST,condition);
332 generateEmptyGraph("SHHH",BEST,condition);
333 }
334 else{
335 alist = (ArrayList)detailList.get(0);
336 idList = (String[])detailList.get(1);
337 metadataNameList = (String[])detailList.get(2);
338 new_dataList = transformDataList(alist,idList);
339
340 WriteHTML("SHHS",new_dataList,idList,metadataNameList,WORST,title2+title3,condition);
341 WriteHTML("SHHS",new_dataList,idList,metadataNameList,BEST,title2+title3,condition);
342
343 //-----SHHH
344 detailList = getHideFullDocumentDetail(alist,idList,metadataNameList);
345
346 if(detailList.size()==0){
347 generateEmptyGraph("SHHH",WORST,condition);
348 generateEmptyGraph("SHHH",BEST,condition);
349 }
350 else{
351 alist = (ArrayList)detailList.get(0);
352 idList = (String[])detailList.get(1);
353 metadataNameList = (String[])detailList.get(2);
354 new_dataList = transformDataList(alist,idList);
355
356 WriteHTML("SHHH",new_dataList,idList,metadataNameList,WORST,title2+title3+title4,condition);
357 WriteHTML("SHHH",new_dataList,idList,metadataNameList,BEST,title2+title3+title4,condition);
358 }
359 }
360
361 //-----SHSH
362 detailList = getHideFullDocumentDetail((ArrayList)xList.clone(),(String[])idListCopy.clone(),(String[])metadataNameListCopy.clone());
363
364 if(detailList.size()==0){
365 generateEmptyGraph("SHSH",WORST,condition);
366 generateEmptyGraph("SHSH",BEST,condition);
367 }
368 else{
369 alist = (ArrayList)detailList.get(0);
370 idList = (String[])detailList.get(1);
371 metadataNameList = (String[])detailList.get(2);
372 new_dataList = transformDataList(alist,idList);
373
374 WriteHTML("SHSH",new_dataList,idList,metadataNameList,WORST,title2+title4,condition);
375 WriteHTML("SHSH",new_dataList,idList,metadataNameList,BEST,title2+title4,condition);
376 }
377 }
378
379 //-------------------HS
380 ArrayList xList1 = new ArrayList();
381 String[] idListCopy1;
382 String[] metadataNameListCopy1;
383
384 //-----HSSS
385 detailList = getHideEmptyMetadataDetail(dataList,id,metadataName);
386
387 if(detailList.size()==0){
388 generateEmptyGraph("HSSS",WORST,condition);
389 generateEmptyGraph("HSSS",BEST,condition);
390
391 generateEmptyGraph("HSHS",WORST,condition);
392 generateEmptyGraph("HSHS",BEST,condition);
393
394 generateEmptyGraph("HSHH",WORST,condition);
395 generateEmptyGraph("HSHH",BEST,condition);
396
397 generateEmptyGraph("HSSH",WORST,condition);
398 generateEmptyGraph("HSSH",BEST,condition);
399 }
400 else{
401 alist = (ArrayList)detailList.get(0);
402 idList = (String[])detailList.get(1);
403 metadataNameList = (String[])detailList.get(2);
404
405 xList1 = (ArrayList)alist.clone();
406 idListCopy1 = (String[]) idList.clone();
407 metadataNameListCopy1 = (String[])metadataNameList.clone();
408 new_dataList = transformDataList(alist,idList);
409
410 WriteHTML("HSSS",new_dataList,idList,metadataNameList,WORST,title1,condition);
411 WriteHTML("HSSS",new_dataList,idList,metadataNameList,BEST,title1,condition);
412
413 //-----HSHS
414 detailList = getHideEmptyDocumentDetail((ArrayList)xList1.clone(),(String[])idListCopy1.clone(),(String[])metadataNameListCopy1.clone());
415
416 if(detailList.size()==0){
417 generateEmptyGraph("HSHS",WORST,condition);
418 generateEmptyGraph("HSHS",BEST,condition);
419
420 generateEmptyGraph("HSHH",WORST,condition);
421 generateEmptyGraph("HSHH",BEST,condition);
422 }
423 else{
424 alist = (ArrayList)detailList.get(0);
425 idList = (String[])detailList.get(1);
426 metadataNameList = (String[])detailList.get(2);
427 new_dataList = transformDataList(alist,idList);
428
429 WriteHTML("HSHS",new_dataList,idList,metadataNameList,WORST,title1+title3,condition);
430 WriteHTML("HSHS",new_dataList,idList,metadataNameList,BEST,title1+title3,condition);
431
432 //-----HSHH
433 detailList = getHideFullDocumentDetail(alist,idList,metadataNameList);
434 if(detailList.size()==0){
435 generateEmptyGraph("HSHH",WORST,condition);
436 generateEmptyGraph("HSHH",BEST,condition);
437 }
438 else{
439 alist = (ArrayList)detailList.get(0);
440 idList = (String[])detailList.get(1);
441 metadataNameList = (String[])detailList.get(2);
442 new_dataList = transformDataList(alist,idList);
443
444 WriteHTML("HSHH",new_dataList,idList,metadataNameList,WORST,title1+title3+title4,condition);
445 WriteHTML("HSHH",new_dataList,idList,metadataNameList,BEST,title1+title3+title4,condition);
446 }
447 }
448
449 //-----HSSH
450 detailList = getHideFullDocumentDetail((ArrayList)xList1.clone(),(String[])idListCopy1.clone(),(String[])metadataNameListCopy1.clone());
451
452 if(detailList.size()==0){
453 generateEmptyGraph("HSSH",WORST,condition);
454 generateEmptyGraph("HSSH",BEST,condition);
455 }
456 else{
457 alist = (ArrayList)detailList.get(0);
458 idList = (String[])detailList.get(1);
459 metadataNameList = (String[])detailList.get(2);
460 new_dataList = transformDataList(alist,idList);
461
462 WriteHTML("HSSH",new_dataList,idList,metadataNameList,WORST,title1+title4,condition);
463 WriteHTML("HSSH",new_dataList,idList,metadataNameList,BEST,title1+title4,condition);
464 }
465 }
466
467 //-------------------HH
468 ArrayList xList2 = new ArrayList();
469 String[] idListCopy2;
470 String[] metadataNameListCopy2;
471
472 //-----HHSS
473 detailList = getHideEmptyMetadataDetail(dataList,id,metadataName);
474
475 if(detailList.size()==0){
476 generateEmptyGraph("HHSS",WORST,condition);
477 generateEmptyGraph("HHSS",BEST,condition);
478
479 generateEmptyGraph("HHHS",WORST,condition);
480 generateEmptyGraph("HHHS",BEST,condition);
481
482 generateEmptyGraph("HHHH",WORST,condition);
483 generateEmptyGraph("HHHH",BEST,condition);
484
485 generateEmptyGraph("HHSH",WORST,condition);
486 generateEmptyGraph("HHSH",BEST,condition);
487 }
488 else{
489 alist = (ArrayList)detailList.get(0);
490 idList = (String[])detailList.get(1);
491 metadataNameList = (String[])detailList.get(2);
492 detailList = getHideFullMetadataDetail(alist,idList,metadataNameList);
493
494 if(detailList.size()==0){
495 generateEmptyGraph("HHSS",WORST,condition);
496 generateEmptyGraph("HHSS",BEST,condition);
497
498 generateEmptyGraph("HHHS",WORST,condition);
499 generateEmptyGraph("HHHS",BEST,condition);
500
501 generateEmptyGraph("HHHH",WORST,condition);
502 generateEmptyGraph("HHHH",BEST,condition);
503
504 generateEmptyGraph("HHSH",WORST,condition);
505 generateEmptyGraph("HHSH",BEST,condition);
506 }
507 else{
508 alist = (ArrayList)detailList.get(0);
509 idList = (String[])detailList.get(1);
510 metadataNameList = (String[])detailList.get(2);
511
512 xList2 = (ArrayList)alist.clone();
513 idListCopy2 = (String[]) idList.clone();
514 metadataNameListCopy2 = (String[])metadataNameList.clone();
515 new_dataList = transformDataList(alist,idList);
516
517 WriteHTML("HHSS",new_dataList,idList,metadataNameList,WORST,title1+title2,condition);
518 WriteHTML("HHSS",new_dataList,idList,metadataNameList,BEST,title1+title2,condition);
519
520 //----- HHSH
521 detailList = getHideFullDocumentDetail((ArrayList)xList2.clone(),(String[])idListCopy2.clone(),(String[])metadataNameListCopy2.clone());
522
523 if(detailList.size()==0){
524 generateEmptyGraph("HHSH",WORST,condition);
525 generateEmptyGraph("HHSH",BEST,condition);
526 }
527 else{
528 alist = (ArrayList)detailList.get(0);
529 idList = (String[])detailList.get(1);
530 metadataNameList = (String[])detailList.get(2);
531 new_dataList = transformDataList(alist,idList);
532
533 WriteHTML("HHSH",new_dataList,idList,metadataNameList,WORST,title1+title2+title4,condition);
534 WriteHTML("HHSH",new_dataList,idList,metadataNameList,BEST,title1+title2+title4,condition);
535 }
536 //-----HHHS
537 detailList = getHideEmptyDocumentDetail((ArrayList)xList2.clone(),(String[])idListCopy2.clone(),(String[])metadataNameListCopy2.clone());
538
539 if(detailList.size()==0){
540 generateEmptyGraph("HHHS",WORST,condition);
541 generateEmptyGraph("HHHS",BEST,condition);
542 generateEmptyGraph("HHHH",WORST,condition);
543 generateEmptyGraph("HHHH",BEST,condition);
544 }
545 else{
546 alist = (ArrayList)detailList.get(0);
547 idList = (String[])detailList.get(1);
548 metadataNameList = (String[])detailList.get(2);
549 new_dataList = transformDataList(alist,idList);
550
551 WriteHTML("HHHS",new_dataList,idList,metadataNameList,WORST,title1+title2+title3,condition);
552 WriteHTML("HHHS",new_dataList,idList,metadataNameList,BEST,title1+title2+title3,condition);
553
554 //-----HHHH
555 detailList = getHideFullDocumentDetail((ArrayList)alist.clone(),(String[])idList.clone(),(String[])metadataNameList.clone());
556
557 if(detailList.size()==0){
558 generateEmptyGraph("HHHH",WORST,condition);
559 generateEmptyGraph("HHHH",BEST,condition);
560 }
561 else{
562 alist = (ArrayList)detailList.get(0);
563 idList = (String[])detailList.get(1);
564 metadataNameList = (String[])detailList.get(2);
565 new_dataList = transformDataList(alist,idList);
566
567 WriteHTML("HHHH",new_dataList,idList,metadataNameList,WORST,title1+title2+title3+title4,condition);
568 WriteHTML("HHHH",new_dataList,idList,metadataNameList,BEST,title1+title2+title3+title4,condition);
569 }
570 }
571 }
572 }
573 }
574
575 private ArrayList transformDataList(ArrayList list, String[] ids){
576
577 ArrayList wholeList = new ArrayList();
578
579 for(int i = 0; i< ids.length; i++){
580 ArrayList idList = new ArrayList();
581 for(int j = 0; j<list.size();j++){
582 int[] datarows = (int[]) list.get(j);
583 idList.add(new Integer (datarows[i]));
584 }
585 wholeList.add(idList);
586 }
587
588 for(int i = 0; i< wholeList.size(); i++){
589 ArrayList alist = (ArrayList)wholeList.get(i);
590 int[] rows = new int[alist.size()];
591 for(int j = 0; j< alist.size(); j++){
592 rows[j] = ((Integer)alist.get(j)).intValue();
593 }
594 wholeList.remove(i);
595 wholeList.add(i,rows);
596 }
597 return wholeList;
598 }
599
600 private void generateEmptyGraph(String fileName,String condition,String suffix){
601
602 String suf = suffix;
603
604 if(!suffix.equalsIgnoreCase("dublin")){
605 suf = "other";
606 }
607
608 String cases = condition;
609 String collectionFolder = mds.getCollectionName();
610
611 try{
612 FileWriter fstream = new FileWriter(destination+"/"+suf+"_"+fileName+"_"+cases+".html");
613 BufferedWriter out = new BufferedWriter(fstream);
614
615 out.write("<!-- This comment keeps IE6/7 in the reliable quirks mode -->\r\n");
616 out.write("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\r\n");
617 out.write("<html>\r\n");
618 out.write("<head>\r\n<title> No Available Chart</title>\r\n");
619 out.write("<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\">\r\n");
620 out.write("<link rel=\"stylesheet\" href=\"../script/doby2.css\" type=\"text/css\"/>\r\n");
621 out.write("</head>\r\n");
622 out.write("<p><a href=\" Overall.html \">Summary</a></p>");
623 out.write("<body>\r\n");
624 out.write("<p>No data available to render chart.</p>\r\n");
625 out.write("<p>Reason: Document number is zero or Metadata element number is zero </p>\r\n");
626 out.write("</body></html>\r\n");
627 out.close();
628 fstream.close();
629
630 }catch(Exception ex){
631 ex.printStackTrace();
632 }
633 }
634
635 private void WriteHTML(String fileName, ArrayList dataset, String[] ids, String[] metadataName, String condition, String title, String suffix){
636
637 int blueDot = 0;
638 String cases = condition;
639 String suf = suffix;
640
641 if(!suffix.equalsIgnoreCase("dublin")){
642 suf = "other";
643 }
644
645 try{
646 ArrayList tempList = mds.getMetadataNameList();
647 ArrayList urlIDList;
648
649 if(tempList.contains("dc.Identifier") && linkIdentifier.equals("dc.Identifier")){
650 urlIDList = dm.getURLMap("dc.Identifier");
651 }
652 else if(tempList.contains("nzir_internal.Link") && linkIdentifier.equals("nzir_internal.Link")){
653 urlIDList = dm.getURLMap("nzir_internal.Link");
654 }
655 else{
656 urlIDList = new ArrayList();
657 }
658
659 String collectionFolder = mds.getCollectionName();
660 FileWriter fstream = new FileWriter(destination+"/"+suf+"_"+fileName+"_"+cases+".html");
661 BufferedWriter out = new BufferedWriter(fstream);
662
663 out.write("<!-- This comment keeps IE6/7 in the reliable quirks mode -->\r\n");
664 out.write("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\r\n");
665 out.write("<html>\r\n");
666 out.write("<head>\r\n<title>"+title+"</title>\r\n");
667 out.write("<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\">\r\n");
668 out.write("<link rel=\"stylesheet\" href=\"../script/doby2.css\" type=\"text/css\"/>\r\n");
669 out.write("<script type=\"text/javascript\" src=\"../script/getInfomation.js\"></script>\r\n");
670 out.write("<script type=\"text/javascript\" src=\"http://yui.yahooapis.com/2.4.1/build/yahoo/yahoo-min.js\"></script>\r\n");
671 out.write("<script type=\"text/javascript\" src=\"http://yui.yahooapis.com/2.4.1/build/event/event-min.js\"></script>\r\n");
672 out.write("<script type=\"text/javascript\" src=\"http://yui.yahooapis.com/2.4.1/build/connection/connection-min.js\"></script>\r\n");
673 out.write("</head>\r\n");
674 out.write ("<p><a href=\" Overall.html \">Summary</a></p>");
675 out.write("<body id=\""+mds.getCollectionName()+"\" onLoad=\"reconfig()\">\r\n");
676 out.write("<div id=\"container\">\r\n");
677 out.write("<div class=\"tableContainer\">\r\n");
678 out.write ("<table cellspacing=\"0\">\r\n");
679 out.write ("<thead>\r\n");
680 out.write ("<tr>\r\n");
681 out.write("<td class=\"qh\">Info\r\n");
682 out.write("<td class=\"qh\">URL\r\n");
683
684 for(int a = 0; a< metadataName.length; a++){
685 out.write("<td>"+ metadataName[a]+"\r\n");
686 }
687
688 out.write ("</thead>\r\n<tfoot>\r\n<tr>\r\n");
689 out.write("<td class=\"qh\">&nbsp;\r\n");
690 out.write("<td class=\"qh\">&nbsp;\r\n");
691
692 for(int a = 0; a< metadataName.length; a++){
693 out.write("<td>"+ dm.Mean(metadataName[a])+"%\r\n");
694 }
695
696 out.write ("</tfoot>\r\n<tbody>\r\n");
697
698 if(cases.equals("normal")){
699 for(int i = ids.length; i>0; i--){
700 out.write ("<tr>");
701 int[] datarows = (int[])dataset.get(i-1);
702
703 for(int j = 0; j<datarows.length; j++){
704 if(datarows[j]==1){
705 out.write ("<td class=\"b\">");
706 blueDot++;
707 }
708 else {
709 out.write ("<td>&nbsp;");
710 }
711 }
712 }
713 }
714 else{
715 ArrayList idList = new ArrayList();
716
717 for(int i = 0; i<ids.length;i++){
718 idList.add(ids[i]);
719 }
720
721 HashMap hp = new HashMap();
722
723 for(int i = ids.length; i>0; i--){
724 String idValue = ids[i-1];
725 int dots = 0;
726 int[] datarows = (int[])dataset.get(i-1);
727 for(int j = 0; j<datarows.length; j++){
728 if(datarows[j]==1){
729 dots++;
730 }
731 }
732 hp.put(idValue,new Integer(dots));
733 }
734
735 ArrayList alist = dm.sortMap(hp);
736 String[] idIntValue = new String[alist.size()];
737
738 for(int i = 0; i< alist.size(); i++){
739 Map.Entry entry = (Map.Entry) alist.get(i);
740 String idElement = ((String) entry.getKey());
741 idIntValue[i] = idElement;
742 }
743
744 if(cases.equals("worst")){
745
746 for(int i = 0; i<idIntValue.length; i++){
747 int value = idList.indexOf(idIntValue[i]);
748 int[] datarows = (int[])dataset.get(value);
749
750 out.write ("<tr id=\""+idIntValue[i].substring(4)+"\">\r\n");
751 out.write("<td class=\"E\" onclick=\"GD(this)\">&nbsp;\r\n");
752
753 if(urlIDList.contains(idIntValue[i])){
754 out.write("<td class=\"qh\" onclick=\"GX(this)\"><span title=\"open URL in new window\">open</span>\r\n");
755 }
756 else{
757 out.write("<td>\r\n");
758 }
759
760 for(int j = 0; j<datarows.length; j++){
761 if(datarows[j]==1){
762 out.write ("<td class=\"b\">\r\n");
763 blueDot++;
764 }
765 else {
766 out.write ("<td class=\"w\">\r\n");
767 }
768 }
769 }
770 }
771
772 if(cases.equals("best")){
773
774 for(int i = idIntValue.length; i>0; i--){
775 int value = idList.indexOf(idIntValue[i-1]);
776 int[] datarows = (int[])dataset.get(value);
777
778 out.write ("<tr id=\""+idIntValue[i-1].substring(4)+"\">\r\n");
779 out.write("<td class=\"E\" onclick=\"GD(this)\">&nbsp;\r\n");
780
781 if(urlIDList.contains(idIntValue[i-1])){
782 out.write("<td class=\"qh\" onclick=\"GX(this)\"><span title=\"open URL in new window\">open</span>\r\n");
783 }
784 else{
785 out.write("<td>\r\n");
786 }
787
788 for(int j = 0; j<datarows.length; j++){
789 if(datarows[j]==1){
790 out.write ("<td class=\"b\">\r\n");
791 blueDot++;
792 }
793 else {
794 out.write ("<td class=\"w\">\r\n");
795 }
796 }
797 }
798 }
799 }
800
801 out.write ("</tbody>\r\n");
802 out.write ("</table>\r\n</div></div>\r\n");
803
804 int t1 = ids.length;
805 int t3 = metadataName.length;
806 int t4 = t1*t3;
807 HashMap tempMap = mds.getMetadataSetMap();
808 MetadataSet ms = (MetadataSet)tempMap.get(suffix);
809
810 out.write("<table>\r\n<tbody class=\"table1\">");
811 out.write("<tr>\r\n");
812 out.write("<td class=\"bfont\">This subset shows "+t1+" out of "+mds.getDocNum()+" documents");
813 out.write("<td class=\"bfont\">"+blueDot+" out of "+(mds.getDocNum()*t3)+" metadata items are defined");
814 out.write("<tr>\r\n");
815 out.write("<td class=\"bfont\">This subset shows "+t3+" out of "+(ms.getIndexsList().size())+" metadata elements");
816 out.write("<td class=\"bfont\">Subset completeness: "+dm.round((double)(blueDot*100/t4),5)+"%");
817 out.write("</table>\r\n");
818 out.write ("<p> <a href=\"Overall.html \">Summary</a></p>");
819 out.write ("</body></html>");
820 out.close();
821 fstream.close();
822
823 }catch (Exception e){//Catch exception if any
824 e.printStackTrace();
825 }
826 }
827
828 public void generateOverallStatisticsPage(HashMap MetadataSetMap){
829
830 String fileName = "Overall";
831 ArrayList wholeList = new ArrayList();
832 HashMap hp = mds.getMetadataSetMap();
833 Collection c = hp.values();
834 Iterator i = c.iterator();
835 int counter = 0;
836 String[][] MetadataData = new String[c.size()][2];
837
838 while(i.hasNext()){
839 MetadataSet mds = (MetadataSet)i.next();
840 wholeList.add(mds);
841 ArrayList newMDS = new ArrayList();
842 newMDS.add(mds);
843 MetadataData[counter][0] = mds.getName();
844 MetadataData[counter][1] = dm.getSingleMetadataSetCompleteness(newMDS)+"%";
845 counter++;
846 }
847
848 try{
849 String str = sdf.format(cl.getTime());
850 String collectionFolder = mds.getCollectionName();
851 FileWriter fstream = new FileWriter(destination+"/"+fileName+".html");
852 BufferedWriter out = new BufferedWriter(fstream);
853
854 out.write("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">");
855 out.write("<html>\r\n");
856 out.write("<head><title>Summary</title>\r\n");
857 out.write("<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\">\r\n");
858 out.write("<style type=\"text/css\">\r\n");
859 out.write(".tam{height:20; text-align:right}\r\n");
860 out.write("th{height:20; text-align:left}\r\n");
861 out.write("body{font-family: Arial;}\r\n");
862 out.write("</style>\r\n");
863 out.write("<script type=\"text/javascript\" src=\"../script/status3.js\"></script>\r\n");
864 out.write("</head>\r\n");
865 out.write("<body>\r\n");
866 out.write ("<td> <div style=\"float:left;\"> <a href=\"http://www.nzdl.org/greenstone3/mat\">Mat Home</a></div>");
867 out.write("<div style=\"float:right;\"><a href=\"http://chnm.gmu.edu/tools/surveys/4386/\">Please send feedback about the Mat tool</a></div></td><br>");
868 out.write("<h3 align=\"center\">Summary</h3>\r\n");
869 out.write("<table border=1 align=\"center\">\r\n");
870 out.write("<colgroup width=\"300\" span=\"2\">\r\n");
871 out.write("<tr>\r\n");
872 out.write("<th>OAI URL: </th>\r\n");
873
874 if(mds.getOAIURL().length()>50){
875 out.write("<td class=\"tam\"><font size=\"2px\"><a href=\""+mds.getOAIURL()+"\">"+mds.getOAIURL().substring(0,47)+"....</a></font>\r\n");
876 }
877 else{
878 out.write("<td class=\"tam\"><font size=\"2px\"><a href=\""+mds.getOAIURL()+"\">"+mds.getOAIURL()+"</a></font>\r\n");
879 }
880 out.write("</tr>\r\n");
881 out.write("<tr>\r\n");
882 out.write("<th>Number of Records:\r\n");
883 out.write("<td class=\"tam\">"+mds.getDocNum()+"\r\n");
884 out.write("</tr>\r\n");
885 out.write("<table border=1 align=\"center\">\r\n");
886 out.write("<colgroup width=\"300\" span=\"2\">\r\n");
887 out.write("<tr>\r\n");
888 out.write("<th>Metadata:\r\n");
889 out.write("<td class=\"tam\"><b>Completeness</b>\r\n");
890 out.write("</tr><br>\r\n");
891
892 for(int a = 0; a<MetadataData.length; a++){
893 out.write ("<tr>\r\n");
894
895 if(MetadataData[a][0].equalsIgnoreCase("dublin")){
896 out.write ("<th><a href =\""+MetadataData[a][0]+".html\">"+metadataSet1+"</a>");
897 }
898
899 else if (MetadataData[a][0].equalsIgnoreCase("extracted")){
900 out.write ("<th><a href =\""+MetadataData[a][0]+".html\">"+metadataSet2+"</a>");
901 }
902 else{
903 out.write ("<th><a href =\""+MetadataData[a][0]+".html\">"+MetadataData[a][0]+"</a>");
904 }
905
906 out.write ("<td class=\"tam\"> "+MetadataData[a][1]);
907 out.write ("</tr>\r\n");
908 }
909
910 out.write ("</table>");
911 out.write("<br>\r\n");
912 out.write("<FORM name=\"test\" onsubmit=\"checkStatus()\" action=\"\">");
913 out.write("<table border=1 align=\"center\">\r\n");
914 out.write ("<colgroup width=\"606\" span=\"1\">\r\n");
915 out.write("<tr>\r\n<th>Customize Visualization");
916 out.write("<tr>\r\n<td><LABEL FOR=\"H1\"><INPUT align=\"left\" TYPE=\"checkbox\" ID=\"H1\">Hide Empty Metadata Elements</LABEL>");
917 out.write("<tr>\r\n<td><LABEL FOR=\"H2\"><INPUT align=\"left\" TYPE=\"checkbox\" ID=\"H2\">Hide Completed Metadata Elements</LABEL>");
918 out.write("<tr>\r\n<td><LABEL FOR=\"H3\"><INPUT align=\"left\" TYPE=\"checkbox\" ID=\"H3\">Hide Documents with Empty Metadata Elements</LABEL>");
919 out.write("<tr>\r\n<td><LABEL FOR=\"H4\"><INPUT align=\"left\" TYPE=\"checkbox\" ID=\"H4\">Hide Documents with Completed Metadata Elements</LABEL>");
920 out.write("<tr>\r\n<th>Metadata: ");
921
922 if(MetadataData.length==1){
923 if(MetadataData[0][0].equalsIgnoreCase("dublin")){
924 out.write("<tr>\r\n<td><LABEL FOR=\"C1\"><INPUT align=\"left\" TYPE=\"radio\" name = \"col\" ID=\"C1\" checked> Dublin Core </LABEL>");
925 }
926 else {
927 out.write("<tr>\r\n<td><LABEL FOR=\"C4\"><INPUT align=\"left\" TYPE=\"radio\" name = \"col\" ID=\"C4\" checked>"+MetadataData[0][0]+"</LABEL>");
928 }
929 }
930 else{
931 for(int a = 0; a<MetadataData.length; a++){
932 if(a==0){
933 if(MetadataData[a][0].equalsIgnoreCase("dublin")){
934 out.write("<tr>\r\n<td><LABEL FOR=\"C1\"><INPUT align=\"left\" TYPE=\"radio\" name = \"col\" ID=\"C1\" checked>Dublin Core</LABEL>");
935 }
936 else {
937 out.write("<tr>\r\n<td><LABEL FOR=\"C4\"><INPUT align=\"left\" TYPE=\"radio\" name = \"col\" ID=\"C4\" checked>"+MetadataData[a][0]+"</LABEL>");
938 }
939 }
940 else{
941 if(MetadataData[a][0].equalsIgnoreCase("dublin")){
942 out.write("<tr>\r\n<td><LABEL FOR=\"C1\"><INPUT align=\"left\" TYPE=\"radio\" name = \"col\" ID=\"C1\">"+MetadataData[a][0]+"</LABEL>");
943 }
944 else{
945 out.write("<tr>\r\n<td><LABEL FOR=\"C4\"><INPUT align=\"left\" TYPE=\"radio\" name = \"col\" ID=\"C4\">"+MetadataData[a][0]+"</LABEL>");
946 }
947 }
948 }
949 }
950
951 out.write("<tr>\r\n<th>Order By Completeness : ");
952 out.write("<tr>\r\n<td><LABEL FOR=\"R1\"><INPUT align=\"left\" TYPE=\"radio\" name = \"order\" ID=\"R1\">Best Case to Worst Case</LABEL>");
953 out.write("<tr>\r\n<td><LABEL FOR=\"R2\"><INPUT align=\"left\" TYPE=\"radio\" name = \"order\" ID=\"R2\" checked >Worst Case to Best Case</LABEL>");
954 out.write("</table><p align=\"center\"><INPUT TYPE=\"button\" VALUE=\"Show Visualization\" onClick=\"checkStatus()\"> </FORM>");
955 out.write("<p align=\"center\">"+str+"</p>");
956 out.write("</body></html>");
957
958 out.close();
959 fstream.close();
960 }catch (Exception e){//Catch exception if any
961 e.printStackTrace();
962 }
963 }
964
965
966 public void WriteMetadataSetDetailHTML(MetadataSet mdset){
967
968 String fileName = mdset.getName();
969
970 try{
971 String str = sdf.format(cl.getTime());
972 String collectionFolder = mds.getCollectionName();
973 FileWriter fstream = new FileWriter(destination+"/"+fileName+".html");
974 BufferedWriter out = new BufferedWriter(fstream);
975
976 out.write("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">");
977 out.write("<html>\r\n");
978 out.write("<head>\r\n<title>Metadata Detail</title>\r\n");
979 out.write("<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\">\r\n");
980 out.write("<style type=\"text/css\">\r\n");
981 out.write("td{height:20;text-align:right;}\r\n");
982 out.write("th{height:20;text-align:left;}\r\n");
983 out.write("body{font-family: Arial;}\r\n");
984 out.write("</style>\r\n");
985 out.write( "</head>\r\n");
986 out.write ("<body>\r\n");
987 out.write ("<td> <div style=\"float:left;\"> <a href=\" Overall.html \">Summary</a></div>");
988 out.write("<div style=\"float:right;\"><a href=\"http://chnm.gmu.edu/tools/surveys/4386/\">Please send feedback about the Mat tool</a></div></td><br>");
989
990 if(fileName.equalsIgnoreCase("dublin")){
991 out.write ("<h3 align=\"center\">Metadata Detail: "+metadataSet1+"</h3>\r\n");
992 }
993 else if (fileName.equalsIgnoreCase("extracted")){
994 out.write ("<h3 align=\"center\">Metadata Detail: "+metadataSet2+"</h3>\r\n");
995 }
996 else{
997 out.write ("<h3 align=\"center\">Metadata Detail: "+fileName+"</h3>\r\n");
998 }
999
1000 out.write ("<table border=\"1\" align=\"center\">\r\n");
1001 out.write ("<colgroup width=\"200\" span=\"2\">\r\n");
1002 out.write ("<tr>\r\n");
1003 out.write ("<th>Elements:\r\n");
1004 out.write ("<td><b>Completeness</b>\r\n");
1005 out.write ("</tr>\r\n");
1006
1007 ArrayList elementList = mdset.getIndexsList();
1008 HashMap hp = new HashMap();
1009 int num = elementList.size();
1010
1011 for(int i = 0; i<num; i++){
1012 String elementName = (String)elementList.get(i);
1013 Double elementValue = new Double(dm.Mean(elementName));
1014 hp.put(elementName, elementValue);
1015 }
1016
1017 elementList = new ArrayList();
1018 elementList = dm.sortMap(hp);
1019 num = elementList.size();
1020
1021 for(int i = 0; i<num; i++){
1022 Map.Entry entry = (Map.Entry) elementList.get(i);
1023 String elementName = (String) entry.getKey();
1024
1025 out.write ("<tr>\r\n");
1026 out.write ("<th><a href=\""+ elementName +".html\">"+elementName+"</a>\r\n");
1027 out.write ("<td>"+ dm.Mean(elementName)+"%\r\n");
1028 out.write ("</tr>\r\n");
1029 }
1030
1031 out.write("</table>\r\n");
1032 out.write ("<p align=\"center\"> <a href=\" Overall.html \">Summary</a></p>");
1033 out.write("<p align=\"center\">"+str+"</p>");
1034 out.write ("</body></html>\r\n");
1035
1036 out.close();
1037 fstream.close();
1038 }catch (Exception e){//Catch exception if any
1039 e.printStackTrace();
1040 }
1041 }
1042
1043 public void generateMetadataElementDetailPage(MetadataSet mds){
1044
1045 ArrayList nameList = mds.getIndexsList();
1046
1047 for(int i = 0; i<nameList.size();i++){
1048 WriteMetadataElementDetailHTML((String)nameList.get(i),mds.getName());
1049 }
1050 }
1051
1052 public void WriteMetadataElementDetailHTML(String name, String linkName){
1053
1054 String fileName = name;
1055
1056 try{
1057 String collectionFolder = mds.getCollectionName();
1058
1059 FileWriter fstream = new FileWriter(destination+"/"+fileName+".html");
1060 BufferedWriter out = new BufferedWriter(fstream);
1061
1062 out.write("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">");
1063 out.write ("<html>\r\n");
1064 out.write("<head>\r\n<title> "+ name +" </title>\r\n");
1065 out.write("<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\">\r\n");
1066 out.write("<style type=\"text/css\">\r\n");
1067 out.write("td{height:20; text-align:left;}\r\n");
1068 out.write(".tam{height:20; text-align:center}\r\n");
1069 out.write("th{height:20; text-align:left;}\r\n");
1070 out.write("body{font-family: Arial;}\r\n");
1071 out.write("</style>\r\n");
1072 out.write("<script type=\"text/javascript\" src=\"http://www.nzdl.org/greenstone3/mat/script/status3.js\"></script>");
1073 out.write("</head>\r\n");
1074 out.write("<body>\r\n");
1075 out.write ("<td> <div style=\"float:left;\"> <a href=\" Overall.html \">Summary</a></div>");
1076 out.write("<div style=\"float:right;\"><a href=\"http://chnm.gmu.edu/tools/surveys/4386/\">Please send feedback about the Mat tool</a></div></td>");
1077 out.write("&raquo;");
1078
1079 if(linkName.equalsIgnoreCase("dublin")){
1080 out.write ("<a href=\""+linkName+".html\">Metadata Detail ("+metadataSet1+")</a>");
1081 }
1082 else if (linkName.equalsIgnoreCase("extracted")){
1083 out.write ("<a href=\""+linkName+".html\">Metadata Detail ("+metadataSet2+")</a>");
1084 }
1085 else{
1086 out.write ("<a href=\""+linkName+".html\">Metadata Detail ("+linkName+")</a>");
1087 }
1088
1089 if(name.startsWith("dc.")){
1090 int dot = name.lastIndexOf('.');
1091 dot++;
1092 String nameReplace = name.substring(dot);
1093
1094 if(url.containsKey(nameReplace)){
1095 out.write ("<h3 align=\"center\">Metadata Element Detail:<a href=\""+ url.get(nameReplace) +"\">"+ name +"</a></h3>\r\n");
1096 }
1097 else{
1098 out.write ("<h3 align=\"center\">Metadata Element Detail: "+ name +"</a></h3>\r\n");
1099 }
1100 }
1101
1102 else{
1103 out.write ("<h3 align=\"center\">Metadata Element Detail: "+ name +"</h3>\r\n");
1104 }
1105
1106 out.write ("<table border=\"1\" align=\"center\" width=1000>\r\n");
1107 out.write ("<colgroup width=\"500\" span=\"2\">\r\n");
1108 out.write ("<tr>\r\n");
1109 out.write ("<td><b>Total Number of Records</b>\r\n");
1110 out.write ("<td class=\"tam\">"+mds.getDocNum()+"\r\n");
1111 out.write ("</tr>\r\n");
1112 out.write ("<tr>\r\n");
1113 out.write ("<td><b>Unique Values</b>\r\n");
1114 out.write ("<td class=\"tam\">"+dm.getDistinctNumber(name)+"\r\n");
1115 out.write ("</tr>\r\n");
1116 out.write ("<tr>\r\n");
1117 out.write ("<td><b>Total times element used</b>\r\n");
1118 out.write ("<td class=\"tam\">"+dm.getFrequency(name) +"\r\n");
1119 out.write ("</tr>\r\n");
1120 out.write ("<tr>\r\n");
1121 out.write ("<td><b>No. of records containing element</b>\r\n");
1122 out.write ("<td class=\"tam\"> "+dm.getDocumentUsedElement(name)+"\r\n");
1123 out.write ("</tr>\r\n");
1124 out.write ("<tr>\r\n");
1125
1126 double percentage = dm.Mean(name);
1127
1128 out.write ("<th title = \"The completeness means the arithmetic average \"> <div style=\"float:left;\"> Completeness</div>");
1129 out.write ("<td class=\"tam\"> "+dm.Mean(name) +"%\r\n");
1130 out.write ("</tr>\r\n");
1131 out.write ("<tr>\r\n");
1132 out.write ("<td><b><div style=\"float:left;\">Minimum "+name +" usage in any record</div></b><div style=\"float:right;\"> <a href=\"#\" onClick=\"helpWindow('Minimum','"+name+"')\" style=\"font-size:small;\">What's this?</a></div>\r\n");
1133 out.write ("<td class=\"tam\"> "+dm.getMinRange(name) +"\r\n");
1134 out.write ("</tr>\r\n");
1135 out.write ("<tr>\r\n");
1136 out.write ("<td><b><div style=\"float:left;\">Maximum "+name +" usage in any record</div></b><div style=\"float:right;\"> <a href=\"#\" onClick=\"helpWindow('Maximum','"+name+"')\" style=\"font-size:small;\">What's this?</a></div>\r\n");
1137 out.write ("<td class=\"tam\"> "+dm.getMaxRange(name) +"\r\n");
1138 out.write ("</tr>\r\n");
1139 out.write ("<tr>\r\n");
1140 out.write ("<td><b><div style=\"float:left;\">Average "+name +" usage/record</div></b><div style=\"float:right;\"> <a href=\"#\" onClick=\"helpWindow('Average','"+name+"')\" style=\"font-size:small;\">What's this?</a></div>\r\n");
1141 out.write ("<td class=\"tam\"> "+dm.Average(name) +"\r\n");
1142 out.write ("</tr>\r\n");
1143 out.write ("<tr>\r\n");
1144 out.write ("<td><b><div style=\"float:left;\">Mode of "+name +" usage/record</div></b><div style=\"float:right;\"><a href=\"#\" onClick=\"helpWindow('Mode','"+name+"')\" style=\"font-size:small;\">What's this?</a></div>\r\n");
1145 out.write ("<td class=\"tam\"> "+dm.getMode(name) +"\r\n");
1146 out.write ("</tr>\r\n");
1147 out.write ("<tr>\r\n");
1148 out.write ("<td><b><div style=\"float:left;\">Coverage of the mode of "+name +" usage/record</div></b><div style=\"float:right;\"><a href=\"#\" onClick=\"helpWindow('Mode Frequency','"+name+"')\" style=\"font-size:small;\">What's this?</a></div>\r\n");
1149 out.write ("<td class=\"tam\">"+dm.ModeFrequency(name) +"%\r\n");
1150 out.write ("</tr>\r\n");
1151
1152 if(mds.getOaiPrefix().equalsIgnoreCase("oai_dc")){
1153
1154 HashMap suggestionMap = generateMetadataElementSortList(fileName,"ASCII",linkName);
1155
1156 if(suggestionMap.size()>0){
1157 boolean status = compareElement(fileName,collectionFolder,suggestionMap,linkName);
1158 if(status){
1159 out.write("<tr><td class=\"tam\"><a href =\""+fileName+"_Suggestion.html\">View Potential Duplicate List</a>");
1160 if(percentage<100){
1161 out.write ("<td class=\"tam\"><a href =\""+fileName+"_IncompletedList.html\">Records missing "+fileName+"</a></td>");
1162 createIncompletedList(fileName,linkName,collectionFolder);
1163 }
1164 else{
1165 out.write("<td class=\"tam\">No Records Missing "+ fileName);
1166 }
1167 }
1168 else{
1169 out.write("<tr><td class=\"tam\">No Potential Duplicates");
1170 if(percentage<100){
1171 out.write ("<td class=\"tam\"><a href =\""+fileName+"_IncompletedList.html\">Records missing "+fileName+"</a></td>");
1172 createIncompletedList(fileName,linkName,collectionFolder);
1173 }
1174 else{
1175 out.write("<td class=\"tam\">No Records Missing "+ fileName);
1176 }
1177 }
1178 }
1179 else{
1180 out.write("<tr><td class=\"tam\">No Potential Duplicates");
1181 if(percentage<100){
1182 out.write ("<td class=\"tam\"><a href =\""+fileName+"_IncompletedList.html\">Records missing "+fileName+"</a></td>");
1183 createIncompletedList(fileName,linkName,collectionFolder);
1184 }
1185 else{
1186 out.write("<td class=\"tam\">No Records Missing "+ fileName);
1187 }
1188 }
1189 generateMetadataElementSortList(fileName,"Frequency-based",linkName);
1190 }
1191 else{
1192
1193 HashMap suggestionMap = generateMetadataElementSortList(fileName,"ASCII",linkName);
1194
1195 if(suggestionMap.size()>0){
1196 boolean status = compareElement(fileName,collectionFolder,suggestionMap,linkName);
1197 if(status){
1198 out.write("<tr><td class=\"tam\"><a href =\""+fileName+"_Suggestion.html\">View Potential Duplicate List</a>");
1199 }
1200 else{
1201 out.write("<tr><td class=\"tam\">No Potential Duplicates");
1202 }
1203 }
1204 else{
1205 out.write("<tr><td class=\"tam\">No Potential Duplicates");
1206 }
1207
1208 if(percentage<100){
1209 out.write ("<td class=\"tam\"><a href =\""+fileName+"_IncompletedList.html\">Records missing "+fileName+"</a></td>");
1210 createIncompletedList(fileName,linkName,collectionFolder);
1211 }
1212 else{
1213 out.write("<td class=\"tam\">No Records Missing "+ fileName);
1214 }
1215
1216 generateMetadataElementSortList(fileName,"Frequency-based",linkName);
1217 }
1218
1219 out.write ("<tr>\r\n");
1220 out.write ("<td class=\"tam\"><a href =\""+fileName+"_Frequency-based.html\">"+"View Full Frequency Sorted list</a>" +
1221 " <td class=\"tam\"><a href =\""+fileName+"_ASCII.html\">"+"View Full ASCII Sorted list</a></td>");
1222 out.write ("</tr>\r\n");
1223 out.write ("</table><br>\r\n");
1224 out.write ("<table border=\"1\" align=\"center\" width=1000>\r\n");
1225 out.write ("<colgroup width=\"500\" span=\"2\">\r\n");
1226 out.write ("<tr>\r\n");
1227 out.write ("<th>ASCII-Based\r\n");
1228 out.write ("<th>First Five\r\n");
1229 out.write ("</tr>\r\n");
1230
1231 String[] temp = dm.getSortList(name,"ASCII");
1232 String[] temp2 = {"&nbsp;","&nbsp;","&nbsp;","&nbsp;","&nbsp;"};
1233 int length = 0;
1234
1235 if(temp.length>=5){
1236 length=5;
1237 }
1238 else if(temp.length<5){
1239 length = temp.length;
1240 }
1241
1242 for(int i =0; i<length; i++){
1243 temp2[i] = temp[i];
1244 }
1245
1246 int x = temp2.length;
1247 for(int a = 0; a<x; a++){
1248 out.write ("<tr>\r\n");
1249 if(!temp2[a].equals("&nbsp;")){
1250 out.write ("<th>"+(a+1)+"\r\n");
1251 }
1252 else{
1253 out.write ("<th>&nbsp;\r\n");
1254 }
1255
1256 if(temp2[a].startsWith("http://")){
1257 if(temp2[a].length()>60){
1258 out.write ("<td><a href = \""+temp2[a]+"\">"+temp2[a].substring(0, 60)+"...</a>");
1259 }
1260 else {
1261 out.write ("<td><a href = \""+temp2[a]+"\">"+temp2[a]+"</a>");
1262 }
1263 }
1264 else{
1265 char singleChar = temp2[a].charAt(0);
1266 if(temp2[a].length()>61 ){
1267 if(temp2[a].startsWith(" ") && ((int)singleChar!=65279)){
1268 temp2[a] = temp2[a].substring(1);
1269 out.write ("<td>"+spaceLeft+space+spaceRight+temp2[a].substring(0, 60)+"<a href=\""+fileName+"_ASCII.html#topFive\"> ... </a>\r\n");
1270 }
1271 else if (((int)singleChar==65279)){
1272 temp2[a] = temp2[a].substring(1);
1273 out.write ("<td>"+spaceLeft+oddChar+spaceRight+temp2[a].substring(0, 60)+"<a href=\""+fileName+"_ASCII.html#topFive\"> ... </a>\r\n");
1274 }
1275 else{
1276 out.write ("<td>"+temp2[a].substring(0, 60)+"<a href=\""+fileName+"_ASCII.html#topFive\"> ... </a>\r\n");
1277 }
1278 }
1279 else {
1280 if(temp2[a].startsWith(" ")&& ((int)singleChar!=65279)){
1281 temp2[a] = temp2[a].substring(1);
1282 out.write ("<td>"+spaceLeft+space+spaceRight+temp2[a]+"\r\n");
1283 }
1284 else if (((int)singleChar==65279)){
1285 temp2[a] = temp2[a].substring(1);
1286 out.write ("<td>"+spaceLeft+oddChar+spaceRight+temp2[a]+"\r\n");
1287 }
1288 else{
1289 out.write ("<td>"+temp2[a]+"\r\n");
1290 }
1291 }
1292 }
1293 out.write ("</tr>\r\n");
1294 }
1295
1296 String[] temp3 ={"&nbsp;","&nbsp;","&nbsp;","&nbsp;","&nbsp;"};
1297 length = 0;
1298 int start = temp.length;
1299
1300 if(temp.length>=5){
1301 length= 5;
1302 }
1303 else if(temp.length<5){
1304 length = temp.length;
1305 }
1306
1307 for(int i = length; i>0; i--){
1308 temp3[i-1] = temp[start-1];
1309 start--;
1310 }
1311
1312 out.write ("<tr>\r\n");
1313 out.write ("<th>......\r\n");
1314 out.write ("<th>Last Five\r\n");
1315 out.write ("</tr>\r\n");
1316
1317 int counter = temp.length;
1318 start = temp.length;
1319 x = temp3.length;
1320
1321 for(int a = 0; a<x; a++){
1322 out.write ("<tr>\r\n");
1323 if(!temp3[a].equals("&nbsp;")){
1324 out.write ("<th>"+(start-length+1+a));
1325 }
1326 else{
1327 out.write ("<th>&nbsp;");
1328 }
1329 if(temp3[a].startsWith("http://")){
1330 if(temp3[a].length()>60){
1331 out.write ("<td><a href = \""+temp3[a]+"\">"+temp3[a].substring(0, 60)+"...</a>");
1332 }
1333 else {
1334 out.write ("<td><a href = \""+temp3[a]+"\">"+temp3[a]+"</a>");
1335 }
1336 }
1337 else{
1338 char singleChar = temp3[a].charAt(0);
1339 if(temp3[a].length()>61){
1340 if(temp3[a].startsWith(" ") && (int)singleChar!=65279){
1341 temp3[a] = temp3[a].substring(1);
1342 out.write ("<td>"+spaceLeft+space+spaceRight+temp3[a].substring(0, 60)+"<a href=\""+fileName+"_ASCII.html#lastFive\"> ... </a>\r\n");
1343 }
1344 else if((int)singleChar==65279){
1345 temp3[a] = temp3[a].substring(1);
1346 out.write ("<td>"+spaceLeft+oddChar+spaceRight+temp3[a].substring(0, 60)+"<a href=\""+fileName+"_ASCII.html#lastFive\"> ... </a>\r\n");
1347 }
1348 else{
1349 out.write ("<td>"+temp3[a].substring(0, 60)+"<a href=\""+fileName+"_ASCII.html#lastFive\"> ... </a>\r\n");
1350 }
1351 }
1352 else{
1353 if(temp3[a].startsWith(" ")&& (int)singleChar!=65279){
1354 temp3[a] = temp3[a].substring(1);
1355 out.write ("<td>"+spaceLeft+space+spaceRight+temp3[a]+"\r\n");
1356 }
1357 else if((int)singleChar==65279){
1358 out.write ("<td>"+spaceLeft+oddChar+spaceRight+temp3[a]+"\r\n");
1359 }
1360 else{
1361 out.write ("<td>"+temp3[a]+"\r\n");
1362 }
1363 }
1364 out.write ("</tr>\r\n");
1365 }
1366 }
1367 out.write ("</table><br>\r\n");
1368 out.write ("<table border=\"1\" align=\"center\" width=1000>\r\n");
1369 out.write ("<colgroup width=\"500\" span=\"2\">\r\n");
1370 out.write ("<tr>\r\n");
1371 out.write ("<th>Frequency-Based:\r\n");
1372 out.write ("<th>First Five\r\n");
1373 out.write ("</tr>\r\n");
1374
1375 HashMap xMap = dm.getDistinctValueMap(name);
1376 temp = dm.getSortList(name,"Frequency-based");
1377 temp2 = new String[] {"&nbsp;","&nbsp;","&nbsp;","&nbsp;","&nbsp;"};
1378 length = 0;
1379
1380 if(temp.length>=5){
1381 length=5;
1382 }
1383 else if(temp.length<5){
1384 length = temp.length;
1385 }
1386
1387 for(int i =0; i<length; i++){
1388 temp2[i] = temp[i];
1389 }
1390
1391 ArrayList tempList = new ArrayList();
1392 for(int i =0; i<length; i++){
1393 if(((Integer)xMap.get(temp2[i])).intValue()==1){
1394 tempList.add(temp2[i]);
1395 }
1396 }
1397
1398 x = temp2.length;
1399 for(int a = 0; a<x; a++){
1400 out.write ("<tr>\r\n");
1401 if(!temp2[a].equals("&nbsp;")){
1402 char singleChar = temp2[a].charAt(0);
1403 out.write ("<th>"+(a+1)+". (No. of occurrences: "+((Integer)xMap.get(temp2[a])).toString()+")\r\n");
1404 if(temp2[a].startsWith("http://") && (int)singleChar != 65279){
1405 if(temp2[a].length()>61){
1406 out.write ("<td><a href = \""+temp2[a]+"\">"+temp2[a].substring(0, 60)+"...</a>");
1407 }
1408 else {
1409 out.write ("<td><a href = \""+temp2[a]+"\">"+temp2[a]+"</a>");
1410 }
1411 }
1412
1413 else{
1414 if(temp2[a].length()>61){
1415 if(temp2[a].startsWith(" ")){
1416 temp2[a] = temp2[a].substring(1);
1417 out.write ("<td>"+spaceLeft+space+spaceRight+temp2[a].substring(0, 60)+"<a href=\""+fileName+"_Frequency-based.html#topFive\"> ... </a>\r\n");
1418 }
1419 else if((int)singleChar == 65279){
1420 temp2[a] = temp2[a].substring(1);
1421 out.write ("<td>"+spaceLeft+oddChar+spaceRight+temp2[a].substring(0, 60)+"<a href=\""+fileName+"_Frequency-based.html#topFive\"> ... </a>\r\n");
1422 }
1423 else{
1424 out.write ("<td>"+temp2[a].substring(0, 60)+"<a href=\""+fileName+"_Frequency-based.html#topFive\"> ... </a>\r\n");
1425 }
1426 }
1427 else{
1428 if(temp2[a].startsWith(" ")){
1429 temp2[a] = temp2[a].substring(1);
1430 out.write ("<td>"+spaceLeft+space+spaceRight+temp2[a]+"\r\n");
1431 }
1432 else if((int)singleChar == 65279){
1433 temp2[a] = temp2[a].substring(1);
1434 out.write ("<td>"+spaceLeft+oddChar+spaceRight+temp2[a]+"\r\n");
1435 }
1436 else{
1437 out.write ("<td>"+temp2[a]+"\r\n");
1438 }
1439 }
1440 }
1441 }
1442 else{
1443 out.write ("<th>&nbsp;\r\n");
1444 out.write ("<td>\r\n");
1445 }
1446 out.write ("</tr>\r\n");
1447 }
1448
1449 temp3 = new String[]{"&nbsp;","&nbsp;","&nbsp;","&nbsp;","&nbsp;"};
1450 length = 0;
1451 start = temp.length;
1452
1453 if(temp.length>=5){
1454 length= 5;
1455 }
1456 else if(temp.length<5){length = temp.length;}
1457
1458 for(int i = length; i>0; i--){
1459 temp3[i-1] = temp[start-1];
1460 start--;
1461 }
1462
1463 out.write ("<tr>\r\n");
1464 out.write ("<th>......\r\n");
1465 out.write ("<th>Last Five\r\n");
1466 out.write ("</tr>\r\n");
1467
1468 x = temp3.length;
1469 start = temp.length;
1470 for(int a = 0; a<x; a++){
1471 out.write ("<tr>\r\n");
1472 if(!temp3[a].equals("&nbsp;")){
1473 out.write ("<th>"+(start-length+1+a)+". (No. of occurrences: "+((Integer)xMap.get(temp3[a])).toString()+")\r\n");
1474 char singleChar = temp3[a].charAt(0);
1475 if(temp3[a].startsWith("http://")){
1476 if(temp3[a].length()>60){
1477 out.write ("<td><a href = \""+temp3[a]+"\">"+temp3[a].substring(0, 60)+"...</a>");
1478 }
1479 else {
1480 out.write ("<td><a href = \""+temp3[a]+"\">"+temp3[a]+"</a>");
1481 }
1482 }
1483 else{
1484 if(temp3[a].length()>61){
1485 if(temp3[a].startsWith(" ")){
1486 temp3[a] = temp3[a].substring(1);
1487 out.write ("<td>"+spaceLeft+space+spaceRight+temp3[a].substring(0, 60)+"<a href=\""+fileName+"_Frequency-based.html#lastFive\"> ... </a>\r\n");
1488 }
1489 else if((int)singleChar == 65279){
1490 temp3[a] = temp3[a].substring(1);
1491 out.write ("<td>"+spaceLeft+oddChar+spaceRight+temp3[a].substring(0, 60)+"<a href=\""+fileName+"_Frequency-based.html#lastFive\"> ... </a>\r\n");
1492 }
1493 else{
1494 out.write ("<td>"+temp3[a].substring(0, 60)+"<a href=\""+fileName+"_Frequency-based.html#lastFive\"> ... </a>\r\n");
1495 }
1496 }
1497 else{
1498 if(temp3[a].startsWith(" ")){
1499 out.write ("<td>"+spaceLeft+space+spaceRight+temp3[a]+"\r\n");
1500 }
1501 else if((int)singleChar == 65279){
1502 temp3[a] = temp3[a].substring(1);
1503 out.write ("<td>"+spaceLeft+oddChar+spaceRight+temp3[a]+"\r\n");
1504 }
1505 else{
1506 out.write ("<td>"+temp3[a]+"\r\n");
1507 }
1508 }
1509 }
1510 }
1511 else{
1512 out.write ("<th>&nbsp;\r\n");
1513 out.write ("<th>\r\n");
1514 }
1515 out.write ("</tr>\r\n");
1516 }
1517
1518 out.write ("</table>\r\n");
1519 out.write ("<p> <a href=\" Overall.html \">Summary</a>");
1520 out.write ("&raquo;");
1521
1522 if(linkName.equalsIgnoreCase("dublin")){
1523 out.write ("<a href=\""+linkName+".html\">Metadata Detail ("+metadataSet1+")</a>");
1524 }
1525 else if (linkName.equalsIgnoreCase("extracted")){
1526 out.write ("<a href=\""+linkName+".html\">Metadata Detail ("+metadataSet2+")</a>");
1527 }
1528 else{
1529 out.write ("<a href=\""+linkName+".html\">Metadata Detail ("+linkName+")</a>");
1530 }
1531
1532 out.write ("</body></html>\r\n");
1533 out.close();
1534 fstream.close();
1535 }catch (Exception e){//Catch exception if any
1536 e.printStackTrace();
1537 }
1538 }
1539
1540 private HashMap generateMetadataElementSortList(String title, String sort, String metadataSetName){
1541
1542 String fileName = title+"_"+sort;
1543 String collectionFolder = mds.getCollectionName();
1544 String IDENTIFIER = linkIdentifier;
1545 SearchLink sl = new SearchLink(mds.StatsDirectory);
1546 HashMap suggestionMap = new HashMap();
1547 HashMap valueMap = sl.createValueMap(title,collectionFolder);
1548 HashMap linkMap = sl.createLinkMap(IDENTIFIER,collectionFolder);
1549 HashMap internalIDMap = dm.getInternalIdentifier(title);
1550 boolean status = false;
1551
1552 try{
1553 FileWriter fstream = new FileWriter(destination+"/"+fileName+".html");
1554 BufferedWriter out = new BufferedWriter(fstream);
1555
1556 out.write("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">");
1557 out.write ("<html>\r\n");
1558 out.write("<head>\r\n<title>Metadata Element Sort List</title>\r\n");
1559 out.write("<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\">\r\n");
1560 out.write("<script type=\"text/javascript\" src=\"../script/getInfomation.js\"></script>\r\n");
1561 out.write("<script type=\"text/javascript\" src=\"http://yui.yahooapis.com/2.4.1/build/yahoo/yahoo-min.js\"></script>\r\n");
1562 out.write("<script type=\"text/javascript\" src=\"http://yui.yahooapis.com/2.4.1/build/event/event-min.js\"></script>\r\n");
1563 out.write("<script type=\"text/javascript\" src=\"http://yui.yahooapis.com/2.4.1/build/connection/connection-min.js\"></script>\r\n");
1564 out.write("<style type=\"text/css\">\r\n");
1565 out.write("td{height:20; text-align:left;padding-left:5px;}\r\n");
1566 out.write(".tam{height:20; text-align:center}\r\n");
1567 out.write("th{height:20; text-align:center;}\r\n");
1568 out.write("body{font-family: Arial;}\r\n");
1569 out.write("</style>\r\n");
1570 out.write("</head><body id=\""+collectionFolder+"\">\r\n");
1571 out.write("<td><div style=\"float:left;\"><a href=\" Overall.html \">Summary</a>");
1572 out.write("&raquo;");
1573
1574 if(metadataSetName.equalsIgnoreCase("dublin")){
1575 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet1+")</a>");
1576 }
1577 else if (metadataSetName.equalsIgnoreCase("extracted")){
1578 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet2+")</a>");
1579 }
1580 else{
1581 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSetName+")</a>");
1582 }
1583
1584 out.write("&raquo;");
1585 out.write("<a href=\""+title+".html\">"+title+"</a></div>");
1586 out.write("<div style=\"float:right;\"><a href=\"http://chnm.gmu.edu/tools/surveys/4386/\">Please send feedback about the Mat tool</a></div></td><br>");
1587
1588 String[] list = dm.getSortList(title, sort);
1589
1590 if(list.length>=1){
1591 if(sort.equals("ASCII")){
1592 out.write("<table border=\"1\" align=\"center\" width=\"800\">\r\n");
1593 out.write ("<h2 align=\"center\">"+ title+"</h2>\r\n");
1594 out.write ("<th class=\"tam\">ASCII Sort\r\n");
1595 out.write ("<th class=\"tam\">Element Values\r\n");
1596 out.write ("<th class=\"tam\">Source Documents\r\n");
1597 out.write ("<th class=\"tam\">Internal Link\r\n");
1598 out.write ("<a name='topFive'>\r\n");
1599
1600 int counter = 0;
1601
1602 for(int i = 0; i<list.length; i++){
1603 if(list.length<=5 && i==0){
1604 out.write ("<a name='lastFive'>\r\n");
1605 }
1606 else if((list.length>5) && (list.length-5==i)){
1607 out.write ("<a name='lastFive'>\r\n");
1608 }
1609
1610 InternalLink il= (InternalLink)internalIDMap.get(list[i]);
1611 ArrayList alist2 = il.retrieveList();
1612 String id = (String)alist2.get(0);
1613 id = id.substring(4);
1614 out.write("<tr id=\""+id+"\" >\r\n");
1615
1616 if(list[i].length()>=201){
1617 if(list[i].startsWith("http://")){
1618 if(title.equals(IDENTIFIER)){
1619 if(list[i].length()>=100){
1620 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"...</a>");
1621 }
1622 else{
1623 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
1624 }
1625 out.write("<td><a href=\""+list[i]+"\">Source</a>\r\n");
1626 }
1627 else{
1628 ArrayList alist= sl.CreateIndentifierLinkPage(title, list[i],IDENTIFIER, collectionFolder,valueMap,linkMap);
1629 if(list[i].length()>=100){
1630 String url = (String)alist.get(0);
1631 if(alist.size()==1){
1632 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"...</a>");
1633 if(url.startsWith("http://")){
1634 out.write("<td><a href=\""+(String)alist.get(0)+"\">Source</a>\r\n");
1635 }else{
1636 out.write("<td>Source\r\n");
1637 }
1638 }
1639 else if(alist.size()>1){
1640 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"...</a>");
1641 if(url.startsWith("http://")){
1642 out.write("<td><a href=\""+(String)alist.get(0)+"\">Source</a>...\r\n");
1643 }else{
1644 out.write("<td>Source\r\n");
1645 }
1646 }
1647 else{
1648 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"</a>");
1649 out.write("<td>No Source Available\r\n");
1650 }
1651 suggestionMap.put(list[i], url);
1652 }
1653 else{
1654 String url = (String)alist.get(0);
1655 if(alist.size()==1){
1656 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
1657 if(url.startsWith("http://")){
1658 out.write("<td><a href=\""+(String)alist.get(0)+"\">Source</a>\r\n");
1659 }else{
1660 out.write("<td>Source\r\n");
1661 }
1662 }
1663 else if(alist.size()>1){
1664 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
1665 if(url.startsWith("http://")){
1666 out.write("<td><a href=\""+(String)alist.get(0)+"\">Source</a>...\r\n");
1667 }else{
1668 out.write("<td>Source\r\n");
1669 }
1670 }
1671 else{
1672 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
1673 out.write("<td>No Source Available\r\n");
1674 }
1675 suggestionMap.put(list[i], url);
1676 }
1677 }
1678 }
1679 else{
1680 ArrayList alist= sl.CreateIndentifierLinkPage(title, list[i],IDENTIFIER, collectionFolder,valueMap,linkMap);
1681
1682
1683 if(list[i].startsWith(" ")){
1684 String elements = list[i];
1685 list[i] = list[i].substring(1);
1686
1687 if(alist.size()==1){
1688 out.write("<td>"+(counter+1)+"<td>"+spaceLeft+space+spaceRight+list[i].substring(0, 200)+"...<td>"+
1689 "<a href=\""+(String)alist.get(0)+"\">Source</a>\r\n");
1690 suggestionMap.put(elements, (String)alist.get(0));
1691 }
1692 else if(alist.size()>1){
1693 out.write("<td>"+(counter+1)+"<td>"+spaceLeft+space+spaceRight+list[i].substring(0, 200)+"...<td>"+
1694 "<a href=\""+(String)alist.get(0)+"\">Source</a>...\r\n");
1695 suggestionMap.put(elements, (String)alist.get(0));
1696 }
1697 else{
1698 out.write("<td>"+(counter+1)+"<td>"+spaceLeft+space+spaceRight+list[i].substring(0, 200)+"...<td>No Source Available \r\n");
1699 suggestionMap.put(elements, "No Source Available");
1700 }
1701 }
1702 else{
1703 if(alist.size()==1){
1704 out.write("<td>"+(counter+1)+"<td>"+list[i].substring(0, 200)+"...<td>" +
1705 "<a href=\""+(String)alist.get(0)+"\">Source</a>\r\n");
1706 suggestionMap.put(list[i], (String)alist.get(0));
1707 }
1708 else if(alist.size()>1){
1709 out.write("<td>"+(counter+1)+"<td>"+list[i].substring(0, 200)+"...<td>" +
1710 "<a href=\""+(String)alist.get(0)+"\">Source</a>...\r\n");
1711 suggestionMap.put(list[i], (String)alist.get(0));
1712 }
1713 else{
1714 out.write("<td>"+(counter+1)+"<td>"+list[i].substring(0, 200)+"... <td>No Source Available\r\n");
1715 suggestionMap.put(list[i],"No Source Available");
1716 }
1717 }
1718 }
1719
1720 }
1721 else{
1722 if(list[i].startsWith("http://")){
1723 if(title.equals(IDENTIFIER)){
1724 if(list[i].length()>=100){
1725 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"...</a>");
1726 }
1727 else{
1728 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
1729 }
1730 out.write("<td><a href=\""+list[i]+"\">Source</a>\r\n");
1731 }
1732 else{
1733 ArrayList alist= sl.CreateIndentifierLinkPage(title, list[i],IDENTIFIER, collectionFolder,valueMap,linkMap);
1734
1735 if(list[i].length()>=100){
1736 if(alist.size()==1){
1737 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"...</a>");
1738 out.write("<td><a href=\""+(String)alist.get(0)+"\">Source</a>\r\n");
1739 }
1740 else if(alist.size()>1){
1741 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"...</a>");
1742 out.write("<td><a href=\""+(String)alist.get(0)+"\">Source</a>...\r\n");
1743 }
1744 else{
1745 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
1746 out.write("<td>No Source Available\r\n");
1747 }
1748 }
1749 else{
1750 if(alist.size()==1){
1751 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
1752 out.write("<td><a href=\""+(String)alist.get(0)+"\">Source</a>\r\n");
1753 suggestionMap.put(list[i], (String)alist.get(0));
1754 }
1755 else if(alist.size()>1){
1756 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
1757 out.write("<td><a href=\""+(String)alist.get(0)+"\">Source</a>...\r\n");
1758 suggestionMap.put(list[i], (String)alist.get(0));
1759 }
1760 else{
1761 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
1762 out.write("<td>No Source Available\r\n");
1763 suggestionMap.put(list[i],"No Source Available");
1764 }
1765 }
1766 }
1767 }
1768 else{
1769 ArrayList alist= sl.CreateIndentifierLinkPage(title, list[i],IDENTIFIER, collectionFolder,valueMap,linkMap);
1770
1771 if(list[i].startsWith(" ")){
1772 String elements = list[i];
1773 list[i] = list[i].substring(1);
1774
1775 if(alist.size()==1){
1776 out.write("<td>"+(counter+1)+"<td>"+spaceLeft+space+spaceRight+list[i]+
1777 "<td><a href=\""+(String)alist.get(0)+"\">Source</a>\r\n");
1778 suggestionMap.put(list[i], (String)alist.get(0));
1779 }
1780 else if(alist.size()>1){
1781 out.write("<td>"+(counter+1)+"<td>"+spaceLeft+space+spaceRight+list[i]+
1782 "<td><a href=\""+(String)alist.get(0)+"\">Source</a>...\r\n");
1783 suggestionMap.put(list[i], (String)alist.get(0));
1784 }
1785 else{
1786 out.write("<td>"+(counter+1)+"<td>"+spaceLeft+space+spaceRight+list[i]+"<td>No Source Available\r\n");
1787 suggestionMap.put(elements,"No Source Available");
1788 }
1789 }
1790 else{
1791 if(alist.size()==1){
1792 out.write("<td>"+(counter+1)+"<td>"+list[i]+"<td><a href=\""+alist.get(0)+"\">Source</a>\r\n");
1793 suggestionMap.put(list[i],(String)alist.get(0));
1794 }
1795 else if(alist.size()>1){
1796 out.write("<td>"+(counter+1)+"<td>"+list[i]+"<td><a href=\""+alist.get(0)+"\">Source</a>...\r\n");
1797 suggestionMap.put(list[i],(String)alist.get(0));
1798 }
1799 else{
1800 out.write("<td>"+(counter+1)+"<td>"+list[i]+"<td>No Source Available\r\n");
1801 suggestionMap.put(list[i],"No Source Available");
1802 }
1803 }
1804 }
1805 }
1806 counter++;
1807 out.write("<td onclick=\"GD(this)\">View");
1808 }
1809
1810 if(suggestionMap.size()>1 && !title.equals(IDENTIFIER)){
1811 status = true;
1812 }
1813 }
1814 else{
1815 out.write("<table border=\"1\" align=\"center\" width=\"800\">\r\n");
1816 out.write ("<h2 align=\"center\">"+ title+"</h2>\r\n");
1817 out.write ("<th class=\"tam\">&nbsp;\r\n");
1818 out.write ("<th class=\"tam\">Frequency\r\n");
1819 out.write ("<th class=\"tam\">Element Values\r\n");
1820 out.write ("<th class=\"tam\">Source Documents\r\n");
1821 out.write ("<th class=\"tam\">Internal Link\r\n");
1822 out.write ("<a name='topFive'>\r\n");
1823
1824 HashMap xMap = dm.getDistinctValueMap(title);
1825 int counter = 0;
1826
1827 for(int i = 0; i<list.length; i++){
1828 if(list.length<=5 && i==0){
1829 out.write ("<a name='lastFive'>\r\n");
1830 }
1831 else if((list.length>5) && (list.length-5==i)){
1832 out.write ("<a name='lastFive'>\r\n");
1833 }
1834
1835 InternalLink il= (InternalLink)internalIDMap.get(list[i]);
1836 ArrayList alist2 = il.retrieveList();
1837 String id = (String)alist2.get(0);
1838 id = id.substring(4);
1839
1840 out.write("<tr id=\""+id+"\" >\r\n");
1841 out.write("<td>"+(counter+1)+"<th> "+((Integer)xMap.get(list[i])).toString()+"\r\n");
1842
1843 if(list[i].length()>=201){
1844 if(list[i].startsWith("http://")){
1845 if(title.equals(IDENTIFIER)){
1846 if(list[i].length()>=100){
1847 out.write("<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"</a>");
1848 }
1849 else{
1850 out.write("<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
1851 }
1852 out.write("<td><a href=\""+list[i]+"\">Source</a>\r\n");
1853 }
1854 else{
1855 ArrayList alist= sl.CreateIndentifierLinkPage(title, list[i],IDENTIFIER, collectionFolder,valueMap,linkMap);
1856 if(list[i].length()>=100){
1857 if(alist.size()==1){
1858 out.write("<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"</a>");
1859 out.write("<td><a href=\""+alist.get(0)+"\">Source</a>\r\n");
1860 }
1861 else if (alist.size()>1){
1862 out.write("<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"</a>");
1863 out.write("<td><a href=\""+alist.get(0)+"\">Source</a>...\r\n");
1864 }
1865 else{
1866 out.write("<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"</a>");
1867 out.write("<td>No Source Available\r\n");
1868 }
1869 }
1870 else{
1871 out.write("<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
1872 }
1873 }
1874 }
1875 else{
1876 ArrayList alist= sl.CreateIndentifierLinkPage(title, list[i],IDENTIFIER, collectionFolder,valueMap,linkMap);
1877
1878 if(list[i].startsWith(" ")){
1879 list[i]=list[i].substring(1);
1880 if(alist.size()==1){
1881 out.write("<td>"+spaceLeft+space+spaceRight+list[i].substring(0,200)+"...<td>"+
1882 "<a href=\""+(String)alist.get(0)+"\">Source</a>\r\n");
1883 }
1884 else if(alist.size()>1){
1885 out.write("<td>"+spaceLeft+space+spaceRight+list[i].substring(0,200)+"...<td>"+
1886 "<a href=\""+(String)alist.get(0)+"\">Source</a>...\r\n");
1887 }
1888 else{
1889 out.write("<td>"+spaceLeft+space+spaceRight+list[i].substring(0,200)+"...<td>No Source Available\r\n");
1890 }
1891 }
1892 else{
1893 if(alist.size()==1){
1894 out.write("<td>"+list[i].substring(0,200)+"...<td>" +
1895 "<a href=\""+(String)alist.get(0)+"\">Source</a>\r\n");
1896 }
1897 else if(alist.size()>1){
1898 out.write("<td>"+list[i].substring(0,200)+"...<td>"+
1899 "<a href=\""+(String)alist.get(0)+"\">Source</a>...\r\n");
1900 }
1901 else{
1902 out.write("<td>"+list[i].substring(0,200)+"...<td>No Source Available\r\n");
1903 }
1904 }
1905 }
1906 }
1907 else{
1908 if(list[i].startsWith("http://")){
1909 if(title.equals(IDENTIFIER)){
1910 if(list[i].length()>=100){
1911 out.write("<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"</a>");
1912 }
1913 else{
1914 out.write("<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
1915 }
1916 out.write("<td><a href=\""+list[i]+"\">Source</a>\r\n");
1917 }
1918 else{
1919 ArrayList alist= sl.CreateIndentifierLinkPage(title, list[i],IDENTIFIER, collectionFolder,valueMap,linkMap);
1920
1921 if(list[i].length()>=100){
1922 if(alist.size()==1){
1923 out.write("<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"</a>");
1924 out.write("<td><a href=\""+(String)alist.get(0)+"\">Source</a>\r\n");
1925 }
1926 else if(alist.size()>1){
1927 out.write("<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"</a>");
1928 out.write("<td><a href=\""+(String)alist.get(0)+"\">Source</a>...\r\n");
1929 }
1930 else{
1931 out.write("<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"</a>");
1932 out.write("<td>No Source Available\r\n");
1933 }
1934 }
1935 else{
1936 if(alist.size()==1){
1937 out.write("<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
1938 out.write("<td><a href=\""+(String)alist.get(0)+"\">Source</a>\r\n");
1939 }
1940 else if(alist.size()>1){
1941 out.write("<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
1942 out.write("<td><a href=\""+(String)alist.get(0)+"\">Source</a>...\r\n");
1943 }
1944 else{
1945 out.write("<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
1946 out.write("<td>No Source Available\r\n");
1947 }
1948 }
1949 }
1950 }
1951 else{
1952
1953 ArrayList alist= sl.CreateIndentifierLinkPage(title, list[i],IDENTIFIER, collectionFolder,valueMap,linkMap);
1954
1955 if(list[i].startsWith(" ")){
1956 list[i]=list[i].substring(1);
1957 if(alist.size()==1){
1958 out.write("<td>"+spaceLeft+space+spaceRight+list[i]+
1959 "<td><a href=\""+(String)alist.get(0)+"\">Source</a>\r\n");
1960 }
1961 else if(alist.size()>1){
1962 out.write("<td>"+spaceLeft+space+spaceRight+list[i]+
1963 "<td><a href=\""+(String)alist.get(0)+"\">Source</a>...\r\n");
1964 }
1965 else{
1966 out.write("<td>"+spaceLeft+space+spaceRight+list[i]+"<td>No Source Available\r\n");
1967 }
1968 }
1969 else{
1970 if(alist.size()==1){
1971 out.write("<td>"+list[i]+"<td><a href=\""+(String)alist.get(0)+"\">Source</a>\r\n");
1972 }
1973 else if(alist.size()>1){
1974 out.write("<td>"+list[i]+"<td><a href=\""+(String)alist.get(0)+"\">Source</a>...\r\n");
1975 }
1976 else{
1977 out.write("<td>"+list[i]+"<td>No Source Available\r\n");
1978 }
1979 }
1980 }
1981 }
1982 counter++;
1983 out.write("<td onclick=\"GD(this)\">View");
1984 }
1985 }
1986 out.write("</table>\r\n");
1987 }
1988 else{
1989 out.write("<p>Sorry! The list is empty</p>");
1990 }
1991 out.write("<tr>\r\n");
1992 out.write("<p><a href=\" Overall.html \">Summary</a>");
1993 out.write("&raquo;");
1994
1995 if(metadataSetName.equalsIgnoreCase("dublin")){
1996 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet1+")</a>");
1997 }
1998 else if (metadataSetName.equalsIgnoreCase("extracted")){
1999 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet2+")</a>");
2000 }
2001 else{
2002 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSetName+")</a>");
2003 }
2004
2005 out.write("&raquo;");
2006 out.write("<a href=\""+title+".html\">"+title+"</a></p>");
2007 out.write("</body></html>\r\n");
2008 out.close();
2009
2010 fstream.close();
2011 }catch(Exception e){
2012 e.printStackTrace();
2013 }
2014
2015 if(status){
2016 return suggestionMap;
2017 }
2018 else{
2019 return new HashMap();
2020 }
2021 }
2022
2023 private void setupMetadataLink(String fileName){
2024
2025 try{
2026
2027 DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
2028 DocumentBuilder docBuilder = docBuilderFactory.newDocumentBuilder();
2029 Document doc = docBuilder.newDocument();
2030 doc = docBuilder.parse (new File("/research/cc108/greenstone3/web/mat/script/"+fileName+".xml"));
2031
2032 Element rootNode = doc.getDocumentElement();
2033 NodeList listOfName = rootNode.getElementsByTagName("metadataElement");
2034 url = new HashMap();
2035
2036 for(int i = 0; i<listOfName.getLength(); i++){
2037 Node NameNode = listOfName.item(i);
2038 Element docElement = (Element)NameNode;
2039 NodeList valueList = docElement.getElementsByTagName("URL");
2040 Node urlParentNode = valueList.item(0);
2041 String urlText = urlParentNode.getChildNodes().item(0).getNodeValue();
2042
2043 valueList = docElement.getElementsByTagName("name");
2044 Node urlNameParentNode = valueList.item(0);
2045 String urlNameText = urlNameParentNode.getChildNodes().item(0).getNodeValue();
2046
2047 url.put(urlNameText, urlText);
2048 }
2049 }catch(Exception e){
2050 e.printStackTrace();
2051 }
2052 }
2053
2054 private void createIncompletedList(String fileName, String metadataSetName, String collectionFolder){
2055
2056 HashMap hp = dm.getIdentifierLink(linkIdentifier);
2057 String[] ids = dm.getDocumentIDList(fileName);
2058
2059 for(int i = 0; i<ids.length; i++){
2060 hp.remove(ids[i]);
2061 }
2062
2063 try{
2064 FileWriter fstream = new FileWriter(destination+"/"+fileName+"_IncompletedList.html");
2065 BufferedWriter out = new BufferedWriter(fstream);
2066
2067 out.write("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">");
2068 out.write ("<html>\r\n");
2069 out.write("<head>\r\n<title>Incompleted Document List</title>\r\n");
2070 out.write("<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\">\r\n");
2071 out.write("<style type=\"text/css\">\r\n");
2072 out.write("td{height:20; text-align:left;padding-left:5px;}\r\n");
2073 out.write(".tam{height:20; text-align:center}\r\n");
2074 out.write("th{height:20; text-align:center;}\r\n");
2075 out.write("body{font-family: Arial;}\r\n");
2076 out.write("</style>\r\n");
2077 out.write("</head><body>\r\n");
2078 out.write("<p><a href=\" Overall.html \">Summary</a>");
2079 out.write("&raquo;");
2080
2081 if(metadataSetName.equalsIgnoreCase("dublin")){
2082 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet1+")</a>");
2083 }
2084 else if (metadataSetName.equalsIgnoreCase("extracted")){
2085 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet2+")</a>");
2086 }
2087 else{
2088 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSetName+")</a>");
2089 }
2090
2091 out.write("&raquo;");
2092 out.write("<a href=\""+fileName+".html \">"+fileName+"</a>");
2093 out.write("<h3 align=center>"+fileName+" does not appear in the following documents</h3>");
2094 out.write("<table border=1 align=center><tr><th>Document ID<th>Source Link\r\n");
2095
2096 int counter = 0;
2097 Set s = hp.keySet();
2098 Iterator i = s.iterator();
2099
2100 while(i.hasNext()){
2101 counter++;
2102 String keys = (String)i.next();
2103 InternalLink il = (InternalLink) hp.get(keys);
2104 ArrayList alist = il.retrieveList();
2105 String url = (String)alist.get(0);
2106 out.write("<tr><td>"+counter+"<td><a href=\""+url+"\">"+url+"</a>\r\n");
2107 }
2108
2109 out.write("</table></body></html>\r\n");
2110 out.write("<p><a href=\" Overall.html \">Summary</a>");
2111 out.write("&raquo;");
2112
2113 if(metadataSetName.equalsIgnoreCase("dublin")){
2114 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet1+")</a>");
2115 }
2116 else if (metadataSetName.equalsIgnoreCase("extracted")){
2117 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet2+")</a>");
2118 }
2119 else{
2120 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSetName+")</a>");
2121 }
2122
2123 out.write("&raquo;");
2124 out.write("<a href=\""+fileName+".html \">"+fileName+"</a>");
2125
2126 out.close();
2127 fstream.close();
2128 }catch(IOException ex){
2129 ex.printStackTrace();
2130 }
2131 }
2132
2133 public boolean compareElement(String fileName,String collectionFolder,HashMap suggestionMap, String metadataSetName){
2134
2135 boolean status = false;
2136 Set kset = suggestionMap.keySet();
2137 ArrayList arrayList = new ArrayList();
2138 Iterator is = kset.iterator();
2139
2140 while(is.hasNext()){
2141 arrayList.add((String)is.next());
2142 }
2143
2144 HashMap distanceMap = new HashMap();
2145 int totalLength = 0;
2146 int arrayListLength = arrayList.size();
2147 int counter = 0;
2148 double distance ;
2149
2150 try{
2151 for(int i = 0; i<arrayListLength; i++){
2152 String keyword = (String)arrayList.get(i);
2153 int spaceCounterFront1 = 0;
2154 int spaceCounterEnd1 = 0;
2155 String keywordClone = keyword;
2156 String _keywordClone = keyword;
2157 String bkKeyWord = keyword;
2158 keywordClone = removeUnusedCharacter(keywordClone);
2159
2160 while(true){
2161 if(_keywordClone.length()>1){
2162 if(_keywordClone.charAt(0)==' '){
2163 _keywordClone = _keywordClone.substring(1);
2164 spaceCounterFront1++;
2165 }
2166 else{
2167 if(_keywordClone.charAt(_keywordClone.length()-1)==' '){
2168 _keywordClone = _keywordClone.substring(0,_keywordClone.length()-1);
2169 spaceCounterEnd1++;
2170 }
2171 else{
2172 break;
2173 }
2174 }
2175 }
2176 else{
2177 break;
2178 }
2179 }
2180
2181 int keywordLength = keywordClone.length();
2182 totalLength = totalLength + keywordLength;
2183
2184 for(int j = i+1; j<arrayListLength; j++){
2185 String keyword2 = (String)arrayList.get(j);
2186 counter ++;
2187 double pre_cost = 0;
2188 String keywordClone2 = keyword2;
2189 String _keywordClone2 = keyword2;
2190 String bkKeyWord2 = keyword2;
2191 CostModel cm = new CostModel();
2192 cm = removeUnusedCharacter(keywordClone2,pre_cost);
2193 keywordClone2 = cm.getString();
2194 pre_cost = cm.getCost();
2195
2196 int spaceCounterFront2 = 0;
2197 int spaceCounterEnd2 = 0;
2198
2199 while(true){
2200 if(_keywordClone2.length()>1){
2201 if(_keywordClone2.charAt(0)==' '){
2202 _keywordClone2 = _keywordClone2.substring(1);
2203 spaceCounterFront2++;
2204 }
2205 else{
2206 if(_keywordClone2.charAt(_keywordClone2.length()-1)==' '){
2207 _keywordClone2 = _keywordClone2.substring(0,_keywordClone2.length()-1);
2208 spaceCounterEnd2++;
2209 }
2210 else{
2211 break;
2212 }
2213 }
2214 }
2215 else{
2216 break;
2217 }
2218 }
2219
2220 if(counter == 50000){
2221 counter = 0;
2222 }
2223
2224 int keyword2Length = keywordClone2.length();
2225
2226 if(keywordLength>(keyword2Length+2) || (keywordLength+2)<keyword2Length ){}
2227 else{
2228 distance = calculateEditDistance(keywordClone.toLowerCase().toCharArray(),keywordClone2.toLowerCase().toCharArray());
2229 distance = distance + pre_cost;
2230
2231 if(distance<=2){
2232 if(distanceMap.containsKey(_keywordClone)){
2233 InternalLink il = (InternalLink)distanceMap.get(_keywordClone);
2234 String keywordHolder = _keywordClone2;
2235
2236 while(true){
2237 if(keywordHolder.contains(" ")){
2238 keywordHolder = keywordHolder.replaceFirst(" ",spaceLeft+space+spaceRight+" ");
2239 }
2240 else{
2241 break;
2242 }
2243 }
2244
2245 for(int a = 0; a<spaceCounterFront2; a++){
2246 keywordHolder = spaceLeft+space+spaceRight+keywordHolder;
2247 }
2248
2249 for(int a = 0; a<spaceCounterEnd2; a++){
2250 keywordHolder = keywordHolder+spaceLeft+space+spaceRight;
2251 }
2252
2253 InternalLink il2 = new InternalLink();
2254 il2.setValue(keywordHolder);
2255
2256 if(suggestionMap.containsKey(bkKeyWord2)){
2257 il2.increaseElement((String)suggestionMap.get(bkKeyWord2));
2258 }
2259 else{
2260 il2.increaseElement("&nbsp;");
2261 }
2262
2263 il.increaseNode(il2);
2264 distanceMap.put(_keywordClone, il);
2265 }
2266
2267 else{
2268 InternalLink il = new InternalLink();
2269 String keywordHolder = _keywordClone;
2270
2271 while(true){
2272 if(keywordHolder.contains(" ")){
2273 keywordHolder = keywordHolder.replaceFirst(" ",spaceLeft+space+spaceRight+" ");
2274 }
2275 else{
2276 break;
2277 }
2278 }
2279
2280 for(int a = 0; a<spaceCounterFront1; a++){
2281 keywordHolder = spaceLeft+space+spaceRight+keywordHolder;
2282 }
2283
2284 for(int a = 0; a<spaceCounterEnd1; a++){
2285 keywordHolder = keywordHolder+spaceLeft+space+spaceRight;
2286 }
2287
2288 InternalLink il2 = new InternalLink();
2289 il2.setValue(keywordHolder);
2290 if(suggestionMap.containsKey(bkKeyWord)){
2291 il2.increaseElement((String)suggestionMap.get(bkKeyWord));
2292 }
2293 else{
2294 il2.increaseElement("&nbsp;");
2295 }
2296
2297 il.increaseNode(il2);
2298 keywordHolder = _keywordClone2;
2299
2300 while(true){
2301 if(keywordHolder.contains(" ")){
2302 keywordHolder = keywordHolder.replaceFirst(" ",spaceLeft+space+spaceRight+" ");
2303 }
2304 else{
2305 break;
2306 }
2307 }
2308
2309 for(int a = 0; a<spaceCounterFront2; a++){
2310 keywordHolder = spaceLeft+space+spaceRight+keywordHolder;
2311 }
2312
2313 for(int a = 0; a<spaceCounterEnd2; a++){
2314 keywordHolder = keywordHolder+spaceLeft+space+spaceRight;
2315 }
2316
2317 InternalLink il3 = new InternalLink();
2318 il3.setValue(keywordHolder);
2319 if(suggestionMap.containsKey(bkKeyWord2)){
2320 il3.increaseElement((String)suggestionMap.get(bkKeyWord2));
2321 }
2322 else{
2323 il3.increaseElement("&nbsp;");
2324 }
2325
2326 il.increaseNode(il3);
2327 distanceMap.put(_keywordClone, il);
2328 }
2329 }
2330 }
2331 }
2332 }
2333
2334 if(distanceMap.size()!=0){
2335 generateHTML(distanceMap,fileName,metadataSetName);
2336 status = true;
2337 }
2338 }catch(Exception ex){
2339 ex.printStackTrace();
2340 }
2341 return status;
2342 }
2343
2344 private void generateHTML(HashMap distanceMap, String fileName,String metadataSetName){
2345
2346 String collectionFolder = mds.getCollectionName();
2347
2348 try{
2349 FileWriter fstream = new FileWriter(destination+"/"+fileName+"_Suggestion.html");
2350 BufferedWriter out = new BufferedWriter(fstream);
2351
2352 out.write("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">");
2353 out.write ("<html>\r\n");
2354 out.write("<head>\r\n<title>Potential Duplicate List</title>\r\n");
2355 out.write("<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\">\r\n");
2356 out.write("<style type=\"text/css\">\r\n");
2357 out.write("td{height:20; text-align:left;padding-left:5px;}\r\n");
2358 out.write(".tam{height:20; text-align:center}\r\n");
2359 out.write("th{height:20; text-align:left;}\r\n");
2360 out.write("body{font-family: Arial;}\r\n");
2361 out.write("</style>\r\n");
2362 out.write("</head><body>\r\n");
2363 out.write("<p><a href=\" Overall.html \">Summary</a>");
2364 out.write("&raquo;");
2365
2366 if(metadataSetName.equalsIgnoreCase("dublin")){
2367 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet1+")</a>");
2368 }
2369 else if (metadataSetName.equalsIgnoreCase("extracted")){
2370 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet2+")</a>");
2371 }
2372 else{
2373 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSetName+")</a>");
2374 }
2375
2376 out.write("&raquo;");
2377 out.write("<a href=\""+fileName+".html \">"+fileName+"</a>");
2378 out.write("<h3 align=center>Potential Duplicate List</h3>");
2379 out.write("<table border=1 align=center>\r\n");
2380
2381 Set s = distanceMap.keySet();
2382 Iterator i = s.iterator();
2383
2384 while(i.hasNext()){
2385 String keyword = (String)i.next();
2386 InternalLink il = (InternalLink)distanceMap.get(keyword);
2387 ArrayList alist = il.retrieveNodeList();
2388
2389 out.write("<tr><th>Original Text<th>Source Link\r\n");
2390
2391 for(int a = 0; a<alist.size(); a++){
2392 InternalLink il2 = new InternalLink();
2393 il2 = (InternalLink)alist.get(a);
2394 String url = (String)il2.retrieveList().get(0);
2395 out.write("<tr><td>"+il2.getValue()+"<td><a href=\""+url+"\">"+url+"</a>");
2396 }
2397 out.write("</tr>");
2398 }
2399 out.write("</table>\r\n");
2400 out.write("<p><a href=\" Overall.html \">Summary</a>");
2401 out.write("&raquo;");
2402
2403 if(metadataSetName.equalsIgnoreCase("dublin")){
2404 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet1+")</a>");
2405 }
2406 else if (metadataSetName.equalsIgnoreCase("extracted")){
2407 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet2+")</a>");
2408 }
2409 else{
2410 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSetName+")</a>");
2411 }
2412
2413 out.write("&raquo;");
2414 out.write("<a href=\""+fileName+".html \">"+fileName+"</a>");
2415 out.write("</body></html>\r\n");
2416 out.close();
2417
2418 fstream.close();
2419 }catch(IOException ex){
2420 ex.printStackTrace();
2421 }
2422 }
2423
2424 public int calculateEditDistance(char[] args1, char[] args2){
2425
2426 int n = args1.length;
2427 int m = args2.length;
2428
2429 if (n == 0) {
2430 return m;
2431 }
2432 else if (m == 0) {
2433 return n;
2434 }
2435
2436 int[] p = new int[n + 1];
2437 int[] d = new int[n + 1];
2438 int[] _d;
2439 int i;
2440 int j;
2441 int cost; // cost
2442
2443 for (i = 0; i <= n; i++) {
2444 p[i] = i;
2445 }
2446
2447 for (j = 1; j <= m; j++) {
2448 d[0] = j;
2449 for (i = 1; i <= n; i++) {
2450 cost = (args1[i-1] == args2[j-1]) ? 0 : 1;
2451 d[i] = Math.min(Math.min(d[i - 1] + 1, p[i] + 1),
2452 p[i - 1] + cost);
2453 }
2454
2455 // copy current distance counts to 'previous row' distance counts
2456 _d = p;
2457 p = d;
2458 d = _d;
2459 }
2460 // our last action in the above loop was to switch d and p, so p now
2461 // actually has the most recent cost counts
2462 return p[n];
2463 }
2464
2465 private String removeUnusedCharacter(String target){
2466
2467 while(true){
2468 if(target.contains("\\n")){
2469 target = target.replaceFirst("\\\\n","");
2470 }
2471 else{
2472 break;
2473 }
2474 }
2475
2476 //remove spaces at the end of string
2477 if(target.length()>1){
2478 while(true){
2479 if(target.length()>1){
2480 if(target.charAt(target.length()-1)==' '){
2481 target = target.substring(0,target.length()-1);
2482 }
2483 else{
2484 break;
2485 }
2486 }
2487 else{
2488 break;
2489 }
2490 }
2491 }
2492
2493 //remove leading spaces
2494 if(target.length()>1){
2495 while(true){
2496 if(target.length()>1){
2497 if(target.charAt(0)==' '){
2498 target = target.substring(1,target.length());
2499 }
2500 else{
2501 break;
2502 }
2503 }
2504 else{
2505 break;
2506 }
2507 }
2508 }
2509
2510 //remove multiple spaces between words
2511 while(true){
2512 if(target.contains(" ")){
2513 target = target.replaceFirst(" "," ");
2514 }
2515 else{
2516 break;
2517 }
2518 }
2519 return target;
2520 }
2521
2522 private CostModel removeUnusedCharacter(String target, double cost){
2523
2524 CostModel cm = new CostModel();
2525
2526 while(true){
2527 if(target.contains("\\n")){
2528 target = target.replaceFirst("\\\\n","");
2529 cost = cost + 0.2;
2530 }
2531 else{
2532 break;
2533 }
2534 }
2535
2536 //remove spaces at the end of string
2537 if(target.length()>1){
2538 while(true){
2539 if(target.length()>1){
2540 if(target.charAt(target.length()-1)==' '){
2541 target = target.substring(0,target.length()-1);
2542 cost = cost + 0.2;
2543 }
2544 else{
2545 break;
2546 }
2547 }
2548 else{
2549 break;
2550 }
2551 }
2552 }
2553
2554 //remove leading spaces
2555 if(target.length()>1){
2556 while(true){
2557 if(target.length()>1){
2558 if(target.charAt(0)==' '){
2559 target = target.substring(1,target.length());
2560 cost = cost + 0.2;
2561 }
2562 else{
2563 break;
2564 }
2565 }
2566 else{
2567 break;
2568 }
2569 }
2570 }
2571
2572 //remove multiple spaces between words
2573 while(true){
2574 if(target.contains(" ")){
2575 target = target.replaceFirst(" "," ");
2576 cost = cost + 0.2;
2577 }
2578 else{
2579 break;
2580 }
2581 }
2582
2583 cm.setCost(cost);
2584 cm.setString(target);
2585 return cm;
2586 }
2587
2588 private HashMap generateMetadataElementSortListNoIdentifier(String title, String sort, String metadataSetName){
2589
2590 String fileName = title+"_"+sort;
2591 String collectionFolder = mds.getCollectionName();
2592 SearchLink sl = new SearchLink(mds.StatsDirectory);
2593 HashMap suggestionMap = new HashMap();
2594
2595 HashMap internalIDMap = dm.getInternalIdentifier(title);
2596 boolean status = false;
2597
2598 try{
2599
2600 FileWriter fstream = new FileWriter(destination+"/"+fileName+".html");
2601 BufferedWriter out = new BufferedWriter(fstream);
2602
2603 out.write("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">");
2604 out.write ("<html>\r\n");
2605 out.write("<head>\r\n<title>Metadata Element Sort List</title>\r\n");
2606 out.write("<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\">\r\n");
2607 out.write("<script type=\"text/javascript\" src=\"../script/getInfomation.js\"></script>\r\n");
2608 out.write("<script type=\"text/javascript\" src=\"http://yui.yahooapis.com/2.4.1/build/yahoo/yahoo-min.js\"></script>\r\n");
2609 out.write("<script type=\"text/javascript\" src=\"http://yui.yahooapis.com/2.4.1/build/event/event-min.js\"></script>\r\n");
2610 out.write("<script type=\"text/javascript\" src=\"http://yui.yahooapis.com/2.4.1/build/connection/connection-min.js\"></script>\r\n");
2611 out.write("<style type=\"text/css\">\r\n");
2612 out.write("td{height:20; text-align:left;padding-left:5px;}\r\n");
2613 out.write(".tam{height:20; text-align:center}\r\n");
2614 out.write("th{height:20; text-align:center;}\r\n");
2615 out.write("body{font-family: Arial;}\r\n");
2616 out.write("</style>\r\n");
2617 out.write("</head><body id=\""+collectionFolder+"\">\r\n");
2618 out.write("<td><div style=\"float:left;\"><a href=\" Overall.html \">Summary</a>");
2619 out.write("&raquo;");
2620
2621 if(metadataSetName.equalsIgnoreCase("dublin")){
2622 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet1+")</a>");
2623 }
2624 else if (metadataSetName.equalsIgnoreCase("extracted")){
2625 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet2+")</a>");
2626 }
2627 else{
2628 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSetName+")</a>");
2629 }
2630 out.write("&raquo;");
2631 out.write("<a href=\""+title+".html\">"+title+"</a></div>");
2632 out.write("<div style=\"float:right;\"><a href=\"http://chnm.gmu.edu/tools/surveys/4386/\">Please send feedback about the Mat tool</a></div></td><br>");
2633
2634 String[] list = dm.getSortList(title, sort);
2635
2636 if(list.length>=1){
2637 if(sort.equals("ASCII")){
2638
2639 out.write("<table border=\"1\" align=\"center\" width=\"800\">\r\n");
2640 out.write ("<h2 align=\"center\">"+ title+"</h2>\r\n");
2641 out.write ("<th class=\"tam\">ASCII Sort\r\n");
2642 out.write ("<th class=\"tam\">Element Values\r\n");
2643 out.write ("<th class=\"tam\">Source Documents\r\n");
2644 out.write ("<th class=\"tam\">Internal Link\r\n");
2645 out.write ("<a name='topFive'>\r\n");
2646
2647 int counter = 0;
2648
2649 for(int i = 0; i<list.length; i++){
2650 if(list.length<=5 && i==0){
2651 out.write ("<a name='lastFive'>\r\n");
2652 }
2653 else if((list.length>5) && (list.length-5==i)){
2654 out.write ("<a name='lastFive'>\r\n");
2655 }
2656
2657
2658 InternalLink il= (InternalLink)internalIDMap.get(list[i]);
2659 ArrayList alist2 = il.retrieveList();
2660 String id = (String)alist2.get(0);
2661 id = id.substring(4);
2662
2663 out.write("<tr id=\""+id+"\" >\r\n");
2664
2665 if(list[i].length()>=201){
2666 if(list[i].startsWith("http://")){
2667 if(list[i].length()>=100){
2668 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"</a>");
2669 out.write("<td><a href=\""+list[i]+"\">Source</a>\r\n");
2670 }
2671 else{
2672 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
2673 out.write("<td><a href=\""+list[i]+"\">Source</a>\r\n");
2674 }
2675 }
2676 else{
2677
2678 if(list[i].startsWith(" ")){
2679 String elements = list[i];
2680 list[i] = list[i].substring(1);
2681 out.write("<td>"+(counter+1)+"<td>"+spaceLeft+space+spaceRight+list[i].substring(0, 200)+"...<td>No Source Available \r\n");
2682 }
2683 else{
2684 out.write("<td>"+(counter+1)+"<td>"+list[i].substring(0, 200)+"... <td>No Source Available\r\n");
2685 }
2686 }
2687 }
2688 else{
2689 if(list[i].startsWith("http://")){
2690 if(list[i].length()>=100){
2691 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"...</a>");
2692 }
2693 else{
2694 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
2695 }
2696 out.write("<td><a href=\""+list[i]+"\">Source</a>\r\n");
2697 }
2698 else{
2699 if(list[i].startsWith(" ")){
2700 String elements = list[i];
2701 list[i] = list[i].substring(1);
2702 out.write("<td>"+(counter+1)+"<td>"+spaceLeft+space+spaceRight+list[i]+"<td>No Source Available\r\n");
2703 suggestionMap.put(elements,"No Source Available");
2704 }
2705 else{
2706 out.write("<td>"+(counter+1)+"<td>"+list[i]+"<td>No Source Available\r\n");
2707 }
2708 }
2709 }
2710 counter++;
2711 out.write("<td onclick=\"GD(this)\">View");
2712 }
2713 }
2714 else{
2715 out.write("<table border=\"1\" align=\"center\" width=\"800\">\r\n");
2716 out.write ("<h2 align=\"center\">"+ title+"</h2>\r\n");
2717 out.write ("<th class=\"tam\">&nbsp;\r\n");
2718 out.write ("<th class=\"tam\">Frequency\r\n");
2719 out.write ("<th class=\"tam\">Element Values\r\n");
2720 out.write ("<th class=\"tam\">Source Documents\r\n");
2721 out.write ("<th class=\"tam\">Internal Link\r\n");
2722 out.write ("<a name='topFive'>\r\n");
2723
2724 HashMap xMap = dm.getDistinctValueMap(title);
2725 int counter = 0;
2726
2727 for(int i = 0; i<list.length; i++){
2728 if(list.length<=5 && i==0){
2729 out.write ("<a name='lastFive'>\r\n");
2730 }
2731 else if((list.length>5) && (list.length-5==i)){
2732 out.write ("<a name='lastFive'>\r\n");
2733 }
2734
2735 InternalLink il= (InternalLink)internalIDMap.get(list[i]);
2736 ArrayList alist2 = il.retrieveList();
2737 String id = (String)alist2.get(0);
2738 id = id.substring(4);
2739
2740 out.write("<tr id=\""+id+"\" >\r\n");
2741 out.write("<td>"+(counter+1)+"<th> "+((Integer)xMap.get(list[i])).toString()+"\r\n");
2742
2743 if(list[i].length()>=201){
2744 if(list[i].startsWith("http://")){
2745 if(list[i].length()>=100){
2746 out.write("<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"</a>");
2747 }
2748 else{
2749 out.write("<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
2750 }
2751 out.write("<td><a href=\""+list[i]+"\">Source</a>\r\n");
2752 }
2753 else{
2754 if(list[i].startsWith(" ")){
2755 list[i]=list[i].substring(1);
2756 out.write("<td>"+spaceLeft+space+spaceRight+list[i].substring(0,200)+"...<td>No Source Available\r\n");
2757 }
2758 else{
2759 out.write("<td>"+list[i].substring(0,200)+"...<td>No Source Available\r\n");
2760 }
2761 }
2762 }
2763 else{
2764 if(list[i].startsWith("http://")){
2765 if(list[i].length()>=100){
2766 out.write("<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"</a>");
2767 }
2768 else{
2769 out.write("<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
2770 }
2771 out.write("<td><a href=\""+list[i]+"\">Source</a>\r\n");
2772 }
2773 else{
2774 if(list[i].startsWith(" ")){
2775 list[i]=list[i].substring(1);
2776 out.write("<td>"+spaceLeft+space+spaceRight+list[i]+"<td>No Source Available\r\n");
2777 }
2778 else{
2779 out.write("<td>"+list[i]+"<td>No Source Available\r\n");
2780 }
2781 }
2782 }
2783 counter++;
2784 out.write("<td onclick=\"GD(this)\">View");
2785 }
2786 }
2787 out.write("</table>\r\n");
2788 }
2789 else{
2790 out.write("<p>Sorry! The list is empty</p>");
2791 }
2792
2793 out.write("<tr>\r\n");
2794 out.write("<p><a href=\" Overall.html \">Summary</a>");
2795 out.write("&raquo;");
2796
2797 if(metadataSetName.equalsIgnoreCase("dublin")){
2798 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet1+")</a>");
2799 }
2800 else if (metadataSetName.equalsIgnoreCase("extracted")){
2801 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet2+")</a>");
2802 }
2803 else{
2804 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSetName+")</a>");
2805 }
2806
2807 out.write("&raquo;");
2808 out.write("<a href=\""+title+".html\">"+title+"</a></p>");
2809 out.write("</body></html>\r\n");
2810 out.close();
2811
2812 fstream.close();
2813 }catch(Exception e){
2814 e.printStackTrace();
2815 }
2816 return new HashMap();
2817 }
2818
2819 private void createIncompletedListNoIdentifier(String fileName, String metadataSetName, String collectionFolder){
2820
2821 HashMap hp = dm.getIdentifierLinkNoIdentifier();
2822 String[] ids = dm.getDocumentIDList(fileName);
2823
2824 Set st = hp.keySet();
2825 Iterator it = st.iterator();
2826
2827 while(it.hasNext()){
2828 System.out.println(it.next());
2829 }
2830 for(int i = 0; i<ids.length; i++){
2831 System.out.println("IDS:" +ids[i]);
2832 hp.remove(ids[i]);
2833 }
2834
2835 System.out.println("Length: "+ids.length+" "+fileName+ " "+hp.size());
2836
2837 try{
2838 FileWriter fstream = new FileWriter(destination+"/"+fileName+"_IncompletedList.html");
2839 BufferedWriter out = new BufferedWriter(fstream);
2840
2841 out.write("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">");
2842 out.write ("<html>\r\n");
2843 out.write("<head>\r\n<title>Incompleted Document List</title>\r\n");
2844 out.write("<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\">\r\n");
2845 out.write("<style type=\"text/css\">\r\n");
2846 out.write("td{height:20; text-align:left;padding-left:5px;}\r\n");
2847 out.write(".tam{height:20; text-align:center}\r\n");
2848 out.write("th{height:20; text-align:center;}\r\n");
2849 out.write("body{font-family: Arial;}\r\n");
2850 out.write("</style>\r\n");
2851 out.write("</head><body>\r\n");
2852 out.write("<p><a href=\" Overall.html \">Summary</a>");
2853 out.write("&raquo;");
2854
2855 if(metadataSetName.equalsIgnoreCase("dublin")){
2856 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet1+")</a>");
2857 }
2858 else if (metadataSetName.equalsIgnoreCase("extracted")){
2859 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet2+")</a>");
2860 }
2861 else{
2862 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSetName+")</a>");
2863 }
2864
2865 out.write("&raquo;");
2866 out.write("<a href=\""+fileName+".html \">"+fileName+"</a>");
2867 out.write("<h3 align=center>"+fileName+" does not appear in the following documents</h3>");
2868 int counter = 0;
2869 Set s = hp.keySet();
2870 Iterator i = s.iterator();
2871 out.write("<table border=1 align=center><tr><th>Document ID<th>Source Link\r\n");
2872
2873 while(i.hasNext()){
2874 counter++;
2875 String keys = (String)i.next();
2876 InternalLink il = (InternalLink) hp.get(keys);
2877 ArrayList alist = il.retrieveList();
2878 String url = (String)alist.get(0);
2879 out.write("<tr><td>"+counter+"<td><a href=\""+url+"\">"+url+"</a>\r\n");
2880 }
2881 out.write("</table></body></html>\r\n");
2882 out.write("<p><a href=\" Overall.html \">Summary</a>");
2883 out.write("&raquo;");
2884
2885 if(metadataSetName.equalsIgnoreCase("dublin")){
2886 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet1+")</a>");
2887 }
2888 else if (metadataSetName.equalsIgnoreCase("extracted")){
2889 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet2+")</a>");
2890 }
2891 else{
2892 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSetName+")</a>");
2893 }
2894 out.write("&raquo;");
2895 out.write("<a href=\""+fileName+".html \">"+fileName+"</a>");
2896
2897 out.close();
2898 fstream.close();
2899 }catch(IOException ex){
2900 ex.printStackTrace();
2901 }
2902 }
2903}
Note: See TracBrowser for help on using the repository browser.