source: other-projects/trunk/greenstone3-extension/mat/Greenstone3Project/src/org/greenstone3/ms/PrintHTML.java@ 17156

Last change on this file since 17156 was 17156, checked in by cc108, 16 years ago

Adding the project Metadata Quality for Digital Libraries into the repository

File size: 119.9 KB
Line 
1package org.greenstone3.ms;
2
3//import InternalLink;
4
5import java.io.*;
6import java.text.SimpleDateFormat;
7import java.util.ArrayList;
8import java.util.Arrays;
9import java.util.Calendar;
10import java.util.Collection;
11import java.util.HashMap;
12import java.util.Iterator;
13import java.util.List;
14import java.util.Map;
15import java.util.Set;
16
17import javax.xml.parsers.DocumentBuilder;
18import javax.xml.parsers.DocumentBuilderFactory;
19
20import org.w3c.dom.*;
21
22public class PrintHTML {
23
// Ordering modes handed to WriteHTML as its `condition` argument; WriteHTML
// compares that argument against the literals "normal", "worst" and "best".
24 final String NORMAL = "normal";
25 final String WORST = "worst";
26 final String BEST = "best";
// Page-title fragments. generateAllPossibleGraph concatenates title1..title4
// to describe which filters were applied to a generated chart page.
27 final String title1 ="Hide Empty Metadata ,";
28 final String title2 ="Hide Completed Metadata ,";
29 final String title3 ="Hide Documents with Empty Metadata ,";
30 final String title4 ="Hide Documents with Completed Metadata ,";
31 final String title5 ="No Available Graph";
// Display names and markers; not referenced in the part of the file reviewed here.
32 final String metadataSet1 = "Dublin Core";
33 final String metadataSet2 = "Extracted";
34 final String incompletedList="IncompletedList";
35 final String spaceLeft = "<I>&laquo;";
36 final String spaceRight = "&raquo;</I>";
37 final String space ="space";
38 final String oddChar ="odd Character";
// NOTE(review): presumably filled in by setupMetadataLink(...) called from the
// constructor -- confirm against that method.
39 HashMap url;
40 //servlet
41 //final String destination = "/research/cc108/greenstone3/web/mat/";
// Statistics source supplied to the constructor.
42 MetadataStats mds;
// Data helper built from mds in the constructor.
43 DataMaker dm;
// Calendar instance and date format created once in the constructor.
44 Calendar cl;
45 SimpleDateFormat sdf;
/**
 * Creates a page generator bound to the given statistics object.
 *
 * Builds the DataMaker helper from the statistics, captures a Calendar
 * instance and the date format, and finally calls
 * setupMetadataLink("metadataElementURL").
 *
 * @param ms statistics object that every generated page reads from
 */
public PrintHTML(MetadataStats ms){
    this.mds = ms;
    this.dm = new DataMaker(this.mds);
    this.cl = Calendar.getInstance();
    this.sdf = new SimpleDateFormat(" dd MMM yyyy 'at' HH:mm:ss z 'GMT'Z");

    setupMetadataLink("metadataElementURL");
}
55
56 private ArrayList getHideEmptyMetadataDetail(ArrayList dataset, String[] id, String[] metadataName){
57
58 if(id.length==0 || metadataName.length==0){return new ArrayList();}
59 ArrayList detailList = new ArrayList();
60 ArrayList alist = new ArrayList();
61 ArrayList nameList = new ArrayList();
62
63 for(int i = 0; i< metadataName.length; i++){
64 if(!dm.IsElementEmpty(metadataName[i])){
65 alist.add(dm.getMetadataRows(metadataName[i]));
66 nameList.add(metadataName[i]);
67 }
68 }
69
70 String[] metaDataElementName = new String[nameList.size()];
71
72 for(int i = 0; i< nameList.size(); i++){
73 metaDataElementName[i] = nameList.get(i).toString();
74 }
75
76 if(id.length==0 || metaDataElementName.length==0){return new ArrayList();}
77
78 detailList.add(alist);
79 detailList.add(id);
80 detailList.add(metaDataElementName);
81
82 return detailList;
83 }
84
85 private ArrayList getHideFullMetadataDetail(ArrayList dataset, String[] id, String[] metadataName){
86
87 if(id.length==0 || metadataName.length==0){return new ArrayList();}
88 ArrayList detailList = new ArrayList();
89 ArrayList alist = new ArrayList();
90 ArrayList nameList = new ArrayList();
91
92 for(int i = 0; i< metadataName.length; i++){
93 if(!dm.IsElementFull(metadataName[i])){
94 alist.add(dm.getMetadataRows(metadataName[i]));
95 nameList.add(metadataName[i]);
96 }
97 }
98
99 String[] metaDataElementName = new String[nameList.size()];
100
101 for(int i = 0; i< nameList.size(); i++){
102 metaDataElementName[i] = nameList.get(i).toString();
103 }
104
105 if(id.length==0 || metaDataElementName.length==0){return new ArrayList();}
106
107 detailList.add(alist);
108 detailList.add(id);
109 detailList.add(metaDataElementName);
110
111 return detailList;
112 }
113
114 private ArrayList getHideEmptyDocumentDetail(ArrayList dataset, String[] id, String[] metadataName){
115
116 if(id.length==0 || metadataName.length==0){return new ArrayList();}
117 ArrayList detailList = new ArrayList();
118 ArrayList alist = dm.removeDocument(dataset, id, 0);
119 ArrayList idList = dm.getRemovedID();
120 ArrayList newIDList = new ArrayList();
121
122 for(int i = 0; i< id.length; i++){
123 if(!idList.contains(id[i])){
124 newIDList.add(id[i]);
125 }
126 }
127 id = new String[newIDList.size()];
128 for(int i = 0; i<newIDList.size(); i++){
129 id[i] = newIDList.get(i).toString();
130 }
131
132 if(id.length==0 || metadataName.length==0){return new ArrayList();}
133
134 detailList.add(alist);
135 detailList.add(id);
136 detailList.add(metadataName);
137
138 return detailList;
139 }
140
141 private ArrayList getHideFullDocumentDetail(ArrayList dataset, String[] id, String[] metadataName){
142
143 if(id.length==0 || metadataName.length==0){return new ArrayList();}
144 ArrayList detailList = new ArrayList();
145 ArrayList alist = dm.removeDocument(dataset, id ,1);
146 ArrayList idList = dm.getRemovedID();
147 ArrayList newIDList = new ArrayList();
148
149 for(int i = 0; i< id.length; i++){
150 if(!idList.contains(id[i])){
151 newIDList.add(id[i]);
152 }
153 }
154 id = new String[newIDList.size()];
155 for(int i = 0; i<newIDList.size(); i++){
156 id[i] = newIDList.get(i).toString();
157 }
158
159 if(id.length==0 || metadataName.length==0){return new ArrayList();}
160
161 detailList.add(alist);
162 detailList.add(id);
163 detailList.add(metadataName);
164
165 return detailList;
166 }
167
168 /*
169 * private ArrayList getEmptyMetadataDetail(ArrayList dataset, String[] id, String[] metadataName){
170 ArrayList detailList = new ArrayList();
171 return detailList;
172 }
173 */
174
175 public void generateAllPossibleGraph(ArrayList Namelist,String[]ids,String[] names,String condition){
176 //System.out.println("I AM HERE");
177 ArrayList dataList= Namelist;
178 String[] id = ids;
179 String[] metadataName = names;
180
181 ArrayList alist = new ArrayList();
182 ArrayList detailList = new ArrayList();
183 ArrayList new_dataList = new ArrayList();
184 String[] idList;
185 String[] metadataNameList;
186
187 //System.out.println("condition"+ condition);
188 //---------------------------------------------------------------------------------------------------------SS
189 //-----------------------------------------------------------------------SSSS
190 if(id.length==0 || metadataName.length==0){
191 generateEmptyGraph("SSSS","WORST",condition);
192 generateEmptyGraph("SSSS","BEST",condition);
193 }
194
195 else{
196 new_dataList = transformDataList(dataList,id);
197 //WriteHTML("SSSS",new_dataList,id,metadataName,NORMAL);
198 WriteHTML("SSSS",new_dataList,id,metadataName,WORST,"Show completed graph",condition);
199 WriteHTML("SSSS",new_dataList,id,metadataName,BEST,"Show completed graph",condition);
200 }
201 //-----------------------------------------------------------------------SSSH
202 //System.out.println("SSSH 1");
203 detailList = getHideFullDocumentDetail(dataList,id,metadataName);
204 //System.out.println("SSSH 2");
205 if(detailList.size()==0){
206 generateEmptyGraph("SSSH","WORST",condition);
207 generateEmptyGraph("SSSH","BEST",condition);
208 }
209 else{
210 alist = (ArrayList)detailList.get(0);
211 idList = (String[])detailList.get(1);
212 metadataNameList = (String[])detailList.get(2);
213 new_dataList = transformDataList(alist,idList);
214 //WriteHTML("SSSH",new_dataList,idList,metadataNameList,NORMAL);
215 WriteHTML("SSSH",new_dataList,idList,metadataNameList,WORST,title4,condition);
216 WriteHTML("SSSH",new_dataList,idList,metadataNameList,BEST,title4,condition);
217 }
218 //-----------------------------------------------------------------------SSHS
219 //System.out.println("SSSS");
220 detailList = getHideEmptyDocumentDetail(dataList,id,metadataName);
221 //System.out.println("SSSS");
222 if(detailList.size()==0){
223 generateEmptyGraph("SSHS","WORST",condition);
224 generateEmptyGraph("SSHS","BEST",condition);
225 generateEmptyGraph("SSHH","WORST",condition);
226 generateEmptyGraph("SSHH","BEST",condition);
227 }
228 else{
229 alist = (ArrayList)detailList.get(0);
230 idList = (String[])detailList.get(1);
231 metadataNameList = (String[])detailList.get(2);
232 new_dataList = transformDataList(alist,idList);
233 //WriteHTML("SSHS",new_dataList,idList,metadataNameList,NORMAL);
234 WriteHTML("SSHS",new_dataList,idList,metadataNameList,WORST,title3,condition);
235 WriteHTML("SSHS",new_dataList,idList,metadataNameList,BEST,title3,condition);
236 //-----------------------------------------------------------------------SSHH
237 detailList = getHideFullDocumentDetail(alist,idList,metadataNameList);
238 if(detailList.size()==0){
239 generateEmptyGraph("SSHH","WORST",condition);
240 generateEmptyGraph("SSHH","BEST",condition);
241 }
242 else{
243 alist = (ArrayList)detailList.get(0);
244 idList = (String[])detailList.get(1);
245 metadataNameList = (String[])detailList.get(2);
246 new_dataList = transformDataList(alist,idList);
247 //WriteHTML("SSHH",new_dataList,idList,metadataNameList,NORMAL);
248 WriteHTML("SSHH",new_dataList,idList,metadataNameList,WORST,title3+title4,condition);
249 WriteHTML("SSHH",new_dataList,idList,metadataNameList,BEST,title3+title4,condition);
250 }
251 }
252
253
254 //---------------------------------------------------------------------------------------------------------SH
255 ArrayList xList = new ArrayList();
256 String[] idListCopy;
257 String[] metadataNameListCopy;
258
259 //-----------------------------------------------------------------------SHSS
260 detailList = getHideFullMetadataDetail(dataList,id,metadataName);
261
262 if(detailList.size()==0){
263 generateEmptyGraph("SHSS","WORST",condition);
264 generateEmptyGraph("SHSS","BEST",condition);
265
266 generateEmptyGraph("SHSH","WORST",condition);
267 generateEmptyGraph("SHSH","BEST",condition);
268
269 generateEmptyGraph("SHHS","WORST",condition);
270 generateEmptyGraph("SHHS","BEST",condition);
271
272 generateEmptyGraph("SHHH","WORST",condition);
273 generateEmptyGraph("SHHH","BEST",condition);
274 }
275 else{
276 alist = (ArrayList)detailList.get(0);
277 idList = (String[])detailList.get(1);
278 metadataNameList = (String[])detailList.get(2);
279
280 xList = (ArrayList)alist.clone();
281 idListCopy = (String[]) idList.clone();
282 metadataNameListCopy = (String[])metadataNameList.clone();
283
284 new_dataList = transformDataList(alist,idList);
285 //WriteHTML("SHSS",new_dataList,idList,metadataNameList,NORMAL);
286 WriteHTML("SHSS",new_dataList,idList,metadataNameList,WORST,title2,condition);
287 WriteHTML("SHSS",new_dataList,idList,metadataNameList,BEST,title2,condition);
288
289 //-----------------------------------------------------------------------SHHS
290 detailList = getHideEmptyDocumentDetail((ArrayList)xList.clone(),(String[])idListCopy.clone(),(String[])metadataNameListCopy.clone());
291 //System.out.println("SHHS"+detailList.size());
292 if(detailList.size()==0){
293 generateEmptyGraph("SHHS","WORST",condition);
294 generateEmptyGraph("SHHS","BEST",condition);
295
296 generateEmptyGraph("SHHH","WORST",condition);
297 generateEmptyGraph("SHHH","BEST",condition);
298 }
299 else{
300 alist = (ArrayList)detailList.get(0);
301 idList = (String[])detailList.get(1);
302 metadataNameList = (String[])detailList.get(2);
303 //if(idList.length==0 || metadataName.length==0){}
304 new_dataList = transformDataList(alist,idList);
305 //WriteHTML("SHHS",new_dataList,idList,metadataNameList,NORMAL);
306 WriteHTML("SHHS",new_dataList,idList,metadataNameList,WORST,title2+title3,condition);
307 WriteHTML("SHHS",new_dataList,idList,metadataNameList,BEST,title2+title3,condition);
308 //step
309 //-----------------------------------------------------------------------SHHH
310 detailList = getHideFullDocumentDetail(alist,idList,metadataNameList);
311 if(detailList.size()==0){
312 generateEmptyGraph("SHHH","WORST",condition);
313 generateEmptyGraph("SHHH","BEST",condition);
314 }
315 else{
316 alist = (ArrayList)detailList.get(0);
317 idList = (String[])detailList.get(1);
318 metadataNameList = (String[])detailList.get(2);
319
320 new_dataList = transformDataList(alist,idList);
321 //WriteHTML("SHHH",new_dataList,idList,metadataNameList,NORMAL);
322 WriteHTML("SHHH",new_dataList,idList,metadataNameList,WORST,title2+title3+title4,condition);
323 WriteHTML("SHHH",new_dataList,idList,metadataNameList,BEST,title2+title3+title4,condition);
324 }
325 }
326
327 //-----------------------------------------------------------------------SHSH
328 detailList = getHideFullDocumentDetail((ArrayList)xList.clone(),(String[])idListCopy.clone(),(String[])metadataNameListCopy.clone());
329 if(detailList.size()==0){
330 generateEmptyGraph("SHSH","WORST",condition);
331 generateEmptyGraph("SHSH","BEST",condition);
332 }
333 else{
334 alist = (ArrayList)detailList.get(0);
335 idList = (String[])detailList.get(1);
336 metadataNameList = (String[])detailList.get(2);
337
338 new_dataList = transformDataList(alist,idList);
339 //WriteHTML("SHSH",new_dataList,idList,metadataNameList,NORMAL);
340 WriteHTML("SHSH",new_dataList,idList,metadataNameList,WORST,title2+title4,condition);
341 WriteHTML("SHSH",new_dataList,idList,metadataNameList,BEST,title2+title4,condition);
342 }
343 }
344
345 //---------------------------------------------------------------------------------------------------------HS
346 ArrayList xList1 = new ArrayList();
347 String[] idListCopy1;
348 String[] metadataNameListCopy1;
349
350 //-----------------------------------------------------------------------HSSS
351 detailList = getHideEmptyMetadataDetail(dataList,id,metadataName);
352
353 if(detailList.size()==0){
354 generateEmptyGraph("HSSS","WORST",condition);
355 generateEmptyGraph("HSSS","BEST",condition);
356
357 generateEmptyGraph("HSHS","WORST",condition);
358 generateEmptyGraph("HSHS","BEST",condition);
359
360 generateEmptyGraph("HSHH","WORST",condition);
361 generateEmptyGraph("HSHH","BEST",condition);
362
363 generateEmptyGraph("HSSH","WORST",condition);
364 generateEmptyGraph("HSSH","BEST",condition);
365 }
366 else{
367 alist = (ArrayList)detailList.get(0);
368 idList = (String[])detailList.get(1);
369 metadataNameList = (String[])detailList.get(2);
370
371 xList1 = (ArrayList)alist.clone();
372 idListCopy1 = (String[]) idList.clone();
373 metadataNameListCopy1 = (String[])metadataNameList.clone();
374
375 new_dataList = transformDataList(alist,idList);
376 //WriteHTML("HSSS",new_dataList,idList,metadataNameList,NORMAL);
377 WriteHTML("HSSS",new_dataList,idList,metadataNameList,WORST,title1,condition);
378 WriteHTML("HSSS",new_dataList,idList,metadataNameList,BEST,title1,condition);
379
380 //-----------------------------------------------------------------------HSHS
381 detailList = getHideEmptyDocumentDetail((ArrayList)xList1.clone(),(String[])idListCopy1.clone(),(String[])metadataNameListCopy1.clone());
382 if(detailList.size()==0){
383 generateEmptyGraph("HSHS","WORST",condition);
384 generateEmptyGraph("HSHS","BEST",condition);
385
386 generateEmptyGraph("HSHH","WORST",condition);
387 generateEmptyGraph("HSHH","BEST",condition);
388 }
389 else{
390 alist = (ArrayList)detailList.get(0);
391 idList = (String[])detailList.get(1);
392 metadataNameList = (String[])detailList.get(2);
393
394 new_dataList = transformDataList(alist,idList);
395 //WriteHTML("HSHS",new_dataList,idList,metadataNameList,NORMAL);
396 WriteHTML("HSHS",new_dataList,idList,metadataNameList,WORST,title1+title3,condition);
397 WriteHTML("HSHS",new_dataList,idList,metadataNameList,BEST,title1+title3,condition);
398
399 //-----------------------------------------------------------------------HSHH
400 detailList = getHideFullDocumentDetail(alist,idList,metadataNameList);
401 if(detailList.size()==0){
402 generateEmptyGraph("HSHH","WORST",condition);
403 generateEmptyGraph("HSHH","BEST",condition);
404 }
405 else{
406 alist = (ArrayList)detailList.get(0);
407 idList = (String[])detailList.get(1);
408 metadataNameList = (String[])detailList.get(2);
409
410 new_dataList = transformDataList(alist,idList);
411 //WriteHTML("HSHH",new_dataList,idList,metadataNameList,NORMAL);
412 WriteHTML("HSHH",new_dataList,idList,metadataNameList,WORST,title1+title3+title4,condition);
413 WriteHTML("HSHH",new_dataList,idList,metadataNameList,BEST,title1+title3+title4,condition);
414 }
415 }
416
417 //-----------------------------------------------------------------------HSSH
418 detailList = getHideFullDocumentDetail((ArrayList)xList1.clone(),(String[])idListCopy1.clone(),(String[])metadataNameListCopy1.clone());
419 if(detailList.size()==0){
420 generateEmptyGraph("HSSH","WORST",condition);
421 generateEmptyGraph("HSHS","BEST",condition);
422 }
423 else{
424 alist = (ArrayList)detailList.get(0);
425 idList = (String[])detailList.get(1);
426 metadataNameList = (String[])detailList.get(2);
427
428 new_dataList = transformDataList(alist,idList);
429 //WriteHTML("HSSH",new_dataList,idList,metadataNameList,NORMAL);
430 WriteHTML("HSSH",new_dataList,idList,metadataNameList,WORST,title1+title4,condition);
431 WriteHTML("HSSH",new_dataList,idList,metadataNameList,BEST,title1+title4,condition);
432 }
433 }
434
435 //---------------------------------------------------------------------------------------------------------HH
436 ArrayList xList2 = new ArrayList();
437 String[] idListCopy2;
438 String[] metadataNameListCopy2;
439
440 //-----------------------------------------------------------------------HHSS
441 detailList = getHideEmptyMetadataDetail(dataList,id,metadataName);
442 if(detailList.size()==0){
443 generateEmptyGraph("HHSS","WORST",condition);
444 generateEmptyGraph("HHSS","BEST",condition);
445
446 generateEmptyGraph("HHHS","WORST",condition);
447 generateEmptyGraph("HHHS","BEST",condition);
448
449 generateEmptyGraph("HHHH","WORST",condition);
450 generateEmptyGraph("HHHH","BEST",condition);
451
452 generateEmptyGraph("HHSH","WORST",condition);
453 generateEmptyGraph("HHSH","BEST",condition);
454 }
455 else{
456 alist = (ArrayList)detailList.get(0);
457 idList = (String[])detailList.get(1);
458 metadataNameList = (String[])detailList.get(2);
459
460 detailList = getHideFullMetadataDetail(alist,idList,metadataNameList);
461 //System.out.println(detailList.size());
462 if(detailList.size()==0){
463 generateEmptyGraph("HHSS","WORST",condition);
464 generateEmptyGraph("HHSS","BEST",condition);
465
466 generateEmptyGraph("HHHS","WORST",condition);
467 generateEmptyGraph("HHHS","BEST",condition);
468
469 generateEmptyGraph("HHHH","WORST",condition);
470 generateEmptyGraph("HHHH","BEST",condition);
471
472 generateEmptyGraph("HHSH","WORST",condition);
473 generateEmptyGraph("HHSH","BEST",condition);
474 }
475 else{
476 //System.out.println("bad"+detailList.size());
477 alist = (ArrayList)detailList.get(0);
478 //System.out.println("bad");
479 idList = (String[])detailList.get(1);
480 //System.out.println("bad");
481 metadataNameList = (String[])detailList.get(2);
482 //System.out.println("bad");
483
484 xList2 = (ArrayList)alist.clone();
485 idListCopy2 = (String[]) idList.clone();
486 metadataNameListCopy2 = (String[])metadataNameList.clone();
487
488 new_dataList = transformDataList(alist,idList);
489 //WriteHTML("HHSS",new_dataList,idList,metadataNameList,NORMAL);
490 WriteHTML("HHSS",new_dataList,idList,metadataNameList,WORST,title1+title2,condition);
491 WriteHTML("HHSS",new_dataList,idList,metadataNameList,BEST,title1+title2,condition);
492
493
494 //---------------------------------------------------------------------- HHSH
495 detailList = getHideFullDocumentDetail((ArrayList)xList2.clone(),(String[])idListCopy2.clone(),(String[])metadataNameListCopy2.clone());
496 if(detailList.size()==0){
497 generateEmptyGraph("HHSH","WORST",condition);
498 generateEmptyGraph("HHSH","BEST",condition);
499 }
500 else{
501 alist = (ArrayList)detailList.get(0);
502 idList = (String[])detailList.get(1);
503 metadataNameList = (String[])detailList.get(2);
504
505 new_dataList = transformDataList(alist,idList);
506 //WriteHTML("HHSH",new_dataList,idList,metadataNameList,NORMAL);
507 WriteHTML("HHSH",new_dataList,idList,metadataNameList,WORST,title1+title2+title4,condition);
508 WriteHTML("HHSH",new_dataList,idList,metadataNameList,BEST,title1+title2+title4,condition);
509 }
510 //----------------------------------------------------------------------HHHS
511 detailList = getHideEmptyDocumentDetail((ArrayList)xList2.clone(),(String[])idListCopy2.clone(),(String[])metadataNameListCopy2.clone());
512 if(detailList.size()==0){
513 generateEmptyGraph("HHHS","WORST",condition);
514 generateEmptyGraph("HHHS","BEST",condition);
515
516 generateEmptyGraph("HHHH","WORST",condition);
517 generateEmptyGraph("HHHH","BEST",condition);
518 }
519 else{
520 alist = (ArrayList)detailList.get(0);
521 idList = (String[])detailList.get(1);
522 metadataNameList = (String[])detailList.get(2);
523
524 new_dataList = transformDataList(alist,idList);
525 //WriteHTML("HHHS",new_dataList,idList,metadataNameList,NORMAL);
526 WriteHTML("HHHS",new_dataList,idList,metadataNameList,WORST,title1+title2+title3,condition);
527 WriteHTML("HHHS",new_dataList,idList,metadataNameList,BEST,title1+title2+title3,condition);
528
529 //----------------------------------------------------------------------HHHH
530 detailList = getHideFullDocumentDetail((ArrayList)alist.clone(),(String[])idList.clone(),(String[])metadataNameList.clone());
531 if(detailList.size()==0){
532 generateEmptyGraph("HHHH","WORST",condition);
533 generateEmptyGraph("HHHH","BEST",condition);
534 }
535 else{
536 alist = (ArrayList)detailList.get(0);
537 idList = (String[])detailList.get(1);
538 metadataNameList = (String[])detailList.get(2);
539
540 new_dataList = transformDataList(alist,idList);
541 //WriteHTML("HHHH",new_dataList,idList,metadataNameList,NORMAL);
542 WriteHTML("HHHH",new_dataList,idList,metadataNameList,WORST,title1+title2+title3+title4,condition);
543 WriteHTML("HHHH",new_dataList,idList,metadataNameList,BEST,title1+title2+title3+title4,condition);
544 }
545 }
546 }
547 }
548 }
549
550 private ArrayList transformDataList(ArrayList list, String[] ids){
551
552 //System.out.println("warning2");
553 ArrayList wholeList = new ArrayList();
554
555 for(int i = 0; i< ids.length; i++){
556 ArrayList idList = new ArrayList();
557 for(int j = 0; j<list.size();j++){
558 int[] datarows = (int[]) list.get(j);
559 idList.add(new Integer (datarows[i]));
560 }
561 wholeList.add(idList);
562 }
563
564 for(int i = 0; i< wholeList.size(); i++){
565 ArrayList alist = (ArrayList)wholeList.get(i);
566 int[] rows = new int[alist.size()];
567 for(int j = 0; j< alist.size(); j++){
568 rows[j] = ((Integer)alist.get(j)).intValue();
569 }
570 wholeList.remove(i);
571 wholeList.add(i,rows);
572 }
573 //System.out.println("warning3");
574 return wholeList;
575
576 }
577
578 private void generateEmptyGraph(String fileName,String condition,String suffix){
579 String cases = condition;
580 String collectionFolder = mds.getCollectionName();
581
582 String suf = suffix;
583 if(!suffix.equalsIgnoreCase("dublin")){
584 suf = "other";
585 }
586
587 //System.out.println(fileName+cases+" err");
588 try{
589 //FileWriter fstream = new FileWriter(destination+collectionFolder+"/"+suffix+"_"+fileName+"_"+cases+".html");
590 FileWriter fstream = new FileWriter(collectionFolder+"/"+suf+"_"+fileName+"_"+cases+".html");
591 BufferedWriter out = new BufferedWriter(fstream);
592 out.write("<!-- This comment keeps IE6/7 in the reliable quirks mode -->\r\n");
593 out.write("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\r\n");
594 out.write("<html>\r\n");
595 out.write("<head>\r\n<title> No Available Chart</title>\r\n");
596
597 out.write("<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\">\r\n");
598 //out.write("<link rel=\"stylesheet\" href=\"../script/doby2.css\" type=\"text/css\"/>\r\n");
599
600 //out.write("<script type=\"text/javascript\" src=\"../script/getInfomation.js\"></script>\r\n");
601 out.write("</head>\r\n");
602 out.write("<p><a href=\" Overall.html \">Summary</a></p>");
603 out.write("<body>\r\n");
604 out.write("<p>No data available to render chart.</p>\r\n");
605 out.write("<p>Reason: Document number is zero or Metadata element number is zero </p>\r\n");
606 out.close();
607 }catch(Exception e){}
608
609 }
610 private void WriteHTML(String fileName, ArrayList dataset, String[] ids, String[] metadataName, String condition, String title, String suffix){
611
612 int blueDot = 0;
613 String cases = condition;
614
615 String suf = suffix;
616 if(!suffix.equalsIgnoreCase("dublin")){
617 suf = "other";
618 }
619
620 try{
621
622 String str = sdf.format(cl.getTime());
623 DataMaker dmx = new DataMaker(mds);
624 String collectionFolder = mds.getCollectionName();
625 int cols = (metadataName.length +2 )*100;
626 FileWriter fstream = new FileWriter(collectionFolder+"/"+suf+"_"+fileName+"_"+cases+".html");
627 //FileWriter fstream = new FileWriter(destination+collectionFolder+"/"+suffix+"_"+fileName+"_"+cases+".html");
628
629 BufferedWriter out = new BufferedWriter(fstream);
630 out.write("<!-- This comment keeps IE6/7 in the reliable quirks mode -->\r\n");
631 out.write("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\r\n");
632 out.write("<html>\r\n");
633 out.write("<head>\r\n<title>"+title+"</title>\r\n");
634
635 out.write("<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\">\r\n");
636 out.write("<link rel=\"stylesheet\" href=\"../script/doby2.css\" type=\"text/css\"/>\r\n");
637
638 out.write("<script type=\"text/javascript\" src=\"../script/getInfomation.js\"></script>\r\n");
639 out.write("<script type=\"text/javascript\" src=\"http://yui.yahooapis.com/2.4.1/build/yahoo/yahoo-min.js\"></script>\r\n");
640 out.write("<script type=\"text/javascript\" src=\"http://yui.yahooapis.com/2.4.1/build/event/event-min.js\"></script>\r\n");
641 out.write("<script type=\"text/javascript\" src=\"http://yui.yahooapis.com/2.4.1/build/connection/connection-min.js\"></script>\r\n");
642 //out.write("<script type=\"text/javascript\" src=\"../script/getInfomation.js\"></script>\r\n");
643 // <script src="http://yui.yahooapis.com/2.4.1/build/yahoo/yahoo-min.js"></script>
644 //<script src="http://yui.yahooapis.com/2.4.1/build/event/event-min.js"></script>
645 // <script src="http://yui.yahooapis.com/2.4.1/build/connection/connection-min.js"></script>
646
647 out.write("</head>\r\n");
648 out.write ("<p><a href=\" Overall.html \">Summary</a></p>");
649 out.write("<body id=\""+mds.getCollectionName()+"\" onLoad=\"reconfig()\">\r\n");
650
651 out.write("<div id=\"container\">\r\n");
652 out.write("<div class=\"tableContainer\">\r\n");
653
654 out.write ("<table cellspacing=\"0\">\r\n");
655
656
657 out.write ("<thead>\r\n");
658 out.write ("<tr>\r\n");
659 out.write("<td class=\"qh\">Info\r\n");
660 out.write("<td class=\"qh\">URL\r\n");
661 //System.out.println("step1");
662 for(int a = 0; a< metadataName.length; a++){
663 out.write("<td>"+ metadataName[a]+"\r\n");
664 }
665 //System.out.println("step2");
666 out.write ("</thead>\r\n<tfoot>\r\n<tr>\r\n");
667
668 out.write("<td class=\"qh\">&nbsp;\r\n");
669 out.write("<td class=\"qh\">&nbsp;\r\n");
670 for(int a = 0; a< metadataName.length; a++){
671 //System.out.println(a+metadataName[a]);
672 out.write("<td>"+ dm.Mean(metadataName[a])+"%\r\n");
673 }
674 //System.out.println("step3");
675 out.write ("</tfoot>\r\n<tbody>\r\n");
676
677 if(cases.equals("normal")){
678 for(int i = ids.length; i>0; i--){
679 out.write ("<tr>");
680 int[] datarows = (int[])dataset.get(i-1);
681
682 for(int j = 0; j<datarows.length; j++){
683
684 if(datarows[j]==1){
685 out.write ("<td class=\"b\">");
686 blueDot++;
687 }
688 else {
689 out.write ("<td>&nbsp;");
690 }
691 }
692 }
693 }
694 else{
695 ArrayList idList = new ArrayList();
696 for(int i = 0; i<ids.length;i++){
697 idList.add(ids[i]);
698 }
699
700
701 HashMap hp = new HashMap();
702
703 for(int i = ids.length; i>0; i--){
704 String idValue = ids[i-1];
705 int dots = 0;
706 int[] datarows = (int[])dataset.get(i-1);
707 for(int j = 0; j<datarows.length; j++){
708 if(datarows[j]==1){
709 dots++;
710 }
711 }
712 hp.put(idValue,new Integer(dots));
713
714 }
715 ArrayList alist = dmx.sortMap(hp);
716 String[] idIntValue = new String[alist.size()];
717 for(int i = 0; i< alist.size(); i++){
718 Map.Entry entry = (Map.Entry) alist.get(i);
719 String idElement = ((String) entry.getKey());
720 idIntValue[i] = idElement;
721 }
722
723 if(cases.equals("worst")){
724 //System.out.println("step4");
725 for(int i = 0; i<idIntValue.length; i++){
726 int value = idList.indexOf(idIntValue[i]);
727 int[] datarows = (int[])dataset.get(value);
728 //System.out.println("step5");
729 out.write ("<tr id=\""+idIntValue[i].substring(4)+"\">\r\n");
730 out.write("<td class=\"E\" onclick=\"GD(this)\">&nbsp;\r\n");
731 //out.write("<td class=\"qh\" onclick=\"GX(this)\">URL\r\n");
732 out.write("<td class=\"qh\" onclick=\"GX(this)\">open\r\n");
733 for(int j = 0; j<datarows.length; j++){
734 //System.out.println("step6");
735 if(datarows[j]==1){
736 out.write ("<td class=\"b\">\r\n");
737 blueDot++;
738 }
739 else {
740
741 out.write ("<td class=\"w\">\r\n");
742 }
743 }
744 //System.out.println("step7");
745 //out.write ("</tr>\r\n");
746 }
747 }
748
749 if(cases.equals("best")){
750 for(int i = idIntValue.length; i>0; i--){
751 int value = idList.indexOf(idIntValue[i-1]);
752 int[] datarows = (int[])dataset.get(value);
753
754 out.write ("<tr id=\""+idIntValue[i-1].substring(4)+"\">\r\n");
755 out.write("<td class=\"E\" onclick=\"GD(this)\">&nbsp;\r\n");
756 //out.write("<td class=\"qh\" onclick=\"GX(this)\">URL\r\n");
757 out.write("<td class=\"qh\" onclick=\"GX(this)\">open\r\n");
758 for(int j = 0; j<datarows.length; j++){
759
760 if(datarows[j]==1){
761 out.write ("<td class=\"b\">\r\n");
762 blueDot++;
763 }
764 else {
765 out.write ("<td class=\"w\">\r\n");
766 }
767 }
768 //out.write("<td class=\"E\" onmousemove=\"GD(this)\">&nbsp;\r\n");
769 }
770 }
771 }
772 out.write ("</tbody>\r\n");
773
774 out.write ("</table>\r\n</div></div>\r\n");
775
776 int t1 = ids.length;
777 HashMap hpd = mds.getMetadataSetMap();
778 int t2 = hpd.size();
779 int t3 = metadataName.length;
780
781 int t4 = t1*t3;
782 out.write("<table>\r\n<tbody class=\"table1\">");
783 out.write("<tr>\r\n");
784 out.write("<td class=\"bfont\">This subset shows "+t1+" out of "+dm.getDocNum()+" documents");
785 out.write("<td class=\"bfont\">"+blueDot+" out of "+(dm.getDocNum()*t3)+" metadata items are defined");
786 out.write("<tr>\r\n");
787 out.write("<td class=\"bfont\">This subset shows "+t3+" out of "+mds.metadataNameList.size()+" metadata elements");
788 out.write("<td class=\"bfont\">Subset completeness: "+dm.round((double)(blueDot*100/t4),5)+"%");
789 //out.write ("<p align=center> <a href=\" Overall.html \">Overall Statistics</a></p>");
790 out.write("</table>\r\n");
791 out.write ("<p> <a href=\"Overall.html \">Summary</a></p>");
792 out.write ("</body></html>");
793 //Close the output stream
794 out.close();
795 }catch (Exception e){//Catch exception if any
796
797 System.err.println("Error: " + e.getMessage());
798 }
799 }
800
801
802 public void generateOverallStatisticsPage(HashMap MetadataSetMap){
803 //System.out.println("overall start");
804 String fileName = "Overall";
805
806 ArrayList wholeList = new ArrayList();
807 HashMap hp = mds.getMetadataSetMap();
808 Collection c = hp.values();
809 Iterator i = c.iterator();
810 int counter = 0;
811 String[][] MetadataData = new String[c.size()][2];
812 while(i.hasNext()){
813 MetadataSet mds = (MetadataSet)i.next();
814 wholeList.add(mds);
815 ArrayList newMDS = new ArrayList();
816 newMDS.add(mds);
817
818 MetadataData[counter][0] = mds.getName();
819 MetadataData[counter][1] = dm.getSingleMetadataSetCompleteness(newMDS)+"%";
820 //System.out.println(MetadataData[counter][0]+MetadataData[counter][1]);
821 counter++;
822 }
823
824 try{
825 //printWriter.write("<!-- generating all possible page-->");
826 //printWriter.flush();
827
828 String str = sdf.format(cl.getTime());
829 String collectionFolder = mds.getCollectionName();
830
831 //FileWriter fstream = new FileWriter(collectionFolder+"/"+fileName+".html");
832 FileWriter fstream = new FileWriter("/home/cc108/MRWks1/describeMessenger/"+collectionFolder+"/"+fileName+".html");
833 BufferedWriter out = new BufferedWriter(fstream);
834
835 out.write("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">");
836 out.write("<html>\r\n");
837 out.write("<head><title>Summary</title>\r\n");
838 out.write("<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\">\r\n");
839 //out.write("<link rel=\"stylesheet\" href=\"../script/doby2.css\" type=\"text/css\"/>\r\n");
840 out.write("<style type=\"text/css\">\r\n");
841 out.write(".tam{height:20; text-align:right}\r\n");
842 out.write("th{height:20; text-align:left}\r\n");
843 out.write("body{font-family: Arial;}\r\n");
844 out.write("</style>\r\n");
845 out.write("<script type=\"text/javascript\" src=\"../script/status3.js\"></script>\r\n");
846 out.write("</head>\r\n");
847
848 out.write("<body>\r\n");
849 out.write ("<td> <div style=\"float:left;\"> <a href=\"http://www.nzdl.org/greenstone3/mat\">Mat Home</a></div>");
850 out.write("<div style=\"float:right;\"><a href=\"http://chnm.gmu.edu/tools/surveys/4386/\">Please send feedback about the Mat tool</a></div></td><br>");
851 // out.write("<p align=\"right\"><a href=\"http://chnm.gmu.edu/tools/surveys/4386/\">Short feedback survey about the Mat tool</a></p>");
852 //out.write("<p> <a href=\"http://www.nzdl.org/greenstone3/mat\">Mat Home</a></p>");
853 out.write("<h3 align=\"center\">Summary</h3>\r\n");
854
855 out.write("<table border=1 align=\"center\">\r\n");
856 out.write("<colgroup width=\"300\" span=\"2\">\r\n");
857
858 /*
859 out.write("<tr>\r\n");
860 out.write("<th>Collection: </th>\r\n");
861 out.write("<td class=\"tam\">"+mds.getCollectionName()+"\r\n");
862 out.write("</tr>\r\n");
863 */
864
865 out.write("<tr>\r\n");
866 out.write("<th>OAI URL: </th>\r\n");
867 if(mds.getOAIURL().length()>50){
868 out.write("<td class=\"tam\"><font size=\"2px\"><a href=\""+mds.getOAIURL()+"\">"+mds.getOAIURL().substring(0,47)+"....</a></font>\r\n");
869 }
870 else{
871 out.write("<td class=\"tam\"><font size=\"2px\"><a href=\""+mds.getOAIURL()+"\">"+mds.getOAIURL()+"</a></font>\r\n");
872 }
873 out.write("</tr>\r\n");
874
875 /*
876 out.write("<tr>\r\n");
877 out.write("<th>Metadata Prefix:\r\n");
878 out.write("<td class=\"tam\">"+mds.getOaiPrefix()+"\r\n");
879 out.write("</tr>\r\n");
880 */
881
882 out.write("<tr>\r\n");
883 out.write("<th>Number of Records:\r\n");
884 out.write("<td class=\"tam\">"+dm.getDocNum()+"\r\n");
885 out.write("</tr>\r\n");
886 /*
887 out.write("<tr>\r\n");
888 out.write("<th>Number of Metadata:\r\n");
889 out.write("<td class=\"tam\">"+mds.getMetadataSetMap().size()+"\r\n");
890 out.write("</tr>\r\n");
891
892 out.write("<tr>\r\n");
893 out.write("<th>Overall Metadata Completeness:\r\n");
894 out.write("<td class=\"tam\">"+dm.getSingleMetadataSetCompleteness(wholeList)+"%\r\n");
895 out.write("</tr>\r\n</table>");
896 */
897 out.write("<table border=1 align=\"center\">\r\n");
898 out.write("<colgroup width=\"300\" span=\"2\">\r\n");
899 out.write("<tr>\r\n");
900 out.write("<th>Metadata:\r\n");
901 out.write("<td class=\"tam\"><b>Completeness</b>\r\n");
902 out.write("</tr><br>\r\n");
903
904 for(int a = 0; a<MetadataData.length; a++){
905 out.write ("<tr>\r\n");
906 if(MetadataData[a][0].equalsIgnoreCase("dublin")){
907 out.write ("<th><a href =\""+MetadataData[a][0]+".html\">"+metadataSet1+"</a>");
908 }
909 else if (MetadataData[a][0].equalsIgnoreCase("extracted")){
910 out.write ("<th><a href =\""+MetadataData[a][0]+".html\">"+metadataSet2+"</a>");
911 }
912 else{
913 out.write ("<th><a href =\""+MetadataData[a][0]+".html\">"+MetadataData[a][0]+"</a>");
914 }
915 //out.write ("<th><a href =\""+MetadataData[a][0]+".html\">"+MetadataData[a][0] +"</a>");
916 out.write ("<td class=\"tam\"> "+MetadataData[a][1]);
917 out.write ("</tr>\r\n");
918 }
919 out.write ("</table>");
920 out.write("<br>\r\n");
921 out.write("<FORM name=\"test\" onsubmit=\"checkStatus()\" action=\"\">");
922 out.write("<table border=1 align=\"center\">\r\n");
923 out.write ("<colgroup width=\"606\" span=\"1\">\r\n");
924 out.write("<tr>\r\n<th>Customize Visualization");
925 out.write("<tr>\r\n<td><LABEL FOR=\"H1\"><INPUT align=\"left\" TYPE=\"checkbox\" ID=\"H1\">Hide Empty Metadata Elements</LABEL>");
926 out.write("<tr>\r\n<td><LABEL FOR=\"H2\"><INPUT align=\"left\" TYPE=\"checkbox\" ID=\"H2\">Hide Completed Metadata Elements</LABEL>");
927 out.write("<tr>\r\n<td><LABEL FOR=\"H3\"><INPUT align=\"left\" TYPE=\"checkbox\" ID=\"H3\">Hide Documents with Empty Metadata Elements</LABEL>");
928 out.write("<tr>\r\n<td><LABEL FOR=\"H4\"><INPUT align=\"left\" TYPE=\"checkbox\" ID=\"H4\">Hide Documents with Completed Metadata Elements</LABEL>");
929 out.write("<tr>\r\n<th>Metadata: ");
930
931 ///////////////////////////////////////
932
933 //out.write("<tr>\r\n<td><LABEL FOR=\"C1\"><INPUT align=\"left\" TYPE=\"radio\" name = \"col\" ID=\"C1\" checked>Dublin Core</LABEL>");
934 //out.write("<tr>\r\n<td><LABEL FOR=\"C2\"><INPUT align=\"left\" TYPE=\"radio\" name = \"col\" ID=\"C2\" checked>Extracted</LABEL>");
935 //out.write("<tr>\r\n<td><LABEL FOR=\"C3\"><INPUT align=\"left\" TYPE=\"radio\" name = \"col\" ID=\"C3\" checked >Both</LABEL>");
936
937 if(MetadataData.length==1){
938 //out.write("<tr>\r\n<td><LABEL FOR=\"C1\"><INPUT align=\"left\" TYPE=\"radio\" name = \"col\" ID=\"C1\" checked>"+MetadataData[0][0]+"</LABEL>");
939 if(MetadataData[0][0].equalsIgnoreCase("dublin")){
940 out.write("<tr>\r\n<td><LABEL FOR=\"C1\"><INPUT align=\"left\" TYPE=\"radio\" name = \"col\" ID=\"C1\" checked> Dublin Core </LABEL>");
941 }
942 else {
943 out.write("<tr>\r\n<td><LABEL FOR=\"C4\"><INPUT align=\"left\" TYPE=\"radio\" name = \"col\" ID=\"C4\" checked>"+MetadataData[0][0]+"</LABEL>");
944 }
945 }
946 else{
947 for(int a = 0; a<MetadataData.length; a++){
948 if(a==0){
949 if(MetadataData[a][0].equalsIgnoreCase("dublin")){
950 out.write("<tr>\r\n<td><LABEL FOR=\"C1\"><INPUT align=\"left\" TYPE=\"radio\" name = \"col\" ID=\"C1\" checked>Dublin Core</LABEL>");
951 }
952 else {
953 out.write("<tr>\r\n<td><LABEL FOR=\"C4\"><INPUT align=\"left\" TYPE=\"radio\" name = \"col\" ID=\"C4\" checked>"+MetadataData[a][0]+"</LABEL>");
954 }
955 }
956 else{
957 if(MetadataData[a][0].equalsIgnoreCase("dublin")){
958 out.write("<tr>\r\n<td><LABEL FOR=\"C1\"><INPUT align=\"left\" TYPE=\"radio\" name = \"col\" ID=\"C1\">"+MetadataData[a][0]+"</LABEL>");
959 }
960 else{
961 out.write("<tr>\r\n<td><LABEL FOR=\"C4\"><INPUT align=\"left\" TYPE=\"radio\" name = \"col\" ID=\"C4\">"+MetadataData[a][0]+"</LABEL>");
962 }
963
964 }
965 }
966 }
967
968 ////////////////////////////////////////////////////////////////////////////////////////////////
969 out.write("<tr>\r\n<th>Order By Completeness : ");
970 out.write("<tr>\r\n<td><LABEL FOR=\"R1\"><INPUT align=\"left\" TYPE=\"radio\" name = \"order\" ID=\"R1\">Best Case to Worst Case</LABEL>");
971 out.write("<tr>\r\n<td><LABEL FOR=\"R2\"><INPUT align=\"left\" TYPE=\"radio\" name = \"order\" ID=\"R2\" checked >Worst Case to Best Case</LABEL>");
972 out.write("</table><p align=\"center\"><INPUT TYPE=\"button\" VALUE=\"Show Visualization\" onClick=\"checkStatus()\"> </FORM>");
973 //out.write("<p align=\"right\"><a href=\"http://chnm.gmu.edu/tools/surveys/4386/\">Short feedback survey about the Mat tool</a></p>");
974 out.write("<p align=\"center\">"+str+"</p>");
975 //out.write("<p align=\"left\">Comments to <a href=\"http://www.cs.waikato.ac.nz/~daven/\">Dave Nichols</a></p>");
976 out.write("</body></html>");
977 //Close the output stream
978 out.close();
979 fstream.close();
980 }catch (Exception e){//Catch exception if any
981 //System.out.println("overall");
982 System.err.println("Error: " + e.getMessage());
983 }
984 //printWriter.write("<!-- generating all possible page-->");
985 //printWriter.flush();
986 //System.out.println("overall ends");
987
988 }
989
990 public void generateMetadataSetDetailPage(HashMap MetadataSetMap){
991
992 Collection c = MetadataSetMap.values();
993 Iterator i = c.iterator();
994 while(i.hasNext()){
995 MetadataSet mds = (MetadataSet)i.next();
996
997 }
998 }
999
1000
1001 public void WriteMetadataSetDetailHTML(MetadataSet mdset){
1002 String fileName = mdset.getName();
1003 DataMaker dmx = new DataMaker(mds);
1004 //DataMaker dmx = dm;
1005 //System.out.println("set start");
1006 try{
1007
1008 String str = sdf.format(cl.getTime());
1009 String collectionFolder = mds.getCollectionName();
1010 FileWriter fstream = new FileWriter(collectionFolder+"/"+fileName+".html");
1011 //FileWriter fstream = new FileWriter(destination+collectionFolder+"/"+fileName+".html");
1012 BufferedWriter out = new BufferedWriter(fstream);
1013 out.write("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">");
1014 out.write("<html>\r\n");
1015 out.write("<head>\r\n<title>Metadata Detail</title>\r\n");
1016 out.write("<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\">\r\n");
1017 out.write("<style type=\"text/css\">\r\n");
1018 out.write("td{height:20;text-align:right;}\r\n");
1019 out.write("th{height:20;text-align:left;}\r\n");
1020 out.write("body{font-family: Arial;}\r\n");
1021 out.write("</style>\r\n");
1022 out.write( "</head>\r\n");
1023
1024 out.write ("<body>\r\n");
1025 out.write ("<p> <a href=\" Overall.html \">Summary</a></p>");
1026 if(fileName.equalsIgnoreCase("dublin")){
1027 //out.write ("<th><a href =\""+MetadataData[a][0]+".html\">"+metadataSet1+"</a>");
1028 out.write ("<h3 align=\"center\">Metadata Detail: "+metadataSet1+"</h3>\r\n");
1029 }
1030 else if (fileName.equalsIgnoreCase("extracted")){
1031 //out.write ("<th><a href =\""+MetadataData[a][0]+".html\">"+metadataSet2+"</a>");
1032 out.write ("<h3 align=\"center\">Metadata Detail: "+metadataSet2+"</h3>\r\n");
1033 }
1034 else{
1035 out.write ("<h3 align=\"center\">Metadata Detail: "+fileName+"</h3>\r\n");
1036 }
1037 out.write ("<table border=\"1\" align=\"center\">\r\n");
1038 out.write ("<colgroup width=\"200\" span=\"2\">\r\n");
1039
1040 out.write ("<tr>\r\n");
1041 out.write ("<th>Elements:\r\n");
1042 out.write ("<td><b>Completeness</b>\r\n");
1043 out.write ("</tr>\r\n");
1044
1045 ArrayList elementList = mdset.getIndexsList();
1046 HashMap hp = new HashMap();
1047 int num = elementList.size();
1048 for(int i = 0; i<num; i++){
1049 String elementName = (String)elementList.get(i);
1050 Double elementValue = new Double(dmx.Mean(elementName));
1051 hp.put(elementName, elementValue);
1052 }
1053
1054 elementList = new ArrayList();
1055 elementList = dmx.sortMap(hp);
1056 num = elementList.size();
1057 for(int i = 0; i<num; i++){
1058 Map.Entry entry = (Map.Entry) elementList.get(i);
1059 String elementName = (String) entry.getKey();
1060 out.write ("<tr>\r\n");
1061 out.write ("<th><a href=\""+ elementName +".html\">"+elementName+"</a>\r\n");
1062 out.write ("<td>"+ dmx.Mean(elementName)+"%\r\n");
1063 out.write ("</tr>\r\n");
1064 }
1065
1066 out.write("</table>\r\n");
1067
1068 out.write ("<p align=\"center\"> <a href=\" Overall.html \">Summary</a></p>");
1069 out.write("<p align=\"center\">"+str+"</p>");
1070
1071 //out.write ("<td> <div style=\"float:left;\"> <a href=\" Overall.html \">Overall Statistics</a></div>");
1072 //out.write("<div style=\"float:right;\">"+str+"</div></td>");
1073 //<td><div style="float:left;">test</div><div style="float:right;">test</div></td>
1074 out.write ("</body></html>\r\n");
1075 //Close the output stream
1076 out.close();
1077 }catch (Exception e){//Catch exception if any
1078 //System.out.println("set");
1079 System.err.println("Error: " + e.getMessage());
1080 }
1081 //System.out.println("set ends");
1082 }
1083
1084 public void generateMetadataElementDetailPage(MetadataSet mds){
1085
1086 ArrayList nameList = mds.getIndexsList();
1087 for(int i = 0; i<nameList.size();i++){
1088 WriteMetadataElementDetailHTML((String)nameList.get(i),mds.getName());
1089 }
1090 }
1091
1092 public void WriteMetadataElementDetailHTML(String name, String linkName){
1093
1094 String fileName = name;
1095 DataMaker dmx = new DataMaker(this.mds);
1096
1097 //System.out.println("detail start");
1098
1099 try{
1100 //String str = sdf.format(cl.getTime());
1101 String collectionFolder = mds.getCollectionName();
1102 FileWriter fstream = new FileWriter(collectionFolder+"/"+fileName+".html");
1103 //FileWriter fstream = new FileWriter(destination+collectionFolder+"/"+fileName+".html");
1104 BufferedWriter out = new BufferedWriter(fstream);
1105 //System.out.println("step0" + fileName);
1106 out.write("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">");
1107 out.write ("<html>\r\n");
1108 out.write("<head>\r\n<title>Metadata Element Detail</title>\r\n");
1109 out.write("<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\">\r\n");
1110 out.write("<style type=\"text/css\">\r\n");
1111 out.write("td{height:20; text-align:left;}\r\n");
1112 out.write(".tam{height:20; text-align:center}\r\n");
1113 out.write("th{height:20; text-align:left;}\r\n");
1114 out.write("body{font-family: Arial;}\r\n");
1115 out.write("</style>\r\n");
1116 out.write("<script type=\"text/javascript\" src=\"http://www.nzdl.org/greenstone3/mat/script/status3.js\"></script>");
1117 out.write("</head>\r\n");
1118 out.write("<body>\r\n");
1119 out.write("<p><a href=\" Overall.html \">Summary</a>");
1120 out.write("&raquo;");
1121 //out.write("<a href=\""+linkName+".html\">Metadata Set Detail("+linkName+")</a></p>");
1122
1123
1124 ////////////////////////////////////////////////////////////////////////////////////
1125 if(linkName.equalsIgnoreCase("dublin")){
1126 out.write ("<a href=\""+linkName+".html\">Metadata Detail ("+metadataSet1+")</a></p>");
1127 //out.write ("<h1 align=\"center\">Metadata Set Detail: "+metadataSet1+"</h1>\r\n");
1128 }/*
1129 else if (linkName.equalsIgnoreCase("extracted")){
1130 out.write ("<a href=\""+linkName+".html\">Metadata Detail ("+metadataSet2+")</a></p>");
1131 //out.write ("<h1 align=\"center\">Metadata Set Detail: "+metadataSet2+"</h1>\r\n");
1132 }*/
1133 else{
1134 out.write ("<a href=\""+linkName+".html\">Metadata Detail ("+linkName+")</a></p>");
1135 }
1136 ///////////////////////////////////////////////////////////////////////////////////////////////
1137 if(name.startsWith("dc.")){
1138 int dot = name.lastIndexOf('.');
1139 dot++;
1140 String nameReplace = name.substring(dot);
1141 if(url.containsKey(nameReplace)){
1142 out.write ("<h3 align=\"center\">Metadata Element Detail:<a href=\""+ url.get(nameReplace) +"\">"+ name +"</a></h3>\r\n");
1143 }
1144 else{
1145 out.write ("<h3 align=\"center\">Metadata Element Detail: "+ name +"</a></h3>\r\n");
1146 }
1147 }
1148
1149 else{
1150 out.write ("<h3 align=\"center\">Metadata Element Detail: "+ name +"</h3>\r\n");
1151 }
1152
1153 /*
1154 * 1. retrieve all IDs from archivedir
1155 * 2. retrieve all IDs from
1156 *
1157 */
1158 /////////////////////////////////////////////////////////////
1159
1160 HashMap internalIDMap = dm.getInternalIdentifier(fileName);
1161 ///////////////////////////////////////////////////////////
1162 out.write ("<table border=\"1\" align=\"center\" width=1000>\r\n");
1163 out.write ("<colgroup width=\"500\" span=\"2\">\r\n");
1164
1165 //System.out.println("step1");
1166 out.write ("<tr>\r\n");
1167 out.write ("<th>Total Number of Documents\r\n");
1168
1169 out.write ("<td class=\"tam\">"+dm.getDocNum()+"\r\n");
1170 out.write ("</tr>\r\n");
1171
1172 //System.out.println("step2");
1173 out.write ("<tr>\r\n");
1174 out.write ("<th>Unique Values\r\n");
1175 //System.out.println(name);
1176 out.write ("<td class=\"tam\">"+dmx.getDistinctNumber(name)+"\r\n");
1177 //System.out.println(name);
1178 out.write ("</tr>\r\n");
1179
1180 //System.out.println("step3");
1181 out.write ("<tr>\r\n");
1182 out.write ("<th>Total times element used\r\n");
1183 out.write ("<td class=\"tam\">"+dmx.getFrequency(name) +"\r\n");
1184 out.write ("</tr>\r\n");
1185
1186 //System.out.println("step4");
1187 out.write ("<tr>\r\n");
1188 out.write ("<th>No. of records containing element\r\n");
1189 out.write ("<td class=\"tam\"> "+dmx.getDocumentUsedElement(name)+"\r\n");
1190 out.write ("</tr>\r\n");
1191
1192 //System.out.println("step5");
1193 out.write ("<tr>\r\n");
1194 //out.write ("<th title = \"The completeness means the arithmetic average \">Completeness\r\n");
1195 double percentage = dmx.Mean(name);
1196 if(percentage<100){
1197 out.write ("<th title = \"The completeness means the arithmetic average \"> <div style=\"float:left;\"> Completeness</div>");
1198 out.write ("<div style=\"float:right;\"><a href =\""+fileName+"_IncompletedList.html\">List</a></div></td>");
1199 createIncompletedList(fileName,linkName);
1200
1201 }
1202 else{
1203 out.write ("<th title = \"The completeness means the arithmetic average \">Completeness\r\n");
1204 }
1205 out.write ("<td class=\"tam\"> "+dmx.Mean(name) +"%\r\n");
1206 out.write ("</tr>\r\n");
1207
1208
1209
1210 //System.out.println("step6");
1211 out.write ("<tr>\r\n");
1212 //out.write ("<th title=\" Minimum number of occurrence of a value\">Smallest Number\r\n");
1213 //out.write ("<th><div style=\"float:left;\">Smallest Number</div><div style=\"float:right;\" onClick=\"alert('Minimum "+name +" usage in any record')\">What it this?</div>\r\n");
1214 out.write ("<td><b><div style=\"float:left;\">Minimum "+name +" usage in any record</div></b><div style=\"float:right;\"> <a href=\"#\" onClick=\"helpWindow('Minimum','"+name+"')\" style=\"font-size:small;\">What's this?</a></div>\r\n");
1215 //out.write ("<td><b><div style=\"float:left;\">Minimun "+name +" usage in any record</div></b><div style=\"float:right;\"> onClick=\"helpWindow('Minimun','"+name+"')\">What's this?</div>\r\n");
1216 out.write ("<td class=\"tam\"> "+dm.getMinRange(name) +"\r\n");
1217 out.write ("</tr>\r\n");
1218
1219 //System.out.println("step7");
1220 out.write ("<tr>\r\n");
1221 //out.write ("<th title=\" Maximum number of occurrence of a value\">Largest Number\r\n");
1222 //out.write ("<th><div style=\"float:left;\">Largest Number</div><div style=\"float:right;\" onClick=\"alert('Maximum "+name +" usage in any record')\">What's this?</div>\r\n");
1223 out.write ("<td><b><div style=\"float:left;\">Maximum "+name +" usage in any record</div></b><div style=\"float:right;\"> <a href=\"#\" onClick=\"helpWindow('Maximum','"+name+"')\" style=\"font-size:small;\">What's this?</a></div>\r\n");
1224 out.write ("<td class=\"tam\"> "+dm.getMaxRange(name) +"\r\n");
1225 out.write ("</tr>\r\n");
1226
1227 //System.out.println("step8");
1228 out.write ("<tr>\r\n");
1229 //out.write ("<th title=\"(Total times element used / No. of records containing element)\">Average\r\n");
1230 //out.write ("<th><div style=\"float:left;\">Average</div><div style=\"float:right;\" onClick=\"alert('Average "+name +" usage/record')\">What's this?</div>\r\n");
1231 out.write ("<td><b><div style=\"float:left;\">Average "+name +" usage/record</div></b><div style=\"float:right;\"> <a href=\"#\" onClick=\"helpWindow('Average','"+name+"')\" style=\"font-size:small;\">What's this?</a></div>\r\n");
1232 out.write ("<td class=\"tam\"> "+dm.Average(name) +"\r\n");
1233 out.write ("</tr>\r\n");
1234
1235 //System.out.println("step9");
1236 out.write ("<tr>\r\n");
1237 //out.write ("<th title=\"The most frequent occurrence value in a group of values \"> Mode\r\n");
1238 //out.write ("<th><div style=\"float:left;\">Mode</div><div style=\"float:right;\" onClick=\"alert('Mode of "+ name +" usage/record')\">What's this?</div>\r\n");
1239 out.write ("<td><b><div style=\"float:left;\">Mode of "+name +" usage/record</div></b><div style=\"float:right;\"><a href=\"#\" onClick=\"helpWindow('Mode','"+name+"')\" style=\"font-size:small;\">What's this?</a></div>\r\n");
1240 out.write ("<td class=\"tam\"> "+dm.getMode(name) +"\r\n");
1241 out.write ("</tr>\r\n");
1242
1243 //System.out.println("step10");
1244 out.write ("<tr>\r\n");
1245 //out.write ("<th>Mode Frequency\r\n");
1246 //out.write ("<th><div style=\"float:left;\">Mode Frequency</div><div style=\"float:right;\" onClick=\"alert('Coverage of the mode of "+ name +" usage/record')\">What's this?</div>\r\n");
1247 out.write ("<td><b><div style=\"float:left;\">Coverage of the mode of "+name +" usage/record</div></b><div style=\"float:right;\"><a href=\"#\" onClick=\"helpWindow('Mode Frequency','"+name+"')\" style=\"font-size:small;\">What's this?</a></div>\r\n");
1248 out.write ("<td class=\"tam\">"+dm.ModeFrequency(name) +"%\r\n");
1249 out.write ("</tr>\r\n");
1250
1251
1252
1253 out.write ("<tr>\r\n");
1254 out.write ("<td class=\"tam\"><a href =\""+fileName+"_Frequency-based.html\">"+"View Full Frequency Sorted list</a>" +
1255 " <td class=\"tam\"><a href =\""+fileName+"_ASCII.html\">"+"View Full ASCII Sorted list</a></div></td>");
1256 out.write ("</tr>\r\n");
1257
1258 HashMap suggestionMap = generateMetadataElementSortList(fileName,"ASCII",linkName);
1259
1260 if(suggestionMap.size()>0){
1261 compareElement(fileName,collectionFolder,suggestionMap,linkName);
1262 out.write("<tr><td class=\"tam\"><a href =\""+fileName+"_Suggestion.html\">View<a><td>&nbsp;");
1263 }
1264
1265 generateMetadataElementSortList(fileName,"Frequency-based",linkName);
1266
1267 out.write ("</table><br>\r\n");
1268
1269
1270 out.write ("<table border=\"1\" align=\"center\" width=1000>\r\n");
1271 out.write ("<colgroup width=\"500\" span=\"2\">\r\n");
1272 out.write ("<tr>\r\n");
1273 out.write ("<th>ASCII-Based\r\n");
1274 out.write ("<th>First Five\r\n");
1275 out.write ("</tr>\r\n");
1276
1277
1278
1279 String[] temp = dmx.getSortList(name,"ASCII");
1280
1281
1282 String[] temp2 = {"&nbsp;","&nbsp;","&nbsp;","&nbsp;","&nbsp;"};
1283 int length = 0;
1284 //int counter = 0;
1285 //counter = temp.length;
1286 if(temp.length>=5){length=5;}
1287 else if(temp.length<5){length = temp.length;}
1288
1289 for(int i =0; i<length; i++){
1290 temp2[i] = temp[i];
1291 }
1292
1293
1294
1295 int x = temp2.length;
1296 for(int a = 0; a<x; a++){
1297 out.write ("<tr>\r\n");
1298 if(!temp2[a].equals("&nbsp;")){
1299 out.write ("<th>"+(a+1)+"\r\n");
1300 }
1301 else{
1302 out.write ("<th>&nbsp;\r\n");
1303 }
1304 if(temp2[a].startsWith("http")){
1305 if(temp2[a].length()>60){
1306 out.write ("<td><a href = \""+temp2[a]+"\">"+temp2[a].substring(0, 60)+"</a>");
1307 }
1308 else {
1309 out.write ("<td><a href = \""+temp2[a]+"\">"+temp2[a]+"</a>");
1310 }
1311 }
1312 else{
1313 char singleChar = temp2[a].charAt(0);
1314 if(temp2[a].length()>60){
1315
1316 if(temp2[a].startsWith(" ") && ((int)singleChar!=65279)){
1317 temp2[a] = temp2[a].substring(1);
1318 out.write ("<td>"+spaceLeft+space+spaceRight+temp2[a].substring(0, 60)+"<a href=\""+fileName+"_ASCII.html#topFive\"> ... </a>\r\n");
1319 }
1320
1321 else if (((int)singleChar==65279)){
1322 temp2[a] = temp2[a].substring(1);
1323 out.write ("<td>"+spaceLeft+oddChar+spaceRight+temp2[a].substring(0, 60)+"<a href=\""+fileName+"_ASCII.html#topFive\"> ... </a>\r\n");
1324 }
1325 else{
1326 out.write ("<td>"+temp2[a].substring(0, 60)+"<a href=\""+fileName+"_ASCII.html#topFive\"> ... </a>\r\n");
1327 }
1328 }
1329 else {
1330 if(temp2[a].startsWith(" ") && ((int)singleChar!=65279)){
1331 temp2[a] = temp2[a].substring(1);
1332 out.write ("<td>"+spaceLeft+space+spaceRight+temp2[a]+"\r\n");
1333 }
1334 else if (((int)singleChar==65279)){
1335 temp2[a] = temp2[a].substring(1);
1336 out.write ("<td>"+spaceLeft+oddChar+spaceRight+temp2[a]+"\r\n");
1337 }
1338 else{
1339 out.write ("<td>"+temp2[a]+"\r\n");
1340 }
1341 //out.write ("<td>"+temp2[a]+"\r\n");
1342 }
1343 }
1344 out.write ("</tr>\r\n");
1345 }
1346 String[] temp3 ={"&nbsp;","&nbsp;","&nbsp;","&nbsp;","&nbsp;"};
1347 length = 0;
1348 int start = temp.length;
1349 if(temp.length>=5){length= 5;}
1350 else if(temp.length<5){length = temp.length;}
1351
1352 for(int i = length; i>0; i--){
1353 temp3[i-1] = temp[start-1];
1354 start--;
1355 }
1356 out.write ("<tr>\r\n");
1357 out.write ("<th>......\r\n");
1358 out.write ("<th>Last Five\r\n");
1359 out.write ("</tr>\r\n");
1360
1361 //counter = temp.length;
1362 start = temp.length;
1363 x = temp3.length;
1364 for(int a = 0; a<x; a++){
1365 out.write ("<tr>\r\n");
1366
1367 if(!temp3[a].equals("&nbsp;")){
1368 out.write ("<th>"+(start-length+1+a));
1369 }
1370 else{
1371 out.write ("<th>&nbsp;");
1372 }
1373 if(temp3[a].startsWith("http")){
1374 if(temp3[a].length()>60){
1375 out.write ("<td><a href = \""+temp3[a]+"\">"+temp3[a].substring(0, 60)+"</a>");
1376 }
1377 else {
1378 //out.write ("<td>"+temp2[a]+"\r\n");
1379 out.write ("<td><a href = \""+temp3[a]+"\">"+temp3[a]+"</a>");
1380 }
1381 }
1382 else{
1383 char singleChar = temp3[a].charAt(0);
1384 if(temp3[a].length()>60){
1385 //out.write ("<td>"+temp3[a].substring(0, 60)+" ... \r\n");
1386 //System.out.println("7");
1387 //out.write ("<td>"+temp3[a].substring(0, 60)+"<a href=\""+fileName+"_ASCII.html#lastFive\"> ... \r\n");
1388 //System.out.println("8");
1389 if(temp3[a].startsWith(" ") && (int)singleChar!=65279){
1390 temp3[a] = temp3[a].substring(1);
1391 out.write ("<td>"+spaceLeft+space+spaceRight+temp3[a].substring(0, 60)+"<a href=\""+fileName+"_ASCII.html#lastFive\"> ... </a>\r\n");
1392 }
1393
1394 else if((int)singleChar==65279){
1395 temp3[a] = temp3[a].substring(1);
1396 out.write ("<td>"+spaceLeft+oddChar+spaceRight+temp3[a].substring(0, 60)+"<a href=\""+fileName+"_ASCII.html#lastFive\"> ... </a>\r\n");
1397
1398 }
1399 else{
1400 out.write ("<td>"+temp3[a].substring(0, 60)+"<a href=\""+fileName+"_ASCII.html#lastFive\"> ... </a>\r\n");
1401 }
1402 }
1403 else{
1404 //out.write ("<td>"+temp3[a]+"\r\n");
1405 if(temp3[a].startsWith(" ") && (int)singleChar!=65279){
1406 temp3[a] = temp3[a].substring(1);
1407 out.write ("<td>"+spaceLeft+space+spaceRight+temp3[a]+"\r\n");
1408 }
1409 else if((int)singleChar==65279){
1410 out.write ("<td>"+spaceLeft+oddChar+spaceRight+temp3[a]+"\r\n");
1411 }
1412 else{
1413 out.write ("<td>"+temp3[a]+"\r\n");
1414 }
1415 }
1416 out.write ("</tr>\r\n");
1417 }
1418 }
1419 out.write ("</table><br>\r\n");
1420 //////////////////////////////////////////////////////////////////////////////////////////////////////
1421 //////////////////////////////////////////////////////////////////////////////////////////////////////
1422 out.write ("<table border=\"1\" align=\"center\" width=1000>\r\n");
1423 out.write ("<colgroup width=\"500\" span=\"2\">\r\n");
1424 out.write ("<tr>\r\n");
1425 out.write ("<th>Frequency-Based:\r\n");
1426 out.write ("<th>First Five (Minimum ->Maximum)\r\n");
1427 out.write ("</tr>\r\n");
1428
1429 HashMap xMap = dmx.getDistinctValueMap(name);
1430 temp = dmx.getSortList(name,"Frequency-based");
1431 temp2 = new String[] {"&nbsp;","&nbsp;","&nbsp;","&nbsp;","&nbsp;"};
1432 length = 0;
1433
1434 if(temp.length>=5){length=5;}
1435 else if(temp.length<5){length = temp.length;}
1436
1437 for(int i =0; i<length; i++){
1438 temp2[i] = temp[i];
1439 }
1440
1441 ArrayList tempList = new ArrayList();
1442 for(int i =0; i<length; i++){
1443 if(((Integer)xMap.get(temp2[i])).intValue()==1){
1444 tempList.add(temp2[i]);
1445 }
1446 }
1447 //HashMap tempMap = dmx.getLinks(tempList, name);
1448 x = temp2.length;
1449 for(int a = 0; a<x; a++){
1450 out.write ("<tr>\r\n");
1451 //System.out.println("link map length"+tempMap.size());
1452 if(!temp2[a].equals("&nbsp;")){
1453 char singleChar = temp2[a].charAt(0);
1454 out.write ("<th>"+(a+1)+". (No. of occurrences: "+((Integer)xMap.get(temp2[a])).toString()+")\r\n");
1455 if(temp2[a].startsWith("http") && (int)singleChar != 65279){
1456 if(temp2[a].length()>60){
1457 out.write ("<td><a href = \""+temp2[a]+"\">"+temp2[a].substring(0, 60)+"</a>");
1458 }
1459 else {
1460 out.write ("<td><a href = \""+temp2[a]+"\">"+temp2[a]+"</a>");
1461 }
1462 }
1463 else if((int)singleChar == 65279){
1464 if(temp2[a].length()>60){
1465 out.write ("<td>"+spaceLeft+oddChar+spaceRight+temp2[a].substring(1, 60)+"<a href=\""+fileName+"_Frequency-based.html#topFive\"> ... \r\n");
1466 }
1467 else{
1468 //out.write ("<td>"+temp2[a]+"\r\n");
1469 out.write ("<td>"+spaceLeft+oddChar+spaceRight+temp2[a]+"<a href=\""+fileName+"_Frequency-based.html#topFive\"> ... \r\n");
1470
1471 }
1472 }
1473 else{
1474 if(temp2[a].length()>60){
1475 out.write ("<td>"+temp2[a].substring(0, 60)+"<a href=\""+fileName+"_Frequency-based.html#topFive\"> ... \r\n");
1476 }
1477 else{
1478 out.write ("<td>"+temp2[a]+"\r\n");
1479 }
1480 }
1481
1482 }
1483 else{
1484 //out.write ("<th>"+(a+1)+"\r\n");
1485 out.write ("<th>&nbsp;\r\n");
1486 out.write ("<td>\r\n");
1487 }
1488
1489
1490 out.write ("</tr>\r\n");
1491 }
1492
1493 temp3 = new String[]{"&nbsp;","&nbsp;","&nbsp;","&nbsp;","&nbsp;"};
1494 length = 0;
1495 start = temp.length;
1496 if(temp.length>=5){length= 5;}
1497 else if(temp.length<5){length = temp.length;}
1498
1499 for(int i = length; i>0; i--){
1500 temp3[i-1] = temp[start-1];
1501 start--;
1502 }
1503
1504 out.write ("<tr>\r\n");
1505 out.write ("<th>......\r\n");
1506 out.write ("<th>Last Five (Maximum -> Minimum)\r\n");
1507 out.write ("</tr>\r\n");
1508
1509 x = temp3.length;
1510 start = temp.length;
1511 for(int a = 0; a<x; a++){
1512 out.write ("<tr>\r\n");
1513 if(!temp3[a].equals("&nbsp;")){
1514 out.write ("<th>"+(start-length+1+a)+". (No. of occurrences: "+((Integer)xMap.get(temp3[a])).toString()+")\r\n");
1515 char singleChar = temp3[a].charAt(0);
1516 if(temp3[a].startsWith("http") && (int)singleChar!=65279){
1517 if(temp3[a].length()>60){
1518 out.write ("<td><a href = \""+temp3[a]+"\">"+temp3[a].substring(0, 60)+"</a>");
1519 }
1520 else {
1521 //out.write ("<td>"+temp2[a]+"\r\n");
1522 out.write ("<td><a href = \""+temp3[a]+"\">"+temp3[a]+"</a>");
1523 }
1524 }
1525 else if((int)singleChar==65279){
1526 if(temp3[a].length()>60){
1527 //System.out.println("15");
1528 out.write ("<td>"+spaceLeft+oddChar+spaceRight+temp3[a].substring(1, 60)+"<a href=\""+fileName+"_Frequency-based.html#lastFive\"> ... \r\n");
1529 //System.out.println("16");
1530
1531 }
1532 else{
1533 //out.write ("<td>"+temp3[a]+"\r\n");
1534 out.write ("<td>"+spaceLeft+oddChar+spaceRight+temp3[a]+"<a href=\""+fileName+"_Frequency-based.html#topFive\"> ... \r\n");
1535 }
1536 }
1537 else{
1538 if(temp3[a].length()>60){
1539 //out.write ("<td>"+temp3[a].substring(0, 60)+" ... \r\n");
1540 //System.out.println("15");
1541 out.write ("<td>"+temp3[a].substring(0, 60)+"<a href=\""+fileName+"_Frequency-based.html#lastFive\"> ... \r\n");
1542 //System.out.println("16");
1543
1544 }
1545 else{
1546 out.write ("<td>"+temp3[a]+"\r\n");
1547 }
1548 }
1549
1550 }
1551 else{
1552 //out.write ("<th>"+(a+1)+". (No. of occurrences: 0)\r\n");
1553 //out.write ("<th>\r\n");
1554 //out.write ("<th>"+(a+1)+"\r\n");
1555 out.write ("<th>&nbsp;\r\n");
1556 out.write ("<th>\r\n");
1557 }
1558
1559
1560 //out.write ("<td>"+temp3[a]+"\r\n");
1561 out.write ("</tr>\r\n");
1562 }
1563
1564
1565 out.write ("</table>\r\n");
1566
1567 //generateMetadataElementSortList(fileName,"ASCII",linkName);
1568 /*
1569 if(suggestionMap.size()>0){
1570 compareElement(fileName,collectionFolder,suggestionMap);
1571 out.write("<p><a href =\""+fileName+"\"_Suggestion.html>Link<a>");
1572 }
1573 */
1574 //generateMetadataElementSortList(fileName,"Frequency-based",linkName);
1575 //out.write ("<p>");
1576 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
1577 out.write ("<p> <a href=\" Overall.html \">Summary</a>");
1578 out.write ("&raquo;");
1579
1580 if(linkName.equalsIgnoreCase("dublin")){
1581 out.write ("<a href=\""+linkName+".html\">Metadata Detail ("+metadataSet1+")</a>");
1582 //out.write ("<h1 align=\"center\">Metadata Set Detail: "+metadataSet1+"</h1>\r\n");
1583 }
1584 else if (linkName.equalsIgnoreCase("extracted")){
1585 out.write ("<a href=\""+linkName+".html\">Metadata Detail ("+metadataSet2+")</a>");
1586 //out.write ("<h1 align=\"center\">Metadata Set Detail: "+metadataSet2+"</h1>\r\n");
1587 }
1588 else{
1589 out.write ("<a href=\""+linkName+".html\">Metadata Detail ("+linkName+")</a>");
1590 }
1591 //out.write(str+"</p>");
1592 out.write ("</body></html>\r\n");
1593
1594 //Close the output stream
1595 out.close();
1596 }catch (Exception e){//Catch exception if any
1597 //System.out.println("detail");
1598 System.err.println("Error: " + e.getMessage());
1599 }
1600 //System.out.println("detail ends");
1601
1602 }
1603
1604 /*
1605 private void generateMetadataElementSortList(String title, String sort, String metadataSetName){
1606 //String str = sdf.format(cl.getTime());
1607 //String collectionFolder = mds.getCollectionName();
1608 String fileName = title+"_"+sort;
1609 String collectionFolder = mds.getCollectionName();
1610 DataMaker dmx = new DataMaker(this.mds);
1611
1612 //if(title.equals("dc.Identifier")){
1613 //return;
1614 //}
1615
1616 try{
1617 FileWriter fstream = new FileWriter(collectionFolder+"/"+fileName+".html");
1618 BufferedWriter out = new BufferedWriter(fstream);
1619
1620 out.write("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">");
1621 out.write ("<html>\r\n");
1622 out.write("<head>\r\n<title>Metadata Element Sort List</title>\r\n");
1623 out.write("<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\">\r\n");
1624 out.write("<style type=\"text/css\">\r\n");
1625 out.write("td{height:20; text-align:justify;}\r\n");
1626 out.write(".tam{height:20; text-align:center}\r\n");
1627 out.write("th{height:20; text-align:left;}\r\n");
1628 out.write("body{font-family: Arial;}\r\n");
1629 out.write("</style>\r\n");
1630 out.write("</head><body>\r\n");
1631
1632 out.write("<p><a href=\" Overall.html \">Summary</a>");
1633 out.write("&raquo;");
1634 ////////////////////////////////////////////////////////////////////////////////////
1635 if(metadataSetName.equalsIgnoreCase("dublin")){
1636 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet1+")</a>");
1637 }
1638 else if (metadataSetName.equalsIgnoreCase("extracted")){
1639 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet2+")</a>");
1640 }
1641 else{
1642 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSetName+")</a>");
1643 }
1644 out.write("&raquo;");
1645 out.write("<a href=\""+title+".html\">"+title+"</a></p>");
1646
1647 SearchLink sl = new SearchLink();
1648 String[] list = dmx.getSortList(title, sort);
1649 System.out.println(list.length+" length" + title);
1650 if(list.length>=1){
1651
1652 out.write("<table border=\"1\" align=\"center\" width=\"800\">\r\n");
1653 out.write("<colgroup width=\"400\" span=\"2\">\r\n");
1654 out.write ("<h2 align=\"center\">"+ title+"</h2>\r\n");
1655 out.write ("<th class=\"tam\">"+ sort+"\r\n");
1656 out.write ("<th class=\"tam\">Element Values\r\n");
1657
1658 out.write ("<a name='topFive'>\r\n");
1659
1660 if(sort.equals("ASCII")){
1661
1662 //System.out.println("ASCII STARTS");
1663 int counter = 0;
1664
1665 for(int i = 0; i<list.length; i++){
1666 //System.out.println(i);
1667 if(list.length<=5 && i==0){
1668 out.write ("<a name='lastFive'>\r\n");
1669 }
1670 else if((list.length>5) && (list.length-5==i)){
1671 out.write ("<a name='lastFive'>\r\n");
1672 }
1673 out.write("<tr>\r\n");
1674
1675 if(list[i].length()>=200){
1676 //out.write("<td>"+(counter+1)+"<td>"+list[i].substring(0, 200)+"... \r\n");
1677 if(list[i].startsWith("http://")){
1678 //out.write("<td><a href=\""+list[i]+"\">"+list[i]+"</a>\r\n");
1679 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i]+"</a>\r\n");
1680 }
1681 else{
1682 ///////////////////////////////////// adding links
1683 //System.out.println("MDE: "+title+" Text: "+ list[i]+ " Collection: "+ collectionFolder);
1684 ArrayList alist= sl.CreateIndentifierLinkPage(title, list[i],"dc.Identifier", collectionFolder);
1685 //System.out.println(alist.get(0)+"?????????????????????????????????/");
1686 //////////////////////////////////////
1687
1688 if(list[i].startsWith(" ")){
1689 list[i] = list[i].substring(1);
1690 ////////////////////////////////////////////////////////////////////////////////////////////////////////
1691 if(alist.size()>=1){
1692 out.write("<td>"+(counter+1)+"<td>"+spaceLeft+space+spaceRight+list[i].substring(0, 200)+"..." +
1693 "<a href=\""+(String)alist.get(0)+"\"> Link</a>\r\n");
1694 }
1695 else{
1696 out.write("<td>"+(counter+1)+"<td>"+spaceLeft+space+spaceRight+list[i].substring(0, 200)+"... \r\n");
1697 }
1698 /////////////////////////////////////////////////////////////////////////////////////////////////////////
1699
1700 }
1701 else{
1702 ////////////////////////////////////////////////////////////////////////////////////////////////////////
1703 if(alist.size()>=1){
1704 out.write("<td>"+(counter+1)+"<td>"+list[i].substring(0, 200)+"..." +
1705 "<a href=\""+(String)alist.get(0)+"\"> Link</a>\r\n");
1706 }
1707 else{
1708 out.write("<td>"+(counter+1)+"<td>"+list[i].substring(0, 200)+"... \r\n");
1709 }
1710 /////////////////////////////////////////////////////////////////////////////////////////////////////////
1711
1712 }
1713 }
1714 }
1715 else{
1716 //out.write("<td>"+(counter+1)+"<td>"+list[i]+"\r\n");
1717 if(list[i].startsWith("http://")){
1718 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i]+"</a>\r\n");
1719 }
1720 else{
1721
1722 ///////////////////////////////////// adding links
1723 //System.out.println("MDE: "+title+" Text: "+ list[i]+ " Collection: "+ collectionFolder);
1724 ArrayList alist= sl.CreateIndentifierLinkPage(title, list[i],"dc.Identifier", collectionFolder);
1725 //System.out.println(alist.get(0)+"?????????????????????????????????/");
1726 //////////////////////////////////////
1727
1728 if(list[i].startsWith(" ")){
1729 list[i] = list[i].substring(1);
1730 //out.write("<td>"+(counter+1)+"<td>"+spaceLeft+space+spaceRight+list[i]+"\r\n");
1731
1732 ////////////////////////////////////////////////////////////////////////////////////////////////////////
1733 if(alist.size()>=1){
1734 out.write("<td>"+(counter+1)+"<td>"+spaceLeft+space+spaceRight+list[i]+"..." +
1735 "<a href=\""+(String)alist.get(0)+"\"> Link </a>\r\n");
1736 }
1737 else{
1738 out.write("<td>"+(counter+1)+"<td>"+spaceLeft+space+spaceRight+list[i]+"... \r\n");
1739 }
1740 /////////////////////////////////////////////////////////////////////////////////////////////////////////
1741
1742 }
1743 else{
1744 if(alist.size()>=1){
1745 out.write("<td>"+(counter+1)+"<td>"+list[i]+"<a href=\""+alist.get(0)+"\"> Link </a>\r\n");
1746 }
1747 else{
1748 out.write("<td>"+(counter+1)+"<td>"+list[i]+"\r\n");
1749 }
1750 }
1751 }
1752 }
1753 counter++;
1754 }
1755 }
1756 else{
1757 HashMap xMap = dmx.getDistinctValueMap(title);
1758 int counter = 0;
1759 //System.out.println("frequency STARTS");
1760 for(int i = 0; i<list.length; i++){
1761 if(list.length<=5 && i==0){
1762 out.write ("<a name='lastFive'>\r\n");
1763 }
1764 else if((list.length>5) && (list.length-5==i)){
1765 out.write ("<a name='lastFive'>\r\n");
1766 }
1767 out.write("<tr>\r\n");
1768 out.write("<th>"+(counter+1)+". (No. of occurrences: "+((Integer)xMap.get(list[i])).toString()+")\r\n");
1769
1770 if(list[i].length()>=200){
1771
1772 //System.out.println(i + " <- 1f -> " +list[i]);
1773 //out.write("<td>"+list[i].substring(0,200)+"\r\n");
1774 if(list[i].startsWith("http://")){
1775 out.write("<td><a href=\""+list[i]+"\">"+list[i]+"</a>\r\n");
1776 }
1777 else{
1778 if(list[i].startsWith(" ")){
1779 list[i]=list[i].substring(1);
1780 out.write("<td>"+spaceLeft+space+spaceRight+list[i].substring(0,200)+"\r\n");
1781 }
1782 else{
1783 out.write("<td>"+list[i].substring(0,200)+"\r\n");
1784 }
1785 }
1786 }
1787 else{
1788 //System.out.println(i + " <- 2f -> " +list[i]);
1789 //out.write("<td>"+list[i]+"\r\n");
1790 if(list[i].startsWith("http://")){
1791 out.write("<td><a href=\""+list[i]+"\">"+list[i]+"</a>\r\n");
1792 }
1793 else{
1794 if(list[i].startsWith(" ")){
1795 list[i]=list[i].substring(1);
1796 //out.write("<td>"+spaceLeft+space+spaceRight+list[i].substring(0,200)+"\r\n");
1797 out.write("<td>"+spaceLeft+space+spaceRight+list[i]+"\r\n");
1798 }
1799 else{
1800 out.write("<td>"+list[i]+"\r\n");
1801 }
1802 }
1803 }
1804 counter++;
1805 }
1806 }
1807 out.write("</table>\r\n");
1808 }
1809 else{
1810 out.write("<p>Sorry! The list is empty</p>");
1811 }
1812 out.write("<tr>\r\n");
1813
1814
1815 out.write("<p><a href=\" Overall.html \">Summary</a>");
1816 out.write("&raquo;");
1817 ////////////////////////////////////////////////////////////////////////////////////
1818 if(metadataSetName.equalsIgnoreCase("dublin")){
1819 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet1+")</a>");
1820 }
1821 else if (metadataSetName.equalsIgnoreCase("extracted")){
1822 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet2+")</a>");
1823 }
1824 else{
1825 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSetName+")</a>");
1826 }
1827 out.write("&raquo;");
1828 out.write("<a href=\""+title+".html\">"+title+"</a></p>");
1829 out.write("</body></html>\r\n");
1830 out.close();
1831 }catch(Exception e){
1832 e.printStackTrace();
1833 //sSystem.err.println("Error: " + e.printStackTrace());
1834 }
1835 }
1836
1837 */
1838 private void setupMetadataLink(String fileName){
1839
1840 try{
1841 DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
1842 DocumentBuilder docBuilder = docBuilderFactory.newDocumentBuilder();
1843 Document doc = docBuilder.newDocument();
1844 doc = docBuilder.parse (new File("/research/cc108/greenstone3/web/mat/script/"+fileName+".xml"));
1845 Element rootNode = doc.getDocumentElement();
1846
1847 NodeList listOfName = rootNode.getElementsByTagName("metadataElement");
1848 url = new HashMap();
1849 for(int i = 0; i<listOfName.getLength(); i++){
1850 Node NameNode = listOfName.item(i);
1851 Element docElement = (Element)NameNode;
1852 NodeList valueList = docElement.getElementsByTagName("URL");
1853 Node urlParentNode = valueList.item(0);
1854 String urlText = urlParentNode.getChildNodes().item(0).getNodeValue();
1855
1856 valueList = docElement.getElementsByTagName("name");
1857 Node urlNameParentNode = valueList.item(0);
1858 String urlNameText = urlNameParentNode.getChildNodes().item(0).getNodeValue();
1859
1860 url.put(urlNameText, urlText);
1861 //System.out.println(urlText+" "+urlNameText);
1862
1863 }
1864
1865
1866 }catch(Exception e){
1867 //e.printStackTrace(printWriter);
1868 System.out.print(e.toString());
1869 }
1870 }
1871 private HashMap generateMetadataElementSortList(String title, String sort, String metadataSetName){
1872
1873 String fileName = title+"_"+sort;
1874 String collectionFolder = mds.getCollectionName();
1875 String IDENTIFIER = "dc.Identifier";
1876 SearchLink sl = new SearchLink();
1877 HashMap suggestionMap = new HashMap();
1878 boolean status = false;
1879 try{
1880 HashMap valueMap = sl.createValueMap(title,collectionFolder);
1881 HashMap linkMap = sl.createLinkMap("dc.Identifier",collectionFolder);
1882 HashMap internalIDMap = dm.getInternalIdentifier(title);
1883 FileWriter fstream = new FileWriter(collectionFolder+"/"+fileName+".html");
1884 BufferedWriter out = new BufferedWriter(fstream);
1885
1886 out.write("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">");
1887 out.write ("<html>\r\n");
1888 out.write("<head>\r\n<title>Metadata Element Sort List</title>\r\n");
1889 out.write("<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\">\r\n");
1890 //out.write("<link rel=\"stylesheet\" href=\"../script/doby2.css\" type=\"text/css\"/>\r\n");
1891 out.write("<script type=\"text/javascript\" src=\"../script/getInfomation.js\"></script>\r\n");
1892 out.write("<script type=\"text/javascript\" src=\"http://yui.yahooapis.com/2.4.1/build/yahoo/yahoo-min.js\"></script>\r\n");
1893 out.write("<script type=\"text/javascript\" src=\"http://yui.yahooapis.com/2.4.1/build/event/event-min.js\"></script>\r\n");
1894 out.write("<script type=\"text/javascript\" src=\"http://yui.yahooapis.com/2.4.1/build/connection/connection-min.js\"></script>\r\n");
1895 out.write("<style type=\"text/css\">\r\n");
1896 out.write("td{height:20; text-align:left;padding-left:5px;}\r\n");
1897 out.write(".tam{height:20; text-align:center}\r\n");
1898 out.write("th{height:20; text-align:center;}\r\n");
1899 out.write("body{font-family: Arial;}\r\n");
1900 out.write("</style>\r\n");
1901 out.write("</head><body id=\""+collectionFolder+"\">\r\n");
1902
1903 out.write("<td><div style=\"float:left;\"><a href=\" Overall.html \">Summary</a>");
1904 out.write("&raquo;");
1905 ////////////////////////////////////////////////////////////////////////////////////
1906 if(metadataSetName.equalsIgnoreCase("dublin")){
1907 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet1+")</a>");
1908 }/*
1909 else if (metadataSetName.equalsIgnoreCase("extracted")){
1910 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet2+")</a>");
1911 }*/
1912 else{
1913 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSetName+")</a>");
1914 }
1915 out.write("&raquo;");
1916 out.write("<a href=\""+title+".html\">"+title+"</a></div>");
1917
1918
1919 // out.write ("<td> <div style=\"float:left;\"> <a href=\" Overall.html \">Summary</a></div>");
1920 out.write("<div style=\"float:right;\"><a href=\"http://chnm.gmu.edu/tools/surveys/4386/\">Please send feedback about the Mat tool</a></div></td><br>");
1921
1922 //kljkl
1923 String[] list = dm.getSortList(title, sort);
1924 //String Link = "Link";
1925 //int MaxLinks = 2;
1926
1927
1928 if(list.length>=1){
1929
1930
1931 if(sort.equals("ASCII")){
1932
1933 out.write("<table border=\"1\" align=\"center\" width=\"800\">\r\n");
1934 out.write("<h2 align=\"center\">"+ title+"</h2>\r\n");
1935 out.write("<th class=\"tam\">ASCII Sort\r\n");
1936 out.write("<th class=\"tam\">Element Values\r\n");
1937 out.write("<th class=\"tam\">Source Documents\r\n");
1938 out.write("<th class=\"tam\">InternalLink\r\n");
1939
1940 out.write ("<a name='topFive'>\r\n");
1941
1942 int counter = 0;
1943
1944 for(int i = 0; i<list.length; i++){
1945 if(list.length<=5 && i==0){
1946 out.write ("<a name='lastFive'>\r\n");
1947 }
1948 else if((list.length>5) && (list.length-5==i)){
1949 out.write ("<a name='lastFive'>\r\n");
1950 }
1951
1952
1953 InternalLink il= (InternalLink)internalIDMap.get(list[i]);
1954 ArrayList alist2 = il.retrieveList();
1955 String id = (String)alist2.get(0);
1956 id = id.substring(4);
1957 out.write("<tr id=\""+id+"\" >\r\n");
1958
1959 //out.write("<tr>\r\n");
1960
1961
1962 if(list[i].length()>=201){
1963 if(list[i].startsWith("http://")){
1964
1965 if(title.equals("dc.Identifier")){
1966 if(list[i].length()>=100){
1967 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"...</a>");
1968 }
1969 else{
1970 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
1971 }
1972 out.write("<td><a href=\""+list[i]+"\">Source</a>\r\n");
1973 //out.write("<td id=\""+internalIDMap.get(list[i])+"\">");
1974 }
1975 else{
1976
1977 ArrayList alist= sl.CreateIndentifierLinkPage(title, list[i],"dc.Identifier", collectionFolder,valueMap,linkMap);
1978 System.out.println("break");
1979 if(list[i].length()>=100){
1980
1981 //out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"...</a>");
1982 String url = (String)alist.get(0);
1983 if(alist.size()==1){
1984 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"...</a>");
1985 if(url.startsWith("http://")){
1986 out.write("<td><a href=\""+(String)alist.get(0)+"\">Source</a>\r\n");
1987 }else{
1988 out.write("<td>Source\r\n");
1989 }
1990 //suggestionMap.put(list[i], url);
1991 }
1992 else if(alist.size()>1){
1993 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"...</a>");
1994 //out.write("<td><a href=\""+(String)alist.get(0)+"\">Source</a>...\r\n");
1995 if(url.startsWith("http://")){
1996 out.write("<td><a href=\""+(String)alist.get(0)+"\">Source</a>...\r\n");
1997 }else{
1998 out.write("<td>Source\r\n");
1999 }
2000 //suggestionMap.put(list[i], url);
2001 }
2002 else{
2003 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"</a>");
2004 out.write("<td>No Source Available\r\n");
2005 }
2006 suggestionMap.put(list[i], url);
2007 //out.write("<td id=\""+internalIDMap.get(list[i])+"\">");
2008 }
2009 else{
2010 //out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
2011 String url = (String)alist.get(0);
2012 if(alist.size()==1){
2013 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
2014 //out.write("<td><a href=\""+(String)alist.get(0)+"\">Source</a>\r\n");
2015 if(url.startsWith("http://")){
2016 out.write("<td><a href=\""+(String)alist.get(0)+"\">Source</a>\r\n");
2017 }else{
2018 out.write("<td>Source\r\n");
2019 }
2020 }
2021 else if(alist.size()>1){
2022 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
2023 //out.write("<td><a href=\""+(String)alist.get(0)+"\">Source</a>...\r\n");
2024 if(url.startsWith("http://")){
2025 out.write("<td><a href=\""+(String)alist.get(0)+"\">Source</a>...\r\n");
2026 }else{
2027 out.write("<td>Source\r\n");
2028 }
2029 }
2030 else{
2031 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
2032 out.write("<td>No Source Available\r\n");
2033 }
2034 suggestionMap.put(list[i], url);
2035 }
2036 }
2037 //out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i].substring(0, 200)+"...</a>");
2038
2039
2040 }
2041 else{
2042 ///////////////////////////////////// adding links have not change the empty link form here
2043 ArrayList alist= sl.CreateIndentifierLinkPage(title, list[i],"dc.Identifier", collectionFolder,valueMap,linkMap);
2044 //////////////////////////////////////
2045
2046 if(list[i].startsWith(" ")){
2047 String elements = list[i];
2048 list[i] = list[i].substring(1);
2049
2050 //out.write("<td>"+(counter+1)+"<td>"+spaceLeft+space+spaceRight+list[i].substring(0, 200)+"\r\n");
2051
2052
2053 if(alist.size()==1){
2054 out.write("<td>"+(counter+1)+"<td>"+spaceLeft+space+spaceRight+list[i].substring(0, 200)+"...<td>"+
2055 "<a href=\""+(String)alist.get(0)+"\">Source</a>\r\n");
2056 suggestionMap.put(elements, (String)alist.get(0));
2057 }
2058 else if(alist.size()>1){
2059 out.write("<td>"+(counter+1)+"<td>"+spaceLeft+space+spaceRight+list[i].substring(0, 200)+"...<td>"+
2060 "<a href=\""+(String)alist.get(0)+"\">Source</a>...\r\n");
2061 suggestionMap.put(elements, (String)alist.get(0));
2062 }
2063 else{
2064 out.write("<td>"+(counter+1)+"<td>"+spaceLeft+space+spaceRight+list[i].substring(0, 200)+"...<td>No Source Available \r\n");
2065 suggestionMap.put(elements, "No Source Available");
2066 }
2067
2068 }
2069 else{
2070 //out.write("<td>"+(counter+1)+"<td>"+list[i].substring(0, 200)+"...<td>\r\n");
2071
2072 if(alist.size()==1){
2073 out.write("<td>"+(counter+1)+"<td>"+list[i].substring(0, 200)+"...<td>" +
2074 "<a href=\""+(String)alist.get(0)+"\">Source</a>\r\n");
2075 suggestionMap.put(list[i], (String)alist.get(0));
2076 }
2077 else if(alist.size()>1){
2078 out.write("<td>"+(counter+1)+"<td>"+list[i].substring(0, 200)+"...<td>" +
2079 "<a href=\""+(String)alist.get(0)+"\">Source</a>...\r\n");
2080 suggestionMap.put(list[i], (String)alist.get(0));
2081 }
2082 else{
2083 out.write("<td>"+(counter+1)+"<td>"+list[i].substring(0, 200)+"... <td>No Source Available\r\n");
2084 suggestionMap.put(list[i],"No Source Available");
2085 }
2086
2087 }
2088 }
2089
2090 }
2091 else{
2092 /*
2093 if(list[i].startsWith("http://")){
2094 //out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
2095 //out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">1</a>");
2096
2097 if(list[i].length()>=100){
2098 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"...</a>");
2099 }
2100 else{
2101 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
2102 }
2103
2104 out.write("<td><a href=\""+list[i]+"\">Source</a>\r\n");
2105 }*/
2106
2107 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
2108 if(list[i].startsWith("http://")){
2109 //out.write("<td><a href=\""+list[i]+"\">"+list[i]+"</a>\r\n");
2110 if(title.equals("dc.Identifier")){
2111 if(list[i].length()>=100){
2112 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"...</a>");
2113 //out.write("<td><a href=\""+list[i]+"\">Source</a>\r\n");
2114 }
2115 else{
2116 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
2117 }
2118 out.write("<td><a href=\""+list[i]+"\">Source</a>\r\n");
2119 }
2120 else{
2121 ArrayList alist= sl.CreateIndentifierLinkPage(title, list[i],"dc.Identifier", collectionFolder,valueMap,linkMap);
2122 if(list[i].length()>=100){
2123 //out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"...</a>\r\n");
2124
2125 if(alist.size()==1){
2126 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"...</a>");
2127 out.write("<td><a href=\""+(String)alist.get(0)+"\">Source</a>\r\n");
2128 suggestionMap.put(list[i], (String)alist.get(0));
2129 }
2130 else if(alist.size()>1){
2131 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"...</a>");
2132 out.write("<td><a href=\""+(String)alist.get(0)+"\">Source</a>...\r\n");
2133 suggestionMap.put(list[i], (String)alist.get(0));
2134 }
2135 else{
2136 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
2137 out.write("<td>No Source Available\r\n");
2138 suggestionMap.put(list[i],"No Source Available");
2139 }
2140 }
2141 else{
2142 //out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i]+"</a>\r\n");
2143 //out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
2144
2145 if(alist.size()==1){
2146 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
2147 out.write("<td><a href=\""+(String)alist.get(0)+"\">Source</a>\r\n");
2148 suggestionMap.put(list[i], (String)alist.get(0));
2149 }
2150 else if(alist.size()>1){
2151 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
2152 out.write("<td><a href=\""+(String)alist.get(0)+"\">Source</a>...\r\n");
2153 suggestionMap.put(list[i], (String)alist.get(0));
2154 }
2155 else{
2156 out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
2157 out.write("<td>No Source Available\r\n");
2158 suggestionMap.put(list[i],"No Source Available");
2159 }
2160
2161 }
2162 }
2163 //out.write("<td>"+(counter+1)+"<td><a href=\""+list[i]+"\">"+list[i].substring(0, 200)+"...</a>");
2164
2165
2166 }
2167
2168 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////
2169
2170 else{
2171 ArrayList alist= sl.CreateIndentifierLinkPage(title, list[i],"dc.Identifier", collectionFolder,valueMap,linkMap);
2172
2173 if(list[i].startsWith(" ")){
2174 String elements = list[i];
2175 list[i] = list[i].substring(1);
2176 //out.write("<td>"+(counter+1)+"<td>"+spaceLeft+space+spaceRight+list[i]+"\r\n");
2177
2178 if(alist.size()==1){
2179 out.write("<td>"+(counter+1)+"<td>"+spaceLeft+space+spaceRight+list[i]+
2180 "<td><a href=\""+(String)alist.get(0)+"\">Source</a>\r\n");
2181 suggestionMap.put(elements,(String)alist.get(0));
2182 }
2183 else if(alist.size()>1){
2184 out.write("<td>"+(counter+1)+"<td>"+spaceLeft+space+spaceRight+list[i]+
2185 "<td><a href=\""+(String)alist.get(0)+"\">Source</a>...\r\n");
2186 suggestionMap.put(elements,(String)alist.get(0));
2187 }
2188 else{
2189 out.write("<td>"+(counter+1)+"<td>"+spaceLeft+space+spaceRight+list[i]+"<td>No Source Available\r\n");
2190 suggestionMap.put(elements,"No Source Available");
2191 }
2192
2193 //out.write("<td>"+(counter+1)+"<td>"+spaceLeft+space+spaceRight+list[i]+"\r\n");
2194 }
2195 else{
2196 //out.write("<td>"+(counter+1)+"<td>"+list[i]+"\r\n");
2197
2198 if(alist.size()==1){
2199 out.write("<td>"+(counter+1)+"<td>"+list[i]+"<td><a href=\""+alist.get(0)+"\">Source</a>\r\n");
2200 suggestionMap.put(list[i],(String)alist.get(0));
2201 }
2202 else if(alist.size()>1){
2203 out.write("<td>"+(counter+1)+"<td>"+list[i]+"<td><a href=\""+alist.get(0)+"\">Source</a>...\r\n");
2204 suggestionMap.put(list[i],(String)alist.get(0));
2205 }
2206 else{
2207 out.write("<td>"+(counter+1)+"<td>"+list[i]+"<td>No Source Available\r\n");
2208 suggestionMap.put(list[i],"No Source Available");
2209 }
2210
2211 //out.write("<td>"+(counter+1)+"<td>"+list[i]+"\r\n");
2212 }
2213 }
2214 }
2215 counter++;
2216 out.write("<td onclick=\"GD(this)\">View");
2217 }
2218 /////////////////////// insert
2219 if(suggestionMap.size()>1 && !title.equals("dc.Identifier")){
2220 //return suggestionMap;
2221 //compareElement(title,collectionFolder,suggestionMap);
2222 status = true;
2223 }
2224
2225 ////////////////////////
2226 }
2227 else{
2228
2229
2230 out.write("<table border=\"1\" align=\"center\" width=\"800\">\r\n");
2231 out.write ("<h2 align=\"center\">"+ title+"</h2>\r\n");
2232 out.write ("<th class=\"tam\">&nbsp;\r\n");
2233 out.write ("<th class=\"tam\">Frequency\r\n");
2234 out.write ("<th class=\"tam\">Element Values\r\n");
2235 out.write ("<th class=\"tam\">Source Documents\r\n");
2236 out.write ("<th class=\"tam\">Internal Link\r\n");
2237 out.write ("<a name='topFive'>\r\n");
2238
2239 HashMap xMap = dm.getDistinctValueMap(title);
2240 int counter = 0;
2241 for(int i = 0; i<list.length; i++){
2242 if(list.length<=5 && i==0){
2243 out.write ("<a name='lastFive'>\r\n");
2244 }
2245 else if((list.length>5) && (list.length-5==i)){
2246 out.write ("<a name='lastFive'>\r\n");
2247 }
2248
2249 InternalLink il= (InternalLink)internalIDMap.get(list[i]);
2250 ArrayList alist2 = il.retrieveList();
2251 String id = (String)alist2.get(0);
2252 id = id.substring(4);
2253 out.write("<tr id=\""+id+"\" >\r\n");
2254
2255 /*
2256 InternalLink il= (InternalLink)internalIDMap.get(list[i]);
2257 ArrayList alist2 = il.retrieveList();
2258 String id = (String)alist2.get(0);
2259 id = id.substring(4);
2260
2261 out.write("<tr id=\""+id+"\" >\r\n");
2262 */
2263 out.write("<tr>\r\n");
2264 out.write("<td>"+(counter+1)+"<th> "+((Integer)xMap.get(list[i])).toString()+"\r\n");
2265
2266 if(list[i].length()>=201){
2267
2268 if(list[i].startsWith("http://")){
2269 //out.write("<td><a href=\""+list[i]+"\">"+list[i].substring(0,200)+"...</a>");
2270 if(title.equals("dc.Identifier")){
2271 if(list[i].length()>=100){
2272 out.write("<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"</a>");
2273 }
2274 else{
2275 out.write("<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
2276 }
2277
2278 out.write("<td><a href=\""+list[i]+"\">Source</a>\r\n");
2279 }
2280 else{
2281 ArrayList alist= sl.CreateIndentifierLinkPage(title, list[i],IDENTIFIER, collectionFolder,valueMap,linkMap);
2282 if(list[i].length()>=100){
2283 if(alist.size()==1){
2284 out.write("<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"</a>");
2285 out.write("<td><a href=\""+alist.get(0)+"\">Source</a>\r\n");
2286 }
2287 else if (alist.size()>1){
2288 out.write("<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"</a>");
2289 out.write("<td><a href=\""+alist.get(0)+"\">Source</a>...\r\n");
2290 }
2291 else{
2292 out.write("<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"</a>");
2293 //out.write("<td><a href=\""+alist.get(0)+"\">Source</a>...\r\n");
2294 out.write("<td>No Source Available\r\n");
2295 }
2296
2297 }
2298 else{
2299 out.write("<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
2300 }
2301
2302 //out.write("<td><a href=\""+list[i]+"\">Source</a>\r\n");
2303 }
2304 }
2305
2306
2307
2308
2309 else{
2310 ArrayList alist= sl.CreateIndentifierLinkPage(title, list[i],IDENTIFIER, collectionFolder,valueMap,linkMap);
2311
2312 if(list[i].startsWith(" ")){
2313 list[i]=list[i].substring(1);
2314 if(alist.size()==1){
2315 out.write("<td>"+spaceLeft+space+spaceRight+list[i].substring(0,200)+"...<td>"+
2316 "<a href=\""+(String)alist.get(0)+"\">Source</a>\r\n");
2317 }
2318 else if(alist.size()>1){
2319 out.write("<td>"+spaceLeft+space+spaceRight+list[i].substring(0,200)+"...<td>"+
2320 "<a href=\""+(String)alist.get(0)+"\">Source</a>...\r\n");
2321 }
2322 else{
2323 out.write("<td>"+spaceLeft+space+spaceRight+list[i].substring(0,200)+"...<td>No Source Available\r\n");
2324 }
2325 }
2326 else{
2327
2328 if(alist.size()==1){
2329 out.write("<td>"+list[i].substring(0,200)+"...<td>" +
2330 "<a href=\""+(String)alist.get(0)+"\">Source</a>\r\n");
2331 }
2332 else if(alist.size()>1){
2333 out.write("<td>"+list[i].substring(0,200)+"...<td>"+
2334 "<a href=\""+(String)alist.get(0)+"\">Source</a>...\r\n");
2335 }
2336 else{
2337 out.write("<td>"+list[i].substring(0,200)+"...<td>No Source Available\r\n");
2338 }
2339 }
2340 }
2341 }
2342 else{
2343 if(list[i].startsWith("http://")){
2344 //out.write("<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
2345 //out.write("<td><a href=\""+list[i]+"\">1</a>");
2346 if(title.equals("dc.Identifier")){
2347 if(list[i].length()>=100){
2348 out.write("<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"</a>");
2349 }
2350 else{
2351 out.write("<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
2352 }
2353 out.write("<td><a href=\""+list[i]+"\">Source</a>\r\n");
2354 }
2355 else{
2356 ArrayList alist= sl.CreateIndentifierLinkPage(title, list[i],"dc.Identifier", collectionFolder,valueMap,linkMap);
2357 if(list[i].length()>=100){
2358 if(alist.size()==1){
2359 out.write("<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"</a>");
2360 out.write("<td><a href=\""+(String)alist.get(0)+"\">Source</a>\r\n");
2361 }
2362 else if(alist.size()>1){
2363 out.write("<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"</a>");
2364 out.write("<td><a href=\""+(String)alist.get(0)+"\">Source</a>...\r\n");
2365 }
2366 else{
2367 out.write("<td><a href=\""+list[i]+"\">"+list[i].substring(0,100)+"</a>");
2368 out.write("<td>No Source Available\r\n");
2369 }
2370 }
2371 else{
2372 if(alist.size()==1){
2373 out.write("<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
2374 out.write("<td><a href=\""+(String)alist.get(0)+"\">Source</a>\r\n");
2375 }
2376 else if(alist.size()>1){
2377 out.write("<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
2378 out.write("<td><a href=\""+(String)alist.get(0)+"\">Source</a>...\r\n");
2379 }
2380 else{
2381 out.write("<td><a href=\""+list[i]+"\">"+list[i]+"</a>");
2382 out.write("<td>No Source Available\r\n");
2383 }
2384 }
2385
2386 }
2387
2388 }
2389 else{
2390
2391 ArrayList alist= sl.CreateIndentifierLinkPage(title, list[i],IDENTIFIER, collectionFolder,valueMap,linkMap);
2392
2393 if(list[i].startsWith(" ")){
2394 list[i]=list[i].substring(1);
2395 //out.write("<td>"+spaceLeft+space+spaceRight+list[i].substring(0,200)+"\r\n");
2396 if(alist.size()==1){
2397 out.write("<td>"+spaceLeft+space+spaceRight+list[i]+
2398 "<td><a href=\""+(String)alist.get(0)+"\">Source</a>\r\n");
2399 }
2400 else if(alist.size()>1){
2401 out.write("<td>"+spaceLeft+space+spaceRight+list[i]+
2402 "<td><a href=\""+(String)alist.get(0)+"\">Source</a>...\r\n");
2403 }
2404 else{
2405 out.write("<td>"+spaceLeft+space+spaceRight+list[i]+"<td>No Source Available\r\n");
2406 }
2407 }
2408 else{
2409 if(alist.size()==1){
2410 out.write("<td>"+list[i]+"<td><a href=\""+(String)alist.get(0)+"\">Source</a>\r\n");
2411 }
2412 else if(alist.size()>1){
2413 out.write("<td>"+list[i]+"<td><a href=\""+(String)alist.get(0)+"\">Source</a>...\r\n");
2414 }
2415 else{
2416 out.write("<td>"+list[i]+"<td>No Source Available\r\n");
2417 }
2418 }
2419 }
2420 }
2421 counter++;
2422
2423 out.write("<td onclick=\"GD(this)\">View");
2424 //out.write("<td id=\""+internalIDMap.get(list[i])+"\">Click");
2425 }
2426
2427 }
2428 out.write("</table>\r\n");
2429 }
2430 else{
2431 out.write("<p>Sorry! The list is empty</p>");
2432 }
2433 out.write("<tr>\r\n");
2434
2435
2436 out.write("<p><a href=\" Overall.html \">Summary</a>");
2437 out.write("&raquo;");
2438 ////////////////////////////////////////////////////////////////////////////////////
2439 if(metadataSetName.equalsIgnoreCase("dublin")){
2440 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet1+")</a>");
2441 }
2442 else if (metadataSetName.equalsIgnoreCase("extracted")){
2443 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet2+")</a>");
2444 }
2445 else{
2446 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSetName+")</a>");
2447 }
2448 out.write("&raquo;");
2449 out.write("<a href=\""+title+".html\">"+title+"</a></p>");
2450 out.write("</body></html>\r\n");
2451 out.close();
2452 fstream.close();
2453 }catch(Exception e){
2454 System.err.println("Error: " + e.getMessage());
2455 //e.printStackTrace(outx);
2456 }
2457
2458 //printWriter.write("<!-- generating all possible page-->");
2459 //printWriter.flush();
2460 //return new HashMap();
2461 if(status){
2462 return suggestionMap;
2463 }
2464 else{
2465 return new HashMap();
2466 }
2467 }
2468
2469 private void createIncompletedList(String fileName, String metadataSetName){
2470 //System.out.println("break1");
2471 HashMap hp = dm.getIdentifierLink("dc.Identifier");
2472 String[] ids = dm.getDocumentIDList(fileName);
2473 System.out.println("Length: "+ids.length+" "+fileName+ " "+hp.size());
2474 for(int i = 0; i<ids.length; i++){
2475 hp.remove(ids[i]);
2476 }
2477 String collectionFolder = mds.getCollectionName();
2478 System.out.println(hp.size());
2479 try{
2480 FileWriter fstream = new FileWriter(collectionFolder+"/"+fileName+"_IncompletedList.html");
2481 BufferedWriter out = new BufferedWriter(fstream);
2482
2483 out.write("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">");
2484 out.write ("<html>\r\n");
2485 out.write("<head>\r\n<title>Incompleted Document List</title>\r\n");
2486 out.write("<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\">\r\n");
2487 out.write("<style type=\"text/css\">\r\n");
2488 out.write("td{height:20; text-align:left;padding-left:5px;}\r\n");
2489 out.write(".tam{height:20; text-align:center}\r\n");
2490 out.write("th{height:20; text-align:center;}\r\n");
2491 out.write("body{font-family: Arial;}\r\n");
2492 out.write("</style>\r\n");
2493 out.write("</head><body>\r\n");
2494
2495 out.write("<p><a href=\" Overall.html \">Summary</a>");
2496 out.write("&raquo;");
2497 if(metadataSetName.equalsIgnoreCase("dublin")){
2498 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet1+")</a>");
2499 }
2500 else if (metadataSetName.equalsIgnoreCase("extracted")){
2501 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet2+")</a>");
2502 }
2503 else{
2504 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSetName+")</a>");
2505 }
2506 out.write("&raquo;");
2507 out.write("<a href=\""+fileName+".html \">"+fileName+"</a>");
2508
2509 //out.write("<h3 align=center>"+fileName+"</h3>");
2510 out.write("<h3 align=center>"+fileName+" does not appear in the following documents</h3>");
2511 int counter = 0;
2512 Set s = hp.keySet();
2513 Iterator i = s.iterator();
2514 out.write("<table border=1 align=center><tr><th>Document ID<th>Source Link\r\n");
2515 while(i.hasNext()){
2516 counter++;
2517 String keys = (String)i.next();
2518 InternalLink il = (InternalLink) hp.get(keys);
2519 ArrayList alist = il.retrieveList();
2520 String url = (String)alist.get(0);
2521 out.write("<tr><td>"+counter+"<td><a href=\""+url+"\">"+url+"</a>\r\n");
2522 }
2523 out.write("</table></body></html>\r\n");
2524
2525
2526 out.write("<p><a href=\" Overall.html \">Summary</a>");
2527 out.write("&raquo;");
2528 if(metadataSetName.equalsIgnoreCase("dublin")){
2529 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet1+")</a>");
2530 }
2531 else if (metadataSetName.equalsIgnoreCase("extracted")){
2532 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet2+")</a>");
2533 }
2534 else{
2535 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSetName+")</a>");
2536 }
2537 out.write("&raquo;");
2538 out.write("<a href=\""+fileName+".html \">"+fileName+"</a>");
2539
2540 out.close();
2541 fstream.close();
2542 }catch(IOException ex){ex.printStackTrace();}
2543
2544
2545 }
2546
2547
2548 private void generateHTML(HashMap distanceMap, String fileName,String metadataSetName){
2549
2550 String collectionFolder = mds.getCollectionName();
2551
2552 try{
2553 FileWriter fstream = new FileWriter(collectionFolder+"/"+fileName+"_Suggestion.html");
2554 BufferedWriter out = new BufferedWriter(fstream);
2555 out.write("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">");
2556 out.write ("<html>\r\n");
2557 out.write("<head>\r\n<title>Suggestion list</title>\r\n");
2558 out.write("<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\">\r\n");
2559 out.write("<style type=\"text/css\">\r\n");
2560 out.write("td{height:20; text-align:left;padding-left:5px;}\r\n");
2561 out.write(".tam{height:20; text-align:center}\r\n");
2562 out.write("th{height:20; text-align:left;}\r\n");
2563 out.write("body{font-family: Arial;}\r\n");
2564 out.write("</style>\r\n");
2565 out.write("</head><body>\r\n");
2566 out.write("<p><a href=\" Overall.html \">Summary</a>");
2567 out.write("&raquo;");
2568 if(metadataSetName.equalsIgnoreCase("dublin")){
2569 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet1+")</a>");
2570 }
2571 else if (metadataSetName.equalsIgnoreCase("extracted")){
2572 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSet2+")</a>");
2573 }
2574 else{
2575 out.write ("<a href=\""+metadataSetName+".html\">Metadata Detail ("+metadataSetName+")</a>");
2576 }
2577 out.write("&raquo;");
2578 out.write("<a href=\""+fileName+".html \">"+fileName+"</a>");
2579
2580 out.write("<table border=1 align=center>\r\n");
2581 //out.write("<tr><td>&nbsp;<td>&nbsp;\r\n");
2582 Set s = distanceMap.keySet();
2583 Iterator i = s.iterator();
2584 while(i.hasNext()){
2585 String keyword = (String)i.next();
2586 InternalLink il = (InternalLink)distanceMap.get(keyword);
2587 //ArrayList alist = il.retrieveList();
2588 ArrayList alist = il.retrieveNodeList();
2589 //out.write("<tr><td>&nbsp;<td>&nbsp;\r\n");
2590 //System.out.println("here");
2591
2592 //out.write("<tr><th>"+keyword+"\r\n");
2593 out.write("<tr><th>&nbsp;<th>\r\n");
2594 for(int a = 0; a<alist.size(); a++){
2595 InternalLink il2 = new InternalLink();
2596 il2 = (InternalLink)alist.get(a);
2597 String url = (String)il2.retrieveList().get(0);
2598 out.write("<tr><td>"+il2.getValue()+"<td><a href=\""+url+"\">"+url+"</a>");
2599 //out.write("<tr><td>"+(String)alist.get(a));
2600 }
2601 out.write("</tr>");
2602 //System.out.println("\n\n");
2603 }
2604 out.write("</table></body></html>\r\n");
2605 out.close();
2606 fstream.close();
2607 }catch(IOException ex){ex.printStackTrace();}
2608
2609 //distanceMap.clear();
2610 //System.gc();
2611 }
2612
2613
2614 public void compareElement(String fileName,String collectionFolder,HashMap suggestionMap, String metadataSetName){
2615
2616 //EditDistance ed = new EditDistance();
2617 //SearchLink sl = new SearchLink();
2618
2619 System.out.println("Starting test "+fileName);
2620 long time = System.currentTimeMillis();
2621 //List list = Arrays.asList(args);
2622 Set kset = suggestionMap.keySet();
2623 ArrayList arrayList = new ArrayList();
2624 Iterator is = kset.iterator();
2625 while(is.hasNext()){
2626 arrayList.add((String)is.next());
2627 }
2628 HashMap distanceMap = new HashMap();
2629
2630 int totalLength = 0;
2631
2632 int arrayListLength = arrayList.size();
2633
2634 System.out.println("Length: "+arrayListLength);
2635 double distance ;
2636 try{
2637 for(int i = 0; i<arrayListLength; i++){
2638 String keyword = (String)arrayList.get(i);
2639 //ArrayList alistkw1 = sl.CreateIndentifierLinkPage(fileName, keyword,"dc.Identifier", collectionFolder,valueMap,linkMap);
2640 int spaceCounterFront1 = 0;
2641 int spaceCounterEnd1 = 0;
2642 String keywordClone = keyword;
2643 String _keywordClone = keyword;
2644 String bkKeyWord = keyword;
2645 //System.out.println("before parsing kw1");
2646 keywordClone = removeUnusedCharacter(keywordClone);
2647 //System.out.println("after parsing kw1");
2648 //System.out.println("before removing space from kw1");
2649 while(true){
2650 if(_keywordClone.length()>1){
2651 if(_keywordClone.charAt(0)==' '){
2652 _keywordClone = _keywordClone.substring(1);
2653 spaceCounterFront1++;
2654 }
2655 else{
2656 if(_keywordClone.charAt(_keywordClone.length()-1)==' '){
2657 _keywordClone = _keywordClone.substring(0,_keywordClone.length()-1);
2658 spaceCounterEnd1++;
2659 }
2660 else{
2661 break;
2662 }
2663 }
2664 }
2665 else{
2666 break;
2667 }
2668 }
2669 //System.out.println("after removing space from kw1");
2670 /*
2671 for(int a = 0; a<spaceCounterFront; a++){
2672 keywordClone = spaceLeft+space+spaceRight+keywordClone;
2673 }
2674 for(int a = 0; a<spaceCounterFront; a++){
2675 keywordClone = keywordClone+spaceLeft+space+spaceRight;
2676 }
2677 */
2678 //System.out.println("after5");
2679 int keywordLength = keywordClone.length();
2680 totalLength = totalLength + keywordLength;
2681 for(int j = i+1; j<arrayListLength; j++){
2682 String keyword2 = (String)arrayList.get(j);
2683 //ArrayList alistkw2 = sl.CreateIndentifierLinkPage(fileName, keyword2,"dc.Identifier", collectionFolder,valueMap,linkMap);
2684 double pre_cost = 0;
2685 String keywordClone2 = keyword2;
2686 String _keywordClone2 = keyword2;
2687 String bkKeyWord2 = keyword2;
2688 //System.out.println("before parsing kw2");
2689 costModel cm = new costModel();
2690 cm = removeUnusedCharacter(keywordClone2,pre_cost);
2691 //System.out.println("after parsing kw2");
2692 keywordClone2 = cm.getString();
2693 pre_cost = cm.getCost();
2694
2695 int spaceCounterFront2 = 0;
2696 int spaceCounterEnd2 = 0;
2697
2698 //System.out.println("before removing space from kw2");
2699 while(true){
2700 if(_keywordClone2.length()>1){
2701 if(_keywordClone2.charAt(0)==' '){
2702 _keywordClone2 = _keywordClone2.substring(1);
2703 spaceCounterFront2++;
2704 }
2705 else{
2706 if(_keywordClone2.charAt(_keywordClone2.length()-1)==' '){
2707 _keywordClone2 = _keywordClone2.substring(0,_keywordClone2.length()-1);
2708 spaceCounterEnd2++;
2709 }
2710 else{
2711 break;
2712 }
2713 }}
2714 else{
2715 break;
2716 }
2717 }
2718 /*
2719 while(true){
2720 if(_keywordClone2.contains(" ")){
2721 _keywordClone2.replaceFirst(" "," ");
2722 }
2723 else{
2724 break;
2725 }
2726 }*/
2727 //System.out.println("after removing space from kw2");
2728
2729 //System.out.println("after4");
2730 int keyword2Length = keywordClone2.length();
2731
2732 if(keywordLength>(keyword2Length+2) || (keywordLength+2)<keyword2Length ){}
2733 else{
2734 //System.out.println("start edit distance: "+keywordClone + " " + keywordClone2);
2735 //distance = ed.getLevenshteinDistance(keyword, keyword2);
2736 distance = calculateEditDistance(keywordClone.toLowerCase().toCharArray(),keywordClone2.toLowerCase().toCharArray());
2737 distance = distance + pre_cost;
2738 //System.out.println("finish calculating edit distance");
2739 if(distance<=3){
2740 if(distanceMap.containsKey(_keywordClone)){
2741 InternalLink il = (InternalLink)distanceMap.get(_keywordClone);
2742
2743 String keywordHolder = _keywordClone2;
2744
2745 while(true){
2746 if(keywordHolder.contains(" ")){
2747 keywordHolder = keywordHolder.replaceFirst(" ",spaceLeft+space+spaceRight+" ");
2748 }
2749 else{
2750 break;
2751 }
2752 }
2753
2754 for(int a = 0; a<spaceCounterFront2; a++){
2755 keywordHolder = spaceLeft+space+spaceRight+keywordHolder;
2756 }
2757 for(int a = 0; a<spaceCounterEnd2; a++){
2758 keywordHolder = keywordHolder+spaceLeft+space+spaceRight;
2759 }
2760
2761 InternalLink il2 = new InternalLink();
2762 il2.setValue(keywordHolder);
2763 if(suggestionMap.containsKey(bkKeyWord2)){
2764 il2.increaseElement((String)suggestionMap.get(bkKeyWord2));
2765 //System.out.println("input this url(3):"+(String)alistkw2.get(0));
2766 }
2767 else{
2768 il2.increaseElement("&nbsp;");
2769 }
2770
2771 il.increaseNode(il2);
2772 //il.increaseElement(keywordHolder);
2773 distanceMap.put(_keywordClone, il);
2774
2775 //il.increaseElement(keywordClone2);
2776 //distanceMap.put(keyword, il);
2777 }
2778 else{
2779 InternalLink il = new InternalLink();
2780 String keywordHolder = _keywordClone;
2781
2782 while(true){
2783 if(keywordHolder.contains(" ")){
2784 keywordHolder = keywordHolder.replaceFirst(" ",spaceLeft+space+spaceRight+" ");
2785 }
2786 else{
2787 break;
2788 }
2789 }
2790
2791 for(int a = 0; a<spaceCounterFront1; a++){
2792 keywordHolder = spaceLeft+space+spaceRight+keywordHolder;
2793 }
2794 for(int a = 0; a<spaceCounterEnd1; a++){
2795 keywordHolder = keywordHolder+spaceLeft+space+spaceRight;
2796 }
2797
2798 InternalLink il2 = new InternalLink();
2799 il2.setValue(keywordHolder);
2800 if(suggestionMap.containsKey(bkKeyWord)){
2801 il2.increaseElement((String)suggestionMap.get(bkKeyWord));
2802 //System.out.println("input this url(1):"+(String)alistkw1.get(0));
2803 }
2804 else{
2805 il2.increaseElement("&nbsp;");
2806 }
2807
2808 il.increaseNode(il2);
2809 //il.increaseElement(keywordHolder);
2810
2811 keywordHolder = _keywordClone2;
2812
2813 while(true){
2814 if(keywordHolder.contains(" ")){
2815 keywordHolder = keywordHolder.replaceFirst(" ",spaceLeft+space+spaceRight+" ");
2816 }
2817 else{
2818 break;
2819 }
2820 }
2821 for(int a = 0; a<spaceCounterFront2; a++){
2822 keywordHolder = spaceLeft+space+spaceRight+keywordHolder;
2823 }
2824 for(int a = 0; a<spaceCounterEnd2; a++){
2825 keywordHolder = keywordHolder+spaceLeft+space+spaceRight;
2826 }
2827
2828 InternalLink il3 = new InternalLink();
2829 il3.setValue(keywordHolder);
2830 if(suggestionMap.containsKey(bkKeyWord2)){
2831 il3.increaseElement((String)suggestionMap.get(bkKeyWord2));
2832 //System.out.println("input this url(2):"+(String)alistkw2.get(0));
2833 }
2834 else{
2835 il3.increaseElement("&nbsp;");
2836 }
2837
2838 il.increaseNode(il3);
2839 //il.increaseElement(keywordHolder);
2840 distanceMap.put(_keywordClone, il);
2841 }
2842 }
2843 }
2844 //alistkw2.clear();
2845 }
2846 //alistkw1.clear();
2847 //System.gc();
2848 }
2849 //System.out.println("after3");
2850 time = System.currentTimeMillis() - time;
2851
2852 System.out.println("The test took " + time + " milliseconds");
2853 System.out.println("TotalLength: "+totalLength+"Avg. Length:"+totalLength/arrayListLength);
2854 //arrayList.clear();
2855 //System.gc();
2856 //if(fileName.endsWith("dc.Title")){
2857 //System.out.println(fileName +"Map Size"+ distanceMap.size(
2858 //System.out.println("after2");
2859 if(distanceMap.size()!=0){
2860 generateHTML(distanceMap,fileName,metadataSetName);
2861 }
2862 }catch(Exception ex){ex.printStackTrace();}
2863 //System.out.println("after1");
2864 }
2865
2866
2867 public int calculateEditDistance(char[] args1, char[] args2){
2868 int n = args1.length;
2869 int m = args2.length;
2870 if (n == 0) {
2871 return m;
2872 }
2873 else if (m == 0) {
2874 return n;
2875 }
2876
2877 int[] p = new int[n + 1];
2878 int[] d = new int[n + 1];
2879 int[] _d;
2880
2881
2882 int i;
2883 int j;
2884
2885 int cost; // cost
2886
2887 for (i = 0; i <= n; i++) {
2888 p[i] = i;
2889 }
2890 for (j = 1; j <= m; j++) {
2891
2892 d[0] = j;
2893 for (i = 1; i <= n; i++) {
2894 cost = (args1[i-1] == args2[j-1]) ? 0 : 1;
2895 d[i] = Math.min(Math.min(d[i - 1] + 1, p[i] + 1),
2896 p[i - 1] + cost);
2897 }
2898
2899 // copy current distance counts to 'previous row' distance counts
2900 _d = p;
2901 p = d;
2902 d = _d;
2903 }
2904 // our last action in the above loop was to switch d and p, so p now
2905 // actually has the most recent cost counts
2906 return p[n];
2907 }
2908
2909 private String removeUnusedCharacter(String target){
2910
2911 // remove \n
2912 while(true){
2913 if(target.contains("\\n")){
2914 target = target.replaceFirst("\\\\n","");
2915 }
2916 else{
2917 break;
2918 }
2919 }
2920
2921 //remove spaces at the end of string
2922 if(target.length()>1){
2923 while(true){
2924 if(target.length()>1){
2925 if(target.charAt(target.length()-1)==' '){
2926 target = target.substring(0,target.length()-1);
2927 }
2928 else{
2929 break;
2930 }
2931 }
2932 else{
2933 break;
2934 }
2935 }
2936 }
2937 //remove leading spaces
2938 if(target.length()>1){
2939 while(true){
2940 if(target.length()>1){
2941 if(target.charAt(0)==' '){
2942 target = target.substring(1,target.length());
2943 }
2944 else{
2945 break;
2946 }}
2947 else{
2948 break;
2949 }
2950 }
2951 }
2952 //remove multiple spaces between words
2953 while(true){
2954 if(target.contains(" ")){
2955 target = target.replaceFirst(" "," ");
2956 }
2957 else{
2958 break;
2959 }
2960 }
2961 return target;
2962 }
2963
2964
2965 private costModel removeUnusedCharacter(String target, double cost){
2966
2967 costModel cm = new costModel();
2968
2969 // remove \n plus cost
2970 while(true){
2971 if(target.contains("\\n")){
2972 target = target.replaceFirst("\\\\n","");
2973 cost = cost + 0.2;
2974 }
2975 else{
2976 break;
2977 }
2978 }
2979
2980 //remove spaces at the end of string
2981 if(target.length()>1){
2982 while(true){
2983 if(target.length()>1){
2984 if(target.charAt(target.length()-1)==' '){
2985 target = target.substring(0,target.length()-1);
2986 cost = cost + 0.2;
2987 }
2988 else{
2989 break;
2990 }}
2991 else{
2992 break;
2993 }
2994 }
2995 }
2996 //remove leading spaces
2997 if(target.length()>1){
2998 while(true){
2999 if(target.length()>1){
3000 if(target.charAt(0)==' '){
3001 target = target.substring(1,target.length());
3002 cost = cost + 0.2;
3003 }
3004 else{
3005 break;
3006 }}
3007 else{
3008 break;
3009 }
3010 }
3011 }
3012 //remove multiple spaces between words
3013 while(true){
3014 if(target.contains(" ")){
3015 target = target.replaceFirst(" "," ");
3016 cost = cost + 0.2;
3017 }
3018 else{
3019 break;
3020 }
3021 }
3022
3023 cm.setCost(cost);
3024 cm.setString(target);
3025
3026 return cm;
3027 }
3028}
3029
3030
Note: See TracBrowser for help on using the repository browser.