package org.greenstone.mat.servlet;

import java.io.*;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.greenstone.gsdl3.*;
import org.greenstone.gsdl3.util.GlobalProperties;

/**
 * Writes the HTML report pages of the metadata analysis tool as files whose
 * paths start with {@code MetadataStats.HTMLDirectory}. Figures come from a
 * {@link DataMaker} built over the supplied {@link MetadataStats}; the servlet
 * {@link PrintWriter} receives an (empty) write and a flush at the start and
 * end of most steps.
 */
public class PrintHTML {
    // Ordering modes handed to WriteHTML / generateEmptyGraph.
    private final String NORMAL = "normal";
    private final String WORST = "worst";
    private final String BEST = "best";
    // Page-title fragments; generateAllPossibleGraph concatenates them per filter combination.
    private final String title1 ="Hide Empty Metadata ,";
    private final String title2 ="Hide Completed Metadata ,";
    private final String title3 ="Hide Documents with Empty Metadata ,";
    private final String title4 ="Hide Documents with Completed Metadata ,";
    private final String title5 ="No Available Graph";
    // Display names used for the metadata sets named "dublin" and "extracted".
    private final String metadataSet1 = "Dublin Core";
    private final String metadataSet2 = "Extracted";
    private final String spaceLeft = "«";
    private final String spaceRight = "»";
    private final String htmlSpace = " ";
    private final String oddChar ="odd Character";
    // Prefix of every generated file path (taken from mds.HTMLDirectory).
    private final String destination;
    private final String space ="space";
    // Stays null when the local host name cannot be resolved (see constructor).
    private String hostName;
    private String cssString;
    private int port_number;
    private GlobalProperties globalProperty = null;
    private String fileSeparator = File.separator;
    // Name of the metadata element whose URL map is used for the per-document links.
    String linkIdentifier = "";
    private HashMap url;
    MetadataStats mds;
    DataMaker dm;
    Calendar cl;
    SimpleDateFormat sdf;
    PrintWriter printWriter;

    /**
     * Creates a page writer for one analysed collection.
     *
     * @param ms   statistics object; also supplies {@code HTMLDirectory}, the output path prefix
     * @param arg  writer that receives the progress writes/flushes and some stack traces
     * @param port stored in {@code port_number}
     */
    public PrintHTML(MetadataStats ms, PrintWriter arg, int port){
        port_number = port;
        mds = ms;
        printWriter = arg;
        dm = new DataMaker(mds);
        cl=Calendar.getInstance();
        linkIdentifier = "dc.Identifier";
        destination = mds.HTMLDirectory;
        sdf = new SimpleDateFormat(" dd MMM yyyy 'at' HH:mm:ss z 'GMT'Z");
        setupMetadataLink("metadataElementURL");
        try {
            java.net.InetAddress localMachine = java.net.InetAddress.getLocalHost();
            hostName = localMachine.getHostName();
        }catch (java.net.UnknownHostException uhe) {
            // Best effort: the failure is only reported, hostName keeps its default (null).
            uhe.printStackTrace();
        }
        cssString = "";
    }

    /**
     * Collects the rows of every metadata element for which
     * {@code dm.IsElementEmpty} is false.
     *
     * @param dataset      not read by this method
     * @param id           document ids, passed through unchanged
     * @param metadataName candidate element names
     * @return an empty list when {@code id} or {@code metadataName} is empty or no
     *         element is kept; otherwise a three-entry list: an {@code ArrayList}
     *         of {@code dm.getMetadataRows(name)} results, {@code id}, and a
     *         {@code String[]} of the kept names
     */
    private ArrayList getHideEmptyMetadataDetail(ArrayList dataset, String[] id, String[] metadataName){
        return buildMetadataDetail(id, metadataName, true);
    }

    /**
     * Collects the rows of every metadata element for which
     * {@code dm.IsElementFull} is false.
     *
     * @param dataset      not read by this method
     * @param id           document ids, passed through unchanged
     * @param metadataName candidate element names
     * @return same shape as {@link #getHideEmptyMetadataDetail}
     */
    private ArrayList getHideFullMetadataDetail(ArrayList dataset, String[] id, String[] metadataName){
        return buildMetadataDetail(id, metadataName, false);
    }

    /**
     * Shared implementation of the two metadata-level filters above.
     *
     * @param hideEmpty {@code true} drops elements reported by {@code dm.IsElementEmpty},
     *                  {@code false} drops elements reported by {@code dm.IsElementFull}
     */
    private ArrayList buildMetadataDetail(String[] id, String[] metadataName, boolean hideEmpty){
        printWriter.write("");
        printWriter.flush();
        if(id.length==0 || metadataName.length==0){
            return new ArrayList();
        }

        ArrayList rows = new ArrayList();
        ArrayList keptNames = new ArrayList();
        for(int i = 0; i < metadataName.length; i++){
            String name = metadataName[i];
            boolean hidden = hideEmpty ? dm.IsElementEmpty(name) : dm.IsElementFull(name);
            if(!hidden){
                rows.add(dm.getMetadataRows(name));
                keptNames.add(name);
            }
        }

        // id is known to be non-empty here, so only the surviving names need re-checking.
        if(keptNames.isEmpty()){
            return new ArrayList();
        }
        String[] keptNameArray = (String[])keptNames.toArray(new String[keptNames.size()]);

        ArrayList detailList = new ArrayList();
        detailList.add(rows);
        detailList.add(id);
        detailList.add(keptNameArray);
        printWriter.write("");
        printWriter.flush();
        return detailList;
    }

    private ArrayList
getHideEmptyDocumentDetail(ArrayList dataset, String[] id, String[] metadataName){ printWriter.write(""); printWriter.flush(); if(id.length==0 || metadataName.length==0){ return new ArrayList(); } ArrayList detailList = new ArrayList(); ArrayList alist = dm.removeDocument(dataset, id, 0); ArrayList idList = dm.getRemovedID(); ArrayList newIDList = new ArrayList(); for(int i = 0; i< id.length; i++){ if(!idList.contains(id[i])){ newIDList.add(id[i]); } } id = new String[newIDList.size()]; for(int i = 0; i"); printWriter.flush(); return detailList; } private ArrayList getHideFullDocumentDetail(ArrayList dataset, String[] id, String[] metadataName){ printWriter.write(""); printWriter.flush(); if(id.length==0 || metadataName.length==0){ return new ArrayList(); } ArrayList detailList = new ArrayList(); ArrayList alist = dm.removeDocument(dataset, id ,1); ArrayList idList = dm.getRemovedID(); ArrayList newIDList = new ArrayList(); for(int i = 0; i< id.length; i++){ if(!idList.contains(id[i])){ newIDList.add(id[i]); } } id = new String[newIDList.size()]; for(int i = 0; i"); printWriter.flush(); return detailList; } public void generateAllPossibleGraph(ArrayList Namelist,String[]ids,String[] names,String condition){ printWriter.write(""); printWriter.flush(); ArrayList dataList= Namelist; String[] id = ids; String[] metadataName = names; ArrayList alist = new ArrayList(); ArrayList detailList = new ArrayList(); ArrayList new_dataList = new ArrayList(); String[] idList; String[] metadataNameList; //---------------------------------------------------------------------------------------------------------SS //-----------------------------------------------------------------------SSSS if(id.length==0 || metadataName.length==0){ generateEmptyGraph("SSSS",WORST,condition); generateEmptyGraph("SSSS",BEST,condition); } else{ new_dataList = transformDataList(dataList,id); WriteHTML("SSSS",new_dataList,id,metadataName,WORST,"Show completed graph",condition); 
WriteHTML("SSSS",new_dataList,id,metadataName,BEST,"Show completed graph",condition); } //-----------------------------------------------------------------------SSSH detailList = getHideFullDocumentDetail(dataList,id,metadataName); if(detailList.size()==0){ generateEmptyGraph("SSSH",WORST,condition); generateEmptyGraph("SSSH",BEST,condition); } else{ alist = (ArrayList)detailList.get(0); idList = (String[])detailList.get(1); metadataNameList = (String[])detailList.get(2); new_dataList = transformDataList(alist,idList); WriteHTML("SSSH",new_dataList,idList,metadataNameList,WORST,title4,condition); WriteHTML("SSSH",new_dataList,idList,metadataNameList,BEST,title4,condition); } //-----------------------------------------------------------------------SSHS detailList = getHideEmptyDocumentDetail(dataList,id,metadataName); if(detailList.size()==0){ generateEmptyGraph("SSHS",WORST,condition); generateEmptyGraph("SSHS",BEST,condition); generateEmptyGraph("SSHH",WORST,condition); generateEmptyGraph("SSHH",BEST,condition); } else{ alist = (ArrayList)detailList.get(0); idList = (String[])detailList.get(1); metadataNameList = (String[])detailList.get(2); new_dataList = transformDataList(alist,idList); WriteHTML("SSHS",new_dataList,idList,metadataNameList,WORST,title3,condition); WriteHTML("SSHS",new_dataList,idList,metadataNameList,BEST,title3,condition); //-----------------------------------------------------------------------SSHH detailList = getHideFullDocumentDetail(alist,idList,metadataNameList); if(detailList.size()==0){ generateEmptyGraph("SSHH",WORST,condition); generateEmptyGraph("SSHH",BEST,condition); } else{ alist = (ArrayList)detailList.get(0); idList = (String[])detailList.get(1); metadataNameList = (String[])detailList.get(2); new_dataList = transformDataList(alist,idList); WriteHTML("SSHH",new_dataList,idList,metadataNameList,WORST,title3+title4,condition); WriteHTML("SSHH",new_dataList,idList,metadataNameList,BEST,title3+title4,condition); } } 
//---------------------------------------------------------------------------------------------------------SH ArrayList xList = new ArrayList(); String[] idListCopy; String[] metadataNameListCopy; //-----------------------------------------------------------------------SHSS detailList = getHideFullMetadataDetail(dataList,id,metadataName); if(detailList.size()==0){ generateEmptyGraph("SHSS",WORST,condition); generateEmptyGraph("SHSS",BEST,condition); generateEmptyGraph("SHSH",WORST,condition); generateEmptyGraph("SHSH",BEST,condition); generateEmptyGraph("SHHS",WORST,condition); generateEmptyGraph("SHHS",BEST,condition); generateEmptyGraph("SHHH",WORST,condition); generateEmptyGraph("SHHH",BEST,condition); } else{ alist = (ArrayList)detailList.get(0); idList = (String[])detailList.get(1); metadataNameList = (String[])detailList.get(2); xList = (ArrayList)alist.clone(); idListCopy = (String[]) idList.clone(); metadataNameListCopy = (String[])metadataNameList.clone(); new_dataList = transformDataList(alist,idList); WriteHTML("SHSS",new_dataList,idList,metadataNameList,WORST,title2,condition); WriteHTML("SHSS",new_dataList,idList,metadataNameList,BEST,title2,condition); //-----------------------------------------------------------------------SHHS detailList = getHideEmptyDocumentDetail((ArrayList)xList.clone(),(String[])idListCopy.clone(),(String[])metadataNameListCopy.clone()); if(detailList.size()==0){ generateEmptyGraph("SHHS",WORST,condition); generateEmptyGraph("SHHS",BEST,condition); generateEmptyGraph("SHHH",WORST,condition); generateEmptyGraph("SHHH",BEST,condition); } else{ alist = (ArrayList)detailList.get(0); idList = (String[])detailList.get(1); metadataNameList = (String[])detailList.get(2); new_dataList = transformDataList(alist,idList); WriteHTML("SHHS",new_dataList,idList,metadataNameList,WORST,title2+title3,condition); WriteHTML("SHHS",new_dataList,idList,metadataNameList,BEST,title2+title3,condition); 
//-----------------------------------------------------------------------SHHH detailList = getHideFullDocumentDetail(alist,idList,metadataNameList); if(detailList.size()==0){ generateEmptyGraph("SHHH",WORST,condition); generateEmptyGraph("SHHH",BEST,condition); } else{ alist = (ArrayList)detailList.get(0); idList = (String[])detailList.get(1); metadataNameList = (String[])detailList.get(2); new_dataList = transformDataList(alist,idList); WriteHTML("SHHH",new_dataList,idList,metadataNameList,WORST,title2+title3+title4,condition); WriteHTML("SHHH",new_dataList,idList,metadataNameList,BEST,title2+title3+title4,condition); } } //-----------------------------------------------------------------------SHSH detailList = getHideFullDocumentDetail((ArrayList)xList.clone(),(String[])idListCopy.clone(),(String[])metadataNameListCopy.clone()); if(detailList.size()==0){ generateEmptyGraph("SHSH",WORST,condition); generateEmptyGraph("SHSH",BEST,condition); } else{ alist = (ArrayList)detailList.get(0); idList = (String[])detailList.get(1); metadataNameList = (String[])detailList.get(2); new_dataList = transformDataList(alist,idList); WriteHTML("SHSH",new_dataList,idList,metadataNameList,WORST,title2+title4,condition); WriteHTML("SHSH",new_dataList,idList,metadataNameList,BEST,title2+title4,condition); } } //---------------------------------------------------------------------------------------------------------HS ArrayList xList1 = new ArrayList(); String[] idListCopy1; String[] metadataNameListCopy1; //-----------------------------------------------------------------------HSSS detailList = getHideEmptyMetadataDetail(dataList,id,metadataName); if(detailList.size()==0){ generateEmptyGraph("HSSS",WORST,condition); generateEmptyGraph("HSSS",BEST,condition); generateEmptyGraph("HSHS",WORST,condition); generateEmptyGraph("HSHS",BEST,condition); generateEmptyGraph("HSHH",WORST,condition); generateEmptyGraph("HSHH",BEST,condition); generateEmptyGraph("HSSH",WORST,condition); 
generateEmptyGraph("HSSH",BEST,condition); } else{ alist = (ArrayList)detailList.get(0); idList = (String[])detailList.get(1); metadataNameList = (String[])detailList.get(2); xList1 = (ArrayList)alist.clone(); idListCopy1 = (String[]) idList.clone(); metadataNameListCopy1 = (String[])metadataNameList.clone(); new_dataList = transformDataList(alist,idList); WriteHTML("HSSS",new_dataList,idList,metadataNameList,WORST,title1,condition); WriteHTML("HSSS",new_dataList,idList,metadataNameList,BEST,title1,condition); //-----------------------------------------------------------------------HSHS detailList = getHideEmptyDocumentDetail((ArrayList)xList1.clone(),(String[])idListCopy1.clone(),(String[])metadataNameListCopy1.clone()); if(detailList.size()==0){ generateEmptyGraph("HSHS",WORST,condition); generateEmptyGraph("HSHS",BEST,condition); generateEmptyGraph("HSHH",WORST,condition); generateEmptyGraph("HSHH",BEST,condition); } else{ alist = (ArrayList)detailList.get(0); idList = (String[])detailList.get(1); metadataNameList = (String[])detailList.get(2); new_dataList = transformDataList(alist,idList); WriteHTML("HSHS",new_dataList,idList,metadataNameList,WORST,title1+title3,condition); WriteHTML("HSHS",new_dataList,idList,metadataNameList,BEST,title1+title3,condition); //-----------------------------------------------------------------------HSHH detailList = getHideFullDocumentDetail(alist,idList,metadataNameList); if(detailList.size()==0){ generateEmptyGraph("HSHH",WORST,condition); generateEmptyGraph("HSHH",BEST,condition); } else{ alist = (ArrayList)detailList.get(0); idList = (String[])detailList.get(1); metadataNameList = (String[])detailList.get(2); new_dataList = transformDataList(alist,idList); WriteHTML("HSHH",new_dataList,idList,metadataNameList,WORST,title1+title3+title4,condition); WriteHTML("HSHH",new_dataList,idList,metadataNameList,BEST,title1+title3+title4,condition); } } //-----------------------------------------------------------------------HSSH 
detailList = getHideFullDocumentDetail((ArrayList)xList1.clone(),(String[])idListCopy1.clone(),(String[])metadataNameListCopy1.clone()); if(detailList.size()==0){ generateEmptyGraph("HSSH",WORST,condition); generateEmptyGraph("HSSH",BEST,condition); } else{ alist = (ArrayList)detailList.get(0); idList = (String[])detailList.get(1); metadataNameList = (String[])detailList.get(2); new_dataList = transformDataList(alist,idList); WriteHTML("HSSH",new_dataList,idList,metadataNameList,WORST,title1+title4,condition); WriteHTML("HSSH",new_dataList,idList,metadataNameList,BEST,title1+title4,condition); } } //---------------------------------------------------------------------------------------------------------HH ArrayList xList2 = new ArrayList(); String[] idListCopy2; String[] metadataNameListCopy2; //-----------------------------------------------------------------------HHSS detailList = getHideEmptyMetadataDetail(dataList,id,metadataName); if(detailList.size()==0){ generateEmptyGraph("HHSS",WORST,condition); generateEmptyGraph("HHSS",BEST,condition); generateEmptyGraph("HHHS",WORST,condition); generateEmptyGraph("HHHS",BEST,condition); generateEmptyGraph("HHHH",WORST,condition); generateEmptyGraph("HHHH",BEST,condition); generateEmptyGraph("HHSH",WORST,condition); generateEmptyGraph("HHSH",BEST,condition); } else{ alist = (ArrayList)detailList.get(0); idList = (String[])detailList.get(1); metadataNameList = (String[])detailList.get(2); detailList = getHideFullMetadataDetail(alist,idList,metadataNameList); if(detailList.size()==0){ generateEmptyGraph("HHSS",WORST,condition); generateEmptyGraph("HHSS",BEST,condition); generateEmptyGraph("HHHS",WORST,condition); generateEmptyGraph("HHHS",BEST,condition); generateEmptyGraph("HHHH",WORST,condition); generateEmptyGraph("HHHH",BEST,condition); generateEmptyGraph("HHSH",WORST,condition); generateEmptyGraph("HHSH",BEST,condition); } else{ alist = (ArrayList)detailList.get(0); idList = (String[])detailList.get(1); 
metadataNameList = (String[])detailList.get(2); xList2 = (ArrayList)alist.clone(); idListCopy2 = (String[]) idList.clone(); metadataNameListCopy2 = (String[])metadataNameList.clone(); new_dataList = transformDataList(alist,idList); WriteHTML("HHSS",new_dataList,idList,metadataNameList,WORST,title1+title2,condition); WriteHTML("HHSS",new_dataList,idList,metadataNameList,BEST,title1+title2,condition); //---------------------------------------------------------------------- HHSH detailList = getHideFullDocumentDetail((ArrayList)xList2.clone(),(String[])idListCopy2.clone(),(String[])metadataNameListCopy2.clone()); if(detailList.size()==0){ generateEmptyGraph("HHSH",WORST,condition); generateEmptyGraph("HHSH",BEST,condition); } else{ alist = (ArrayList)detailList.get(0); idList = (String[])detailList.get(1); metadataNameList = (String[])detailList.get(2); new_dataList = transformDataList(alist,idList); WriteHTML("HHSH",new_dataList,idList,metadataNameList,WORST,title1+title2+title4,condition); WriteHTML("HHSH",new_dataList,idList,metadataNameList,BEST,title1+title2+title4,condition); } //----------------------------------------------------------------------HHHS detailList = getHideEmptyDocumentDetail((ArrayList)xList2.clone(),(String[])idListCopy2.clone(),(String[])metadataNameListCopy2.clone()); if(detailList.size()==0){ generateEmptyGraph("HHHS",WORST,condition); generateEmptyGraph("HHHS",BEST,condition); generateEmptyGraph("HHHH",WORST,condition); generateEmptyGraph("HHHH",BEST,condition); } else{ alist = (ArrayList)detailList.get(0); idList = (String[])detailList.get(1); metadataNameList = (String[])detailList.get(2); new_dataList = transformDataList(alist,idList); WriteHTML("HHHS",new_dataList,idList,metadataNameList,WORST,title1+title2+title3,condition); WriteHTML("HHHS",new_dataList,idList,metadataNameList,BEST,title1+title2+title3,condition); //----------------------------------------------------------------------HHHH detailList = 
getHideFullDocumentDetail((ArrayList)alist.clone(),(String[])idList.clone(),(String[])metadataNameList.clone()); if(detailList.size()==0){ generateEmptyGraph("HHHH",WORST,condition); generateEmptyGraph("HHHH",BEST,condition); } else{ alist = (ArrayList)detailList.get(0); idList = (String[])detailList.get(1); metadataNameList = (String[])detailList.get(2); new_dataList = transformDataList(alist,idList); WriteHTML("HHHH",new_dataList,idList,metadataNameList,WORST,title1+title2+title3+title4,condition); WriteHTML("HHHH",new_dataList,idList,metadataNameList,BEST,title1+title2+title3+title4,condition); } } } } } private ArrayList transformDataList(ArrayList list, String[] ids){ printWriter.write(""); printWriter.flush(); ArrayList wholeList = new ArrayList(); for(int i = 0; i< ids.length; i++){ ArrayList idList = new ArrayList(); for(int j = 0; j"); printWriter.flush(); return wholeList; } private void generateEmptyGraph(String fileName,String condition,String suffix){ printWriter.write(""); printWriter.flush(); String suf = suffix; if(!suffix.equalsIgnoreCase("dublin")){ suf = "other"; } String cases = condition; String collectionFolder = mds.getCollectionName(); try{ FileWriter fstream = new FileWriter(destination+suf+"_"+fileName+"_"+cases+".html"); BufferedWriter out = new BufferedWriter(fstream); out.write("\r\n"); out.write("\r\n"); out.write("\r\n"); out.write("\r\n No Available Chart\r\n"); out.write("\r\n"); out.write("\r\n"); out.write("\r\n"); out.write("

Summary

"); out.write("\r\n"); out.write("

No data available to render chart.

\r\n"); out.write("

Reason: Document number is zero or Metadata element number is zero

\r\n"); out.write("\r\n"); out.close(); fstream.close(); /* NOTE(review): the catch below discards every exception silently, and on that path neither writer is closed. */ }catch(Exception e){} printWriter.write(""); printWriter.flush(); }
/**
 * Writes one graph page to destination + suf + _ + fileName + _ + condition + .html,
 * where suf is suffix when it equals dublin (ignoring case) and other otherwise.
 *
 * The per-document URL list comes from dm.getURLMap for dc.Identifier or
 * nzir_internal.Link, whichever matches linkIdentifier and is present in
 * mds.getMetadataNameList(); otherwise it is empty.
 *
 * @param fileName     four-letter page name
 * @param dataset      list whose entries are cast to int[]
 * @param ids          document ids shown on the page
 * @param metadataName element names written as column headers, each with dm.Mean(name)
 * @param condition    ordering mode; the body branches on normal and best
 * @param title        written into the page head
 * @param suffix       also used as the key into mds.getMetadataSetMap()
 *
 * Exceptions raised while writing are caught at the end of the method and their
 * stack trace is printed to printWriter.
 *
 * NOTE(review): the Subset completeness figure is computed as
 * (double)(blueDot*100/t4) with t4 = ids.length * metadataName.length. The
 * division is done in int arithmetic before the cast, so the fraction is lost
 * before dm.round(..., 5) sees it, and t4 == 0 raises ArithmeticException.
 */
private void WriteHTML(String fileName, ArrayList dataset, String[] ids, String[] metadataName, String condition, String title, String suffix){ int blueDot = 0; String cases = condition; String suf = suffix; if(!suffix.equalsIgnoreCase("dublin")){ suf = "other"; } try{ printWriter.write(""); printWriter.flush(); ArrayList tempList = mds.getMetadataNameList(); ArrayList urlIDList; if(tempList.contains("dc.Identifier") && linkIdentifier.equals("dc.Identifier")){ urlIDList = dm.getURLMap("dc.Identifier"); } else if(tempList.contains("nzir_internal.Link") && linkIdentifier.equals("nzir_internal.Link")){ urlIDList = dm.getURLMap("nzir_internal.Link"); } else{ urlIDList = new ArrayList(); } FileWriter fstream = new FileWriter(destination+suf+"_"+fileName+"_"+cases+".html"); BufferedWriter out = new BufferedWriter(fstream); out.write("\r\n"); out.write("\r\n"); out.write("\r\n"); out.write("\r\n"+title+"\r\n"); out.write("\r\n"); out.write("\r\n"); out.write("\r\n"); out.write("\r\n"); out.write("\r\n"); out.write("\r\n"); out.write("\r\n"); out.write ("

Summary

"); out.write("\r\n"); out.write("
\r\n"); out.write("
\r\n"); out.write ("\r\n"); out.write ("\r\n"); out.write ("\r\n"); out.write("\r\n\r\n\r\n"); out.write("\r\n\r\n"); if(cases.equals("normal")){ for(int i = ids.length; i>0; i--){ out.write (""); int[] datarows = (int[])dataset.get(i-1); for(int j = 0; j"); blueDot++; } else { out.write ("\r\n"); out.write("\r\n"); out.write("\r\n"); out.write ("
Info\r\n"); out.write("URL\r\n"); for(int a = 0; a< metadataName.length; a++){ out.write(""+ metadataName[a]+"\r\n"); } out.write ("
 \r\n"); out.write(" \r\n"); for(int a = 0; a< metadataName.length; a++){ out.write(""+ dm.Mean(metadataName[a])+"%\r\n"); } out.write ("
 "); } } } } else{ ArrayList idList = new ArrayList(); for(int i = 0; i0; i--){ String idValue = ids[i-1]; int dots = 0; int[] datarows = (int[])dataset.get(i-1); for(int j = 0; j"); printWriter.flush(); int value = idList.indexOf(idIntValue[i]); int[] datarows = (int[])dataset.get(value); out.write ("
 \r\n"); if(urlIDList.contains(idIntValue[i])){ out.write("open\r\n"); } else{ out.write("\r\n"); } for(int j = 0; j\r\n"); blueDot++; } else { out.write ("\r\n"); } } } } if(cases.equals("best")){ for(int i = idIntValue.length; i>0; i--){ printWriter.write(""); printWriter.flush(); int value = idList.indexOf(idIntValue[i-1]); int[] datarows = (int[])dataset.get(value); out.write ("
 \r\n"); if(urlIDList.contains(idIntValue[i-1])){ out.write("open\r\n"); } else{ out.write("\r\n"); } for(int j = 0; j\r\n"); blueDot++; } else { out.write ("\r\n"); } } } } } out.write ("
\r\n
\r\n"); int t1 = ids.length; int t3 = metadataName.length; int t4 = t1*t3; out.write("\r\n"); out.write("\r\n"); HashMap tempMap = mds.getMetadataSetMap(); MetadataSet ms = (MetadataSet)tempMap.get(suffix); out.write("\r\n"); out.write("
This subset shows "+t1+" out of "+mds.getDocNum()+" documents"); out.write(""+blueDot+" out of "+(mds.getDocNum()*t3)+" metadata items are defined"); out.write("
This subset shows "+t3+" out of "+(ms.getIndexsList().size())+" metadata elements"); out.write("Subset completeness: "+dm.round((double)(blueDot*100/t4),5)+"%"); out.write("
\r\n"); out.write ("

Summary

"); out.write (""); //Close the output stream out.close(); fstream.close(); }catch (Exception e){ e.printStackTrace(printWriter); } printWriter.write(""); printWriter.flush(); } public void generateOverallStatisticsPage(HashMap MetadataSetMap){ String fileName = "Overall"; ArrayList wholeList = new ArrayList(); HashMap hp = mds.getMetadataSetMap(); Collection c = hp.values(); Iterator i = c.iterator(); int counter = 0; String[][] MetadataData = new String[c.size()][2]; while(i.hasNext()){ MetadataSet mds = (MetadataSet)i.next(); wholeList.add(mds); ArrayList newMDS = new ArrayList(); newMDS.add(mds); MetadataData[counter][0] = mds.getName(); MetadataData[counter][1] = dm.getSingleMetadataSetCompleteness(newMDS)+"%"; counter++; } try{ printWriter.write(""); printWriter.flush(); String str = sdf.format(cl.getTime()); String collectionFolder = mds.getCollectionName(); FileWriter fstream = new FileWriter(destination+fileName+".html"); BufferedWriter out = new BufferedWriter(fstream); out.write(""); out.write("\r\n"); out.write("Summary\r\n"); out.write("\r\n"); out.write("\r\n"); out.write("\r\n"); out.write("\r\n"); out.write("\r\n"); out.write ("
Mat Home
"); out.write("
Please send feedback about the Mat tool

"); out.write("

Summary

\r\n"); out.write("\r\n"); out.write("\r\n"); out.write("\r\n"); out.write("\r\n"); if(mds.getOAIURL().length()>50){ out.write("\r\n"); out.write("\r\n"); out.write("\r\n"); out.write("
OAI URL: "+mds.getOAIURL().substring(0,47)+"....\r\n"); } else{ out.write(""+mds.getOAIURL()+"\r\n"); } out.write("
Number of Records:\r\n"); out.write(""+mds.getDocNum()+"\r\n"); out.write("
\r\n"); out.write("\r\n"); out.write("\r\n"); out.write("
\r\n"); for(int a = 0; a\r\n"); if(MetadataData[a][0].equalsIgnoreCase("dublin")){ out.write ("\r\n"); } out.write ("
Metadata:\r\n"); out.write("Completeness\r\n"); out.write("
"+metadataSet1+""); } else if (MetadataData[a][0].equalsIgnoreCase("extracted")){ out.write (""+metadataSet2+""); } else{ out.write (""+MetadataData[a][0]+""); } out.write (" "+MetadataData[a][1]); out.write ("
"); out.write("
\r\n"); out.write("
"); out.write("\r\n"); out.write ("\r\n"); out.write("\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n
Customize Visualization"); out.write("
"); out.write("
"); out.write("
"); out.write("
"); out.write("
Metadata: "); if(MetadataData.length==1){ if(MetadataData[0][0].equalsIgnoreCase("dublin")){ out.write("
"); } else { out.write("
"); } } else{ for(int a = 0; a\r\n"); } else { out.write("
"); } } else{ if(MetadataData[a][0].equalsIgnoreCase("dublin")){ out.write("
"); } else{ out.write("
"); } } } } out.write("
Order By Completeness : "); out.write("
"); out.write("
"); out.write("

"); out.write("

"+str+"

"); out.write(""); out.close(); fstream.close(); }catch (Exception e){ System.err.println("Error: " + e.getMessage()); } printWriter.write(""); printWriter.flush(); } public void WriteMetadataSetDetailHTML(MetadataSet mdset){ String fileName = mdset.getName(); try{ printWriter.write(""); printWriter.flush(); String str = sdf.format(cl.getTime()); String collectionFolder = mds.getCollectionName(); FileWriter fstream = new FileWriter(destination+fileName+".html"); BufferedWriter out = new BufferedWriter(fstream); out.write(""); out.write("\r\n"); out.write("\r\nMetadata Detail\r\n"); out.write("\r\n"); out.write("\r\n"); out.write( "\r\n"); out.write ("\r\n"); out.write ("
Summary
"); out.write("
Please send feedback about the Mat tool

"); if(fileName.equalsIgnoreCase("dublin")){ out.write ("

Metadata Detail: "+metadataSet1+"

\r\n"); } else if (fileName.equalsIgnoreCase("extracted")){ out.write ("

Metadata Detail: "+metadataSet2+"

\r\n"); } else{ out.write ("

Metadata Detail: "+fileName+"

\r\n"); } out.write ("\r\n"); out.write ("\r\n"); out.write ("\r\n"); out.write ("\r\n"); ArrayList elementList = mdset.getIndexsList(); HashMap hp = new HashMap(); int num = elementList.size(); for(int i = 0; i\r\n"); out.write ("\r\n"); } out.write("
Elements:\r\n"); out.write ("Completeness\r\n"); out.write ("
"+elementName+"\r\n"); out.write (""+ dm.Mean(elementName)+"%\r\n"); out.write ("
\r\n"); out.write ("

Summary

"); out.write("

"+str+"

"); out.write ("\r\n"); out.close(); fstream.close(); }catch (Exception e){ e.printStackTrace(printWriter); } printWriter.write(""); printWriter.flush(); } public void generateMetadataElementDetailPage(MetadataSet mds,PrintWriter out){ ArrayList nameList = mds.getIndexsList(); for(int i = 0; i"); printWriter.flush(); String collectionFolder = mds.getCollectionName(); FileWriter fstream = new FileWriter(destination+fileName+".html"); BufferedWriter out = new BufferedWriter(fstream); out.write(""); out.write ("\r\n"); out.write("\r\n "+ name +" \r\n"); out.write("\r\n"); out.write("\r\n"); out.write(""); out.write("\r\n"); out.write("\r\n"); out.write ("
Summary
"); out.write("
Please send feedback about the Mat tool
"); out.write("»"); if(linkName.equalsIgnoreCase("dublin")){ out.write ("Metadata Detail ("+metadataSet1+")"); } else if (linkName.equalsIgnoreCase("extracted")){ out.write ("Metadata Detail ("+metadataSet2+")"); } else{ out.write ("Metadata Detail ("+linkName+")"); } if(name.startsWith("dc.")){ int dot = name.lastIndexOf('.'); dot++; String nameReplace = name.substring(dot); if(url.containsKey(nameReplace)){ out.write ("

Metadata Element Detail:"+ name +"

\r\n"); } else{ out.write ("

Metadata Element Detail: "+ name +"

\r\n"); } } else{ out.write ("

Metadata Element Detail: "+ name +"

\r\n"); } out.write ("\r\n"); out.write ("\r\n"); out.write ("\r\n"); out.write ("\r\n"); out.write ("\r\n"); out.write ("\r\n"); out.write ("\r\n"); out.write ("\r\n"); out.write ("\r\n"); out.write ("\r\n"); out.write ("\r\n"); double percentage = dm.Mean(name); out.write ("\r\n"); out.write ("\r\n"); out.write ("\r\n"); out.write ("\r\n"); out.write ("\r\n"); out.write ("\r\n"); out.write ("\r\n"); out.write ("\r\n"); out.write ("\r\n"); out.write ("\r\n"); out.write ("\r\n"); if(mds.getOaiPrefix().equalsIgnoreCase("oai_dc")){ HashMap suggestionMap = generateMetadataElementSortList(fileName,"ASCII",linkName,outx); if(suggestionMap.size()>0){ boolean status = compareElement(fileName,collectionFolder,suggestionMap,linkName); if(status){ out.write(""); createIncompletedList(fileName,linkName,collectionFolder,outx); } else{ out.write(""); createIncompletedList(fileName,linkName,collectionFolder,outx); } else{ out.write(""); createIncompletedList(fileName,linkName,collectionFolder,outx); } else{ out.write("\r\n"); out.write (""); out.write ("\r\n"); out.write ("
Total Number of Records\r\n"); out.write (""+mds.getDocNum()+"\r\n"); out.write ("
Unique Values\r\n"); out.write (""+dm.getDistinctNumber(name)+"\r\n"); out.write ("
Total times element used\r\n"); out.write (""+dm.getFrequency(name) +"\r\n"); out.write ("
No. of records containing element\r\n"); out.write (" "+dm.getDocumentUsedElement(name)+"\r\n"); out.write ("
Completeness
"); out.write ("
"+dm.Mean(name) +"%\r\n"); out.write ("
Minimum "+name +" usage in any record
\r\n"); out.write ("
"+dm.getMinRange(name) +"\r\n"); out.write ("
Maximum "+name +" usage in any record
\r\n"); out.write ("
"+dm.getMaxRange(name) +"\r\n"); out.write ("
Average "+name +" usage/record
\r\n"); out.write ("
"+dm.Average(name) +"\r\n"); out.write ("
Mode of "+name +" usage/record
\r\n"); out.write ("
"+dm.getMode(name) +"\r\n"); out.write ("
Coverage of the mode of "+name +" usage/record
\r\n"); out.write ("
"+dm.ModeFrequency(name) +"%\r\n"); out.write ("
View Potential Duplicate List"); if(percentage<100){ out.write ("Records missing "+fileName+"No Records Missing "+ fileName); } } else{ out.write("
No Potential Duplicates"); if(percentage<100){ out.write ("Records missing "+fileName+"No Records Missing "+ fileName); } } } else{ out.write("
No Potential Duplicates"); if(percentage<100){ out.write ("Records missing "+fileName+"No Records Missing "+ fileName); } } generateMetadataElementSortList(fileName,"Frequency-based",linkName,outx); } else{ HashMap suggestionMap = generateMetadataElementSortListNoIdentifier(fileName,"ASCII",linkName,outx); generateMetadataElementSortListNoIdentifier(fileName,"Frequency-based",linkName,outx); } out.write ("
"+"View Full Frequency Sorted list" + " "+"View Full ASCII Sorted list

\r\n"); out.write ("\r\n"); out.write ("\r\n"); out.write ("\r\n"); out.write ("\r\n"); String[] temp = dm.getSortList(name,"ASCII"); String[] temp2 = {" "," "," "," "," "}; int length = 0; if(temp.length>=5){ length=5; } else if(temp.length<5){ length = temp.length; } for(int i =0; i\r\n"); if(!temp2[a].equals(" ")){ out.write ("\r\n"); } String[] temp3 ={" "," "," "," "," "}; length = 0; int start = temp.length; if(temp.length>=5){ length= 5; } else if(temp.length<5){ length = temp.length; } for(int i = length; i>0; i--){ temp3[i-1] = temp[start-1]; start--; } out.write ("\r\n"); out.write ("\r\n"); int counter = temp.length; start = temp.length; x = temp3.length; for(int a = 0; a\r\n"); if(!temp3[a].equals(" ")){ out.write ("\r\n"); } } out.write ("
ASCII-Based\r\n"); out.write ("First Five\r\n"); out.write ("
"+(a+1)+"\r\n"); } else{ out.write (" \r\n"); } if(temp2[a].startsWith("http://")){ if(temp2[a].length()>60){ out.write (""+temp2[a].substring(0, 60)+"..."); } else { out.write (""+temp2[a]+""); } } else{ char singleChar = temp2[a].charAt(0); if(temp2[a].length()>61 ){ if(temp2[a].startsWith(" ") && ((int)singleChar!=65279)){ temp2[a] = temp2[a].substring(1); out.write (""+spaceLeft+space+spaceRight+temp2[a].substring(0, 60)+" ... \r\n"); } else if (((int)singleChar==65279)){ temp2[a] = temp2[a].substring(1); out.write (""+spaceLeft+oddChar+spaceRight+temp2[a].substring(0, 60)+" ... \r\n"); } else{ out.write (""+temp2[a].substring(0, 60)+" ... \r\n"); } } else { if(temp2[a].startsWith(" ")&& ((int)singleChar!=65279)){ temp2[a] = temp2[a].substring(1); out.write (""+spaceLeft+space+spaceRight+temp2[a]+"\r\n"); } else if (((int)singleChar==65279)){ temp2[a] = temp2[a].substring(1); out.write (""+spaceLeft+oddChar+spaceRight+temp2[a]+"\r\n"); } else{ out.write (""+temp2[a]+"\r\n"); } } } out.write ("
......\r\n"); out.write ("Last Five\r\n"); out.write ("
"+(start-length+1+a)); } else{ out.write (" "); } if(temp3[a].startsWith("http://")){ if(temp3[a].length()>60){ out.write (""+temp3[a].substring(0, 60)+"..."); } else { out.write (""+temp3[a]+""); } } else{ char singleChar = temp3[a].charAt(0); if(temp3[a].length()>61){ if(temp3[a].startsWith(" ") && (int)singleChar!=65279){ temp3[a] = temp3[a].substring(1); out.write (""+spaceLeft+space+spaceRight+temp3[a].substring(0, 60)+" ... \r\n"); } else if((int)singleChar==65279){ temp3[a] = temp3[a].substring(1); out.write (""+spaceLeft+oddChar+spaceRight+temp3[a].substring(0, 60)+" ... \r\n"); } else{ out.write (""+temp3[a].substring(0, 60)+" ... \r\n"); } } else{ if(temp3[a].startsWith(" ")&& (int)singleChar!=65279){ temp3[a] = temp3[a].substring(1); out.write (""+spaceLeft+space+spaceRight+temp3[a]+"\r\n"); } else if((int)singleChar==65279){ out.write (""+spaceLeft+oddChar+spaceRight+temp3[a]+"\r\n"); } else{ out.write (""+temp3[a]+"\r\n"); } } out.write ("

\r\n"); out.write ("\r\n"); out.write ("\r\n"); out.write ("\r\n"); out.write ("\r\n"); HashMap xMap = dm.getDistinctValueMap(name); temp = dm.getSortList(name,"Frequency-based"); temp2 = new String[] {" "," "," "," "," "}; length = 0; if(temp.length>=5){ length=5; } else if(temp.length<5){length = temp.length;} for(int i =0; i\r\n"); if(!temp2[a].equals(" ")){ char singleChar = temp2[a].charAt(0); out.write ("\r\n"); } temp3 = new String[]{" "," "," "," "," "}; length = 0; start = temp.length; if(temp.length>=5){length= 5;} else if(temp.length<5){length = temp.length;} for(int i = length; i>0; i--){ temp3[i-1] = temp[start-1]; start--; } out.write ("\r\n"); out.write ("\r\n"); x = temp3.length; start = temp.length; for(int a = 0; a\r\n"); if(!temp3[a].equals(" ")){ out.write ("\r\n"); } out.write ("
Frequency-Based:\r\n"); out.write ("First Five\r\n"); out.write ("
"+(a+1)+". (No. of occurrences: "+((Integer)xMap.get(temp2[a])).toString()+")\r\n"); if(temp2[a].startsWith("http://") && (int)singleChar != 65279){ if(temp2[a].length()>61){ out.write (""+temp2[a].substring(0, 60)+"..."); } else { out.write (""+temp2[a]+""); } } else{ if(temp2[a].length()>61){ if(temp2[a].startsWith(" ")){ temp2[a] = temp2[a].substring(1); out.write (""+spaceLeft+space+spaceRight+temp2[a].substring(0, 60)+" ... \r\n"); } else if((int)singleChar == 65279){ temp2[a] = temp2[a].substring(1); out.write (""+spaceLeft+oddChar+spaceRight+temp2[a].substring(0, 60)+" ... \r\n"); } else{ out.write (""+temp2[a].substring(0, 60)+" ... \r\n"); } } else{ if(temp2[a].startsWith(" ")){ temp2[a] = temp2[a].substring(1); out.write (""+spaceLeft+space+spaceRight+temp2[a]+"\r\n"); } else if((int)singleChar == 65279){ temp2[a] = temp2[a].substring(1); out.write (""+spaceLeft+oddChar+spaceRight+temp2[a]+"\r\n"); } else{ out.write (""+temp2[a]+"\r\n"); } } } } else{ out.write (" \r\n"); out.write ("\r\n"); } out.write ("
......\r\n"); out.write ("Last Five\r\n"); out.write ("
"+(start-length+1+a)+". (No. of occurrences: "+((Integer)xMap.get(temp3[a])).toString()+")\r\n"); char singleChar = temp3[a].charAt(0); if(temp3[a].startsWith("http://")){ if(temp3[a].length()>60){ out.write (""+temp3[a].substring(0, 60)+"..."); } else { out.write (""+temp3[a]+""); } } else{ if(temp3[a].length()>61){ if(temp3[a].startsWith(" ")){ temp3[a] = temp3[a].substring(1); out.write (""+spaceLeft+space+spaceRight+temp3[a].substring(0, 60)+" ... \r\n"); } else if((int)singleChar == 65279){ temp3[a] = temp3[a].substring(1); out.write (""+spaceLeft+oddChar+spaceRight+temp3[a].substring(0, 60)+" ... \r\n"); } else{ out.write (""+temp3[a].substring(0, 60)+" ... \r\n"); } } else{ if(temp3[a].startsWith(" ")){ out.write (""+spaceLeft+space+spaceRight+temp3[a]+"\r\n"); } else if((int)singleChar == 65279){ temp3[a] = temp3[a].substring(1); out.write (""+spaceLeft+oddChar+spaceRight+temp3[a]+"\r\n"); } else{ out.write (""+temp3[a]+"\r\n"); } } } } else{ //out.write (""+(a+1)+"\r\n"); out.write (" \r\n"); out.write ("\r\n"); } out.write ("
\r\n"); /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// out.write ("

Summary"); out.write ("»"); if(linkName.equalsIgnoreCase("dublin")){ out.write ("Metadata Detail ("+metadataSet1+")"); } else if (linkName.equalsIgnoreCase("extracted")){ out.write ("Metadata Detail ("+metadataSet2+")"); } else{ out.write ("Metadata Detail ("+linkName+")"); } //out.write(str+"

"); out.write ("\r\n"); //Close the output stream out.close(); fstream.close(); }catch (Exception e){ e.printStackTrace(printWriter); } printWriter.write(""); printWriter.flush(); //System.out.println("detail ends"); } private HashMap generateMetadataElementSortList(String title, String sort, String metadataSetName,PrintWriter outx){ String fileName = title+"_"+sort; String collectionFolder = mds.getCollectionName(); String IDENTIFIER = linkIdentifier; SearchLink sl = new SearchLink(mds.StatsDirectory); HashMap suggestionMap = new HashMap(); HashMap valueMap = sl.createValueMap(title,collectionFolder); HashMap linkMap = sl.createLinkMap(IDENTIFIER,collectionFolder); HashMap internalIDMap = dm.getInternalIdentifier(title); boolean status = false; try{ printWriter.write(""); printWriter.flush(); FileWriter fstream = new FileWriter(destination+fileName+".html"); BufferedWriter out = new BufferedWriter(fstream); out.write(""); out.write ("\r\n"); out.write("\r\nMetadata Element Sort List\r\n"); out.write("\r\n"); out.write("\r\n"); out.write("\r\n"); out.write("\r\n"); out.write("\r\n"); out.write("\r\n"); out.write("\r\n"); out.write("
Summary"); out.write("»"); //////////////////////////////////////////////////////////////////////////////////// if(metadataSetName.equalsIgnoreCase("dublin")){ out.write ("Metadata Detail ("+metadataSet1+")"); } else if (metadataSetName.equalsIgnoreCase("extracted")){ out.write ("Metadata Detail ("+metadataSet2+")"); } else{ out.write ("Metadata Detail ("+metadataSetName+")"); } out.write("»"); out.write(""+title+"
"); out.write("
Please send feedback about the Mat tool

"); String[] list = dm.getSortList(title, sort); if(list.length>=1){ if(sort.equals("ASCII")){ out.write("\r\n"); out.write ("

"+ title+"

\r\n"); out.write ("\r\n"); if(list[i].length()>=201){ if(list[i].startsWith("http://")){ if(title.equals(IDENTIFIER)){ if(list[i].length()>=100){ out.write("\r\n"); out.write("

Summary"); out.write("»"); //////////////////////////////////////////////////////////////////////////////////// if(metadataSetName.equalsIgnoreCase("dublin")){ out.write ("Metadata Detail ("+metadataSet1+")"); } else if (metadataSetName.equalsIgnoreCase("extracted")){ out.write ("Metadata Detail ("+metadataSet2+")"); } else{ out.write ("Metadata Detail ("+metadataSetName+")"); } out.write("»"); out.write(""+title+"

"); out.write("\r\n"); out.close(); fstream.close(); }catch(Exception e){ e.printStackTrace(outx); } if(status){ return suggestionMap; } else{ return new HashMap(); } } private void setupMetadataLink(String fileName){ String destination = globalProperty.getGSDL3Home()+fileSeparator+"mat"+fileSeparator+"script"+fileSeparator+fileName+".xml"; try{ DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance(); DocumentBuilder docBuilder = docBuilderFactory.newDocumentBuilder(); Document doc = docBuilder.newDocument(); //doc = docBuilder.parse (new File("/research/cc108/greenstone3/web/mat/script/"+fileName+".xml")); doc = docBuilder.parse (destination); Element rootNode = doc.getDocumentElement(); NodeList listOfName = rootNode.getElementsByTagName("metadataElement"); url = new HashMap(); for(int i = 0; i"); out.write ("\r\n"); out.write("\r\nIncompleted Document List\r\n"); out.write("\r\n"); out.write("\r\n"); out.write("\r\n"); out.write("

Summary"); out.write("»"); if(metadataSetName.equalsIgnoreCase("dublin")){ out.write ("Metadata Detail ("+metadataSet1+")"); } else if (metadataSetName.equalsIgnoreCase("extracted")){ out.write ("Metadata Detail ("+metadataSet2+")"); } else{ out.write ("Metadata Detail ("+metadataSetName+")"); } out.write("»"); out.write(""+fileName+""); out.write("

"+fileName+" does not appear in the following documents

"); int counter = 0; Set s = hp.keySet(); Iterator i = s.iterator(); out.write("
ASCII Sort\r\n"); out.write ("Element Values\r\n"); out.write ("Source Documents\r\n"); out.write ("Internal Link\r\n"); out.write ("\r\n"); int counter = 0; for(int i = 0; i\r\n"); } else if((list.length>5) && (list.length-5==i)){ out.write ("\r\n"); } InternalLink il= (InternalLink)internalIDMap.get(list[i]); ArrayList alist2 = il.retrieveList(); String id = (String)alist2.get(0); id = id.substring(4); out.write("
"+(counter+1)+""+list[i].substring(0,100)+"..."); } else{ out.write(""+(counter+1)+""+list[i]+""); } out.write("Source\r\n"); } else{ ArrayList alist= sl.CreateIndentifierLinkPage(title, list[i],IDENTIFIER, collectionFolder,valueMap,linkMap); if(list[i].length()>=100){ String url = (String)alist.get(0); if(alist.size()==1){ out.write(""+(counter+1)+""+list[i].substring(0,100)+"..."); if(url.startsWith("http://")){ out.write("Source\r\n"); }else{ out.write("Source\r\n"); } } else if(alist.size()>1){ out.write(""+(counter+1)+""+list[i].substring(0,100)+"..."); if(url.startsWith("http://")){ out.write("Source...\r\n"); }else{ out.write("Source\r\n"); } } else{ out.write(""+(counter+1)+""+list[i].substring(0,100)+""); out.write("No Source Available\r\n"); } suggestionMap.put(list[i], url); } else{ String url = (String)alist.get(0); if(alist.size()==1){ out.write(""+(counter+1)+""+list[i]+""); //out.write("Source\r\n"); if(url.startsWith("http://")){ out.write("Source\r\n"); }else{ out.write("Source\r\n"); } } else if(alist.size()>1){ out.write(""+(counter+1)+""+list[i]+""); //out.write("Source...\r\n"); if(url.startsWith("http://")){ out.write("Source...\r\n"); }else{ out.write("Source\r\n"); } } else{ out.write(""+(counter+1)+""+list[i]+""); out.write("No Source Available\r\n"); } suggestionMap.put(list[i], url); } } } else{ ArrayList alist= sl.CreateIndentifierLinkPage(title, list[i],IDENTIFIER, collectionFolder,valueMap,linkMap); ////////////////////////////////////// if(list[i].startsWith(" ")){ String elements = list[i]; list[i] = list[i].substring(1); if(alist.size()==1){ out.write(""+(counter+1)+""+spaceLeft+space+spaceRight+list[i].substring(0, 200)+"..."+ "Source\r\n"); suggestionMap.put(elements, (String)alist.get(0)); } else if(alist.size()>1){ out.write(""+(counter+1)+""+spaceLeft+space+spaceRight+list[i].substring(0, 200)+"..."+ "Source...\r\n"); suggestionMap.put(elements, (String)alist.get(0)); } else{ 
out.write(""+(counter+1)+""+spaceLeft+space+spaceRight+list[i].substring(0, 200)+"...No Source Available \r\n"); suggestionMap.put(elements, "No Source Available"); } } else{ if(alist.size()==1){ out.write(""+(counter+1)+""+list[i].substring(0, 200)+"..." + "Source\r\n"); suggestionMap.put(list[i], (String)alist.get(0)); } else if(alist.size()>1){ out.write(""+(counter+1)+""+list[i].substring(0, 200)+"..." + "Source...\r\n"); suggestionMap.put(list[i], (String)alist.get(0)); } else{ out.write(""+(counter+1)+""+list[i].substring(0, 200)+"... No Source Available\r\n"); suggestionMap.put(list[i],"No Source Available"); } } } } else{ if(list[i].startsWith("http://")){ if(title.equals(IDENTIFIER)){ if(list[i].length()>=100){ out.write(""+(counter+1)+""+list[i].substring(0,100)+"..."); } else{ out.write(""+(counter+1)+""+list[i]+""); } out.write("Source\r\n"); } else{ ArrayList alist= sl.CreateIndentifierLinkPage(title, list[i],IDENTIFIER, collectionFolder,valueMap,linkMap); if(list[i].length()>=100){ if(alist.size()==1){ out.write(""+(counter+1)+""+list[i].substring(0,100)+"..."); out.write("Source\r\n"); } else if(alist.size()>1){ out.write(""+(counter+1)+""+list[i].substring(0,100)+"..."); out.write("Source...\r\n"); } else{ out.write(""+(counter+1)+""+list[i]+""); out.write("No Source Available\r\n"); } } else{ if(alist.size()==1){ out.write(""+(counter+1)+""+list[i]+""); out.write("Source\r\n"); suggestionMap.put(list[i], (String)alist.get(0)); } else if(alist.size()>1){ out.write(""+(counter+1)+""+list[i]+""); out.write("Source...\r\n"); suggestionMap.put(list[i], (String)alist.get(0)); } else{ out.write(""+(counter+1)+""+list[i]+""); out.write("No Source Available\r\n"); suggestionMap.put(list[i],"No Source Available"); } } } } else{ ArrayList alist= sl.CreateIndentifierLinkPage(title, list[i],IDENTIFIER, collectionFolder,valueMap,linkMap); if(list[i].startsWith(" ")){ String elements = list[i]; list[i] = list[i].substring(1); if(alist.size()==1){ 
out.write(""+(counter+1)+""+spaceLeft+space+spaceRight+list[i]+ "Source\r\n"); suggestionMap.put(list[i], (String)alist.get(0)); } else if(alist.size()>1){ out.write(""+(counter+1)+""+spaceLeft+space+spaceRight+list[i]+ "Source...\r\n"); suggestionMap.put(list[i], (String)alist.get(0)); } else{ out.write(""+(counter+1)+""+spaceLeft+space+spaceRight+list[i]+"No Source Available\r\n"); suggestionMap.put(elements,"No Source Available"); } } else{ if(alist.size()==1){ out.write(""+(counter+1)+""+list[i]+"Source\r\n"); suggestionMap.put(list[i],(String)alist.get(0)); } else if(alist.size()>1){ out.write(""+(counter+1)+""+list[i]+"Source...\r\n"); suggestionMap.put(list[i],(String)alist.get(0)); } else{ out.write(""+(counter+1)+""+list[i]+"No Source Available\r\n"); suggestionMap.put(list[i],"No Source Available"); } //out.write(""+(counter+1)+""+list[i]+"\r\n"); } } } counter++; out.write("View"); } if(suggestionMap.size()>1 && !title.equals(IDENTIFIER)){ status = true; } } else{ out.write("\r\n"); out.write ("

"+ title+"

\r\n"); out.write ("\r\n"); out.write("
 \r\n"); out.write ("Frequency\r\n"); out.write ("Element Values\r\n"); out.write ("Source Documents\r\n"); out.write ("Internal Link\r\n"); out.write ("\r\n"); HashMap xMap = dm.getDistinctValueMap(title); int counter = 0; for(int i = 0; i\r\n"); } else if((list.length>5) && (list.length-5==i)){ out.write ("\r\n"); } InternalLink il= (InternalLink)internalIDMap.get(list[i]); ArrayList alist2 = il.retrieveList(); String id = (String)alist2.get(0); id = id.substring(4); out.write("
"+(counter+1)+" "+((Integer)xMap.get(list[i])).toString()+"\r\n"); if(list[i].length()>=201){ if(list[i].startsWith("http://")){ if(title.equals(IDENTIFIER)){ if(list[i].length()>=100){ out.write(""+list[i].substring(0,100)+""); } else{ out.write(""+list[i]+""); } out.write("Source\r\n"); } else{ ArrayList alist= sl.CreateIndentifierLinkPage(title, list[i],IDENTIFIER, collectionFolder,valueMap,linkMap); if(list[i].length()>=100){ if(alist.size()==1){ out.write(""+list[i].substring(0,100)+""); out.write("Source\r\n"); } else if (alist.size()>1){ out.write(""+list[i].substring(0,100)+""); out.write("Source...\r\n"); } else{ out.write(""+list[i].substring(0,100)+""); out.write("No Source Available\r\n"); } } else{ out.write(""+list[i]+""); } } } else{ ArrayList alist= sl.CreateIndentifierLinkPage(title, list[i],IDENTIFIER, collectionFolder,valueMap,linkMap); if(list[i].startsWith(" ")){ list[i]=list[i].substring(1); if(alist.size()==1){ out.write(""+spaceLeft+space+spaceRight+list[i].substring(0,200)+"..."+ "Source\r\n"); } else if(alist.size()>1){ out.write(""+spaceLeft+space+spaceRight+list[i].substring(0,200)+"..."+ "Source...\r\n"); } else{ out.write(""+spaceLeft+space+spaceRight+list[i].substring(0,200)+"...No Source Available\r\n"); } } else{ if(alist.size()==1){ out.write(""+list[i].substring(0,200)+"..." 
+ "Source\r\n"); } else if(alist.size()>1){ out.write(""+list[i].substring(0,200)+"..."+ "Source...\r\n"); } else{ out.write(""+list[i].substring(0,200)+"...No Source Available\r\n"); } } } } else{ if(list[i].startsWith("http://")){ if(title.equals(IDENTIFIER)){ if(list[i].length()>=100){ out.write(""+list[i].substring(0,100)+""); } else{ out.write(""+list[i]+""); } out.write("Source\r\n"); } else{ //ArrayList alist= sl.CreateIndentifierLinkPage(title, list[i],"dc.Identifier", collectionFolder,valueMap,linkMap); ArrayList alist= sl.CreateIndentifierLinkPage(title, list[i],IDENTIFIER, collectionFolder,valueMap,linkMap); if(list[i].length()>=100){ if(alist.size()==1){ out.write(""+list[i].substring(0,100)+""); out.write("Source\r\n"); } else if(alist.size()>1){ out.write(""+list[i].substring(0,100)+""); out.write("Source...\r\n"); } else{ out.write(""+list[i].substring(0,100)+""); out.write("No Source Available\r\n"); } } else{ if(alist.size()==1){ out.write(""+list[i]+""); out.write("Source\r\n"); } else if(alist.size()>1){ out.write(""+list[i]+""); out.write("Source...\r\n"); } else{ out.write(""+list[i]+""); out.write("No Source Available\r\n"); } } } } else{ ArrayList alist= sl.CreateIndentifierLinkPage(title, list[i],IDENTIFIER, collectionFolder,valueMap,linkMap); if(list[i].startsWith(" ")){ list[i]=list[i].substring(1); if(alist.size()==1){ out.write(""+spaceLeft+space+spaceRight+list[i]+ "Source\r\n"); } else if(alist.size()>1){ out.write(""+spaceLeft+space+spaceRight+list[i]+ "Source...\r\n"); } else{ out.write(""+spaceLeft+space+spaceRight+list[i]+"No Source Available\r\n"); } } else{ if(alist.size()==1){ out.write(""+list[i]+"Source\r\n"); } else if(alist.size()>1){ out.write(""+list[i]+"Source...\r\n"); } else{ out.write(""+list[i]+"No Source Available\r\n"); } } } } counter++; out.write("View"); } } out.write("
\r\n"); } else{ out.write("

Sorry! The list is empty

"); } out.write("
Document IDSource Link\r\n"); while(i.hasNext()){ counter++; String keys = (String)i.next(); InternalLink il = (InternalLink) hp.get(keys); ArrayList alist = il.retrieveList(); String url = (String)alist.get(0); out.write("
"+counter+""+url+"\r\n"); } out.write("
\r\n"); out.write("

Summary"); out.write("»"); if(metadataSetName.equalsIgnoreCase("dublin")){ out.write ("Metadata Detail ("+metadataSet1+")"); } else if (metadataSetName.equalsIgnoreCase("extracted")){ out.write ("Metadata Detail ("+metadataSet2+")"); } else{ out.write ("Metadata Detail ("+metadataSetName+")"); } out.write("»"); out.write(""+fileName+""); out.close(); fstream.close(); }catch(IOException ex){ex.printStackTrace();} } public boolean compareElement(String fileName,String collectionFolder,HashMap suggestionMap, String metadataSetName){ boolean status = false; Set kset = suggestionMap.keySet(); ArrayList arrayList = new ArrayList(); Iterator is = kset.iterator(); while(is.hasNext()){ arrayList.add((String)is.next()); } HashMap distanceMap = new HashMap(); int totalLength = 0; int arrayListLength = arrayList.size(); int counter = 0; printWriter.write(""); printWriter.flush(); double distance ; try{ for(int i = 0; i1){ if(_keywordClone.charAt(0)==' '){ _keywordClone = _keywordClone.substring(1); spaceCounterFront1++; } else{ if(_keywordClone.charAt(_keywordClone.length()-1)==' '){ _keywordClone = _keywordClone.substring(0,_keywordClone.length()-1); spaceCounterEnd1++; } else{ break; } } } else{ break; } } int keywordLength = keywordClone.length(); totalLength = totalLength + keywordLength; for(int j = i+1; j1){ if(_keywordClone2.charAt(0)==' '){ _keywordClone2 = _keywordClone2.substring(1); spaceCounterFront2++; } else{ if(_keywordClone2.charAt(_keywordClone2.length()-1)==' '){ _keywordClone2 = _keywordClone2.substring(0,_keywordClone2.length()-1); spaceCounterEnd2++; } else{ break; } }} else{ break; } } if(counter == 50000){ printWriter.write(""); printWriter.flush(); counter = 0; } int keyword2Length = keywordClone2.length(); if(keywordLength>(keyword2Length+2) || (keywordLength+2)"); printWriter.flush(); if(distanceMap.size()!=0){ generateHTML(distanceMap,fileName,metadataSetName); status = true; } }catch(Exception ex){ex.printStackTrace();} return status; } private void 
generateHTML(HashMap distanceMap, String fileName,String metadataSetName){ printWriter.write(""); printWriter.flush(); String collectionFolder = mds.getCollectionName(); try{ FileWriter fstream = new FileWriter(destination+fileName+"_Suggestion.html"); BufferedWriter out = new BufferedWriter(fstream); out.write(""); out.write ("\r\n"); out.write("\r\nPotential Duplicate List\r\n"); out.write("\r\n"); out.write("\r\n"); out.write("\r\n"); out.write("

Summary"); out.write("»"); if(metadataSetName.equalsIgnoreCase("dublin")){ out.write ("Metadata Detail ("+metadataSet1+")"); } else if (metadataSetName.equalsIgnoreCase("extracted")){ out.write ("Metadata Detail ("+metadataSet2+")"); } else{ out.write ("Metadata Detail ("+metadataSetName+")"); } out.write("»"); out.write(""+fileName+""); out.write("

Potential Duplicate List

"); out.write("\r\n"); Set s = distanceMap.keySet(); Iterator i = s.iterator(); while(i.hasNext()){ String keyword = (String)i.next(); InternalLink il = (InternalLink)distanceMap.get(keyword); ArrayList alist = il.retrieveNodeList(); out.write(""); } out.write("
Original TextSource Link\r\n"); for(int a = 0; a"+il2.getValue()+""+url+""); } out.write("
\r\n"); out.write("

Summary"); out.write("»"); if(metadataSetName.equalsIgnoreCase("dublin")){ out.write ("Metadata Detail ("+metadataSet1+")"); } else if (metadataSetName.equalsIgnoreCase("extracted")){ out.write ("Metadata Detail ("+metadataSet2+")"); } else{ out.write ("Metadata Detail ("+metadataSetName+")"); } out.write("»"); out.write(""+fileName+""); out.write("\r\n"); out.close(); fstream.close(); }catch(IOException ex){ex.printStackTrace();} } public int calculateEditDistance(char[] args1, char[] args2){ int n = args1.length; int m = args2.length; if (n == 0) { return m; } else if (m == 0) { return n; } int[] p = new int[n + 1]; int[] d = new int[n + 1]; int[] _d; int i; int j; int cost; // cost for (i = 0; i <= n; i++) { p[i] = i; } for (j = 1; j <= m; j++) { d[0] = j; for (i = 1; i <= n; i++) { cost = (args1[i-1] == args2[j-1]) ? 0 : 1; d[i] = Math.min(Math.min(d[i - 1] + 1, p[i] + 1), p[i - 1] + cost); } // copy current distance counts to 'previous row' distance counts _d = p; p = d; d = _d; } // our last action in the above loop was to switch d and p, so p now // actually has the most recent cost counts return p[n]; } private String removeUnusedCharacter(String target){ //printWriter.write(""); //printWriter.flush(); // remove \n while(true){ if(target.indexOf("\\n")!=-1){ target = target.replaceFirst("\\\\n",""); } else{ break; } } //remove spaces at the end of string if(target.length()>1){ while(true){ if(target.length()>1){ if(target.charAt(target.length()-1)==' '){ target = target.substring(0,target.length()-1); } else{ break; } } else{ break; } } } //remove leading spaces if(target.length()>1){ while(true){ if(target.length()>1){ if(target.charAt(0)==' '){ target = target.substring(1,target.length()); } else{ break; }} else{ break; } } } //remove multiple spaces between words while(true){ if(target.indexOf(" ")!=-1){ target = target.replaceFirst(" "," "); } else{ break; } } return target; } private costModel removeUnusedCharacter(String target, double cost){ 
costModel cm = new costModel(); // remove \n plus cost while(true){ if(target.indexOf("\\n")!=-1){ target = target.replaceFirst("\\\\n",""); cost = cost + 0.2; } else{ break; } } //remove spaces at the end of string if(target.length()>1){ while(true){ if(target.length()>1){ if(target.charAt(target.length()-1)==' '){ target = target.substring(0,target.length()-1); cost = cost + 0.2; } else{ break; }} else{ break; } } } //remove leading spaces if(target.length()>1){ while(true){ if(target.length()>1){ if(target.charAt(0)==' '){ target = target.substring(1,target.length()); cost = cost + 0.2; } else{ break; }} else{ break; } } } //remove multiple spaces between words while(true){ if(target.indexOf(" ")!=-1){ target = target.replaceFirst(" "," "); cost = cost + 0.2; } else{ break; } } cm.setCost(cost); cm.setString(target); return cm; } private HashMap generateMetadataElementSortListNoIdentifier(String title, String sort, String metadataSetName,PrintWriter outx){ String fileName = title+"_"+sort; String collectionFolder = mds.getCollectionName(); System.out.println("level 3"); SearchLink sl = new SearchLink(mds.StatsDirectory); System.out.println("level 4"); HashMap suggestionMap = new HashMap(); HashMap internalIDMap = dm.getInternalIdentifier(title); boolean status = false; try{ printWriter.write(""); printWriter.flush(); FileWriter fstream = new FileWriter(destination+fileName+".html"); BufferedWriter out = new BufferedWriter(fstream); out.write(""); out.write ("\r\n"); out.write("\r\nMetadata Element Sort List\r\n"); out.write("\r\n"); out.write("\r\n"); out.write("\r\n"); out.write("\r\n"); out.write("\r\n"); out.write("\r\n"); out.write("\r\n"); out.write("

Summary"); out.write("»"); if(metadataSetName.equalsIgnoreCase("dublin")){ out.write ("Metadata Detail ("+metadataSet1+")"); } else if (metadataSetName.equalsIgnoreCase("extracted")){ out.write ("Metadata Detail ("+metadataSet2+")"); } else{ out.write ("Metadata Detail ("+metadataSetName+")"); } out.write("»"); out.write(""+title+"
"); out.write("
Please send feedback about the Mat tool

"); String[] list = dm.getSortList(title, sort); if(list.length>=1){ if(sort.equals("ASCII")){ out.write("\r\n"); out.write ("

"+ title+"

\r\n"); out.write ("\r\n"); if(list[i].length()>=201){ if(list[i].startsWith("http://")){ if(list[i].length()>=100){ out.write("\r\n"); out.write("

Summary"); out.write("»"); if(metadataSetName.equalsIgnoreCase("dublin")){ out.write ("Metadata Detail ("+metadataSet1+")"); } else if (metadataSetName.equalsIgnoreCase("extracted")){ out.write ("Metadata Detail ("+metadataSet2+")"); } else{ out.write ("Metadata Detail ("+metadataSetName+")"); } out.write("»"); out.write(""+title+"

"); out.write("\r\n"); out.close(); fstream.close(); }catch(Exception e){ e.printStackTrace(outx); } return new HashMap(); } private void createIncompletedListNoIdentifier(String fileName, String metadataSetName, String collectionFolder,PrintWriter outx){ //HashMap hp = dm.getIdentifierLink("dc.Identifier"); HashMap hp = dm.getIdentifierLinkNoIdentifier(); String[] ids = dm.getDocumentIDList(fileName); System.out.println("Length: "+ids.length+" "+fileName+ " "+hp.size()); Set st = hp.keySet(); Iterator it = st.iterator(); while(it.hasNext()){ System.out.println(it.next()); } for(int i = 0; i"); out.write ("\r\n"); out.write("\r\nIncompleted Document List\r\n"); out.write("\r\n"); out.write("\r\n"); out.write("\r\n"); out.write("

Summary"); out.write("»"); if(metadataSetName.equalsIgnoreCase("dublin")){ out.write ("Metadata Detail ("+metadataSet1+")"); } else if (metadataSetName.equalsIgnoreCase("extracted")){ out.write ("Metadata Detail ("+metadataSet2+")"); } else{ out.write ("Metadata Detail ("+metadataSetName+")"); } out.write("»"); out.write(""+fileName+""); out.write("

"+fileName+" does not appear in the following documents

"); int counter = 0; System.out.println("link....."); Set s = hp.keySet(); Iterator i = s.iterator(); System.out.println("link2....."); out.write("
ASCII Sort\r\n"); out.write ("Element Values\r\n"); out.write ("Source Documents\r\n"); out.write ("Internal Link\r\n"); out.write ("\r\n"); int counter = 0; for(int i = 0; i\r\n"); } else if((list.length>5) && (list.length-5==i)){ out.write ("\r\n"); } InternalLink il= (InternalLink)internalIDMap.get(list[i]); ArrayList alist2 = il.retrieveList(); String id = (String)alist2.get(0); id = id.substring(4); out.write("
"+(counter+1)+""+list[i].substring(0,100)+""); out.write("Source\r\n"); } else{ out.write(""+(counter+1)+""+list[i]+""); out.write("Source\r\n"); } } else{ if(list[i].startsWith(" ")){ String elements = list[i]; list[i] = list[i].substring(1); out.write(""+(counter+1)+""+spaceLeft+space+spaceRight+list[i].substring(0, 200)+"...No Source Available \r\n"); } else{ out.write(""+(counter+1)+""+list[i].substring(0, 200)+"... No Source Available\r\n"); } } } else{ if(list[i].startsWith("http://")){ if(list[i].length()>=100){ out.write(""+(counter+1)+""+list[i].substring(0,100)+"..."); } else{ out.write(""+(counter+1)+""+list[i]+""); } out.write("Source\r\n"); } else{ if(list[i].startsWith(" ")){ String elements = list[i]; list[i] = list[i].substring(1); out.write(""+(counter+1)+""+spaceLeft+space+spaceRight+list[i]+"No Source Available\r\n"); suggestionMap.put(elements,"No Source Available"); } else{ out.write(""+(counter+1)+""+list[i]+"No Source Available\r\n"); } } } counter++; out.write("View"); } } else{ out.write("\r\n"); out.write ("

"+ title+"

\r\n"); out.write ("\r\n"); out.write("
 \r\n"); out.write ("Frequency\r\n"); out.write ("Element Values\r\n"); out.write ("Source Documents\r\n"); out.write ("Internal Link\r\n"); out.write ("\r\n"); HashMap xMap = dm.getDistinctValueMap(title); int counter = 0; for(int i = 0; i\r\n"); } else if((list.length>5) && (list.length-5==i)){ out.write ("\r\n"); } InternalLink il= (InternalLink)internalIDMap.get(list[i]); ArrayList alist2 = il.retrieveList(); String id = (String)alist2.get(0); id = id.substring(4); out.write("
"+(counter+1)+" "+((Integer)xMap.get(list[i])).toString()+"\r\n"); if(list[i].length()>=201){ if(list[i].startsWith("http://")){ if(list[i].length()>=100){ out.write(""+list[i].substring(0,100)+""); } else{ out.write(""+list[i]+""); } out.write("Source\r\n"); } else{ if(list[i].startsWith(" ")){ list[i]=list[i].substring(1); out.write(""+spaceLeft+space+spaceRight+list[i].substring(0,200)+"...No Source Available\r\n"); } else{ out.write(""+list[i].substring(0,200)+"...No Source Available\r\n"); } } } else{ if(list[i].startsWith("http://")){ if(list[i].length()>=100){ out.write(""+list[i].substring(0,100)+""); } else{ out.write(""+list[i]+""); } out.write("Source\r\n"); } else{ if(list[i].startsWith(" ")){ list[i]=list[i].substring(1); out.write(""+spaceLeft+space+spaceRight+list[i]+"No Source Available\r\n"); } else{ out.write(""+list[i]+"No Source Available\r\n"); } } } counter++; out.write("View"); } } out.write("
\r\n"); } else{ out.write("

Sorry! The list is empty

"); } out.write("
Document IDSource Link\r\n"); while(i.hasNext()){ //printWriter.write(""); System.out.println("link.....3"); counter++; String keys = (String)i.next(); System.out.println("link.....4"+keys); InternalLink il = (InternalLink) hp.get(keys); ArrayList alist = il.retrieveList(); System.out.println("link.....5"+alist.size()); String url = (String)alist.get(0); out.write("
"+counter+""+url+"\r\n"); } out.write("
\r\n"); out.write("

Summary"); out.write("»"); if(metadataSetName.equalsIgnoreCase("dublin")){ out.write ("Metadata Detail ("+metadataSet1+")"); } else if (metadataSetName.equalsIgnoreCase("extracted")){ out.write ("Metadata Detail ("+metadataSet2+")"); } else{ out.write ("Metadata Detail ("+metadataSetName+")"); } out.write("»"); out.write(""+fileName+""); out.close(); fstream.close(); }catch(IOException ex){ex.printStackTrace();} } /////////////////////////////////////////////////////////////////////////////////////// }