package org.greenstone3.ms;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

| 19 | public class SearchLink {
|
---|
| 20 |
|
---|
| 21 | //final String path = "/research/cc108/greenstone3/web/mat/";
|
---|
| 22 | final String path = "/home/cc108/MRWks1/describeMessenger/";
|
---|
| 23 |
|
---|
| 24 | String collection ="";
|
---|
| 25 | String metadataElement ="";
|
---|
| 26 | //String core_element="";
|
---|
| 27 | PrintWriter out;
|
---|
| 28 | boolean status = true;
|
---|
| 29 |
|
---|
| 30 | public SearchLink(PrintWriter arg){
|
---|
| 31 | out = arg;
|
---|
| 32 | }
|
---|
| 33 |
|
---|
| 34 | public SearchLink(){
|
---|
| 35 |
|
---|
| 36 | }
|
---|
| 37 |
|
---|
| 38 | public boolean getStatus(){
|
---|
| 39 | return status;
|
---|
| 40 | }
|
---|
| 41 | public static void main(String[] args){
|
---|
| 42 | SearchLink sl = new SearchLink();
|
---|
| 43 | //sl.CreateIndentifierLinkPage("dc.Creator", "Lowe, David J.", "dc.Identifier","poilkjm");
|
---|
| 44 | //sl.CreateIndentifierLinkPage("dc.Publisher2", "ResearchSpace@Auckland", "dc.Identifier1","vmeqlek");
|
---|
| 45 | }
|
---|
| 46 |
|
---|
| 47 | public ArrayList CreateIndentifierLinkPage(String metadataElement, String text, String identifier, String col, HashMap valueMap, HashMap linkMap){
|
---|
| 48 | collection = col;
|
---|
| 49 | //System.out.println("valueMap: "+valueMap.size()+ " linkMap: "+ linkMap.size());
|
---|
| 50 | this.metadataElement = metadataElement;
|
---|
| 51 | //Element ex = openfile(metadataElement,col);
|
---|
| 52 | try{
|
---|
| 53 | if(valueMap.size()==0){
|
---|
| 54 | status = false;
|
---|
| 55 | return new ArrayList();
|
---|
| 56 | }
|
---|
| 57 | else{
|
---|
| 58 | ArrayList alist = retrieveHASHID(valueMap,text);
|
---|
| 59 | if(alist.size()==0){
|
---|
| 60 | status = false;
|
---|
| 61 | return new ArrayList();
|
---|
| 62 | }
|
---|
| 63 | //ex = openfile(identifier,col);
|
---|
| 64 | if(linkMap.size()==0){
|
---|
| 65 | //out.write("<p>"+ identifier +" does not exist </p>");
|
---|
| 66 | status = false;
|
---|
| 67 | return new ArrayList();
|
---|
| 68 | //System.out.println(identifier + "does not exist2");
|
---|
| 69 | //return createResult(new ArrayList());
|
---|
| 70 | }
|
---|
| 71 | else{
|
---|
| 72 | ArrayList alist2 = retrieveHASHID(linkMap,alist,metadataElement);
|
---|
| 73 | //System.out.println("break3");
|
---|
| 74 | return alist2;
|
---|
| 75 | }
|
---|
| 76 | }
|
---|
| 77 | }catch(Exception ex){ex.printStackTrace(); return new ArrayList();}
|
---|
| 78 | //return "";
|
---|
| 79 | }
|
---|
| 80 |
|
---|
| 81 |
|
---|
| 82 | private Element openfile(String core_element, String collection){
|
---|
| 83 |
|
---|
| 84 | try{
|
---|
| 85 | DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
|
---|
| 86 | DocumentBuilder docBuilder = docBuilderFactory.newDocumentBuilder();
|
---|
| 87 | Document doc = docBuilder.newDocument();
|
---|
| 88 | //Element rootNode = doc.
|
---|
| 89 | //System.out.println(path+collection+"/metadataStats/"+core_element+".xml");
|
---|
| 90 | doc = docBuilder.parse (new File(path+"/"+collection+"/metadataStats/"+core_element+".xml"));
|
---|
| 91 | Element rootNode = doc.getDocumentElement();
|
---|
| 92 | return rootNode;
|
---|
| 93 | }catch (Exception e) {
|
---|
| 94 | //e.printStackTrace();
|
---|
| 95 | //out.write("<p>"+core_element +" does not exist </p>");
|
---|
| 96 | System.out.println(core_element + " does not exist1");
|
---|
| 97 |
|
---|
| 98 | return null;
|
---|
| 99 | }
|
---|
| 100 | }
|
---|
| 101 |
|
---|
| 102 | private ArrayList retrieveHASHID (HashMap hp, String text){
|
---|
| 103 | //ArrayList alist = new ArrayList();
|
---|
| 104 |
|
---|
| 105 |
|
---|
| 106 |
|
---|
| 107 | if(hp.containsKey(text)){
|
---|
| 108 | InternalLink il = (InternalLink) hp.get(text);
|
---|
| 109 | return il.retrieveList();
|
---|
| 110 | }
|
---|
| 111 | else{
|
---|
| 112 | return new ArrayList();
|
---|
| 113 | }
|
---|
| 114 | }
|
---|
| 115 |
|
---|
| 116 | private ArrayList retrieveHASHID (HashMap hp, ArrayList arg,String metadataElement){
|
---|
| 117 | //System.out.println(metadataElement);
|
---|
| 118 | ArrayList alist = new ArrayList();
|
---|
| 119 | for(int i = 0; i<arg.size(); i++){
|
---|
| 120 | String text = (String)arg.get(i);
|
---|
| 121 | if(hp.containsKey(text)){
|
---|
| 122 | InternalLink il = (InternalLink) hp.get(text);
|
---|
| 123 | ArrayList arrayList = il.retrieveList();
|
---|
| 124 | if(arrayList.size()!=0){
|
---|
| 125 | alist.add(il.retrieveList().get(0));
|
---|
| 126 | }
|
---|
| 127 | else{
|
---|
| 128 | System.out.println(text);
|
---|
| 129 | alist.add(" ");
|
---|
| 130 | }
|
---|
| 131 | //return il.retrieveList();
|
---|
| 132 | }
|
---|
| 133 | /*
|
---|
| 134 | else{
|
---|
| 135 | return new ArrayList();
|
---|
| 136 | }
|
---|
| 137 | */
|
---|
| 138 | }
|
---|
| 139 | return alist;
|
---|
| 140 | }
|
---|
| 141 | /*
|
---|
| 142 | private ArrayList retrieveHASHID (Element ex, ArrayList alist1){
|
---|
| 143 | ArrayList alist = new ArrayList();
|
---|
| 144 |
|
---|
| 145 | NodeList listOfFrequency = ex.getElementsByTagName("Document");
|
---|
| 146 |
|
---|
| 147 | if(listOfFrequency.getLength()==0){
|
---|
| 148 | //System.out.println("return 0");
|
---|
| 149 | return alist;
|
---|
| 150 | }
|
---|
| 151 |
|
---|
| 152 | for(int s=0; s<listOfFrequency.getLength() ; s++){
|
---|
| 153 | Node docNode = listOfFrequency.item(s);
|
---|
| 154 | NamedNodeMap NodeIDMap = docNode.getAttributes();
|
---|
| 155 | Node DocNodeID = NodeIDMap.item(0);
|
---|
| 156 | String DocID = DocNodeID.getNodeValue();
|
---|
| 157 |
|
---|
| 158 | if(alist1.contains(DocID)){
|
---|
| 159 | //System.out.println("same");
|
---|
| 160 | Element xNode = (Element)docNode;
|
---|
| 161 | NodeList valueList = xNode.getElementsByTagName("ActualValue");
|
---|
| 162 |
|
---|
| 163 |
|
---|
| 164 | for(int y = 0; y<valueList.getLength(); y++){
|
---|
| 165 | Element valueElement = (Element)valueList.item(y);
|
---|
| 166 | NodeList textFNList = valueElement.getChildNodes();
|
---|
| 167 | String textValue = ((Node)textFNList.item(0)).getNodeValue();
|
---|
| 168 | //System.out.println(textValue);
|
---|
| 169 | if(textValue.startsWith("http://")){
|
---|
| 170 | alist.add(textValue);
|
---|
| 171 | break;
|
---|
| 172 | }
|
---|
| 173 | }
|
---|
| 174 | }
|
---|
| 175 | }
|
---|
| 176 |
|
---|
| 177 | return alist;
|
---|
| 178 | }
|
---|
| 179 | */
|
---|
| 180 | /*
|
---|
| 181 | private String createResult(ArrayList alist){
|
---|
| 182 | //System.out.println("test");\
|
---|
| 183 | String url = path+collection+"/"+metadataElement+"_result.html";
|
---|
| 184 | //System.out.println(url);
|
---|
| 185 | try{
|
---|
| 186 | //FileWriter fstream = new FileWriter("/home/cc108/MRWks1/Quality"+"/"+collection+"/"+"hello"+"_result.html");
|
---|
| 187 | //path /research/cc108/greenstone3/web/mat/
|
---|
| 188 |
|
---|
| 189 | //FileWriter fstream = new FileWriter(path+collection+"/"+metadataElement+"result.html");
|
---|
| 190 | FileWriter fstream = new FileWriter(url);
|
---|
| 191 | BufferedWriter out = new BufferedWriter(fstream);
|
---|
| 192 | out.write("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\r\n");
|
---|
| 193 | out.write("<html>\r\n");
|
---|
| 194 | out.write("<head>\r\n<title>Result</title>\r\n");
|
---|
| 195 |
|
---|
| 196 | out.write("<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\">\r\n");
|
---|
| 197 | out.write("</head>\r\n");
|
---|
| 198 | out.write("<body>\r\n");
|
---|
| 199 | out.write("<table border=\"1\" align=\"center\">\r\n");
|
---|
| 200 | out.write("<tr>");
|
---|
| 201 | out.write("<td align=center> No.");
|
---|
| 202 | out.write("<td align=center> URL");
|
---|
| 203 | //System.out.println(alist.size());
|
---|
| 204 | for(int i = 0; i < alist.size(); i++){
|
---|
| 205 | out.write("<tr>");
|
---|
| 206 | out.write("<td align>"+(i+1));
|
---|
| 207 | out.write("<td align><a href=\""+(String)alist.get(i)+"\">"+(String)alist.get(i)+"</a>\n");
|
---|
| 208 |
|
---|
| 209 | }
|
---|
| 210 | out.write("</table></body></html>\r\n");
|
---|
| 211 | out.close();
|
---|
| 212 | fstream.close();
|
---|
| 213 | //System.out.println("/home/cc108/MRWks1/Quality"+"/"+collection+"/"+metadataElement+"_result.html");
|
---|
| 214 | }catch(Exception ex){
|
---|
| 215 | //ex.printStackTrace();
|
---|
| 216 | out.write("<p> links cannot be generated </p>");
|
---|
| 217 | }
|
---|
| 218 | return url;
|
---|
| 219 | }
|
---|
| 220 | */
|
---|
| 221 |
|
---|
| 222 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
---|
| 223 | public HashMap createValueMap(String core_element, String collection){
|
---|
| 224 | //System.out.println("Create value map1 "+core_element);
|
---|
| 225 | HashMap hp = new HashMap();
|
---|
| 226 | Element ex = openfile(core_element,collection);
|
---|
| 227 | NodeList valueList = ex.getElementsByTagName("ActualValue");
|
---|
| 228 |
|
---|
| 229 | for(int s=0; s<valueList.getLength() ; s++){
|
---|
| 230 | Element docNode = (Element)valueList.item(s);
|
---|
| 231 | NodeList NodeIDMap = docNode.getChildNodes();
|
---|
| 232 | Node DocNodeID = NodeIDMap.item(0);
|
---|
| 233 | String actualValue = DocNodeID.getNodeValue();
|
---|
| 234 | String DocID = DocNodeID.getParentNode().getParentNode().getAttributes().item(0).getNodeValue();
|
---|
| 235 |
|
---|
| 236 | if(hp.containsKey(actualValue)){
|
---|
| 237 | InternalLink il = (InternalLink) hp.get(actualValue);
|
---|
| 238 | il.increaseElement(DocID);
|
---|
| 239 | hp.put(actualValue, il);
|
---|
| 240 | }
|
---|
| 241 | else{
|
---|
| 242 | InternalLink il = new InternalLink();
|
---|
| 243 | il.setValue(actualValue);
|
---|
| 244 | il.increaseElement(DocID);
|
---|
| 245 | hp.put(actualValue, il);
|
---|
| 246 | }
|
---|
| 247 | }
|
---|
| 248 | System.out.println("Create value map2 "+core_element);
|
---|
| 249 | return hp;
|
---|
| 250 | }
|
---|
| 251 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
---|
| 252 | public HashMap createLinkMap (String core_element, String collection){
|
---|
| 253 | //System.out.println("Create link map1 "+core_element);
|
---|
| 254 | //ArrayList alist = new ArrayList();
|
---|
| 255 | HashMap hp = new HashMap();
|
---|
| 256 | Element ex = openfile(core_element,collection);
|
---|
| 257 | NodeList listOfFrequency = ex.getElementsByTagName("Document");
|
---|
| 258 |
|
---|
| 259 | if(listOfFrequency.getLength()==0){
|
---|
| 260 | return hp;
|
---|
| 261 | }
|
---|
| 262 |
|
---|
| 263 | for(int s=0; s<listOfFrequency.getLength() ; s++){
|
---|
| 264 | Node docNode = listOfFrequency.item(s);
|
---|
| 265 | NamedNodeMap NodeIDMap = docNode.getAttributes();
|
---|
| 266 | Node DocNodeID = NodeIDMap.item(0);
|
---|
| 267 | String DocID = DocNodeID.getNodeValue();
|
---|
| 268 |
|
---|
| 269 |
|
---|
| 270 | if(hp.containsKey(DocID)){
|
---|
| 271 |
|
---|
| 272 | Element xNode = (Element)docNode;
|
---|
| 273 | NodeList valueList = xNode.getElementsByTagName("ActualValue");
|
---|
| 274 |
|
---|
| 275 | ArrayList alist = new ArrayList();
|
---|
| 276 | for(int y = 0; y<valueList.getLength(); y++){
|
---|
| 277 | Element valueElement = (Element)valueList.item(y);
|
---|
| 278 | NodeList textFNList = valueElement.getChildNodes();
|
---|
| 279 | String textValue = ((Node)textFNList.item(0)).getNodeValue();
|
---|
| 280 |
|
---|
| 281 | if(textValue.startsWith("http://")){
|
---|
| 282 | InternalLink il = (InternalLink)hp.get(DocID);
|
---|
| 283 | //il.retrieveList();
|
---|
| 284 | //alist.add(textValue);
|
---|
| 285 | il.increaseElement(textValue);
|
---|
| 286 | break;
|
---|
| 287 | }
|
---|
| 288 | }
|
---|
| 289 | }
|
---|
| 290 |
|
---|
| 291 |
|
---|
| 292 | else {
|
---|
| 293 | InternalLink il = new InternalLink();
|
---|
| 294 | il.setValue(DocID);
|
---|
| 295 |
|
---|
| 296 | Element xNode = (Element)docNode;
|
---|
| 297 | NodeList valueList = xNode.getElementsByTagName("ActualValue");
|
---|
| 298 |
|
---|
| 299 | for(int y = 0; y<valueList.getLength(); y++){
|
---|
| 300 | Element valueElement = (Element)valueList.item(y);
|
---|
| 301 | NodeList textFNList = valueElement.getChildNodes();
|
---|
| 302 | String textValue = ((Node)textFNList.item(0)).getNodeValue();
|
---|
| 303 |
|
---|
| 304 | if(textValue.startsWith("http://")){
|
---|
| 305 | //alist.add(textValue);
|
---|
| 306 | il.increaseElement(textValue);
|
---|
| 307 | break;
|
---|
| 308 | }
|
---|
| 309 | }
|
---|
| 310 | hp.put(DocID, il);
|
---|
| 311 | }
|
---|
| 312 | }
|
---|
| 313 | System.out.println("Create value map2 "+core_element);
|
---|
| 314 | return hp;
|
---|
| 315 | }
|
---|
| 316 |
|
---|
| 317 |
|
---|
| 318 |
|
---|
| 319 |
|
---|
| 320 |
|
---|
| 321 |
|
---|
| 322 |
|
---|
| 323 |
|
---|
| 324 |
|
---|
| 325 |
|
---|
| 326 |
|
---|
| 327 |
|
---|
| 328 |
|
---|
| 329 |
|
---|
| 330 |
|
---|
| 331 |
|
---|
| 332 |
|
---|
| 333 |
|
---|
| 334 |
|
---|
| 335 |
|
---|
| 336 |
|
---|
| 337 | ////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
---|
| 338 | }
|
---|