import java.io.BufferedReader; import java.io.File; import java.io.FileOutputStream; import java.io.FileWriter; import java.io.InputStream; import java.io.InputStreamReader; import java.io.OutputStreamWriter; import java.io.StringWriter; import java.net.Authenticator; import java.net.HttpURLConnection; import java.net.PasswordAuthentication; import java.net.URL; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.Map; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; //import org.json1.JSONException; //import org.json1.JSONTokener; import org.w3c.dom.Document; import com.google.gson.Gson; import com.google.gson.JsonArray; import com.google.gson.JsonElement; import com.google.gson.JsonObject; import com.google.gson.JsonParser; public class main { // add your place ids to this private static final int[] PLACE_IDS = { 5 }; private static final boolean DEBUG_MODE = true; private static HashMap< String, ArrayList > place_ids; public static void main(String[] args) { setUpProxyConnection(); // key = place_id, value = array list of locations corresponding to that place_id place_ids = new HashMap< String, ArrayList >(); for(int i = 0; i < PLACE_IDS.length; i++) { // construct the url and get its contents String url = "http://thisistheplace.org.nz/index2.php?option=com_kttw&task=display&view=ajaxplacedetails&place_id=" + PLACE_IDS[i] + "&zoom_level=1"; String url_contents = getFileFromUrl(url); url_contents = getJsonFromHtml(url_contents); System.out.println(url_contents); //Gson gson = new Gson(); //gson.fromjson JsonElement jElement = new JsonParser().parse(url_contents); JsonArray jArray = jElement.getAsJsonArray(); // the json has 4 top levels, and we want the 4th level JsonElement je = jArray.get(3); // get all the entries JsonArray jArray2 = je.getAsJsonArray(); for(int x = 0; x < jArray2.size(); x++) { // ok we have an entry, now extract information about it JsonElement entry = jArray2.get(x); JsonObject object_for_entry = entry.getAsJsonObject(); // if the story type is "History" (for historical pieces) if( object_for_entry.get("story_type").toString().equals("\"History\"") ) { String name = object_for_entry.get("name").toString(); String text_content = object_for_entry.get("text_content").toString(); // now we have to strip the quotes off the data name = name.substring(1, name.length()-1); text_content = text_content.substring(1, text_content.length()-1); //text_content = text_content.replace("\\\"","""); //text_content = text_content.replace("<","<"); //text_content = text_content.replace(">",">"); if(DEBUG_MODE) { System.out.print(name); System.out.println(text_content); } // lets strip off the quotes for place_id String place_id = object_for_entry.get("place_id").toString(); place_id = place_id.substring(1, place_id.length()-1); // now grab the latlng for this place_id url = "http://thisistheplace.org.nz/index2.php?option=com_kttw&task=display&view=ajaxplacedetails&place_id=" + place_id; if(DEBUG_MODE) System.out.println(url); url_contents = getFileFromUrl(url); url_contents = getJsonFromHtml(url_contents); jElement = new JsonParser().parse(url_contents); jArray = jElement.getAsJsonArray(); je = jArray.get(0); object_for_entry = je.getAsJsonObject(); String longitude = object_for_entry.get("longitude").toString(); String latitude = object_for_entry.get("latitude").toString(); // again, strip the quotes off the data longitude = longitude.substring(1, longitude.length()-1); latitude = latitude.substring(1, latitude.length()-1); if(DEBUG_MODE) { System.out.println ("For place_id " + place_id + ", we have: " + longitude + ", " + latitude ); System.out.println(""); System.out.println(""); } // ok, create a location Location loc = new Location(); loc.name = name; loc.text_content = text_content; loc.latitude = latitude; loc.longitude = longitude; // ok, grab the array list out of our hash map ArrayList list_for_place_id = place_ids.get(place_id); // if the array list doesn't exist, create it, add our location, and stick the list into the hashmap with the place_id as the key if( list_for_place_id == null) { list_for_place_id = new ArrayList(); list_for_place_id.add(loc); place_ids.put(place_id, list_for_place_id); } else { list_for_place_id.add(loc); place_ids.put(place_id, list_for_place_id); } } } } // now iterate through the hashmap processHashMap(); } private static void processHashMap() { File f = new File("articles/"); if (!f.isDirectory()) f.mkdir(); Iterator it = place_ids.entrySet().iterator(); while( it.hasNext() ) { Map.Entry pairs = (Map.Entry) it.next(); String key = (String) pairs.getKey(); ArrayList values = (ArrayList) pairs.getValue(); f = new File("articles/" + key); if (!f.isDirectory()) f.mkdir(); // ok, let's go through all the locations for this particular place_id for(int x = 0; x < values.size(); x++) { Location loc = values.get(x); writeHtmlForGreenstone(loc, "articles/" + key + "/"); } } } private static void writeHtmlForGreenstone(Location loc, String dir) { try { String name = loc.name; String text_content = loc.text_content; String longitude = loc.longitude; String latitude = loc.latitude; // Don't want any back-slashed quotes (generated in the JSON syntax) or any HTML elements in the title String name_no_formatting = name.replaceAll("<[^>+]>", ""); name_no_formatting = name_no_formatting.replaceAll("\\\\\"", """); // For the HTML body, are OK, but still don't want any JSON escaped quotes text_content = text_content.replaceAll("\\\\\"", """); // ok, we'll first write out a basic html file FileWriter fw = new FileWriter(dir + loc.name.replace("?", "") + ".html"); StringWriter sw = new StringWriter(); sw.write(""+name_no_formatting+""); sw.write("

"+name+"

"); sw.write(text_content); sw.write(""); fw.write(sw.toString()); fw.close(); sw.close(); // ok, now lastly we'll write out a metadata.xml fw = new FileWriter(dir + "metadata.xml"); sw = new StringWriter(); sw.write(""); sw.write(""); sw.write(""); sw.write(""); sw.write(".*"); sw.write(""); sw.write("" + longitude + ""); sw.write("" + latitude + ""); sw.write(""); sw.write(""); sw.write(""); fw.write(sw.toString()); fw.close(); sw.close(); } catch(Exception ex) { ex.printStackTrace(); } } // when thisistheplace.org gives you a json, it's actually a json inside html, so we have to try get the json from the html... // (ideally I'd use an html parser but it didn't seem to like the structure of the html...) private static String getJsonFromHtml(String url_contents) { int idx = url_contents.lastIndexOf(""); int idx2 = url_contents.lastIndexOf(""); String new_url_contents = url_contents.substring(idx + "".length(), idx2); return new_url_contents; } private static Document getDocumentFromUrl(String uri) { Document doc = null; try { DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); //factory.setValidating(true); factory.setIgnoringElementContentWhitespace(true); URL url = new URL(uri); HttpURLConnection connection = (HttpURLConnection)url.openConnection(); connection.setRequestMethod("GET"); connection.connect(); InputStream stream = connection.getInputStream(); DocumentBuilder builder = factory.newDocumentBuilder(); doc = builder.parse(stream); } catch(Exception ex) { ex.printStackTrace(); } return doc; } private static String getFileFromUrl(String uri) { String totalLine = ""; try { URL url = new URL(uri); BufferedReader in = new BufferedReader(new InputStreamReader(url.openStream())); String inputLine; while ( (inputLine = in.readLine()) != null ) totalLine += inputLine; in.close(); } catch(Exception ex) { ex.printStackTrace(); } return totalLine; } private static void setUpProxyConnection() { System.setProperty("http.proxyHost", "proxy.cms.waikato.ac.nz"); System.setProperty("http.proxyPort", "3128"); System.setProperty("http.proxyUser", "cjb60"); System.setProperty("http.proxyPassword", "cjbeck64oo"); Authenticator.setDefault( new Authenticator() { public PasswordAuthentication getPasswordAuthentication() { return new PasswordAuthentication("cjb60", "cjbeck64oo".toCharArray()); } } ); } }