/*
 * source: other-projects/tipple-android/this-is-the-place/src/main.java@ 26899
 *
 * Last change on this file since 26899 was 26899, checked in by davidb, 11 years ago
 *
 * Tipple reborn after Chris's Summer of Code 2013
 *
 * File size: 9.8 KB
 */
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.StringWriter;
import java.net.Authenticator;
import java.net.HttpURLConnection;
import java.net.PasswordAuthentication;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

//import org.json1.JSONException;
//import org.json1.JSONTokener;
import org.w3c.dom.Document;

import com.google.gson.Gson;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;


33public class main
34{
35
36 // add your place ids to this
37 private static final int[] PLACE_IDS = { 5 };
38 private static final boolean DEBUG_MODE = true;
39 private static HashMap< String, ArrayList<Location> > place_ids;
40
41 public static void main(String[] args)
42 {
43 setUpProxyConnection();
44
45 // key = place_id, value = array list of locations corresponding to that place_id
46 place_ids = new HashMap< String, ArrayList<Location> >();
47
48 for(int i = 0; i < PLACE_IDS.length; i++)
49 {
50 // construct the url and get its contents
51 String url = "http://thisistheplace.org.nz/index2.php?option=com_kttw&task=display&view=ajaxplacedetails&place_id=" + PLACE_IDS[i] + "&zoom_level=1";
52 String url_contents = getFileFromUrl(url);
53 url_contents = getJsonFromHtml(url_contents);
54 System.out.println(url_contents);
55
56 //Gson gson = new Gson();
57 //gson.fromjson
58
59 JsonElement jElement = new JsonParser().parse(url_contents);
60 JsonArray jArray = jElement.getAsJsonArray();
61 // the json has 4 top levels, and we want the 4th level
62 JsonElement je = jArray.get(3);
63
64 // get all the entries
65 JsonArray jArray2 = je.getAsJsonArray();
66 for(int x = 0; x < jArray2.size(); x++)
67 {
68
69 // ok we have an entry, now extract information about it
70 JsonElement entry = jArray2.get(x);
71 JsonObject object_for_entry = entry.getAsJsonObject();
72
73 // if the story type is "History" (for historical pieces)
74 if( object_for_entry.get("story_type").toString().equals("\"History\"") )
75 {
76 String name = object_for_entry.get("name").toString();
77 String text_content = object_for_entry.get("text_content").toString();
78 // now we have to strip the quotes off the data
79 name = name.substring(1, name.length()-1);
80 text_content = text_content.substring(1, text_content.length()-1);
81 //text_content = text_content.replace("\\\"","&quot;");
82 //text_content = text_content.replace("<","&lt;");
83 //text_content = text_content.replace(">","&gt;");
84
85 if(DEBUG_MODE)
86 {
87 System.out.print(name);
88 System.out.println(text_content);
89 }
90
91 // lets strip off the quotes for place_id
92 String place_id = object_for_entry.get("place_id").toString();
93 place_id = place_id.substring(1, place_id.length()-1);
94
95 // now grab the latlng for this place_id
96 url = "http://thisistheplace.org.nz/index2.php?option=com_kttw&task=display&view=ajaxplacedetails&place_id=" + place_id;
97 if(DEBUG_MODE)
98 System.out.println(url);
99 url_contents = getFileFromUrl(url);
100 url_contents = getJsonFromHtml(url_contents);
101 jElement = new JsonParser().parse(url_contents);
102 jArray = jElement.getAsJsonArray();
103 je = jArray.get(0);
104 object_for_entry = je.getAsJsonObject();
105
106 String longitude = object_for_entry.get("longitude").toString();
107 String latitude = object_for_entry.get("latitude").toString();
108 // again, strip the quotes off the data
109 longitude = longitude.substring(1, longitude.length()-1);
110 latitude = latitude.substring(1, latitude.length()-1);
111
112 if(DEBUG_MODE)
113 {
114 System.out.println ("For place_id " + place_id + ", we have: " + longitude + ", " + latitude );
115 System.out.println("");
116 System.out.println("");
117 }
118
119 // ok, create a location
120 Location loc = new Location();
121 loc.name = name;
122 loc.text_content = text_content;
123 loc.latitude = latitude;
124 loc.longitude = longitude;
125
126 // ok, grab the array list out of our hash map
127 ArrayList<Location> list_for_place_id = place_ids.get(place_id);
128
129 // if the array list doesn't exist, create it, add our location, and stick the list into the hashmap with the place_id as the key
130 if( list_for_place_id == null)
131 {
132 list_for_place_id = new ArrayList<Location>();
133 list_for_place_id.add(loc);
134 place_ids.put(place_id, list_for_place_id);
135 }
136 else
137 {
138 list_for_place_id.add(loc);
139 place_ids.put(place_id, list_for_place_id);
140 }
141
142 }
143 }
144
145 }
146
147 // now iterate through the hashmap
148 processHashMap();
149 }
150
151 private static void processHashMap()
152 {
153 File f = new File("articles/");
154 if (!f.isDirectory())
155 f.mkdir();
156
157 Iterator it = place_ids.entrySet().iterator();
158 while( it.hasNext() )
159 {
160 Map.Entry pairs = (Map.Entry) it.next();
161
162 String key = (String) pairs.getKey();
163 ArrayList<Location> values = (ArrayList<Location>) pairs.getValue();
164
165 f = new File("articles/" + key);
166 if (!f.isDirectory())
167 f.mkdir();
168
169 // ok, let's go through all the locations for this particular place_id
170 for(int x = 0; x < values.size(); x++)
171 {
172 Location loc = values.get(x);
173
174 writeHtmlForGreenstone(loc, "articles/" + key + "/");
175 }
176
177
178 }
179 }
180
181 private static void writeHtmlForGreenstone(Location loc, String dir)
182 {
183 try
184 {
185 String name = loc.name;
186 String text_content = loc.text_content;
187 String longitude = loc.longitude;
188 String latitude = loc.latitude;
189
190 // Don't want any back-slashed quotes (generated in the JSON syntax) or any HTML elements in the title
191 String name_no_formatting = name.replaceAll("<[^>+]>", "");
192 name_no_formatting = name_no_formatting.replaceAll("\\\\\"", "&quot;");
193
194 // For the HTML body, <element> are OK, but still don't want any JSON escaped quotes
195 text_content = text_content.replaceAll("\\\\\"", "&quot;");
196
197 // ok, we'll first write out a basic html file
198 FileWriter fw = new FileWriter(dir + loc.name.replace("?", "") + ".html");
199 StringWriter sw = new StringWriter();
200 sw.write("<html><head><title>"+name_no_formatting+"</title></head><body>");
201 sw.write("<h2>"+name+"</h2>");
202 sw.write(text_content);
203 sw.write("</body></html>");
204 fw.write(sw.toString());
205 fw.close();
206 sw.close();
207
208
209 // ok, now lastly we'll write out a metadata.xml
210 fw = new FileWriter(dir + "metadata.xml");
211 sw = new StringWriter();
212
213 sw.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
214 sw.write("<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">");
215 sw.write("<DirectoryMetadata>");
216 sw.write("<FileSet>");
217 sw.write("<FileName>.*</FileName>");
218 sw.write("<Description>");
219
220
221 sw.write("<Metadata mode=\"override\" name=\"tp.Longitude\">" + longitude + "</Metadata>");
222 sw.write("<Metadata mode=\"override\" name=\"tp.Latitude\">" + latitude + "</Metadata>");
223
224 sw.write("</Description>");
225 sw.write("</FileSet>");
226 sw.write("</DirectoryMetadata>");
227
228 fw.write(sw.toString());
229 fw.close();
230 sw.close();
231 }
232 catch(Exception ex)
233 {
234 ex.printStackTrace();
235 }
236 }
237
238 // when thisistheplace.org gives you a json, it's actually a json inside html, so we have to try get the json from the html...
239 // (ideally I'd use an html parser but it didn't seem to like the structure of the html...)
240 private static String getJsonFromHtml(String url_contents)
241 {
242 int idx = url_contents.lastIndexOf("<body class=\"contentpane\">");
243 int idx2 = url_contents.lastIndexOf("</body>");
244 String new_url_contents = url_contents.substring(idx + "<body class=\"contentpane\">".length(), idx2);
245
246 return new_url_contents;
247 }
248
249 private static Document getDocumentFromUrl(String uri)
250 {
251 Document doc = null;
252
253 try
254 {
255 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
256 //factory.setValidating(true);
257 factory.setIgnoringElementContentWhitespace(true);
258
259 URL url = new URL(uri);
260 HttpURLConnection connection = (HttpURLConnection)url.openConnection();
261 connection.setRequestMethod("GET");
262 connection.connect();
263 InputStream stream = connection.getInputStream();
264
265 DocumentBuilder builder = factory.newDocumentBuilder();
266 doc = builder.parse(stream);
267 }
268 catch(Exception ex)
269 {
270 ex.printStackTrace();
271 }
272
273 return doc;
274 }
275
276 private static String getFileFromUrl(String uri)
277 {
278 String totalLine = "";
279
280 try
281 {
282 URL url = new URL(uri);
283 BufferedReader in = new BufferedReader(new InputStreamReader(url.openStream()));
284
285 String inputLine;
286 while ( (inputLine = in.readLine()) != null )
287 totalLine += inputLine;
288
289 in.close();
290 }
291 catch(Exception ex)
292 {
293 ex.printStackTrace();
294 }
295
296 return totalLine;
297 }
298
299 private static void setUpProxyConnection()
300 {
301 System.setProperty("http.proxyHost", "proxy.cms.waikato.ac.nz");
302 System.setProperty("http.proxyPort", "3128");
303 System.setProperty("http.proxyUser", "cjb60");
304 System.setProperty("http.proxyPassword", "cjbeck64oo");
305
306 Authenticator.setDefault(
307 new Authenticator()
308 {
309 public PasswordAuthentication getPasswordAuthentication()
310 {
311 return new PasswordAuthentication("cjb60", "cjbeck64oo".toCharArray());
312 }
313 }
314 );
315 }
316}
// Note: See TracBrowser for help on using the repository browser.