source: other-projects/ir-harvest/trunk/oai-download/org/waikato/harvest/OAIBase2IdList.java@ 31698

Last change on this file since 31698 was 31698, checked in by davidb, 7 years ago

Some initial work on downloading over OAI

  • Property svn:executable set to *
File size: 2.7 KB
Line 
1package org.waikato.harvest;
2
3import java.io.File;
4import java.io.FileInputStream;
5import java.io.InputStream;
6import java.io.InputStreamReader;
7import java.io.Reader;
8import javax.xml.parsers.SAXParser;
9import javax.xml.parsers.SAXParserFactory;
10import org.xml.sax.Attributes;
11import org.xml.sax.InputSource;
12import org.xml.sax.SAXException;
13import org.xml.sax.helpers.DefaultHandler;
14
15
16import java.io.BufferedReader;
17import java.io.BufferedWriter;
18import java.io.IOException;
19import java.io.InputStream;
20import java.io.InputStreamReader;
21import java.io.OutputStream;
22import java.io.Reader;
23import java.io.StringReader;
24import java.net.HttpURLConnection;
25import java.net.URL;
26import java.util.ArrayList;
27import java.util.HashMap;
28import java.util.Iterator;
29import java.util.Set;
30
31import org.waikato.harvest.OAIListSAXHandler;
32
33public class OAIBase2IdList
34{
35
36 public static InputSource GetOAIListPage(String list_url, String list_args)
37 {
38 InputSource isource = null;
39
40 try {
41 HttpURLConnection httpcon = (HttpURLConnection) ((new URL(list_url).openConnection()));
42 httpcon.setDoOutput(true);
43 //httpcon.setRequestProperty("Content-Type", "application/json");
44 //httpcon.setRequestProperty("Accept", "application/json");
45 httpcon.setRequestMethod("GET");
46 httpcon.connect();
47
48 byte[] outputBytes = list_args.getBytes("UTF-8");
49 OutputStream os = httpcon.getOutputStream();
50 os.write(outputBytes);
51 os.close();
52
53 // Read response
54 InputStream istream = httpcon.getInputStream();
55 Reader isreader = new InputStreamReader(istream,"UTF-8");
56 BufferedReader breader = new BufferedReader(isreader);
57 isource = new InputSource(breader);
58 isource.setEncoding("UTF-8");
59
60
61 }
62 catch (Exception e) {
63 e.printStackTrace();
64 }
65
66 return isource;
67 }
68
69 public static void main( String[] args )
70 {
71
72 String[] oai_base_urls = new String[]
73 {
74 "http://nrl.northumbria.ac.uk/cgi/oai2",
75 "http://discovery.ucl.ac.uk/cgi/oai2",
76 "http://etheses.whiterose.ac.uk/cgi/oai2",
77 "http://theses.gla.ac.uk/cgi/oai2"
78 };
79
80 for (String oai_base_url: oai_base_urls) {
81 try {
82 System.out.println("base url = " + oai_base_url);
83 InputSource is = GetOAIListPage(oai_base_url, "verb=ListIdentifiers&metadataPrefix=oai_dc");
84
85 SAXParserFactory factory = SAXParserFactory.newInstance();
86 SAXParser saxParser = factory.newSAXParser();
87
88 OAIListSAXHandler handler = new OAIListSAXHandler();
89
90 saxParser.parse(is, handler);
91 //is.close();
92
93 handler.printIdentifiers();
94 String resumptionToken = handler.getResumptionToken();
95
96
97 }
98 catch (Exception e) {
99 e.printStackTrace();
100 }
101
102 }
103
104 }
105}
Note: See TracBrowser for help on using the repository browser.