1 | package org.waikato.harvest;
|
---|
2 |
|
---|
3 | import java.io.File;
|
---|
4 | import java.io.FileInputStream;
|
---|
5 | import java.io.InputStream;
|
---|
6 | import java.io.InputStreamReader;
|
---|
7 | import java.io.Reader;
|
---|
8 | import javax.xml.parsers.SAXParser;
|
---|
9 | import javax.xml.parsers.SAXParserFactory;
|
---|
10 | import org.xml.sax.Attributes;
|
---|
11 | import org.xml.sax.InputSource;
|
---|
12 | import org.xml.sax.SAXException;
|
---|
13 | import org.xml.sax.helpers.DefaultHandler;
|
---|
14 |
|
---|
15 |
|
---|
16 | import java.io.BufferedReader;
|
---|
17 | import java.io.BufferedWriter;
|
---|
18 | import java.io.IOException;
|
---|
19 | import java.io.InputStream;
|
---|
20 | import java.io.InputStreamReader;
|
---|
21 | import java.io.OutputStream;
|
---|
22 | import java.io.Reader;
|
---|
23 | import java.io.StringReader;
|
---|
24 | import java.net.HttpURLConnection;
|
---|
25 | import java.net.URL;
|
---|
26 | import java.util.ArrayList;
|
---|
27 | import java.util.HashMap;
|
---|
28 | import java.util.Iterator;
|
---|
29 | import java.util.Set;
|
---|
30 |
|
---|
31 | import org.waikato.harvest.OAIListSAXHandler;
|
---|
32 |
|
---|
33 | public class OAIBase2IdList
|
---|
34 | {
|
---|
35 |
|
---|
36 | public static InputSource GetOAIListPage(String list_url, String list_args)
|
---|
37 | {
|
---|
38 | InputSource isource = null;
|
---|
39 |
|
---|
40 | try {
|
---|
41 | HttpURLConnection httpcon = (HttpURLConnection) ((new URL(list_url).openConnection()));
|
---|
42 | httpcon.setDoOutput(true);
|
---|
43 | //httpcon.setRequestProperty("Content-Type", "application/json");
|
---|
44 | //httpcon.setRequestProperty("Accept", "application/json");
|
---|
45 | httpcon.setRequestMethod("GET");
|
---|
46 | httpcon.connect();
|
---|
47 |
|
---|
48 | byte[] outputBytes = list_args.getBytes("UTF-8");
|
---|
49 | OutputStream os = httpcon.getOutputStream();
|
---|
50 | os.write(outputBytes);
|
---|
51 | os.close();
|
---|
52 |
|
---|
53 | // Read response
|
---|
54 | InputStream istream = httpcon.getInputStream();
|
---|
55 | Reader isreader = new InputStreamReader(istream,"UTF-8");
|
---|
56 | BufferedReader breader = new BufferedReader(isreader);
|
---|
57 | isource = new InputSource(breader);
|
---|
58 | isource.setEncoding("UTF-8");
|
---|
59 |
|
---|
60 |
|
---|
61 | }
|
---|
62 | catch (Exception e) {
|
---|
63 | e.printStackTrace();
|
---|
64 | }
|
---|
65 |
|
---|
66 | return isource;
|
---|
67 | }
|
---|
68 |
|
---|
69 | public static void main( String[] args )
|
---|
70 | {
|
---|
71 |
|
---|
72 | String[] oai_base_urls = new String[]
|
---|
73 | {
|
---|
74 | "http://nrl.northumbria.ac.uk/cgi/oai2",
|
---|
75 | "http://discovery.ucl.ac.uk/cgi/oai2",
|
---|
76 | "http://etheses.whiterose.ac.uk/cgi/oai2",
|
---|
77 | "http://theses.gla.ac.uk/cgi/oai2"
|
---|
78 | };
|
---|
79 |
|
---|
80 | for (String oai_base_url: oai_base_urls) {
|
---|
81 | try {
|
---|
82 | System.out.println("base url = " + oai_base_url);
|
---|
83 | InputSource is = GetOAIListPage(oai_base_url, "verb=ListIdentifiers&metadataPrefix=oai_dc");
|
---|
84 |
|
---|
85 | SAXParserFactory factory = SAXParserFactory.newInstance();
|
---|
86 | SAXParser saxParser = factory.newSAXParser();
|
---|
87 |
|
---|
88 | OAIListSAXHandler handler = new OAIListSAXHandler();
|
---|
89 |
|
---|
90 | saxParser.parse(is, handler);
|
---|
91 | //is.close();
|
---|
92 |
|
---|
93 | handler.printIdentifiers();
|
---|
94 | String resumptionToken = handler.getResumptionToken();
|
---|
95 |
|
---|
96 |
|
---|
97 | }
|
---|
98 | catch (Exception e) {
|
---|
99 | e.printStackTrace();
|
---|
100 | }
|
---|
101 |
|
---|
102 | }
|
---|
103 |
|
---|
104 | }
|
---|
105 | }
|
---|