1 |
|
---|
2 | import java.io.*;
|
---|
3 | import java.net.*;
|
---|
4 | import java.nio.*;
|
---|
5 | import java.nio.channels.*;
|
---|
6 | import java.nio.file.*;
|
---|
7 | import java.util.*;
|
---|
8 |
|
---|
9 | import org.jsoup.Jsoup;
|
---|
10 | import org.jsoup.nodes.Document;
|
---|
11 | import org.jsoup.nodes.Element;
|
---|
12 | import org.jsoup.select.Elements;
|
---|
13 |
|
---|
14 |
|
---|
15 | class AMCArtistScrape
|
---|
16 | {
|
---|
17 |
|
---|
18 | public static void downloadURL(String url_str) {
|
---|
19 |
|
---|
20 | try {
|
---|
21 | URL url = new URL(url_str);
|
---|
22 |
|
---|
23 | String local_file_str = url_str.substring(url_str.lastIndexOf('/')+1, url_str.length() );
|
---|
24 | File local_file = new File(local_file_str);
|
---|
25 |
|
---|
26 | if (!local_file.exists()) {
|
---|
27 | System.out.printf("Downloading audio to '%s'...\n",local_file_str);
|
---|
28 | FileOutputStream fos = new FileOutputStream(local_file);
|
---|
29 |
|
---|
30 | ReadableByteChannel rbc = Channels.newChannel(url.openStream());
|
---|
31 | fos.getChannel().transferFrom(rbc, 0, Long.MAX_VALUE);
|
---|
32 |
|
---|
33 | System.out.println("... done");
|
---|
34 | }
|
---|
35 | else {
|
---|
36 | System.out.printf("Skipping '%s' as is already exists\n",local_file_str);
|
---|
37 | }
|
---|
38 |
|
---|
39 | }
|
---|
40 | catch (Exception e) {
|
---|
41 | e.printStackTrace();
|
---|
42 | }
|
---|
43 | }
|
---|
44 |
|
---|
45 | public static ArrayList<MusicRecord> extractResults(String urlstr)
|
---|
46 | {
|
---|
47 |
|
---|
48 | ArrayList<MusicRecord> records = new ArrayList<MusicRecord>();
|
---|
49 |
|
---|
50 | try {
|
---|
51 | Document doc = Jsoup.connect(urlstr).get();
|
---|
52 |
|
---|
53 | Elements table_rows = doc.select("table.search_results tr");
|
---|
54 |
|
---|
55 |
|
---|
56 |
|
---|
57 | for (Element tr : table_rows) {
|
---|
58 |
|
---|
59 | Elements td_cells = tr.select("td");
|
---|
60 |
|
---|
61 | if (td_cells.size() == 6) {
|
---|
62 | Element title_anchor = td_cells.get(0).select("a").first();
|
---|
63 | String track_url = title_anchor.attr("href");
|
---|
64 | String title = title_anchor.ownText();
|
---|
65 | String instrumentation = td_cells.get(1).ownText();
|
---|
66 | String composer = td_cells.get(2).ownText();
|
---|
67 | String year = td_cells.get(3).ownText();
|
---|
68 |
|
---|
69 | Element audio_elem = td_cells.get(4).select("audio").first();
|
---|
70 | if (audio_elem != null) {
|
---|
71 | String audio_url = audio_elem.attr("src");
|
---|
72 |
|
---|
73 | MusicRecord mr = new MusicRecord(track_url,title,composer,year,audio_url);
|
---|
74 | System.out.println(mr);
|
---|
75 |
|
---|
76 | records.add(mr);
|
---|
77 | }
|
---|
78 |
|
---|
79 | }
|
---|
80 |
|
---|
81 | }
|
---|
82 | }
|
---|
83 | catch (Exception e) {
|
---|
84 | e.printStackTrace();
|
---|
85 | }
|
---|
86 |
|
---|
87 | return records;
|
---|
88 | }
|
---|
89 |
|
---|
90 | private static void printf(String msg, Object... args)
|
---|
91 | {
|
---|
92 | System.out.println(String.format(msg, args));
|
---|
93 | }
|
---|
94 |
|
---|
95 |
|
---|
96 | public static void main(String[] args)
|
---|
97 | {
|
---|
98 |
|
---|
99 | // http://www.australianmusiccentre.com.au/search?type=work&sort=alphaTitleSort&wfc[]=Roger+Dean
|
---|
100 | String baseUrl = "http://www.australianmusiccentre.com.au/search?type=work&sort=alphaTitleSort";
|
---|
101 | baseUrl += "&wfc[]=";
|
---|
102 |
|
---|
103 | ArrayList<MusicRecord> music_records = extractResults(baseUrl+"Roger+Dean");
|
---|
104 |
|
---|
105 | for (MusicRecord mr: music_records) {
|
---|
106 | downloadURL(mr.getAudioURL());
|
---|
107 | }
|
---|
108 | }
|
---|
109 | }
|
---|