#!/usr/bin/perl -w
2 |
|
---|
3 |
|
---|
# Fetch one detail page from the CAREO site and return its HTML.
#
# Parameters:
#   $dl - site-relative link; it is appended to "http://careo.ucalgary.ca/"
#         to form the URL handed to wget.
#
# Returns the page body as one string (empty when wget produced no output).
# Dies if the wget process cannot be started; a non-zero wget exit status
# only produces a warning.
sub download_detail
{
    my ($dl) = @_;

    my $url = "http://careo.ucalgary.ca/$dl";

    # List-form pipe open runs wget directly, with no shell in between, so
    # characters such as "&", "$" or backticks in a scraped link cannot be
    # interpreted as shell syntax.
    open(my $pin, '-|', 'wget', '-q', $url, '-O', '-')
        or die "Unable to open wget command: $!\n";

    # Slurp the whole response in a single read.
    my $detail_html = do { local $/; <$pin> };
    $detail_html = "" unless defined $detail_html;

    # close() on a pipe reflects the child's exit status.  A failed download
    # is reported but does not abort the rest of the crawl.
    close($pin)
        or warn "wget failed for $url (exit status " . ($? >> 8) . ")\n";

    return $detail_html;
}
20 |
|
---|
# Follow the LOM link(s) found on a detail page and save the <lom>...</lom>
# record embedded in each linked page to "$object.xml" in the current
# directory.
#
# Parameters:
#   $wosid       - accepted for interface compatibility; not used here.
#   $object      - object id; used as the base name of the output file.
#   $detail_html - HTML of the detail page to scan for links.
#
# Links whose href contains "1.1.1.1.1" are treated as LOM links.  If several
# match, each is written to the same "$object.xml", so the last one wins.
# Pages with no recognisable record are skipped with a warning instead of
# producing an empty file.
#
# Dies if wget cannot be started or the output file cannot be written.
sub download_lom_html
{
    my ($wosid, $object, $detail_html) = @_;

    my @links     = ($detail_html =~ m/<a\s+href=\"(.*?)\">/sg);
    my @lom_links = grep { m/1\.1\.1\.1\.1/ } @links;

    foreach my $ll (@lom_links) {
        my $url = "http://careo.ucalgary.ca/$ll";

        # List-form pipe open: no shell is involved, so nothing in the
        # scraped link can be interpreted as shell syntax.
        open(my $pin, '-|', 'wget', '-q', $url, '-O', '-')
            or die "Unable to open wget command: $!\n";

        my $lom_html = do { local $/; <$pin> };
        $lom_html = "" unless defined $lom_html;

        close($pin)
            or warn "wget failed for $url (exit status " . ($? >> 8) . ")\n";

        # Greedy match on purpose: take everything from the first "<lom" to
        # the last "/lom>" on the page.
        my ($lom_xml) = ($lom_html =~ m/(<lom.*\/lom>)/s);

        if (!defined $lom_xml) {
            # NOTE(review): presumably the page shows the record as
            # HTML-escaped text -- confirm against a live page.  If so,
            # extract the escaped form and decode it.
            ($lom_xml) = ($lom_html =~ m/(&lt;lom.*\/lom&gt;)/s);

            if (defined $lom_xml) {
                $lom_xml =~ s/&lt;/</g;
                $lom_xml =~ s/&gt;/>/g;
                $lom_xml =~ s/&quot;/"/g;
                # "&amp;" must be decoded last so that text such as
                # "&amp;lt;" is not decoded twice.
                $lom_xml =~ s/&amp;/&/g;
            }
        }

        if (!defined $lom_xml) {
            warn "No lom record found at $url\n";
            next;
        }

        print "Saving lom as $object.xml\n";
        open(my $hout, '>', "$object.xml")
            or die "Unable to open file out for writing: $!\n";

        print {$hout} $lom_xml;

        # Buffered write errors only surface when the handle is closed.
        close($hout)
            or die "Unable to finish writing $object.xml: $!\n";
    }

    return;
}
60 |
|
---|
61 |
|
---|
# Crawl one category listing: download the page at $cat_url, find every link
# whose href contains "details?", and for each one fetch the detail page and
# save its LOM record.
#
# Parameters:
#   $cat_url - absolute URL of the category listing page.
#
# Prints "wosid, object" for every detail link it processes.  Links from
# which wosid/object cannot be parsed are skipped with a warning.
# Dies if the wget process cannot be started.
sub browse_category
{
    my ($cat_url) = @_;

    # List-form pipe open runs wget without a shell, so the URL is passed
    # through verbatim.
    open(my $pin, '-|', 'wget', '-q', $cat_url, '-O', '-')
        or die "Unable to open wget command: $!\n";

    my $file = do { local $/; <$pin> };
    $file = "" unless defined $file;

    close($pin)
        or warn "wget failed for $cat_url (exit status " . ($? >> 8) . ")\n";

    my @links        = ($file =~ m/<a\s+href=\"(.*?)\">/sg);
    my @detail_links = grep { m/details\?/ } @links;

    foreach my $dl (@detail_links) {
        # href values are taken raw from the HTML, where "&" is written as
        # "&amp;"; decode it before the link is used as a URL.
        $dl =~ s/&amp;/&/g;

        my ($wosid, $object) = ($dl =~ m/wosid=(\w+).*object=(\w+)$/);

        # Without an object id there is no sensible output file name.
        unless (defined $wosid && defined $object) {
            warn "Skipping detail link without wosid/object: $dl\n";
            next;
        }

        print "$wosid, $object\n";

        ## print "$dl\n";

        my $detail_html = download_detail($dl);

        download_lom_html($wosid, $object, $detail_html);
    }

    return;
}
92 |
|
---|
93 |
|
---|
# Crawl the arts category listing.
#
# The URL is hard-coded.  Its path embeds what looks like a session id (the
# same token appears as "wosid" in detail links), so it presumably stops
# working once that session expires -- TODO confirm.
sub browse_arts
{
    return browse_category("http://careo.ucalgary.ca/cgi-bin/WebObjects/CAREO.woa/1/wo/tUkzqf9mt6KIbfa9Dmbd0g/40.0.6.7.5.1.7");
}
100 |
|
---|
# Crawl the science category listing.
#
# The URL is hard-coded.  One path segment looks like a session identifier,
# so the URL presumably has a limited lifetime -- TODO confirm.
sub browse_science
{
    return browse_category("http://careo.ucalgary.ca/cgi-bin/WebObjects/CAREO.woa/1/wo/BeWs1oNbUyFqlcTa4wWTuM/8.0.5.3.1.1.1.0.2");
}
107 |
|
---|
# Script entry point: crawl each hard-coded category, arts first.
foreach my $crawl (\&browse_arts, \&browse_science) {
    $crawl->();
}
110 |
|
---|
111 |
|
---|
# Sample URLs from the site, kept for reference.
#
# Page whose path contains "1.1.1.1.1", the pattern download_lom_html uses to
# pick out LOM links:
#http://careo.ucalgary.ca/cgi-bin/WebObjects/CAREO.woa/1/wo/Nvlr48HkBbktBsDlEKDZWM/34.0.6.1.3.0.1.1.1.1.1.0
#http://careo.ucalgary.ca/cgi-bin/WebObjects/CAREO.woa/1/wo/Nvlr48HkBbktBsDlEKDZWM/34.0.6.1.3.0.1.1.1.1.1.0
#
# Detail link ("details?", the form browse_category follows) and the matching
# "discuss?" link for the same object:
#http://careo.ucalgary.ca/cgi-bin/WebObjects/CAREO.woa/1/wa/details?theme=careo&wosid=tUkzqf9mt6KIbfa9Dmbd0g&object=651479
#http://careo.ucalgary.ca/cgi-bin/WebObjects/CAREO.woa/1/wa/discuss?theme=careo&wosid=tUkzqf9mt6KIbfa9Dmbd0g&object=651479