source: gs2-extensions/music-ir-src/trunk/perllib/plugins/jSongMinerExtractor.pm@ 24432

Last change on this file since 24432 was 24432, checked in by davidb, 13 years ago

Fix to parsing of returned metadata stored. Bracketed terms could include newline character. Need regexp that uses '/s'

  • Property svn:executable set to *
File size: 6.4 KB
Line 
1###########################################################################
2#
3# jSongMinerExtractor - helper plugin that identifies audio through
4# external web services based on either a fingerprint
5# computed from the audio or the ID3 title and artist
6#
7# A component of the Greenstone digital library software
8# from the New Zealand Digital Library Project at the
9# University of Waikato, New Zealand.
10#
11# Copyright (C) 2010 New Zealand Digital Library Project
12#
13# This program is free software; you can redistribute it and/or modify
14# it under the terms of the GNU General Public License as published by
15# the Free Software Foundation; either version 2 of the License, or
16# (at your option) any later version.
17#
18# This program is distributed in the hope that it will be useful,
19# but WITHOUT ANY WARRANTY; without even the implied warranty of
20# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21# GNU General Public License for more details.
22#
23# You should have received a copy of the GNU General Public License
24# along with this program; if not, write to the Free Software
25# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26#
27###########################################################################
28package jSongMinerExtractor;
29
30use BaseMediaConverter;
31
32use Cwd;
33use URI::Escape;
34
35use strict;
36no strict 'refs'; # allow filehandles to be variables and viceversa
37
38
# Set up inheritance at compile time: this package derives from
# BaseMediaConverter.
BEGIN {
    @jSongMinerExtractor::ISA = qw(BaseMediaConverter);
}
42
43
# Argument table for this plugin: a single enumerated argument,
# 'track_identification', offering three choices.  The "{...}" strings
# look like lookup keys for description text resolved elsewhere
# (NOTE(review): confirm against the plugin framework).
my $arguments = [
    {
        'name' => 'track_identification',
        'desc' => '{jSongMinerExtractor.track_identification}',
        'type' => 'enum',
        'list' => [
            {
                'name' => 'Fingerprint then ID3 tags',
                'desc' => '{jSongMinerExtractor.fingerprint_first}',
            },
            {
                'name' => 'ID3 tags only',
                'desc' => '{jSongMinerExtractor.only_ids}',
            },
            {
                'name' => 'Disabled',
                'desc' => '{jSongMinerExtractor.off}',
            },
        ],
        'deft' => 'Fingerprint then ID3 tags',
        'reqd' => 'no',
    },
];

# Option record for this plugin; 'args' points at the argument table above.
# new() pushes this record onto the caller's "OptList".
my $options = {
    'name'     => 'jSongMinerExtractor',
    'desc'     => '{jSongMinerExtractor.desc}',
    'abstract' => 'yes',
    'inherits' => 'yes',
    'args'     => $arguments,
};
60
# Constructor.
#
# ARG 1: $pluginlist      - array ref; this class name is appended to it
# ARG 2: $inputargs       - handed on unchanged to BaseMediaConverter
# ARG 3: $hashArgOptLists - hash ref; its "ArgList" array receives this
#                           plugin's arguments and its "OptList" array
#                           receives this plugin's option record
#
# Returns the object produced by BaseMediaConverter, blessed into $class,
# with 'jmir_directory' set to <GEXT_MUSICIR>/lib/java.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist,$inputargs,$hashArgOptLists) = @_;

    push(@$pluginlist, $class);
    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
    push(@{$hashArgOptLists->{"OptList"}},$options);

    # Explicit class-method call.  The indirect-object form
    # ("new BaseMediaConverter(...)") is ambiguous to the parser, all the
    # more so inside a package that defines its own new().
    # NOTE(review): the meaning of the trailing 1 is defined by
    # BaseMediaConverter - confirm there.
    my $self = BaseMediaConverter->new($pluginlist, $inputargs, $hashArgOptLists, 1);

    # Set controlling variables: the directory holding the jMIR .jar files
    $self->{'jmir_directory'}
        = &util::filename_cat($ENV{'GEXT_MUSICIR'},"lib","java");

    return bless $self, $class;
}
79
# URL Encode the given string.
# Plain function (not a method): the string is the only argument.
sub urlEncode {
    # ARG 1: the string to URL encode
    my $plain_text = $_[0];

    my $encoded = uri_escape($plain_text);
    return $encoded;
}
85
# URL Decode the given string.
# Plain function (not a method): the string is the only argument.
sub urlDecode {
    # ARG 1: the string to URL decode
    my $encoded_text = $_[0];

    # Force scalar context so a single decoded string is always returned
    return scalar(uri_unescape($encoded_text));
}
94
# Read a metadata text file and attach its contents to a document.
#
# The file is consumed two lines at a time: the first line of each pair is
# a URL-encoded metadata name, the second is its URL-encoded value.  A
# trailing name with no value line is ignored.
#
# ARG 1: $doc_obj              - document object; must provide
#                                get_top_section() and add_utf8_metadata()
# ARG 2: $target_txt_file_path - path of the metadata text file to read
#
# On failure to open the file an error (including the OS reason) is written
# to STDERR and nothing is added.  Callers should not rely on the return
# value.
sub parse_txt_metadata
{
    my $self = shift @_;
    my ($doc_obj,$target_txt_file_path) = @_;

    # Three-argument open with a lexical handle: the path can never be
    # mistaken for an open mode, and no global bareword handle is clobbered.
    if (open(my $metadata_fh, '<', $target_txt_file_path)) {

        # The same section is used for every pair, so look it up once
        my $top_section = $doc_obj->get_top_section();

        my ($md_name, $md_value);

        while (defined($md_name = <$metadata_fh>)
               && defined($md_value = <$metadata_fh>)) {

            chomp $md_name;
            chomp $md_value;

            # '+' is dropped from names but becomes a space in values
            $md_name  =~ s/\+//g;
            $md_value =~ s/\+/ /g;

            $md_name  = urlDecode($md_name);
            $md_value = urlDecode($md_value);

            # $md_name =~ s/\s+/ /sg;
            $md_name =~ s/\(.*?\)$//s; # can stretch over multiple lines
            $md_name =~ s/Last\.FM/LastFM/g;
            $md_name =~ s/:/^/g;
            # Only the first '^' (with an optional preceding "API") becomes
            # the '.' separator; any later '^' is left in place
            $md_name =~ s/(API)?\^/./;

            $doc_obj->add_utf8_metadata($top_section,$md_name,$md_value);
        }

        close($metadata_fh);
    }
    else {
        print STDERR "Error: Failed to open $target_txt_file_path\n";
        # Was "!$", a transposition of "$!", so the reason was never shown
        print STDERR " $!\n";
    }
}
133
134
# Escape a string so it can sit between double quotes on a command line.
# An undefined value is treated as the empty string.
sub _escape_for_double_quotes
{
    my ($text) = @_;

    $text = "" if (!defined $text);

    if ($^O eq "MSWin32") {
        # Backslashes are ordinary path characters here, so only the quote
        # character itself is escaped
        $text =~ s/"/\\"/g;
    }
    else {
        # Inside double quotes a POSIX shell still interprets \ " $ and `
        $text =~ s/([\\"\$`])/\\$1/g;
    }

    return $text;
}

# Run jSongMiner on an audio file to retrieve metadata for it.
#
# ARG 1: $source_file_path - path of the audio file to identify
# ARG 2: $id3_title        - title passed to jSongMiner via -title
# ARG 3: $id3_artist       - artist passed to jSongMiner via -artist
# ARG 4: $convert_options  - (optional) extra command-line options, inserted
#                            verbatim; defaults to the empty string
#
# Returns the list ($target_acexml_file_path, $target_txt_file_path).  The
# paths are returned even when jSongMiner could not be run, in which case
# the files may not exist.
sub retrieve_metadata
{
    my $self = shift(@_);
    my ($source_file_path,$id3_title,$id3_artist,$convert_options) = @_;

    $convert_options = "" if (!defined $convert_options);

    my $outhandle = $self->{'outhandle'};
    my $verbosity = $self->{'verbosity'};

    my $source_file_no_path = &File::Basename::basename($source_file_path);

    $self->init_cache_for_file($source_file_path);

    my $target_txt_file_path;
    my $target_acexml_file_path;

    if ($self->{'enable_cache'}) {
        # Output files live in the cache directory, named after the source
        my $cached_dir = $self->{'cached_dir'};
        my $file_root  = $self->{'cached_file_root'};

        $target_txt_file_path
            = &util::filename_cat($cached_dir,"${file_root}_metadata.txt");
        $target_acexml_file_path
            = &util::filename_cat($cached_dir,"${file_root}.xml");
    }
    else {
        $target_txt_file_path    = &util::get_tmp_filename("_metadata.txt");
        $target_acexml_file_path = &util::get_tmp_filename(".xml");
    }

    my $jmir_directory = $self->{'jmir_directory'};

    my $store_cwd = cwd();

    if (!-d $jmir_directory) {
        print STDERR "Error: Unable to find directory '$jmir_directory'\n";
        print STDERR " Cannot run jSongMiner\n";
    }
    elsif (chdir($jmir_directory)) {

        # The title and artist come from tags inside the media file, so
        # every quoted value is escaped before it reaches the command line
        my @quoted_args = (
            [ "-title",          $id3_title ],
            [ "-artist",         $id3_artist ],
            [ "-audio",          $source_file_path ],
            [ "-savetxtfile",    $target_txt_file_path ],
            [ "-saveacexmlfile", $target_acexml_file_path ],
        );

        my $jsongminer_cmd = "java -Xmx1024M -jar jSongMiner.jar $convert_options";
        foreach my $flag_and_value (@quoted_args) {
            my ($flag, $value) = @$flag_and_value;
            $jsongminer_cmd .= " $flag \"" . _escape_for_double_quotes($value) . "\"";
        }

        if ($verbosity>2) {
            print $outhandle "jSongMinerExtractor: Running ...\n";
            print $outhandle "jSongMinerExtractor: $jsongminer_cmd\n";
        }

        my $print_info = { 'message_prefix' => "jSongMiner",
                           'message' => "jSongMinerExtractor: Retrieving audio metadata for $source_file_no_path" };

        my ($regenerated,$result,$had_error)
            = $self->autorun_general_cmd($jsongminer_cmd,$source_file_path,$target_txt_file_path,$print_info);

        # NOTE(review): $had_error is assumed to be true when the command
        # failed - confirm against autorun_general_cmd
        if ($had_error) {
            print STDERR "Error: jSongMiner reported a problem processing $source_file_no_path\n";
        }

        if ($verbosity>2) {
            print $outhandle "jSongMinerExtractor: ...done\n";
        }

        # Put the working directory back and say so if that fails, since
        # later relative paths would otherwise resolve in the wrong place
        if (!chdir($store_cwd)) {
            print STDERR "Error: failed to change directory back to '$store_cwd'\n";
            print STDERR " $!\n";
        }
    }
    else {
        print STDERR "Error: failed to change directory to '$jmir_directory'\n";
        print STDERR " Cannot run jSongMiner\n";
    }

    return ($target_acexml_file_path,$target_txt_file_path);
}
210
211
212
213
2141;
Note: See TracBrowser for help on using the repository browser.