###########################################################################
#
# jSongMinerExtractor - helper plugin that identifies audio through
#                       external web services based on either an audio
#                       computed fingerprint or ID3 title and album
#
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the
# University of Waikato, New Zealand.
#
# Copyright (C) 2010 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

package jSongMinerExtractor;

use BaseMediaConverter;

use Cwd;
use File::Basename;
use URI::Escape;

use strict;
no strict 'refs'; # allow filehandles to be variables and viceversa


BEGIN {
    @jSongMinerExtractor::ISA = ('BaseMediaConverter');
}


# Plugin argument descriptions, appended to the shared "ArgList" in new().
my $arguments = [
    { 'name' => "track_identification",
      'desc' => "{jSongMinerExtractor.track_identification}",
      'type' => "enum",
      'list' => [{'name' => "Fingerprint then ID3 tags", 'desc' => "{jSongMinerExtractor.fingerprint_first}"},
                 {'name' => "ID3 tags only",             'desc' => "{jSongMinerExtractor.only_ids}"},
                 {'name' => "Disabled",                  'desc' => "{jSongMinerExtractor.off}"} ],
      'deft' => 'Fingerprint then ID3 tags',
      'reqd' => "no" }
];

# Plugin option block, appended to the shared "OptList" in new().
my $options = { 'name'     => "jSongMinerExtractor",
                'desc'     => "{jSongMinerExtractor.desc}",
                'abstract' => "yes",
                'inherits' => "yes",
                'args'     => $arguments };


# ID3v1 genre numbers (0..147) and the names they stand for.  Built once
# at load time rather than on every call to map_id3v1_genre_num().  The
# spellings are reproduced exactly as this module has always emitted them.
my %id3v1_genre_for = (
    0 => "Blues",            1 => "Classic Rock",      2 => "Country",
    3 => "Dance",            4 => "Disco",             5 => "Funk",
    6 => "Grunge",           7 => "Hip-Hop",           8 => "Jazz",
    9 => "Metal",           10 => "New Age",          11 => "Oldies",
   12 => "Other",           13 => "Pop",              14 => "R&B",
   15 => "Rap",             16 => "Reggae",           17 => "Rock",
   18 => "Techno",          19 => "Industrial",       20 => "Alternative",
   21 => "Ska",             22 => "Death Metal",      23 => "Pranks",
   24 => "Soundtrack",      25 => "Euro-Techno",      26 => "Ambient",
   27 => "Trip-Hop",        28 => "Vocal",            29 => "Jazz+Funk",
   30 => "Fusion",          31 => "Trance",           32 => "Classical",
   33 => "Instrumental",    34 => "Acid",             35 => "House",
   36 => "Game",            37 => "Sound Clip",       38 => "Gospel",
   39 => "Noise",           40 => "Alternative Rock", 41 => "Bass",
   42 => "Soul",            43 => "Punk",             44 => "Space",
   45 => "Meditative",      46 => "Instrumental Pop", 47 => "Instrumental Rock",
   48 => "Ethnic",          49 => "Gothic",           50 => "Darkwave",
   51 => "Techno-Industrial", 52 => "Electronic",     53 => "Pop-Folk",
   54 => "Eurodance",       55 => "Dream",            56 => "Southern Rock",
   57 => "Comedy",          58 => "Cult",             59 => "Gangsta",
   60 => "Top 40",          61 => "Christian Rap",    62 => "Pop/Funk",
   63 => "Jungle",          64 => "Native US",        65 => "Cabaret",
   66 => "New Wave",        67 => "Psychadelic",      68 => "Rave",
   69 => "Showtunes",       70 => "Trailer",          71 => "Lo-Fi",
   72 => "Tribal",          73 => "Acid Punk",        74 => "Acid Jazz",
   75 => "Polka",           76 => "Retro",            77 => "Musical",
   78 => "Rock & Roll",     79 => "Hard Rock",        80 => "Folk",
   81 => "Folk-Rock",       82 => "National Folk",    83 => "Swing",
   84 => "Fast Fusion",     85 => "Bebob",            86 => "Latin",
   87 => "Revival",         88 => "Celtic",           89 => "Bluegrass",
   90 => "Avantgarde",      91 => "Gothic Rock",      92 => "Progressive Rock",
   93 => "Psychedelic Rock", 94 => "Symphonic Rock",  95 => "Slow Rock",
   96 => "Big Band",        97 => "Chorus",           98 => "Easy Listening",
   99 => "Acoustic",       100 => "Humour",          101 => "Speech",
  102 => "Chanson",        103 => "Opera",           104 => "Chamber Music",
  105 => "Sonata",         106 => "Symphony",        107 => "Booty Bass",
  108 => "Primus",         109 => "Porn Groove",     110 => "Satire",
  111 => "Slow Jam",       112 => "Club",            113 => "Tango",
  114 => "Samba",          115 => "Folklore",        116 => "Ballad",
  117 => "Power Ballad",   118 => "Rhythmic Soul",   119 => "Freestyle",
  120 => "Duet",           121 => "Punk Rock",       122 => "Drum Solo",
  123 => "Acapella",       124 => "Euro-House",      125 => "Dance Hall",
  126 => "Goa",            127 => "Drum & Bass",     128 => "Club - House",
  129 => "Hardcore",       130 => "Terror",          131 => "Indie",
  132 => "BritPop",        133 => "Negerpunk",       134 => "Polsk Punk",
  135 => "Beat",           136 => "Christian Gangsta Rap", 137 => "Heavy Metal",
  138 => "Black Metal",    139 => "Crossover",       140 => "Contemporary Christian",
  141 => "Christian Rock", 142 => "Merengue",        143 => "Salsa",
  144 => "Thrash Metal",   145 => "Anime",           146 => "JPop",
  147 => "Synthpop",
);


# Constructor.
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - passed through untouched to BaseMediaConverter
#   $hashArgOptLists - hash ref whose "ArgList" / "OptList" arrays receive
#                      this plugin's arguments and options
# Returns the BaseMediaConverter object re-blessed into $class, with
# 'jmir_directory' set to <GEXT_MUSICIR>/lib/java.
sub new
{
    my ($class) = shift(@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;

    push(@$pluginlist, $class);
    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    # Direct method call instead of the ambiguous "new Class(...)" form
    my $self = BaseMediaConverter->new($pluginlist, $inputargs, $hashArgOptLists, 1);

    # Set the directory holding the jMIR .jar files
    my $music_ir_home = $ENV{'GEXT_MUSICIR'};
    $self->{'jmir_directory'} = &util::filename_cat($music_ir_home, "lib", "java");

    return bless $self, $class;
}


# URL Encode the given string
#   ARG 1: $to_encode is the string to URL encode
sub urlEncode
{
    my ($to_encode) = @_;
    return uri_escape($to_encode);
}


# URL Decode the given string
#   ARG 1: $to_decode is the string to URL decode
sub urlDecode
{
    my ($to_decode) = @_;
    return uri_unescape($to_decode);
}


# Translate an ID3v1 numeric genre into its name.
#   ARG 1: $genre_num is the genre value; it may be a bare number or a
#          number wrapped in one pair of (), [] or {} brackets.
# Returns:
#   - "Unknown" for an all-digit value of 148 or more,
#   - the genre name for a value found in the ID3v1 table,
#   - otherwise the (bracket-stripped) input unchanged, so free-text
#     genres pass straight through.
sub map_id3v1_genre_num
{
    my ($genre_num) = @_;

    return $genre_num if (!defined $genre_num);

    # Parentheses can arrive as HTML numeric entities (presumably escaped
    # upstream -- confirm against the metadata source); turn them back
    # into real brackets so the stripping below sees them.
    $genre_num =~ s/&#40;/(/g;
    $genre_num =~ s/&#41;/)/g;

    # Drop a single pair of enclosing brackets, e.g. "(17)" -> "17"
    $genre_num =~ s/^[\(\[\{](.*)[\)\]\}]$/$1/;

    if (($genre_num =~ m/^\d+$/) && ($genre_num >= 148)) {
        return "Unknown";
    }

    my $mapped_genre = $id3v1_genre_for{$genre_num};
    if (defined $mapped_genre) {
        print STDERR "*** changing $genre_num -> ", $mapped_genre, "\n";
        return $mapped_genre;
    }

    return $genre_num;
}


# Rewrite every ex.ID3.Genre value on the document's top section through
# map_id3v1_genre_num(), replacing the existing values.
#   ARG 1: $doc_obj is the document object whose metadata is updated
sub check_for_existing_id3_genre
{
    my $self = shift @_;
    my ($doc_obj) = @_;

    my $top_section = $doc_obj->get_top_section();

    # Look for ex.ID3.Genre as well, as special case
    my $genre_md_list = $doc_obj->get_metadata($top_section, "ex.ID3.Genre");

    # Map everything first: the old values are deleted before the new
    # ones are added back.
    my @new_genre_md_list = ();
    foreach my $gv (@$genre_md_list) {
        print STDERR "*** got match on ex.ID3.Genre -> '$gv'\n";
        my $new_gv = map_id3v1_genre_num($gv);
        push(@new_genre_md_list, $new_gv);
    }

    $doc_obj->delete_metadata($top_section, "ex.ID3.Genre");

    foreach my $gv (@new_genre_md_list) {
        $doc_obj->add_utf8_metadata($top_section, "ex.ID3.Genre", $gv);
    }
}


# Read the text metadata file written by jSongMiner and attach its
# contents to the top section of $doc_obj.
#   ARG 1: $doc_obj is the document object receiving the metadata
#   ARG 2: $target_txt_file_path is the file to read
# The file is consumed as alternating lines: a URL-encoded metadata name
# followed by its URL-encoded value.  Reading stops at the first
# incomplete pair.  On open failure an error is printed to STDERR and
# the document is left untouched.
sub parse_txt_metadata
{
    my $self = shift @_;
    my ($doc_obj, $target_txt_file_path) = @_;

    print STDERR "**#####** jSongMiner parsing txt metadata\n";

    my $metadata_fh;
    if (open($metadata_fh, "<", $target_txt_file_path)) {

        # The top section does not change while metadata is being added
        my $top_section = $doc_obj->get_top_section();

        my ($md_name, $md_value);
        while (defined($md_name = <$metadata_fh>) && defined($md_value = <$metadata_fh>)) {

            chomp $md_name;
            chomp $md_value;

            # '+' is dropped from names but becomes a space in values
            $md_name  =~ s/\+//g;
            $md_value =~ s/\+/ /g;

            $md_name  = urlDecode($md_name);
            $md_value = urlDecode($md_value);

            # Normalise the metadata name: strip a trailing "(...)",
            # then turn the first ':' (with an optional preceding "API")
            # into '.' and any remaining ':' into '^'.
            $md_name =~ s/\(.*?\)$//s; # can stretch over multiple lines
            $md_name =~ s/Last\.FM/LastFM/g;
            $md_name =~ s/:/^/g;
            $md_name =~ s/(API)?\^/./;

            if ($md_name =~ m/genre$/i) {
                print STDERR "*** got match on $md_name -> $md_value\n";
                $md_value = map_id3v1_genre_num($md_value);
            }

            $doc_obj->add_utf8_metadata($top_section, $md_name, $md_value);
        }

        close($metadata_fh);

        $self->check_for_existing_id3_genre($doc_obj);
    }
    else {
        # Capture the reason before any further I/O can overwrite $!
        my $open_error = $!;
        print STDERR "Error: Failed to open $target_txt_file_path\n";
        print STDERR " $open_error\n";
    }
}


# Private helper: make $value safe to embed between double quotes in the
# command line assembled by retrieve_metadata().
# Inside double quotes a POSIX-style shell still acts on \ " $ and `, so
# those four characters are backslash-escaped.  On MSWin32 the value is
# returned unchanged: the command interpreter there has different quoting
# rules, so the previous behaviour is kept rather than guessed at.
sub _escape_for_double_quotes
{
    my ($value) = @_;

    return $value if ($^O eq "MSWin32");

    $value =~ s/([\\"\$`])/\\$1/g;
    return $value;
}


# Private helper: return $path in the form the external Java program
# should be given.  Everywhere except cygwin this is $path itself; under
# cygwin it is the output of "cygpath -w" with trailing whitespace
# removed.  cygpath is run without a shell, so characters in $path are
# never interpreted.  If cygpath cannot be run or prints nothing, $path
# is returned unchanged.
sub _to_native_path
{
    my ($path) = @_;

    return $path if ($^O ne "cygwin");

    my $native_path;
    if (open(my $cygpath_out, "-|", "cygpath", "-w", $path)) {
        $native_path = do { local $/; <$cygpath_out> };
        close($cygpath_out);
    }

    return $path if (!defined $native_path);

    $native_path =~ s/\s+$//;
    return ($native_path ne "") ? $native_path : $path;
}


# Run jSongMiner over an audio file to retrieve metadata for it.
#   ARG 1: $source_file_path is the audio file to identify
#   ARG 2: $id3_title  (optional) is passed to jSongMiner as -title
#   ARG 3: $id3_artist (optional) is passed to jSongMiner as -artist
#   ARG 4: $convert_options (optional) extra command-line options,
#          inserted verbatim; defaults to ""
# Returns the list ($target_acexml_file_path, $target_txt_file_path).
# The paths are returned whether or not the command could be run.
# The working directory is switched to the jMIR directory for the
# duration of the run and restored afterwards.
sub retrieve_metadata
{
    my $self = shift(@_);
    my ($source_file_path, $id3_title, $id3_artist, $convert_options) = @_;

    $convert_options = "" if (!defined $convert_options);

    my $outhandle = $self->{'outhandle'};
    my $verbosity = $self->{'verbosity'};

    my $source_file_no_path = &File::Basename::basename($source_file_path);

    $self->init_cache_for_file($source_file_path);

    # Output goes in the cache directory when caching is on, otherwise
    # into temporary files.
    my $target_txt_file_path;
    my $target_acexml_file_path;

    if ($self->{'enable_cache'}) {
        my $cached_dir = $self->{'cached_dir'};
        my $file_root  = $self->{'cached_file_root'};

        my $target_txt_file    = "${file_root}_metadata.txt";
        my $target_acexml_file = "${file_root}.xml";

        $target_txt_file_path    = &util::filename_cat($cached_dir, $target_txt_file);
        $target_acexml_file_path = &util::filename_cat($cached_dir, $target_acexml_file);
    }
    else {
        $target_txt_file_path    = &util::get_tmp_filename("_metadata.txt");
        $target_acexml_file_path = &util::get_tmp_filename(".xml");
    }

    my $jmir_directory = $self->{'jmir_directory'};
    my $store_cwd = cwd();

    if (!-d $jmir_directory) {
        print STDERR "Error: Unable to find directory '$jmir_directory'\n";
        print STDERR " Cannot run jSongMiner\n";
    }
    elsif (chdir($jmir_directory)) {

        my $source_file_path_os        = _to_native_path($source_file_path);
        my $target_txt_file_path_os    = _to_native_path($target_txt_file_path);
        my $target_acexml_file_path_os = _to_native_path($target_acexml_file_path);

        my $jsongminer_cmd = "java -Xmx1024M -jar \"" . $jmir_directory . "/jSongMiner.jar\" $convert_options";

        # Title and artist come from the audio file's own tags, so they
        # are escaped before being placed inside the quoted argument.
        if (defined $id3_title) {
            $jsongminer_cmd .= " -title \"" . _escape_for_double_quotes($id3_title) . "\"";
        }
        if (defined $id3_artist) {
            $jsongminer_cmd .= " -artist \"" . _escape_for_double_quotes($id3_artist) . "\"";
        }

        $jsongminer_cmd .= " -audio \"$source_file_path_os\"";
        $jsongminer_cmd .= " -savetxtfile \"$target_txt_file_path_os\"";
        $jsongminer_cmd .= " -saveacexmlfile \"$target_acexml_file_path_os\"";

        if ($verbosity > 2) {
            print $outhandle "jSongMinerExtractor: Running ...\n";
            print $outhandle "jSongMinerExtractor: $jsongminer_cmd\n";
        }

        my $print_info = { 'message_prefix' => "jSongMiner",
                           'message' => "jSongMinerExtractor: Retrieving audio metadata for $source_file_no_path" };

        # The three return values are not currently acted upon here
        my ($regenerated, $result, $had_error)
            = $self->autorun_general_cmd($jsongminer_cmd, $source_file_path, $target_txt_file_path, $print_info);

        if ($verbosity > 2) {
            print $outhandle "jSongMinerExtractor: ...done\n";
        }
    }
    else {
        print STDERR "Error: failed to change directory to '$jmir_directory'\n";
        print STDERR " Cannot run jSongMiner\n";
    }

    chdir($store_cwd);

    return ($target_acexml_file_path, $target_txt_file_path);
}


1;