Context Navigation

← Previous Changeset
Next Changeset →

Changeset 35224

Timestamp:

2021-07-03T11:44:55+12:00 (3 years ago)

Author:

davidb

Message:

Substantial refactoring and code development so that a CSV header line is guaranteed to be present in the generated CSV file AND so that any frame that is silent (causing the Essentia feature extractor to exit with an error) generates a line of zeros in the feature file

File:

: 1 edited

main/trunk/model-sites-dev/mars/collect/amc-essentia/AUDIO-FRAME-TO-ESSENTIA-CSV-FEATURE-FILE.pl (modified) (2 diffs)

Legend:

: Unmodified
: Added
: Removed

main/trunk/model-sites-dev/mars/collect/amc-essentia/AUDIO-FRAME-TO-ESSENTIA-CSV-FEATURE-FILE.pl

-              r34768
+              r35224
 use Cwd qw(cwd getcwd);
+# Top-level setup: require exactly three command-line arguments, then derive
+# the working file names from the audio file path.
+# NOTE(review): the same statements also appear inside main() further down;
+# in this changeset view these lines look like the pre-refactoring top-level
+# code -- confirm against the repository before relying on them.
+if (scalar(@ARGV) != 3) {
+    print STDERR "****\n";
+    print STDERR "* Error: incorrect usage\n";
+    print STDERR "* Usage: $0 frame-step-in-secs frame-duration-in-secs audio_input_file\n";
+    print STDERR "****\n";
+    exit(1);
+}
+my $frame_step_secs = $ARGV[0];
+my $frame_dur_secs  = $ARGV[1];
+my $audio_file        = $ARGV[2];
+# Everything before the final ".ext" of the audio file path
+my ($full_audio_root) = ($audio_file =~ m/^(.*)\..+?$/);
+# The part after the last "/"; stays undefined when the path contains no "/"
+my ($audio_root)      = ($full_audio_root =~ m/^.*\/(.*?)$/);
+if ( ! -d "tmp" ) {
+    print "Creating directory: tmp\n";
+    mkdir("tmp");
+}
+# Intermediate files live under tmp/ ...
+my $tmp_json_file = "tmp/${audio_root}_essentiafeatures.json";
+my $tmp_csv_file  = "tmp/${audio_root}_essentiafeatures.csv";
+# ... while the per-frame CSV output is written next to the audio file
+my $csv_file  = "${full_audio_root}_essentiafeatures_frames.csv";
+my $pwd = cwd();
+# The profile is built from the ".in" template in the current directory
+my $profile_template_file = "$pwd/essentia-2013-2014.profile.in";
+my $profile_file = "$pwd/essentia-2013-2014.profile";
+# Return the duration of an audio file as reported by ./AUDIO-DURATION.sh.
+#
+# Parameters:
+#   $audio_file - path of the audio file to measure
+#
+# Returns the first line printed by the helper script, with its trailing
+# newline removed.
+#
+# Dies if the helper script cannot be started or prints nothing at all.
+sub get_audio_duration
+{
+    my ($audio_file) = @_;
+
+    # List-form pipe open: the file name reaches the script as one argument
+    # and never passes through a shell, so spaces or shell metacharacters in
+    # the path cannot split or alter the command.
+    my @audio_cmd = ("./AUDIO-DURATION.sh", $audio_file);
+    open(my $fh, '-|', @audio_cmd)
+        or die "Failed to run '@audio_cmd': $!\n";
+
+    # Just need to read in one line:
+    my $audio_duration = <$fh>;
+
+    # The close status is deliberately not checked: only the first line is
+    # consumed, so the helper may still be writing when the pipe is closed.
+    close($fh);
+
+    # No output at all means there is no duration to work with; stop here
+    # rather than hand an undefined value back to the caller.
+    if (!defined($audio_duration)) {
+        die "No duration reported by '@audio_cmd'\n";
+    }
+
+    chomp($audio_duration);
+
+    return $audio_duration;
+}
+# Write the profile file for one time-slice of the audio.
+#
+# Copies $profile_template_file to $profile_file line by line, replacing
+# every @startTime@ placeholder with $startTime and every @endTime@
+# placeholder with $startTime + $frame_dur_secs.
+#
+# Parameters:
+#   $startTime             - start of the time-slice
+#   $frame_dur_secs        - length of the time-slice
+#   $profile_template_file - template file to read
+#   $profile_file          - profile file to (over)write
+#
+# Dies, naming the file involved, if either file cannot be opened or if the
+# generated profile cannot be completely written.
+sub generate_bespoke_profile
+{
+    my ($startTime, $frame_dur_secs, $profile_template_file,$profile_file) = @_;
+
+    my $endTime   = $startTime + $frame_dur_secs;
+
+    #
+    # Generate bespoke profile_file for this time-slice
+    #
+    open(my $ipfh, '<', $profile_template_file)
+        or die "Can't read profile template $profile_template_file: $!\n";
+    open(my $opfh, '>', $profile_file)
+        or die "Can't write profile $profile_file: $!\n";
+
+    while (my $line = <$ipfh>) {
+        chomp($line);
+        $line =~ s/\@startTime\@/$startTime/g;
+        $line =~ s/\@endTime\@/$endTime/g;
+        print $opfh "$line\n";
+    }
+
+    # Output is buffered, so a failed write may only be reported when the
+    # handle is closed; check it so a truncated profile is never used.
+    close($opfh)
+        or die "Can't finish writing profile $profile_file: $!\n";
+    close($ipfh);
+}
+#
+# Extract features and convert to CSV
+#
+# Runs essentia_streaming_extractor_music on $audio_file with $profile_file,
+# converts the JSON it produces to CSV with json_to_csv.py and returns the
+# lines of that CSV.  Both intermediate files are created under tmp/ and are
+# deleted again once they have been used.
+#
+# Parameters:
+#   $audio_root    - name stem used for the temporary files under tmp/
+#   $audio_file    - audio file handed to the extractor
+#   $profile_file  - profile file handed to the extractor
+#   $ignore_fields - field patterns handed to json_to_csv.py after --ignore
+#                    (assumed to be separated by whitespace, as in the list
+#                    built by the caller -- TODO confirm)
+#   $fallback_rec  - optional hash ref with 'headerline' and 'zeroline'
+#                    entries, or undef
+#
+# Returns:
+#   - a reference to the array of chomped CSV lines on success
+#   - a reference to [headerline, zeroline] taken from $fallback_rec when the
+#     extractor ends with wait status 256 (exit code 1) and $fallback_rec is
+#     defined
+#   - undef after any other extractor failure or a failed conversion
+#
+# Dies if a temporary file cannot be deleted or the CSV cannot be opened.
+sub extract_audio_features_as_csv
+{
+    my ($audio_root,$audio_file,$profile_file,$ignore_fields, $fallback_rec) = @_;
+
+    my $tmp_json_file = "tmp/${audio_root}_essentiafeatures.json";
+    my $tmp_csv_file  = "tmp/${audio_root}_essentiafeatures.csv";
+
+    # Run the commands in list form so that no shell is involved: file names
+    # containing spaces or shell metacharacters are passed through intact.
+    # The single-string form is kept purely for the diagnostic messages.
+    my @extract_args = ("essentia_streaming_extractor_music", $audio_file, $tmp_json_file, $profile_file);
+    my $extract_cmd = "essentia_streaming_extractor_music $audio_file $tmp_json_file $profile_file";
+    my $extract_status = system(@extract_args);
+
+    if ($extract_status != 0) {
+        if (($extract_status == 256) && defined($fallback_rec)) {
+            # effectively error status '1'
+            print "$!\n";
+            print "Warning: Failed to run command with exit status 1:\n";
+            print "    $extract_cmd\n";
+            print "\n";
+            print "The most likely issue is that the segement of audio process was silence\n";
+            print "=> Generating row of 0 feature values for CSV output\n";
+
+            my @lines = ($fallback_rec->{'headerline'}, $fallback_rec->{'zeroline'});
+            return \@lines;
+        }
+        else {
+            print STDERR "$!\n";
+            print STDERR "Error: Failed to run command:\n";
+            print STDERR "    $extract_cmd\n";
+            return undef;
+        }
+    }
+
+    # split(' ', ...) breaks the pattern list on whitespace, as the shell
+    # used to, but the patterns (e.g. "lowlevel.barkbands.*") are no longer
+    # exposed to shell filename expansion.
+    my @convert_args = ("json_to_csv.py", "-i", $tmp_json_file, "-o", $tmp_csv_file,
+                        "--ignore", split(' ', $ignore_fields));
+    my $convert_cmd = "json_to_csv.py -i $tmp_json_file -o $tmp_csv_file --ignore $ignore_fields";
+    my $convert_status = system(@convert_args);
+
+    # Whatever the outcome of this command, now finished with the tmp_json_file
+    unlink($tmp_json_file) or die "Can't delete $tmp_json_file: $!\n";
+
+    if ($convert_status != 0) {
+        print STDERR "$!\n";
+        print STDERR "Error: Failed to run command:\n";
+        print STDERR "    $convert_cmd\n";
+        return undef;
+    }
+
+    open(my $ifh, '<', $tmp_csv_file)
+        or die "Can't read $tmp_csv_file: $!\n";
+
+    # read in lines to array: single line shot
+    chomp(my @lines = <$ifh>);
+
+    close($ifh);
+
+    unlink($tmp_csv_file)  or die "Can't delete $tmp_csv_file: $!\n";
+
+    return \@lines;
+}
+# Placeholder: at this revision the sub only assigns an exemplar input file
+# path to a local variable; it takes no arguments and writes no header.
+# NOTE(review): looks like unfinished work -- confirm whether it is still
+# needed.
+sub generate_csv_feature_header
+{
+    my $exemplar_inputfile="import/00/ds_20491_4100.m4a";
+    # _essentiafeatures_frames.csv
+}
+# Provide the CSV header line and a matching all-zero data line.
+#
+# The two lines are cached in etc/representative_header.csv and
+# etc/representative_zeroline.csv.  When both files exist they are simply
+# read back.  Otherwise features are extracted from $audio_file, the first
+# CSV line is taken as the header, a line holding one 0 per header column is
+# built to match it, and both are written to the cache files.
+#
+# Parameters:
+#   $audio_file    - representative audio file to extract the header from
+#   $profile_file  - profile file passed on to the feature extraction
+#   $ignore_fields - ignore list passed on to the feature extraction
+#
+# Returns a hash ref with the entries 'headerline' and 'zeroline'.
+#
+# Dies if no header line can be obtained (so that empty cache files are
+# never written and then silently reused) or if a cache file cannot be
+# read or written.
+sub ensure_representative_fallback
+{
+    my ($audio_file,$profile_file,$ignore_fields) = @_;
+
+    my $fallback_rec = {};
+
+    my $representative_headerline_file = "etc/representative_header.csv";
+    my $representative_zeroline_file   = "etc/representative_zeroline.csv";
+
+    if ((! -f $representative_headerline_file) || (! -f $representative_zeroline_file)) {
+        print "Generating Representative Fallback files for CSV Header and Zero-val files:\n";
+        print "  $representative_headerline_file and $representative_zeroline_file\n";
+        print "\n";
+
+        my ($full_audio_root) = ($audio_file =~ m/^(.*)\..+?$/);
+        my ($audio_root)      = ($full_audio_root =~ m/^.*\/(.*?)$/);
+
+        # No fallback record is passed here: a failed extraction must be
+        # reported as a failure, not papered over.
+        my $lines = extract_audio_features_as_csv($audio_root,$audio_file,$profile_file,$ignore_fields,undef);
+
+        # The first line is the CSV header.  Without one there is nothing
+        # sensible to cache, so stop before any file is written.
+        my $header_line = defined($lines) ? $lines->[0] : undef;
+        if (!defined($header_line) || ($header_line eq "")) {
+            die "Error: Failed to obtain a CSV header line from representative audio file $audio_file\n";
+        }
+
+        # Build a line of zeros with exactly one value per header column
+        # (limit -1 keeps trailing empty columns in the count)
+        my @header_line_vals = split(/,/, $header_line, -1);
+        my $zero_line = join(",", (0) x scalar(@header_line_vals));
+
+        # output headerline
+        open(my $ofh, '>', $representative_headerline_file)
+            or die "Can't write $representative_headerline_file: $!\n";
+        print $ofh "$header_line\n";
+        close($ofh)
+            or die "Can't finish writing $representative_headerline_file: $!\n";
+
+        # output zeroline
+        open($ofh, '>', $representative_zeroline_file)
+            or die "Can't write $representative_zeroline_file: $!\n";
+        print $ofh "$zero_line\n";
+        close($ofh)
+            or die "Can't finish writing $representative_zeroline_file: $!\n";
+
+        $fallback_rec->{'headerline'} = $header_line;
+        $fallback_rec->{'zeroline'}   = $zero_line;
+    }
+    else {
+        # read in files
+        print "Reading in Representative Fallback files for CSV Header and Zero-val files:\n";
+        print "  $representative_headerline_file and $representative_zeroline_file\n";
+        print "\n";
+
+        open(my $ifh, '<', $representative_headerline_file)
+            or die "Can't read $representative_headerline_file: $!\n";
+        chomp(my @header_lines = <$ifh>);
+        close($ifh);
+
+        open($ifh, '<', $representative_zeroline_file)
+            or die "Can't read $representative_zeroline_file: $!\n";
+        chomp(my @zero_lines = <$ifh>);
+        close($ifh);
+
+        # An empty cache file would otherwise yield undefined fallback lines
+        if (!defined($header_lines[0]) || !defined($zero_lines[0])) {
+            die "Error: $representative_headerline_file or $representative_zeroline_file is empty; delete both files so they can be regenerated\n";
+        }
+
+        $fallback_rec->{'headerline'} = $header_lines[0];
+        $fallback_rec->{'zeroline'}   = $zero_lines[0];
+    }
+
+    return $fallback_rec;
+}
+# Script entry point.
+#
+# Expects exactly three command-line arguments (frame step in seconds, frame
+# duration in seconds, audio input file); otherwise a usage message is
+# printed to STDERR and the script exits with status 1.  The output CSV path
+# is the audio file path with its extension replaced by
+# "_essentiafeatures_frames.csv"; when that file already exists the
+# frame-by-frame computation is skipped.
+#
+# NOTE(review): this changeset rendering interleaves removed (r34768) and
+# added (r35224) lines and its +/- markers are not reliable, so the body
+# below is not a clean listing of either revision -- confirm against the
+# repository before relying on it.
+sub main
+{
+    my $representative_audio_file = "import/00/ds_20491_4100.m4a";
+    if (scalar(@ARGV) != 3) {
+    print STDERR "****\n";
+    print STDERR "* Error: incorrect usage\n";
+    print STDERR "* Usage: $0 frame-step-in-secs frame-duration-in-secs audio_input_file\n";
+    print STDERR "****\n";
+    exit(1);
+    }
+    my $frame_step_secs = $ARGV[0];
+    my $frame_dur_secs  = $ARGV[1];
+    my $audio_file        = $ARGV[2];
+    my ($full_audio_root) = ($audio_file =~ m/^(.*)\..+?$/);
+    my ($audio_root)      = ($full_audio_root =~ m/^.*\/(.*?)$/);
+    if ( ! -d "tmp" ) {
+    print "Creating directory: tmp\n";
+    mkdir("tmp");
+    }
+    my $csv_file  = "${full_audio_root}_essentiafeatures_frames.csv";
+    my $pwd = cwd();
+    my $profile_template_file = "$pwd/essentia-2013-2014.profile.in";
+    my $profile_file = "$pwd/essentia-2013-2014.profile";
 # knock out any arrays in the JSON extracted features file
 my $ignore_fields="\
+    my $ignore_fields="\
  lowlevel.barkbands.* \
  lowlevel.erbbands.* \
 …
  tonal.thpcp.*";
+$ignore_fields =~ s/\n//sg;
+my $audio_cmd = "./AUDIO-DURATION.sh $audio_file";
+open(my $fh, '-|', $audio_cmd) or die $!;
+# Just need to read in one line:
+my $audio_duration = <$fh>;
+chomp($audio_duration);
+# otherwise would do
+#
+#while (my $line = <$fh>) {
+#    # Do stuff with each $line.
+#}
+close($fh);
+if ( ! -f $csv_file ) {
+    $ignore_fields =~ s/\n//sg;
+    generate_bespoke_profile(0,$frame_dur_secs,$profile_template_file,$profile_file);
+    my $fallback_rec = ensure_representative_fallback($representative_audio_file,$profile_file,$ignore_fields);
+    my $audio_duration = get_audio_duration($audio_file);
+    if ( ! -f $csv_file ) {
+    print "******\n";
+    print "* Running Essentia music extractor\n";
+    print "*  on input file:     $audio_file (duration $audio_duration)\n";
+    print "*  with profile:      $profile_file\n";
+    print "*  generating output: $csv_file\n";
+    print "****\n";
+    open(my $ofh, '>', $csv_file) or die $!;
+    for (my $t=0; $t<$audio_duration; $t+=$frame_step_secs) {
+    ##print "," if ($t>0);
+    print "*\n";
+    print "*\n";
+    print "* ### [Time step: $t]\n";
+    print "*\n";
+    print "*\n";
+    #
+    # Generate bespoke profile_file for this time-slice
+    #
+    open(my $ipfh, '<', $profile_template_file) or die $!;
+    open(my $opfh, '>', $profile_file) or die $!;
+    my $startTime = $t;
+    my $endTime   = $t + $frame_dur_secs;
+    print "******\n";
+    print "* Running Essentia music extractor\n";
+    print "*  on input file:     $audio_file (duration $audio_duration)\n";
+    print "*  with profile:      $profile_file\n";
+    print "*  generating output: $csv_file\n";
+    print "****\n";
+    while (my $line = <$ipfh>) {
+        chomp($line);
+        $line =~ s/\@startTime\@/$startTime/g;
+        $line =~ s/\@endTime\@/$endTime/g;
+        print $opfh "$line\n";
+    open(my $ofh, '>', $csv_file) or die $!;
+    for (my $t=0; $t<$audio_duration; $t+=$frame_step_secs) {
+        ##print "," if ($t>0);
+        print "*\n";
+        print "*\n";
+        print "* ### [Time step: $t]\n";
+        print "*\n";
+        print "*\n";
+        generate_bespoke_profile($t,$frame_dur_secs,$profile_template_file,$profile_file);
+        my $lines = extract_audio_features_as_csv($audio_root,$audio_file,$profile_file,$ignore_fields, $fallback_rec);
+        if (!defined($lines) ) {
+        next;
+        }
+        if ($t == 0) {
+        # output first line from $tmp_csv_file to $csv_file
+        print $ofh $lines->[0], "\n";
+        }
+        # append 2nd line of $tmp_json_file (i.e. data vals) to $csv_file
+        print $ofh $lines->[1], "\n";
+        # break out of loop if there isn't enough time left for a full $frame_dur_secs
+        my $end_of_next_frame = $t+$frame_step_secs+$frame_dur_secs;
+        last if ($end_of_next_frame > $audio_duration);
+    }
-    close($opfh);
-    close($ipfh);
+    #
-    # Extract features and convert to CSV
+    #
-    my $extract_cmd = "essentia_streaming_extractor_music $audio_file $tmp_json_file $profile_file";
-    my $extract_status = system($extract_cmd);
-    if ($extract_status != 0) {
-        print STDERR "$!\n";
-        print STDERR "Error: Failed to run command:\n";
-        print STDERR "    $extract_cmd\n";
-        next;
+    }
-    my $convert_cmd = "json_to_csv.py -i $tmp_json_file -o $tmp_csv_file --ignore $ignore_fields";
-    my $convert_status = system($convert_cmd);
-    if ($convert_status != 0) {
-        print STDERR "$!\n";
-        print STDERR "Error: Failed to run command:\n";
-        print STDERR "    $convert_cmd\n";
-        next;
+    }
-    open(my $ifh, '<', $tmp_csv_file) or die $!;
-    # read in lines to array: single line shot
-    chomp(my @lines = <$ifh>);
+    # my @lines = ();
+    #
+    # while (my $line = <$ifh>) {
+    #     chomp($line);
+    #     push(@lines,$line);
+    # }
+    close($ifh);
+    close($ofh);
+    if ($t == 0) {
+        # output first line from $tmp_csv_file to $csv_file
+        print $ofh $lines[0], "\n";
+    }
+    # append 2nd line of $tmp_json_file (i.e. data vals) to $csv_file
+    print $ofh $lines[1], "\n";
+    unlink($tmp_json_file) or die "Can't delete $tmp_json_file: $!\n";
+    unlink($tmp_csv_file)  or die "Can't delete $tmp_csv_file: $!\n";
+    # break out of loop if there isn't enough time left for a full $frame_dur_secs
+    my $end_of_next_frame = $t+$frame_step_secs+$frame_dur_secs;
+    last if ($end_of_next_frame > $audio_duration);
+    }
+    close($ofh);
+    print "******\n";
+}
+else {
+    print "*  Skipping frame-by-frame audio features computation as $csv_file already exists\n";
+}
+    print "******\n";
+    }
+    else {
+    print "*  Skipping frame-by-frame audio features computation as $csv_file already exists\n";
+    }
+}
+main();

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 35224

Legend:

main/trunk/model-sites-dev/mars/collect/amc-essentia/AUDIO-FRAME-TO-ESSENTIA-CSV-FEATURE-FILE.pl

Download in other formats: