Changeset 35196


Ignore:
Timestamp:
2021-05-30T23:12:01+12:00 (3 years ago)
Author:
davidb
Message:

av.json files are now read in and their arousal/valence values appended to a central CSV file

Location:
gs3-extensions/mars-src/trunk/perllib
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • gs3-extensions/mars-src/trunk/perllib/wekaDBBuilder.pm

    r35195 r35196  
    7878    # init Weka CSV feature file
    7979   
    80     my $adb_filename = &util::filename_cat($wekaDB_dir,"lsh-features.adb");
     80    my $weka_features_filename = &util::filename_cat($wekaDB_dir,"av-features.csv");
    8181
    82     print $outhandle "\n    creating Weka CSV Feature File\n"  if ($verbosity >= 1);
     82    print $outhandle "\n    creating Weka CSV Features File\n"  if ($verbosity >= 1);
    8383
    84     my $init_cmd = "audioDB -N -d $adb_filename";
    85     my $init_status = system($init_cmd);
    86     if ($init_status != 0) {
    87         print STDERR "Error: failed to initialize the audioDB database\n";
    88         print STDERR "         $adb_filename\n";
     84    # Create a CSV features file with the headline in it
     85    if (open(my $WEKA_FOUT, '>', $weka_features_filename)) {
     86        binmode($WEKA_FOUT,":utf8");
     87        print $WEKA_FOUT "IdentifierWithTimeOffset,Arousal,Valence\n";
     88        close($WEKA_FOUT);
     89    }
     90    else {
     91        print STDERR "Error: failed to create:\n";
     92        print STDERR "         $weka_features_filename\n";
    8993        print STDERR "       $!\n";
    90         if ($verbosity>=2) {
    91         print STDERR "       cmd: $init_cmd\n";
    92         }
    9394        return;
    9495    }
    95 
     96   
    9697    }
    9798
  • gs3-extensions/mars-src/trunk/perllib/wekaDBBuildproc.pm

    r35195 r35196  
    1 ##########################################################################
     1#########################################################################
    22#
    3 # wekaDBBuildproc.pm --
     3# wekaDBBuildproc.pm -- builds up a CSV file to be used by Weka
    44# A component of the Greenstone digital library software
    55# from the New Zealand Digital Library Project at the
     
    2424###########################################################################
    2525
    26 # This document processor outputs a document for indexing (should be
    27 # implemented by subclass) and storing in the database
    28 
    2926package wekaDBBuildproc;
    3027
     
    3229no strict 'refs'; # allow filehandles to be variables and viceversa
    3330
     31use multiread;
     32use Encode;
     33use JSON;
     34
    3435use util;
    3536
    3637use extrabuildproc;
    3738
     39my $av_time_slice    = 6; # 6 secs
     40my $av_frame_overlap = 3; # 3 secs
    3841
    3942BEGIN {
     
    5053}
    5154
     55
     # read_json_file($json_filename)
     #
     # Opens $json_filename, reads its content through a multiread reader,
     # and returns the data structure produced by JSON decoding that content.
     # If the file cannot be opened, an error (including $!) is printed to
     # STDERR and undef is returned.
     #
     # NOTE(review): the JSON decode call is not wrapped in eval; JSON's
     # decode is documented to croak on malformed input, so a bad file would
     # presumably propagate as an exception rather than return undef -- confirm.
     56sub read_json_file
     57{
     58    my $self = shift (@_);
     59    my ($json_filename) = @_;
     60
     61    my $json_decoded_data = undef;
     62   
     # Bareword handle: its package-qualified name is passed to multiread below
     63    if (open(JSON_FILE, "$json_filename")) {
     64    my $json_file_content;
     65   
     # multiread is given the handle by name and a reference to the scalar;
     # presumably read_file() fills that scalar with the whole file content
     # (multiread's implementation is not visible here -- verify)
     66    my $json_file_reader = new multiread();
     67    $json_file_reader->set_handle('wekaDBBuildproc::JSON_FILE');
     68    $json_file_reader->read_file(\$json_file_content);
     69
     70    # Next 2 lines result in an UTF8-friendly string
     71    my $json_file_content_bytes = encode('UTF-8', $json_file_content);
     72    $json_file_content = decode("utf8",$json_file_content_bytes);
     73   
     # NOTE(review): after the decode() above the content is a character
     # string, whereas a JSON object with ->utf8 enabled expects UTF-8 encoded
     # bytes. Looks harmless for ASCII-only input; confirm behaviour if the
     # file can ever contain non-ASCII text.
     74    $json_decoded_data = JSON->new->utf8->decode($json_file_content);
     75   
     76    close(JSON_FILE);
     77    }
     78    else {
     79    print STDERR "wekaDBBuildproc::read_json_file(): Failed to open $json_filename\n";
     80    print STDERR "  $!\n";
     81
     82    }
     83
     84   
     # Still undef here if the open above failed
     85    return $json_decoded_data;
     86}
    5287
    5388sub textedit {
     
    66101    my $build_dir  = $self->{'build_dir'};
    67102
    68     # full path to adb database
    69     my $adb_filename
    70     = &util::filename_cat($build_dir, "wekaDB", "lsh-features.adb");
     103    # full path to Weka CSV file
     104    my $weka_features_filename
     105    = &util::filename_cat($build_dir, "wekaDB", "av-features.csv");
    71106
    72107    # get doc id
     
    75110    # map to assoc dir
    76111    my $top_section = $doc_obj->get_top_section();
    77     my $assoc_file
    78     = $doc_obj->get_metadata_element ($top_section,"assocfilepath");
    79     my $assoc_filename = &util::filename_cat($source_dir,$assoc_file);
    80112
    81     my $chr12_filename = &util::filename_cat($assoc_filename,"doc.chr12");
    82     my $powerlog_filename = &util::filename_cat($assoc_filename,"doc.power");
     113    my $assoc_file       = $doc_obj->get_metadata_element ($top_section,"assocfilepath");
     114    my $assoc_filename   = &util::filename_cat($source_dir,$assoc_file);
    83115
    84     print $outhandle "  Inserting features for $doc_oid\n";
     116    my $av_json_filename = &util::filename_cat($assoc_filename,"av.json");
     117    my $av_json_hashmap  = $self->read_json_file($av_json_filename);
    85118
    86     my $cmd = "audioDB -d \"$adb_filename\" -I -k \"$doc_oid\" -f \"$chr12_filename\" -w \"$powerlog_filename\"";
     119    if (!defined $av_json_hashmap) {
     120    return;
     121    }
     122   
     123    print $outhandle "  Appending features for $doc_oid\n";
    87124
    88     my $status = system($cmd);
    89     if ($status != 0) {
    90     print STDERR "Error: failed to run:\n  $cmd\n$!\n";
     125    my $AV_OUT;
     126    if (!open($AV_OUT,">>","$weka_features_filename")) {
     127    print STDERR "Failed to append to $weka_features_filename\n";
     128    print STDERR "  $!\n";
     129    return;
    91130    }
     131   
     132    binmode($AV_OUT,":utf8");
     133
     134    my $arousal_vals = $av_json_hashmap->{"arousal"};
     135    my $valence_vals = $av_json_hashmap->{"valence"};
     136
     137    my $num_vals = scalar(@{$arousal_vals});
     138
     139    my $t = $av_time_slice;
     140    for (my $i=0; $i<$num_vals; $i++) {
     141    my $arousal_val = $arousal_vals->[$i];
     142    my $valence_val = $valence_vals->[$i];
     143
     144    print $AV_OUT "$doc_oid-$t,$arousal_val,$valence_val\n";
     145    $t += $av_frame_overlap;
     146    }
     147
     148    close($AV_OUT);
    92149}
    93150
Note: See TracChangeset for help on using the changeset viewer.