Changeset 35196 for gs3-extensions/mars-src/trunk
- Timestamp:
- 2021-05-30T23:12:01+12:00 (3 years ago)
- Location:
- gs3-extensions/mars-src/trunk/perllib
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
gs3-extensions/mars-src/trunk/perllib/wekaDBBuilder.pm
r35195 r35196 78 78 # init Weka CSV feature file 79 79 80 my $ adb_filename = &util::filename_cat($wekaDB_dir,"lsh-features.adb");80 my $weka_features_filename = &util::filename_cat($wekaDB_dir,"av-features.csv"); 81 81 82 print $outhandle "\n creating Weka CSV Feature File\n" if ($verbosity >= 1);82 print $outhandle "\n creating Weka CSV Features File\n" if ($verbosity >= 1); 83 83 84 my $init_cmd = "audioDB -N -d $adb_filename"; 85 my $init_status = system($init_cmd); 86 if ($init_status != 0) { 87 print STDERR "Error: failed to initialize the audioDB database\n"; 88 print STDERR " $adb_filename\n"; 84 # Create a CSV features file with the headline in it 85 if (open(my $WEKA_FOUT, '>', $weka_features_filename)) { 86 binmode($WEKA_FOUT,":utf8"); 87 print $WEKA_FOUT "IdentifierWithTimeOffset,Arousal,Valence\n"; 88 close($WEKA_FOUT); 89 } 90 else { 91 print STDERR "Error: failed to create:\n"; 92 print STDERR " $weka_features_filename\n"; 89 93 print STDERR " $!\n"; 90 if ($verbosity>=2) {91 print STDERR " cmd: $init_cmd\n";92 }93 94 return; 94 95 } 95 96 96 97 } 97 98 -
gs3-extensions/mars-src/trunk/perllib/wekaDBBuildproc.pm
r35195 r35196 1 ######################################################################### #1 ######################################################################### 2 2 # 3 # wekaDBBuildproc.pm -- 3 # wekaDBBuildproc.pm -- builds up a CSV file to be used by Weka 4 4 # A component of the Greenstone digital library software 5 5 # from the New Zealand Digital Library Project at the … … 24 24 ########################################################################### 25 25 26 # This document processor outputs a document for indexing (should be27 # implemented by subclass) and storing in the database28 29 26 package wekaDBBuildproc; 30 27 … … 32 29 no strict 'refs'; # allow filehandles to be variables and viceversa 33 30 31 use multiread; 32 use Encode; 33 use JSON; 34 34 35 use util; 35 36 36 37 use extrabuildproc; 37 38 39 my $av_time_slice = 6; # 6 secs 40 my $av_frame_overlap = 3; # 3 secs 38 41 39 42 BEGIN { … … 50 53 } 51 54 55 56 sub read_json_file 57 { 58 my $self = shift (@_); 59 my ($json_filename) = @_; 60 61 my $json_decoded_data = undef; 62 63 if (open(JSON_FILE, "$json_filename")) { 64 my $json_file_content; 65 66 my $json_file_reader = new multiread(); 67 $json_file_reader->set_handle('wekaDBBuildproc::JSON_FILE'); 68 $json_file_reader->read_file(\$json_file_content); 69 70 # Next 2 lines result in an UTF8-friendly string 71 my $json_file_content_bytes = encode('UTF-8', $json_file_content); 72 $json_file_content = decode("utf8",$json_file_content_bytes); 73 74 $json_decoded_data = JSON->new->utf8->decode($json_file_content); 75 76 close(JSON_FILE); 77 } 78 else { 79 print STDERR "wekaDBBuildproc::read_json_file(): Failed to open $json_filename\n"; 80 print STDERR " $!\n"; 81 82 } 83 84 85 return $json_decoded_data; 86 } 52 87 53 88 sub textedit { … … 66 101 my $build_dir = $self->{'build_dir'}; 67 102 68 # full path to adb database69 my $ adb_filename70 = &util::filename_cat($build_dir, "wekaDB", " lsh-features.adb");103 # full path to Weka CSV file 104 my $weka_features_filename 105 = &util::filename_cat($build_dir, "wekaDB", "av-features.csv"); 71 106 72 107 # get doc id … … 75 110 # map to assoc dir 76 111 my $top_section = $doc_obj->get_top_section(); 77 my $assoc_file78 = $doc_obj->get_metadata_element ($top_section,"assocfilepath");79 my $assoc_filename = &util::filename_cat($source_dir,$assoc_file);80 112 81 my $ chr12_filename = &util::filename_cat($assoc_filename,"doc.chr12");82 my $ powerlog_filename = &util::filename_cat($assoc_filename,"doc.power");113 my $assoc_file = $doc_obj->get_metadata_element ($top_section,"assocfilepath"); 114 my $assoc_filename = &util::filename_cat($source_dir,$assoc_file); 83 115 84 print $outhandle " Inserting features for $doc_oid\n"; 116 my $av_json_filename = &util::filename_cat($assoc_filename,"av.json"); 117 my $av_json_hashmap = $self->read_json_file($av_json_filename); 85 118 86 my $cmd = "audioDB -d \"$adb_filename\" -I -k \"$doc_oid\" -f \"$chr12_filename\" -w \"$powerlog_filename\""; 119 if (!defined $av_json_hashmap) { 120 return; 121 } 122 123 print $outhandle " Appending features for $doc_oid\n"; 87 124 88 my $status = system($cmd); 89 if ($status != 0) { 90 print STDERR "Error: failed to run:\n $cmd\n$!\n"; 125 my $AV_OUT; 126 if (!open($AV_OUT,">>","$weka_features_filename")) { 127 print STDERR "Failed to append to $weka_features_filename\n"; 128 print STDERR " $!\n"; 129 return; 91 130 } 131 132 binmode($AV_OUT,":utf8"); 133 134 my $arousal_vals = $av_json_hashmap->{"arousal"}; 135 my $valence_vals = $av_json_hashmap->{"valence"}; 136 137 my $num_vals = scalar(@{$arousal_vals}); 138 139 my $t = $av_time_slice; 140 for (my $i=0; $i<$num_vals; $i++) { 141 my $arousal_val = $arousal_vals->[$i]; 142 my $valence_val = $valence_vals->[$i]; 143 144 print $AV_OUT "$doc_oid-$t,$arousal_val,$valence_val\n"; 145 $t += $av_frame_overlap; 146 } 147 148 close($AV_OUT); 92 149 } 93 150
Note:
See TracChangeset
for help on using the changeset viewer.