Changeset 28468
- Timestamp:
- 2013-10-16T17:50:17+13:00 (11 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gs2-extensions/apache-jena/trunk/src/perllib/jenaTDBBuildproc.pm
r28410 r28468 49 49 my $self = new extrabuildproc (@_); 50 50 51 my $xslt_file = "gsdom2rdf.xsl";52 53 my $xslt_filename = &util::locate_config_file($xslt_file);54 if (!defined $xslt_filename) {55 print STDERR "Can not find $xslt_file, please make sure you have supplied the correct file path\n";56 die "\n";57 }58 59 $self->{'xslt_file'} = $xslt_file;60 $self->{'xslt_filename'} = $xslt_filename;61 62 51 # Do the following here so it doesn't keep checking (within the util.pm method) 63 52 # whether it needs to create the directory or not … … 65 54 $self->{'tmp_dir'} = $tmp_dir; 66 55 56 57 my $xslt_file_in = "gsdom2rdf.xsl"; 58 59 my $xslt_filename_in = &util::locate_config_file($xslt_file_in); 60 if (!defined $xslt_filename_in) { 61 print STDERR "Can not find $xslt_file_in, please make sure you have supplied the correct file path\n"; 62 die "\n"; 63 } 64 65 my $xslt_filename_out = &FileUtils::filenameConcatenate($tmp_dir,$xslt_file_in); 66 67 my $collection = $self->{'collection'}; 68 69 my $url_prefix = &util::get_full_greenstone_url_prefix(); 70 71 my $property_hashmap = { 'libraryurl' => $url_prefix, 72 'collect' => $collection }; 73 74 file_copy_with_property_sub($xslt_filename_in,$xslt_filename_out,$property_hashmap); 75 76 $self->{'xslt_file'} = $xslt_file_in; 77 $self->{'xslt_filename'} = $xslt_filename_out; 78 67 79 return bless $self, $class; 68 80 } 69 81 70 82 83 sub property_lookup 84 { 85 my ($hashmap,$value) = @_; 86 87 print STDERR "*** checking value = '$value'\n"; 88 89 print STDERR "*** lookup = ", $hashmap->{$value}, "\n"; 90 91 my $lookup = (defined $hashmap->{$value}) ? $hashmap->{$value} : "\@$value\@"; 92 93 return $lookup; 94 } 95 96 97 # Performs a text file copy, substituding substings of the form 98 # @xxx@ in the input file with the values set in hashmap 99 # passed in 100 101 sub file_copy_with_property_sub 102 { 103 my ($filename_in,$filename_out,$property_hashmap) = @_; 104 105 if (!open(FIN, "<$filename_in")) { 106 print STDERR "util::file_substitute_at_properteis failed to open $filename_in\n $!\n"; 107 return; 108 } 109 binmode(FIN,":utf8"); 110 111 if (!open(FOUT, ">$filename_out")) { 112 print STDERR "util::file_substitute_at_properteis failed to open $filename_out\n $!\n"; 113 return; 114 } 115 binmode(FOUT,":utf8"); 116 117 my $line; 118 while (defined($line = <FIN>)) { 119 120 $line =~ s/\@([^@ ]+)\@/&property_lookup($property_hashmap,$1)/ige; 121 122 print FOUT $line; 123 } 124 125 close(FIN); 126 close(FOUT); 127 } 71 128 72 129 … … 88 145 89 146 my $cmd = "| java -cp \"$java_class_path\" org.nzdl.gsdl.ApplyXSLT -t \"$xslt_file\" "; 147 90 148 91 149 if (defined $self->{'mapping_file'} and $self->{'mapping_file'} ne ""){ … … 94 152 } 95 153 96 print STDERR "*** cmd = $cmd\n";97 98 154 open(*XMLWRITER, $cmd) 99 155 or die "can't open pipe to xslt: $!"; 100 101 156 102 157 $self->{'xslt_writer'} = *XMLWRITER; … … 133 188 134 189 my $tmp_dir = $self->{'tmp_dir'}; 135 my $tmp_doc_filename = &FileUtils::filenameConcatenate($tmp_dir,"doc-$doc_oid. xml");136 $tmp_doc_filename= &util::makeFilenameJavaCygwinCompatible($tmp_doc_filename);190 my $tmp_doc_filename = &FileUtils::filenameConcatenate($tmp_dir,"doc-$doc_oid.ttl"); 191 my $tmp_doc_filename_cc = &util::makeFilenameJavaCygwinCompatible($tmp_doc_filename); 137 192 138 193 my $xslt_filename = $self->{'xslt_filename'}; 139 $self->open_xslt_pipe($tmp_doc_filename , $xslt_filename); # stops with error if not able to open pipe194 $self->open_xslt_pipe($tmp_doc_filename_cc, $xslt_filename); # stops with error if not able to open pipe 140 195 141 196 my $outhandler = $self->{'xslt_writer'}; … … 147 202 $self->close_xslt_pipe(); 148 203 149 150 204 # now feed to generated file to jena's (TDB) tripple store 151 205 152 153 } 154 155 156 sub texteditMG { 157 my $self = shift (@_); 158 my ($doc_obj) = @_; 159 my $handle = $self->{'output_handle'}; 160 161 my $doc_oid = $doc_obj->get_OID(); 162 163 164 my $doc_section = 0; # just for this document 165 166 167 my $text = ""; 168 my $text_extra = ""; 169 170 # get the text for this document 171 my $section = $doc_obj->get_top_section(); 172 while (defined $section) { 173 # update a few statistics 174 $doc_section++; 175 176 my $title = $doc_obj->get_metadata_element($section, "Title"); 177 178 if (defined $title && ($title =~ m/\S/)) { 179 print "$doc_oid: Title = $title\n"; 180 } 181 182 my $dc_title = $doc_obj->get_metadata_element($section, "dc.Title"); 183 184 if (defined $dc_title && ($dc_title =~ m/\S/)) { 185 print "$doc_oid: dc.Title = $dc_title\n"; 186 } 187 188 189 my $id3_title = $doc_obj->get_metadata_element($section, "ex.ID3.Title"); 190 191 if (defined $id3_title && ($id3_title =~ m/\S/)) { 192 print "$doc_oid: id3.Title = $id3_title\n"; 206 my $outhandle = $self->{'outhandle'}; 207 print $outhandle " Inserting tripples for $doc_oid\n"; 208 209 my $collection = $self->{'collection'}; 210 211 if (-f $tmp_doc_filename) { 212 213 my $cmd = "gs-triplestore-add $collection \"$tmp_doc_filename\""; 214 215 my $status = system($cmd); 216 if ($status != 0) { 217 print STDERR "Error: failed to run:\n $cmd\n$!\n"; 193 218 } 194 219 195 $section = $doc_obj->get_next_section($section); 196 } 197 198 print $handle "$text$text_extra"; 199 } 200 201 202 203 sub texteditADB { 204 my $self = shift (@_); 205 my ($doc_obj,$file,$mode) = @_; 206 207 # Code written on the assumption that that jenaTDB does a replace 208 # operation when presented with a docid that already extis. 209 # => don't need to do anything special to distinguish between 210 # a mode of "add" and "update" 211 212 my $outhandle = $self->{'outhandle'}; 213 214 215 my $source_dir = $self->{'source_dir'}; # typically the archives dir 216 my $build_dir = $self->{'build_dir'}; 217 218 # full path to adb database 219 my $adb_filename 220 = &util::filename_cat($build_dir, "jenaTDB", "lsh-features.adb"); 221 222 # get doc id 223 my $doc_oid = $doc_obj->get_OID(); 224 225 # map to assoc dir 226 my $top_section = $doc_obj->get_top_section(); 227 my $assoc_file 228 = $doc_obj->get_metadata_element ($top_section,"assocfilepath"); 229 my $assoc_filename = &util::filename_cat($source_dir,$assoc_file); 230 231 my $chr12_filename = &util::filename_cat($assoc_filename,"doc.chr12"); 232 my $powerlog_filename = &util::filename_cat($assoc_filename,"doc.power"); 233 234 print $outhandle " Inserting tripples for $doc_oid\n"; 235 236 # my $cmd = "jenaTDB -d \"$adb_filename\" -I -k \"$doc_oid\" -f \"$chr12_filename\" -w \"$powerlog_filename\""; 237 238 # my $status = system($cmd); 239 # if ($status != 0) { 240 # print STDERR "Error: failed to run:\n $cmd\n$!\n"; 241 # } 242 243 } 220 unlink $tmp_doc_filename; 221 } 222 else { 223 print STDERR "*** Failed to generate: $tmp_doc_filename\n"; 224 } 225 226 } 227 244 228 245 229 sub text {
Note:
See TracChangeset
for help on using the changeset viewer.