###########################################################################
#
# jenaTDBBuildproc.pm --
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

# This document processor outputs a document for indexing (should be
# implemented by subclass) and storing in the database

package jenaTDBBuildproc;

use strict;
no strict 'refs'; # allow filehandles to be variables and viceversa

use docprint;
use util;
use FileUtils;
use extrabuildproc;

BEGIN {
    @jenaTDBBuildproc::ISA = ('extrabuildproc');
}

# Constructor.
#
# Builds the object through extrabuildproc's constructor (all arguments are
# forwarded unchanged), then prepares a per-collection copy of the
# gsdom2rdf.xsl stylesheet in the collection-level tmp directory, with the
# @libraryurl@ and @collect@ placeholders filled in.
#
# Sets on $self:
#   tmp_dir        - collection-level tmp directory
#   xslt_file      - stylesheet base name ("gsdom2rdf.xsl")
#   xslt_filename  - full path of the substituted stylesheet copy
#
# Dies if the stylesheet cannot be located by util::locate_config_file.
sub new
{
    my $class = shift @_;

    my $self = extrabuildproc->new(@_);

    # Do the following here so it doesn't keep checking (within the util.pm method)
    # whether it needs to create the directory or not
    my $tmp_dir = &util::get_collectlevel_tmp_dir();
    $self->{'tmp_dir'} = $tmp_dir;

    my $xslt_file_in = "gsdom2rdf.xsl";

    my $xslt_filename_in = &util::locate_config_file($xslt_file_in);
    if (!defined $xslt_filename_in) {
        print STDERR "Can not find $xslt_file_in, please make sure you have supplied the correct file path\n";
        die "\n";
    }

    my $xslt_filename_out = &FileUtils::filenameConcatenate($tmp_dir,$xslt_file_in);

    my $collection = $self->{'collection'};
    my $url_prefix = &util::get_full_greenstone_url_prefix();

    my $property_hashmap = { 'libraryurl' => $url_prefix,
                             'collect'    => $collection };

    # A failed copy is reported on STDERR by the helper; construction
    # continues regardless, as it always has.
    file_copy_with_property_sub($xslt_filename_in,$xslt_filename_out,$property_hashmap);

    $self->{'xslt_file'} = $xslt_file_in;
    $self->{'xslt_filename'} = $xslt_filename_out;

    return bless $self, $class;
}

# Resolves one placeholder name against the property hashmap.
#
#   $hashmap - hash reference of property name => replacement value
#   $value   - the placeholder name (the text between the two '@' signs)
#
# Returns the mapped value when one is defined; otherwise returns the
# placeholder re-wrapped in '@' signs so the text is left unchanged.
sub property_lookup
{
    my ($hashmap,$value) = @_;

    # Debug trace of every lookup performed
    print STDERR "*** checking value = '$value'\n";
    print STDERR "*** lookup = ", $hashmap->{$value}, "\n";

    my $lookup = (defined $hashmap->{$value}) ? $hashmap->{$value} : "\@$value\@";

    return $lookup;
}

# Performs a text file copy, substituting substrings of the form
# @xxx@ in the input file with the values set in the hashmap
# passed in
#
#   $filename_in      - file to read (decoded with the :utf8 layer)
#   $filename_out     - file to write (encoded with the :utf8 layer)
#   $property_hashmap - hash reference used by property_lookup()
#
# Returns 1 on success.  On failure an error is printed to STDERR and an
# empty return is made.
sub file_copy_with_property_sub
{
    my ($filename_in,$filename_out,$property_hashmap) = @_;

    my $fin;
    if (!open($fin, '<:utf8', $filename_in)) {
        print STDERR "jenaTDBBuildproc::file_copy_with_property_sub failed to open $filename_in\n $!\n";
        return;
    }

    my $fout;
    if (!open($fout, '>:utf8', $filename_out)) {
        # Report before closing so that $! still describes the open failure
        print STDERR "jenaTDBBuildproc::file_copy_with_property_sub failed to open $filename_out\n $!\n";
        close($fin);
        return;
    }

    while (defined(my $line = <$fin>)) {
        $line =~ s/\@([^@ ]+)\@/&property_lookup($property_hashmap,$1)/ige;
        print $fout $line;
    }

    close($fin);

    # Buffered write errors only surface when the output handle is closed
    if (!close($fout)) {
        print STDERR "jenaTDBBuildproc::file_copy_with_property_sub failed to write $filename_out\n $!\n";
        return;
    }

    return 1;
}

# Opens a pipe to the Java ApplyXSLT program and stores the write handle in
# $self->{'xslt_writer'}.
#
#   $output_file_name - written to the pipe as the second line sent
#   $xslt_file        - stylesheet passed to ApplyXSLT with -t
#
# If $self->{'mapping_file'} is set and non-empty it is passed with -m.
#
# Returns without opening anything (leaving 'xslt_writer' untouched) when
# $xslt_file is undefined, empty, or does not exist.  Dies if the pipe
# cannot be opened.
sub open_xslt_pipe
{
    my $self = shift @_;
    my ($output_file_name, $xslt_file)=@_;

    return unless defined $xslt_file and $xslt_file ne "" and &FileUtils::fileExists($xslt_file);

    my $apply_xslt_jar = &FileUtils::javaFilenameConcatenate($ENV{'GSDLHOME'},"bin","java","ApplyXSLT.jar");
    my $xalan_jar      = &FileUtils::javaFilenameConcatenate($ENV{'GSDLHOME'},"bin","java","xalan.jar");

    my $java_class_path = &util::javapathname_cat($apply_xslt_jar,$xalan_jar);

    $xslt_file = &util::makeFilenameJavaCygwinCompatible($xslt_file);

    my $java_cmd = "java -cp \"$java_class_path\" org.nzdl.gsdl.ApplyXSLT -t \"$xslt_file\" ";

    if (defined $self->{'mapping_file'} and $self->{'mapping_file'} ne "") {
        my $mapping_file_path = "\"".$self->{'mapping_file'}."\"";
        $java_cmd .= "-m $mapping_file_path";
    }

    # Lexical handle rather than a package-global glob; '|-' means "write to
    # the command's standard input".
    open(my $xml_writer, '|-', $java_cmd)
        or die "can't open pipe to xslt: $!";

    $self->{'xslt_writer'} = $xml_writer;

    # NOTE(review): only a bare newline is sent ahead of the output file
    # name -- confirm this matches what ApplyXSLT expects on its stdin.
    print $xml_writer "\n";
    print $xml_writer "$output_file_name\n";
}

# Finishes the current XSLT pipe, if one is open: writes a final newline,
# closes the handle and clears $self->{'xslt_writer'}.
sub close_xslt_pipe
{
    my $self = shift @_;

    return unless defined $self->{'xslt_writer'} ;

    my $xsltwriter = $self->{'xslt_writer'};

    # NOTE(review): only a bare newline is sent as the terminator -- confirm
    # this matches what ApplyXSLT expects on its stdin.
    print $xsltwriter "\n";
    close($xsltwriter);

    undef $self->{'xslt_writer'};
}

# Converts one document to a temporary "doc-<OID>.ttl" file by piping its
# section XML through the XSLT stylesheet, then loads that file with the
# gs-triplestore-add command and removes it.
#
#   $doc_obj - document object; must provide get_OID() and get_top_section()
#
# Any further arguments (text() and textreindex() pass a file and a mode
# string) are ignored.
#
# Dies if no XSLT pipe could be opened.  A failure of gs-triplestore-add, or
# a missing generated file, is reported on STDERR without dying.
sub textedit
{
    my $self = shift (@_);
    my ($doc_obj) = @_;

    my $doc_oid = $doc_obj->get_OID();

    my $tmp_dir = $self->{'tmp_dir'};
    my $tmp_doc_filename    = &FileUtils::filenameConcatenate($tmp_dir,"doc-$doc_oid.ttl");
    my $tmp_doc_filename_cc = &util::makeFilenameJavaCygwinCompatible($tmp_doc_filename);

    my $xslt_filename = $self->{'xslt_filename'};

    # dies if the pipe command cannot be started
    $self->open_xslt_pipe($tmp_doc_filename_cc, $xslt_filename);

    my $outhandler = $self->{'xslt_writer'};
    if (!defined $outhandler) {
        # open_xslt_pipe returns without opening a pipe when the stylesheet
        # is missing; fail here with an explicit message instead of printing
        # to an undefined handle.
        my $shown = (defined $xslt_filename) ? $xslt_filename : "(undefined)";
        die "jenaTDBBuildproc::textedit: no XSLT pipe for $doc_oid (stylesheet $shown not found?)";
    }
    binmode($outhandler,":utf8");

    my $section_text = &docprint::get_section_xml($doc_obj,$doc_obj->get_top_section());
    print $outhandler $section_text;

    $self->close_xslt_pipe();

    # now feed the generated file to jena's (TDB) triple store
    my $outhandle = $self->{'outhandle'};
    print $outhandle " Inserting tripples for $doc_oid\n";

    my $collection = $self->{'collection'};

    if (-f $tmp_doc_filename) {
        my $cmd = "gs-triplestore-add $collection \"$tmp_doc_filename\"";

        my $status = system($cmd);
        if ($status != 0) {
            # $! is only meaningful when the command could not be started
            # (system returns -1); otherwise report the exit code.
            my $detail = ($status == -1)
                ? "$!"
                : "exit code " . ($status >> 8) . " (wait status $status)";
            print STDERR "Error: failed to run:\n $cmd\n$detail\n";
        }

        unlink $tmp_doc_filename;
    }
    else {
        print STDERR "*** Failed to generate: $tmp_doc_filename\n";
    }
}

# Adds a document: delegates to textedit().
sub text
{
    my $self = shift (@_);
    my ($doc_obj,$file) = @_;

    $self->textedit($doc_obj,$file,"add");
}

# Reindexes a document: delegates to textedit().
sub textreindex
{
    my $self = shift @_;
    my ($doc_obj,$file) = @_;

    $self->textedit($doc_obj,$file,"update");
}

# Deleting a document is not supported; only a warning is printed.
sub textdelete
{
    my $self = shift @_;
    my ($doc_obj,$file) = @_;

    print STDERR "Warning: jenaTDB command-line does not currently support delete operation\n";

    # $self->textedit($doc_obj,$file,"delete");
}

1;