root/gs2-extensions/apache-jena/trunk/src/perllib/jenaTDBBuildproc.pm @ 28468

Revision 28468, 6.5 KB (checked in by davidb, 7 years ago)

Further development of this Perl module.

Line 
1##########################################################################
2#
3# jenaTDBBuildproc.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# This document processor outputs a document for indexing (should be
27# implemented by subclass) and storing in the database
28
29package jenaTDBBuildproc;
30
31use strict;
32no strict 'refs'; # allow filehandles to be variables and viceversa
33
34use docprint;
35use util;
36use FileUtils;
37
38use extrabuildproc;
39
40
# Establish the inheritance chain at compile time: this build processor
# is a subclass of extrabuildproc.
BEGIN {
    @jenaTDBBuildproc::ISA = qw(extrabuildproc);
}
44
# Constructor.
#
# Builds the object through the extrabuildproc constructor (all arguments
# are passed through unchanged), then writes a copy of the gsdom2rdf.xsl
# stylesheet into the collection-level tmp directory, replacing any
# @libraryurl@ and @collect@ placeholders on the way.
#
# Fields set on the object:
#   tmp_dir        collection-level tmp directory
#   xslt_file      name of the stylesheet ("gsdom2rdf.xsl")
#   xslt_filename  full path of the substituted copy in tmp_dir
#
# Dies if gsdom2rdf.xsl cannot be located.
sub new
{
    my $class = shift @_;

    # Direct method call rather than the indirect-object form
    # ("new extrabuildproc (...)"), which perl has to parse heuristically.
    my $self = extrabuildproc->new(@_);

    # Do the following here so it doesn't keep checking (within the util.pm method)
    # whether it needs to create the directory or not
    my $tmp_dir = &util::get_collectlevel_tmp_dir();
    $self->{'tmp_dir'} = $tmp_dir;

    my $xslt_file_in = "gsdom2rdf.xsl";

    my $xslt_filename_in = &util::locate_config_file($xslt_file_in);
    if (!defined $xslt_filename_in) {
        print STDERR "Can not find $xslt_file_in, please make sure you have supplied the correct file path\n";
        die "\n";
    }

    my $xslt_filename_out = &FileUtils::filenameConcatenate($tmp_dir,$xslt_file_in);

    # Values substituted for the @libraryurl@ / @collect@ placeholders
    my $property_hashmap = {
        'libraryurl' => &util::get_full_greenstone_url_prefix(),
        'collect'    => $self->{'collection'},
    };

    file_copy_with_property_sub($xslt_filename_in,$xslt_filename_out,$property_hashmap);

    $self->{'xslt_file'}     = $xslt_file_in;
    $self->{'xslt_filename'} = $xslt_filename_out;

    return bless $self, $class;
}
81
82
# Resolves a single property name against a hashmap.
#
#   $hashmap  hash reference mapping property names to replacement values
#   $value    the property name to look up
#
# Returns the mapped value when one is defined; otherwise returns the
# name wrapped in '@' characters again, so that an unknown placeholder
# is left as it was.  Both the name and the raw lookup result are
# traced to STDERR.
sub property_lookup
{
    my ($hashmap,$value) = @_;

    print STDERR "*** checking value = '$value'\n";

    my $mapped = $hashmap->{$value};

    print STDERR "*** lookup = ", $mapped, "\n";

    return $mapped if defined $mapped;

    return "\@$value\@";
}
95
96
# Performs a text file copy, substituting substrings of the form
# @xxx@ in the input file with the values set in the hashmap
# passed in.
#
#   $filename_in       file to read (read through the :utf8 layer)
#   $filename_out      file to write (created/truncated, :utf8 layer)
#   $property_hashmap  hash reference of placeholder name => value; each
#                      placeholder is resolved through property_lookup()
#
# Returns 1 on success.  On failure a message is printed to STDERR and
# nothing (undef / empty list) is returned.
sub file_copy_with_property_sub
{
    my ($filename_in,$filename_out,$property_hashmap) = @_;

    # Three-argument open with lexical handles: the filename can never be
    # misread as an open mode, and the handles are not package globals.
    my $fin;
    if (!open($fin, "<", $filename_in)) {
        print STDERR "jenaTDBBuildproc::file_copy_with_property_sub failed to open $filename_in\n  $!\n";
        return;
    }
    binmode($fin,":utf8");

    my $fout;
    if (!open($fout, ">", $filename_out)) {
        # Report first (close could disturb $!), then release the input handle
        print STDERR "jenaTDBBuildproc::file_copy_with_property_sub failed to open $filename_out\n  $!\n";
        close($fin);
        return;
    }
    binmode($fout,":utf8");

    while (defined(my $line = <$fin>)) {

        # A placeholder is any run of characters, other than '@' and space,
        # enclosed between two '@' characters
        $line =~ s/\@([^@ ]+)\@/property_lookup($property_hashmap,$1)/ge;

        print $fout $line;
    }

    close($fin);

    # Buffered write errors only show up when the output handle is closed
    if (!close($fout)) {
        print STDERR "jenaTDBBuildproc::file_copy_with_property_sub failed to write $filename_out\n  $!\n";
        return;
    }

    return 1;
}
128
129
# Opens a write pipe to the Java ApplyXSLT program and stores the handle
# in $self->{'xslt_writer'}.
#
#   $output_file_name  written to the pipe on the line following the
#                      <?DocStart?> marker
#   $xslt_file         stylesheet passed to ApplyXSLT with -t
#
# If $self->{'mapping_file'} is set and non-empty it is passed with -m.
#
# Returns nothing, without opening a pipe, when $xslt_file is undefined,
# empty, or does not exist.  Dies if the pipe cannot be opened.
# Returns 1 once the pipe is open and the document header has been sent.
sub open_xslt_pipe
{
    my $self = shift @_;
    my ($output_file_name, $xslt_file)=@_;

    return unless defined $xslt_file and $xslt_file ne "" and &FileUtils::fileExists($xslt_file);

    my $apply_xslt_jar = &FileUtils::javaFilenameConcatenate($ENV{'GSDLHOME'},"bin","java","ApplyXSLT.jar");
    my $xalan_jar      = &FileUtils::javaFilenameConcatenate($ENV{'GSDLHOME'},"bin","java","xalan.jar");

    my $java_class_path = &util::javapathname_cat($apply_xslt_jar,$xalan_jar);

    $xslt_file = &util::makeFilenameJavaCygwinCompatible($xslt_file);

    my $cmd = "java -cp \"$java_class_path\" org.nzdl.gsdl.ApplyXSLT -t \"$xslt_file\" ";

    if (defined $self->{'mapping_file'} and $self->{'mapping_file'} ne "") {
        $cmd .= "-m \"" . $self->{'mapping_file'} . "\"";
    }

    # Three-argument pipe open ("|-" = write to the command) on a lexical
    # handle, so the handle is private to this object rather than a
    # package-wide glob.
    open(my $xml_writer, "|-", $cmd)
        or die "can't open pipe to xslt: $!";

    $self->{'xslt_writer'} = $xml_writer;

    print $xml_writer "<?DocStart?>\n";
    print $xml_writer "$output_file_name\n";

    return 1;
}
163 
164
# Finishes the document being sent through the XSLT pipe: writes the
# <?DocEnd?> marker, closes the handle held in $self->{'xslt_writer'} and
# resets that field to undef.
#
# Does nothing when no handle is currently stored.  A failed close is
# reported on STDERR; processing continues.
sub close_xslt_pipe
{
    my $self = shift @_;

    return unless defined $self->{'xslt_writer'};

    my $xsltwriter = $self->{'xslt_writer'};

    print $xsltwriter "<?DocEnd?>\n";

    # For a piped handle, close() also fails when the child exits with a
    # non-zero status; in that case $! is 0 and the status is in $?.
    if (!close($xsltwriter)) {
        my $reason = $! ? "$!" : "exit status " . ($? >> 8);
        print STDERR "Warning: problem closing pipe to xslt: $reason\n";
    }

    undef $self->{'xslt_writer'};

    return;
}
180
181
# Converts one document to a Turtle (.ttl) file via the XSLT pipe and
# loads that file into the triple store with gs-triplestore-add.
#
#   $doc_obj  document object; its OID names the temporary file and its
#             top section is serialised as the XSLT input
#
# Callers in this module pass further arguments (file, edit mode); they
# are not used here.
#
# Problems (no pipe, no generated file, failed command) are reported on
# STDERR.  The temporary .ttl file is removed after the load attempt.
sub textedit {
    my $self = shift (@_);
    my ($doc_obj) = @_;

    my $doc_oid = $doc_obj->get_OID();

    my $tmp_dir = $self->{'tmp_dir'};
    my $tmp_doc_filename    = &FileUtils::filenameConcatenate($tmp_dir,"doc-$doc_oid.ttl");
    my $tmp_doc_filename_cc = &util::makeFilenameJavaCygwinCompatible($tmp_doc_filename);

    my $xslt_filename = $self->{'xslt_filename'};
    $self->open_xslt_pipe($tmp_doc_filename_cc, $xslt_filename); # stops with error if not able to open pipe

    # open_xslt_pipe returns without opening anything when the stylesheet
    # is missing; printing to an undefined handle would be fatal.
    my $outhandler = $self->{'xslt_writer'};
    if (!defined $outhandler) {
        print STDERR "*** Failed to open XSLT pipe for: $tmp_doc_filename\n";
        return;
    }
    binmode($outhandler,":utf8");

    my $section_text = &docprint::get_section_xml($doc_obj,$doc_obj->get_top_section());
    print $outhandler $section_text;

    $self->close_xslt_pipe();

    # now feed the generated file to jena's (TDB) triple store

    my $outhandle = $self->{'outhandle'};
    print $outhandle "  Inserting tripples for $doc_oid\n";

    my $collection = $self->{'collection'};

    if (-f $tmp_doc_filename) {

        my $cmd = "gs-triplestore-add $collection \"$tmp_doc_filename\"";

        my $status = system($cmd);
        if ($status != 0) {
            # $! is only meaningful when the command could not be started
            # (system returns -1); otherwise the exit code is in the high
            # byte of the returned status.
            my $reason = ($status == -1) ? "$!" : "exit status " . ($status >> 8);
            print STDERR "Error: failed to run:\n  $cmd\n$reason\n";
        }

        unlink $tmp_doc_filename;
    }
    else {
        print STDERR "*** Failed to generate: $tmp_doc_filename\n";
    }

}
227
228
# Handles a document being added: forwards the document object and file
# to textedit() together with the mode string "add".
sub text {
    my ($self, $doc_obj, $file) = @_;

    return $self->textedit($doc_obj, $file, "add");
}
235
# Handles a document being reindexed: forwards the document object and
# file to textedit() together with the mode string "update".
sub textreindex
{
    my ($self, $doc_obj, $file) = @_;

    return $self->textedit($doc_obj, $file, "update");
}
243
# Handles a document being deleted.  Deletion is not carried out: a
# warning is printed to STDERR and the arguments are otherwise ignored.
sub textdelete
{
    my ($self, $doc_obj, $file) = @_;

    # $self->textedit($doc_obj,$file,"delete");

    my $warning = "Warning: jenaTDB command-line does not currently support delete operation\n";
    print STDERR $warning;
}
254
255
256
257
258
2591;
Note: See TracBrowser for help on using the browser.