source: gs2-extensions/apache-jena/trunk/src/perllib/jenaTDBBuildproc.pm@ 28468

Last change on this file since 28468 was 28468, checked in by davidb, 11 years ago

Further development of this Perl module.

File size: 6.5 KB
Line 
1##########################################################################
2#
3# jenaTDBBuildproc.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# This document processor outputs a document for indexing (should be
27# implemented by subclass) and storing in the database
28
29package jenaTDBBuildproc;
30
31use strict;
32no strict 'refs'; # allow filehandles to be variables and viceversa
33
34use docprint;
35use util;
36use FileUtils;
37
38use extrabuildproc;
39
40
# Declare extrabuildproc as the parent class.  This is done inside BEGIN so
# that the inheritance chain is already in place while the rest of the file
# is being compiled.
BEGIN {
    @jenaTDBBuildproc::ISA = qw(extrabuildproc);
}
44
# Constructor.
#
# Builds the object through the extrabuildproc constructor (all arguments
# are passed through unchanged), then prepares a copy of the gsdom2rdf.xsl
# stylesheet in the collection-level tmp directory in which the @libraryurl@
# and @collect@ placeholders have been filled in.
#
# Fields set on the object:
#   tmp_dir       - value returned by util::get_collectlevel_tmp_dir()
#   xslt_file     - bare stylesheet name ("gsdom2rdf.xsl")
#   xslt_filename - path of the substituted stylesheet copy inside tmp_dir
#
# Dies (after printing a message to STDERR) when the stylesheet cannot be
# located by util::locate_config_file().  A failure to write the substituted
# copy is reported on STDERR but does not abort construction.
sub new
{
    my $class = shift @_;

    # Explicit class-method call; the indirect-object form
    # ("new extrabuildproc (...)") is ambiguous to the Perl parser.
    my $self = extrabuildproc->new(@_);

    # Do the following here so it doesn't keep checking (within the util.pm
    # method) whether it needs to create the directory or not
    my $tmp_dir = &util::get_collectlevel_tmp_dir();
    $self->{'tmp_dir'} = $tmp_dir;

    my $xslt_file_in = "gsdom2rdf.xsl";

    my $xslt_filename_in = &util::locate_config_file($xslt_file_in);
    if (!defined $xslt_filename_in) {
        print STDERR "Can not find $xslt_file_in, please make sure you have supplied the correct file path\n";
        die "\n";
    }

    my $xslt_filename_out = &FileUtils::filenameConcatenate($tmp_dir,$xslt_file_in);

    my $collection = $self->{'collection'};
    my $url_prefix = &util::get_full_greenstone_url_prefix();

    # Keys are the placeholder names that appear as @name@ in the stylesheet
    my $property_hashmap = { 'libraryurl' => $url_prefix,
                             'collect'    => $collection };

    # A false result means the copy could not be opened or written; the
    # helper has already printed the underlying reason.
    if (!file_copy_with_property_sub($xslt_filename_in,$xslt_filename_out,$property_hashmap)) {
        print STDERR "Warning: failed to create $xslt_filename_out from $xslt_filename_in\n";
    }

    $self->{'xslt_file'}     = $xslt_file_in;
    $self->{'xslt_filename'} = $xslt_filename_out;

    return bless $self, $class;
}
81
82
# Resolves a single placeholder name against a property table.
#
# Arguments:
#   $hashmap - hash reference mapping placeholder names to replacement text
#   $value   - the placeholder name (the text found between two '@' signs)
#
# Returns the mapped text when $hashmap holds a defined entry for $value;
# otherwise returns the name wrapped in '@' again, so that unrecognised
# placeholders pass through a substitution unchanged.
#
# Two trace lines are written to STDERR on every call.
sub property_lookup
{
    my ($hashmap,$value) = @_;

    my $mapped = $hashmap->{$value};
    my $found  = defined $mapped;

    print STDERR "*** checking value = '$value'\n";

    # Print an empty string for a missing entry rather than interpolating
    # undef, which triggers an "uninitialized value" warning under -w.
    print STDERR "*** lookup = ", ($found ? $mapped : ""), "\n";

    return $found ? $mapped : "\@$value\@";
}
95
96
# Performs a text file copy, substituting substrings of the form
# @xxx@ in the input file with the values set in the hashmap
# passed in.
#
# Arguments:
#   $filename_in      - file to read (decoded through the :utf8 layer)
#   $filename_out     - file to create or overwrite (encoded through :utf8)
#   $property_hashmap - hash reference of placeholder name => replacement
#
# A placeholder name is one or more characters other than '@' and space.
# Each one is resolved by property_lookup().
#
# Returns 1 on success.  If either file cannot be opened, or the output
# cannot be completely written, a message is printed to STDERR and an
# empty list / undef is returned.

sub file_copy_with_property_sub
{
    my ($filename_in,$filename_out,$property_hashmap) = @_;

    # Three-argument open keeps characters such as '>' or '|' or surrounding
    # whitespace in a filename from being interpreted as part of the mode.
    my $fin;
    if (!open($fin, "<", $filename_in)) {
        print STDERR "jenaTDBBuildproc::file_copy_with_property_sub failed to open $filename_in\n $!\n";
        return;
    }
    binmode($fin,":utf8");

    my $fout;
    if (!open($fout, ">", $filename_out)) {
        print STDERR "jenaTDBBuildproc::file_copy_with_property_sub failed to open $filename_out\n $!\n";
        close($fin); # don't leak the input handle on this error path
        return;
    }
    binmode($fout,":utf8");

    while (defined(my $line = <$fin>)) {

        $line =~ s/\@([^\@ ]+)\@/property_lookup($property_hashmap,$1)/ge;

        print $fout $line;
    }

    close($fin);

    # Buffered write errors (e.g. a full disk) only surface at close time
    if (!close($fout)) {
        print STDERR "jenaTDBBuildproc::file_copy_with_property_sub failed to write $filename_out\n $!\n";
        return;
    }

    return 1;
}
128
129
# Starts the ApplyXSLT Java program as a child process and opens a write
# pipe to its standard input.
#
# Arguments:
#   $output_file_name - sent to the child as the second line of input,
#                       immediately after the "<?DocStart?>" marker line
#   $xslt_file        - stylesheet path, passed to ApplyXSLT with -t
#
# If $self->{'mapping_file'} is set and non-empty it is passed with -m.
#
# On success the write handle is stored in $self->{'xslt_writer'} and 1 is
# returned.  Returns nothing, without starting a process or touching
# $self->{'xslt_writer'}, when $xslt_file is undefined, empty, or does not
# exist according to FileUtils::fileExists().  Dies if the pipe cannot be
# opened.
sub open_xslt_pipe
{
    my $self = shift @_;
    my ($output_file_name, $xslt_file) = @_;

    return unless defined $xslt_file and $xslt_file ne "" and &FileUtils::fileExists($xslt_file);

    my $apply_xslt_jar = &FileUtils::javaFilenameConcatenate($ENV{'GSDLHOME'},"bin","java","ApplyXSLT.jar");
    my $xalan_jar      = &FileUtils::javaFilenameConcatenate($ENV{'GSDLHOME'},"bin","java","xalan.jar");

    my $java_class_path = &util::javapathname_cat($apply_xslt_jar,$xalan_jar);

    $xslt_file = &util::makeFilenameJavaCygwinCompatible($xslt_file);

    my $cmd = "java -cp \"$java_class_path\" org.nzdl.gsdl.ApplyXSLT -t \"$xslt_file\" ";

    if (defined $self->{'mapping_file'} and $self->{'mapping_file'} ne "") {
        my $mapping_file_path = "\"".$self->{'mapping_file'}."\"";
        $cmd .= "-m $mapping_file_path";
    }

    # Lexical handle instead of the package-global XMLWRITER glob, so the
    # pipe belongs to this object alone.  The command is still a single
    # string, so it is run through the shell exactly as before.
    my $xslt_writer;
    open($xslt_writer, "|-", $cmd)
        or die "can't open pipe to xslt: $!";

    $self->{'xslt_writer'} = $xslt_writer;

    print $xslt_writer "<?DocStart?>\n";
    print $xslt_writer "$output_file_name\n";

    return 1;
}
163
164
# Finishes the current XSLT pipe, if there is one: writes the "<?DocEnd?>"
# marker line to the handle held in $self->{'xslt_writer'}, closes it, and
# resets that field to undef.  Does nothing when no writer is stored.
sub close_xslt_pipe
{
    my ($self) = @_;

    my $writer = $self->{'xslt_writer'};
    return unless defined $writer;

    print $writer "<?DocEnd?>\n";
    close($writer);

    # Keep the key but clear its value, so a later call becomes a no-op
    undef $self->{'xslt_writer'};
}
180
181
# Transforms one document through the XSLT pipe and loads the result into
# the triple store.
#
# Arguments:
#   $doc_obj - document object; get_OID() and get_top_section() are called
#              on it.  Any further arguments are ignored.
#
# Steps:
#   1. Open the XSLT pipe, naming doc-<OID>.ttl in $self->{'tmp_dir'} as the
#      file to be produced.
#   2. Write the XML of the document's top section (as returned by
#      docprint::get_section_xml()) to the pipe and close it.
#   3. If the .ttl file now exists, run
#      "gs-triplestore-add <collection> <file>" and delete the file;
#      otherwise report the missing file on STDERR.
#
# Dies if the pipe cannot be opened, or if open_xslt_pipe() returns without
# providing a writer handle.
sub textedit {
    my $self = shift (@_);
    my ($doc_obj) = @_;

    my $doc_oid = $doc_obj->get_OID();

    my $tmp_dir = $self->{'tmp_dir'};
    my $tmp_doc_filename    = &FileUtils::filenameConcatenate($tmp_dir,"doc-$doc_oid.ttl");
    my $tmp_doc_filename_cc = &util::makeFilenameJavaCygwinCompatible($tmp_doc_filename);

    my $xslt_filename = $self->{'xslt_filename'};
    $self->open_xslt_pipe($tmp_doc_filename_cc, $xslt_filename); # stops with error if not able to open pipe

    my $outhandler = $self->{'xslt_writer'};
    if (!defined $outhandler) {
        # open_xslt_pipe() returns quietly when the stylesheet is missing;
        # fail here with a clear message instead of an obscure filehandle
        # error on the first write.
        my $xslt_desc = (defined $xslt_filename) ? "'$xslt_filename'" : "(undefined)";
        die "jenaTDBBuildproc::textedit: no XSLT pipe available for $doc_oid; stylesheet $xslt_desc missing?\n";
    }
    binmode($outhandler,":utf8");

    my $section_text = &docprint::get_section_xml($doc_obj,$doc_obj->get_top_section());
    print $outhandler $section_text;

    $self->close_xslt_pipe();

    # now feed the generated file to jena's (TDB) triple store

    my $outhandle = $self->{'outhandle'};
    print $outhandle " Inserting tripples for $doc_oid\n";

    my $collection = $self->{'collection'};

    if (-f $tmp_doc_filename) {

        my $cmd = "gs-triplestore-add $collection \"$tmp_doc_filename\"";

        my $status = system($cmd);
        if ($status == -1) {
            # The command could not be started at all; only here is $! set
            print STDERR "Error: failed to run:\n $cmd\n$!\n";
        }
        elsif ($status != 0) {
            # The command ran but failed: decode the wait status
            my $signal    = $status & 127;
            my $exit_code = $status >> 8;
            my $reason = ($signal) ? "terminated by signal $signal" : "exit status $exit_code";
            print STDERR "Error: failed to run:\n $cmd\n$reason\n";
        }

        unlink $tmp_doc_filename;
    }
    else {
        print STDERR "*** Failed to generate: $tmp_doc_filename\n";
    }

}
227
228
# Forwards a document to textedit(), tagging the call with the mode
# string "add".
#
# Arguments:
#   $doc_obj - document object, passed through unchanged
#   $file    - passed through unchanged
#
# Returns whatever textedit() returns.
sub text
{
    my ($self, $doc_obj, $file) = @_;

    return $self->textedit($doc_obj, $file, "add");
}
235
# Forwards a document to textedit(), tagging the call with the mode
# string "update".
#
# Arguments:
#   $doc_obj - document object, passed through unchanged
#   $file    - passed through unchanged
#
# Returns whatever textedit() returns.
sub textreindex
{
    my ($self, $doc_obj, $file) = @_;

    return $self->textedit($doc_obj, $file, "update");
}
243
# Handler for document deletion.  Nothing is removed from the triple store:
# the call only prints a warning on STDERR saying that delete is not
# supported.  The arguments are accepted but not used.
sub textdelete
{
    my ($self, $doc_obj, $file) = @_;

    print STDERR "Warning: jenaTDB command-line does not currently support delete operation\n";

    # Disabled until a delete operation is available:
    # $self->textedit($doc_obj,$file,"delete");
}
254
255
256
257
258
2591;
Note: See TracBrowser for help on using the repository browser.