Changeset 28468


Ignore:
Timestamp:
2013-10-16T17:50:17+13:00 (11 years ago)
Author:
davidb
Message:

Further development of this Perl module.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gs2-extensions/apache-jena/trunk/src/perllib/jenaTDBBuildproc.pm

    r28410 r28468  
    4949    my $self = new extrabuildproc (@_);
    5050
    51     my $xslt_file = "gsdom2rdf.xsl";
    52 
    53     my $xslt_filename = &util::locate_config_file($xslt_file);
    54     if (!defined $xslt_filename) {
    55     print STDERR "Can not find $xslt_file, please make sure you have supplied the correct file path\n";
    56     die "\n";
    57     }
    58 
    59     $self->{'xslt_file'} = $xslt_file;
    60     $self->{'xslt_filename'} = $xslt_filename;
    61 
    6251    # Do the following here so it doesn't keep checking (within the util.pm method)
    6352    # whether it needs to create the directory or not
     
    6554    $self->{'tmp_dir'} = $tmp_dir;
    6655
     56
     57    my $xslt_file_in = "gsdom2rdf.xsl";
     58
     59    my $xslt_filename_in = &util::locate_config_file($xslt_file_in);
     60    if (!defined $xslt_filename_in) {
     61    print STDERR "Can not find $xslt_file_in, please make sure you have supplied the correct file path\n";
     62    die "\n";
     63    }
     64
     65    my $xslt_filename_out = &FileUtils::filenameConcatenate($tmp_dir,$xslt_file_in);
     66
     67    my $collection = $self->{'collection'};
     68
     69    my $url_prefix = &util::get_full_greenstone_url_prefix();
     70
     71    my $property_hashmap = { 'libraryurl' => $url_prefix,
     72                 'collect'    => $collection };
     73
     74    file_copy_with_property_sub($xslt_filename_in,$xslt_filename_out,$property_hashmap);
     75
     76    $self->{'xslt_file'} = $xslt_file_in;
     77    $self->{'xslt_filename'} = $xslt_filename_out;
     78
    6779    return bless $self, $class;
    6880}
    6981
    7082
     83sub property_lookup
     84{
     85    my ($hashmap,$value) = @_;
     86   
     87    print STDERR "*** checking value = '$value'\n";
     88
     89    print STDERR "*** lookup = ", $hashmap->{$value}, "\n";
     90
     91    my $lookup = (defined $hashmap->{$value}) ? $hashmap->{$value} : "\@$value\@";
     92
     93    return $lookup;
     94}
     95
     96
     97# Performs a text file copy, substituting substrings of the form
     98# @xxx@ in the input file with the values set in the hashmap
     99# passed in
     100
     101sub file_copy_with_property_sub
     102{
     103    my ($filename_in,$filename_out,$property_hashmap) = @_;
     104
     105    if (!open(FIN, "<$filename_in")) {
     106    print STDERR "util::file_substitute_at_properteis failed to open $filename_in\n  $!\n";
     107    return;
     108    }
     109    binmode(FIN,":utf8");
     110
     111    if (!open(FOUT, ">$filename_out")) {
     112    print STDERR "util::file_substitute_at_properteis failed to open $filename_out\n  $!\n";
     113    return;
     114    }
     115    binmode(FOUT,":utf8");
     116
     117    my $line;
     118    while (defined($line = <FIN>)) {
     119   
     120    $line =~ s/\@([^@ ]+)\@/&property_lookup($property_hashmap,$1)/ige;
     121
     122    print FOUT $line;
     123    }
     124
     125    close(FIN);
     126    close(FOUT);       
     127}
    71128
    72129
     
    88145
    89146    my $cmd = "| java -cp \"$java_class_path\" org.nzdl.gsdl.ApplyXSLT -t \"$xslt_file\" ";
     147
    90148
    91149    if (defined $self->{'mapping_file'} and $self->{'mapping_file'} ne ""){
     
    94152    }
    95153   
    96     print STDERR "*** cmd = $cmd\n";
    97 
    98154    open(*XMLWRITER, $cmd)
    99155    or die "can't open pipe to xslt: $!";
    100 
    101156   
    102157    $self->{'xslt_writer'} = *XMLWRITER;
     
    133188
    134189    my $tmp_dir = $self->{'tmp_dir'};
    135     my $tmp_doc_filename = &FileUtils::filenameConcatenate($tmp_dir,"doc-$doc_oid.xml");
    136     $tmp_doc_filename    = &util::makeFilenameJavaCygwinCompatible($tmp_doc_filename);
     190    my $tmp_doc_filename = &FileUtils::filenameConcatenate($tmp_dir,"doc-$doc_oid.ttl");
     191    my $tmp_doc_filename_cc    = &util::makeFilenameJavaCygwinCompatible($tmp_doc_filename);
    137192
    138193    my $xslt_filename = $self->{'xslt_filename'};
    139     $self->open_xslt_pipe($tmp_doc_filename, $xslt_filename); # stops with error if not able to open pipe
     194    $self->open_xslt_pipe($tmp_doc_filename_cc, $xslt_filename); # stops with error if not able to open pipe
    140195
    141196    my $outhandler = $self->{'xslt_writer'};
     
    147202    $self->close_xslt_pipe();
    148203
    149 
    150204    # now feed the generated file to Jena's (TDB) triple store
    151205
    152 
    153 }
    154 
    155 
    156 sub texteditMG {
    157     my $self = shift (@_);
    158     my ($doc_obj) = @_;
    159     my $handle = $self->{'output_handle'};
    160    
    161     my $doc_oid = $doc_obj->get_OID();
    162 
    163 
    164     my $doc_section = 0; # just for this document
    165 
    166 
    167     my $text = "";
    168     my $text_extra = "";
    169 
    170     # get the text for this document
    171     my $section = $doc_obj->get_top_section();
    172     while (defined $section) {
    173     # update a few statistics
    174     $doc_section++;
    175 
    176     my $title = $doc_obj->get_metadata_element($section, "Title");
    177 
    178     if (defined $title && ($title =~ m/\S/)) {
    179         print "$doc_oid: Title = $title\n";
    180     }
    181 
    182     my $dc_title = $doc_obj->get_metadata_element($section, "dc.Title");
    183 
    184     if (defined $dc_title && ($dc_title =~ m/\S/)) {
    185         print "$doc_oid: dc.Title = $dc_title\n";
    186     }
    187 
    188 
    189     my $id3_title = $doc_obj->get_metadata_element($section, "ex.ID3.Title");
    190 
    191     if (defined $id3_title && ($id3_title =~ m/\S/)) {
    192         print "$doc_oid: id3.Title = $id3_title\n";
     206    my $outhandle = $self->{'outhandle'};
     207    print $outhandle "  Inserting tripples for $doc_oid\n";
     208
     209    my $collection = $self->{'collection'};
     210
     211    if (-f $tmp_doc_filename) {
     212
     213    my $cmd = "gs-triplestore-add $collection \"$tmp_doc_filename\"";
     214   
     215    my $status = system($cmd);
     216    if ($status != 0) {
     217        print STDERR "Error: failed to run:\n  $cmd\n$!\n";
    193218    }
    194219   
    195     $section = $doc_obj->get_next_section($section);
    196     }
    197 
    198     print $handle "$text$text_extra";
    199 }
    200 
    201 
    202 
    203 sub texteditADB {
    204     my $self = shift (@_);
    205     my ($doc_obj,$file,$mode) = @_;
    206 
    207     # Code written on the assumption that that jenaTDB does a replace
    208     # operation when presented with a docid that already extis.
    209     # => don't need to do anything special to distinguish between
    210     #    a mode of "add" and "update"
    211 
    212     my $outhandle = $self->{'outhandle'};
    213 
    214 
    215     my $source_dir = $self->{'source_dir'}; # typically the archives dir
    216     my $build_dir  = $self->{'build_dir'};
    217 
    218     # full path to adb database
    219     my $adb_filename
    220     = &util::filename_cat($build_dir, "jenaTDB", "lsh-features.adb");
    221 
    222     # get doc id
    223     my $doc_oid = $doc_obj->get_OID();
    224 
    225     # map to assoc dir
    226     my $top_section = $doc_obj->get_top_section();
    227     my $assoc_file
    228     = $doc_obj->get_metadata_element ($top_section,"assocfilepath");
    229     my $assoc_filename = &util::filename_cat($source_dir,$assoc_file);
    230 
    231     my $chr12_filename = &util::filename_cat($assoc_filename,"doc.chr12");
    232     my $powerlog_filename = &util::filename_cat($assoc_filename,"doc.power");
    233 
    234     print $outhandle "  Inserting tripples for $doc_oid\n";
    235 
    236 #    my $cmd = "jenaTDB -d \"$adb_filename\" -I -k \"$doc_oid\" -f \"$chr12_filename\" -w \"$powerlog_filename\"";
    237 
    238 #    my $status = system($cmd);
    239 #    if ($status != 0) {
    240 #   print STDERR "Error: failed to run:\n  $cmd\n$!\n";
    241 #    }
    242 
    243 }
     220    unlink $tmp_doc_filename;
     221    }
     222    else {
     223    print STDERR "*** Failed to generate: $tmp_doc_filename\n";
     224    }
     225
     226}
     227
    244228
    245229sub text {
Note: See TracChangeset for help on using the changeset viewer.