Changeset 28468 for gs2-extensions

Show
Ignore:
Timestamp:
16.10.2013 17:50:17 (6 years ago)
Author:
davidb
Message:

Further development of this Perl module.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • gs2-extensions/apache-jena/trunk/src/perllib/jenaTDBBuildproc.pm

    r28410 r28468  
    4949    my $self = new extrabuildproc (@_); 
    5050 
    51     my $xslt_file = "gsdom2rdf.xsl"; 
    52  
    53     my $xslt_filename = &util::locate_config_file($xslt_file); 
    54     if (!defined $xslt_filename) { 
    55     print STDERR "Can not find $xslt_file, please make sure you have supplied the correct file path\n"; 
    56     die "\n"; 
    57     } 
    58  
    59     $self->{'xslt_file'} = $xslt_file; 
    60     $self->{'xslt_filename'} = $xslt_filename; 
    61  
    6251    # Do the following here so it doesn't keep checking (within the util.pm method) 
    6352    # whether it needs to create the directory or not 
     
    6554    $self->{'tmp_dir'} = $tmp_dir; 
    6655 
     56 
     57    my $xslt_file_in = "gsdom2rdf.xsl"; 
     58 
     59    my $xslt_filename_in = &util::locate_config_file($xslt_file_in); 
     60    if (!defined $xslt_filename_in) { 
     61    print STDERR "Can not find $xslt_file_in, please make sure you have supplied the correct file path\n"; 
     62    die "\n"; 
     63    } 
     64 
     65    my $xslt_filename_out = &FileUtils::filenameConcatenate($tmp_dir,$xslt_file_in); 
     66 
     67    my $collection = $self->{'collection'}; 
     68 
     69    my $url_prefix = &util::get_full_greenstone_url_prefix(); 
     70 
     71    my $property_hashmap = { 'libraryurl' => $url_prefix, 
     72                 'collect'    => $collection }; 
     73 
     74    file_copy_with_property_sub($xslt_filename_in,$xslt_filename_out,$property_hashmap); 
     75 
     76    $self->{'xslt_file'} = $xslt_file_in; 
     77    $self->{'xslt_filename'} = $xslt_filename_out; 
     78 
    6779    return bless $self, $class; 
    6880} 
    6981 
    7082 
     83sub property_lookup 
     84{ 
     85    my ($hashmap,$value) = @_; 
     86     
     87    print STDERR "*** checking value = '$value'\n"; 
     88 
     89    print STDERR "*** lookup = ", $hashmap->{$value}, "\n"; 
     90 
     91    my $lookup = (defined $hashmap->{$value}) ? $hashmap->{$value} : "\@$value\@"; 
     92 
     93    return $lookup; 
     94} 
     95 
     96 
     97# Performs a text file copy, substituding substings of the form 
     98# @xxx@ in the input file with the values set in hashmap 
     99# passed in  
     100 
     101sub file_copy_with_property_sub 
     102{ 
     103    my ($filename_in,$filename_out,$property_hashmap) = @_; 
     104 
     105    if (!open(FIN, "<$filename_in")) { 
     106    print STDERR "util::file_substitute_at_properteis failed to open $filename_in\n  $!\n"; 
     107    return; 
     108    } 
     109    binmode(FIN,":utf8"); 
     110 
     111    if (!open(FOUT, ">$filename_out")) { 
     112    print STDERR "util::file_substitute_at_properteis failed to open $filename_out\n  $!\n"; 
     113    return; 
     114    } 
     115    binmode(FOUT,":utf8"); 
     116 
     117    my $line; 
     118    while (defined($line = <FIN>)) { 
     119     
     120    $line =~ s/\@([^@ ]+)\@/&property_lookup($property_hashmap,$1)/ige; 
     121 
     122    print FOUT $line; 
     123    } 
     124 
     125    close(FIN); 
     126    close(FOUT);         
     127} 
    71128 
    72129 
     
    88145 
    89146    my $cmd = "| java -cp \"$java_class_path\" org.nzdl.gsdl.ApplyXSLT -t \"$xslt_file\" ";  
     147 
    90148 
    91149    if (defined $self->{'mapping_file'} and $self->{'mapping_file'} ne ""){ 
     
    94152    } 
    95153     
    96     print STDERR "*** cmd = $cmd\n"; 
    97  
    98154    open(*XMLWRITER, $cmd) 
    99155    or die "can't open pipe to xslt: $!"; 
    100  
    101156     
    102157    $self->{'xslt_writer'} = *XMLWRITER; 
     
    133188 
    134189    my $tmp_dir = $self->{'tmp_dir'}; 
    135     my $tmp_doc_filename = &FileUtils::filenameConcatenate($tmp_dir,"doc-$doc_oid.xml"); 
    136     $tmp_doc_filename    = &util::makeFilenameJavaCygwinCompatible($tmp_doc_filename); 
     190    my $tmp_doc_filename = &FileUtils::filenameConcatenate($tmp_dir,"doc-$doc_oid.ttl"); 
     191    my $tmp_doc_filename_cc    = &util::makeFilenameJavaCygwinCompatible($tmp_doc_filename); 
    137192 
    138193    my $xslt_filename = $self->{'xslt_filename'}; 
    139     $self->open_xslt_pipe($tmp_doc_filename, $xslt_filename); # stops with error if not able to open pipe 
     194    $self->open_xslt_pipe($tmp_doc_filename_cc, $xslt_filename); # stops with error if not able to open pipe 
    140195 
    141196    my $outhandler = $self->{'xslt_writer'}; 
     
    147202    $self->close_xslt_pipe(); 
    148203 
    149  
    150204    # now feed the generated file to Jena's (TDB) triple store 
    151205 
    152  
    153 } 
    154  
    155  
    156 sub texteditMG { 
    157     my $self = shift (@_); 
    158     my ($doc_obj) = @_; 
    159     my $handle = $self->{'output_handle'}; 
    160      
    161     my $doc_oid = $doc_obj->get_OID(); 
    162  
    163  
    164     my $doc_section = 0; # just for this document 
    165  
    166  
    167     my $text = ""; 
    168     my $text_extra = ""; 
    169  
    170     # get the text for this document 
    171     my $section = $doc_obj->get_top_section(); 
    172     while (defined $section) { 
    173     # update a few statistics 
    174     $doc_section++; 
    175  
    176     my $title = $doc_obj->get_metadata_element($section, "Title"); 
    177  
    178     if (defined $title && ($title =~ m/\S/)) { 
    179         print "$doc_oid: Title = $title\n"; 
    180     } 
    181  
    182     my $dc_title = $doc_obj->get_metadata_element($section, "dc.Title"); 
    183  
    184     if (defined $dc_title && ($dc_title =~ m/\S/)) { 
    185         print "$doc_oid: dc.Title = $dc_title\n"; 
    186     } 
    187  
    188  
    189     my $id3_title = $doc_obj->get_metadata_element($section, "ex.ID3.Title"); 
    190  
    191     if (defined $id3_title && ($id3_title =~ m/\S/)) { 
    192         print "$doc_oid: id3.Title = $id3_title\n"; 
     206    my $outhandle = $self->{'outhandle'}; 
     207    print $outhandle "  Inserting tripples for $doc_oid\n"; 
     208 
     209    my $collection = $self->{'collection'}; 
     210 
     211    if (-f $tmp_doc_filename) { 
     212 
     213    my $cmd = "gs-triplestore-add $collection \"$tmp_doc_filename\""; 
     214     
     215    my $status = system($cmd); 
     216    if ($status != 0) { 
     217        print STDERR "Error: failed to run:\n  $cmd\n$!\n"; 
    193218    } 
    194219     
    195     $section = $doc_obj->get_next_section($section); 
    196     } 
    197  
    198     print $handle "$text$text_extra"; 
    199 } 
    200  
    201  
    202  
    203 sub texteditADB { 
    204     my $self = shift (@_); 
    205     my ($doc_obj,$file,$mode) = @_; 
    206  
    207     # Code written on the assumption that that jenaTDB does a replace 
    208     # operation when presented with a docid that already extis. 
    209     # => don't need to do anything special to distinguish between 
    210     #    a mode of "add" and "update" 
    211  
    212     my $outhandle = $self->{'outhandle'}; 
    213  
    214  
    215     my $source_dir = $self->{'source_dir'}; # typically the archives dir 
    216     my $build_dir  = $self->{'build_dir'}; 
    217  
    218     # full path to adb database 
    219     my $adb_filename  
    220     = &util::filename_cat($build_dir, "jenaTDB", "lsh-features.adb"); 
    221  
    222     # get doc id 
    223     my $doc_oid = $doc_obj->get_OID(); 
    224  
    225     # map to assoc dir 
    226     my $top_section = $doc_obj->get_top_section(); 
    227     my $assoc_file 
    228     = $doc_obj->get_metadata_element ($top_section,"assocfilepath"); 
    229     my $assoc_filename = &util::filename_cat($source_dir,$assoc_file); 
    230  
    231     my $chr12_filename = &util::filename_cat($assoc_filename,"doc.chr12"); 
    232     my $powerlog_filename = &util::filename_cat($assoc_filename,"doc.power"); 
    233  
    234     print $outhandle "  Inserting tripples for $doc_oid\n"; 
    235  
    236 #    my $cmd = "jenaTDB -d \"$adb_filename\" -I -k \"$doc_oid\" -f \"$chr12_filename\" -w \"$powerlog_filename\""; 
    237  
    238 #    my $status = system($cmd); 
    239 #    if ($status != 0) { 
    240 #   print STDERR "Error: failed to run:\n  $cmd\n$!\n"; 
    241 #    } 
    242  
    243 } 
     220    unlink $tmp_doc_filename; 
     221    } 
     222    else { 
     223    print STDERR "*** Failed to generate: $tmp_doc_filename\n"; 
     224    } 
     225 
     226} 
     227 
    244228 
    245229sub text {