Changeset 19494


Ignore:
Timestamp:
2009-05-18T10:58:18+12:00 (15 years ago)
Author:
davidb
Message:

Supporting routines that exploit the new 'metafiles' structures, introduced to track which metadata.xml file a piece of metadata came from

Location:
gsdl/trunk/perllib
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/doc.pm

    r18561 r19494  
    999999}
    10001000
     1001
     1002sub metadata_file {
     1003    my $self = shift (@_);
     1004    my ($real_filename, $filename) = @_;
     1005   
     1006    push (@{$self->{'metadata_files'}},
     1007      [$real_filename, $filename]);
     1008}
     1009
     1010sub get_meta_files {
     1011    my $self = shift (@_);
     1012
     1013    return $self->{'metadata_files'};
     1014}
     1015
    10011016sub delete_section_assoc_files {
    10021017    my $self = shift (@_);
  • gsdl/trunk/perllib/plugouts/BasePlugout.pm

    r19180 r19494  
    493493        print $outhandle "Writing associated files to $doc_dir\n";
    494494    $self->process_assoc_files ($doc_obj, $doc_dir);
     495
     496    # look up 'gsdlmetafile' metadata and store that information
     497    # explicitly in $doc_obj
     498    $self->process_metafiles_metadata ($doc_obj);
    495499    }
    496500
     
    643647
    644648
    645 sub archiveinf_gdbm
     649sub process_metafiles_metadata
    646650{
    647651    my $self = shift (@_);
    648652    my ($doc_obj) = @_;
    649653
    650     my $verbosity = $self->{'verbosity'};
    651 
    652     my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
    653     if (defined $collect_dir) {
    654     my $dirsep_regexp = &util::get_os_dirsep();
    655 
    656     if ($collect_dir !~ /$dirsep_regexp$/) {
    657         # ensure there is a slash at the end
    658         $collect_dir .= &util::get_dirsep();
    659     }
    660     }
    661 
    662     my $oid = $doc_obj->get_OID();
    663     my $source_filename = $doc_obj->get_source_filename();
    664 
    665     my $working_info = $self->{'output_info'};
    666     my $doc_info = $working_info->get_info($oid);
    667     my ($doc_file,$index_status) = @$doc_info;
    668 
    669     my $oid_files = { 'doc-file' => $doc_file,
    670               'index-status' => $index_status,
    671               'src-file' => $source_filename,
    672               'assoc-files' => [] };
    673    
    674     my %reverse_lookups = ( $source_filename => "1" );
    675 
    676     foreach my $assoc_file_rec (@{$doc_obj->get_assoc_files()}) {
    677     my $real_filename = $assoc_file_rec->[0];
    678     my $full_afile = $assoc_file_rec->[1];
     654    my $top_section = $doc_obj->get_top_section();
     655    my $metafiles = $doc_obj->get_metadata($top_section,"gsdlmetafile");
     656
     657    foreach my $metafile_pair (@$metafiles) {
     658    my ($full_metafile,$metafile) = split(/:/,$metafile_pair);
     659
     660    $doc_obj->metadata_file($full_metafile,$metafile);
     661    }
     662
     663    $doc_obj->delete_metadata($top_section,"gsdlmetafile");
     664}
     665
     666sub archiveinf_files_to_field
     667{
     668    my $self = shift(@_);
     669    my ($files,$field,$collect_dir,$oid_files,$reverse_lookups) = @_;
     670
     671    foreach my $file_rec (@$files) {
     672    my $real_filename = $file_rec->[0];
     673    my $full_file = $file_rec->[1];
    679674
    680675    # for some reasons the image associate file has / before the full path
     
    690685        }
    691686
    692         $reverse_lookups{$real_filename} = 1;
    693 
    694         push(@{$oid_files->{'assoc-files'}},$full_afile);
    695 
     687        $reverse_lookups->{$real_filename} = 1;
     688
     689        push(@{$oid_files->{$field}},$full_file);
    696690    }
    697691    else {
     
    699693    }
    700694    }
     695}
     696
     697sub archiveinf_gdbm
     698{
     699    my $self = shift (@_);
     700    my ($doc_obj) = @_;
     701
     702    my $verbosity = $self->{'verbosity'};
     703
     704    my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
     705    if (defined $collect_dir) {
     706    my $dirsep_regexp = &util::get_os_dirsep();
     707
     708    if ($collect_dir !~ /$dirsep_regexp$/) {
     709        # ensure there is a slash at the end
     710        $collect_dir .= &util::get_dirsep();
     711    }
     712    }
     713
     714    my $oid = $doc_obj->get_OID();
     715    my $source_filename = $doc_obj->get_source_filename();
     716
     717    my $working_info = $self->{'output_info'};
     718    my $doc_info = $working_info->get_info($oid);
     719    my ($doc_file,$index_status) = @$doc_info;
     720
     721    my $oid_files = { 'doc-file' => $doc_file,
     722              'index-status' => $index_status,
     723              'src-file' => $source_filename,
     724              'assoc-files' => [],
     725              'meta-files'  => [] };
     726   
     727    my $reverse_lookups = { $source_filename => "1" };
     728
     729
     730    $self->archiveinf_files_to_field($doc_obj->get_assoc_files(),"assoc-files",
     731                     $collect_dir,$oid_files,$reverse_lookups);
     732
     733#    foreach my $assoc_file_rec (@{$doc_obj->get_assoc_files()}) {
     734#   my $real_filename = $assoc_file_rec->[0];
     735#   my $full_afile = $assoc_file_rec->[1];
     736#
     737#   # for some reasons the image associate file has / before the full path
     738#   $real_filename =~ s/^\\(.*)/$1/i;
     739#   if (-e $real_filename) {
     740#
     741#       if (defined $collect_dir) {
     742#       my $collect_dir_re_safe = $collect_dir;
     743#       $collect_dir_re_safe =~ s/\\/\\\\/g;
     744#       $collect_dir_re_safe =~ s/\./\\./g;
     745#
     746#       $real_filename =~ s/^$collect_dir_re_safe//;
     747#       }
     748#
     749#       $reverse_lookups->{$real_filename} = 1;
     750#
     751#       push(@{$oid_files->{'assoc-files'}},$full_afile);
     752#   }
     753#   else {
     754#       print STDERR "Warning: archiveinf_gdbm()\n  $real_filename does not appear to be on the file system\n";
     755#   }
     756#    }
     757
     758    $self->archiveinf_files_to_field($doc_obj->get_meta_files(),"meta-files",
     759                     $collect_dir,$oid_files,$reverse_lookups);
     760
    701761
    702762    # better not to commit to a particular db implementation, but
     
    712772    $doc_db_text .= "<index-status>$oid_files->{'index-status'}\n";
    713773    $doc_db_text .= "<src-file>$oid_files->{'src-file'}\n";
     774
    714775    foreach my $af (@{$oid_files->{'assoc-files'}}) {
    715776    $doc_db_text .= "<assoc-file>$af\n";
    716777    }
     778
     779    foreach my $mf (@{$oid_files->{'meta-files'}}) {
     780    $doc_db_text .= "<meta-file>$mf\n";
     781    }
     782
    717783    chomp($doc_db_text); # remove trailing \n
    718784
     
    720786    &GDBMUtils::gdbmDatabaseSet($doc_db,$oid,$doc_db_text);
    721787
    722     foreach my $rl (keys %reverse_lookups) {
     788    foreach my $rl (keys %$reverse_lookups) {
    723789    &GDBMUtils::gdbmDatabaseAppend($src_db,$rl,"<oid>$oid\n");
    724790    }
  • gsdl/trunk/perllib/plugouts/GreenstoneXMLPlugout.pm

    r17743 r19494  
    7777    $self->process_assoc_files ($doc_obj, $doc_dir, '');
    7878   
     79    $self->process_metafiles_metadata ($doc_obj);
     80
    7981    my $output_file = util::filename_cat ($working_dir, "doc.xml");
    8082   
Note: See TracChangeset for help on using the changeset viewer.