Changeset 19494

Show
Ignore:
Timestamp:
18.05.2009 10:58:18 (10 years ago)
Author:
davidb
Message:

Supporting routines that exploit the new 'metafiles' structures, introduced to track which metadata.xml file a piece of metadata came from

Location:
gsdl/trunk/perllib
Files:
3 modified

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/doc.pm

    r18561 r19494  
    999999} 
    10001000 
     1001 
     1002sub metadata_file { 
     1003    my $self = shift (@_); 
     1004    my ($real_filename, $filename) = @_; 
     1005     
     1006    push (@{$self->{'metadata_files'}},  
     1007      [$real_filename, $filename]); 
     1008} 
     1009 
     1010sub get_meta_files { 
     1011    my $self = shift (@_); 
     1012 
     1013    return $self->{'metadata_files'}; 
     1014} 
     1015 
    10011016sub delete_section_assoc_files { 
    10021017    my $self = shift (@_); 
  • gsdl/trunk/perllib/plugouts/BasePlugout.pm

    r19180 r19494  
    493493        print $outhandle "Writing associated files to $doc_dir\n"; 
    494494    $self->process_assoc_files ($doc_obj, $doc_dir); 
     495 
     496    # look up 'gsdlmetafile' metadata and store that information 
     497    # explicitly in $doc_obj 
     498    $self->process_metafiles_metadata ($doc_obj); 
    495499    } 
    496500 
     
    643647 
    644648 
    645 sub archiveinf_gdbm 
     649sub process_metafiles_metadata  
    646650{ 
    647651    my $self = shift (@_); 
    648652    my ($doc_obj) = @_; 
    649653 
    650     my $verbosity = $self->{'verbosity'}; 
    651  
    652     my $collect_dir = $ENV{'GSDLCOLLECTDIR'}; 
    653     if (defined $collect_dir) { 
    654     my $dirsep_regexp = &util::get_os_dirsep(); 
    655  
    656     if ($collect_dir !~ /$dirsep_regexp$/) { 
    657         # ensure there is a slash at the end 
    658         $collect_dir .= &util::get_dirsep();  
    659     } 
    660     } 
    661  
    662     my $oid = $doc_obj->get_OID(); 
    663     my $source_filename = $doc_obj->get_source_filename(); 
    664  
    665     my $working_info = $self->{'output_info'};  
    666     my $doc_info = $working_info->get_info($oid); 
    667     my ($doc_file,$index_status) = @$doc_info; 
    668  
    669     my $oid_files = { 'doc-file' => $doc_file, 
    670               'index-status' => $index_status, 
    671               'src-file' => $source_filename, 
    672               'assoc-files' => [] }; 
    673      
    674     my %reverse_lookups = ( $source_filename => "1" ); 
    675  
    676     foreach my $assoc_file_rec (@{$doc_obj->get_assoc_files()}) { 
    677     my $real_filename = $assoc_file_rec->[0]; 
    678     my $full_afile = $assoc_file_rec->[1]; 
     654    my $top_section = $doc_obj->get_top_section(); 
     655    my $metafiles = $doc_obj->get_metadata($top_section,"gsdlmetafile"); 
     656 
     657    foreach my $metafile_pair (@$metafiles) { 
     658    my ($full_metafile,$metafile) = split(/:/,$metafile_pair); 
     659 
     660    $doc_obj->metadata_file($full_metafile,$metafile); 
     661    } 
     662 
     663    $doc_obj->delete_metadata($top_section,"gsdlmetafile"); 
     664} 
     665 
     666sub archiveinf_files_to_field 
     667{ 
     668    my $self = shift(@_); 
     669    my ($files,$field,$collect_dir,$oid_files,$reverse_lookups) = @_; 
     670 
     671    foreach my $file_rec (@$files) { 
     672    my $real_filename = $file_rec->[0]; 
     673    my $full_file = $file_rec->[1]; 
    679674 
    680675    # for some reasons the image associate file has / before the full path 
     
    690685        } 
    691686 
    692         $reverse_lookups{$real_filename} = 1; 
    693  
    694         push(@{$oid_files->{'assoc-files'}},$full_afile); 
    695  
     687        $reverse_lookups->{$real_filename} = 1; 
     688 
     689        push(@{$oid_files->{$field}},$full_file); 
    696690    } 
    697691    else { 
     
    699693    } 
    700694    } 
     695} 
     696 
     697sub archiveinf_gdbm 
     698{ 
     699    my $self = shift (@_); 
     700    my ($doc_obj) = @_; 
     701 
     702    my $verbosity = $self->{'verbosity'}; 
     703 
     704    my $collect_dir = $ENV{'GSDLCOLLECTDIR'}; 
     705    if (defined $collect_dir) { 
     706    my $dirsep_regexp = &util::get_os_dirsep(); 
     707 
     708    if ($collect_dir !~ /$dirsep_regexp$/) { 
     709        # ensure there is a slash at the end 
     710        $collect_dir .= &util::get_dirsep();  
     711    } 
     712    } 
     713 
     714    my $oid = $doc_obj->get_OID(); 
     715    my $source_filename = $doc_obj->get_source_filename(); 
     716 
     717    my $working_info = $self->{'output_info'};  
     718    my $doc_info = $working_info->get_info($oid); 
     719    my ($doc_file,$index_status) = @$doc_info; 
     720 
     721    my $oid_files = { 'doc-file' => $doc_file, 
     722              'index-status' => $index_status, 
     723              'src-file' => $source_filename, 
     724              'assoc-files' => [], 
     725              'meta-files'  => [] }; 
     726     
     727    my $reverse_lookups = { $source_filename => "1" }; 
     728 
     729 
     730    $self->archiveinf_files_to_field($doc_obj->get_assoc_files(),"assoc-files", 
     731                     $collect_dir,$oid_files,$reverse_lookups); 
     732 
     733#    foreach my $assoc_file_rec (@{$doc_obj->get_assoc_files()}) { 
     734#   my $real_filename = $assoc_file_rec->[0]; 
     735#   my $full_afile = $assoc_file_rec->[1]; 
     736# 
     737#   # for some reasons the image associate file has / before the full path 
     738#   $real_filename =~ s/^\\(.*)/$1/i; 
     739#   if (-e $real_filename) { 
     740# 
     741#       if (defined $collect_dir) { 
     742#       my $collect_dir_re_safe = $collect_dir; 
     743#       $collect_dir_re_safe =~ s/\\/\\\\/g; 
     744#       $collect_dir_re_safe =~ s/\./\\./g; 
     745# 
     746#       $real_filename =~ s/^$collect_dir_re_safe//; 
     747#       } 
     748# 
     749#       $reverse_lookups->{$real_filename} = 1; 
     750# 
     751#       push(@{$oid_files->{'assoc-files'}},$full_afile); 
     752#   } 
     753#   else { 
     754#       print STDERR "Warning: archiveinf_gdbm()\n  $real_filename does not appear to be on the file system\n"; 
     755#   } 
     756#    } 
     757 
     758    $self->archiveinf_files_to_field($doc_obj->get_meta_files(),"meta-files", 
     759                     $collect_dir,$oid_files,$reverse_lookups); 
     760 
    701761 
    702762    # better not to commit to a particular db implementation, but 
     
    712772    $doc_db_text .= "<index-status>$oid_files->{'index-status'}\n"; 
    713773    $doc_db_text .= "<src-file>$oid_files->{'src-file'}\n"; 
     774 
    714775    foreach my $af (@{$oid_files->{'assoc-files'}}) { 
    715776    $doc_db_text .= "<assoc-file>$af\n"; 
    716777    } 
     778 
     779    foreach my $mf (@{$oid_files->{'meta-files'}}) { 
     780    $doc_db_text .= "<meta-file>$mf\n"; 
     781    } 
     782 
    717783    chomp($doc_db_text); # remove trailing \n 
    718784 
     
    720786    &GDBMUtils::gdbmDatabaseSet($doc_db,$oid,$doc_db_text); 
    721787 
    722     foreach my $rl (keys %reverse_lookups) { 
     788    foreach my $rl (keys %$reverse_lookups) { 
    723789    &GDBMUtils::gdbmDatabaseAppend($src_db,$rl,"<oid>$oid\n"); 
    724790    } 
  • gsdl/trunk/perllib/plugouts/GreenstoneXMLPlugout.pm

    r17743 r19494  
    7777    $self->process_assoc_files ($doc_obj, $doc_dir, ''); 
    7878     
     79    $self->process_metafiles_metadata ($doc_obj); 
     80 
    7981    my $output_file = util::filename_cat ($working_dir, "doc.xml"); 
    8082