Changeset 36372


Ignore:
Timestamp:
2022-08-16T19:38:05+12:00 (21 months ago)
Author:
kjdon
Message:

Tidy-up of extrametautil: renamed some methods to make them easier to understand and removed anything unused, then modified the plugins to use the new methods. Also moved some common code into a MetadataRead function, which several plugins can now call instead of duplicating the code. This is an interim commit in which the old code has been left in place to make the changes easier to track; the next commit will have everything tidied up.

Location:
main/trunk/greenstone2/perllib
Files:
9 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/extrametautil.pm

    r29819 r36372  
    3434
    3535
    36 #******************* ADD: add extrametakey, add extrametadata *********************#
     36# extrametakeys - an array that contains all the filenames for which we have
     37# some added metadata
     38#---------------------------------
    3739sub addmetakey {
    3840    my ($extrametakeys, $filename_re_for_metadata) = @_;
     
    4042}
    4143
     44# extrametafiles - a hash on filenames (the files to which metadata will be
     45# associated), containing a hash of file->fullfilename for all the metadata
     46# files that metadata was obtained from
     47#-------------------------------------------
     48
     49sub addmetafile {
     50    my ($extrametafile, $filename_re_for_metadata, $file, $filename_full_path) = @_;
     51    if (!defined $extrametafile->{$filename_re_for_metadata}) {
     52    $extrametafile->{$filename_re_for_metadata} = {};
     53    }
     54    $extrametafile->{$filename_re_for_metadata}->{$file} = $filename_full_path;
     55}
     56
     57# was called setmetafile
     58sub setmetafilehash {
     59    my ($extrametafile, $filename_re_for_metadata, $file_hash) = @_;
     60    $extrametafile->{$filename_re_for_metadata} = $file_hash;
     61}
     62 
     63# was called getmetafile
     64sub getmetafilehash {
     65    my ($extrametafile, $filename_re_for_metadata) = @_;
     66    return $extrametafile->{$filename_re_for_metadata};
     67}
     68
     69# extrametadata - a hash on filenames (the files to which metadata will be
     70# associated), containing a hash of name->[value array] pairs of metadata
     71#----------------------------------------
     72
     73# set the metadata hash for a particular file
     74sub setmetadata {
     75    my ($extrametadata, $filename_re_for_metadata, $meta_hash) = @_;
     76    $extrametadata->{$filename_re_for_metadata} = $meta_hash;
     77}
     78
     79# gets the metadata hash for a particular file
     80sub getmetadata {
     81    my ($extrametadata, $filename_re_for_metadata) = @_;
     82    return $extrametadata->{$filename_re_for_metadata};
     83}
     84
     85# add a single value to a specific metadata field
     86sub addmetadatum {
     87    my ($extrametadata, $filename_re_for_metadata, $field_name, $value) = @_;
     88    if (!defined  $extrametadata->{$filename_re_for_metadata}->{$field_name}) {
     89    $extrametadata->{$filename_re_for_metadata}->{$field_name} = [];
     90    }
     91    my $metaname_vals = $extrametadata->{$filename_re_for_metadata}->{$field_name};
     92    push(@$metaname_vals, $value);
     93}
     94
     95# get a specific value for a particular metadata field
     96# e.g. $extrametadata->{$filename_re_for_metadata}->{"dc.Identifier"}->[0]
     97sub getmetadatum_by_index {
     98    my ($extrametadata, $filename_re_for_metadata, $metaname, $index) = @_;
     99    return $extrametadata->{$filename_re_for_metadata}->{$metaname}->[$index];
     100}
     101
     102#----------------------------------------------
     103#### unused below here
     104
    42105# Unused. Added for symmetry
    43 sub addmetadata {
     106sub addmetadata_UNUSED {
    44107    my ($extrametadata, $filename_re_for_metadata, $value) = @_;   
    45108    my $metanames = $extrametadata->{$filename_re_for_metadata};
     
    48111
    49112# Unused. Added for symmetry
    50 sub addmetafile {
     113sub addmetafile_UNUSED {
    51114    my ($extrametafile, $filename_re_for_metadata, $file) = @_;
    52115    my $metafiles = $extrametafile->{$filename_re_for_metadata};
     
    54117}
    55118
    56 sub addmetadata_for_named_metaname { # e.g. push(@{$extrametadata->{$filename_re_for_metadata}->{$field_name}}, $value);
     119sub addmetadata_for_named_metaname_OLD { # e.g. push(@{$extrametadata->{$filename_re_for_metadata}->{$field_name}}, $value);
    57120    my ($extrametadata, $filename_re_for_metadata, $field_name, $value) = @_;
    58121    my $metaname_vals = $extrametadata->{$filename_re_for_metadata}->{$field_name};
     
    61124
    62125# Unused. Added for symmetry
    63 sub addmetafile_for_named_file {   
     126sub addmetafile_for_named_file_UNUSED {
    64127    my ($extrametafile, $filename_re_for_metadata, $file, $filename_full_path) = @_;
    65128    my $metafile_vals = $extrametafile->{$filename_re_for_metadata}->{$file};
     
    73136
    74137#******************* GET methods
    75 sub getmetadata {
    76     my ($extrametadata, $filename_re_for_metadata) = @_;
    77     return $extrametadata->{$filename_re_for_metadata};
    78 }
    79138
    80 sub getmetafile {
    81     my ($extrametafile, $filename_re_for_metadata) = @_;
    82     return $extrametafile->{$filename_re_for_metadata};
    83 }
    84139
    85 sub getmetadata_for_named_metaname {
     140sub getmetadata_for_named_metaname_OLD {
    86141    my ($extrametadata, $filename_re_for_metadata, $field_name) = @_;
    87142    return $extrametadata->{$filename_re_for_metadata}->{$field_name}; # e.g. $extrametadata->{$filename_re_for_metadata}->{$field_name}
     
    89144
    90145# Unused. Added for symmetry
    91 sub getmetadata_for_named_file {
     146sub getmetafile_for_named_file_UNUSED {
    92147    my ($extrametafile, $filename_re_for_metadata, $file) = @_;
    93148    return $extrametafile->{$filename_re_for_metadata}->{$file};
    94149}
    95150
    96 sub getmetadata_for_named_pos {
    97     my ($extrametadata, $filename_re_for_metadata, $metaname, $index) = @_;
    98     return $extrametadata->{$filename_re_for_metadata}->{$metaname}->[$index]; # e.g. $extrametadata->{$filename_re_for_metadata}->{"dc.Identifier"}->[0]
    99 }
    100151
    101152
    102153#******************* SET methods
    103 sub setmetadata {
    104     my ($extrametadata, $filename_re_for_metadata, $value) = @_;
    105     $extrametadata->{$filename_re_for_metadata} = $value;
     154
     155# set an individual name-value pair in the metadata hash for a particular file
     156sub setmetadata_for_named_metaname_OLD {
     157    my ($extrametadata, $filename_re_for_metadata, $field_name, $value) = @_;
     158    $extrametadata->{$filename_re_for_metadata}->{$field_name} = $value;
    106159}
    107 
    108 sub setmetafile { # e.g. $extrametafile{$filename_re_for_metadata} = $file;
    109     my ($extrametafile, $filename_re_for_metadata, $file) = @_;
     160sub setmetafile_UNUSED { # e.g. $extrametafile{$filename_re_for_metadata} = $file;
     161    my ($extrametafile, $filename_re_for_metadata, $file, $filename_full_path) = @_;
    110162    $extrametafile->{$filename_re_for_metadata} = $file;
    111163}
    112164
    113 sub setmetadata_for_named_metaname {
    114     my ($extrametadata, $filename_re_for_metadata, $field_name, $value) = @_;
    115     $extrametadata->{$filename_re_for_metadata}->{$field_name} = $value;
    116 }
    117 
    118 sub setmetafile_for_named_file {
     165sub setmetafile_for_named_file_OLD {
    119166    my ($extrametafile, $filename_re_for_metadata, $file, $filename_full_path) = @_;
    120167    $extrametafile->{$filename_re_for_metadata}->{$file} = $filename_full_path;
     
    122169
    123170# Unused. Added for symmetry
    124 sub setmetadata_for_named_pos {
     171sub setmetadata_for_named_pos_UNUSED {
    125172    my ($extrametadata, $filename_re_for_metadata, $metaname, $index, $value) = @_;
    126173    $extrametadata->{$filename_re_for_metadata}->{$metaname}->[$index] =  $value;
  • main/trunk/greenstone2/perllib/plugins/DirectoryPlugin.pm

    r33721 r36372  
    419419        &extrametautil::addmetakey(\@extrametakeys, $extrakeys_re);
    420420        &extrametautil::setmetadata(\%extrametadata, $extrakeys_re, $extrakeys_md);
    421         &extrametautil::setmetafile(\%extrametafile, $extrakeys_re, $extrakeys_mf);
     421        &extrametautil::setmetafilehash(\%extrametafile, $extrakeys_re, $extrakeys_mf);
    422422        }
    423423        delete($self->{'subdir_extrametakeys'}->{$local_dirname});
    424424    }
    425425    }
     426
    426427    # apply metadata pass for each of the files in the directory -- ignore
    427428    # maxdocs here
     
    430431    my $subfile = $dir[$i];
    431432    next if ($subfile =~ m/^\.\.?$/);
    432 
    433433    my $this_file_base_dir = $base_dir;
    434434    my $raw_subfile = &unicode::url_encoded_to_raw_filename($subfile);
     
    460460        # a subdir was specified
    461461        my $md = &extrametautil::getmetadata(\%extrametadata, $ek);
    462         my $mf = &extrametautil::getmetafile(\%extrametafile, $ek);
     462        my $mf = &extrametautil::getmetafilehash(\%extrametafile, $ek);
    463463
    464464        my $subdir_extrametakeys = $self->{'subdir_extrametakeys'};
     
    579579            if ($verbosity > 2);
    580580            my $mdref = &extrametautil::getmetadata(\%extrametadata, $filespec);
    581             my $mfref = &extrametautil::getmetafile(\%extrametafile, $filespec);
     581            my $mfref = &extrametautil::getmetafilehash(\%extrametafile, $filespec);
    582582
    583583            # Add the list files where the metadata came from
  • main/trunk/greenstone2/perllib/plugins/EmbeddedMetadataPlugin.pm

    r34921 r36372  
    2929
    3030use BaseImporter;
     31use MetadataRead;
    3132use extrametautil;
    3233use util;
     
    4142sub BEGIN
    4243{
    43     @EmbeddedMetadataPlugin::ISA = ('BaseImporter');
     44    @EmbeddedMetadataPlugin::ISA = ('MetadataRead', 'BaseImporter');
    4445    binmode(STDERR, ":utf8");
    4546}
     
    441442    #print STDERR "file = $file " . &unicode::debug_unicode_string($file);
    442443    $file = &util::raw_filename_to_unicode(&util::filename_head($filename), $file);
     444
     445    $self->store_meta_in_extrametadata($file, $exif_metadata, undef, undef, $extrametakeys, $extrametadata, $extrametafile);
     446
     447    if (0) {
    443448    #print STDERR "$file ". &unicode::debug_unicode_string($file);
    444449    $file = &util::filepath_to_url_format($file);
     
    465470    &extrametautil::addmetakey($extrametakeys, $file);
    466471    }
    467 
     472    }
    468473}
    469474
  • main/trunk/greenstone2/perllib/plugins/LOMPlugin.pm

    r36297 r36372  
    171171    }
    172172   
     173    if (defined $lom_srcdoc) {
     174    $self->store_meta_in_extrametadata($filename_re, $self->{'saved_metadata'}, $file, $filename_full_path, $extrametakeys, $extrametadata, $extrametafile);
     175    } else {
     176    $self->store_meta_in_extrametadata($filename_for_metadata, $self->{'saved_metadata'}, undef, undef, $extrametakeys, $extrametadata, $extrametafile);
     177    }
     178    $self->{'lom_srcdoc'} = undef; # reset for next file to be processed
    173179    # Indexing into the extrameta data structures requires the filename's style of slashes to be in URL format
    174180    # Then need to convert the filename to a regex, no longer to protect windows directory chars \, but for
    175181    # protecting special characters like brackets in the filepath such as "C:\Program Files (x86)\Greenstone".
     182    if (0) {
    176183    $file_re = &util::filepath_to_url_format($file_re);
    177184    $file_re = &util::filename_to_regex($file_re);
     
    188195    &extrametautil::setmetafile_for_named_file($extrametafile, $file_re, $file, $filename_full_path);
    189196    }
    190    
     197    }
    191198    return 1;
    192199}
  • main/trunk/greenstone2/perllib/plugins/MetadataCSVPlugin.pm

    r34249 r36372  
    270270
    271271    # Associate the metadata now
     272    $self->store_meta_in_extrametadata($csv_line_filename, \%csv_line_metadata, $file, $filename_full_path, $extrametakeys, $extrametadata, $extrametafile);
    272273    # Indexing into the extrameta data structures requires the filename's style of slashes to be in URL format
    273274    # Then need to convert the filename to a regex, no longer to protect windows directory chars \, but for
    274275    # protecting special characters like brackets in the filepath such as "C:\Program Files (x86)\Greenstone".
     276
     277    if (0) {
    275278    $csv_line_filename = &util::filepath_to_url_format($csv_line_filename);
    276279    $csv_line_filename = &util::filename_to_regex($csv_line_filename);
     
    297300    # maps the file to full path
    298301    &extrametautil::setmetafile_for_named_file($extrametafile, $csv_line_filename, $file, $filename_full_path);
     302    }
    299303    }
    300304}
  • main/trunk/greenstone2/perllib/plugins/MetadataRead.pm

    r31492 r36372  
    6161    push(@{$hashArgOptLists->{"OptList"}},$options);
    6262
    63     # Like PrintInfo, MetadataRead has no superclass,
    64     # so $self is initialised to an empty hash.
    65     my $self = {};
     63    # Like PrintInfo, MetadataRead has no superclass,
     64    # so $self is initialised to an empty hash.
     65    my $self = {};
    6666    return bless $self, $class;
    6767
     
    8080}
    8181
     82# $filename_for_metadata is the name of the file to attach metadata to; $new_metadata is a reference to a hash of all the metadata; $file is the metadata file; $filename_full_path is the full path to the metadata file.
     83sub store_meta_in_extrametadata
     84{
     85     my $self = shift(@_);
     86     
     87     my ($filename_for_metadata, $new_metadata, $file, $filename_full_path,
     88     $extrametakeys, $extrametadata, $extrametafile) = @_;
     89
     90     # Extrametadata keys should be regular expressions
     91     # Indexing into the extrameta data structures requires the filename's style of slashes to be in URL format
     92     # Then need to convert the filename to a regex, no longer to protect windows directory chars \, but for
     93     # protecting special characters like brackets in the filepath such as "C:\Program Files (x86)\Greenstone".
     94     $filename_for_metadata = &util::filepath_to_url_format($filename_for_metadata);
     95    $filename_for_metadata = &util::filename_to_regex($filename_for_metadata);
     96
     97    # Check that we haven't already got some metadata
     98    if (defined &extrametautil::getmetadata($extrametadata, $filename_for_metadata)) {
     99    print STDERR "\n****  MetadataRead: Need to merge new metadata with existing stored metadata: file = $filename_for_metadata\n" if $self->{'verbosity'} > 3;
     100
     101    my $file_metadata_table = &extrametautil::getmetadata($extrametadata, $filename_for_metadata);
     102
     103    foreach my $metaname (keys %{$new_metadata}) {
     104        # will create new entry if one does not already exist
     105        push(@{$file_metadata_table->{$metaname}}, @{$new_metadata->{$metaname}});     
     106    }
     107
     108    } else {
     109    &extrametautil::setmetadata($extrametadata, $filename_for_metadata, $new_metadata);
     110    &extrametautil::addmetakey($extrametakeys, $filename_for_metadata);
     111    }
     112
     113    #if ($srcdoc_exists) { 
     114#   if (!defined &extrametautil::getmetafile($extrametafile, $filename_for_metadata)) {
     115#       &extrametautil::setmetafile($extrametafile, $filename_for_metadata, {});
     116#   }
     117     if (defined $file && defined $filename_for_metadata) {
     118     #maps the file to full path
     119    &extrametautil::addmetafile($extrametafile, $filename_for_metadata, $file, $filename_full_path);
     120     }
     121
     122
     123#    }
     124}
    82125
    831261;
  • main/trunk/greenstone2/perllib/plugins/MetadataXMLPlugin.pm

    r32159 r36372  
    221221
    222222    if (($ENV{'GSDLOS'} =~ m/^windows$/) && ($^O ne "cygwin")) {
    223         # convert to full name - paths stored in block hash are long filenames
     223    # convert to full name - paths stored in block hash are long filenames
    224224    $filename_full_path = &util::upgrade_if_dos_filename($filename_full_path);
    225     my $lower_drive = $filename_full_path;
    226     $lower_drive =~ s/^([A-Z]):/\l$1:/i;
    227    
    228     my $upper_drive = $filename_full_path;
    229     $upper_drive =~ s/^([A-Z]):/\u$1:/i;
    230    
    231     $block_hash->{'metadata_files'}->{$lower_drive} = 1;
    232     $block_hash->{'metadata_files'}->{$upper_drive} = 1;
     225    }
     226# kjdon - upgrade method converts everything to lower case drive letter.
     227# so would we need the following stuff???
     228#   my $lower_drive = $filename_full_path;
     229#   $lower_drive =~ s/^([A-Z]):/\l$1:/i;
     230   
     231#   my $upper_drive = $filename_full_path;
     232#   $upper_drive =~ s/^([A-Z]):/\u$1:/i;
     233   
     234#   $block_hash->{'metadata_files'}->{$lower_drive} = 1;
     235#   $block_hash->{'metadata_files'}->{$upper_drive} = 1;
    233236       
    234     }
    235     else {
     237#    }
     238#    else {
    236239    $block_hash->{'metadata_files'}->{$filename_full_path} = 1;
    237     }
     240 #   }
    238241
    239242    return 1;
     
    366369        my $filename = $self->{'metadata-filename'};
    367370
    368         if (!defined &extrametautil::getmetafile($self->{'metafileref'}, $target)) {
    369             &extrametautil::setmetafile($self->{'metafileref'}, $target, {});
    370         }
    371 
    372         &extrametautil::setmetafile_for_named_file($self->{'metafileref'}, $target, $file, $filename);
     371#       if (!defined &extrametautil::getmetafile($self->{'metafileref'}, $target)) {
     372#           &extrametautil::setmetafile($self->{'metafileref'}, $target, {});
     373#       }
     374
     375#       &extrametautil::setmetafile_for_named_file($self->{'metafileref'}, $target, $file, $filename);
     376        &extrametautil::addmetafile($self->{'metafileref'}, $target, $file, $filename);
    373377    }
    374378    }
  • main/trunk/greenstone2/perllib/plugins/OAIMetadataXMLPlugin.pm

    r24951 r36372  
    126126  {
    127127    # Don't harvest file sets that don't have dc.Identifier set, "dc.Identifier" is used as the key between Greenstone and OAI Server!
    128     my $dc_identifier = &extrametautil::getmetadata_for_named_pos($extrametadata, $one_file, "dc.Identifier", 0);
     128    #my $dc_identifier = &extrametautil::getmetadata_for_named_pos($extrametadata, $one_file, "dc.Identifier", 0);
     129      my $dc_identifier = &extrametautil::getmetadatum_by_index($extrametadata, $one_file, "dc.Identifier", 0);
    129130    next if (!defined($dc_identifier) || $dc_identifier eq "");
    130131
     
    199200      }   
    200201     
    201       &extrametautil::setmetadata_for_named_metaname($extrametadata, $one_file, $field_name, []) if (!defined (&extrametautil::getmetadata_for_named_metaname($extrametadata, $one_file, $field_name)));
    202       &extrametautil::addmetadata_for_named_metaname($extrametadata, $one_file, $field_name, $value);
     202      #&extrametautil::setmetadata_for_named_metaname($extrametadata, $one_file, $field_name, []) if (!defined (&extrametautil::getmetadata_for_named_metaname($extrametadata, $one_file, $field_name)));
     203      #&extrametautil::addmetadata_for_named_metaname($extrametadata, $one_file, $field_name, $value);
     204      &extrametautil::addmetadatum($extrametadata, $one_file, $field_name, $value);
    203205    }
    204206    #======================================================================#
  • main/trunk/greenstone2/perllib/plugins/OAIPlugin.pm

    r31492 r36372  
    264264    # Directory plug will pass it back in at read time, so we don't need to extract it again.
    265265   
     266    # Store the metadata for later in extrameta. if we have a srcdoc, then treat this file as a metadata file, and pass it in to the store_meta method.
     267# If there is no srcdoc, then this is the actual doc, so we don't want it treated as a metadata file. - pass in undef.
     268    if ($srcdoc_exists) {
     269    $self->store_meta_in_extrametadata($filename_for_metadata, $new_metadata, $file, $filename_full_path, $extrametakeys, $extrametadata, $extrametafile);
     270    } else {
     271    $self->store_meta_in_extrametadata($filename_for_metadata, $new_metadata, undef, undef, $extrametakeys, $extrametadata, $extrametafile);
     272    }
    266273    # Extrametadata keys should be regular expressions
    267274    # Indexing into the extrameta data structures requires the filename's style of slashes to be in URL format
    268275    # Then need to convert the filename to a regex, no longer to protect windows directory chars \, but for
    269276    # protecting special characters like brackets in the filepath such as "C:\Program Files (x86)\Greenstone".
     277
     278    if (0) {
    270279    $filename_for_metadata = &util::filepath_to_url_format($filename_for_metadata);
    271280    $filename_for_metadata = &util::filename_to_regex($filename_for_metadata);
     
    288297
    289298    if ($srcdoc_exists) {   
    290     if (!defined &extrametautil::getmetafile($extrametafile, $filename_for_metadata)) {
    291         &extrametautil::setmetafile($extrametafile, $filename_for_metadata, {});
    292     }
     299#   if (!defined &extrametautil::getmetafile($extrametafile, $filename_for_metadata)) {
     300#       &extrametautil::setmetafile($extrametafile, $filename_for_metadata, {});
     301#   }
    293302     #maps the file to full path
    294     &extrametautil::setmetafile_for_named_file($extrametafile, $filename_for_metadata, $file, $filename_full_path);
     303#   &extrametautil::setmetafile_for_named_file($extrametafile, $filename_for_metadata, $file, $filename_full_path);
     304    &extrametautil::addmetafile($extrametafile, $filename_for_metadata, $file, $filename_full_path);
    295305   
    296306    }
     307    } # if 0
     308
    297309    return 1;
    298310   
Note: See TracChangeset for help on using the changeset viewer.