Changeset 38745 for main/trunk


Ignore:
Timestamp:
2024-02-14T16:44:10+13:00 (4 months ago)
Author:
davidb
Message:

Rule mapping code extended so that the source metadata regular expression of each rule now carries a prefix: m/ (match) or s/ (substitute)

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugins/BaseImporter.pm

    r38743 r38745  
    190190   
    191191    if (($num_csv_entries == 5) || ($num_csv_entries >= 6 && $csv_entries[4] =~ m/\s*#.*$/)) {
     192
    192193        my $src_metadata_name  = $csv_entries[0];
    193         (my $src_metadata_regex = $csv_entries[1]) =~ s/(^\/)|(\/$)//g; #assigns as a /qr (quoted regex) and then strips off leading and trailing slash
    194         (my $dst_metadata_regex = $csv_entries[2]) =~ s/(^\/)|(\/$)//g; #as line above
    195         (my $regex_modifiers    = $csv_entries[3]) =~ s/(^')|('$)//g;   #similar to above but strips out single quotes
     194        my ($regex_type,$src_metadata_regex) = ($csv_entries[1] =~ m/^(s|m)\/(.*)\/$/); # use matching to extract regex_type ('s' or 'm') and src_metadata_regex
     195        (my $dst_metadata_regex = $csv_entries[2]) =~ s/(^\/)|(\/$)//g; # strips off leading and trailing slash
     196        (my $regex_modifiers    = $csv_entries[3]) =~ s/(^')|('$)//g;   # similar to above but strips out single quotes
    196197        my $dst_metadata_name  = $csv_entries[4];
    197198
    198         # If the last entry has a '# comments' after it, needs a bit more string manipulation
    199         # to tidy it up
    200         $dst_metadata_name =~ s/#.*$//;
    201         $dst_metadata_name =~ s/\s*$//;
    202        
    203         my $regex_sub = "s/$src_metadata_regex/$dst_metadata_regex/$regex_modifiers";
    204 
    205         my $metadata_mapping_rule_rec = {
    206         "src_metadata_name" => $src_metadata_name,
    207         "regex_sub"         => $regex_sub,
    208         "dst_metadata_name" => $dst_metadata_name           
    209         };
    210            
    211         push(@$metadata_mapping_rules,$metadata_mapping_rule_rec);
     199        if ((defined $regex_type) && (defined $src_metadata_regex)) {
     200        # If the last entry has a '# comments' after it, needs a bit more string manipulation
     201        # to tidy it up
     202        $dst_metadata_name =~ s/#.*$//;
     203        $dst_metadata_name =~ s/\s*$//;
     204       
     205        my $regex_sub = "s/$src_metadata_regex/$dst_metadata_regex/$regex_modifiers";
     206
     207        my $metadata_mapping_rule_rec = {
     208            "regex_type"        => $regex_type,
     209            "src_metadata_name" => $src_metadata_name,
     210            "regex_sub"         => $regex_sub,
     211            "dst_metadata_name" => $dst_metadata_name           
     212        };
     213
     214        if ($regex_type eq "m") {
     215            my $regex_match = "m/$src_metadata_regex/";
     216            $metadata_mapping_rule_rec->{"regex_match"} = $regex_match;
     217        }
     218       
     219        push(@$metadata_mapping_rules,$metadata_mapping_rule_rec);
     220        }
     221        else {
     222        print STDERR "Warning: syntax error in $metadata_mapping_full_filename, line $line_num\n";
     223        print STDERR "  $csv_line\n";
     224        print STDERR "Did not specify source metadata regular expression entry inside slashes, e.g., s/(-|_)/\n";
     225        if (!defined $regex_type) {
     226            print STDERR "Prefix to source metadata regular expression entry must be either s/ or m/ for substitute or match respectively\n";
     227        }
     228        }
    212229    }
    213230    else {
     
    801818   
    802819    foreach my $metadata_mapping_rule_rec (@$metadata_mapping_rules) {
    803        
     820
     821    my $regex_type = $metadata_mapping_rule_rec->{"regex_type"};
     822    my $regex_match = undef;
     823    if ($regex_type eq "m") {
     824        $regex_match = $metadata_mapping_rule_rec->{"regex_match"};
     825    }
     826   
    804827    my $src_metadata_name = $metadata_mapping_rule_rec->{"src_metadata_name"};
    805828    my $dst_metadata_name = $metadata_mapping_rule_rec->{"dst_metadata_name"};
     
    811834    foreach my $metadata_val (@$metadata_vals) {
    812835
     836        if (defined $regex_match) {
     837        if ($verbosity >= 4) {
     838            print $outhandle "    Applying match guard: '$metadata_val' must match $regex_match ...\n"
     839        }
     840        my $match_guard = 0;
     841
     842        # print STDERR "***** \$match_guard = 1 if \$metadata_val =~ $regex_match\n" ;
     843        eval ( "\$match_guard = 1 if (\$metadata_val =~ $regex_match)" );
     844        if ($@) {
     845            warn "$@";
     846        }
     847        if ($match_guard) {
     848            if ($verbosity >= 4) {
     849            print $outhandle "    ... Matched!\n"
     850            }
     851        }
     852        else {
     853            if ($verbosity >= 4) {
     854            print $outhandle "    ... Did not match\n"
     855            }
     856            next;
     857        }
     858        }
     859       
    813860        my $store_metadata_val = $metadata_val;
     861
    814862       
    815863        if ($verbosity >= 4) {
    816         print $outhandle "    Testing for match with: \$metadata_val =~ $regex_sub\n"
     864        print $outhandle "    Testing '$store_metadata_val' for match with: \$metadata_val =~ $regex_sub\n"
    817865        }
    818866        eval ( "\$metadata_val =~ $regex_sub" );
Note: See TracChangeset for help on using the changeset viewer.