Changeset 24404


Ignore:
Timestamp:
2011-08-12T19:35:50+12:00 (13 years ago)
Author:
ak19
Message:

Changes to the Perl code that strips the ex. metadata prefix: ex. is now removed only when it is the sole namespace prefix (e.g. ex.Title becomes Title, whereas ex.dc.* names are left intact).

Location:
main/trunk/greenstone2
Files:
13 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/bin/script/explode_metadata_database.pl

    r22317 r24404  
    411411    foreach my $pair (@$record_metadata) {
    412412        my ($field, $value) = (@$pair);
    413         $field =~ s/^ex\.//; # remove ex. if present
     413        $field =~ s/^ex\.([^.]+)$/$1/; #remove any ex. iff it's the only metadata set prefix (will leave ex.dc.* intact)
    414414        $value =~ s/\\\\/\\/g;
    415415        my $document_file_full;
     
    509509    next if ($field =~ /\^all$/);  # ISISPlug specific!
    510510
    511     # strip off any ex.
    512     $field =~ s/^ex\.//;
     511    $field =~ s/^ex\.([^.]+)$/$1/; #remove any ex. iff it's the only metadata set prefix (will leave ex.dc.* intact)
    513512
    514513    # Square brackets in metadata values need to be escaped so they don't confuse Greenstone/GLI
  • main/trunk/greenstone2/perllib/basebuildproc.pm

    r23387 r24404  
    244244    my $self = shift (@_);
    245245    my ($lang_meta, $langarr) = @_;
    246     $lang_meta =~ s/^ex\.//; # strip ex. if there
     246    $lang_meta =~ s/^ex\.([^.]+)$/$1/; # strip any ex. namespace iff it's the only namespace prefix (will leave ex.dc.* intact)
     247
    247248    $self->{'lang_meta'} = $lang_meta;
    248249    $self->{'langarr'} = $langarr;
     
    512513        $value =~ s/\n/\\n/g;
    513514        $value =~ s/\r/\\r/g;
    514         # remove ex. if there
    515         $field =~ s/^ex\.//;
     515        # remove any ex. iff it's the only namespace prefix (will leave ex.dc.* intact)
     516        $field =~ s/^ex\.([^.]+)$/$1/; # $field =~ s/^ex\.//;
    516517
    517518        # special case for UTF8URL metadata
     
    696697        }
    697698        else {
    698         $field =~ s/^ex\.//; #strip ex. if present
     699        $field =~ s/^ex\.([^.]+)$/$1/; # remove any ex. iff it's the only namespace prefix (will leave ex.dc.* intact)
    699700        @metadata_values = @{$doc_obj->get_metadata($doc_obj->get_top_section(), $field)};
    700701        }
  • main/trunk/greenstone2/perllib/classify/BaseClassifier.pm

    r24193 r24404  
    383383    return $metadata unless defined $metadata && $metadata =~ /\S/;
    384384
    385     $metadata =~ s/^ex\.//;
    386     $metadata =~ s/([,;:\/])ex\./$1/g;
     385    # only remove ex. metadata prefix if there are no other prefixes after it
     386    $metadata =~ s/(,|;|:|\/)/$1 /g; # insert a space separator so meta names like flex.Image don't become fl.Image
     387    $metadata =~ s/(^| )ex\.([^.,;:\/]+)(,|;|:|\/|$)/$1$2$3/g;
     388                     $metadata =~ s/(,|;|:|\/) /$1/g;
     389
    387390    return $metadata;
    388391}
  • main/trunk/greenstone2/perllib/doc.pm

    r23939 r24404  
    807807    }
    808808
    809     # Remove the namespace if we are being told to ignore them
     809    # Remove any namespace if we are being told to ignore them
    810810    if($ignore_namespace) {
    811     $field =~ s/^\w*\.//;
     811    $field =~ s/^.*\.//; #$field =~ s/^\w*\.//;
    812812    }
    813813
     
    816816    my $data_name = $data->[0];
    817817
    818     # Remove the any namespace if we are being told to ignore them
     818    # Remove any namespace if we are being told to ignore them
    819819    if($ignore_namespace) {
    820         $data_name =~ s/^\w*\.//;
    821     }
    822     $data_name =~ s/^ex\.//; # we always remove ex. - it maybe there in doc_obj, but we will never ask for it.
     820        $data_name =~ s/^.*\.//; #$data_name =~ s/^\w*\.//;
     821    }
     822    # we always remove ex. (but not any subsequent namespace) - ex. maybe there in doc_obj, but we will never ask for it.
     823    $data_name =~ s/^ex\.([^.]+)$/$1/; #$data_name =~ s/^ex\.//;
     824   
    823825    return $data->[1] if (scalar(@$data) >= 2 && $data_name eq $field);
    824826    }
     
    842844    }
    843845
    844     # Remove the any namespace if we are being told to ignore them
     846    # Remove any namespace if we are being told to ignore them
    845847    if($ignore_namespace) {
    846     $field =~ s/^\w*\.//;
     848    $field =~ s/^.*\.//;
    847849    }
    848850
     
    851853
    852854    my $data_name = $data->[0];
    853     # Remove the any namespace if we are being told to ignore them
     855
     856    # Remove any namespace if we are being told to ignore them
    854857    if($ignore_namespace) {
    855         $data_name =~ s/^\w*\.//;
    856     }
    857     $data_name =~ s/^ex\.//; # we always remove ex. - it maybe there in doc_obj, but we will never ask for it.
     858        $data_name =~ s/^.*\.//;
     859    }   
     860    # we always remove ex. (but not any subsequent namespace) - ex. maybe there in doc_obj, but we will never ask for it.
     861    $data_name =~ s/^ex\.([^.]+)$/$1/;
     862
    858863        push (@metadata, $data->[1]) if ($data_name eq $field);
    859864    }
  • main/trunk/greenstone2/perllib/lucenebuildproc.pm

    r23198 r24404  
    226226        }
    227227        else {
    228             $submeta =~ s/^ex\.//; #strip off ex.
     228            $submeta =~ s/^ex\.([^.]+)$/$1/; #strip off ex. iff it's the only metadata set prefix (will leave ex.dc.* intact)
    229229
    230230            # its a metadata element
  • main/trunk/greenstone2/perllib/mgbuilder.pm

    r22820 r24404  
    7777    push (@{$self->{'collect_cfg'}->{'indexes'}}, "dummy:text");   
    7878    }
    79     # remove any ex.
     79    # remove any ex. but only if there are no other metadata prefixes
    8080    my @orig_indexes = @{$self->{'collect_cfg'}->{'indexes'}};
    8181    $self->{'collect_cfg'}->{'indexes'} = [];
    8282    foreach my $index (@orig_indexes) {
    83     $index =~ s/([:,])ex\./$1/g;
     83    #$index =~ s/ex\.([^.,:]+)(,|:|$)/$1$2/g; # doesn't preserve flex.Image, which is turned into fl.Image
     84    $index =~ s/(,|:)/$1 /g;
     85    $index =~ s/(^| )ex\.([^.,:]+)(,|:|$)/$1$2$3/g;
     86    $index =~ s/(,|:) /$1/g;
     87
    8488    push (@{$self->{'collect_cfg'}->{'indexes'}}, $index);
    8589    }
  • main/trunk/greenstone2/perllib/mgbuildproc.pm

    r20419 r24404  
    107107            } else {
    108108            my $first = 1;
    109             $real_field =~ s/^ex\.//; # remove ex namespace if there
     109            $real_field =~ s/^ex\.([^.]+)$/$1/; # remove ex. namespace iff it's the only namespace prefix (will leave ex.dc.* intact)
    110110            my @section_metadata = @{$doc_obj->get_metadata ($section, $real_field)};
    111111            if ($level eq "section" && $section ne $doc_obj->get_top_section() && $self->{'indexing_text'} && defined ($self->{'sections_index_document_metadata'})) {
  • main/trunk/greenstone2/perllib/mgppbuilder.pm

    r22820 r24404  
    101101    if (defined $indexes) {
    102102    $self->{'collect_cfg'}->{'indexes'} = [];
    103     my $single_index = join(';', @$indexes).";";
    104     # remove any ex. from index spec
    105     $single_index =~ s/^ex\.//;
    106     $single_index =~ s/([,;])ex\./$1/g;
     103
     104    # remove any ex. from index spec but iff it is the only namespace in the metadata name
     105    my @indexes_copy = @$indexes; # make a copy, as 'map' changes entry in array
     106    #map { $_ =~ s/(^|,|;)ex\.([^.]+)$/$1$2/; } @indexes_copy; # No. Will replace metanames like flex.Image with fl.Image
     107    map { $_ =~ s/(,|;)/$1 /g; } @indexes_copy; # introduce a space after every separator
     108    map { $_ =~ s/(^| )ex\.([^.,:]+)(,|;|$)/$1$2$3/g; } @indexes_copy; # replace all <ex.> at start of metanames or <, ex.> when in a comma separated list
     109    map { $_ =~ s/(,|:) /$1/g; } @indexes_copy; # remove space introduced after every separator
     110    my $single_index = join(';', @indexes_copy).";";
     111
    107112    push (@{$self->{'collect_cfg'}->{'indexes'}}, $single_index);
    108113    }
  • main/trunk/greenstone2/perllib/mgppbuildproc.pm

    r20419 r24404  
    326326            }
    327327            else {
    328             $submeta =~ s/^ex\.//; #strip off ex.
     328            $submeta =~ s/^ex\.([^.]+)$/$1/; #strip off ex. iff it's the only metadata set prefix (will leave ex.dc.* intact)
    329329            # its a metadata element
    330330            my @section_metadata = @{$doc_obj->get_metadata ($section, $submeta)};
  • main/trunk/greenstone2/perllib/plugins/BasePlugin.pm

    r24403 r24404  
    204204    $self->{'plugin_type'} = $plugin_name;
    205205
    206     # remove ex. from OIDmetadata
    207     $self->{'OIDmetadata'} =~ s/^ex\.// if defined $self->{'OIDmetadata'};
     206    # remove ex. from OIDmetadata iff it's the only namespace prefix
     207    $self->{'OIDmetadata'} =~ s/^ex\.([^.]+)$/$1/ if defined $self->{'OIDmetadata'};
    208208    $self->{'num_processed'} = 0;
    209209    $self->{'num_not_processed'} = 0;
  • main/trunk/greenstone2/perllib/plugins/EmbeddedMetadataPlugin.pm

    r24403 r24404  
    287287
    288288
    289 sub metadata_read()
     289sub metadata_read
    290290{
    291291    my $self = shift (@_);
     
    314314}
    315315
    316 sub process()
     316sub process
    317317{
    318318    # not used
  • main/trunk/greenstone2/perllib/plugins/OAIPlugin.pm

    r24403 r24404  
    9090    return bless $self, $class;
    9191    }
    92     # trim ex. from document field (if there)
    93     $self->{'document_field'} =~ s/^ex\.//;
     92    # trim any ex. from document field iff it's the only metadata namespace prefix   
     93    $self->{'document_field'} =~ s/^ex\.([^.]+)$/$1/;
    9494    return bless $self, $class;
    9595}
     
    531531
    532532        # so that GLI can see this metadata, store here as ex.dc.Title etc
    533         my $ex_metaname = "ex.$metaname";
     533        my $ex_metaname = $metaname;
     534        $ex_metaname =~ s/^ex\.//; # remove any pre-existing ex. prefix
     535        $ex_metaname = "ex.$ex_metaname"; # at last can prefix ex.
    534536
    535537        if (defined $metadata->{$ex_metaname})
  • main/trunk/greenstone2/perllib/plugouts/BasePlugout.pm

    r23939 r24404  
    965965
    966966# returns an XML representation of the dublin core metadata
    967 # if dc meta is not found, try ex mete
     967# if dc meta is not found, try ex meta
     968# This method is not used by the DSpacePlugout, which has its
     969# own method to save its dc metadata
    968970sub get_dc_metadata {
    969971    my $self = shift(@_);
     
    978980
    979981    my $explicit_dc = {};
     982    my $explicit_ex_dc = {};
    980983    my $explicit_ex = {};
    981984
    982985    my $all_text="";
     986   
     987    # We want high quality dc metadata to go in first, so we store all the
     988    # assigned dc.* values first. Then, for all those dc metadata names in
     989    # the official dc set that are as yet unassigned, we look to see whether
     990    # embedded ex.dc.* metadata has defined some values for them. If not,
     991    # then for the same missing dc metadata names, we look in ex metadata.
     992
    983993    foreach my $data (@{$section_ptr->{'metadata'}}){
    984994    my $escaped_value = &docprint::escape_text($data->[1]);
     
    10021012        }
    10031013
    1004     }
    1005     elsif (($data->[0] =~ m/^ex\./) || ($data->[0] !~ m/\./)) {
     1014    } elsif ($data->[0]=~ m/^ex\.dc\./) { # now look through ex.dc.* to fill in as yet unassigned fields in dc metaset
     1015        $data->[0] =~ m/^ex\.dc\.(.*)/;
     1016        my $ex_dc_element = $1;
     1017        my $lc_ex_dc_element = lc($ex_dc_element);
     1018
     1019        # only store the ex.dc value for this dc metaname if no dc.* was assigned for it
     1020        if (defined $dc_set->{$ex_dc_element}) {
     1021        if (!defined $explicit_ex_dc->{$lc_ex_dc_element}) {
     1022            $explicit_ex_dc->{$lc_ex_dc_element} = [];
     1023        }
     1024        push(@{$explicit_ex_dc->{$lc_ex_dc_element}},$escaped_value);
     1025        }
     1026    }
     1027    elsif (($data->[0] =~ m/^ex\./) || ($data->[0] !~ m/\./)) { # look through ex. meta (incl. meta without prefix)
    10061028        $data->[0] =~ m/^(ex\.)?(.*)/;
    1007         my $ex_element =  $2;
     1029        my $ex_element = $2;
    10081030        my $lc_ex_element = lc($ex_element);
    10091031
     
    10231045
    10241046    if (!defined $explicit_dc->{$lc_k}) {
    1025         if (defined $explicit_ex->{$lc_k}) {
    1026 
     1047        # try to find if ex.dc.* defines this dc.* meta,
     1048        # if not, then look for whether there's an ex.* equivalent
     1049
     1050        if (defined $explicit_ex_dc->{$lc_k}) {
     1051        foreach my $v (@{$explicit_ex_dc->{$lc_k}}) {
     1052            my $dc_element    = $lc_k;
     1053            my $escaped_value = $v;
     1054           
     1055            if (defined $version && ($version eq "oai_dc")) {
     1056            $all_text .= "   <dc:$dc_element>$escaped_value</dc:$dc_element>\n";
     1057            }
     1058            else {
     1059            $all_text .= '   <dcvalue element="'. $dc_element.'">'. $escaped_value. "</dcvalue>\n";
     1060            }           
     1061        }
     1062        } elsif (defined $explicit_ex->{$lc_k}) {
    10271063        foreach my $v (@{$explicit_ex->{$lc_k}}) {
    10281064            my $dc_element    = $lc_k;
     
    10351071            $all_text .= '   <dcvalue element="'. $dc_element.'">'. $escaped_value. "</dcvalue>\n";
    10361072            }
    1037            
    10381073        }
    10391074        }
Note: See TracChangeset for help on using the changeset viewer.