Changeset 24404

Show
Ignore:
Timestamp:
12.08.2011 19:35:50 (8 years ago)
Author:
ak19
Message:

Changes to the Perl code that strips the ex. metadata prefix: ex. is now removed only when it is the sole namespace prefix (i.e. names of the form ex.dc.* are left intact).

Location:
main/trunk/greenstone2
Files:
13 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/bin/script/explode_metadata_database.pl

    r22317 r24404  
    411411    foreach my $pair (@$record_metadata) { 
    412412        my ($field, $value) = (@$pair); 
    413         $field =~ s/^ex\.//; # remove ex. if present 
     413        $field =~ s/^ex\.([^.]+)$/$1/; #remove any ex. iff it's the only metadata set prefix (will leave ex.dc.* intact) 
    414414        $value =~ s/\\\\/\\/g; 
    415415        my $document_file_full; 
     
    509509    next if ($field =~ /\^all$/);  # ISISPlug specific! 
    510510 
    511     # strip off any ex. 
    512     $field =~ s/^ex\.//; 
     511    $field =~ s/^ex\.([^.]+)$/$1/; #remove any ex. iff it's the only metadata set prefix (will leave ex.dc.* intact) 
    513512 
    514513    # Square brackets in metadata values need to be escaped so they don't confuse Greenstone/GLI 
  • main/trunk/greenstone2/perllib/basebuildproc.pm

    r23387 r24404  
    244244    my $self = shift (@_); 
    245245    my ($lang_meta, $langarr) = @_; 
    246     $lang_meta =~ s/^ex\.//; # strip ex. if there 
     246    $lang_meta =~ s/^ex\.([^.]+)$/$1/; # strip any ex. namespace iff it's the only namespace prefix (will leave ex.dc.* intact) 
     247 
    247248    $self->{'lang_meta'} = $lang_meta; 
    248249    $self->{'langarr'} = $langarr; 
     
    512513        $value =~ s/\n/\\n/g; 
    513514        $value =~ s/\r/\\r/g; 
    514         # remove ex. if there 
    515         $field =~ s/^ex\.//; 
     515        # remove any ex. iff it's the only namespace prefix (will leave ex.dc.* intact) 
     516        $field =~ s/^ex\.([^.]+)$/$1/; # $field =~ s/^ex\.//;  
    516517 
    517518        # special case for UTF8URL metadata 
     
    696697        } 
    697698        else { 
    698         $field =~ s/^ex\.//; #strip ex. if present 
     699        $field =~ s/^ex\.([^.]+)$/$1/; # remove any ex. iff it's the only namespace prefix (will leave ex.dc.* intact) 
    699700        @metadata_values = @{$doc_obj->get_metadata($doc_obj->get_top_section(), $field)}; 
    700701        } 
  • main/trunk/greenstone2/perllib/classify/BaseClassifier.pm

    r24193 r24404  
    383383    return $metadata unless defined $metadata && $metadata =~ /\S/; 
    384384 
    385     $metadata =~ s/^ex\.//; 
    386     $metadata =~ s/([,;:\/])ex\./$1/g; 
     385    # only remove ex. metadata prefix if there are no other prefixes after it 
     386    $metadata =~ s/(,|;|:|\/)/$1 /g; # insert a space separator so meta names like flex.Image don't become fl.Image 
     387    $metadata =~ s/(^| )ex\.([^.,;:\/]+)(,|;|:|\/|$)/$1$2$3/g;  
     388                     $metadata =~ s/(,|;|:|\/) /$1/g; 
     389 
    387390    return $metadata; 
    388391} 
  • main/trunk/greenstone2/perllib/doc.pm

    r23939 r24404  
    807807    } 
    808808 
    809     # Remove the namespace if we are being told to ignore them 
     809    # Remove any namespace if we are being told to ignore them 
    810810    if($ignore_namespace) { 
    811     $field =~ s/^\w*\.//; 
     811    $field =~ s/^.*\.//; #$field =~ s/^\w*\.//; 
    812812    } 
    813813 
     
    816816    my $data_name = $data->[0]; 
    817817 
    818     # Remove the any namespace if we are being told to ignore them 
     818    # Remove any namespace if we are being told to ignore them 
    819819    if($ignore_namespace) { 
    820         $data_name =~ s/^\w*\.//; 
    821     } 
    822     $data_name =~ s/^ex\.//; # we always remove ex. - it maybe there in doc_obj, but we will never ask for it. 
     820        $data_name =~ s/^.*\.//; #$data_name =~ s/^\w*\.//; 
     821    } 
     822    # we always remove ex. (but not any subsequent namespace) - ex. may be there in doc_obj, but we will never ask for it. 
     823    $data_name =~ s/^ex\.([^.]+)$/$1/; #$data_name =~ s/^ex\.//;  
     824     
    823825    return $data->[1] if (scalar(@$data) >= 2 && $data_name eq $field); 
    824826    } 
     
    842844    } 
    843845 
    844     # Remove the any namespace if we are being told to ignore them 
     846    # Remove any namespace if we are being told to ignore them 
    845847    if($ignore_namespace) { 
    846     $field =~ s/^\w*\.//; 
     848    $field =~ s/^.*\.//; 
    847849    } 
    848850 
     
    851853 
    852854    my $data_name = $data->[0]; 
    853     # Remove the any namespace if we are being told to ignore them 
     855 
     856    # Remove any namespace if we are being told to ignore them 
    854857    if($ignore_namespace) { 
    855         $data_name =~ s/^\w*\.//; 
    856     } 
    857     $data_name =~ s/^ex\.//; # we always remove ex. - it maybe there in doc_obj, but we will never ask for it. 
     858        $data_name =~ s/^.*\.//; 
     859    }    
     860    # we always remove ex. (but not any subsequent namespace) - ex. may be there in doc_obj, but we will never ask for it. 
     861    $data_name =~ s/^ex\.([^.]+)$/$1/; 
     862 
    858863        push (@metadata, $data->[1]) if ($data_name eq $field); 
    859864    } 
  • main/trunk/greenstone2/perllib/lucenebuildproc.pm

    r23198 r24404  
    226226        } 
    227227        else { 
    228             $submeta =~ s/^ex\.//; #strip off ex. 
     228            $submeta =~ s/^ex\.([^.]+)$/$1/; #strip off ex. iff it's the only metadata set prefix (will leave ex.dc.* intact) 
    229229 
    230230            # its a metadata element 
  • main/trunk/greenstone2/perllib/mgbuilder.pm

    r22820 r24404  
    7777    push (@{$self->{'collect_cfg'}->{'indexes'}}, "dummy:text");     
    7878    } 
    79     # remove any ex. 
     79    # remove any ex. but only if there are no other metadata prefixes 
    8080    my @orig_indexes = @{$self->{'collect_cfg'}->{'indexes'}}; 
    8181    $self->{'collect_cfg'}->{'indexes'} = []; 
    8282    foreach my $index (@orig_indexes) { 
    83     $index =~ s/([:,])ex\./$1/g; 
     83    #$index =~ s/ex\.([^.,:]+)(,|:|$)/$1$2/g; # doesn't preserve flex.Image, which is turned into fl.Image  
     84    $index =~ s/(,|:)/$1 /g; 
     85    $index =~ s/(^| )ex\.([^.,:]+)(,|:|$)/$1$2$3/g; 
     86    $index =~ s/(,|:) /$1/g; 
     87 
    8488    push (@{$self->{'collect_cfg'}->{'indexes'}}, $index); 
    8589    } 
  • main/trunk/greenstone2/perllib/mgbuildproc.pm

    r20419 r24404  
    107107            } else { 
    108108            my $first = 1; 
    109             $real_field =~ s/^ex\.//; # remove ex namespace if there 
     109            $real_field =~ s/^ex\.([^.]+)$/$1/; # remove ex. namespace iff it's the only namespace prefix (will leave ex.dc.* intact) 
    110110            my @section_metadata = @{$doc_obj->get_metadata ($section, $real_field)}; 
    111111            if ($level eq "section" && $section ne $doc_obj->get_top_section() && $self->{'indexing_text'} && defined ($self->{'sections_index_document_metadata'})) { 
  • main/trunk/greenstone2/perllib/mgppbuilder.pm

    r22820 r24404  
    101101    if (defined $indexes) { 
    102102    $self->{'collect_cfg'}->{'indexes'} = []; 
    103     my $single_index = join(';', @$indexes).";"; 
    104     # remove any ex. from index spec 
    105     $single_index =~ s/^ex\.//; 
    106     $single_index =~ s/([,;])ex\./$1/g; 
     103 
     104    # remove any ex. from index spec but iff it is the only namespace in the metadata name 
     105    my @indexes_copy = @$indexes; # make a copy, as 'map' changes entry in array 
     106    #map { $_ =~ s/(^|,|;)ex\.([^.]+)$/$1$2/; } @indexes_copy; # No. Will replace metanames like flex.Image with fl.Image 
     107    map { $_ =~ s/(,|;)/$1 /g; } @indexes_copy; # introduce a space after every separator 
     108    map { $_ =~ s/(^| )ex\.([^.,:]+)(,|;|$)/$1$2$3/g; } @indexes_copy; # replace all <ex.> at start of metanames or <, ex.> when in a comma separated list 
     109    map { $_ =~ s/(,|:) /$1/g; } @indexes_copy; # remove space introduced after every separator 
     110    my $single_index = join(';', @indexes_copy).";"; 
     111 
    107112    push (@{$self->{'collect_cfg'}->{'indexes'}}, $single_index); 
    108113    } 
  • main/trunk/greenstone2/perllib/mgppbuildproc.pm

    r20419 r24404  
    326326            } 
    327327            else { 
    328             $submeta =~ s/^ex\.//; #strip off ex. 
     328            $submeta =~ s/^ex\.([^.]+)$/$1/; #strip off ex. iff it's the only metadata set prefix (will leave ex.dc.* intact) 
    329329            # its a metadata element 
    330330            my @section_metadata = @{$doc_obj->get_metadata ($section, $submeta)}; 
  • main/trunk/greenstone2/perllib/plugins/BasePlugin.pm

    r24403 r24404  
    204204    $self->{'plugin_type'} = $plugin_name; 
    205205 
    206     # remove ex. from OIDmetadata 
    207     $self->{'OIDmetadata'} =~ s/^ex\.// if defined $self->{'OIDmetadata'}; 
     206    # remove ex. from OIDmetadata iff it's the only namespace prefix 
     207    $self->{'OIDmetadata'} =~ s/^ex\.([^.]+)$/$1/ if defined $self->{'OIDmetadata'}; 
    208208    $self->{'num_processed'} = 0; 
    209209    $self->{'num_not_processed'} = 0; 
  • main/trunk/greenstone2/perllib/plugins/EmbeddedMetadataPlugin.pm

    r24403 r24404  
    287287 
    288288 
    289 sub metadata_read() 
     289sub metadata_read 
    290290{ 
    291291    my $self = shift (@_); 
     
    314314} 
    315315 
    316 sub process() 
     316sub process 
    317317{ 
    318318    # not used 
  • main/trunk/greenstone2/perllib/plugins/OAIPlugin.pm

    r24403 r24404  
    9090    return bless $self, $class; 
    9191    } 
    92     # trim ex. from document field (if there) 
    93     $self->{'document_field'} =~ s/^ex\.//; 
     92    # trim any ex. from document field iff it's the only metadata namespace prefix     
     93    $self->{'document_field'} =~ s/^ex\.([^.]+)$/$1/; 
    9494    return bless $self, $class; 
    9595} 
     
    531531 
    532532        # so that GLI can see this metadata, store here as ex.dc.Title etc 
    533         my $ex_metaname = "ex.$metaname"; 
     533        my $ex_metaname = $metaname; 
     534        $ex_metaname =~ s/^ex\.//; # remove any pre-existing ex. prefix 
     535        $ex_metaname = "ex.$ex_metaname"; # at last can prefix ex. 
    534536 
    535537        if (defined $metadata->{$ex_metaname}) 
  • main/trunk/greenstone2/perllib/plugouts/BasePlugout.pm

    r23939 r24404  
    965965 
    966966# returns an XML representation of the dublin core metadata 
    967 # if dc meta is not found, try ex mete 
     967# if dc meta is not found, try ex meta 
     968# This method is not used by the DSpacePlugout, which has its 
     969# own method to save its dc metadata 
    968970sub get_dc_metadata { 
    969971    my $self = shift(@_); 
     
    978980 
    979981    my $explicit_dc = {}; 
     982    my $explicit_ex_dc = {}; 
    980983    my $explicit_ex = {}; 
    981984 
    982985    my $all_text=""; 
     986     
     987    # We want high quality dc metadata to go in first, so we store all the 
     988    # assigned dc.* values first. Then, for all those dc metadata names in 
     989    # the official dc set that are as yet unassigned, we look to see whether 
     990    # embedded ex.dc.* metadata has defined some values for them. If not, 
     991    # then for the same missing dc metadata names, we look in ex metadata. 
     992 
    983993    foreach my $data (@{$section_ptr->{'metadata'}}){ 
    984994    my $escaped_value = &docprint::escape_text($data->[1]); 
     
    10021012        } 
    10031013 
    1004     }  
    1005     elsif (($data->[0] =~ m/^ex\./) || ($data->[0] !~ m/\./)) { 
     1014    } elsif ($data->[0]=~ m/^ex\.dc\./) { # now look through ex.dc.* to fill in as yet unassigned fields in dc metaset 
     1015        $data->[0] =~ m/^ex\.dc\.(.*)/; 
     1016        my $ex_dc_element = $1; 
     1017        my $lc_ex_dc_element = lc($ex_dc_element); 
     1018 
     1019        # only store the ex.dc value for this dc metaname if no dc.* was assigned for it 
     1020        if (defined $dc_set->{$ex_dc_element}) {  
     1021        if (!defined $explicit_ex_dc->{$lc_ex_dc_element}) { 
     1022            $explicit_ex_dc->{$lc_ex_dc_element} = []; 
     1023        } 
     1024        push(@{$explicit_ex_dc->{$lc_ex_dc_element}},$escaped_value); 
     1025        } 
     1026    } 
     1027    elsif (($data->[0] =~ m/^ex\./) || ($data->[0] !~ m/\./)) { # look through ex. meta (incl. meta without prefix) 
    10061028        $data->[0] =~ m/^(ex\.)?(.*)/; 
    1007         my $ex_element =  $2; 
     1029        my $ex_element = $2; 
    10081030        my $lc_ex_element = lc($ex_element); 
    10091031 
     
    10231045 
    10241046    if (!defined $explicit_dc->{$lc_k}) { 
    1025         if (defined $explicit_ex->{$lc_k}) { 
    1026  
     1047        # try to find if ex.dc.* defines this dc.* meta, 
     1048        # if not, then look for whether there's an ex.* equivalent 
     1049 
     1050        if (defined $explicit_ex_dc->{$lc_k}) { 
     1051        foreach my $v (@{$explicit_ex_dc->{$lc_k}}) { 
     1052            my $dc_element    = $lc_k; 
     1053            my $escaped_value = $v; 
     1054             
     1055            if (defined $version && ($version eq "oai_dc")) { 
     1056            $all_text .= "   <dc:$dc_element>$escaped_value</dc:$dc_element>\n"; 
     1057            } 
     1058            else { 
     1059            $all_text .= '   <dcvalue element="'. $dc_element.'">'. $escaped_value. "</dcvalue>\n"; 
     1060            }            
     1061        } 
     1062        } elsif (defined $explicit_ex->{$lc_k}) { 
    10271063        foreach my $v (@{$explicit_ex->{$lc_k}}) { 
    10281064            my $dc_element    = $lc_k; 
     
    10351071            $all_text .= '   <dcvalue element="'. $dc_element.'">'. $escaped_value. "</dcvalue>\n"; 
    10361072            } 
    1037              
    10381073        } 
    10391074        }