Changeset 24404 for main/trunk/greenstone2
- Timestamp: 2011-08-12T19:35:50+12:00 (13 years ago)
- Location: main/trunk/greenstone2
- Files: 13 edited
Legend:
- Unmodified
- Added
- Removed
main/trunk/greenstone2/bin/script/explode_metadata_database.pl
r22317 r24404 411 411 foreach my $pair (@$record_metadata) { 412 412 my ($field, $value) = (@$pair); 413 $field =~ s/^ex\. //; # remove ex. if present413 $field =~ s/^ex\.([^.]+)$/$1/; #remove any ex. iff it's the only metadata set prefix (will leave ex.dc.* intact) 414 414 $value =~ s/\\\\/\\/g; 415 415 my $document_file_full; … … 509 509 next if ($field =~ /\^all$/); # ISISPlug specific! 510 510 511 # strip off any ex. 512 $field =~ s/^ex\.//; 511 $field =~ s/^ex\.([^.]+)$/$1/; #remove any ex. iff it's the only metadata set prefix (will leave ex.dc.* intact) 513 512 514 513 # Square brackets in metadata values need to be escaped so they don't confuse Greenstone/GLI -
main/trunk/greenstone2/perllib/basebuildproc.pm
r23387 r24404 244 244 my $self = shift (@_); 245 245 my ($lang_meta, $langarr) = @_; 246 $lang_meta =~ s/^ex\.//; # strip ex. if there 246 $lang_meta =~ s/^ex\.([^.]+)$/$1/; # strip any ex. namespace iff it's the only namespace prefix (will leave ex.dc.* intact) 247 247 248 $self->{'lang_meta'} = $lang_meta; 248 249 $self->{'langarr'} = $langarr; … … 512 513 $value =~ s/\n/\\n/g; 513 514 $value =~ s/\r/\\r/g; 514 # remove ex. if there515 $field =~ s/^ex\. //;515 # remove any ex. iff it's the only namespace prefix (will leave ex.dc.* intact) 516 $field =~ s/^ex\.([^.]+)$/$1/; # $field =~ s/^ex\.//; 516 517 517 518 # special case for UTF8URL metadata … … 696 697 } 697 698 else { 698 $field =~ s/^ex\. //; #strip ex. if present699 $field =~ s/^ex\.([^.]+)$/$1/; # remove any ex. iff it's the only namespace prefix (will leave ex.dc.* intact) 699 700 @metadata_values = @{$doc_obj->get_metadata($doc_obj->get_top_section(), $field)}; 700 701 } -
main/trunk/greenstone2/perllib/classify/BaseClassifier.pm
r24193 r24404 383 383 return $metadata unless defined $metadata && $metadata =~ /\S/; 384 384 385 $metadata =~ s/^ex\.//; 386 $metadata =~ s/([,;:\/])ex\./$1/g; 385 # only remove ex. metadata prefix if there are no other prefixes after it 386 $metadata =~ s/(,|;|:|\/)/$1 /g; # insert a space separator so meta names like flex.Image don't become fl.Image 387 $metadata =~ s/(^| )ex\.([^.,;:\/]+)(,|;|:|\/|$)/$1$2$3/g; 388 $metadata =~ s/(,|;|:|\/) /$1/g; 389 387 390 return $metadata; 388 391 } -
main/trunk/greenstone2/perllib/doc.pm
r23939 r24404 807 807 } 808 808 809 # Remove thenamespace if we are being told to ignore them809 # Remove any namespace if we are being told to ignore them 810 810 if($ignore_namespace) { 811 $field =~ s/^ \w*\.//;811 $field =~ s/^.*\.//; #$field =~ s/^\w*\.//; 812 812 } 813 813 … … 816 816 my $data_name = $data->[0]; 817 817 818 # Remove theany namespace if we are being told to ignore them818 # Remove any namespace if we are being told to ignore them 819 819 if($ignore_namespace) { 820 $data_name =~ s/^\w*\.//; 821 } 822 $data_name =~ s/^ex\.//; # we always remove ex. - it maybe there in doc_obj, but we will never ask for it. 820 $data_name =~ s/^.*\.//; #$data_name =~ s/^\w*\.//; 821 } 822 # we always remove ex. (but not any subsequent namespace) - ex. maybe there in doc_obj, but we will never ask for it. 823 $data_name =~ s/^ex\.([^.]+)$/$1/; #$data_name =~ s/^ex\.//; 824 823 825 return $data->[1] if (scalar(@$data) >= 2 && $data_name eq $field); 824 826 } … … 842 844 } 843 845 844 # Remove theany namespace if we are being told to ignore them846 # Remove any namespace if we are being told to ignore them 845 847 if($ignore_namespace) { 846 $field =~ s/^ \w*\.//;848 $field =~ s/^.*\.//; 847 849 } 848 850 … … 851 853 852 854 my $data_name = $data->[0]; 853 # Remove the any namespace if we are being told to ignore them 855 856 # Remove any namespace if we are being told to ignore them 854 857 if($ignore_namespace) { 855 $data_name =~ s/^\w*\.//; 856 } 857 $data_name =~ s/^ex\.//; # we always remove ex. - it maybe there in doc_obj, but we will never ask for it. 858 $data_name =~ s/^.*\.//; 859 } 860 # we always remove ex. (but not any subsequent namespace) - ex. maybe there in doc_obj, but we will never ask for it. 861 $data_name =~ s/^ex\.([^.]+)$/$1/; 862 858 863 push (@metadata, $data->[1]) if ($data_name eq $field); 859 864 } -
main/trunk/greenstone2/perllib/lucenebuildproc.pm
r23198 r24404 226 226 } 227 227 else { 228 $submeta =~ s/^ex\. //; #strip off ex.228 $submeta =~ s/^ex\.([^.]+)$/$1/; #strip off ex. iff it's the only metadata set prefix (will leave ex.dc.* intact) 229 229 230 230 # its a metadata element -
main/trunk/greenstone2/perllib/mgbuilder.pm
r22820 r24404 77 77 push (@{$self->{'collect_cfg'}->{'indexes'}}, "dummy:text"); 78 78 } 79 # remove any ex. 79 # remove any ex. but only if there are no other metadata prefixes 80 80 my @orig_indexes = @{$self->{'collect_cfg'}->{'indexes'}}; 81 81 $self->{'collect_cfg'}->{'indexes'} = []; 82 82 foreach my $index (@orig_indexes) { 83 $index =~ s/([:,])ex\./$1/g; 83 #$index =~ s/ex\.([^.,:]+)(,|:|$)/$1$2/g; # doesn't preserve flex.Image, which is turned into fl.Image 84 $index =~ s/(,|:)/$1 /g; 85 $index =~ s/(^| )ex\.([^.,:]+)(,|:|$)/$1$2$3/g; 86 $index =~ s/(,|:) /$1/g; 87 84 88 push (@{$self->{'collect_cfg'}->{'indexes'}}, $index); 85 89 } -
main/trunk/greenstone2/perllib/mgbuildproc.pm
r20419 r24404 107 107 } else { 108 108 my $first = 1; 109 $real_field =~ s/^ex\. //; # remove ex namespace if there109 $real_field =~ s/^ex\.([^.]+)$/$1/; # remove ex. namespace iff it's the only namespace prefix (will leave ex.dc.* intact) 110 110 my @section_metadata = @{$doc_obj->get_metadata ($section, $real_field)}; 111 111 if ($level eq "section" && $section ne $doc_obj->get_top_section() && $self->{'indexing_text'} && defined ($self->{'sections_index_document_metadata'})) { -
main/trunk/greenstone2/perllib/mgppbuilder.pm
r22820 r24404 101 101 if (defined $indexes) { 102 102 $self->{'collect_cfg'}->{'indexes'} = []; 103 my $single_index = join(';', @$indexes).";"; 104 # remove any ex. from index spec 105 $single_index =~ s/^ex\.//; 106 $single_index =~ s/([,;])ex\./$1/g; 103 104 # remove any ex. from index spec but iff it is the only namespace in the metadata name 105 my @indexes_copy = @$indexes; # make a copy, as 'map' changes entry in array 106 #map { $_ =~ s/(^|,|;)ex\.([^.]+)$/$1$2/; } @indexes_copy; # No. Will replace metanames like flex.Image with fl.Image 107 map { $_ =~ s/(,|;)/$1 /g; } @indexes_copy; # introduce a space after every separator 108 map { $_ =~ s/(^| )ex\.([^.,:]+)(,|;|$)/$1$2$3/g; } @indexes_copy; # replace all <ex.> at start of metanames or <, ex.> when in a comma separated list 109 map { $_ =~ s/(,|:) /$1/g; } @indexes_copy; # remove space introduced after every separator 110 my $single_index = join(';', @indexes_copy).";"; 111 107 112 push (@{$self->{'collect_cfg'}->{'indexes'}}, $single_index); 108 113 } -
main/trunk/greenstone2/perllib/mgppbuildproc.pm
r20419 r24404 326 326 } 327 327 else { 328 $submeta =~ s/^ex\. //; #strip off ex.328 $submeta =~ s/^ex\.([^.]+)$/$1/; #strip off ex. iff it's the only metadata set prefix (will leave ex.dc.* intact) 329 329 # its a metadata element 330 330 my @section_metadata = @{$doc_obj->get_metadata ($section, $submeta)}; -
main/trunk/greenstone2/perllib/plugins/BasePlugin.pm
r24403 r24404 204 204 $self->{'plugin_type'} = $plugin_name; 205 205 206 # remove ex. from OIDmetadata 207 $self->{'OIDmetadata'} =~ s/^ex\. // if defined $self->{'OIDmetadata'};206 # remove ex. from OIDmetadata iff it's the only namespace prefix 207 $self->{'OIDmetadata'} =~ s/^ex\.([^.]+)$/$1/ if defined $self->{'OIDmetadata'}; 208 208 $self->{'num_processed'} = 0; 209 209 $self->{'num_not_processed'} = 0; -
main/trunk/greenstone2/perllib/plugins/EmbeddedMetadataPlugin.pm
r24403 r24404 287 287 288 288 289 sub metadata_read ()289 sub metadata_read 290 290 { 291 291 my $self = shift (@_); … … 314 314 } 315 315 316 sub process ()316 sub process 317 317 { 318 318 # not used -
main/trunk/greenstone2/perllib/plugins/OAIPlugin.pm
r24403 r24404 90 90 return bless $self, $class; 91 91 } 92 # trim ex. from document field (if there)93 $self->{'document_field'} =~ s/^ex\. //;92 # trim any ex. from document field iff it's the only metadata namespace prefix 93 $self->{'document_field'} =~ s/^ex\.([^.]+)$/$1/; 94 94 return bless $self, $class; 95 95 } … … 531 531 532 532 # so that GLI can see this metadata, store here as ex.dc.Title etc 533 my $ex_metaname = "ex.$metaname"; 533 my $ex_metaname = $metaname; 534 $ex_metaname =~ s/^ex\.//; # remove any pre-existing ex. prefix 535 $ex_metaname = "ex.$ex_metaname"; # at last can prefix ex. 534 536 535 537 if (defined $metadata->{$ex_metaname}) -
main/trunk/greenstone2/perllib/plugouts/BasePlugout.pm
r23939 r24404 965 965 966 966 # returns an XML representation of the dublin core metadata 967 # if dc meta is not found, try ex mete 967 # if dc meta is not found, try ex meta 968 # This method is not used by the DSpacePlugout, which has its 969 # own method to save its dc metadata 968 970 sub get_dc_metadata { 969 971 my $self = shift(@_); … … 978 980 979 981 my $explicit_dc = {}; 982 my $explicit_ex_dc = {}; 980 983 my $explicit_ex = {}; 981 984 982 985 my $all_text=""; 986 987 # We want high quality dc metadata to go in first, so we store all the 988 # assigned dc.* values first. Then, for all those dc metadata names in 989 # the official dc set that are as yet unassigned, we look to see whether 990 # embedded ex.dc.* metadata has defined some values for them. If not, 991 # then for the same missing dc metadata names, we look in ex metadata. 992 983 993 foreach my $data (@{$section_ptr->{'metadata'}}){ 984 994 my $escaped_value = &docprint::escape_text($data->[1]); … … 1002 1012 } 1003 1013 1004 } 1005 elsif (($data->[0] =~ m/^ex\./) || ($data->[0] !~ m/\./)) { 1014 } elsif ($data->[0]=~ m/^ex\.dc\./) { # now look through ex.dc.* to fill in as yet unassigned fields in dc metaset 1015 $data->[0] =~ m/^ex\.dc\.(.*)/; 1016 my $ex_dc_element = $1; 1017 my $lc_ex_dc_element = lc($ex_dc_element); 1018 1019 # only store the ex.dc value for this dc metaname if no dc.* was assigned for it 1020 if (defined $dc_set->{$ex_dc_element}) { 1021 if (!defined $explicit_ex_dc->{$lc_ex_dc_element}) { 1022 $explicit_ex_dc->{$lc_ex_dc_element} = []; 1023 } 1024 push(@{$explicit_ex_dc->{$lc_ex_dc_element}},$escaped_value); 1025 } 1026 } 1027 elsif (($data->[0] =~ m/^ex\./) || ($data->[0] !~ m/\./)) { # look through ex. meta (incl. 
meta without prefix) 1006 1028 $data->[0] =~ m/^(ex\.)?(.*)/; 1007 my $ex_element = 1029 my $ex_element = $2; 1008 1030 my $lc_ex_element = lc($ex_element); 1009 1031 … … 1023 1045 1024 1046 if (!defined $explicit_dc->{$lc_k}) { 1025 if (defined $explicit_ex->{$lc_k}) { 1026 1047 # try to find if ex.dc.* defines this dc.* meta, 1048 # if not, then look for whether there's an ex.* equivalent 1049 1050 if (defined $explicit_ex_dc->{$lc_k}) { 1051 foreach my $v (@{$explicit_ex_dc->{$lc_k}}) { 1052 my $dc_element = $lc_k; 1053 my $escaped_value = $v; 1054 1055 if (defined $version && ($version eq "oai_dc")) { 1056 $all_text .= " <dc:$dc_element>$escaped_value</dc:$dc_element>\n"; 1057 } 1058 else { 1059 $all_text .= ' <dcvalue element="'. $dc_element.'">'. $escaped_value. "</dcvalue>\n"; 1060 } 1061 } 1062 } elsif (defined $explicit_ex->{$lc_k}) { 1027 1063 foreach my $v (@{$explicit_ex->{$lc_k}}) { 1028 1064 my $dc_element = $lc_k; … … 1035 1071 $all_text .= ' <dcvalue element="'. $dc_element.'">'. $escaped_value. "</dcvalue>\n"; 1036 1072 } 1037 1038 1073 } 1039 1074 }
Note: See TracChangeset for help on using the changeset viewer.