Changeset 38742 for main/trunk/greenstone2/perllib/plugins/BaseImporter.pm
- Timestamp: 2024-02-13T17:15:40+13:00 (4 months ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
main/trunk/greenstone2/perllib/plugins/BaseImporter.pm
r37048 r38742 99 99 'type' => "string", 100 100 'reqd' => "no" }, 101 { 'name' => "metadata_mapping_file", 102 'desc' => "{BaseImporter.metadata_mapping_file}", 103 'type' => "string", 104 #'deft' => "metadata_mapping_rules.csv", # commenting out, explicitly giving this as the default means every plugin automatically activates it 105 'deft' => "", 106 'reqd' => "no" }, 101 107 { 'name' => "OIDtype", 102 108 'desc' => "{import.OIDtype}", … … 155 161 'args' => $arguments }; 156 162 163 sub setup_metadata_mapping_rules 164 { 165 my $self = shift (@_); 166 my ($metadata_mapping_full_filename) = @_; 167 168 my $csv_file = &util::read_utf8_textfile($metadata_mapping_full_filename); 169 170 my @csv_lines = split('\n',$csv_file); 171 172 my $line_num = 0; 173 174 my $metadata_mapping_rules = []; 175 176 foreach my $csv_line (@csv_lines) { 177 chomp($csv_line); 178 179 $line_num++; 180 181 next if ($csv_line =~ m/^\s*$/); 182 next if ($csv_line =~ m/^#/); 183 184 # my @tsv_entries = split('\t',$tsv_line); 185 my @csv_entries = split(/\s*,\s*/,$csv_line); 186 187 my $num_csv_entries = scalar(@csv_entries); 188 189 # print STDERR join(",",@tsv_entries), "\n\n"; 190 191 if (($num_csv_entries == 5) || ($num_csv_entries >= 6 && $csv_entries[5] =~ m/^#/)) { 192 my $src_metadata_name = $csv_entries[0]; 193 (my $src_metadata_regex = $csv_entries[1]) =~ s/(^\/)|(\/$)//g; #assigns as a /qr (quoted regex) and then strips off leading and trailing slash 194 (my $dst_metadata_regex = $csv_entries[2]) =~ s/(^\/)|(\/$)//g; #as line above 195 (my $regex_modifiers = $csv_entries[3]) =~ s/(^')|('$)//g; #similar to above but strips out single quotes 196 my $dst_metadata_name = $csv_entries[4]; 197 198 # If the last entry has a '# comments' after it, needs a bit more string manipulation 199 # to tidy it up 200 $dst_metadata_name =~ s/#.*$//; 201 $dst_metadata_name =~ s/\s*$//; 202 203 my $regex_sub = "s/$src_metadata_regex/$dst_metadata_regex/$regex_modifiers"; 204 205 my 
$metadata_mapping_rule_rec = { 206 "src_metadata_name" => $src_metadata_name, 207 "regex_sub" => $regex_sub, 208 "dst_metadata_name" => $dst_metadata_name 209 }; 210 211 push(@$metadata_mapping_rules,$metadata_mapping_rule_rec); 212 } 213 else { 214 print STDERR "Warning: syntax error in $metadata_mapping_full_filename, line $line_num\n"; 215 print STDERR " $csv_line\n"; 216 print STDERR "Did not contain 5 tab-delimited entries\n"; 217 } 218 219 } 220 221 if (scalar(@$metadata_mapping_rules)>0) { 222 $self->{'metadata_mapping_rules'} = $metadata_mapping_rules; 223 } 224 } 225 157 226 sub new { 158 227 … … 165 234 166 235 my $self = new CommonUtil($pluginlist, $inputargs, $hashArgOptLists,$auxiliary); 167 236 168 237 if ($self->{'info_only'}) { 169 238 # don't worry about any options etc … … 205 274 } 206 275 207 return bless $self, $class; 276 my $blessed_self = bless $self, $class; 277 278 if ($blessed_self->{'metadata_mapping_file'}) { 279 my $metadata_mapping_file = $blessed_self->{'metadata_mapping_file'}; 280 my $mmf_full_filename; 281 $mmf_full_filename = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'},"etc", $metadata_mapping_file); 282 if (!-e $mmf_full_filename) { 283 my $mmf_full_col_filename = $mmf_full_filename; 284 $mmf_full_filename = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"etc", $metadata_mapping_file); 285 286 if (!-e $mmf_full_filename) { 287 my $outhandle = $blessed_self->{'outhandle'}; 288 print STDERR "\nBaseImporter Error: Can't locate metadata_mapping_file '$metadata_mapping_file'\n"; 289 print STDERR "This file should be in $mmf_full_col_filename or $mmf_full_filename\n"; 290 $blessed_self->print_txt_usage(""); # Use default resource bundle 291 print STDERR "\nBaseImporter Error: Can't locate metadata_mapping_file '$metadata_mapping_file'\n"; 292 print STDERR "This file should be in $mmf_full_col_filename or $mmf_full_filename\n"; 293 die "\n"; 294 } 295 } 296 297 $blessed_self->{'metadata_mapping_full_filename'} = 
$mmf_full_filename; 298 $blessed_self->setup_metadata_mapping_rules($mmf_full_filename); 299 } 300 301 302 return $blessed_self; 208 303 209 304 } … … 688 783 } 689 784 785 786 sub apply_metadata_mapping_file { 787 my $self = shift (@_); 788 my ($doc_obj) = @_; 789 790 my $verbosity = $self->{'verbosity'}; 791 my $outhandle = $self->{'outhandle'}; 792 793 my $top_section = $doc_obj->get_top_section(); 794 795 my $metadata_mapping_rules = $self->{'metadata_mapping_rules'}; 796 797 my $transient_metadata_names = {}; 798 my $new_metadata_names = {}; 799 800 print $outhandle " Applying metadata_mapping_file to document\n"; 801 802 foreach my $metadata_mapping_rule_rec (@$metadata_mapping_rules) { 803 804 my $src_metadata_name = $metadata_mapping_rule_rec->{"src_metadata_name"}; 805 my $dst_metadata_name = $metadata_mapping_rule_rec->{"dst_metadata_name"}; 806 807 my $regex_sub = $metadata_mapping_rule_rec->{"regex_sub"}; 808 809 my $metadata_vals = $doc_obj->get_metadata($top_section,$src_metadata_name); 810 811 foreach my $metadata_val (@$metadata_vals) { 812 813 my $store_metadata_val = $metadata_val; 814 815 if ($verbosity >= 4) { 816 print $outhandle " Testing for match with: \$metadata_val =~ $regex_sub\n" 817 } 818 eval ( "\$metadata_val =~ $regex_sub" ); 819 if ($@) { 820 warn "$@"; 821 } 822 else { 823 # print STDERR "**** after metadata_val = $metadata_val\n\n"; 824 if ($verbosity >=2) { 825 if ($metadata_val ne $store_metadata_val) { 826 827 if ($verbosity >= 3) { 828 print $outhandle " Transformed metadata to $dst_metadata_name: '$store_metadata_val' -> '$metadata_val'\n" 829 } 830 if ($verbosity >= 2 && ($dst_metadata_name !~ m/^_transient/)) { 831 print $outhandle " Added new metadata $dst_metadata_name: '$metadata_val'\n" 832 } 833 } 834 } 835 $doc_obj->add_utf8_metadata($top_section,$dst_metadata_name,$metadata_val); 836 837 if ($dst_metadata_name =~ m/^_transient/) { 838 $transient_metadata_names->{$dst_metadata_name} = 1; 839 } 840 else { 841 if 
($metadata_val ne $store_metadata_val) { 842 $new_metadata_names->{$dst_metadata_name} = 1; 843 } 844 } 845 } 846 } 847 848 } 849 850 if ($verbosity >=1) { 851 print $outhandle " Number of metadata_mapping_file transformations for this document: ", scalar(keys %$new_metadata_names), "\n"; 852 853 if ($verbosity >=2) { 854 print $outhandle " Generated metadata names: ", join(", ",keys %$new_metadata_names), "\n"; 855 } 856 } 857 858 # Now remove any of dst_metadata_name which began _transient 859 foreach my $transient_metadata_name (keys %$transient_metadata_names) { 860 #print STDERR "transient_metadata_name = $transient_metadata_name\n"; 861 $doc_obj->delete_metadata($top_section,$transient_metadata_name); 862 } 863 } 864 690 865 sub post_process_doc_obj { 691 866 my $self = shift (@_); 692 867 my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 693 868 869 if ($self->{'metadata_mapping_rules'}) { 870 $self->apply_metadata_mapping_file($doc_obj); 871 } 872 694 873 return 1; 695 874 } … … 820 999 return undef unless $self->can_process_this_file($filename_full_path); 821 1000 822 1001 #print STDERR "**** BEFORE READ INTO DOC OBJ: $file\n"; 823 1002 my ($process_status,$doc_obj) = $self->read_into_doc_obj(@_); 824 1003 #print STDERR "**** AFTER READ INTO DOC OBJ: $file\n";
Note: See TracChangeset for help on using the changeset viewer.