Changeset 16692


Ignore:
Timestamp:
2008-08-11T10:44:04+12:00 (16 years ago)
Author:
kjdon
Message:

Code to read in MARC mapping files has been moved from MARCXMLPlugin to marcmapping.pm; it is now also used by MARCPlugin.pm so that MARCPlugin can use qualified Dublin Core.

Location:
gsdl/trunk/perllib
Files:
1 added
2 edited

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/plugins/MARCPlugin.pm

    r16104 r16692  
    3131use unicode;
    3232use util;
     33use marcmapping;
    3334
    3435use strict;
     
    4243my $arguments =
    4344    [ { 'name' => "metadata_mapping",
    44     'desc' => "{MARCPlugin.metadata_mapping}",
     45    'desc' => "{common.deprecated} {MARCPlugin.metadata_mapping}",
    4546    'type' => "string",
    46     'deft' => "marctodc.txt",
     47    'deft' => "",
    4748    'hiddengli' => "yes", # deprecated in favour of 'metadata_mapping_file'
    4849    'reqd' => "no" },
     
    5051    'desc' => "{MARCXMLPlugin.metadata_mapping_file}",
    5152    'type' => "string",
    52     'deft' => "",
     53    'deft' => "marctodc.txt",
    5354    'reqd' => "no" },
    5455      { 'name' => "process_exp",
     
    99100    # preference
    100101
    101     if ($self->{'metadata_mapping_file'} eq "") {
    102     # If nothing set in the new version, use the old version
    103     # that defaults to 'marctodc.txt'
     102    if ($self->{'metadata_mapping'} ne "") {
     103    print STDERR "MARCPlugin WARNING:: the metadata_mapping option is set but has been deprecated. Please use metadata_mapping_file option instead\n";
     104    # If the old version is set, use it.
    104105    $self->{'metadata_mapping_file'} = $self->{'metadata_mapping'};
    105106    }
    106 
     107    $self->{'metadata_mapping'} = undef;
    107108    $self->{'type'} = "";
    108109   
     
    114115    my ($verbosity, $outhandle, $failhandle) = @_;
    115116
    116     my @metadata_mapping = ();
    117 
    118     # read in the metadata mapping file
    119 
    120     my $mm_file = &util::locate_config_file($self->{'metadata_mapping_file'});
    121 
    122     if (!defined $mm_file)
    123     {
    124 
     117    ## the mapping file has already been loaded
     118    if (defined $self->{'metadata_mapping'} ){
     119    $self->SUPER::init(@_);
     120    return;
     121    }
     122
     123    # read in the metadata mapping files
     124    my $mm_files = &util::locate_config_files($self->{'metadata_mapping_file'});
     125
     126    if (scalar(@$mm_files)==0)
     127    {
    125128    my $msg = "MARCPlugin ERROR: Can't locate mapping file \"" .
    126         $self->{'metadata_mapping_file'} . "\".\n" .
     129        $self->{'metadata_mapping_file'} . "\".\n " .
    127130        "    No marc files can be processed.\n";
    128131
     
    133136    # If we exit here, then pluginfo.pl will exit too!
    134137    }
    135     elsif (open(MMIN, "<$mm_file"))
    136     {
    137     my $l=1;
    138     my $line;
    139     while (defined($line=<MMIN>))
    140     {
    141         chomp $line;
    142         if ($line =~ m/^(\d+)\s*->\s*([\w\^]+)$/)
    143         {
    144         my $marc_info = $1;
    145         my $gsdl_info = $2;
    146         my $mapping = { 'marc' => $marc_info, 'gsdl' => $gsdl_info };
    147         push(@metadata_mapping,$mapping);
    148         }
    149         elsif ($line !~ m/^\#/       # allow comments (# in first column)
    150            && $line !~ m/^\s*$/) # allow blank lines
    151         {
    152         print $outhandle "Parse error on line $l of $mm_file:\n";
    153         print $outhandle "  \"$line\"\n";
    154         }
    155         $l++
    156     }
    157     close(MMIN);
    158 
    159     $self->{'metadata_mapping'} = \@metadata_mapping;
    160     }
    161     else
    162     {
    163     print STDERR "Unable to open $mm_file: $!\n";
    164     }
    165 
    166 
     138    else {
     139    $self->{'metadata_mapping'} = &marcmapping::parse_marc_metadata_mapping($mm_files, $outhandle);
     140    }
     141
     142    ##map { print STDERR $_."=>".$self->{'metadata_mapping'}->{$_}."\n"; } keys %{$self->{'metadata_mapping'}};
    167143
    168144    $self->SUPER::init(@_);
    169145}
    170    
     146
     147
    171148
    172149sub get_default_process_exp {
     
    340317
    341318
    342 sub extract_metadata 
     319sub extract_metadata
    343320{
    344321    my $self = shift (@_);
     322 
    345323    my ($marc, $metadata, $encoding, $doc_obj, $section) = @_;
    346324    my $outhandle = $self->{'outhandle'};
     
    350328    }
    351329
    352     my $metadata_mapping = $self->{'metadata_mapping'};
    353     my $mm;
    354 
    355     foreach $mm ( @$metadata_mapping )
    356     {
    357     my $marc_field = $mm->{'marc'};
    358 
    359     my @metavalues = $marc->field($marc_field);
    360 
    361     if (scalar(@metavalues)>0)
    362     {
    363         my $metaname = $mm->{'gsdl'};
    364         my $metavalue;
    365         foreach $metavalue ( @metavalues )
     330    my $metadata_mapping = $self->{'metadata_mapping'};;
     331
     332    foreach my $marc_field ( keys %$metadata_mapping )
     333    {
     334    my $gsdl_field = $metadata_mapping->{$marc_field};
     335    my $meta_value = undef;
     336   
     337    # have we got a subfield?
     338    my $subfield = undef;
     339    if ($marc_field =~ /(\d\d\d)(?:\$|\^)?(\w)/){
     340        $marc_field = $1;
     341        $subfield = $2;
     342    }
     343    if (defined $subfield) {
     344        $meta_value = $marc->subfield($marc_field, $subfield);
     345        if (defined $meta_value) {
     346        ## escape [ and ]
     347        $meta_value =~ s/\[/\\\[/g;
     348        $meta_value =~ s/\]/\\\]/g;
     349        my $metavalue_str = $self->to_utf8($encoding, $meta_value);
     350        $doc_obj->add_utf8_metadata ($section, $gsdl_field, $metavalue_str);
     351        }
     352    } else {
     353       
     354        my @metavalues = $marc->field($marc_field);
     355       
     356        if (scalar(@metavalues)>0)
    366357        {
    367         my $metavalue_str = $self->to_utf8($encoding,$metavalue->as_string());
    368         $doc_obj->add_utf8_metadata ($section, $metaname, $metavalue_str);
    369         }
    370     }
    371     }
    372 }
     358        my $metavalue = undef;
     359        foreach $metavalue ( @metavalues )
     360        {
     361            my $metavalue_str = $self->to_utf8($encoding,$metavalue->as_string());
     362            $doc_obj->add_utf8_metadata ($section, $gsdl_field, $metavalue_str);
     363        }
     364        }
     365    }
     366    }
     367}
     368
    373369
    374370sub extract_ascii_metadata
    375371{
    376372    my $self = shift (@_);
     373
    377374    my ($text, $metadata,$doc_obj, $section) = @_;
    378375    my $outhandle = $self->{'outhandle'};
     
    386383        $field =~ /^(\d\d\d)\s/;
    387384        my $code = $1;
    388         $field = $';
     385        $field = $'; #'
    389386        ##get subfields
    390387        my @subfields = split(/\$/,$field);
     
    403400             ##print STDERR "$1=>$'\n";
    404401             push(@{$marc_mapping->{$code}},$1);
    405                      push(@{$marc_mapping->{$code}},$');
     402                     push(@{$marc_mapping->{$code}},$'); #'
    406403         }
    407404        }
     
    410407
    411408
    412      foreach my $mm ( @$metadata_mapping )
    413     {
    414     my $marc_field = $mm->{'marc'};
     409     foreach my $marc_field ( keys %$metadata_mapping )
     410    {
    415411       
    416412    my $matched_field = $marc_mapping->{$marc_field};
     
    422418        $subfield = $1;
    423419        }
    424         my $metaname = $mm->{'gsdl'};
     420        my $metaname = $metadata_mapping->{$marc_field};
    425421 
    426422        my $metavalue;
  • gsdl/trunk/perllib/plugins/MARCXMLPlugin.pm

    r16521 r16692  
    3131
    3232use ReadXMLFile;
     33use marcmapping;
    3334
    3435use strict;
     
    8586
    8687
    87 sub _parse_marc_metadata_mapping
    88 {
    89     my $self = shift(@_);
    90     my ($mm_file,$metadata_mapping) = @_;
    91 
    92     my $outhandle = $self->{'outhandle'};
    93 
    94     if (open(MMIN, "<$mm_file"))
    95     {
    96     my $l=0;
    97     my $line;
    98     while (defined($line=<MMIN>))
    99     {
    100         $l++;
    101         chomp $line;
    102         $line =~ s/#.*$//; # strip out any comments, including end of line
    103         next if ($line =~ m/^\s*$/);
    104         $line =~ s/\s+$//; # remove any white space at end of line
    105 
    106         my $parse_error_count = 0;
    107         if ($line =~ m/^-(\d+)\s*$/) {
    108         # special "remove" rule syntax
    109         my $marc_info = $1;
    110         if (defined $metadata_mapping->{$marc_info}) {
    111             delete $metadata_mapping->{$marc_info};
    112         }
    113         else {
    114             print $outhandle "Parse Warning: Did not file pre-existing rule $marc_info to remove";
    115             print $outhandle " on line $l of $mm_file:\n";
    116             print $outhandle "  $line\n";
    117         }
    118         }
    119         elsif ($line =~ m/^(.*?)->\s*([\w\^]+)$/)
    120         {
    121         my $lhs = $1;
    122         my $gsdl_info = $2;
    123 
    124         my @fields = split(/,\s*/,$lhs);
    125         my $f;
    126         while ($f  = shift (@fields)) {
    127             $f =~ s/\s+$//; # remove any white space at end of line
    128 
    129             if ($f =~ m/^(\d+)\-(\d+)$/) {
    130             # number range => genrate number in range and
    131             # push on to array
    132             push(@fields,$1..$2);
    133             next;
    134             }
    135 
    136             if ($f =~ m/^(\d+)((?:(?:\$|\^)\w)*)\s*$/) {
    137 
    138             my $marc_info = $1;
    139             my $opt_sub_fields = $2;
    140 
    141             if ($opt_sub_fields ne "") {           
    142                 my @sub_fields = split(/\$|\^/,$opt_sub_fields);
    143                 shift @sub_fields; # skip first entry, which is blank
    144 
    145                 foreach my $sub_field (@sub_fields) {
    146                 $metadata_mapping->{$marc_info."\$".$sub_field} = $gsdl_info;
    147                 }
    148             }
    149             else {
    150                 # no subfields to worry about
    151                 $marc_info =~ s/\^/\$/;
    152                 $metadata_mapping->{$marc_info} = $gsdl_info;
    153             }
    154             }
    155             else {
    156             $parse_error_count++;
    157             }
    158         }
    159         }
    160         else
    161         {
    162         $parse_error_count++;
    163         }
    164 
    165         if ($parse_error_count>0) {
    166        
    167         print $outhandle "Parse Error: $parse_error_count syntax error(s) on line $l of $mm_file:\n";
    168         print $outhandle "  $line\n";
    169         }
    170     }
    171     close(MMIN);
    172     }
    173     else
    174     {
    175     print STDERR "Unable to open $mm_file: $!\n";
    176     }
    177 }
    178 
    179 
    180 sub parse_marc_metadata_mapping
    181 {
    182     my $self = shift(@_);
    183     my ($mm_file_or_files) = @_;
    184 
    185     my $metadata_mapping = {};
    186 
    187     if (ref ($mm_file_or_files) eq 'SCALAR') {
    188     my $mm_file = $mm_file_or_files;
    189     $self->_parse_marc_metadata_mapping($mm_file,$metadata_mapping);
    190     }
    191     else {
    192     my $mm_files = $mm_file_or_files;
    193    
    194     # Need to process files in reverse order.  This is so in the
    195     # case where we have both a "collect" and "main" version,
    196     # the "collect" one tops up the main one
    197 
    198     my $mm_file;
    199     while ($mm_file = pop(@$mm_files)) {
    200         $self->_parse_marc_metadata_mapping($mm_file,$metadata_mapping);
    201     }
    202     }
    203 
    204     return $metadata_mapping;
    205 }
    206 
    207 
    20888sub init {
    20989    my $self = shift (@_);
     
    21696    }
    21797
    218     # read in the metadata mapping file
     98    # read in the metadata mapping files
    21999    my $mm_files = &util::locate_config_files($self->{'metadata_mapping_file'});
    220100
     
    233113    }
    234114    else {
    235     $self->{'metadata_mapping'} = $self->parse_marc_metadata_mapping($mm_files);
     115    $self->{'metadata_mapping'} = &marcmapping::parse_marc_metadata_mapping($mm_files, $outhandle);
    236116    }
    237117
     
    316196    }
    317197    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
    318     $doc_obj->add_metadata($doc_obj->get_top_section(), "FileFormat", "XML");
     198    $doc_obj->add_metadata($doc_obj->get_top_section(), "FileFormat", "MARCXML");
    319199
    320200    my $outhandle = $self->{'outhandle'};
Note: See TracChangeset for help on using the changeset viewer.