root/gsdl/trunk/perllib/marcmapping.pm @ 18200

Revision 18200, 3.8 KB (checked in by kjdon, 12 years ago)

allow namespaces in mapping values (added a . to regex)

Line 
1###########################################################################
2#
3# marcmapping.pm -- code to read in the marc mapping files
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 2008 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27package marcmapping;
28
29sub parse_marc_metadata_mapping
30{
31    my ($mm_file_or_files, $outhandle) = @_;
32
33    my $metadata_mapping = {};
34
35    if (ref ($mm_file_or_files) eq 'SCALAR') {
36    my $mm_file = $mm_file_or_files;
37    &_parse_marc_metadata_mapping($mm_file,$metadata_mapping, $outhandle);
38    }
39    else {
40    my $mm_files = $mm_file_or_files;
41   
42    # Need to process files in reverse order.  This is so in the
43    # case where we have both a "collect" and "main" version,
44    # the "collect" one tops up the main one
45
46    my $mm_file;
47    while ($mm_file = pop(@$mm_files)) {
48        &_parse_marc_metadata_mapping($mm_file,$metadata_mapping, $outhandle);
49    }
50    }
51
52    return $metadata_mapping;
53}
54
55sub _parse_marc_metadata_mapping
56{
57    my ($mm_file,$metadata_mapping, $outhandle) = @_;
58
59    if (open(MMIN, "<$mm_file"))
60    {
61    my $l=0;
62    my $line;
63    while (defined($line=<MMIN>))
64    {
65        $l++;
66        chomp $line;
67        $line =~ s/#.*$//; # strip out any comments, including end of line
68        next if ($line =~ m/^\s*$/);
69        $line =~ s/\s+$//; # remove any white space at end of line
70
71        my $parse_error_count = 0;
72        if ($line =~ m/^-(\d+)\s*$/) {
73        # special "remove" rule syntax
74        my $marc_info = $1;
75        if (defined $metadata_mapping->{$marc_info}) {
76            delete $metadata_mapping->{$marc_info};
77        }
78        else {
79            print $outhandle "Parse Warning: Did not find pre-existing rule $marc_info to remove";
80            print $outhandle " on line $l of $mm_file:\n";
81            print $outhandle "  $line\n";
82        }
83        }
84        elsif ($line =~ m/^(.*?)->\s*([\w\.\^]+)$/)
85        {
86        my $lhs = $1;
87        my $gsdl_info = $2;
88
89        my @fields = split(/,\s*/,$lhs);
90        my $f;
91        while ($f  = shift (@fields)) {
92            $f =~ s/\s+$//; # remove any white space at end of line
93
94            if ($f =~ m/^(\d+)\-(\d+)$/) {
95            # number range => generate number in range and
96            # push on to array
97            push(@fields,$1..$2);
98            next;
99            }
100
101            if ($f =~ m/^(\d+)((?:(?:\$|\^)\w)*)\s*$/) {
102
103            my $marc_info = $1;
104            my $opt_sub_fields = $2;
105
106            if ($opt_sub_fields ne "") {           
107                my @sub_fields = split(/\$|\^/,$opt_sub_fields);
108                shift @sub_fields; # skip first entry, which is blank
109
110                foreach my $sub_field (@sub_fields) {
111                $metadata_mapping->{$marc_info."\$".$sub_field} = $gsdl_info;
112                }
113            }
114            else {
115                # no subfields to worry about
116                $marc_info =~ s/\^/\$/;
117                $metadata_mapping->{$marc_info} = $gsdl_info;
118            }
119            }
120            else {
121            $parse_error_count++;
122            }
123        }
124        }
125        else
126        {
127        $parse_error_count++;
128        }
129
130        if ($parse_error_count>0) {
131       
132        print $outhandle "Parse Error: $parse_error_count syntax error(s) on line $l of $mm_file:\n";
133        print $outhandle "  $line\n";
134        }
135    }
136    close(MMIN);
137    }
138    else
139    {
140    print $outhandle "Unable to open $mm_file: $!\n";
141    }
142}
143
144
145
1461;
Note: See TracBrowser for help on using the browser.