source: gsdl/trunk/perllib/marcmapping.pm@ 18200

Last change on this file since 18200 was 18200, checked in by kjdon, 15 years ago

allow namespaces in mapping values (added a . to regex)

File size: 3.8 KB
RevLine 
[16692]1###########################################################################
2#
3# marcmapping.pm -- code to read in the marc mapping files
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 2008 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27package marcmapping;
28
29sub parse_marc_metadata_mapping
30{
31 my ($mm_file_or_files, $outhandle) = @_;
32
33 my $metadata_mapping = {};
34
35 if (ref ($mm_file_or_files) eq 'SCALAR') {
36 my $mm_file = $mm_file_or_files;
37 &_parse_marc_metadata_mapping($mm_file,$metadata_mapping, $outhandle);
38 }
39 else {
40 my $mm_files = $mm_file_or_files;
41
42 # Need to process files in reverse order. This is so in the
43 # case where we have both a "collect" and "main" version,
44 # the "collect" one tops up the main one
45
46 my $mm_file;
47 while ($mm_file = pop(@$mm_files)) {
48 &_parse_marc_metadata_mapping($mm_file,$metadata_mapping, $outhandle);
49 }
50 }
51
52 return $metadata_mapping;
53}
54
55sub _parse_marc_metadata_mapping
56{
57 my ($mm_file,$metadata_mapping, $outhandle) = @_;
58
59 if (open(MMIN, "<$mm_file"))
60 {
61 my $l=0;
62 my $line;
63 while (defined($line=<MMIN>))
64 {
65 $l++;
66 chomp $line;
67 $line =~ s/#.*$//; # strip out any comments, including end of line
68 next if ($line =~ m/^\s*$/);
69 $line =~ s/\s+$//; # remove any white space at end of line
70
71 my $parse_error_count = 0;
72 if ($line =~ m/^-(\d+)\s*$/) {
73 # special "remove" rule syntax
74 my $marc_info = $1;
75 if (defined $metadata_mapping->{$marc_info}) {
76 delete $metadata_mapping->{$marc_info};
77 }
78 else {
79 print $outhandle "Parse Warning: Did not find pre-existing rule $marc_info to remove";
80 print $outhandle " on line $l of $mm_file:\n";
81 print $outhandle " $line\n";
82 }
83 }
[18200]84 elsif ($line =~ m/^(.*?)->\s*([\w\.\^]+)$/)
[16692]85 {
86 my $lhs = $1;
87 my $gsdl_info = $2;
88
89 my @fields = split(/,\s*/,$lhs);
90 my $f;
91 while ($f = shift (@fields)) {
92 $f =~ s/\s+$//; # remove any white space at end of line
93
94 if ($f =~ m/^(\d+)\-(\d+)$/) {
95 # number range => generate number in range and
96 # push on to array
97 push(@fields,$1..$2);
98 next;
99 }
100
101 if ($f =~ m/^(\d+)((?:(?:\$|\^)\w)*)\s*$/) {
102
103 my $marc_info = $1;
104 my $opt_sub_fields = $2;
105
106 if ($opt_sub_fields ne "") {
107 my @sub_fields = split(/\$|\^/,$opt_sub_fields);
108 shift @sub_fields; # skip first entry, which is blank
109
110 foreach my $sub_field (@sub_fields) {
111 $metadata_mapping->{$marc_info."\$".$sub_field} = $gsdl_info;
112 }
113 }
114 else {
115 # no subfields to worry about
116 $marc_info =~ s/\^/\$/;
117 $metadata_mapping->{$marc_info} = $gsdl_info;
118 }
119 }
120 else {
121 $parse_error_count++;
122 }
123 }
124 }
125 else
126 {
127 $parse_error_count++;
128 }
129
130 if ($parse_error_count>0) {
131
132 print $outhandle "Parse Error: $parse_error_count syntax error(s) on line $l of $mm_file:\n";
133 print $outhandle " $line\n";
134 }
135 }
136 close(MMIN);
137 }
138 else
139 {
140 print $outhandle "Unable to open $mm_file: $!\n";
141 }
142}
143
144
145
1461;
Note: See TracBrowser for help on using the repository browser.