Changeset 11298
- Timestamp:
- 2006-03-02T14:53:11+13:00 (18 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/plugins/ISISPlug.pm
r11295 r11298 143 143 # Parse the associated ISIS database Field Definition Table file (.fdt) 144 144 my %fdtmapping = &parse_field_definition_table($fdtfilename, $encoding); 145 146 # Map the tag numbers to tag names, using the FDT mapping 147 $$textref =~ s/\r?\ntag=(\d+) /\ntag=$fdtmapping{$1}{'title'} /g; 145 $self->{'fdt_mapping'} = \%fdtmapping; 148 146 149 147 # Remove the line at the start so it is split and processed properly … … 158 156 my $outhandle = $self->{'outhandle'}; 159 157 160 my $cursection = $doc_obj->get_top_section(); 158 my $section = $doc_obj->get_top_section(); 159 my $fdt_mapping = $self->{'fdt_mapping'}; 161 160 my $subfield_separator = $self->{'subfield_separator'}; 162 161 my $entry_separator = $self->{'entry_separator'}; … … 164 163 # Report that we're processing the file 165 164 print STDERR "<Processing n='$file' p='ISISPlug'>\n" if ($gli); 166 print $outhandle "IsisPlug: processing $file\n" 167 if ($self->{'verbosity'}) > 1; 165 print $outhandle "IsisPlug: processing $file\n" if ($self->{'verbosity'}) > 1; 168 166 169 167 # Process each line of the ISIS record, one at a time 170 168 foreach my $line (split(/\n/, $$textref)) { 171 169 $line =~ /^tag=(.*) data=(.+)$/; 172 my $rawtagname = $1; 173 my $rawtagdata = $2; 174 # print STDERR "Raw tag: $rawtagname, Raw data: $rawtagdata\n"; 175 next if ($rawtagname eq ""); 170 my $tag = $1; 171 my $tag_data = $2; 172 # print STDERR "\nTag: $tag, Data: $tag_data\n"; 173 174 # Convert the tag number into a name, and remove any invalid characters 175 my $raw_metadata_name = $fdt_mapping->{$tag}{'name'} || ""; 176 $raw_metadata_name =~ s/&//g; 177 next if ($raw_metadata_name eq ""); 176 178 177 179 # Metadata field names: title case, then remove spaces 178 my $ tagname = "";179 foreach my $word (split(/\s+/, $raw tagname)) {180 my $metadata_name = ""; 181 foreach my $word (split(/\s+/, $raw_metadata_name)) { 180 182 substr($word, 0, 1) =~ tr/a-z/A-Z/; 181 $tagname .= $word; 182 } 183 184 # Make 
sure there is nothing bad in the tag names 185 $tagname =~ s/&//g; 186 187 # Handle each piece of metadata ('%' separated) 188 my $completetagvalue = ""; 189 foreach my $rawtagvalue (split(/%/, $rawtagdata)) { 190 $completetagvalue .= $entry_separator unless ($completetagvalue eq ""); 191 192 # Metadata field values: take care with subfields 193 my $completeentryvalue = ""; 194 while ($rawtagvalue ne "") { 183 $metadata_name .= $word; 184 } 185 186 my $all_metadata_name = $metadata_name . "^all"; 187 my $all_metadata_value = ""; 188 189 # Handle repeatable fields 190 if ($fdt_mapping->{$tag}{'repeatable'}) { 191 # Multiple values are separated using the '%' character 192 foreach my $raw_metadata_value (split(/%/, $tag_data)) { 193 my $metadata_value = ""; 194 195 # Handle subfields 196 while ($raw_metadata_value ne "") { 197 # If there is a subfield specifier, parse it off 198 my $sub_metadata_name = $metadata_name; 199 if ($raw_metadata_value =~ s/^\^// && $raw_metadata_value =~ /^([a-z])/) { 200 $sub_metadata_name .= "^$1"; 201 } 202 203 # Parse the value off and add it as metadata 204 $raw_metadata_value =~ s/^([^\^]*)//; 205 my $sub_metadata_value = $1; 206 207 # Escape any '<' and '>' characters so they appear correctly in the final collection 208 $sub_metadata_value =~ s/\</&lt;/g; 209 $sub_metadata_value =~ s/\>/&gt;/g; 210 211 # print STDERR "Sub metadata name: $sub_metadata_name, value: $sub_metadata_value\n"; 212 if ($sub_metadata_name ne $metadata_name) { 213 $doc_obj->add_utf8_metadata($section, $sub_metadata_name, $sub_metadata_value); 214 } 215 216 $metadata_value .= $subfield_separator unless ($metadata_value eq ""); 217 $metadata_value .= $sub_metadata_value; 218 } 219 220 # Add the metadata value 221 # print STDERR "Metadata name: $metadata_name, value: $metadata_value\n"; 222 $doc_obj->add_utf8_metadata($section, $metadata_name, $metadata_value); 223 224 $all_metadata_value .= $entry_separator unless ($all_metadata_value eq ""); 225 $all_metadata_value 
.= $metadata_value; 226 } 227 } 228 229 # Handle non-repeatable fields 230 else { 231 my $raw_metadata_value = $tag_data; 232 my $metadata_value = ""; 233 234 # Handle subfields 235 while ($raw_metadata_value ne "") { 195 236 # If there is a subfield specifier, parse it off 196 my $sub fieldname = "";197 if ($raw tagvalue =~ s/^\^// && $rawtagvalue =~ s/([a-z])//) {198 $sub fieldname = "^$1";237 my $sub_metadata_name = $metadata_name; 238 if ($raw_metadata_value =~ s/^(\^[a-z])//) { 239 $sub_metadata_name .= $1; 199 240 } 200 241 201 # Parse the metadata value off 202 $rawtagvalue =~ s/^([^\^]*)//; 203 my $metadatafieldname = $tagname . $subfieldname; 204 my $metadatafieldvalue = $1; 205 206 # Handle Keywords specially 207 if ($metadatafieldname eq "Keywords") { 208 my $keywordmetadatavalue = $metadatafieldvalue; 209 my $keywordlist = ""; 210 while ($keywordmetadatavalue =~ s/\<(.+?)\>//) { 211 my $keyword = $1; 212 $doc_obj->add_utf8_metadata($cursection, $metadatafieldname, $keyword); 213 $keywordlist .= ", " unless ($keywordlist eq ""); 214 $keywordlist .= $keyword; 242 # Parse the value off and add it as metadata 243 $raw_metadata_value =~ s/^([^\^]*)//; 244 my $sub_metadata_value = $1; 245 246 # Deal with the case when multiple values are specified using <...> 247 if ($sub_metadata_value =~ /\<(.*)\>$/) { 248 my $sub_sub_metadata_name = $sub_metadata_name . 
"^sub"; 249 my $tmp_sub_metadata_value = $sub_metadata_value; 250 while ($tmp_sub_metadata_value =~ s/\<(.*?)\>//) { 251 my $sub_sub_metadata_value = $1; 252 $doc_obj->add_utf8_metadata($section, $sub_sub_metadata_name, $sub_sub_metadata_value); 215 253 } 216 217 $metadatafieldvalue = $keywordlist;218 254 } 219 255 220 256 # Escape any '<' and '>' characters so they appear correctly in the final collection 221 $ metadatafieldvalue =~ s/\</&lt;/g;222 $ metadatafieldvalue =~ s/\>/&gt;/g;223 224 # We have already added Keywords metadata above225 unless ($metadatafieldname eq "Keywords") {226 $doc_obj->add_utf8_metadata($ cursection, $metadatafieldname, $metadatafieldvalue);257 $sub_metadata_value =~ s/\</&lt;/g; 258 $sub_metadata_value =~ s/\>/&gt;/g; 259 260 # print STDERR "Sub metadata name: $sub_metadata_name, value: $sub_metadata_value\n"; 261 if ($sub_metadata_name ne $metadata_name) { 262 $doc_obj->add_utf8_metadata($section, $sub_metadata_name, $sub_metadata_value); 227 263 } 228 264 229 $ completeentryvalue .= $subfield_separator unless ($completeentryvalue eq "");230 $ completeentryvalue .= $metadatafieldvalue;265 $metadata_value .= $subfield_separator unless ($metadata_value eq ""); 266 $metadata_value .= $sub_metadata_value; 231 267 } 232 268 233 $completetagvalue .= $completeentryvalue; 234 } 235 236 $doc_obj->add_utf8_metadata($cursection, $tagname . 
"^all", $completetagvalue); 269 # Add the metadata value 270 # print STDERR "Metadata name: $metadata_name, value: $metadata_value\n"; 271 $doc_obj->add_utf8_metadata($section, $metadata_name, $metadata_value); 272 273 $all_metadata_value .= $entry_separator unless ($all_metadata_value eq ""); 274 $all_metadata_value .= $metadata_value; 275 } 276 277 # Add the "^all" metadata value 278 # print STDERR "All metadata name: $all_metadata_name, value: $all_metadata_value\n"; 279 $doc_obj->add_utf8_metadata($section, $all_metadata_name, $all_metadata_value); 237 280 } 238 281 … … 240 283 $$textref =~ s/\</&lt;/g; 241 284 $$textref =~ s/\>/&gt;/g; 242 $doc_obj->add_utf8_text($ cursection, $$textref);285 $doc_obj->add_utf8_text($section, $$textref); 243 286 244 287 # Add FileFormat metadata 245 $doc_obj->add_utf8_metadata($ cursection, "FileFormat", "CDS/ISIS");246 247 # Record was processed successfully (and there was no document obtained)288 $doc_obj->add_utf8_metadata($section, "FileFormat", "CDS/ISIS"); 289 290 # Record was processed successfully 248 291 return 1; 249 292 } … … 270 313 271 314 if ($amongstdefinitions) { 272 my $field title= substr($fdtfileline, 0, 30);315 my $fieldname = substr($fdtfileline, 0, 30); 273 316 my $fieldsubfields = substr($fdtfileline, 30, 20); 274 317 my $fieldspecs = substr($fdtfileline, 50); 275 318 276 319 # Remove extra spaces 277 $field title =~ s/(\s*)$//;320 $fieldname =~ s/(\s*)$//; 278 321 $fieldsubfields =~ s/(\s*)$//; 279 280 # Map from tag number to metadata field title and subfields 281 my ($fieldtag) = ($fieldspecs =~ /^\s*(\d+)\s+/); 282 $fdtmapping{$fieldtag} = { 'title' => $fieldtitle, 283 'subfields' => $fieldsubfields }; 322 $fieldspecs =~ s/(\s*)$//; 323 324 # Map from tag number to metadata field title, subfields, and repeatability 325 my $fieldtag = (split(/ /, $fieldspecs))[0]; 326 my $fieldrepeatable = (split(/ /, $fieldspecs))[3]; 327 $fdtmapping{$fieldtag} = { 'name' => $fieldname, 328 'subfields' => $fieldsubfields, 
329 'repeatable' => $fieldrepeatable }; 284 330 } 285 331 elsif ($fdtfileline eq "***") {
Note:
See TracChangeset
for help on using the changeset viewer.