Changeset 13496 for trunk/gsdl/perllib/plugins/MARCXMLPlug.pm
- Timestamp:
- 2006-12-12T12:12:39+13:00 (17 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/plugins/MARCXMLPlug.pm
r13486 r13496 71 71 $self->{'metadata_mapping'} = undef; 72 72 $self->{'num_processed'} = 0; 73 73 $self->{'indent'} = 0; 74 74 75 return bless $self, $class; 75 76 } … … 168 169 my $file = $self->{'file'}; 169 170 my $filename = $self->{'filename'}; 170 171 171 172 my ($language, $encoding) = $self->textcat_get_language_encoding ($filename); 172 173 … … 174 175 $self->{'encoding'} = $encoding; 175 176 $self->{'element_count'} = 1; 176 177 $self->{'indent'} = 0; 177 178 my $outhandle = $self->{'outhandle'}; 178 179 print $outhandle "MARCXMLPlug: processing $self->{'file'}\n" if $self->{'verbosity'} > 1; … … 189 190 my $expat = shift; 190 191 my $element = shift; 192 193 my $text = $self->escape_text($_); 191 194 192 195 $self->{'current_element'} = $element; 193 194 if ($element ne "collection"){195 $self->{'content'} .= $_;196 }197 196 198 197 ##get all atributes of this element and store it in a map name=>value … … 201 200 while ($attrstring =~ /(\w+)=\"(\w+)\"/){ 202 201 $attr_map{$1}=$2; 203 $attrstring = $'; 202 $attrstring = $'; #' 204 203 } 205 204 … … 209 208 ##create a new document for each record 210 209 if ($element eq "record") { 211 210 my $filename = $self->{'filename'}; 212 211 my $language = $self->{'language'}; 213 212 my $encoding = $self->{'encoding'}; … … 240 239 ## get the marc code, for example 520 241 240 if ($element eq "datafield") { 242 if (defined $attr_map{'tag'} and $attr_map{'tag'} ne ""){241 if (defined $attr_map{'tag'} and $attr_map{'tag'} ne ""){ 243 242 $self->{'current_tag'} = $attr_map{tag}; 244 243 } … … 248 247 ## append the subcode to the marc code for example 520a or 520b 249 248 if ($element eq "subfield"){ 250 if (defined $attr_map{'code'} and $attr_map{'code'} ne "" and $self->{'current_tag'} ne ""){249 if (defined $attr_map{'code'} and $attr_map{'code'} ne "" and $self->{'current_tag'} ne ""){ 251 250 $self->{'current_code'} = $attr_map{'code'}; 252 251 } 253 252 } 253 254 if ($element eq "record"){ 255 $self->{'indent'} 
= 0; 256 } 257 else { 258 if ($element ne "subfield"){ 259 $self->{'indent'} = 1; 260 } 261 else{ 262 $self->{'indent'} = 2; 263 } 264 } 265 266 267 if ($element ne "collection"){ 268 $self->{'content'} .= "<br/>".$self->calculate_indent($self->{'indent'}).$text; 269 } 254 270 } 255 271 … … 259 275 my $self = shift(@_); 260 276 my ($expat, $element) = @_; 261 262 $self->{'content'} .= $_; 263 277 my $text = $self->escape_text($_); 278 264 279 if ($element eq "record" and defined $self->{'doc_obj'}) { 265 280 # process the document 266 281 my $processor = $self->{'processor'}; 267 282 my $doc_obj = $self->{'doc_obj'}; 268 $doc_obj->add_utf8_text($doc_obj->get_top_section(), $self->{'content'}); 283 $self->{'content'} .= "<br/>".$text; 284 285 $doc_obj->add_utf8_text($doc_obj->get_top_section(),$self->{'content'}); 269 286 $processor->process($doc_obj); 270 287 … … 272 289 $self->{'content'} = ""; 273 290 $self->{'doc_obj'} = undef; 291 return; 274 292 } 275 293 … … 322 340 $self->{'current_tag'} = ""; 323 341 } 342 343 if ($element eq "datafield"){ 344 $self->{'indent'} = 1; 345 $self->{'content'} .= "<br/>".$self->calculate_indent($self->{'indent'}).$text; 346 } 347 else{ 348 $self->{'content'} .= $text; 349 } 350 324 351 } 325 352 … … 345 372 } 346 373 347 $self->{'content'} .= $_;374 $self->{'content'} .=$self->escape_text($_); 348 375 349 376 } 350 377 378 sub calculate_indent{ 379 my ($self,$num) = @_; 380 381 my $indent =""; 382 383 for (my $i=0; $i<$num;$i++){ 384 $indent .= " "; 385 } 386 387 return $indent; 388 389 } 390 391 sub escape_text { 392 my ($self,$text) = @_; 393 # special characters in the xml encoding 394 $text =~ s/&/&amp;/g; # this has to be first... 395 $text =~ s/</&lt;/g; 396 $text =~ s/>/&gt;/g; 397 $text =~ s/\"/&quot;/g; 398 399 return $text; 400 } 401 351 402 352 403 1;
Note:
See TracChangeset
for help on using the changeset viewer.