Changeset 16390
- Timestamp:
- 2008-07-14T14:54:58+12:00 (15 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gsdl/trunk/perllib/plugins/BasePlugin.pm
r16022 r16390 76 76 'deft' => "", 77 77 'reqd' => "no" }, 78 { 'name' => "no_blocking", 79 'desc' => "{BasePlugin.no_blocking}", 80 'type' => "flag", 81 'reqd' => "no"}, 78 82 { 'name' => "block_exp", 79 83 'desc' => "{BasePlugin.block_exp}", 80 84 'type' => "regexp", 81 85 'deft' => "", 82 'reqd' => "no" },83 { 'name' => "smart_block",84 'desc' => "{BasePlugin.smart_block}",85 'type' => "flag",86 86 'reqd' => "no" }, 87 87 { 'name' => "associate_ext", … … 107 107 'deft' => "auto", 108 108 'list' => $encoding_plus_auto_list, 109 'reqd' => "no" } 109 'reqd' => "no" }, 110 { 'name' => "smart_block", 111 'desc' => "{BasePlugin.smart_block}", 112 'type' => "flag", 113 'reqd' => "no", 114 'hiddengli' => "yes" } # deprecated, but leave in for old collections 115 110 116 111 117 ]; … … 129 135 130 136 my $self = new PrintInfo($pluginlist, $inputargs, $hashArgOptLists); 137 138 if ($self->{'info_only'}) { 139 # don't worry about any options etc 140 return bless $self, $class; 141 } 142 143 if ($self->{'smart_block'}) { 144 print STDERR "WARNING: -smart_block option has been deprecated and is no longer useful\n"; 145 } 146 $self->{'smart_block'} = undef; 131 147 132 148 my $plugin_name = (defined $pluginlist->[0]) ? $pluginlist->[0] : $class; … … 161 177 } 162 178 163 $self->{'shared_fileroot'} = {};164 $self->{'file_blocks'} = {};165 166 167 179 return bless $self, $class; 168 180 … … 181 193 $self->{'outhandle'} = $outhandle if defined $outhandle; 182 194 $self->{'failhandle'} = $failhandle; 183 195 # $self->SUPER::init(@_); 196 184 197 # set process_exp and block_exp to defaults unless they were 185 198 # explicitly set … … 245 258 } 246 259 247 # default implementation is to do nothing .248 sub store_block_files 249 { 260 # default implementation is to do nothing 261 sub store_block_files { 262 250 263 my $self =shift (@_); 251 my ($filename) = @_; 252 return; 264 my ($filename_full_path, $block_hash) = @_; 265 266 } 267 268 # put files to block into hash 269 sub use_block_expressions { 270 271 my $self =shift (@_); 272 my ($filename_full_path, $block_hash) = @_; 273 274 if ($self->{'block_exp'} ne "" && $filename_full_path =~ /$self->{'block_exp'}/) { 275 $block_hash->{'file_blocks'}->{$filename_full_path} = 1; 276 } 277 253 278 } 254 279 … … 257 282 { 258 283 my $self =shift; 259 my $filename = shift;284 my ($filename, $block_hash) = @_; 260 285 261 286 if ($self->{'cover_image'}) { … … 266 291 } 267 292 if (-e $coverfile) { 268 $ self->{'file_blocks'}->{$coverfile} = 1;293 $block_hash->{'file_blocks'}->{$coverfile} = 1; 269 294 } 270 295 } … … 273 298 } 274 299 275 sub root_ext_split 276 { 277 my $self = shift (@_); 278 my ($filename,$tail_re) = @_; 279 280 my ($file_prefix,$file_ext) = ($filename =~ m/^(.*?)($tail_re)$/); 281 282 if ((!defined $file_prefix) || (!defined $file_ext)) { 283 ($file_prefix,$file_ext) = ($filename =~ m/^(.*)(\..*?)$/); 284 } 285 286 return ($file_prefix,$file_ext); 287 } 288 289 sub metadata_read { 300 301 # discover all the files that should be blocked by this plugin 302 # check the args ... 303 sub file_block_read { 304 290 305 my $self = shift (@_); 291 my ($pluginfo, $base_dir, $file, $ metadata, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli) = @_;306 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $gli) = @_; 292 307 # Keep track of filenames with same root but different extensions 293 308 # Used to support -associate_ext and the more generalised 294 309 # -associate_tail_re 310 my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file); 295 311 296 312 my $associate_tail_re = $self->{'associate_tail_re'}; 297 313 if ((defined $associate_tail_re) && ($associate_tail_re ne "")) { 298 314 299 315 my ($file_prefix,$file_ext) 300 = $self->root_ext_split($file,$associate_tail_re);301 316 = &util::get_prefix_and_tail_by_regex($filename_full_path,$associate_tail_re); 317 302 318 if ((defined $file_prefix) && (defined $file_ext)) { 303 304 my $shared_fileroot = $self->{'shared_fileroot'}; 319 my $shared_fileroot = $block_hash->{'shared_fileroot'}; 305 320 if (!defined $shared_fileroot->{$file_prefix}) { 306 321 my $file_prefix_rec = { 'tie_to' => undef, … … 311 326 my $file_prefix_rec = $shared_fileroot->{$file_prefix}; 312 327 313 my $process_exp = $self->{'process_exp'}; 314 315 if ($file =~ m/$process_exp/) { 328 if ($self->can_process_this_file($filename_full_path)) { 316 329 # This is the document the others should be tied to 317 330 $file_prefix_rec->{'tie_to'} = $file_ext; … … 319 332 else { 320 333 if ($file_ext =~ m/$associate_tail_re$/) { 334 # this file should be associated to the main one 321 335 $file_prefix_rec->{'exts'}->{$file_ext} = 1; 322 336 } … … 326 340 } 327 341 342 # check block expressions 343 $self->use_block_expressions($filename_full_path, $block_hash) unless $self->{'no_blocking'}; 344 328 345 # now check whether we are actually processing this 329 my $filename = $file; 330 $filename = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/; 331 if ($self->{'process_exp'} eq "" || $filename !~ /$self->{'process_exp'}/ || !-f $filename) { 346 if (!-f $filename_full_path || !$self->can_process_this_file($filename_full_path)) { 332 347 return undef; # can't recognise 333 348 } 334 335 # do smart blocking if appropriate 336 if ($self->{'smart_block'}) { 337 $self->store_block_files($filename); 338 } 349 350 $self->store_block_files($filename_full_path, $block_hash) unless $self->{'no_blocking'}; 351 339 352 # block the cover image if there is one 340 353 if ($self->{'cover_image'}) { 341 $self->block_cover_image($filename );354 $self->block_cover_image($filename_full_path, $block_hash) unless $self->{'no_blocking'}; 342 355 } 343 356 … … 345 358 } 346 359 347 sub tie_to_filename 348 { 349 my $self = shift (@_); 350 351 my ($file_ext,$file_prefix_rec) = @_; 352 353 if (defined $file_prefix_rec) { 354 my $tie_to = $file_prefix_rec->{'tie_to'}; 355 356 if (defined $tie_to) { 357 if ($tie_to eq $file_ext) { 358 return 1; 359 } 360 } 361 } 362 360 # plugins that rely on more than process_exp (eg XML plugins) can override this method 361 sub can_process_this_file { 362 my $self = shift(@_); 363 my ($filename) = @_; 364 365 if ($self->{'process_exp'} ne "" && $filename =~ /$self->{'process_exp'}/) { 366 return 1; 367 } 363 368 return 0; 364 } 365 366 sub tie_to_assoc_file 367 { 368 my $self = shift (@_); 369 my ($file_ext,$file_prefix_rec) = @_; 370 371 if (defined $file_prefix_rec) { 372 my $tie_to = $file_prefix_rec->{'tie_to'}; 373 if (defined $tie_to) { 374 375 my $exts = $file_prefix_rec->{'exts'}; 376 377 my $has_file_ext = $exts->{$file_ext}; 378 379 if ($has_file_ext) { 380 return 1; 381 } 382 } 383 } 384 385 return 0; 386 } 387 388 389 sub associate_with 390 { 391 my $self = shift (@_); 392 my ($file, $filename, $metadata) = @_; 393 394 my $associate_tail_re = $self->{'associate_tail_re'}; 395 return 0 if (!$associate_tail_re); 396 397 # If file, see if matches with "tie_to" doc or is one of the 398 # associated filename extensions. 399 400 my ($file_prefix,$file_ext) = $self->root_ext_split($file,$associate_tail_re); 401 402 if ((defined $file_prefix) && (defined $file_ext)) { 403 404 my $file_prefix_rec = $self->{'shared_fileroot'}->{$file_prefix}; 405 406 if ($self->tie_to_filename($file_ext,$file_prefix_rec)) { 407 408 # Set up gsdlassocfile_tobe 409 410 my $exts = $file_prefix_rec->{'exts'}; 411 412 if (!defined $metadata->{'gsdlassocfile_tobe'}) { 413 $metadata->{'gsdlassocfile_tobe'} = []; 414 } 415 416 my $assoc_tobe = $metadata->{'gsdlassocfile_tobe'}; 417 418 my ($full_prefix) = ($filename =~ m/^(.*)\..*?$/); 419 foreach my $e (keys %$exts) { 420 my $assoc_file = "$full_prefix$e"; 421 print STDERR " $self->{'plugin_type'}: Associating $file_prefix$e with $file_prefix_rec->{'tie_to'} version\n"; 422 my $mime_type = ""; # let system auto detect this 423 push(@$assoc_tobe,"$assoc_file:$mime_type:"); 424 } 425 426 } 427 elsif ($self->tie_to_assoc_file($file_ext,$file_prefix_rec)) { 428 429 430 # a form of smart block 431 return 1; 432 } 433 } 434 435 return 0; 436 } 437 438 sub get_full_filenames { 439 my $self = shift (@_); 440 my ($base_dir, $file) = @_; 441 442 my $filename_full_path = $file; 443 # add on directory if present 444 $filename_full_path = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/; 445 my $filename_no_path = $file; 446 # remove directory if present 447 $filename_no_path =~ s/^.*[\/\\]//; 448 return ($filename_full_path, $filename_no_path); 449 } 450 451 sub read_block { 452 my $self = shift (@_); 453 454 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 455 456 457 my ($filename_full_path, $filename_no_path) = $self->get_full_filenames($base_dir, $file); 458 459 if ($self->associate_with($file,$filename_full_path,$metadata)) { 460 # a form of smart block 461 $self->{'num_blocked'} ++; 462 return (0,undef); # blocked 463 } 464 465 my $smart_block = $self->{'smart_block'}; 466 my $smart_block_BN = $self->{'smart_block_BN'}; 467 468 if ($smart_block || $smart_block_BN) { 469 if (defined $self->{'file_blocks'}->{$filename_full_path} && $self->{'file_blocks'}->{$filename_full_path} == 1){ 470 $self->{'num_blocked'} ++; 471 return (0,undef); # blocked 472 } 473 } else { 474 if ($self->{'block_exp'} ne "" && $filename_full_path =~ /$self->{'block_exp'}/) { 475 $self->{'num_blocked'} ++; 476 return (0,undef); # blocked 477 } 478 if ($self->{'cover_image'}) { 479 if (defined $self->{'file_blocks'}->{$filename_full_path} && $self->{'file_blocks'}->{$filename_full_path} == 1){ 480 $self->{'num_blocked'} ++; 481 return (0,undef); # blocked 482 } 483 } 484 } 485 486 if ($filename_full_path !~ /$self->{'process_exp'}/ || !-f $filename_full_path) { 487 return (undef,undef); # can't recognise 488 } 489 490 ##why are we returning the full filename - do we need this?? 491 return (1,$filename_full_path); 492 } 493 494 495 #filename_encoding set by user 496 sub filename_to_utf8_metadata 497 { 369 370 } 371 372 # just converts path as is to utf8. 373 sub filepath_to_utf8 { 498 374 my $self = shift (@_); 499 375 my ($file, $file_encoding) = @_; 500 501 my $outhandle = $self->{'outhandle'}; 502 503 my ($filemeta) = $file =~ /([^\\\/]+)$/; # getting the tail of the filepath (skips all string parts containing slashes upto the end) 376 my $filemeta = $file; 504 377 505 378 my $filename_encoding = $self->{'filename_encoding'}; … … 529 402 ); 530 403 } 404 405 return $filemeta; 406 } 407 408 # gets the filename with no path, converts to utf8, and then dm safes it. 409 #filename_encoding set by user 410 sub filename_to_utf8_metadata 411 { 412 my $self = shift (@_); 413 my ($file, $file_encoding) = @_; 414 415 my $outhandle = $self->{'outhandle'}; 416 417 my ($filemeta) = $file =~ /([^\\\/]+)$/; # getting the tail of the filepath (skips all string parts containing slashes upto the end) 418 $filemeta = $self->filepath_to_utf8($filemeta, $file_encoding); 419 531 420 my $dmsafe_filemeta = &ghtml::dmsafe($filemeta); 532 421 … … 649 538 sub read_into_doc_obj { 650 539 my $self = shift (@_); 651 my ($pluginfo, $base_dir, $file, $ metadata, $processor, $maxdocs, $total_count, $gli) = @_;540 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 652 541 653 542 my $outhandle = $self->{'outhandle'}; … … 658 547 if $self->{'verbosity'} > 1; 659 548 660 my ($filename_full_path, $filename_no_path) = $self->get_full_filenames($base_dir, $file);549 my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file); 661 550 # create a new document 662 551 my $doc_obj = new doc ($filename_full_path, "indexed_doc"); … … 724 613 } 725 614 615 # implement this if you are extracting metadata for other documents 616 sub metadata_read { 617 my $self = shift (@_); 618 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli) = @_; 619 620 # can we process this file?? 621 my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file); 622 return undef unless $self->can_process_this_file($filename_full_path); 623 624 return 1; # we recognise the file, but don't actually do anything with it 625 } 626 627 726 628 # The BasePlugin read() function. This function calls read_into_doc_obj() 727 629 # to ensure all the right things to make general options work for a … … 741 643 sub read { 742 644 my $self = shift (@_); 743 my ($pluginfo, $base_dir, $file, $ metadata, $processor, $maxdocs, $total_count, $gli) = @_;744 745 # c heck that we are not blocked746 my ($ block_status,$filename) = $self->read_block(@_);747 return $block_status if ((!defined $block_status) || ($block_status==0));748 645 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 646 647 # can we process this file?? 648 my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file); 649 return undef unless $self->can_process_this_file($filename_full_path); 650 749 651 my ($process_status,$doc_obj) = $self->read_into_doc_obj(@_); 750 652 … … 771 673 772 674 gsprintf(STDERR, "BasePlugin::process {common.must_be_implemented}\n") && die "\n"; 773 # die "BasePlugin::process function must be implemented in sub-class\n";774 675 775 676 return undef; # never gets here … … 781 682 782 683 } 684 783 685 # write_file -- used by ConvertToPlug, for example in post processing 784 686 # … … 848 750 # need to be associated with a document, but the document hasn't 849 751 # been formed yet. 850 851 752 my $equiv_form = ""; 852 753 foreach my $gaf (@{$metadata->{$field}}) { … … 854 755 my ($tail_filename) = ($full_filename =~ /^.*[\/\\](.+?)$/); 855 756 my $filename = $full_filename; 856 857 757 $doc_obj->associate_file($full_filename,$tail_filename,$mimetype); 858 758 … … 860 760 861 761 my ($file_prefix,$file_extended_ext) 862 = $self->root_ext_split($tail_filename,$associate_tail_re);762 = &util::get_prefix_and_tail_by_regex($tail_filename,$associate_tail_re); 863 763 my ($pre_doc_ext) = ($file_extended_ext =~ m/^(.*)\..*$/); 864 764 … … 943 843 944 844 845 945 846 1;
Note:
See TracChangeset
for help on using the changeset viewer.