Changeset 11090

Show
Ignore:
Timestamp:
24.01.2006 10:29:34 (14 years ago)
Author:
kjdon
Message:

made all plugins that implement read() call read_block to check process_exp, block_exp, smart blocking, cover image blocking etc

Location:
trunk/gsdl/perllib/plugins
Files:
18 modified

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/ConvertToPlug.pm

    r11008 r11090  
    331331    my $self = shift (@_); 
    332332    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
    333     #if ($self->is_recursive()) { 
    334     #   die "BasPlug::read function must be implemented in sub-class for recursive plugins\n"; 
    335     # } 
    336333 
    337334    my $outhandle = $self->{'outhandle'}; 
  • trunk/gsdl/perllib/plugins/ConvertToRogPlug.pm

    r10254 r11090  
    313313    my $self = shift (@_); 
    314314    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
    315 #    if ($self->is_recursive()) { 
    316 #        die "BasPlug::read function must be implemented in sub-class for recursive plugins\n"; 
    317 #    } 
    318315 
    319316    my $outhandle = $self->{'outhandle'}; 
    320317 
    321     my $filename = $file; 
    322     $filename = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/; 
    323  
    324     if ($self->{'block_exp'} ne "" && $filename =~ /$self->{'block_exp'}/) { 
    325     $self->{'num_blocked'} ++; 
    326     return 0; 
    327     } 
    328     if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) { 
    329         return undef; 
    330     } 
     318    # check process_exp, block_exp, associate_ext etc 
     319    my ($block_status,$filename) = $self->read_block(@_);     
     320    return $block_status if ((!defined $block_status) || ($block_status==0)); 
     321  
    331322    $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up 
    332323 
  • trunk/gsdl/perllib/plugins/DBPlug.pm

    r10956 r11090  
    8888 
    8989    # see if we can handle the passed file... 
    90     my $filename = $file; 
    91     $filename = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/; 
    92     if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) { 
    93     # this plugin can't process this file type... 
    94         return undef; 
    95     } 
     90    my ($block_status,$filename) = $self->read_block(@_);     
     91    return $block_status if ((!defined $block_status) || ($block_status==0)); 
    9692 
    9793    my $outhandle = $self->{'outhandle'}; 
  • trunk/gsdl/perllib/plugins/FOXPlug.pm

    r10254 r11090  
    6969} 
    7070 
    71 sub is_recursive { 
    72     my $self = shift (@_); 
    73  
    74     return 0; # this is not a recursive plugin 
    75 } 
    76  
     71sub get_default_process_exp { 
     72    my $self = shift (@_); 
     73 
     74    return q^(?i)\.dbf$^; 
     75} 
     76 
     77#dbt files are processed at the same time as dbf files 
     78sub get_default_block_exp { 
     79    my $self = shift (@_); 
     80 
     81    return q^(?i)\.dbt$^; 
     82} 
    7783 
    7884# return number of files processed, undef if can't process 
     
    8288    my $self = shift (@_); 
    8389    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
    84     my $fullname = &util::filename_cat ($base_dir, $file); 
    85  
    86     # dbt files are processed at the same time as dbf files 
    87     return 0 if ($fullname =~ /\.dbt$/i); 
    88  
    89     # see if this is a foxbase database 
    90     return undef unless (-f $fullname && $fullname =~ /\.dbf$/i); 
     90  
     91    #check for associate_ext, blocking etc, are we processing this file? 
     92    my ($block_status,$fullname) = $self->read_block(@_);     
     93    return $block_status if ((!defined $block_status) || ($block_status==0)); 
     94 
     95    print STDERR "<Processing n='$file' p='FOXPlug'>\n" if ($gli); 
     96    print STDERR "FOXPlug: processing $file\n" if $self->{'verbosity'} > 1; 
    9197 
    9298    my ($parent_dir) = $fullname =~ /^(.*)\/[^\/]+\.dbf$/i; 
     
    100106    return -1; # error in processing 
    101107    } 
    102  
    103     print STDERR "<Processing n='$file' p='FOXPlug'>\n" if ($gli); 
    104      
    105     print STDERR "FOXPlug: processing $file\n"; 
    106108 
    107109    # read in the database header 
  • trunk/gsdl/perllib/plugins/GMLPlug.pm

    r10254 r11090  
    8282    my $outhandle = $self->{'outhandle'}; 
    8383 
    84     my $filename = $file; 
    85     $filename = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/; 
    86  
    87     return 0 if $self->{'block_exp'} ne "" && $filename =~ /$self->{'block_exp'}/; 
    88     if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) { 
    89     return undef; 
    90     } 
     84    #check process and block exps, smart block, etc 
     85    my ($block_status,$filename) = $self->read_block(@_);     
     86    return $block_status if ((!defined $block_status) || ($block_status==0)); 
     87 
    9188    $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up 
    9289 
  • trunk/gsdl/perllib/plugins/ImagePlug.pm

    r10254 r11090  
    337337 
    338338 
    339 # The ImagePlug read() function. This function does all the right things 
    340 # to make general options work for a given plugin. It calls the process() 
    341 # function which does all the work specific to a plugin (like the old 
    342 # read functions used to do). Most plugins should define their own  
    343 # process() function and let this read() function keep control. 
    344 # 
     339# The ImagePlug read() function.  
    345340# ImagePlug overrides read() because there is no need to read the actual  
    346341# text of the file in, because the contents of the file is not text... 
     
    356351    my $outhandle = $self->{'outhandle'}; 
    357352 
    358     my $filename = &util::filename_cat($base_dir, $file); 
    359     return 0 if $self->{'block_exp'} ne "" && $filename =~ /$self->{'block_exp'}/; 
    360     if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) { 
    361     return undef; 
    362     } 
     353    #check process and block exps, smart block, etc 
     354    my ($block_status,$filename) = $self->read_block(@_);     
     355    return $block_status if ((!defined $block_status) || ($block_status==0)); 
    363356 
    364357    print STDERR "<Processing n='$file' p='ImagePlug'>\n" if ($gli); 
  • trunk/gsdl/perllib/plugins/MP3Plug.pm

    r10347 r11090  
    307307    my $outhandle = $self->{'outhandle'}; 
    308308 
    309     # Make sure we're processing the correct file 
    310     my $filename = &util::filename_cat($base_dir, $file); 
    311     return 0 if $self->{'block_exp'} ne "" && $filename =~ /$self->{'block_exp'}/; 
    312     if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) { 
    313     return undef; 
    314     } 
     309    #check for associate_ext, blocking etc 
     310    my ($block_status,$filename) = $self->read_block(@_);     
     311    return $block_status if ((!defined $block_status) || ($block_status==0)); 
     312 
    315313    print STDERR "<Processing n='$file' p='MP3Plug'>\n" if ($gli); 
    316314    print $outhandle "MP3Plug processing \"$filename\"\n" 
  • trunk/gsdl/perllib/plugins/NULPlug.pm

    r10978 r11090  
    9595    my $outhandle = $self->{'outhandle'}; 
    9696 
    97     # Make sure we're processing the correct file 
    98     my $filename = &util::filename_cat($base_dir, $file); 
    99     return 0 if $self->{'block_exp'} ne "" && $filename =~ /$self->{'block_exp'}/; 
    100     if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) { 
    101     return undef; 
    102     } 
     97    #check for associate_ext, blocking etc 
     98    my ($block_status,$filename) = $self->read_block(@_);     
     99    return $block_status if ((!defined $block_status) || ($block_status==0)); 
     100 
    103101    print STDERR "<Processing n='$file' p='NULPlug'>\n" if ($gli); 
    104102    print $outhandle "NULPlug processing \"$filename\"\n" 
  • trunk/gsdl/perllib/plugins/OggVorbisPlug.pm

    r10254 r11090  
    8787    my $outhandle = $self->{'outhandle'}; 
    8888 
    89     # filename is the full pathname of the file 
    90     my $filename = &util::filename_cat($base_dir, $file); 
    91     return 0 if $self->{block_exp} ne "" && $filename =~ /$self->{'block_exp'}/; 
    92     if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) { 
    93     return undef; 
    94     } 
     89    #check process and block exps, smart block, etc 
     90    my ($block_status,$filename) = $self->read_block(@_);     
     91    return $block_status if ((!defined $block_status) || ($block_status==0)); 
    9592 
    96     # Report that we're processing the file 
     93     # Report that we're processing the file 
    9794    print STDERR "<Processing n='$file' p='OggVorbisPlug'>\n" if ($gli); 
    9895    print $outhandle "OggVorbisPlug: processing $file\n" 
  • trunk/gsdl/perllib/plugins/OpenDocumentPlug.pm

    r10997 r11090  
    148148    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
    149149 
    150     my $filename = $file; 
    151     $filename = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/; 
    152  
    153     if ($self->{'block_exp'} ne "" && $filename =~ /$self->{'block_exp'}/) { 
    154     $self->{'num_blocked'} ++; 
    155     return 0; 
    156     } 
    157     if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) { 
    158     return undef; 
    159     } 
     150    # check process and block exps, smart block, associate_ext etc 
     151    my ($block_status,$filename) = $self->read_block(@_);     
     152    return $block_status if ((!defined $block_status) || ($block_status==0)); 
    160153 
    161154    $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up 
  • trunk/gsdl/perllib/plugins/PagedImgPlug.pm

    r10613 r11090  
    539539    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
    540540    my $outhandle   = $self->{'outhandle'}; 
    541     my $smart_block = $self->{'smart_block'}; 
    542      
    543     my $filename = &util::filename_cat($base_dir, $file); 
    544      
    545     if ($self->associate_with($file,$filename,$metadata)) { 
    546     # a form of smart block 
    547     $self->{'num_blocked'} ++; 
    548     return 0; # blocked 
    549     } 
    550      
    551     if ($smart_block) { 
    552     if (defined $self->{'file_blocks'}->{$filename} && $self->{'file_blocks'}->{$filename} == 1){ 
    553         $self->{'num_blocked'} ++; 
    554         return 0; # blocked 
    555     } 
    556     } elsif ($self->{'block_exp'} ne "" && $filename =~ /$self->{'block_exp'}/) { 
    557     $self->{'num_blocked'} ++; 
    558     return 0; # blocked 
    559     } 
    560      
    561     if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) { 
    562     return undef; 
    563     } 
     541     
     542    #check process and block exps, smart block, etc 
     543    my ($block_status,$filename) = $self->read_block(@_);     
     544    return $block_status if ((!defined $block_status) || ($block_status==0)); 
    564545     
    565546    print $outhandle "PagedImgPlug processing \"$filename\"\n" 
     
    731712    # assign pagenum as  what?? 
    732713    my $pagenum = $_{'pagenum'}; #TODO!! 
    733     $doc_obj->set_utf8_metadata_element($self->{'current_section'}, 'PageNum', $pagenum); 
     714    if (defined $pagenum) { 
     715        $doc_obj->set_utf8_metadata_element($self->{'current_section'}, 'PageNum', $pagenum); 
     716    } 
    734717    my ($imgfile) = $_{'imgfile'}; 
    735718    if (defined $imgfile) { 
     
    832815    $doc_obj->set_OIDtype ($processor->{'OIDtype'}); 
    833816    my $topsection = $doc_obj->get_top_section(); 
     817    $doc_obj->add_utf8_metadata($topsection, "Plugin", "$self->{'plugin_type'}"); 
     818    $doc_obj->add_metadata($topsection, "FileFormat", "PagedImg"); 
    834819 
    835820    if ($self->{'documenttype'} eq 'paged') { 
     
    849834    next unless $line =~ /\w/; 
    850835    chomp $line; 
     836    next if $line =~ /^#/; # ignore comment lines 
    851837    if ($line =~ /^<([^>]*)>\s*(.*?)\s*$/) { 
    852838        $doc_obj->set_utf8_metadata_element ($topsection, $1, $2); 
  • trunk/gsdl/perllib/plugins/RealMediaPlug.pm

    r10395 r11090  
    8686    my $outhandle = $self->{'outhandle'}; 
    8787 
    88     # filename is the full pathname of the file 
    89     my $filename = &util::filename_cat($base_dir, $file); 
    90     return 0 if $self->{block_exp} ne "" && $filename =~ /$self->{'block_exp'}/; 
    91     if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) { 
    92     return undef; 
    93     } 
     88    #check process and block exps, smart block, etc 
     89    my ($block_status,$filename) = $self->read_block(@_);     
     90    return $block_status if ((!defined $block_status) || ($block_status==0)); 
    9491 
    9592    # Report that we're processing the file 
  • trunk/gsdl/perllib/plugins/RogPlug.pm

    r10254 r11090  
    6464 
    6565    return bless $self, $class; 
    66 } 
    67  
    68 sub is_recursive { 
    69     my $self = shift (@_); 
    70  
    71     return 0; # this is not a recursive plugin 
    7266} 
    7367 
  • trunk/gsdl/perllib/plugins/SplitPlug.pm

    r10254 r11090  
    9393    $self->{'process_exp'} = $self->get_default_process_exp (); 
    9494    if ($self->{'process_exp'} eq "") { 
    95         warn ref($self) . " Warning: Non-recursive plugin has no process_exp\n"; 
     95        warn ref($self) . " Warning: plugin has no process_exp\n"; 
    9696    } 
    9797    } 
     
    185185    my $verbosity = $self->{'verbosity'}; 
    186186 
    187     # Figure out the exact filename of this file (and maybe block it) 
    188     my $filename = &util::filename_cat($base_dir, $file); 
    189     my $block_exp = $self->{'block_exp'}; 
    190     return 0 if $self->{'block_exp'} ne "" && $filename =~ /$self->{'block_exp'}/; 
    191     if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) { 
    192     return undef; 
    193     } 
     187    #check process and block exps, smart block, etc 
     188    my ($block_status,$filename) = $self->read_block(@_);     
     189    return $block_status if ((!defined $block_status) || ($block_status==0)); 
     190 
    194191    $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up 
    195192 
     
    228225    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "SourceSegment", "$segment"); 
    229226    if ($self->{'cover_image'}) { 
    230       $self->associate_cover_image($doc_obj, $filename); 
     227        $self->associate_cover_image($doc_obj, $filename); 
    231228    } 
    232229    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}"); 
  • trunk/gsdl/perllib/plugins/UnknownPlug.pm

    r10985 r11090  
    179179 
    180180    # Make sure we're processing the correct file 
    181     my $filename = &util::filename_cat($base_dir, $file); 
    182     return 0 if $self->{'block_exp'} ne "" && $filename =~ /$self->{'block_exp'}/; 
    183     if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) { 
    184     return undef; 
    185     } 
     181    my ($block_status,$filename) = $self->read_block(@_);     
     182    return $block_status if ((!defined $block_status) || ($block_status==0)); 
     183 
    186184    print STDERR "<Processing n='$file' p='UnknownPlug'>\n" if ($gli); 
    187185    print $outhandle "UnknownPlug processing \"$filename\"\n" 
  • trunk/gsdl/perllib/plugins/W3ImgPlug.pm

    r10347 r11090  
    250250# image extraction done through read() 
    251251sub process { 
    252     my ($self, $textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_; 
     252    my ($self, $textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 
    253253    $self->{'imglist'} = (); 
    254254    if ( $self->{'index_pages'} ) { 
    255     my $ok = $self->SUPER::process($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj); 
     255    my $ok = $self->SUPER::process($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli); 
    256256    if ( ! $ok ) { return $ok } 
    257257    $self->{'htdoc_obj'} = $doc_obj; 
  • trunk/gsdl/perllib/plugins/XMLPlug.pm

    r10254 r11090  
    6969    $self = (defined $hashArgOptLists)? new BasPlug($pluginlist,$inputargs,$hashArgOptLists): new BasPlug($pluginlist,$inputargs); 
    7070 
     71    if ($self->{'info_only'}) { 
     72    # don't worry about any options etc 
     73    return bless $self, $class; 
     74    } 
     75 
    7176    my $parser = new XML::Parser('Style' => 'Stream', 
    7277                 'Handlers' => {'Char' => \&Char, 
     
    153158    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
    154159 
    155     my $filename = $file; 
    156     $filename = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/; 
    157  
    158     if ($self->{'block_exp'} ne "" && $filename =~ /$self->{'block_exp'}/) { 
    159     $self->{'num_blocked'} ++; 
    160     return 0; 
    161     } 
    162     if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) { 
    163     return undef; 
    164     } 
     160    # Make sure we're processing the correct file, do blocking etc 
     161    my ($block_status,$filename) = $self->read_block(@_);     
     162    return $block_status if ((!defined $block_status) || ($block_status==0)); 
    165163 
    166164    $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up 
  • trunk/gsdl/perllib/plugins/ZIPPlug.pm

    r10254 r11090  
    8282    my $self = (defined $hashArgOptLists)? new BasPlug($pluginlist,$inputargs,$hashArgOptLists): new BasPlug($pluginlist,$inputargs); 
    8383 
    84     # BasPlug is explicitly set not to set process_exp if recursive plugin 
    85     # Not sure of this reasoning.  Want it to be set in ZIPPlug, so explicitly 
    86     # pass it in as default value 
    87      
    88     ## do we need this???? --kjdon 
    89     if (!$self->{'process_exp'}) { 
    90     $self->{'process_exp'} = get_default_process_exp(); 
    91     } 
    92  
    9384    return bless $self, $class; 
    9485} 
     
    113104    my $outhandle = $self->{'outhandle'}; 
    114105 
    115     if ($file =~ /$self->{'process_exp'}/) { 
    116  
    117     my $filename = $file; 
    118     $filename = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/; 
    119     if (!-e $filename) { 
    120         print $outhandle "ZIPPLug: WARNING: $filename does not exist\n"; 
    121         return undef; 
    122     } 
     106    # check process_exp, block_exp, associate_ext etc 
     107    my ($block_status,$filename) = $self->read_block(@_);     
     108    return $block_status if ((!defined $block_status) || ($block_status==0)); 
    123109     
    124     my ($file_only) = $file =~ /([^\\\/]*)$/; 
    125     my $tmpdir = &util::get_tmp_filename (); 
    126     &util::mk_all_dir ($tmpdir); 
    127  
    128     print $outhandle "ZIPPlug: extracting $file_only to $tmpdir\n" 
    129         if $self->{'verbosity'} > 1; 
    130  
    131     # save current working directory 
    132     my $cwd = cwd(); 
    133     chdir ($tmpdir) || die "Unable to change to $tmpdir"; 
    134     &util::cp ($filename, $tmpdir); 
    135      
    136     if ($file =~ /\.bz$/i) { 
    137         $self->bunzip ($file_only); 
    138     } elsif ($file =~ /\.bz2$/i) { 
    139         $self->bunzip2 ($file_only); 
    140     } elsif ($file =~ /\.(zip|jar)$/i) { 
    141         $self->unzip ($file_only); 
    142     } elsif ($file =~ /\.tar$/i) { 
    143         $self->untar ($file_only); 
    144     } else { 
    145         $self->gunzip ($file_only); 
    146     } 
    147  
    148     chdir ($cwd) || die "Unable to change back to $cwd"; 
    149  
    150     my $numdocs = &plugin::read ($pluginfo, "", $tmpdir, $metadata, $processor, $maxdocs, $total_count, $gli); 
    151     &util::rm_r ($tmpdir); 
    152  
    153     $self->{'num_archives'} ++; 
    154  
    155     return $numdocs; 
    156  
     110    my ($file_only) = $file =~ /([^\\\/]*)$/; 
     111    my $tmpdir = &util::get_tmp_filename (); 
     112    &util::mk_all_dir ($tmpdir); 
     113     
     114    print $outhandle "ZIPPlug: extracting $file_only to $tmpdir\n" 
     115    if $self->{'verbosity'} > 1; 
     116     
     117    # save current working directory 
     118    my $cwd = cwd(); 
     119    chdir ($tmpdir) || die "Unable to change to $tmpdir"; 
     120    &util::cp ($filename, $tmpdir); 
     121     
     122    if ($file =~ /\.bz$/i) { 
     123    $self->bunzip ($file_only); 
     124    } elsif ($file =~ /\.bz2$/i) { 
     125    $self->bunzip2 ($file_only); 
     126    } elsif ($file =~ /\.(zip|jar)$/i) { 
     127    $self->unzip ($file_only); 
     128    } elsif ($file =~ /\.tar$/i) { 
     129    $self->untar ($file_only); 
    157130    } else { 
    158     return undef; 
     131    $self->gunzip ($file_only); 
    159132    } 
     133     
     134    chdir ($cwd) || die "Unable to change back to $cwd"; 
     135     
     136    my $numdocs = &plugin::read ($pluginfo, "", $tmpdir, $metadata, $processor, $maxdocs, $total_count, $gli); 
     137    &util::rm_r ($tmpdir); 
     138     
     139    $self->{'num_archives'} ++; 
     140     
     141    return $numdocs; 
     142     
    160143} 
    161144