Changeset 11090


Ignore:
Timestamp:
2006-01-24T10:29:34+13:00 (18 years ago)
Author:
kjdon
Message:

made all plugins that implement read() call read_block to check process_exp, block_exp, smart blocking, cover image blocking etc

Location:
trunk/gsdl/perllib/plugins
Files:
18 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/ConvertToPlug.pm

    r11008 r11090  
    331331    my $self = shift (@_);
    332332    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    333     #if ($self->is_recursive()) {
    334     #   die "BasPlug::read function must be implemented in sub-class for recursive plugins\n";
    335     # }
    336333
    337334    my $outhandle = $self->{'outhandle'};
  • trunk/gsdl/perllib/plugins/ConvertToRogPlug.pm

    r10254 r11090  
    313313    my $self = shift (@_);
    314314    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    315 #    if ($self->is_recursive()) {
    316 #        die "BasPlug::read function must be implemented in sub-class for recursive plugins\n";
    317 #    }
    318315
    319316    my $outhandle = $self->{'outhandle'};
    320317
    321     my $filename = $file;
    322     $filename = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/;
    323 
    324     if ($self->{'block_exp'} ne "" && $filename =~ /$self->{'block_exp'}/) {
    325     $self->{'num_blocked'} ++;
    326     return 0;
    327     }
    328     if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) {
    329         return undef;
    330     }
     318    # check process_exp, block_exp, associate_ext etc
     319    my ($block_status,$filename) = $self->read_block(@_);   
     320    return $block_status if ((!defined $block_status) || ($block_status==0));
     321 
    331322    $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up
    332323
  • trunk/gsdl/perllib/plugins/DBPlug.pm

    r10956 r11090  
    8888
    8989    # see if we can handle the passed file...
    90     my $filename = $file;
    91     $filename = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/;
    92     if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) {
    93     # this plugin can't process this file type...
    94         return undef;
    95     }
     90    my ($block_status,$filename) = $self->read_block(@_);   
     91    return $block_status if ((!defined $block_status) || ($block_status==0));
    9692
    9793    my $outhandle = $self->{'outhandle'};
  • trunk/gsdl/perllib/plugins/FOXPlug.pm

    r10254 r11090  
    6969}
    7070
    71 sub is_recursive {
    72     my $self = shift (@_);
    73 
    74     return 0; # this is not a recursive plugin
    75 }
    76 
     71sub get_default_process_exp {
     72    my $self = shift (@_);
     73
     74    return q^(?i)\.dbf$^;
     75}
     76
     77#dbt files are processed at the same time as dbf files
     78sub get_default_block_exp {
     79    my $self = shift (@_);
     80
     81    return q^(?i)\.dbt$^;
     82}
    7783
    7884# return number of files processed, undef if can't process
     
    8288    my $self = shift (@_);
    8389    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    84     my $fullname = &util::filename_cat ($base_dir, $file);
    85 
    86     # dbt files are processed at the same time as dbf files
    87     return 0 if ($fullname =~ /\.dbt$/i);
    88 
    89     # see if this is a foxbase database
    90     return undef unless (-f $fullname && $fullname =~ /\.dbf$/i);
     90 
     91    #check for associate_ext, blocking etc, are we processing this file?
     92    my ($block_status,$fullname) = $self->read_block(@_);   
     93    return $block_status if ((!defined $block_status) || ($block_status==0));
     94
     95    print STDERR "<Processing n='$file' p='FOXPlug'>\n" if ($gli);
     96    print STDERR "FOXPlug: processing $file\n" if $self->{'verbosity'} > 1;
    9197
    9298    my ($parent_dir) = $fullname =~ /^(.*)\/[^\/]+\.dbf$/i;
     
    100106    return -1; # error in processing
    101107    }
    102 
    103     print STDERR "<Processing n='$file' p='FOXPlug'>\n" if ($gli);
    104    
    105     print STDERR "FOXPlug: processing $file\n";
    106108
    107109    # read in the database header
  • trunk/gsdl/perllib/plugins/GMLPlug.pm

    r10254 r11090  
    8282    my $outhandle = $self->{'outhandle'};
    8383
    84     my $filename = $file;
    85     $filename = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/;
    86 
    87     return 0 if $self->{'block_exp'} ne "" && $filename =~ /$self->{'block_exp'}/;
    88     if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) {
    89     return undef;
    90     }
     84    #check process and block exps, smart block, etc
     85    my ($block_status,$filename) = $self->read_block(@_);   
     86    return $block_status if ((!defined $block_status) || ($block_status==0));
     87
    9188    $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up
    9289
  • trunk/gsdl/perllib/plugins/ImagePlug.pm

    r10254 r11090  
    337337
    338338
    339 # The ImagePlug read() function. This function does all the right things
    340 # to make general options work for a given plugin. It calls the process()
    341 # function which does all the work specific to a plugin (like the old
    342 # read functions used to do). Most plugins should define their own
    343 # process() function and let this read() function keep control.
    344 #
     339# The ImagePlug read() function.
    345340# ImagePlug overrides read() because there is no need to read the actual
    346341# text of the file in, because the contents of the file is not text...
     
    356351    my $outhandle = $self->{'outhandle'};
    357352
    358     my $filename = &util::filename_cat($base_dir, $file);
    359     return 0 if $self->{'block_exp'} ne "" && $filename =~ /$self->{'block_exp'}/;
    360     if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) {
    361     return undef;
    362     }
     353    #check process and block exps, smart block, etc
     354    my ($block_status,$filename) = $self->read_block(@_);   
     355    return $block_status if ((!defined $block_status) || ($block_status==0));
    363356
    364357    print STDERR "<Processing n='$file' p='ImagePlug'>\n" if ($gli);
  • trunk/gsdl/perllib/plugins/MP3Plug.pm

    r10347 r11090  
    307307    my $outhandle = $self->{'outhandle'};
    308308
    309     # Make sure we're processing the correct file
    310     my $filename = &util::filename_cat($base_dir, $file);
    311     return 0 if $self->{'block_exp'} ne "" && $filename =~ /$self->{'block_exp'}/;
    312     if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) {
    313     return undef;
    314     }
     309    #check for associate_ext, blocking etc
     310    my ($block_status,$filename) = $self->read_block(@_);   
     311    return $block_status if ((!defined $block_status) || ($block_status==0));
     312
    315313    print STDERR "<Processing n='$file' p='MP3Plug'>\n" if ($gli);
    316314    print $outhandle "MP3Plug processing \"$filename\"\n"
  • trunk/gsdl/perllib/plugins/NULPlug.pm

    r10978 r11090  
    9595    my $outhandle = $self->{'outhandle'};
    9696
    97     # Make sure we're processing the correct file
    98     my $filename = &util::filename_cat($base_dir, $file);
    99     return 0 if $self->{'block_exp'} ne "" && $filename =~ /$self->{'block_exp'}/;
    100     if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) {
    101     return undef;
    102     }
     97    #check for associate_ext, blocking etc
     98    my ($block_status,$filename) = $self->read_block(@_);   
     99    return $block_status if ((!defined $block_status) || ($block_status==0));
     100
    103101    print STDERR "<Processing n='$file' p='NULPlug'>\n" if ($gli);
    104102    print $outhandle "NULPlug processing \"$filename\"\n"
  • trunk/gsdl/perllib/plugins/OggVorbisPlug.pm

    r10254 r11090  
    8787    my $outhandle = $self->{'outhandle'};
    8888
    89     # filename is the full pathname of the file
    90     my $filename = &util::filename_cat($base_dir, $file);
    91     return 0 if $self->{block_exp} ne "" && $filename =~ /$self->{'block_exp'}/;
    92     if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) {
    93     return undef;
    94     }
     89    #check process and block exps, smart block, etc
     90    my ($block_status,$filename) = $self->read_block(@_);   
     91    return $block_status if ((!defined $block_status) || ($block_status==0));
    9592
    96     # Report that we're processing the file
     93     # Report that we're processing the file
    9794    print STDERR "<Processing n='$file' p='OggVorbisPlug'>\n" if ($gli);
    9895    print $outhandle "OggVorbisPlug: processing $file\n"
  • trunk/gsdl/perllib/plugins/OpenDocumentPlug.pm

    r10997 r11090  
    148148    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    149149
    150     my $filename = $file;
    151     $filename = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/;
    152 
    153     if ($self->{'block_exp'} ne "" && $filename =~ /$self->{'block_exp'}/) {
    154     $self->{'num_blocked'} ++;
    155     return 0;
    156     }
    157     if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) {
    158     return undef;
    159     }
     150    # check process and block exps, smart block, associate_ext etc
     151    my ($block_status,$filename) = $self->read_block(@_);   
     152    return $block_status if ((!defined $block_status) || ($block_status==0));
    160153
    161154    $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up
  • trunk/gsdl/perllib/plugins/PagedImgPlug.pm

    r10613 r11090  
    539539    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    540540    my $outhandle   = $self->{'outhandle'};
    541     my $smart_block = $self->{'smart_block'};
    542    
    543     my $filename = &util::filename_cat($base_dir, $file);
    544    
    545     if ($self->associate_with($file,$filename,$metadata)) {
    546     # a form of smart block
    547     $self->{'num_blocked'} ++;
    548     return 0; # blocked
    549     }
    550    
    551     if ($smart_block) {
    552     if (defined $self->{'file_blocks'}->{$filename} && $self->{'file_blocks'}->{$filename} == 1){
    553         $self->{'num_blocked'} ++;
    554         return 0; # blocked
    555     }
    556     } elsif ($self->{'block_exp'} ne "" && $filename =~ /$self->{'block_exp'}/) {
    557     $self->{'num_blocked'} ++;
    558     return 0; # blocked
    559     }
    560    
    561     if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) {
    562     return undef;
    563     }
     541   
     542    #check process and block exps, smart block, etc
     543    my ($block_status,$filename) = $self->read_block(@_);   
     544    return $block_status if ((!defined $block_status) || ($block_status==0));
    564545   
    565546    print $outhandle "PagedImgPlug processing \"$filename\"\n"
     
    731712    # assign pagenum as  what??
    732713    my $pagenum = $_{'pagenum'}; #TODO!!
    733     $doc_obj->set_utf8_metadata_element($self->{'current_section'}, 'PageNum', $pagenum);
     714    if (defined $pagenum) {
     715        $doc_obj->set_utf8_metadata_element($self->{'current_section'}, 'PageNum', $pagenum);
     716    }
    734717    my ($imgfile) = $_{'imgfile'};
    735718    if (defined $imgfile) {
     
    832815    $doc_obj->set_OIDtype ($processor->{'OIDtype'});
    833816    my $topsection = $doc_obj->get_top_section();
     817    $doc_obj->add_utf8_metadata($topsection, "Plugin", "$self->{'plugin_type'}");
     818    $doc_obj->add_metadata($topsection, "FileFormat", "PagedImg");
    834819
    835820    if ($self->{'documenttype'} eq 'paged') {
     
    849834    next unless $line =~ /\w/;
    850835    chomp $line;
     836    next if $line =~ /^#/; # ignore comment lines
    851837    if ($line =~ /^<([^>]*)>\s*(.*?)\s*$/) {
    852838        $doc_obj->set_utf8_metadata_element ($topsection, $1, $2);
  • trunk/gsdl/perllib/plugins/RealMediaPlug.pm

    r10395 r11090  
    8686    my $outhandle = $self->{'outhandle'};
    8787
    88     # filename is the full pathname of the file
    89     my $filename = &util::filename_cat($base_dir, $file);
    90     return 0 if $self->{block_exp} ne "" && $filename =~ /$self->{'block_exp'}/;
    91     if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) {
    92     return undef;
    93     }
     88    #check process and block exps, smart block, etc
     89    my ($block_status,$filename) = $self->read_block(@_);   
     90    return $block_status if ((!defined $block_status) || ($block_status==0));
    9491
    9592    # Report that we're processing the file
  • trunk/gsdl/perllib/plugins/RogPlug.pm

    r10254 r11090  
    6464
    6565    return bless $self, $class;
    66 }
    67 
    68 sub is_recursive {
    69     my $self = shift (@_);
    70 
    71     return 0; # this is not a recursive plugin
    7266}
    7367
  • trunk/gsdl/perllib/plugins/SplitPlug.pm

    r10254 r11090  
    9393    $self->{'process_exp'} = $self->get_default_process_exp ();
    9494    if ($self->{'process_exp'} eq "") {
    95         warn ref($self) . " Warning: Non-recursive plugin has no process_exp\n";
     95        warn ref($self) . " Warning: plugin has no process_exp\n";
    9696    }
    9797    }
     
    185185    my $verbosity = $self->{'verbosity'};
    186186
    187     # Figure out the exact filename of this file (and maybe block it)
    188     my $filename = &util::filename_cat($base_dir, $file);
    189     my $block_exp = $self->{'block_exp'};
    190     return 0 if $self->{'block_exp'} ne "" && $filename =~ /$self->{'block_exp'}/;
    191     if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) {
    192     return undef;
    193     }
     187    #check process and block exps, smart block, etc
     188    my ($block_status,$filename) = $self->read_block(@_);   
     189    return $block_status if ((!defined $block_status) || ($block_status==0));
     190
    194191    $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up
    195192
     
    228225    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "SourceSegment", "$segment");
    229226    if ($self->{'cover_image'}) {
    230       $self->associate_cover_image($doc_obj, $filename);
     227        $self->associate_cover_image($doc_obj, $filename);
    231228    }
    232229    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
  • trunk/gsdl/perllib/plugins/UnknownPlug.pm

    r10985 r11090  
    179179
    180180    # Make sure we're processing the correct file
    181     my $filename = &util::filename_cat($base_dir, $file);
    182     return 0 if $self->{'block_exp'} ne "" && $filename =~ /$self->{'block_exp'}/;
    183     if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) {
    184     return undef;
    185     }
     181    my ($block_status,$filename) = $self->read_block(@_);   
     182    return $block_status if ((!defined $block_status) || ($block_status==0));
     183
    186184    print STDERR "<Processing n='$file' p='UnknownPlug'>\n" if ($gli);
    187185    print $outhandle "UnknownPlug processing \"$filename\"\n"
  • trunk/gsdl/perllib/plugins/W3ImgPlug.pm

    r10347 r11090  
    250250# image extraction done through read()
    251251sub process {
    252     my ($self, $textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
     252    my ($self, $textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
    253253    $self->{'imglist'} = ();
    254254    if ( $self->{'index_pages'} ) {
    255     my $ok = $self->SUPER::process($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj);
     255    my $ok = $self->SUPER::process($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli);
    256256    if ( ! $ok ) { return $ok }
    257257    $self->{'htdoc_obj'} = $doc_obj;
  • trunk/gsdl/perllib/plugins/XMLPlug.pm

    r10254 r11090  
    6969    $self = (defined $hashArgOptLists)? new BasPlug($pluginlist,$inputargs,$hashArgOptLists): new BasPlug($pluginlist,$inputargs);
    7070
     71    if ($self->{'info_only'}) {
     72    # don't worry about any options etc
     73    return bless $self, $class;
     74    }
     75
    7176    my $parser = new XML::Parser('Style' => 'Stream',
    7277                 'Handlers' => {'Char' => \&Char,
     
    153158    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    154159
    155     my $filename = $file;
    156     $filename = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/;
    157 
    158     if ($self->{'block_exp'} ne "" && $filename =~ /$self->{'block_exp'}/) {
    159     $self->{'num_blocked'} ++;
    160     return 0;
    161     }
    162     if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) {
    163     return undef;
    164     }
     160    # Make sure we're processing the correct file, do blocking etc
     161    my ($block_status,$filename) = $self->read_block(@_);   
     162    return $block_status if ((!defined $block_status) || ($block_status==0));
    165163
    166164    $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up
  • trunk/gsdl/perllib/plugins/ZIPPlug.pm

    r10254 r11090  
    8282    my $self = (defined $hashArgOptLists)? new BasPlug($pluginlist,$inputargs,$hashArgOptLists): new BasPlug($pluginlist,$inputargs);
    8383
    84     # BasPlug is explicitly set not to set process_exp if recursive plugin
    85     # Not sure of this reasoning.  Want it to be set in ZIPPlug, so explicitly
    86     # pass it in as default value
    87    
    88     ## do we need this???? --kjdon
    89     if (!$self->{'process_exp'}) {
    90     $self->{'process_exp'} = get_default_process_exp();
    91     }
    92 
    9384    return bless $self, $class;
    9485}
     
    113104    my $outhandle = $self->{'outhandle'};
    114105
    115     if ($file =~ /$self->{'process_exp'}/) {
    116 
    117     my $filename = $file;
    118     $filename = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/;
    119     if (!-e $filename) {
    120         print $outhandle "ZIPPLug: WARNING: $filename does not exist\n";
    121         return undef;
    122     }
     106    # check process_exp, block_exp, associate_ext etc
     107    my ($block_status,$filename) = $self->read_block(@_);   
     108    return $block_status if ((!defined $block_status) || ($block_status==0));
    123109   
    124     my ($file_only) = $file =~ /([^\\\/]*)$/;
    125     my $tmpdir = &util::get_tmp_filename ();
    126     &util::mk_all_dir ($tmpdir);
    127 
    128     print $outhandle "ZIPPlug: extracting $file_only to $tmpdir\n"
    129         if $self->{'verbosity'} > 1;
    130 
    131     # save current working directory
    132     my $cwd = cwd();
    133     chdir ($tmpdir) || die "Unable to change to $tmpdir";
    134     &util::cp ($filename, $tmpdir);
    135    
    136     if ($file =~ /\.bz$/i) {
    137         $self->bunzip ($file_only);
    138     } elsif ($file =~ /\.bz2$/i) {
    139         $self->bunzip2 ($file_only);
    140     } elsif ($file =~ /\.(zip|jar)$/i) {
    141         $self->unzip ($file_only);
    142     } elsif ($file =~ /\.tar$/i) {
    143         $self->untar ($file_only);
    144     } else {
    145         $self->gunzip ($file_only);
    146     }
    147 
    148     chdir ($cwd) || die "Unable to change back to $cwd";
    149 
    150     my $numdocs = &plugin::read ($pluginfo, "", $tmpdir, $metadata, $processor, $maxdocs, $total_count, $gli);
    151     &util::rm_r ($tmpdir);
    152 
    153     $self->{'num_archives'} ++;
    154 
    155     return $numdocs;
    156 
     110    my ($file_only) = $file =~ /([^\\\/]*)$/;
     111    my $tmpdir = &util::get_tmp_filename ();
     112    &util::mk_all_dir ($tmpdir);
     113   
     114    print $outhandle "ZIPPlug: extracting $file_only to $tmpdir\n"
     115    if $self->{'verbosity'} > 1;
     116   
     117    # save current working directory
     118    my $cwd = cwd();
     119    chdir ($tmpdir) || die "Unable to change to $tmpdir";
     120    &util::cp ($filename, $tmpdir);
     121   
     122    if ($file =~ /\.bz$/i) {
     123    $self->bunzip ($file_only);
     124    } elsif ($file =~ /\.bz2$/i) {
     125    $self->bunzip2 ($file_only);
     126    } elsif ($file =~ /\.(zip|jar)$/i) {
     127    $self->unzip ($file_only);
     128    } elsif ($file =~ /\.tar$/i) {
     129    $self->untar ($file_only);
    157130    } else {
    158     return undef;
     131    $self->gunzip ($file_only);
    159132    }
     133   
     134    chdir ($cwd) || die "Unable to change back to $cwd";
     135   
     136    my $numdocs = &plugin::read ($pluginfo, "", $tmpdir, $metadata, $processor, $maxdocs, $total_count, $gli);
     137    &util::rm_r ($tmpdir);
     138   
     139    $self->{'num_archives'} ++;
     140   
     141    return $numdocs;
     142   
    160143}
    161144
Note: See TracChangeset for help on using the changeset viewer.