Ignore:
Timestamp:
2000-07-13T10:21:53+12:00 (24 years ago)
Author:
sjboddie
Message:

merged changes to trunk into New_Config_Format branch

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/New_Config_Format-branch/gsdl/perllib/plugins/GMLPlug.pm

    r1010 r1279  
    3737}
    3838
     39use strict;
     40
    3941sub new {
    4042    my ($class) = @_;
    41     $self = new BasPlug ();
     43    my $self = new BasPlug ("GMLPlug", @_);
    4244
    4345    return bless $self, $class;
    4446}
    4547
    46 
    47 sub is_recursive {
     48sub get_default_process_exp {
    4849    my $self = shift (@_);
    4950
    50     return 0; # this is not a recursive plugin
    51 }
    52 
    53 sub _unescape_text {
    54     my ($text) = @_;
    55 
    56     # special characters in the gml encoding
    57     $text =~ s/&lt;/</g;
    58     $text =~ s/&gt;/>/g;
    59     $text =~ s/&quot;/\"/g;
    60     $text =~ s/&amp;/&/g; # this has to be last...
    61 
    62     return $text;
     51    return q^(?i)\.gml?$^;
    6352}
    6453
     
    6958    my $self = shift (@_);
    7059    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_;
    71     my $fullname = &util::filename_cat ($base_dir, $file);
    7260
    73     # see if this is a gml book
    74     return undef unless (-f $fullname && $fullname =~ /\.gml(\.gz)?$/io);
    75 
    76     my ($parent_dir, $gz) = $fullname =~ /^(.*?)[\/\\][^\/\\]+.gml(\.gz)?$/io;
    77 
    78     if (defined $gz && $gz =~ /\.gz/io) {
    79     $gz = 1;
    80     } else {
    81     $gz = 0;
     61    my $filename = &util::filename_cat($base_dir, $file);
     62    return 0 if $self->{'block_exp'} ne "" && $filename =~ /$self->{'block_exp'}/;
     63    if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) {
     64    return undef;
    8265    }
     66    $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up
    8367
    8468    print STDERR "GMLPlug: processing $file\n";
    8569
    86     # read in the document
    87     if ($gz) {
    88     if (!open (INFILE, "zcat $fullname |")) {
    89         print STDERR "GMLPlug::read - zcat couldn't read $fullname\n";
    90         return undef;
    91     }
    92     } else {
    93     if (!open (INFILE, $fullname)) {
    94         print STDERR "GMLPlug::read - couldn't read $fullname\n";
    95         return undef;
    96     }
     70    my $parent_dir = $file;
     71    $parent_dir =~ s/[^\\\/]*$//;
     72    $parent_dir = &util::filename_cat ($base_dir, $parent_dir);
     73
     74    if (!open (INFILE, $filename)) {
     75    print STDERR "GMLPlug::read - couldn't read $filename\n";
     76    return 0;
    9777    }
    9878
     
    10686
    10787    my $no_docs = 0;
    108 #    my $src_filename = ""; #### don't appear to use this anymore - not sure if that's right
    10988
    11089    while (1) {
     
    128107
    129108        } else {
    130             print STDERR "GMLPlug::read - error in file $fullname\n";
     109            print STDERR "GMLPlug::read - error in file $filename\n";
    131110            print STDERR "text: \"$gml\"\n";
    132111            last;
     
    166145        last if $section eq ""; # back to top level again (more than one document in gml file)
    167146        $section = $doc_obj->get_parent_section ($section);
    168     } #while (1) section level
     147    } # while (1) section level
    169148
    170149    # add the associated files
    171     $assoc_files = $doc_obj->get_metadata($doc_obj->get_top_section(), "gsdlassocfile");
     150    my $assoc_files = $doc_obj->get_metadata($doc_obj->get_top_section(), "gsdlassocfile");
    172151    my ($assoc_file_info, $afile);
    173152    foreach $assoc_file_info (@$assoc_files) {
     
    186165    $self->extra_metadata ($doc_obj, $doc_obj->get_top_section(), $metadata);
    187166   
    188     # assume the document has an OID
     167    # do any automatic metadata extraction
     168    $self->auto_extract_metadata ($doc_obj);
     169
     170    # assume the document has an OID already
    189171   
    190172    # process the document
     
    194176    last if ($maxdocs > -1 && $no_docs >= $maxdocs);
    195177    last unless defined $gml && $gml =~ /\w/;
    196     } #while(1) document level
     178    } # while(1) document level
    197179
    198180    return $no_docs; # no of docs processed
    199181}
    200182
     183sub _unescape_text {
     184    my ($text) = @_;
     185
     186    # special characters in the gml encoding
     187    $text =~ s/&lt;/</g;
     188    $text =~ s/&gt;/>/g;
     189    $text =~ s/&quot;/\"/g;
     190    $text =~ s/&amp;/&/g; # this has to be last...
     191
     192    return $text;
     193}
    201194
    202 195 1;
Note: See TracChangeset for help on using the changeset viewer.