- Timestamp:
- 2000-07-13T10:21:53+12:00 (24 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/New_Config_Format-branch/gsdl/perllib/plugins/GMLPlug.pm
r1010 r1279 37 37 } 38 38 39 use strict; 40 39 41 sub new { 40 42 my ($class) = @_; 41 $self = new BasPlug ();43 my $self = new BasPlug ("GMLPlug", @_); 42 44 43 45 return bless $self, $class; 44 46 } 45 47 46 47 sub is_recursive { 48 sub get_default_process_exp { 48 49 my $self = shift (@_); 49 50 50 return 0; # this is not a recursive plugin 51 } 52 53 sub _unescape_text { 54 my ($text) = @_; 55 56 # special characters in the gml encoding 57 $text =~ s/</</g; 58 $text =~ s/>/>/g; 59 $text =~ s/"/\"/g; 60 $text =~ s/&/&/g; # this has to be last... 61 62 return $text; 51 return q^(?i)\.gml?$^; 63 52 } 64 53 … … 69 58 my $self = shift (@_); 70 59 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_; 71 my $fullname = &util::filename_cat ($base_dir, $file);72 60 73 # see if this is a gml book 74 return undef unless (-f $fullname && $fullname =~ /\.gml(\.gz)?$/io); 75 76 my ($parent_dir, $gz) = $fullname =~ /^(.*?)[\/\\][^\/\\]+.gml(\.gz)?$/io; 77 78 if (defined $gz && $gz =~ /\.gz/io) { 79 $gz = 1; 80 } else { 81 $gz = 0; 61 my $filename = &util::filename_cat($base_dir, $file); 62 return 0 if $self->{'block_exp'} ne "" && $filename =~ /$self->{'block_exp'}/; 63 if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) { 64 return undef; 82 65 } 66 $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up 83 67 84 68 print STDERR "GMLPlug: processing $file\n"; 85 69 86 # read in the document 87 if ($gz) { 88 if (!open (INFILE, "zcat $fullname |")) { 89 print STDERR "GMLPlug::read - zcat couldn't read $fullname\n"; 90 return undef; 91 } 92 } else { 93 if (!open (INFILE, $fullname)) { 94 print STDERR "GMLPlug::read - couldn't read $fullname\n"; 95 return undef; 96 } 70 my $parent_dir = $file; 71 $parent_dir =~ s/[^\\\/]*$//; 72 $parent_dir = &util::filename_cat ($base_dir, $parent_dir); 73 74 if (!open (INFILE, $filename)) { 75 print STDERR "GMLPlug::read - couldn't read $filename\n"; 76 return 0; 97 77 } 98 78 … … 106 86 107 87 my $no_docs = 0; 108 # my $src_filename = ""; #### don't appear to use this anymore - not sure if that's right109 88 110 89 while (1) { … … 128 107 129 108 } else { 130 print STDERR "GMLPlug::read - error in file $f ullname\n";109 print STDERR "GMLPlug::read - error in file $filename\n"; 131 110 print STDERR "text: \"$gml\"\n"; 132 111 last; … … 166 145 last if $section eq ""; # back to top level again (more than one document in gml file) 167 146 $section = $doc_obj->get_parent_section ($section); 168 } # while (1) section level147 } # while (1) section level 169 148 170 149 # add the associated files 171 $assoc_files = $doc_obj->get_metadata($doc_obj->get_top_section(), "gsdlassocfile");150 my $assoc_files = $doc_obj->get_metadata($doc_obj->get_top_section(), "gsdlassocfile"); 172 151 my ($assoc_file_info, $afile); 173 152 foreach $assoc_file_info (@$assoc_files) { … … 186 165 $self->extra_metadata ($doc_obj, $doc_obj->get_top_section(), $metadata); 187 166 188 # assume the document has an OID 167 # do any automatic metadata extraction 168 $self->auto_extract_metadata ($doc_obj); 169 170 # assume the document has an OID already 189 171 190 172 # process the document … … 194 176 last if ($maxdocs > -1 && $no_docs >= $maxdocs); 195 177 last unless defined $gml && $gml =~ /\w/; 196 } # while(1) document level178 } # while(1) document level 197 179 198 180 return $no_docs; # no of docs processed 199 181 } 200 182 183 sub _unescape_text { 184 my ($text) = @_; 185 186 # special characters in the gml encoding 187 $text =~ s/</</g; 188 $text =~ s/>/>/g; 189 $text =~ s/"/\"/g; 190 $text =~ s/&/&/g; # this has to be last... 191 192 return $text; 193 } 201 194 202 195 1;
Note:
See TracChangeset
for help on using the changeset viewer.