- Timestamp: 1999-12-13T16:57:18+13:00 (24 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/plugins/GMLPlug.pm
r809 r840 25 25 26 26 # plugin which processes a GML format document 27 # assumes that gml tags are all in lower-case. 27 28 28 29 package GMLPlug; … … 33 34 use doc; 34 35 35 36 36 sub BEGIN { 37 37 @ISA = ('BasPlug'); … … 44 44 return bless $self, $class; 45 45 } 46 46 47 47 48 sub is_recursive { … … 68 69 sub read { 69 70 my $self = shift (@_); 70 my ($pluginfo, $base_dir, $file, $metadata, $processor ) = @_;71 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_; 71 72 my $fullname = &util::filename_cat ($base_dir, $file); 72 73 73 74 # see if this is a gml book 74 return undef unless (-f $fullname && $fullname =~ /\.gml(\.gz)?$/i );75 76 my ($parent_dir, $gz) = $fullname =~ /^(.*?)[\/\\][^\/\\]+.gml(\.gz)?$/i ;77 78 if (defined $gz && $gz =~ /\.gz/i ) {75 return undef unless (-f $fullname && $fullname =~ /\.gml(\.gz)?$/io); 76 77 my ($parent_dir, $gz) = $fullname =~ /^(.*?)[\/\\][^\/\\]+.gml(\.gz)?$/io; 78 79 if (defined $gz && $gz =~ /\.gz/io) { 79 80 $gz = 1; 80 81 } else { 81 82 $gz = 0; 82 83 } 83 84 # create a new document85 my $doc_obj = new doc ();86 my $section = $doc_obj->get_top_section();87 84 88 85 print STDERR "GMLPlug: processing $file\n"; … … 104 101 } 105 102 103 undef $/; 106 104 while (defined ($line = <INFILE>)) { 107 105 $gml .= $line; 108 106 } 107 $/ = "\n"; 108 109 109 close (INFILE); 110 110 111 # process the document 112 my $firstsection = 1; 113 while ($gml =~ /\S/) { 114 if ($gml =~ s/^\s*<gsdlsection([^>]*)>(.*?)(<\/?gsdlsection)/$3/is) { 115 my $tags = $1; 116 $tags = "" unless defined $tags; 117 my $text = &_unescape_text($2); 118 119 # create the section (unless this is the first section) 120 if (!$firstsection) { 121 $tags =~ s/gsdlnum\s*=\s*\"?(\d+)\"?//i; 122 if (defined $1) { 123 $section .= ".$1"; 124 $doc_obj->create_named_section($section); 125 } else { 126 $section = $doc_obj->insert_section($doc_obj->get_end_child($section)); 127 } 111 my @gml_sections = split("</gsdlsection>",$gml); 112 $gml = 
shift(@gml_sections); 113 114 my $no_docs = 0; 115 my $src_filename = ""; 116 117 while (1) 118 { 119 # create a new document 120 my $doc_obj = new doc (); 121 my $section = $doc_obj->get_top_section(); 122 123 # process the document 124 my $firstsection = 1; 125 while (1) 126 { 127 my $tags = ""; 128 my $text = ""; 129 130 my @indenting_sections = split("<gsdlsection",$gml); 131 shift(@indenting_sections); # first entry is trivially empty 132 #### print STDERR "**** no indenting sections = ", scalar(@indenting_sections), "\n"; 133 134 foreach $gml (@indenting_sections) 135 { 136 #### print STDERR "\n\n\n\n!!!!!!!!!!! gml = $gml\n\n\n"; 137 138 if ($gml =~ m/^\s*([^>]*)>(.*)$/so) 139 { 140 $tags = $1 if (defined $1); 141 $text = &GMLPlug::_unescape_text($2); 142 } 143 else 144 { 145 print STDERR "GMLPlug::read - error in file $fullname\n"; 146 print STDERR "text: \"$gml\"\n"; 147 last; 148 } 149 150 # create the section (unless this is the first section) 151 if ($firstsection) 152 { 153 $firstsection = 0; 154 155 print STDERR " 0 of $src_filename\n" if ($no_docs==1); 156 $tags =~ m/gsdlsourcefilename\s*=\s*(\"([^\"]*)\")|(\w+)/o; 157 $src_filename = $2 || $3; 158 print STDERR " $no_docs of $src_filename\n" if ($no_docs>=1); 159 } 160 else 161 { 162 #### print STDERR "*** tags = $tags\n"; 163 164 $tags =~ s/gsdlnum\s*=\s*\"?(\d+)\"?//o; 165 if (defined $1) 166 { 167 $section .= ".$1"; 168 $doc_obj->create_named_section($section); 169 } 170 else 171 { 172 $section = $doc_obj->insert_section($doc_obj->get_end_child($section)); 173 } 174 } 175 176 177 # add the tags 178 while ((defined $tags) 179 && ($tags =~ s/^\s*(\w+)\s*=\s*\"([^\"]*)\"//o)) 180 { 181 $doc_obj->add_utf8_metadata($section, $1 , 182 &GMLPlug::_unescape_text($2)) 183 if (defined $1 and defined $2); 184 } 185 186 # add the text 187 $doc_obj->add_utf8_text($section, $text) 188 if ((defined $text) && ($text ne "")); 128 189 } 129 $firstsection = 0; 130 131 # add the tags 132 while ((defined $tags) && 
($tags =~ s/^\s*(\w+)\s*=\s*\"([^\"]*)\"//)) { 133 $doc_obj->add_utf8_metadata($section, $1 , &_unescape_text($2)) 134 if (defined $1 and defined $2); 135 } 136 137 # add the text 138 $doc_obj->add_utf8_text($section, $text) 139 if ((defined $text) && ($text ne "")); 140 141 } elsif ($gml =~ s/^\s*<\/gsdlsection>//) { 190 191 $gml = shift(@gml_sections); # get next bit of data 192 last if (!defined $gml); 193 ### print "####### before section = $section\n"; 142 194 $section = $doc_obj->get_parent_section ($section); 143 144 } else { 145 print STDERR "GMLPlug::read - error in file $fullname\n"; 146 print STDERR "text: \"$gml\"\n"; 147 last; 148 } 149 } 150 195 ### print "####### after section = $section\n"; 196 #### last if ($section eq ""); 197 } 198 199 <<<<<<< GMLPlug.pm 200 # add the associated files 201 $assoc_files 202 = $doc_obj->get_metadata($doc_obj->get_top_section(), 203 "gsdlassocfile"); 204 my ($assoc_file_info); 205 foreach $assoc_file_info (@$assoc_files) { 206 my ($assoc_file, $mime_type) = split (":", $assoc_file_info); 207 $doc_obj->associate_file(&util::filename_cat($parent_dir, $assoc_file), 208 $assoc_file, $mime_type); 209 ======= 151 210 # add the associated files 152 211 $assoc_files = $doc_obj->get_metadata($doc_obj->get_top_section(), "gsdlassocfile"); … … 157 216 $doc_obj->associate_file(&util::filename_cat($parent_dir, $assoc_file), 158 217 &util::filename_cat($dir, $assoc_file), $mime_type); 218 >>>>>>> 1.10 159 219 160 } 161 $doc_obj->delete_metadata($doc_obj->get_top_section(), "gsdlassocfile"); 162 163 # add metadata 164 foreach $field (keys(%$metadata)) { 165 # $metadata->{$field} may be an array reference 166 if (ref ($metadata->{$field}) eq "ARRAY") { 167 map { 168 $doc_obj->add_metadata ($doc_obj->get_top_section(), $field, $_); 169 } @{$metadata->{$field}}; 170 } else { 171 $doc_obj->add_metadata ($doc_obj->get_top_section(), $field, $metadata->{$field}); 172 } 173 } 174 175 176 # assume the document has an OID 177 178 # process 
the document 179 $processor->process($doc_obj, $file); 180 181 return 1; # processed the file 220 } 221 $doc_obj->delete_metadata($doc_obj->get_top_section(), "gsdlassocfile"); 222 223 # add metadata 224 foreach $field (keys(%$metadata)) { 225 # $metadata->{$field} may be an array reference 226 if (ref ($metadata->{$field}) eq "ARRAY") { 227 map { 228 $doc_obj->add_metadata ($doc_obj->get_top_section(), $field, $_); 229 } @{$metadata->{$field}}; 230 } else { 231 $doc_obj->add_metadata ($doc_obj->get_top_section(), $field, $metadata->{$field}); 232 } 233 } 234 235 236 # assume the document has an OID 237 238 # process the document 239 $processor->process($doc_obj, $file); 240 241 $no_docs++; 242 243 last if (defined $maxdocs && $maxdocs =~ /\d/ && $no_docs >= $maxdocs); 244 245 last if (!defined $gml); 246 } 247 248 return $no_docs; # no of docs processed 182 249 } 183 250
Note: See TracChangeset for help on using the changeset viewer.