Changeset 862
- Timestamp: 2000-01-21T16:59:04+13:00 (24 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/plugins/GMLPlug.pm
r849 r862 86 86 87 87 # read in the document 88 my $gml = "";89 my $line = "";90 91 88 if ($gz) { 92 89 if (!open (INFILE, "zcat $fullname |")) { … … 102 99 103 100 undef $/; 104 while (defined ($line = <INFILE>)) { 105 $gml .= $line; 106 } 101 my $gml = <INFILE>; 107 102 $/ = "\n"; 108 109 103 close (INFILE); 110 104 111 105 my @gml_sections = split("</gsdlsection>",$gml); 112 106 $gml = shift(@gml_sections); 113 107 114 108 my $no_docs = 0; 115 my $src_filename = ""; 116 117 while (1) 118 { 109 # my $src_filename = ""; #### don't appear to use this anymore - not sure if that's right 110 111 while (1) { 119 112 # create a new document 120 113 my $doc_obj = new doc (); … … 123 116 # process the document 124 117 my $firstsection = 1; 125 while (1) 126 { 127 my $tags = ""; 128 my $text = ""; 129 130 my @indenting_sections = split("<gsdlsection",$gml); 118 while (1) { 119 my ($tags, $text) = ("", ""); 120 121 my @indenting_sections = split("<gsdlsection", $gml); 131 122 shift(@indenting_sections); # first entry is trivially empty 132 #### print STDERR "**** no indenting sections = ", scalar(@indenting_sections), "\n"; 133 134 foreach $gml (@indenting_sections) 135 { 136 #### print STDERR "\n\n\n\n!!!!!!!!!!! 
gml = $gml\n\n\n"; 137 138 if ($gml =~ m/^\s*([^>]*)>(.*)$/so) 139 { 140 $tags = $1 if (defined $1); 123 124 foreach $gml (@indenting_sections) { 125 126 if ($gml =~ /^\s*([^>]*)>(.*)$/so) { 127 $tags = $1 if defined $1; 141 128 $text = &GMLPlug::_unescape_text($2); 142 } 143 else 144 { 129 130 } else { 145 131 print STDERR "GMLPlug::read - error in file $fullname\n"; 146 132 print STDERR "text: \"$gml\"\n"; … … 149 135 150 136 # create the section (unless this is the first section) 151 if ($firstsection) 152 { 137 if ($firstsection) { 153 138 $firstsection = 0; 154 155 print STDERR " 0 of $src_filename\n" if ($no_docs==1); 156 $tags =~ m/gsdlsourcefilename\s*=\s*(\"([^\"]*)\")|(\w+)/o; 157 $src_filename = $2 || $3; 158 print STDERR " $no_docs of $src_filename\n" if ($no_docs>=1); 159 } 160 else 161 { 162 #### print STDERR "*** tags = $tags\n"; 163 164 if ($tags =~ s/gsdlnum\s*=\s*\"?(\d+)\"?//o) 165 { 139 # $tags =~ /gsdlsourcefilename\s*=\s*(?:\"([^\"]*)\")/o; 140 # $src_filename = $2 || $3; 141 142 } else { 143 144 $tags =~ s/gsdlnum\s*=\s*\"?(\d+)\"?//o; 145 if (defined $1) { 166 146 $section .= ".$1"; 167 147 $doc_obj->create_named_section($section); 168 } 169 else 170 { 148 } else { 171 149 $section = $doc_obj->insert_section($doc_obj->get_end_child($section)); 172 150 } 173 151 } 174 152 175 176 153 # add the tags 177 while ((defined $tags) 178 && ($tags =~ s/^\s*(\w+)\s*=\s*\"([^\"]*)\"//o)) 179 { 180 $doc_obj->add_utf8_metadata($section, $1, 181 &GMLPlug::_unescape_text($2)) 154 while ((defined $tags) && ($tags =~ s/^\s*(\w+)=\"([^\"]*)\"//o)) { 155 $doc_obj->add_utf8_metadata($section, $1, &GMLPlug::_unescape_text($2)) 182 156 if (defined $1 and defined $2); 183 157 } … … 189 163 190 164 $gml = shift(@gml_sections); # get next bit of data 191 last if (!defined $gml); 192 ### print "####### before section = $section\n"; 165 last unless defined $gml; 193 166 $section = $doc_obj->get_parent_section ($section); 194 ### print "####### after section = 
$section\n"; 195 #### last if ($section eq ""); 196 } 197 198 # add the associated files 199 $assoc_files = $doc_obj->get_metadata($doc_obj->get_top_section(), "gsdlassocfile"); 200 my ($assoc_file_info); 201 foreach $assoc_file_info (@$assoc_files) { 202 my ($assoc_file, $mime_type, $dir) = split (":", $assoc_file_info); 203 $dir = "" unless defined $dir; 204 $doc_obj->associate_file(&util::filename_cat($parent_dir, $assoc_file), 205 &util::filename_cat($dir, $assoc_file), $mime_type); 167 last if $section eq ""; # back to top level again (more than one document in gml file) 168 } 169 170 # add the associated files 171 $assoc_files = $doc_obj->get_metadata($doc_obj->get_top_section(), "gsdlassocfile"); 172 my ($assoc_file_info); 173 foreach $assoc_file_info (@$assoc_files) { 174 my ($assoc_file, $mime_type, $dir) = split (":", $assoc_file_info); 175 $dir = "" unless defined $dir; 176 $doc_obj->associate_file(&util::filename_cat($parent_dir, $assoc_file), 177 &util::filename_cat($dir, $assoc_file), $mime_type); 206 178 } 207 179 $doc_obj->delete_metadata($doc_obj->get_top_section(), "gsdlassocfile"); 208 180 209 181 # add metadata 210 182 foreach $field (keys(%$metadata)) { … … 218 190 } 219 191 } 220 221 192 222 193 # assume the document has an OID 223 194 224 195 # process the document 225 196 $processor->process($doc_obj, $file); 226 197 227 198 $no_docs++; 228 229 last if (defined $maxdocs && $maxdocs =~ /\d/ && $no_docs >= $maxdocs); 230 231 last if (!defined $gml); 199 last if ($maxdocs > -1 && $no_docs >= $maxdocs); 200 last unless defined $gml; 232 201 } 233 202
Note: See TracChangeset for help on using the changeset viewer.