Changeset 2845
- Timestamp: 2001-11-23T12:38:00+13:00 (23 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
trunk/gsdl/perllib/plugins/SplitPlug.pm
r2735 r2845 117 117 my $plugin_name = ref ($self); 118 118 $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up 119 120 my ($language, $encoding); 121 if ($self->{'input_encoding'} eq "auto") { 122 # use textcat to automatically work out the input encoding and language 123 ($language, $encoding) = $self->get_language_encoding ($filename); 124 125 } elsif ($self->{'extract_language'}) { 126 # use textcat to get language metadata 127 ($language, $extracted_encoding) = $self->get_language_encoding ($filename); 128 $encoding = $self->{'input_encoding'}; 129 130 if ($extracted_encoding ne $encoding && $self->{'verbosity'}) { 131 print $outhandle "$plugin_name: WARNING: $file was read using $encoding encoding but "; 132 print $outhandle "appears to be encoded as $extracted_encoding.\n"; 133 } 134 135 } else { 136 $language = $self->{'default_language'}; 137 $encoding = $self->{'input_encoding'}; 138 } 119 120 # Do encoding stuff 121 my ($language, $encoding) = $self->textcat_get_language_encoding ($filename); 139 122 140 123 # Read in file ($text will be in utf8) … … 146 129 print $outhandle "$plugin_name: ERROR: $file contains no text\n" 147 130 if $self->{'verbosity'}; 131 132 my $failhandle = $self->{'failhandle'}; 133 print $failhandle "$file: " . ref($self) . 
": file contains no text\n"; 134 $self->{'num_not_processed'} ++; 135 148 136 return 0; 149 137 } … … 162 150 foreach $segtext (@segments) { 163 151 $segment++; 164 152 165 153 # create a new document 166 154 my $doc_obj = new doc ($filename, "indexed_doc"); … … 168 156 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Language", $language); 169 157 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Encoding", $encoding); 158 my ($filemeta) = $file =~ /([^\\\/]+)$/; 159 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Source", &ghtml::dmsafe($filemeta)); 160 if ($self->{'cover_image'}) { 161 $self->associate_cover_image($doc_obj, $filename); 162 } 170 163 171 164 # Calculate a "base" document ID. … … 195 188 $self->set_OID($doc_obj, $id, $segment); 196 189 197 198 190 # process the document 199 191 $processor->process($doc_obj); 192 193 $self->{'num_processed'} ++; 200 194 } 201 195
Note: See TracChangeset for help on using the changeset viewer.