- Timestamp:
- 2000-07-13T10:21:53+12:00 (24 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/New_Config_Format-branch/gsdl/perllib/multiread.pm
r627 r1279 26 26 # encodings currently supported are 27 27 # 28 # utf8 - either utf8 or unicode (automatically detected) 29 # unicode - just unicode (doesn't currently do endian detection) 30 # gb - GB 31 # extended - extended ascii 32 28 # utf8 - either utf8 or unicode (automatically detected) 29 # unicode - just unicode (doesn't currently do endian detection) 30 # gb - GB 31 # iso_8859_1 - extended ascii (iso-8859-1) 32 # iso_8859_6 - 8 bit arabic (iso-8859-6) 33 # windows_1256 - Windows codepage 1256 (Arabic) 33 34 34 35 package multiread; … … 169 170 } 170 171 171 if ($self->{'encoding'} eq " extended") {172 # extended ascii172 if ($self->{'encoding'} eq "iso_8859_1") { 173 # Latin 1 extended ascii (ISO-8859-1) 173 174 return undef if (eof ($handle)); 174 175 return &unicode::ascii2utf8 (getc ($handle)); 176 } 177 178 if ($self->{'encoding'} eq "iso_8859_6") { 179 # 8 bit Arabic (ISO-8859-6) 180 return undef if (eof ($handle)); 181 return &unicode::unicode2utf8(&unicode::arabic2unicode (getc ($handle))); 182 } 183 184 if ($self->{'encoding'} eq "windows_1256") { 185 # Windows 1256 (Arabic) 186 return undef if (eof ($handle)); 187 return &unicode::unicode2utf8(&unicode::windows2unicode ("1256", getc ($handle))); 175 188 } 176 189 … … 236 249 } 237 250 238 if ($self->{'encoding'} eq " extended") {239 # extended ascii 251 if ($self->{'encoding'} eq "iso_8859_1") { 252 # extended ascii (ISO-8859-1) 240 253 my $line = ""; 241 254 if (defined ($line = <$handle>)) { … … 244 257 return undef; 245 258 } 259 260 if ($self->{'encoding'} eq "iso_8859_6") { 261 # 8 bit arabic (ISO-8859-6) 262 my $line = ""; 263 if (defined ($line = <$handle>)) { 264 return &unicode::unicode2utf8(&unicode::arabic2unicode ($line)); 265 } 266 return undef; 267 } 268 269 if ($self->{'encoding'} eq "windows_1256") { 270 # Windows 1256 (Arabic) 271 my $line = ""; 272 if (defined ($line = <$handle>)) { 273 return &unicode::unicode2utf8(&unicode::windows2unicode ("1256", $line)); 274 } 275 return 
undef; 276 } 246 277 247 278 # unknown encoding … … 250 281 251 282

# Converts whatever is left on the file handle to utf8 and appends the
# result to the scalar referenced by $outputref. This may be a slightly
# faster way to get the contents of a file than repeatedly calling
# read_line().
#
# Arguments:
#   $outputref - reference to the scalar the converted text is appended to
#
# Returns nothing. Does nothing when no file handle is set or when the
# encoding is not one of the names tested below.
sub read_file {
    my $self = shift (@_);
    my ($outputref) = @_;

    # make sure we have a file handle
    return if ($self->{'handle'} eq "");

    my $handle = $self->{'handle'};

    # Reads everything remaining on the handle in one go. "local $/" puts
    # the handle into slurp mode for this call only and restores the
    # caller's record separator afterwards, instead of forcing it back
    # to "\n". An exhausted handle yields "" rather than undef.
    my $slurp = sub {
        local $/;
        my $rest = <$handle>;
        return defined ($rest) ? $rest : "";
    };

    if ($self->{'first'} && $self->{'encoding'} eq "utf8") {
        # special case for the first line of utf8 text to detect whether
        # the file is in utf8 or unicode
        # NOTE(review): read_line() is presumably where that detection
        # happens and may change $self->{'encoding'} -- which is why the
        # encoding is only looked up again after this call. Confirm
        # against read_line().
        my $firstline = $self->read_line ();
        $$outputref .= $firstline if (defined ($firstline));
    }

    my $encoding = $self->{'encoding'};

    if ($encoding eq "utf8") {
        # already utf8, no conversion needed
        $$outputref .= &$slurp ();
        return;
    }

    if ($encoding eq "unicode") {
        # no bulk path for this encoding; go through read_line()
        my $line = "";
        while (defined ($line = $self->read_line ())) {
            $$outputref .= $line;
        }
        return;
    }

    if ($encoding eq "gb") {
        $$outputref .= &unicode::unicode2utf8 (&gb::gb2unicode (&$slurp ()));
        return;
    }

    if ($encoding eq "iso_8859_1") {
        # extended ascii (ISO-8859-1)
        $$outputref .= &unicode::ascii2utf8 (&$slurp ());
        return;
    }

    if ($encoding eq "iso_8859_6") {
        # 8 bit arabic (ISO-8859-6)
        $$outputref .= &unicode::unicode2utf8 (&unicode::arabic2unicode (&$slurp ()));
        return;
    }

    if ($encoding eq "windows_1256") {
        # Windows 1256 (Arabic)
        $$outputref .= &unicode::unicode2utf8 (&unicode::windows2unicode ("1256", &$slurp ()));
        return;
    }

    # unknown encoding: nothing is appended
    return;
}


1;
Note:
See TracChangeset
for help on using the changeset viewer.