Ignore:
Timestamp:
2000-07-13T10:21:53+12:00 (24 years ago)
Author:
sjboddie
Message:

merged changes to trunk into New_Config_Format branch

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/New_Config_Format-branch/gsdl/perllib/multiread.pm

    r627 r1279  
    2626# encodings currently supported are
    2727#
    28 # utf8     - either utf8 or unicode (automatically detected)
    29 # unicode  - just unicode (doesn't currently do endian detection)
    30 # gb       - GB
    31 # extended - extended ascii
    32 
     28# utf8         - either utf8 or unicode (automatically detected)
     29# unicode      - just unicode (doesn't currently do endian detection)
     30# gb           - GB
     31# iso_8859_1   - extended ascii (iso-8859-1)
     32# iso_8859_6   - 8 bit arabic (iso-8859-6)
     33# windows_1256 - Windows codepage 1256 (Arabic)
    3334
    3435package multiread;
     
    169170    }
    170171
    171     if ($self->{'encoding'} eq "extended") {
    172     # extended ascii
     172    if ($self->{'encoding'} eq "iso_8859_1") {
     173    # Latin 1 extended ascii (ISO-8859-1)
    173174    return undef if (eof ($handle));
    174175    return &unicode::ascii2utf8 (getc ($handle));
     176    }
     177
     178    if ($self->{'encoding'} eq "iso_8859_6") {
     179    # 8 bit Arabic (ISO-8859-6)
     180    return undef if (eof ($handle));
     181    return &unicode::unicode2utf8(&unicode::arabic2unicode (getc ($handle)));
     182    }
     183
     184    if ($self->{'encoding'} eq "windows_1256") {
     185    # Windows 1256 (Arabic)
     186    return undef if (eof ($handle));
     187    return &unicode::unicode2utf8(&unicode::windows2unicode ("1256", getc ($handle)));
    175188    }
    176189
     
    236249    }
    237250   
    238     if ($self->{'encoding'} eq "extended") {
    239     # extended ascii
     251    if ($self->{'encoding'} eq "iso_8859_1") {
     252    # extended ascii (ISO-8859-1)
    240253    my $line = "";
    241254    if (defined ($line = <$handle>)) {
     
    244257    return undef;
    245258    }
     259   
     260    if ($self->{'encoding'} eq "iso_8859_6") {
     261    # 8 bit arabic (ISO-8859-6)
     262    my $line = "";
     263    if (defined ($line = <$handle>)) {
     264        return &unicode::unicode2utf8(&unicode::arabic2unicode ($line));
     265    }
     266    return undef;
     267    }
     268   
     269    if ($self->{'encoding'} eq "windows_1256") {
     270    # Windows 1256 (Arabic)
     271    my $line = "";
     272    if (defined ($line = <$handle>)) {
     273        return &unicode::unicode2utf8(&unicode::windows2unicode ("1256", $line));
     274    }
     275    return undef;
     276    }
    246277
    247278    # unknown encoding
     
    250281
    251282
     283# will convert entire contents of file to utf8 and append result to $outputref
     284# this may be a slightly faster way to get the contents of a file than by
     285# recursively calling read_line()
     286sub read_file {
     287    my $self = shift (@_);
     288    my ($outputref) = @_;
     289
     290    # make sure we have a file handle
     291    return if ($self->{'handle'} eq "");
     292
     293    my $handle = $self->{'handle'};
     294
     295    if ($self->{'first'} && $self->{'encoding'} eq "utf8") {
     296    # special case for the first line of utf8 text to detect whether
     297    # the file is in utf8 or unicode
     298    $$text .= $self->read_line ();
     299    }
     300
     301    if ($self->{'encoding'} eq "utf8") {
     302    undef $/;
     303    $$outputref .=  <$handle>;
     304    $/ = "\n";
     305    return;
     306    }
     307
     308    if ($self->{'encoding'} eq "unicode") {
     309    my $line = "";
     310    while (defined ($line = $self->read_line())) {
     311        $$outputref .= $line;
     312    }
     313    return;
     314    }
     315
     316    if ($self->{'encoding'} eq "gb") {
     317    undef $/;
     318    my $text = <$handle>;
     319    $/ = "\n";
     320    $$outputref .= &unicode::unicode2utf8 (&gb::gb2unicode ($text));
     321    return;
     322    }
     323   
     324    if ($self->{'encoding'} eq "iso_8859_1") {
     325    undef $/;
     326    my $text = <$handle>;
     327    $/ = "\n";
     328    $$outputref .= &unicode::ascii2utf8 ($text);
     329    return;
     330    }
     331   
     332    if ($self->{'encoding'} eq "iso_8859_6") {
     333    my $text = <$handle>;
     334    undef $/;
     335    $/ = "\n";
     336    $$outputref .= &unicode::unicode2utf8(&unicode::arabic2unicode ($text));
     337    return;
     338    }
     339
     340    if ($self->{'encoding'} eq "windows_1256") {
     341    undef $/;
     342    my $text = <$handle>;
     343    $/ = "\n";
     344    $$outputref .= &unicode::unicode2utf8(&unicode::windows2unicode ("1256", $text));
     345    return;
     346    }
     347}
     348
     349
    2523501;
Note: See TracChangeset for help on using the changeset viewer.