Changeset 1224


Ignore:
Timestamp:
2000-06-21T10:26:01+12:00 (24 years ago)
Author:
sjboddie
Message:

added handling of arabic encoding and ability to read in an entire
file to a string

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/multiread.pm

    r627 r1224  
    2929# unicode  - just unicode (doesn't currently do endian detection)
    3030# gb       - GB
    31 # extended - extended ascii
    32 
     31# latin1   - extended ascii (iso-8859-1)
     32# arabic   - 8 bit arabic (iso-8859-6)
    3333
    3434package multiread;
     
    169169    }
    170170
    171     if ($self->{'encoding'} eq "extended") {
    172     # extended ascii
     171    if ($self->{'encoding'} eq "latin1") {
     172    # Latin 1 extended ascii (ISO-8859-1)
    173173    return undef if (eof ($handle));
    174174    return &unicode::ascii2utf8 (getc ($handle));
     175    }
     176
     177    if ($self->{'encoding'} eq "arabic") {
     178    # 8 bit Arabic (ISO-8859-6)
     179    return undef if (eof ($handle));
     180    return &unicode::unicode2utf8(&unicode::arabic2unicode (getc ($handle)));
    175181    }
    176182
     
    236242    }
    237243   
    238     if ($self->{'encoding'} eq "extended") {
    239     # extended ascii
     244    if ($self->{'encoding'} eq "latin1") {
     245    # extended ascii (ISO-8859-1)
    240246    my $line = "";
    241247    if (defined ($line = <$handle>)) {
     
    244250    return undef;
    245251    }
     252   
     253    if ($self->{'encoding'} eq "arabic") {
     254    # 8 bit arabic (ISO-8859-6)
     255    my $line = "";
     256    if (defined ($line = <$handle>)) {
     257        return &unicode::unicode2utf8(&unicode::arabic2unicode ($line));
     258    }
     259    return undef;
     260    }
    246261
    247262    # unknown encoding
     
    250265
    251266
     267# will convert entire contents of file to utf8 and append result to $outputref
     268sub read_file {
     269    my $self = shift (@_);
     270    my ($outputref) = @_;
     271
     272    # make sure we have a file handle
     273    return if ($self->{'handle'} eq "");
     274
     275    my $handle = $self->{'handle'};
     276
     277    if ($self->{'first'} && $self->{'encoding'} eq "utf8") {
     278    # special case for the first line of utf8 text to detect whether
     279    # the file is in utf8 or unicode
     280    $$text .= $self->read_line ();
     281    }
     282
     283    if ($self->{'encoding'} eq "utf8") {
     284    undef $/;
     285    $$outputref .=  <$handle>;
     286    $/ = "\n";
     287    return;
     288    }
     289
     290    if ($self->{'encoding'} eq "unicode") {
     291    my $line = "";
     292    while (defined ($line = $self->read_line())) {
     293        $$outputref .= $line;
     294    }
     295    return;
     296    }
     297
     298    if ($self->{'encoding'} eq "gb") {
     299    undef $/;
     300    $$outputref .= &unicode::unicode2utf8 (&gb::gb2unicode (<$handle>));
     301    $/ = "\n";
     302    return;
     303    }
     304   
     305    if ($self->{'encoding'} eq "latin1") {
     306    undef $/;
     307    $$outputref .= &unicode::ascii2utf8 (<$handle>);
     308    $/ = "\n";
     309    return;
     310    }
     311   
     312    if ($self->{'encoding'} eq "arabic") {
     313    undef $/;
     314    $$outputref .= &unicode::unicode2utf8(&unicode::arabic2unicode (<$handle>));
     315    $/ = "\n";
     316    return;
     317    }
     318}
     319
     320
    2523211;
Note: See TracChangeset for help on using the changeset viewer.