Changeset 24586


Ignore:
Timestamp:
09/13/11 20:54:35 (9 years ago)
Author:
ak19
Message:

Dr Bainbridge fixed the Unicode bug that Diego had found when using an hfile to specify the Hierarchy of a classifier. Until now the hfile was read like a regular cfg file, but it needs to be read in UTF-8 mode to preserve the encoding of the characters in the file.

Location:
main/trunk/greenstone2/perllib
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/cfgread.pm

    r20390 r24586  
    137137    } else {
    138138    print STDERR "cfgread::read_cfg_file couldn't read the cfg file $filename\n";
     139    }
     140
     141    return $data;
     142}
     143
     144# If the cfg file contains unicode characters, use this method to read from it
     145# Used by HFileHierarchy classifier, since an HFile is read as a cfg file, but
     146# can contain unicode characters.
     147sub read_cfg_file_unicode {
     148    my ($filename, $stringexp, $arrayexp, $hashexp, $arrayarrayexp,
     149    $hashhashexp) = @_;
     150    my ($line);
     151    my $data = {};
     152
     153    if (open (COLCFG, $filename)) {
     154    binmode(COLCFG,":utf8");
     155    while (defined ($line = &read_cfg_line('COLCFG'))) {
     156        if (scalar(@$line) >= 2) {
     157
     158        #map { decode("utf8",$_) } @$line; #use Encode;
     159
     160        my $key = shift (@$line);
     161        if (defined $stringexp && $key =~ /$stringexp/) {
     162            $data->{$key} = shift (@$line);
     163
     164        } elsif (defined $arrayexp && $key =~ /$arrayexp/) {
     165            push (@{$data->{$key}}, @$line);
     166
     167        } elsif (defined $hashexp && $key =~ /$hashexp/) {
     168            my $k = shift @$line;
     169            my $v = shift @$line;
     170            $data->{$key}->{$k} = $v;
     171        } elsif (defined $arrayarrayexp && $key =~ /$arrayarrayexp/) {
     172            if (!defined $data->{$key}) {
     173            $data->{$key} = [];
     174            }
     175            push (@{$data->{$key}}, $line);
     176        }
     177        elsif (defined $hashhashexp && $key =~ /$hashhashexp/) {
     178            my $k = shift @$line;
     179            my $p = shift @$line;
     180            my $v = shift @$line;
     181            if (!defined $v) {
     182            $v = $p;
     183            $p = 'default';
     184            }
     185            $data->{$key}->{$k}->{$p} = $v;
     186        }
     187        }
     188    }
     189    close (COLCFG);
     190
     191    } else {
     192    print STDERR "cfgread::read_cfg_file_unicode couldn't read the cfg file $filename\n";
    139193    }
    140194
  • main/trunk/greenstone2/perllib/classify/HFileHierarchy.pm

    r23116 r24586  
    163163    my $subjectfile = $self->{'subjectfile'};
    164164    if (defined $subjectfile) {
    165     # read in the subject file
    166     my $list = &cfgread::read_cfg_file ($self->{'subjectfile'}, undef, '^[^#]?\S');
     165    # read in the subject file, but read in unicode mode to preserve special characters
     166    my $list = &cfgread::read_cfg_file_unicode ($self->{'subjectfile'}, undef, '^[^#]?\S');
    167167    # $list is a hash that is indexed by the descriptor. The contents of this
    168168    # hash is a list of two items. The first item is the OID and the second item
Note: See TracChangeset for help on using the changeset viewer.