Changeset 24586

Show
Ignore:
Timestamp:
13.09.2011 20:54:35 (8 years ago)
Author:
ak19
Message:

Dr Bainbridge fixed the Unicode bug that Diego found when using an hfile to specify the hierarchy of a classifier. Until now the hfile was read with the regular cfg file reader, but it needs to be read in UTF-8 mode to preserve the encoding of the characters in the file.

Location:
main/trunk/greenstone2/perllib
Files:
2 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/cfgread.pm

    r20390 r24586  
    137137    } else { 
    138138    print STDERR "cfgread::read_cfg_file couldn't read the cfg file $filename\n"; 
     139    } 
     140 
     141    return $data; 
     142} 
     143 
     144# If the cfg file contains unicode characters, use this method to read from it 
     145# Used by HFileHierarchy classifier, since an HFile is read as a cfg file, but  
     146# can contain unicode characters. 
     147sub read_cfg_file_unicode { 
     148    my ($filename, $stringexp, $arrayexp, $hashexp, $arrayarrayexp, 
     149    $hashhashexp) = @_; 
     150    my ($line); 
     151    my $data = {}; 
     152 
     153    if (open (COLCFG, $filename)) { 
     154    binmode(COLCFG,":utf8"); 
     155    while (defined ($line = &read_cfg_line('COLCFG'))) { 
     156        if (scalar(@$line) >= 2) { 
     157 
     158        #map { decode("utf8",$_) } @$line; #use Encode; 
     159 
     160        my $key = shift (@$line); 
     161        if (defined $stringexp && $key =~ /$stringexp/) { 
     162            $data->{$key} = shift (@$line); 
     163 
     164        } elsif (defined $arrayexp && $key =~ /$arrayexp/) { 
     165            push (@{$data->{$key}}, @$line); 
     166 
     167        } elsif (defined $hashexp && $key =~ /$hashexp/) { 
     168            my $k = shift @$line; 
     169            my $v = shift @$line; 
     170            $data->{$key}->{$k} = $v; 
     171        } elsif (defined $arrayarrayexp && $key =~ /$arrayarrayexp/) { 
     172            if (!defined $data->{$key}) { 
     173            $data->{$key} = []; 
     174            } 
     175            push (@{$data->{$key}}, $line); 
     176        } 
     177        elsif (defined $hashhashexp && $key =~ /$hashhashexp/) { 
     178            my $k = shift @$line; 
     179            my $p = shift @$line; 
     180            my $v = shift @$line; 
     181            if (!defined $v) { 
     182            $v = $p; 
     183            $p = 'default'; 
     184            } 
     185            $data->{$key}->{$k}->{$p} = $v; 
     186        } 
     187        } 
     188    } 
     189    close (COLCFG); 
     190 
     191    } else { 
     192    print STDERR "cfgread::read_cfg_file_unicode couldn't read the cfg file $filename\n"; 
    139193    } 
    140194 
  • main/trunk/greenstone2/perllib/classify/HFileHierarchy.pm

    r23116 r24586  
    163163    my $subjectfile = $self->{'subjectfile'}; 
    164164    if (defined $subjectfile) { 
    165     # read in the subject file 
    166     my $list = &cfgread::read_cfg_file ($self->{'subjectfile'}, undef, '^[^#]?\S'); 
     165    # read in the subject file, but read in unicode mode to preserve special characters 
     166    my $list = &cfgread::read_cfg_file_unicode ($self->{'subjectfile'}, undef, '^[^#]?\S'); 
    167167    # $list is a hash that is indexed by the descriptor. The contents of this 
    168168    # hash is a list of two items. The first item is the OID and the second item