Changeset 24691

Show
Ignore:
Timestamp:
29.09.2011 16:51:48 (8 years ago)
Author:
davidb
Message:

Improved multi-lingual support

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/classify/AZCompactList.pm

    r23354 r24691  
    2727package AZCompactList; 
    2828 
     29use strict; 
     30no strict 'refs'; # allow filehandles to be variables and viceversa 
     31 
    2932use BaseClassifier; 
    3033use sorttools; 
    3134 
    32 use strict; 
    33 no strict 'refs'; # allow filehandles to be variables and viceversa 
     35use Unicode::Normalize; 
    3436 
    3537sub BEGIN { 
     
    4042    [ { 'name' => "top", 
    4143    'desc' => "{AZCompactList.doclevel.top}" }, 
     44      { 'name' => "firstlevel", 
     45    'desc' => "{AZCompactList.doclevel.firstlevel}" }, 
    4246      { 'name' => "section", 
    4347    'desc' => "{AZCompactList.doclevel.section}" } ]; 
     
    200204    push(@sectionlist,$topsection); 
    201205    } 
    202     else 
     206    elsif ($self->{'doclevel'} =~ /^first(level)?/i) 
     207    { 
     208    my $toplevel_children = $doc_obj->get_children($topsection); 
     209    push(@sectionlist,@$toplevel_children); 
     210    } 
     211    else # (all)?section(s)? 
    203212    { 
    204213    my $thissection = $doc_obj->get_next_section($topsection); 
     
    389398        else 
    390399        { 
     400            # first(level)? or (all)?section(s)? 
    391401            eval ("\$listclassobj = new SectionList([],\$ptArgs)"); 
    392402        } 
     
    489499        { 
    490500        my $section=$1; 
    491         if ($self->{'doclevel'} =~ m/^top/i) { # toplevel 
     501        if ($self->{'doclevel'} =~ m/^top(level)?/i) { # toplevel 
    492502            $self->{'classifiers'}->{$node_name}->{'classifyobj'} 
    493503            ->classify($doc_obj,"Section=$section"); 
    494         } else { # section level 
    495             # Thanks to Don Gourley for this... 
    496             # classify can't handle multi-level section 
     504        } else {  
     505            # first(level)? or (all)?section(s)?  
     506 
     507            # classify() can't handle multi-level section, so use 
     508            # classify_section() 
     509            # ... thanks to Don Gourley for this... 
     510 
    497511            $self->{'classifiers'}->{$node_name}->{'classifyobj'} 
    498512            ->classify_section($section, $doc_obj, $sortmeta); 
     
    683697    my $title = $self->{'reclassifylist'}->{$classification}; 
    684698    $title =~ s/&(.){2,4};//g; # remove any HTML special chars 
    685     $title =~ s/^\W+//g; # remove leading non-word chars 
     699    $title =~ s/^(\W|_)+//g; # remove leading non-word chars and underscores (\W alone does not match '_') 
    686700 
    687701    # only want first character for classification 
     
    689703    if (defined($1) && $1 ne "") { 
    690704        $title=$1; 
     705 
     706        # remove any accents on initial character by mapping to Unicode's 
     707        # normalized decomposed form (accents follow initial letter) 
     708        # and then pick off the initial letter  
     709        my $title_decomposed = NFD($title);  
     710        $title = substr($title_decomposed,0,1); 
    691711    } else { 
    692712        print STDERR "no first character found for \"$title\" - \"" .