Changeset 24691


Ignore:
Timestamp:
2011-09-29T16:51:48+13:00 (10 years ago)
Author:
davidb
Message:

Improved multi-lingual support

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/classify/AZCompactList.pm

    r23354 r24691  
    2727package AZCompactList;
    2828
     29use strict;
     30no strict 'refs'; # allow filehandles to be variables and vice versa
     31
    2932use BaseClassifier;
    3033use sorttools;
    3134
    32 use strict;
    33 no strict 'refs'; # allow filehandles to be variables and viceversa
     35use Unicode::Normalize;
    3436
    3537sub BEGIN {
     
    4042    [ { 'name' => "top",
    4143    'desc' => "{AZCompactList.doclevel.top}" },
     44      { 'name' => "firstlevel",
     45    'desc' => "{AZCompactList.doclevel.firstlevel}" },
    4246      { 'name' => "section",
    4347    'desc' => "{AZCompactList.doclevel.section}" } ];
     
    200204    push(@sectionlist,$topsection);
    201205    }
    202     else
     206    elsif ($self->{'doclevel'} =~ /^first(level)?/i)
     207    {
     208    my $toplevel_children = $doc_obj->get_children($topsection);
     209    push(@sectionlist,@$toplevel_children);
     210    }
     211    else # (all)?section(s)?
    203212    {
    204213    my $thissection = $doc_obj->get_next_section($topsection);
     
    389398        else
    390399        {
     400            # first(level)? or (all)?section(s)?
    391401            eval ("\$listclassobj = new SectionList([],\$ptArgs)");
    392402        }
     
    489499        {
    490500        my $section=$1;
    491         if ($self->{'doclevel'} =~ m/^top/i) { # toplevel
     501        if ($self->{'doclevel'} =~ m/^top(level)?/i) { # toplevel
    492502            $self->{'classifiers'}->{$node_name}->{'classifyobj'}
    493503            ->classify($doc_obj,"Section=$section");
    494         } else { # section level
    495             # Thanks to Don Gourley for this...
    496             # classify can't handle multi-level section
     504        } else {
     505            # first(level)? or (all)?section(s)?
     506
     507            # classify() can't handle multi-level section, so use
     508            # classify_section()
     509            # ... thanks to Don Gourley for this...
     510
    497511            $self->{'classifiers'}->{$node_name}->{'classifyobj'}
    498512            ->classify_section($section, $doc_obj, $sortmeta);
     
    683697    my $title = $self->{'reclassifylist'}->{$classification};
    684698    $title =~ s/&(.){2,4};//g; # remove any HTML special chars
    685     $title =~ s/^\W+//g; # remove leading non-word chars
     699    $title =~ s/^(\W|_)+//g; # remove leading non-word chars and underscores
    686700
    687701    # only want first character for classification
     
    689703    if (defined($1) && $1 ne "") {
    690704        $title=$1;
     705
     706        # remove any accents on initial character by mapping to Unicode's
     707        # normalized decomposed form (accents follow initial letter)
     708        # and then pick off the initial letter
     709        my $title_decomposed = NFD($title);
     710        $title = substr($title_decomposed,0,1);
    691711    } else {
    692712        print STDERR "no first character found for \"$title\" - \"" .
Note: See TracChangeset for help on using the changeset viewer.