Ignore:
Timestamp:
2002-07-31T04:10:41+12:00 (22 years ago)
Author:
davidb
Message:

Classifier extended to support a frequency sort option through -freqsort

Instead of sorting bookshelf nodes alpha-numerically, this option
uses the number of books contained at the node for ordering.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/classify/AZCompactList.pm

    r3302 r3303  
    5858   -removeprefix regex    pattern to remove from metadata before sorting
    5959   -doclevel top|section  (Defaults to top)
     60   -freqsort              Sort by node frequency rather than alpha-numeric
    6061   -mingroup N            Minimum num of documents required to form a new group
    6162   -minnesting N          Minimum list size to become a nested list
     
    7879    my $doclevel = "top";
    7980    my $onlyfirst = 0;
     81    my $freqsort = 0;
    8082    my $recopt   = undef;
    8183
     
    9092             q^doclevel/.*/top^, \$doclevel,
    9193             q^onlyfirst/.*/0^, \$onlyfirst,
     94             q^freqsort/.*/0^, \$freqsort,
    9295             q^recopt/.*/-1^, \$recopt,
    9396
     
    127130    $self->{'onlyfirst'} = $onlyfirst;
    128131 
     132    if ($freqsort != 0) {
     133    $freqsort = 1;
     134    }
     135    $self->{'freqsort'} = $freqsort;
     136
    129137    if ($recopt == -1) {
    130138    $recopt = undef;
     
    475483}
    476484
     485sub frequency_cmp
     486{
     487    my ($self,$a,$b) = @_;
     488
     489
     490    my $title_a = $self->{'reclassifylist'}->{$a};
     491    my $title_b = $self->{'reclassifylist'}->{$b};
     492
     493    print STDERR "*** title a = $title_a\n";
     494    print STDERR "*** title b = $title_b\n";
     495
     496    my $a_freq = 1;
     497    my $b_freq = 1;
     498
     499    if ($a =~ m/^CLASSIFY\.(.*)$/)
     500    {
     501    my $a_node = $1;
     502    my $a_nodeinfo = $self->{'classifiers'}->{$a_node}->{'classifyinfo'};
     503    $a_freq = scalar(@{$a_nodeinfo->{'contains'}});
     504    }
     505   
     506    if ($b =~ m/^CLASSIFY\.(.*)$/)
     507    {
     508    my $b_node = $1;
     509    my $b_nodeinfo = $self->{'classifiers'}->{$b_node}->{'classifyinfo'};
     510    $b_freq = scalar(@{$b_nodeinfo->{'contains'}});
     511    }
     512
     513##    print STDERR "*** comparing $a_freq <-> $b_freq\n";
     514
     515    return $b_freq <=> $a_freq;
     516}
     517
    477518sub get_classify_info {
    478519    my $self = shift (@_);
     
    484525    $self->get_reclassify_info();
    485526
    486 
    487 #    my @reclassified_classlist
    488 #   = sort {$self->{'reclassifylist'}->{$a} cmp $self->{'reclassifylist'}->{$b};} keys %{$self->{'reclassifylist'}};
    489 
    490     # alpha_numeric_cmp is slower but handles numbers better ...
    491     my @reclassified_classlist
    492     = sort { $self->alpha_numeric_cmp($a,$b) } keys %{$self->{'reclassifylist'}};
    493 
     527    my @reclassified_classlist;
     528    if ($self->{'freqsort'})
     529    {
     530##  print STDERR "***** Away to start freq sort\n";
     531
     532    @reclassified_classlist
     533        = sort { $self->frequency_cmp($a,$b) } keys %{$self->{'reclassifylist'}};
     534    # supress sub-grouping by alphabet
     535    map { $self->{'reclassifylist'}->{$_} = "A".$self->{'reclassifylist'}; } keys %{$self->{'reclassifylist'}};
     536
     537    }
     538    else
     539    {
     540#   @reclassified_classlist
     541#       = sort {$self->{'reclassifylist'}->{$a} cmp $self->{'reclassifylist'}->{$b};} keys %{$self->{'reclassifylist'}};
     542
     543    # alpha_numeric_cmp is slower but handles numbers better ...
     544
     545    @reclassified_classlist
     546        = sort { $self->frequency_cmp($a,$b) } keys %{$self->{'reclassifylist'}};
     547
     548    }
    494549
    495550    return $self->splitlist (\@reclassified_classlist);
Note: See TracChangeset for help on using the changeset viewer.