Ignore:
Timestamp:
1999-06-30T15:35:07+12:00 (25 years ago)
Author:
sjboddie
Message:
  • removed old infodb stuff
  • changed the way classifiers work
  • added maxdocs and allclassifications options
  • no longer get doctype from collect.cfg but instead set it directly in plugins that don't use the default
File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/classify.pm

    r232 r315  
    66
    77
    8 $next_classify_num = "0";
     8$next_classify_num = 1;
    99
    1010# load_classifier will load one classifier. $classinfo is an
     
    3939
    4040# init_classifiers resets all the classifiers and readies them to process
    41 # the documents. They are each given a unique classification OID
     41# the documents.
    4242sub init_classifiers {
    4343    my ($classifiers) = @_;
     
    4545    foreach $classobj (@$classifiers) {
    4646    $classobj->init();
    47     $classobj->set_OID ("CL$next_classify_num");
    48     $next_classify_num++;
    4947    }
    5048}
     
    6260# to the gdbm
    6361sub output_classify_info {
    64     my ($classifiers, $handle) = @_;
     62    my ($classifiers, $handle, $allclassifications) = @_;
    6563#    $handle = "main::STDOUT";
    6664
    67     my $contains_str = "";
    68     my $first = 1;
    69    
    70     # output each of the classifications
     65    # create a classification containing all the info
     66    my $classifyinfo = {'classifyOID'=>'browse',
     67            'contains'=>[]};
     68
     69    # get each of the classifications
    7170    foreach $classobj (@$classifiers) {
    72     $classobj->output_classify_info($handle);
    73     $contains_str .= ";" unless $first;
    74     $first = 0;
    75     $contains_str .= $classobj->get_OID();
     71    my $tempinfo = $classobj->get_classify_info();
     72    $tempinfo->{'classifyOID'} = "CL$next_classify_num";
     73    $next_classify_num++;
     74    push (@{$classifyinfo->{'contains'}}, $tempinfo);
    7675    }
    7776
    78     # output information about each of the classifications
    79     print $handle "[browse]\n";
    80     print $handle "<doctype>classify\n";
    81     print $handle "<hastxt>0\n";
    82     print $handle "<contains>$contains_str\n"
    83     unless $contains_str eq "";
    84     print $handle '-' x 70, "\n";
     77    &print_classify_info ($handle, $classifyinfo, "", $allclassifications);
    8578}
    8679
     80sub print_classify_info {
     81    my ($handle, $classifyinfo, $OID, $allclassifications) = @_;
     82    $OID =~ s/^\.+//; # just for good luck
     83
     84    # book information is printed elsewhere
     85    return if (defined ($classifyinfo->{'OID'}));
     86 
     87    # don't want empty classifications
     88    if ($allclassifications || &clean_contents ($classifyinfo)) {
     89   
     90    $OID = $classifyinfo->{'classifyOID'} if defined ($classifyinfo->{'classifyOID'});
     91   
     92    my $outputtext = "[$OID]\n";
     93    $outputtext .= "<doctype>classify\n";
     94    $outputtext .= "<hastxt>0\n";
     95    $outputtext .= "<classifytype>$classifyinfo->{'classifytype'}\n" if defined $classifyinfo->{'classifytype'};
     96    $outputtext .= "<Title>$classifyinfo->{'Title'}\n" if defined $classifyinfo->{'Title'};
     97   
     98    $outputtext .= "<contains>";
     99   
     100    my $next_subOID = 1;
     101    my $first = 1;
     102    foreach $tempinfo (@{$classifyinfo->{'contains'}}) {
     103        # empty contents were made undefined by clean_contents()
     104        next unless defined $tempinfo;
     105       
     106        $outputtext .= ";" unless $first;
     107        $first = 0;
     108       
     109        if (defined ($tempinfo->{'classifyOID'})) {
     110        $outputtext .= $tempinfo->{'classifyOID'};
     111        &print_classify_info ($handle, $tempinfo, $tempinfo->{'classifyOID'},
     112                      $allclassifications);
     113        } elsif (defined ($tempinfo->{'OID'})) {
     114        $outputtext .= $tempinfo->{'OID'};
     115        # note: we don't want to print the contents of the books
     116        } else {
     117        $outputtext .= "\".$next_subOID";
     118        &print_classify_info ($handle, $tempinfo, "$OID.$next_subOID",
     119                      $allclassifications);
     120        $next_subOID++;
     121        }
     122    }
     123    $outputtext .= "\n";
     124    $outputtext .= '-' x 70 . "\n";
     125   
     126    print $handle $outputtext;
     127    }
     128}
     129
     130sub clean_contents {
     131    my ($classifyinfo) = @_;
     132    my $has_content = 0;
     133
     134    foreach $content (@{$classifyinfo->{'contains'}}) {
     135    if (defined $content->{'OID'}) {
     136        # found a book
     137        $has_content = 1;
     138    } elsif (&clean_contents ($content)) {
     139        # there's a book somewhere below
     140        $has_content = 1;
     141    } else {
     142        # section contains no books so we want to remove
     143        # it from its parents contents
     144        $content = undef;
     145    }
     146    }
     147    return $has_content;
     148}
    87149
    881501;
Note: See TracChangeset for help on using the changeset viewer.