Changeset 2025 for trunk/gsdl


Ignore:
Timestamp:
2001-02-20T15:09:22+13:00 (23 years ago)
Author:
paynter
Message:

You can now have several phind classifiers in one collection. This
required numbering the classifiers (1, 2, 3, ...), creating a separate phind
directory (phind1, phind2, ...) and set of filehandles for each, and adding
parameter information to the classifier info. The parameter information
lets us differentiate between the classifiers when we construct the Phind
web pages.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/classify/phind.pm

    r2008 r2025  
    155155    # Parse classifier arguments
    156156    my $builddir = "";
    157     my $phinddir = "";
    158157    if (!parsargv::parse(\@_,
    159158             q^text/.*/section:Title,section:text^, \$self->{'indexes'},
     
    185184    }
    186185    $self->{'builddir'} = $builddir;
    187     $self->{'phinddir'} = &util::filename_cat($builddir, "phind");
    188186   
    189187    return bless $self, $class;
     
    201199
    202200    # create phind directory
    203     my $phinddir = $self->{'phinddir'};
    204     if (-e "$phinddir") {
    205     &util::rm_r("$phinddir");
     201    my $phnumber = 1;
     202    my $phinddir = &util::filename_cat($builddir, "phind1");
     203    while (-e "$phinddir") {
     204    $phnumber++;
     205    $phinddir = &util::filename_cat($builddir, "phind$phnumber");
    206206    }
    207207    &util::mk_dir("$phinddir");
     208    $self->{'phinddir'} = $phinddir;
     209    $self->{'phindnumber'} = $phnumber;
    208210
    209211    # open filehandles for documents and text
    210212    my $clausefile =  &util::filename_cat("$phinddir", "clauses");
    211213    &util::rm($clausefile) if (-e $clausefile);
    212     open(TEXT, ">$clausefile") || die "Cannot open $clausefile: $!";
    213     $self->{'txthandle'} = TEXT;
     214
     215    my $txthandle = 'TEXT' . $phnumber;
     216    open($txthandle, ">$clausefile") || die "Cannot open $clausefile: $!";
     217    $self->{'txthandle'} = $txthandle;
    214218
    215219    my $docfile = &util::filename_cat("$phinddir", "docs.txt");
    216220    &util::rm($docfile) if (-e $docfile);
    217     open(DOCS, ">$docfile") || die "Cannot open $docfile: $!";
    218     $self->{'dochandle'} = DOCS;
     221
     222    my $dochandle = 'DOC' . $phnumber;
     223    open($dochandle, ">$docfile") || die "Cannot open $docfile: $!";
     224    $self->{'dochandle'} = $dochandle;
    219225   
    220226}
     
    229235sub classify {
    230236    my $self = shift (@_);
    231     my ($doc_obj) = @_;
     237    my $doc_obj = shift @_;
    232238
    233239    my $verbosity = $self->{'verbosity'};
     
    242248    my $doclanguage = $doc_obj->get_metadata_element ($top_section, "Language");
    243249    my $phrlanguage = $self->{'language_exp'};
    244 
    245     print STDERR "+ CLASSIFY - doclanguage: $doclanguage, phrlanguage $phrlanguage \n";
    246 
    247250    return if ($doclanguage && ($doclanguage !~ /$phrlanguage/i));
    248251   
     
    274277    ($level, $field) = split(/:/, $part);
    275278    die unless ($level && $field);
    276 
     279   
    277280    # Extract the text from every section
    278281    # (In phind, document:text and section:text are equivalent)
     
    301304        $section = $doc_obj->get_top_section();
    302305        while (defined($section)) {
    303         $dataref .= $doc_obj->get_metadata($section, $field);
    304         $data .= join("\n", $$dataref) . "\n";
     306        $dataref = $doc_obj->get_metadata($section, $field);
     307        $data .= join("\n", @$dataref) . "\n";
    305308        $section = $doc_obj->get_next_section($section);
    306309        }
     
    314317   
    315318    }
    316    
     319
    317320    # output the text
    318321    $text =~ tr/\n//s;
     
    338341    if ($verbosity) {
    339342    print $out "\n*** phind.pm generating indexes for ", $self->{'indexes'}, "\n";
     343    print $out "***          in", $self->{'phinddir'}, "\n";
    340344    }
    341345
     
    354358
    355359    # Create the phrase file and put phrase numbers in phind/phrases
    356     print $out "\nSorting and Renumbering phrases for input to mgpp\n" if $verbosity;
     360    print $out "\nSorting and renumbering phrases for input to mgpp\n" if $verbosity;
    357361    &renumber_phrases($self);
    358362   
     
    416420
    417421
    418     # Insert the classifier into.... what?
     422    # Return the information about the classifier that we'll later want to
     423    # use to create macros when the Phind classifier document is displayed.
     424    my %classifyinfo = ('thistype'=>'Invisible',
     425                        'childtype'=>'Phind',
     426                        'Title'=>$self->{'buttonname'},
     427                        'parameters'=>"phindnumber=$self->{'phindnumber'}",
     428            'contains'=>[]);
     429   
    419430    my $collection = $self->{'collection'};
    420431    my $url = "library?a=p&p=phind&c=$collection";
    421 
    422     my %classifyinfo = ('thistype'=>'Invisible',
    423             'childtype'=>'Phind',
    424             'Title'=>$self->{'buttonname'},
    425             'contains'=>[]);
    426    
    427432    push (@{$classifyinfo{'contains'}}, {'OID'=>$url});
     433   
    428434    return \%classifyinfo;
    429435}
     
    435441    my ($language_exp, $text) = @_;
    436442
    437     print STDERR "+ tokenising in $language_exp\n";
    438    
    439443    if ($language_exp =~ /en/) {
    440444    return &convert_gml_to_tokens_EN($text);
Note: See TracChangeset for help on using the changeset viewer.