Changeset 2845


Ignore:
Timestamp:
2001-11-23T12:38:00+13:00 (22 years ago)
Author:
sjboddie
Message:

Caught SplitPlug up with recent changes

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/SplitPlug.pm

    r2735 r2845  
    117 117    my $plugin_name = ref ($self);
    118 118    $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up
    119    
    120     my ($language, $encoding);
    121     if ($self->{'input_encoding'} eq "auto") {
    122     # use textcat to automatically work out the input encoding and language
    123     ($language, $encoding) = $self->get_language_encoding ($filename);
    124    
    125     } elsif ($self->{'extract_language'}) {
    126     # use textcat to get language metadata
    127     ($language, $extracted_encoding) = $self->get_language_encoding ($filename);
    128     $encoding = $self->{'input_encoding'};
    129    
    130     if ($extracted_encoding ne $encoding && $self->{'verbosity'}) {
    131         print $outhandle "$plugin_name: WARNING: $file was read using $encoding encoding but ";
    132         print $outhandle "appears to be encoded as $extracted_encoding.\n";
    133     }
    134    
    135     } else {
    136     $language = $self->{'default_language'};
    137     $encoding = $self->{'input_encoding'};
    138     }
     119
     120    # Do encoding stuff
     121    my ($language, $encoding) = $self->textcat_get_language_encoding ($filename);
    139 122
    140 123    # Read in file ($text will be in utf8)
     
    146 129    print $outhandle "$plugin_name: ERROR: $file contains no text\n"
    147 130        if $self->{'verbosity'};
     131
     132    my $failhandle = $self->{'failhandle'};
     133    print $failhandle "$file: " . ref($self) . ": file contains no text\n";
     134    $self->{'num_not_processed'} ++;
     135
    148 136    return 0;
    149 137    }
     
    162 150    foreach $segtext (@segments) {
    163 151    $segment++;
    164    
     152
    165 153    # create a new document
    166 154    my $doc_obj = new doc ($filename, "indexed_doc");
     
    168 156    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Language", $language);
    169 157    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Encoding", $encoding);
     158    my ($filemeta) = $file =~ /([^\\\/]+)$/;
     159    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Source", &ghtml::dmsafe($filemeta));
     160    if ($self->{'cover_image'}) {
     161      $self->associate_cover_image($doc_obj, $filename);
     162    }
    170 163
    171 164    # Calculate a "base" document ID.
     
    195 188    $self->set_OID($doc_obj, $id, $segment);
    196 189
    197    
    198 190    # process the document
    199 191    $processor->process($doc_obj);
     192
     193    $self->{'num_processed'} ++;
    200 194    }
    201 195
Note: See TracChangeset for help on using the changeset viewer.