Ignore:
Timestamp:
2005-03-14T09:44:10+13:00 (19 years ago)
Author:
davidb
Message:

Introduction of GISBasPlug for Geographic Information System support.
It was decided to put GISBasPlug in the main code, even though the rest
of what is required is bundled as an extension that must be installed
separately. GISBasPlug (included through BasPlug) only becomes active
if it can see the mapdata directory that is installed as part of the
extension. In becoming active, it adds extra -minus options to the
plugin.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/BasPlug.pm

    r9351 r9398  
    2626package BasPlug;
    2727
     28BEGIN {
     29    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
     30}
     31
    2832eval {require bytes};
    2933
     
    4852use printusage;
    4953
     54use GISBasPlug;
     55
     56@ISA = ( GISBasPlug );
    5057
    5158my $unicode_list =
     
    106113    'desc' => "{BasPlug.markup_acronyms}",
    107114    'type' => "flag",
    108     'reqd' => "no" }, 
     115    'reqd' => "no" },
    109116      { 'name' => "extract_keyphrases",
    110117    'desc' => "{BasPlug.extract_keyphrases}",
     
    147154    'reqd' => "no" } ];
    148155
     156my $gis_arguments =
     157    [ { 'name' => "extract_placenames",
     158    'desc' => "{GISBasPlug.extract_placenames}",
     159    'type' => "flag",
     160    'reqd' => "no" },
     161      { 'name' => "gazetteer",
     162    'desc' => "{GISBasPlug.gazetteer}",
     163    'type' => "string",
     164    'reqd' => "no" },
     165      { 'name' => "place_list",
     166    'desc' => "{GISBasPlug.place_list}",
     167    'type' => "flag",
     168    'reqd' => "no" } ];
     169
     170
    149171my $options = { 'name'     => "BasPlug",
    150172        'desc'     => "{BasPlug.desc}",
     
    301323    my $self = {};
    302324    $self->{'plugin_type'} = "BasPlug";
     325
     326    if (GISBasPlug::has_mapdata()) {
     327    push(@$arguments,@$gis_arguments);
     328    }
     329
    303330    my $enc = "^(";
    304331    map {$enc .= "$_|";} keys %$encodings::encodings;
     
    328355             q^extract_language^, \$self->{'extract_language'},
    329356             q^extract_acronyms^, \$self->{'extract_acronyms'},
    330              q^extract_keyphrases^, \$self->{'kea'},
    331              q^extract_keyphrase_options/.*/^, \$self->{'kea_options'},
     357             q^extract_keyphrases^, \$self->{'kea'}, #with extra options (UNDOCUMENTED)
     358             q^extract_keyphrase_options/.*/^, \$self->{'kea_options'}, #no extra options (UNDOCUMENTED)
    332359             qq^input_encoding/$enc/auto^, \$self->{'input_encoding'},
    333360             qq^default_encoding/$denc/utf8^, \$self->{'default_encoding'},
    334361             q^extract_email^, \$self->{'extract_email'},
     362             q^extract_placenames^, \$self->{'extract_placenames'},
     363             q^gazetteer/.*/^, \$self->{'gazetteer'},
     364             q^place_list^, \$self->{'place_list'},
    335365             q^markup_acronyms^, \$self->{'markup_acronyms'},
    336366             q^default_language/.{2}/en^, \$self->{'default_language'},
     
    367397
    368398    $self->{'cover_image'} = 0 if ($no_cover_image);
    369    
     399
     400    if ($self->{'extract_placenames'}) {
     401
     402    my $outhandle = $self->{'outhandle'};
     403
     404    my $places_ref
     405        = GISBasPlug::loadGISDatabase($outhandle,$self->{'gazetteer'});
     406
     407    if (!defined $places_ref) {
     408        print $outhandle "Warning: Error loading mapdata gazetteer \"$self->{'gazetteer'}\"\n";
     409        print $outhandle "         No placename extraction will take place.\n";
     410        $self->{'extract_placenames'} = undef;
     411    }
     412    else {
     413        $self->{'places'} = $places_ref;
     414    }
     415    }   
    370416    return bless $self, $class;
    371417}
     
    626672    my $smart_block = $self->{'smart_block'};
    627673    my $smart_block_BN = $self->{'smart_block_BN'};
    628    
     674
    629675    my $filename = $file;
    630676    $filename = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/;
     
    663709
    664710    my ($filemeta) = $file =~ /([^\\\/]+)$/;
    665 
    666711    # how do we know what encoding the filename is in?
    667712    $doc_obj->add_metadata($doc_obj->get_top_section(), "Source", &ghtml::dmsafe($filemeta));
     
    690735
    691736    $self->extra_metadata ($doc_obj, $doc_obj->get_top_section(), $metadata);
    692    
     737
    693738    # do plugin specific processing of doc_obj
    694739    unless (defined ($self->process (\$text, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli))) {
     
    699744    $text='';
    700745    undef $text;
    701 
     746   
    702747    # do any automatic metadata extraction
    703748    $self->auto_extract_metadata ($doc_obj);
     
    716761    $processor->process($doc_obj);
    717762
     763    if(defined($self->{'places_filename'})){
     764    &util::rm($self->{'places_filename'});
     765    $self->{'places_filename'} = undef;
     766    }
     767
    718768    $self->{'num_processed'} ++;
    719769    undef $doc_obj;
     
    784834    return $file_derived_title;
    785835}
     836
    786837
    787838sub title_fallback
     
    9521003    foreach my $field (keys(%$metadata)) {
    9531004    # $metadata->{$field} may be an array reference
    954 
    9551005    if ($field eq "gsdlassocfile_tobe") {
    9561006        # 'gsdlassocfile_tobe' is artificially introduced metadata
     
    10631113    }
    10641114    }
    1065 
     1115    if ($self->{'extract_placenames'}) {
     1116    my $thissection = $doc_obj->get_top_section();
     1117    while (defined $thissection) {
     1118        my $text = $doc_obj->get_text($thissection);
     1119        $self->extract_placenames (\$text, $doc_obj, $thissection) if $text =~ /./;
     1120        $thissection = $doc_obj->get_next_section ($thissection);
     1121    }
     1122    }
    10661123
    10671124    # adding kea keyphrases
    1068 
    1069     if ($self->{'kea'}) { 
     1125    if ($self->{'kea'}) {
    10701126   
    10711127    my $thissection = $doc_obj->get_top_section();
     
    10791135        $thissection = $doc_obj->get_next_section ($thissection);
    10801136    }
    1081        
    1082 
    1083     if($self->{'kea_options'}) {
     1137       
     1138    if ($self->{'kea_options'}) {
    10841139        #if kea options flag is set, call Kea with specified options
    10851140        $list = &Kea::extract_KeyPhrases ($text, $self->{'kea_options'});
     
    10881143        $list = &Kea::extract_KeyPhrases ($text);
    10891144    }
    1090     if($list){
     1145    if ($list){
    10911146        # if a list of kea keyphrases was returned (ie not empty)
    10921147        &gsprintf(STDERR, "{BasPlug.keyphrases}: $list\n");
     
    11041159        }
    11051160    }
    1106     }
    1107  
    1108     #end of kea
     1161    } #end of kea
    11091162
    11101163    if ($self->{'first'}) {
Note: See TracChangeset for help on using the changeset viewer.