source: trunk/gsdl/perllib/plugins/GISBasPlug.pm@ 10254

Last change on this file since 10254 was 10254, checked in by kjdon, 19 years ago

added 'use strict' to all plugins, and made modifications (mostly adding 'my') to make them compile

  • Property svn:keywords set to Author Date Id Revision
File size: 9.7 KB
Line 
1###########################################################################
2#
3# GISBasPlug.pm -- base class to enhance plugins with GIS capabilities
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package GISBasPlug;
27
28use util;
29use locale;
30
31use strict;
32no strict 'refs'; # allow filehandles to be variables and viceversa
33
34#field categories in DataBase files
35#$LAT = 3;
36#$LONG = 4;
37my $FC = 9;
38my $DSG = 10;
39#$CC1 = 12;
40my $FULL_NAME = 22;
41
42BEGIN {
43 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
44}
45
46
47use BasPlug; # uses BasPlug, but is not inherited
48
49
50my $options = { 'name' => "GISBasPlug",
51 'desc' => "{GISBasPlug.desc}",
52 'abstract' => "yes",
53 'inherits' => "no" };
54
55
56sub new {
57 my $class = shift (@_);
58 my $plugin_name = shift (@_);
59
60 my $self = {};
61 $self->{'plugin_type'} = "GISBasPlug";
62
63 $self->{'option_list'} = [ $options ];
64
65 return bless $self, $class;
66}
67
68sub init {
69}
70
71sub print_xml_usage
72{
73 BasPlug::print_xml_usage(@_);
74}
75
76sub print_xml
77{
78 BasPlug::print_xml(@_);
79}
80
81sub print_txt_usage
82{
83 BasPlug::print_txt_usage(@_);
84}
85
86sub has_mapdata
87{
88 my $db_dir = &util::filename_cat($ENV{'GSDLHOME'}, "lamp", "data");
89 return ( -d $db_dir );
90}
91
92
93#returns a hash table of names from database files (specified in collect.cfg).
94sub loadGISDatabase {
95 my ($outhandle,$datasets) = @_;
96 my @dbase = map{$_ = $_ . ".txt";} split(/,/, $datasets);
97 if(scalar(@dbase)==0) { #default is to include all databases
98 @dbase=("UK.txt", "NZ.txt", "NF.txt", "CA.txt", "AS.txt", "GM.txt", "BE.txt", "IN.txt", "JA.txt", "USA.txt");
99 }
100 my $counter=0;
101 my %places = ();
102 my @cats = ();
103 while($counter <= $#dbase){ #loop through all the databases
104 my $folder = $dbase[$counter];
105 $folder =~ s/(.*?)\.txt/$1/;
106#### my $dbName = &util::filename_cat($ENV{'GSDLHOME'}, "etc", "mapdata", "data", $folder, $dbase[$counter]);
107 my $dbName = &util::filename_cat($ENV{'GSDLHOME'}, "lamp", "data", $folder, $dbase[$counter]);
108 if (!open(FILEIN, "<$dbName")) {
109 print $outhandle "Unable to open database $dbName: $!\n";
110 return undef;
111 }
112
113 my $line = <FILEIN>; #database category details.
114 my @catdetails = split("\t", $line);
115
116 while ( defined($line = <FILEIN>)){ #stores all place names in file as keys in hash array %places
117 @cats = split("\t", $line);
118 if( #eliminating "bad" place names without missing real ones
119 ($cats[$FC] eq "A" && !($cats[$DSG] =~ /ADMD|ADM2|PRSH/))
120 ||($cats[$FC] eq "P" && ($cats[$DSG] =~ /PPLA|PPLC|PPLS/))
121 ||($cats[$FC] eq "H" && ($cats[$DSG] =~ /BAY|LK/))
122 ||($cats[$FC] eq "L" && !($cats[$DSG] =~ /LCTY|RGN/))
123 ||($cats[$FC] eq "T" && ($cats[$DSG] =~ /CAPE|ISL|GRGE/))
124 ||($dbase[$counter] eq "USA.txt" && ($cats[$DSG] =~ /ppl|island/))
125 ){$places{$cats[$FULL_NAME]} = [@cats];}
126 @cats = ();
127 }
128 close(FILEIN);
129 $counter++;
130 }
131 return \%places;
132}
133
134#returns a unique hash array of all the places found in a document, along with coordinates, description and country data
135sub getPlaces {
136 my $self = shift @_;
137
138 my ($textref, $places) = @_;
139 my %tempPlaces = ();
140
141 foreach my $plc (%$places){ #search for an occurrence of each place in the text
142 if($$textref =~ m/(\W)$plc(\W)/){
143 $tempPlaces{$plc} = $places->{$plc};
144 }
145 }
146 #make sure each place is only there once
147 my %uniquePlaces = ();
148 foreach my $p (keys %tempPlaces) {
149 if(!defined($uniquePlaces{$p})){
150 $uniquePlaces{$p} = $tempPlaces{$p};
151 }
152 }
153 return \%uniquePlaces;
154}
155
156
157
158
159#returns a lowercase version of the place, with no spaces
160sub placename_to_anchorname {
161 my ($placename) = @_;
162 my $p_tag = lc($placename);
163 $p_tag =~ s/\s+//g;
164 return $p_tag;
165}
166
167#takes a place from the text and wraps an anchor tag, a hyperlink tag and an image tag around it
168sub anchor_wrapper {
169 my ($p, $p_counter_ref, $path) = @_;
170 my $image = "/gsdlgis/gisimages/nextplace.gif";
171 my $endTag = "</a>";
172 my $hrefTag = "<a href=";
173 $$p_counter_ref++;
174 my $next = $$p_counter_ref + 1;
175 my $p_tag = placename_to_anchorname($p);
176 my $place_anchor = "<a name=\"" . $p_tag . $$p_counter_ref . "\">" . $endTag;
177 my $popup_anchor = $hrefTag . "'" . $path . "'>" . $p . $endTag;
178 my $image_anchor = $hrefTag . "#" . $p_tag . $next . "><img src=\"" . $image . "\" name=\"img" . $p_tag . $$p_counter_ref . "\" border=\"0\">" . $endTag;
179 return $place_anchor . $popup_anchor . $image_anchor;
180}
181
182#takes dangerous place names and checks if they are part of another placename or not.
183sub place_name_check {
184 my ($pre, $preSpace, $p, $postSpace, $post, $p_counter_ref, $path, $y) = @_;
185 if($pre =~ /$y/ || $post =~ /$y/) {return $pre . $preSpace . $p . $postSpace . $post;}
186 $pre = $pre . $preSpace;
187 $post = $postSpace . $post;
188 return $pre . &anchor_wrapper("", $p, "", $p_counter_ref, $path) . $post;
189}
190
191sub extract_placenames {
192 my $self = shift (@_);
193 my ($textref, $doc_obj, $thissection) = @_;
194 my $outhandle = $self->{'outhandle'};
195
196 my $GSDLHOME = $ENV{'GSDLHOME'};
197 #field categories in DataBase file for extract_placenames.
198 my $LAT = 3;
199 my $LONG = 4;
200 my $CC1 = 12;
201
202
203 &gsprintf($outhandle, " {BasPlug.extracting_placenames}...\n")
204 if ($self->{'verbosity'} > 2);
205
206 #get all the places found in the document
207 my $uniquePlaces = $self->getPlaces($textref, $self->{'places'});
208
209 #finds 'dangerous' placenames (eg York and New York). Dangerous because program will find "York" within "New York"
210 my %danger = ();
211 foreach my $x (keys %$uniquePlaces){
212 foreach my $y (keys %$uniquePlaces){
213 if(($y =~ m/ /) && ($y =~ m/$x/) && ($y ne $x)){
214 $y =~ s/($x\s)|(\s$x)//;
215 $danger{$x} = $y;
216 }
217 }
218 }
219
220 #creates a list of clickable placenames at top of page, linked to first occurrence of name, and reads them into a file
221 my $tempfname = $doc_obj;
222 $tempfname =~ s/.*\(0x(.*)\)/$1/;
223 my $names = "";
224 my $filename = "tmpfile" . $tempfname;
225 my $tempfile = &util::filename_cat($GSDLHOME, "tmp", $filename);
226 open(FOUT, ">$tempfile") || die "Unable to create a temp file: $!";
227 foreach my $name (sort (keys %$uniquePlaces)){
228 if(!defined($danger{$name})){
229 my $name_tag = placename_to_anchorname($name);
230 print FOUT "$name\t" . $uniquePlaces->{$name}->[$LONG] . "\t" . $uniquePlaces->{$name}->[$LAT] . "\n";
231 if($self->{'place_list'}) {$names = $names . "<a href=\"#" . $name_tag . "1\">" . $name . "</a>" . "\n";}
232 }
233 }
234 close(FOUT);
235 $doc_obj->associate_file($tempfile, "places.txt", "text/plain");
236 $self->{'places_filename'} = $tempfile;
237 my %countries = ();
238
239 foreach my $p (keys %$uniquePlaces){
240 my $place = $p;
241 $place =~ s/\s+|\n+|\r+|\t+/(\\s+)/g;
242 my $cap_place = uc($place);
243 my $long = $uniquePlaces->{$p}->[$LONG];
244 my $lat = $uniquePlaces->{$p}->[$LAT];
245 my $country = $uniquePlaces->{$p}->[$CC1];
246 my $path = "javascript:popUp(\"$long\",\"$lat\",\"$p\",\"$country\")";
247 my $p_counter = 0;
248
249 if(!defined($danger{$p})){
250 #adds html tags to each place name
251 $$textref =~ s/\b($place|$cap_place)\b/&anchor_wrapper($1,\$p_counter,$path)/sge;
252 }
253 #else {
254 #$y = $danger{$p};
255 #$$textref =~ s/(\w+)(\s+?)($place|$cap_place)(\s+?)(\w+)/&place_name_check($1,$2,$3,$4,$5,\$p_counter,$path, $y)/sge;
256 #}
257
258 #edits the last place's image, and removes image if place only occurres once.
259 my $p_tag = placename_to_anchorname($p);
260 $p_counter++;
261 $$textref =~ s/#$p_tag$p_counter(><img src="\/gsdl\/images\/)nextplace.gif/#${p_tag}1$1firstplace.gif/;
262 $$textref =~ s/<img src="\/gsdl\/images\/firstplace.gif" name="img$p_tag(1)" border="0">//;
263
264 #this line removes apostrophes from placenames (they break the javascript function)
265 $$textref =~ s/(javascript:popUp.*?)(\w)'(\w)/$1$2$3/g;
266
267 #for displaying map of document, count num of places from each country
268 if(defined($countries{$country})){$countries{$country}++;}
269 else{$countries{$country} = 1;}
270
271 #adds placename to metadata
272 $doc_obj->add_utf8_metadata ($thissection, "Placename", $p);
273 &gsprintf($outhandle, " {BasPlug.extracting} $p\n")
274 if ($self->{'verbosity'} > 3);
275 }
276 #finding the country that most places are from, in order to display map of the document
277 my $max = 0;
278 my $CNTRY = "";
279 foreach my $c_key (keys %countries){
280 if($countries{$c_key} > $max){
281 $max = $countries{$c_key};
282 $CNTRY = $c_key;
283 }
284 }
285 #allows user to view map with all places from the document on it
286#### my $places_filename = &util::filename_cat($GSDLHOME, "collect", "_cgiargc_", "index", "assoc", "_thisOID_", "places.txt");
287 my $places_filename = &util::filename_cat("collect", "_cgiargc_", "index", "assoc", "_thisOID_", "places.txt");
288 my $docmap = "<a href='javascript:popUp(\"$CNTRY\",\"$places_filename\")'>View map for this document<\/a><br><br>\n";
289 $$textref = $docmap . $names . "<br>" . $$textref;
290
291 $doc_obj->delete_text($thissection);
292 $doc_obj->add_utf8_text($thissection, $$textref);
293 &gsprintf($outhandle, " {BasPlug.done_places_extract}\n")
294 if ($self->{'verbosity'} > 2);
295}
Note: See TracBrowser for help on using the repository browser.