source: trunk/gsdl/perllib/plugins/GISBasPlug.pm@ 9961

Last change on this file since 9961 was 9398, checked in by davidb, 19 years ago

Introduction of GISBasPlug for Geographic Informatoin System support.
It was decided to put GISBasPlug in the main code, even though the rest
of what is required is bundled as an extension that must be installed
separately. GISBasPlug (included through BasPlug) only becomes active
if it can see the mapdata directory that is installed as part of the
extension. In becoming active, it adds extra -minus options to the
plugin.

  • Property svn:keywords set to Author Date Id Revision
File size: 9.4 KB
Line 
1###########################################################################
2#
3# GISBasPlug.pm -- base class to enhance plugins with GIS capabilities
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package GISBasPlug;
27
28use util;
29use locale;
30
31#field categories in DataBase files
32#$LAT = 3;
33#$LONG = 4;
34my $FC = 9;
35my $DSG = 10;
36#$CC1 = 12;
37$FULL_NAME = 22;
38
39BEGIN {
40 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
41}
42
43
44use BasPlug; # uses BasPlug, but is not inherited
45
46
47my $options = { 'name' => "GISBasPlug",
48 'desc' => "{GISBasPlug.desc}",
49 'abstract' => "yes",
50 'inherits' => "no" };
51
52
53sub new {
54 my $class = shift (@_);
55 my $plugin_name = shift (@_);
56
57 my $self = {};
58 $self->{'plugin_type'} = "GISBasPlug";
59
60 $self->{'option_list'} = [ $options ];
61
62 return bless $self, $class;
63}
64
65sub init {
66}
67
68sub print_xml_usage
69{
70 BasPlug::print_xml_usage(@_);
71}
72
73sub print_xml
74{
75 BasPlug::print_xml(@_);
76}
77
78
79sub has_mapdata
80{
81 my $db_dir = &util::filename_cat($ENV{'GSDLHOME'}, "lamp", "data");
82 return ( -d $db_dir );
83}
84
85
86#returns a hash table of names from database files (specified in collect.cfg).
87sub loadGISDatabase {
88 my ($outhandle,$datasets) = @_;
89 my @dbase = map{$_ = $_ . ".txt";} split(/,/, $datasets);
90 if(scalar(@dbase)==0) { #default is to include all databases
91 @dbase=("UK.txt", "NZ.txt", "NF.txt", "CA.txt", "AS.txt", "GM.txt", "BE.txt", "IN.txt", "JA.txt", "USA.txt");
92 }
93 my $counter=0;
94 my %places = ();
95 my @cats = ();
96 while($counter <= $#dbase){ #loop through all the databases
97 my $folder = $dbase[$counter];
98 $folder =~ s/(.*?)\.txt/$1/;
99#### my $dbName = &util::filename_cat($ENV{'GSDLHOME'}, "etc", "mapdata", "data", $folder, $dbase[$counter]);
100 my $dbName = &util::filename_cat($ENV{'GSDLHOME'}, "lamp", "data", $folder, $dbase[$counter]);
101 if (!open(FILEIN, "<$dbName")) {
102 print $outhandle "Unable to open database $dbName: $!\n";
103 return undef;
104 }
105
106 my $line = <FILEIN>; #database category details.
107 my @catdetails = split("\t", $line);
108
109 while ( defined($line = <FILEIN>)){ #stores all place names in file as keys in hash array %places
110 @cats = split("\t", $line);
111 if( #eliminating "bad" place names without missing real ones
112 ($cats[$FC] eq "A" && !($cats[$DSG] =~ /ADMD|ADM2|PRSH/))
113 ||($cats[$FC] eq "P" && ($cats[$DSG] =~ /PPLA|PPLC|PPLS/))
114 ||($cats[$FC] eq "H" && ($cats[$DSG] =~ /BAY|LK/))
115 ||($cats[$FC] eq "L" && !($cats[$DSG] =~ /LCTY|RGN/))
116 ||($cats[$FC] eq "T" && ($cats[$DSG] =~ /CAPE|ISL|GRGE/))
117 ||($dbase[$counter] eq "USA.txt" && ($cats[$DSG] =~ /ppl|island/))
118 ){$places{$cats[$FULL_NAME]} = [@cats];}
119 @cats = ();
120 }
121 close(FILEIN);
122 $counter++;
123 }
124 return \%places;
125}
126
127#returns a unique hash array of all the places found in a document, along with coordinates, description and country data
128sub getPlaces {
129 my $self = shift @_;
130
131 my ($textref, $places) = @_;
132 my %tempPlaces = ();
133
134 foreach my $plc (%$places){ #search for an occurrence of each place in the text
135 if($$textref =~ m/(\W)$plc(\W)/){
136 $tempPlaces{$plc} = $places->{$plc};
137 }
138 }
139 #make sure each place is only there once
140 my %uniquePlaces = ();
141 foreach my $p (keys %tempPlaces) {
142 if(!defined($uniquePlaces{$p})){
143 $uniquePlaces{$p} = $tempPlaces{$p};
144 }
145 }
146 return \%uniquePlaces;
147}
148
149
150
151
152#returns a lowercase version of the place, with no spaces
153sub placename_to_anchorname {
154 my ($placename) = @_;
155 $p_tag = lc($placename);
156 $p_tag =~ s/\s+//g;
157 return $p_tag;
158}
159
160#takes a place from the text and wraps an anchor tag, a hyperlink tag and an image tag around it
161sub anchor_wrapper {
162 my ($p, $p_counter_ref, $path) = @_;
163 my $image = "/gsdlgis/gisimages/nextplace.gif";
164 my $endTag = "</a>";
165 my $hrefTag = "<a href=";
166 $$p_counter_ref++;
167 my $next = $$p_counter_ref + 1;
168 my $p_tag = placename_to_anchorname($p);
169 my $place_anchor = "<a name=\"" . $p_tag . $$p_counter_ref . "\">" . $endTag;
170 my $popup_anchor = $hrefTag . "'" . $path . "'>" . $p . $endTag;
171 my $image_anchor = $hrefTag . "#" . $p_tag . $next . "><img src=\"" . $image . "\" name=\"img" . $p_tag . $$p_counter_ref . "\" border=\"0\">" . $endTag;
172 return $place_anchor . $popup_anchor . $image_anchor;
173}
174
175#takes dangerous place names and checks if they are part of another placename or not.
176sub place_name_check {
177 my ($pre, $preSpace, $p, $postSpace, $post, $p_counter_ref, $path, $y) = @_;
178 if($pre =~ /$y/ || $post =~ /$y/) {return $pre . $preSpace . $p . $postSpace . $post;}
179 $pre = $pre . $preSpace;
180 $post = $postSpace . $post;
181 return $pre . &anchor_wrapper("", $p, "", $p_counter_ref, $path) . $post;
182}
183
184sub extract_placenames {
185 my $self = shift (@_);
186 my ($textref, $doc_obj, $thissection) = @_;
187 my $outhandle = $self->{'outhandle'};
188
189 my $GSDLHOME = $ENV{'GSDLHOME'};
190 #field categories in DataBase file for extract_placenames.
191 my $LAT = 3;
192 my $LONG = 4;
193 my $CC1 = 12;
194
195
196 &gsprintf($outhandle, " {BasPlug.extracting_placenames}...\n")
197 if ($self->{'verbosity'} > 2);
198
199 #get all the places found in the document
200 my $uniquePlaces = $self->getPlaces($textref, $self->{'places'});
201
202 #finds 'dangerous' placenames (eg York and New York). Dangerous because program will find "York" within "New York"
203 my %danger = ();
204 foreach $x (keys %$uniquePlaces){
205 foreach $y (keys %$uniquePlaces){
206 if(($y =~ m/ /) && ($y =~ m/$x/) && ($y ne $x)){
207 $y =~ s/($x\s)|(\s$x)//;
208 $danger{$x} = $y;
209 }
210 }
211 }
212
213 #creates a list of clickable placenames at top of page, linked to first occurrence of name, and reads them into a file
214 my $tempfname = $doc_obj;
215 $tempfname =~ s/.*\(0x(.*)\)/$1/;
216 my $names = "";
217 my $filename = "tmpfile" . $tempfname;
218 my $tempfile = &util::filename_cat($GSDLHOME, "tmp", $filename);
219 open(FOUT, ">$tempfile") || die "Unable to create a temp file: $!";
220 foreach my $name (sort (keys %$uniquePlaces)){
221 if(!defined($danger{$name})){
222 $name_tag = placename_to_anchorname($name);
223 print FOUT "$name\t" . $uniquePlaces->{$name}->[$LONG] . "\t" . $uniquePlaces->{$name}->[$LAT] . "\n";
224 if($self->{'place_list'}) {$names = $names . "<a href=\"#" . $name_tag . "1\">" . $name . "</a>" . "\n";}
225 }
226 }
227 close(FOUT);
228 $doc_obj->associate_file($tempfile, "places.txt", "text/plain");
229 $self->{'places_filename'} = $tempfile;
230 my %countries = ();
231
232 foreach my $p (keys %$uniquePlaces){
233 my $place = $p;
234 $place =~ s/\s+|\n+|\r+|\t+/(\\s+)/g;
235 my $cap_place = uc($place);
236 my $long = $uniquePlaces->{$p}->[$LONG];
237 my $lat = $uniquePlaces->{$p}->[$LAT];
238 my $country = $uniquePlaces->{$p}->[$CC1];
239 my $path = "javascript:popUp(\"$long\",\"$lat\",\"$p\",\"$country\")";
240 my $p_counter = 0;
241
242 if(!defined($danger{$p})){
243 #adds html tags to each place name
244 $$textref =~ s/\b($place|$cap_place)\b/&anchor_wrapper($1,\$p_counter,$path)/sge;
245 }
246 #else {
247 #$y = $danger{$p};
248 #$$textref =~ s/(\w+)(\s+?)($place|$cap_place)(\s+?)(\w+)/&place_name_check($1,$2,$3,$4,$5,\$p_counter,$path, $y)/sge;
249 #}
250
251 #edits the last place's image, and removes image if place only occurres once.
252 my $p_tag = placename_to_anchorname($p);
253 $p_counter++;
254 $$textref =~ s/#$p_tag$p_counter(><img src="\/gsdl\/images\/)nextplace.gif/#${p_tag}1$1firstplace.gif/;
255 $$textref =~ s/<img src="\/gsdl\/images\/firstplace.gif" name="img$p_tag(1)" border="0">//;
256
257 #this line removes apostrophes from placenames (they break the javascript function)
258 $$textref =~ s/(javascript:popUp.*?)(\w)'(\w)/$1$2$3/g;
259
260 #for displaying map of document, count num of places from each country
261 if(defined($countries{$country})){$countries{$country}++;}
262 else{$countries{$country} = 1;}
263
264 #adds placename to metadata
265 $doc_obj->add_utf8_metadata ($thissection, "Placename", $p);
266 &gsprintf($outhandle, " {BasPlug.extracting} $p\n")
267 if ($self->{'verbosity'} > 3);
268 }
269 #finding the country that most places are from, in order to display map of the document
270 my $max = 0;
271 my $CNTRY = "";
272 foreach my $c_key (keys %countries){
273 if($countries{$c_key} > $max){
274 $max = $countries{$c_key};
275 $CNTRY = $c_key;
276 }
277 }
278 #allows user to view map with all places from the document on it
279 my $places_filename = &util::filename_cat($GSDLHOME, "collect", "_cgiargc_", "index", "assoc", "_thisOID_", "places.txt");
280 my $docmap = "<a href='javascript:popUp(\"$CNTRY\",\"$places_filename\")'>View map for this document<\/a><br><br>\n";
281 $$textref = $docmap . $names . "<br>" . $$textref;
282
283 $doc_obj->delete_text($thissection);
284 $doc_obj->add_utf8_text($thissection, $$textref);
285 &gsprintf($outhandle, " {BasPlug.done_places_extract}\n")
286 if ($self->{'verbosity'} > 2);
287}
Note: See TracBrowser for help on using the repository browser.