1 | ###########################################################################
|
---|
2 | #
|
---|
3 | # CICPlug.pm
|
---|
4 | #
|
---|
5 | # Copyright (C) 2005 New Zealand Digital Library Project
|
---|
6 | #
|
---|
7 | # This program is free software; you can redistribute it and/or modify
|
---|
8 | # it under the terms of the GNU General Public License as published by
|
---|
9 | # the Free Software Foundation; either version 2 of the License, or
|
---|
10 | # (at your option) any later version.
|
---|
11 | #
|
---|
12 | # This program is distributed in the hope that it will be useful,
|
---|
13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
15 | # GNU General Public License for more details.
|
---|
16 | #
|
---|
17 | # You should have received a copy of the GNU General Public License
|
---|
18 | # along with this program; if not, write to the Free Software
|
---|
19 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
20 | #
|
---|
21 | ###########################################################################
|
---|
22 |
|
---|
23 | package CICPlug;
|
---|
24 |
|
---|
25 |
|
---|
26 | use BasPlug;
|
---|
27 | use DBI;
|
---|
28 | use strict;
|
---|
29 | no strict 'refs';
|
---|
30 |
|
---|
31 |
|
---|
# Establish inheritance at compile time: CICPlug is a subclass of BasPlug.
BEGIN {
    @CICPlug::ISA = qw(BasPlug);
}
35 |
|
---|
36 |
|
---|
# Plugin-specific argument records; new() appends them to the shared "ArgList".
#   images_directory                   - string, required, no default
#   cache_directory                    - string, optional, defaults to $GSDLHOME/tmp
#   <size>_image_{options,type,width}  - string, optional, one triple for each of
#                                        large / medium / small
my $arguments =
    [
      { 'name' => "images_directory", 'type' => "string", 'deft' => "", 'reqd' => "yes" },
      { 'name' => "cache_directory", 'type' => "string",
        'deft' => &util::filename_cat($ENV{'GSDLHOME'}, "tmp"), 'reqd' => "no" },
      # The three image sizes share the same argument shape and differ only in
      # their name prefix and default width, so generate them from a small table.
      ( map {
            my ($size, $default_width) = @{$_};
            ( { 'name' => $size . "_image_options", 'type' => "string", 'deft' => "", 'reqd' => "no" },
              { 'name' => $size . "_image_type", 'type' => "string", 'deft' => "jpg", 'reqd' => "no" },
              { 'name' => $size . "_image_width", 'type' => "string", 'deft' => $default_width, 'reqd' => "no" } );
        } ( [ "large", "800" ], [ "medium", "375" ], [ "small", "225" ] ) )
    ];
84 |
|
---|
# Plugin description record; new() appends it to the shared "OptList".
my $options = {
    'name'     => "CICPlug",
    'desc'     => "{CICPlug.desc}",
    'abstract' => "no",
    'inherits' => "yes",
};
89 |
|
---|
90 |
|
---|
# Returns (as a string) the default regular expression selecting the files this
# plugin handles: any name ending in ".mdb", matched case-insensitively.
sub get_default_process_exp
{
    my $mdb_extension_exp = '(?i)\.mdb$';
    return $mdb_extension_exp;
}
95 |
|
---|
96 |
|
---|
# Constructor.
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - passed through unchanged to the BasPlug constructor
#   $hashArgOptLists - hash ref; this plugin's argument records are appended to
#                      its "ArgList" array and its option record to "OptList"
# Returns the object built by BasPlug, blessed into $class.
sub new
{
    my ($class) = shift (@_);
    my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    if (defined $arguments) { push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments}); }
    if (defined $options) { push(@{$hashArgOptLists->{"OptList"}}, $options); }

    # Explicit class-method call.  The indirect form "new BasPlug(...)" relies on
    # parser heuristics, which are fragile in a package that defines its own new().
    my $self = BasPlug->new($pluginlist, $inputargs, $hashArgOptLists);

    return bless $self, $class;
}
110 |
|
---|
111 |
|
---|
# Two-letter postal abbreviation -> full name, for the 50 US states plus DC.
my $state_abbr_to_name_mapping = {
    'AL' => 'Alabama',
    'AK' => 'Alaska',
    'AZ' => 'Arizona',
    'AR' => 'Arkansas',
    'CA' => 'California',
    'CO' => 'Colorado',
    'CT' => 'Connecticut',
    'DC' => 'District of Columbia',
    'DE' => 'Delaware',
    'FL' => 'Florida',
    'GA' => 'Georgia',
    'HI' => 'Hawaii',
    'ID' => 'Idaho',
    'IL' => 'Illinois',
    'IN' => 'Indiana',
    'IA' => 'Iowa',
    'KS' => 'Kansas',
    'KY' => 'Kentucky',
    'LA' => 'Louisiana',
    'ME' => 'Maine',
    'MD' => 'Maryland',
    'MA' => 'Massachusetts',
    'MI' => 'Michigan',
    'MN' => 'Minnesota',
    'MS' => 'Mississippi',
    'MO' => 'Missouri',
    'MT' => 'Montana',
    'NE' => 'Nebraska',
    'NV' => 'Nevada',
    'NH' => 'New Hampshire',
    'NJ' => 'New Jersey',
    'NM' => 'New Mexico',
    'NY' => 'New York',
    'NC' => 'North Carolina',
    'ND' => 'North Dakota',
    'OH' => 'Ohio',
    'OK' => 'Oklahoma',
    'OR' => 'Oregon',
    'PA' => 'Pennsylvania',
    'RI' => 'Rhode Island',
    'SC' => 'South Carolina',
    'SD' => 'South Dakota',
    'TN' => 'Tennessee',
    'TX' => 'Texas',
    'UT' => 'Utah',
    'VT' => 'Vermont',
    'VA' => 'Virginia',
    'WA' => 'Washington',
    'WV' => 'West Virginia',
    'WI' => 'Wisconsin',
    'WY' => 'Wyoming',
};

# The inverse lookup (full name -> abbreviation), obtained by swapping keys and
# values; every name in the table above is distinct, so no entry is lost.
my %state_name_to_abbr_mapping = reverse %$state_abbr_to_name_mapping;
167 |
|
---|
# Two-letter state abbreviation -> name of the area the state is grouped under.
# Declared area-by-area and then flattened into the abbreviation-keyed hash ref.
my $state_abbr_to_area_mapping = do {
    my %states_in_area = (
        "Midwest"   => [ qw(IL IN IA KS MI MN MO NE ND OH SD WI) ],
        "Mountain"  => [ qw(CO ID MT UT WY) ],
        "Northeast" => [ qw(CT DC DE ME MD MA NH NJ NY PA RI VT) ],
        "Southeast" => [ qw(AL AR FL GA KY LA MS NC SC TN VA WV) ],
        "Southwest" => [ qw(AZ NM OK TX) ],
        "West"      => [ qw(AK CA HI NV OR WA) ],
    );

    my %area_of_state;
    foreach my $area (keys(%states_in_area)) {
        foreach my $state_abbr (@{$states_in_area{$area}}) {
            $area_of_state{$state_abbr} = $area;
        }
    }
    \%area_of_state;
};
221 |
|
---|
222 |
|
---|
# Place type ID ("1".."4") -> display name.  process_places uses this to replace
# the PlaceType column value of each tblPlace row with its name.
my $place_type_id_to_name_mapping = do {
    # Listed in ID order, starting from ID 1
    my @place_type_names = (
        "Individual building",
        "Landscape site",
        "Campus arrangement",
        "Building group",
    );

    my %name_of_place_type;
    @name_of_place_type{1 .. scalar(@place_type_names)} = @place_type_names;
    \%name_of_place_type;
};
229 |
|
---|
230 |
|
---|
# Architectural style names.  This array must match the values in the
# tblArchTypes table, in the same order: process_places uses each ArchType_ID
# it reads as an index into this array, so reordering entries changes results.
my @place_styles_array = (
    'American colonial',
    'Federal',
    'Greek revival',
    'Italianate',
    'Gothic revival',
    'Romanesque revival',
    'Victorian',
    'Beaux-Arts classicism',
    'Colonial revival',
    'Mission/Mission revival',
    'Modern/pre-WWII',
    'Modern/post-WWII',
    'Postmodern',
    'Contemporary',
    'Regionalist/Vernacular',
    'Other',
);
250 |
|
---|
251 |
|
---|
# Known place function names (all lower case), each mapped to an empty string.
# NOTE(review): the code that consumes this table is not visible in this part of
# the file; the values appear to be unused placeholders -- confirm before relying on them.
my $place_functions_mapping = {
    map { $_ => "" } (
        "academic department building",
        "administration",
        "admissions office",
        "alumni center",
        "arboretum",
        "archaeological site",
        "auditorium",
        "bell tower",
        "chapel",
        "classrooms",
        "debating society",
        "dining hall",
        "facility management building",
        "faculty offices",
        "gardens",
        "greek letter society",
        "gymnasium",
        "infirmary",
        "library",
        "master plan (campus)",
        "master plan (landscape)",
        "memorial site",
        "museum",
        "observatory",
        "old main",
        "outdoor space",
        "president's house",
        "private residence",
        "residence hall",
        "stadium",
        "student union",
        "theater",
        "other",
    )
};
287 |
|
---|
288 |
|
---|
# File-level state shared between the processing subroutines.
# NOTE(review): nothing in the visible part of the file reads or writes these;
# presumably they are filled while places are processed and consumed when the
# designer documents are built -- confirm against the rest of the file.
my (%designer_name_to_id_mapping, %designer_name_to_place_ids_mapping);

# Running counter, starting at 1
my $place_reference_id = 1;
292 |
|
---|
293 |
|
---|
# Plugin entry point: imports the whole CIC-HCAP database when handed a matching file.
#
# The file named by $base_dir/$file only acts as a trigger: it must match the
# plugin's 'process_exp' and exist as a plain file, but its contents are not
# opened here.  The data is read through the ODBC data source "CIC-HCAP".
#
#   $base_dir, $file - joined to form the filename stored in $self->{'filename'}
#   $processor       - document processor, stored in $self->{'processor'}
#   $gli             - stored in $self->{'gli'}; when true the processing
#                      subroutines emit extra <ProcessingError> lines on STDERR
#   $pluginfo, $metadata, $maxdocs, $total_count - accepted but not used here
#
# Returns undef if the file is not one this plugin handles, 1 otherwise.
# Dies if the database connection cannot be opened.
sub read
{
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;

    $self->{'filename'} = &util::filename_cat($base_dir, $file);
    if ($self->{'filename'} !~ /$self->{'process_exp'}/ || !-f $self->{'filename'}) {
        return undef;
    }
    $self->{'processor'} = $processor;
    $self->{'gli'} = $gli;

    # Open connection to Access database.  Fail here, with the driver's own
    # message, rather than later with an obscure "undefined value" error.
    my $dbh = DBI->connect('dbi:ODBC:CIC-HCAP')
        or die "Could not connect to ODBC data source CIC-HCAP: "
            . (defined($DBI::errstr) ? $DBI::errstr : "unknown error") . "\n";

    $self->process_institutions($dbh);
    $self->process_places($dbh);
    $self->process_designers($dbh);

    # All statement handles were local to the subroutines above, so the
    # connection can now be closed explicitly.
    $dbh->disconnect();

    return 1;
}
315 |
|
---|
316 |
|
---|
# Builds one document per row of tblInstitution, passes each to the document
# processor, then writes the "institutions" and "states" browser macrofiles.
#
#   $self - plugin object; reads 'failhandle', 'filename', 'gli',
#           'images_directory' and 'processor', and increments
#           'num_processed' / 'num_not_processed'
#   $dbh  - open DBI database handle
#
# Rows with no City value are skipped silently.  Rows whose best image is
# missing, cannot be matched to a file, or is a PDF are reported (STDERR and the
# fail log) and skipped.  Dies if an SQL statement cannot be prepared or executed.
sub process_institutions
{
    my $self = shift(@_);
    my $dbh = shift(@_);
    my $fail_log_handle = $self->{'failhandle'};

    # Prepares a statement and applies the long-field read limit used throughout.
    # NOTE(review): DBI documents LongReadLen as normally being set on the
    # database handle before prepare(); whether setting it on the statement
    # handle takes effect is driver-dependent -- confirm for DBD::ODBC.
    my $prepare_statement = sub {
        my ($sql_statement) = @_;
        my $sql_handle = $dbh->prepare($sql_statement) or die "Could not prepare SQL statement.";
        $sql_handle->{LongReadLen} = 65536;
        return $sql_handle;
    };

    # Reports a failure on STDERR (with an extra <ProcessingError> line when
    # running under GLI) and in the fail log.
    my $report_failure = sub {
        my ($subject, $reason, $message) = @_;
        print STDERR "<ProcessingError n='$subject' p='CICPlug' r='$reason'>\n" if ($self->{'gli'});
        print STDERR "Error: $message\n";
        print $fail_log_handle "Error: $message\n";
    };

    # Everything from the Institution table
    my $institution_sql_handle = $prepare_statement->("SELECT * FROM tblInstitution");
    $institution_sql_handle->execute() or die "Could not execute SQL statement.";

    # The places belonging to an institution
    my $institution_places_sql_handle = $prepare_statement->("SELECT Entry_ID,Current_name FROM tblPlace WHERE PlaceType>0 AND Institution_ID=?");

    # The file location (and owning place) of an institution's best place image
    my $institution_best_place_image_location_sql_handle = $prepare_statement->("SELECT Location,Entry_ID FROM tblImages WHERE FileType=1 AND FileName=?");

    # The place images of an institution (only used to check if an institution has some images)
    my $institution_places_images_sql_handle = $prepare_statement->("SELECT FileName FROM tblImages,tblPlace WHERE tblImages.FileType=1 AND tblImages.Entry_ID=tblPlace.Entry_ID AND tblPlace.Institution_ID=?");

    # The campus plans of an institution
    my $institution_campus_plans_sql_handle = $prepare_statement->("SELECT * FROM tblCampusMaps WHERE Electronic=1 AND Institution_ID=?");

    # Create a document object for each institution
    my %institution_id_to_name_mapping;
    my %institution_state_to_ids_mapping;
    while (my $row_hashref = $institution_sql_handle->fetchrow_hashref) {
        # Skip any institutions that didn't respond
        next if !defined($row_hashref->{"City"});

        my $institution_id = $row_hashref->{"Institution_ID"};
        my $institution_doc_obj = doc->new($self->{'filename'} . "-", "indexed_doc");
        $institution_doc_obj->set_OID("i$institution_id");
        &new_metadata_entry($institution_doc_obj, "DocumentType", "Institution");

        # For some reason the hyphen seems to be lost from the Zip field, so add it back in
        my $institution_zip = $row_hashref->{"Zip"};
        if (defined($institution_zip) && $institution_zip =~ /^(\d\d\d\d\d)(\d\d\d\d)$/) {
            $row_hashref->{"Zip"} = $1 . "-" . $2;
        }

        # Fix up the links to the institution webpage: keep only the text between the first pair of '#'
        my $institution_webpage = $row_hashref->{"Institution_webpage"};
        if (defined($institution_webpage) && $institution_webpage =~ /\#(.*?)\#/) {
            $row_hashref->{"Institution_webpage"} = $1;
        }

        # Map state to full name
        $row_hashref->{"State"} = $state_abbr_to_name_mapping->{$row_hashref->{"State"}};

        # Get the places in this institution
        my $institution_places_list_html = "";
        $institution_places_sql_handle->execute($institution_id) or die "Could not execute SQL statement.";
        while (my $institution_places_match_hashref = $institution_places_sql_handle->fetchrow_hashref) {
            my $institution_place_id = $institution_places_match_hashref->{"Entry_ID"};
            my $institution_place_name = $institution_places_match_hashref->{"Current_name"};
            $institution_places_list_html .= "<a href=\"_gwcgi_?a=d&d=p$institution_place_id\">$institution_place_name</a><br />\n";
        }
        &new_metadata_entry($institution_doc_obj, "InstitutionPlacesListHTML", $institution_places_list_html);

        # Get the best place image for this institution
        my $institution_best_place_image_name = $row_hashref->{"Best_image"};
        if (!defined($institution_best_place_image_name) || $institution_best_place_image_name eq "") {
            # Some institutions have no electronic images, and thus have no best image.
            # Having images but no best image, however, is an error.
            $institution_places_images_sql_handle->execute($institution_id) or die "Could not execute SQL statement.";
            my $institution_has_images = defined($institution_places_images_sql_handle->fetchrow_hashref());
            $institution_places_images_sql_handle->finish();  # Only the first row is of interest
            if ($institution_has_images) {
                $report_failure->("Institution $institution_id", "No best image", "Institution $institution_id -- No best image.");
                $self->{'num_not_processed'}++;
                next;
            }
            &new_metadata_entry($institution_doc_obj, "InstitutionBestPlaceImageHTML", "");
        }
        else {
            # Get the file location of the best place image for this institution
            $institution_best_place_image_location_sql_handle->execute($institution_best_place_image_name) or die "Could not execute SQL statement.";
            my $institution_best_place_image_hashref = $institution_best_place_image_location_sql_handle->fetchrow_hashref();
            $institution_best_place_image_location_sql_handle->finish();  # Only the first row is of interest

            my $institution_best_place_image_location = $institution_best_place_image_hashref->{"Location"};
            if (!defined($institution_best_place_image_location) || $institution_best_place_image_location eq "") {
                $report_failure->("Institution $institution_id", "Could not match best image $institution_best_place_image_name to a file", "Institution $institution_id -- Could not match best image $institution_best_place_image_name to a file.");
                $self->{'num_not_processed'}++;
                next;
            }

            # PDFs are not allowed for institution best place images.
            # The dot is escaped so that only a real ".pdf" extension matches.
            if ($institution_best_place_image_location =~ /\.pdf$/i) {
                $report_failure->("Institution $institution_id", "PDF not allowed for best image", "Institution $institution_id -- PDF not allowed for best image.");
                $self->{'num_not_processed'}++;
                next;
            }

            my $institution_best_place_id = $institution_best_place_image_hashref->{"Entry_ID"};
            my $institution_best_place_image_medium_file_href = $self->generate_place_image_variant($institution_doc_obj, $institution_best_place_image_location, "medium");
            &new_metadata_entry($institution_doc_obj, "InstitutionBestPlaceImageHTML", "<a href=\"_gwcgi_?a=d&d=p$institution_best_place_id\"><img alt=\"$institution_best_place_image_name\" src=\"$institution_best_place_image_medium_file_href\"/><br />$institution_best_place_image_name</a>");
        }

        # Get institution campus plans
        my $institution_campus_plans_list_html = "";
        $institution_campus_plans_sql_handle->execute($institution_id) or die "Could not execute SQL statement.";
        while (my $institution_campus_plans_match_hashref = $institution_campus_plans_sql_handle->fetchrow_hashref) {
            my $institution_campus_plan_name = $institution_campus_plans_match_hashref->{"NameAndFormat"};
            my $institution_campus_plan_image_location = $institution_campus_plans_match_hashref->{"Location_electronic"};

            # Deal with PDF files separately: don't convert, just associate
            if (defined($institution_campus_plan_image_location) && $institution_campus_plan_image_location =~ /\.pdf$/i) {
                # Convert the server location of the file into the local location of the file
                my $institution_campus_plan_pdf_file_path = $institution_campus_plan_image_location;
                $institution_campus_plan_pdf_file_path =~ s/^[A-Z]:/$self->{'images_directory'}/;

                if (-f $institution_campus_plan_pdf_file_path) {
                    # The link uses the name with spaces encoded; the associated file keeps the plain name
                    my $institution_campus_plan_pdf_file_name = $institution_campus_plan_name . ".pdf";
                    $institution_campus_plan_pdf_file_name =~ s/ /%20/g;
                    my $institution_campus_plan_pdf_file_href = "_httpcollection_/index/assoc/[assocfilepath]/$institution_campus_plan_pdf_file_name";
                    $institution_campus_plans_list_html .= "<a href=\"$institution_campus_plan_pdf_file_href\">$institution_campus_plan_name (PDF)</a><br />";

                    $institution_doc_obj->associate_file($institution_campus_plan_pdf_file_path, $institution_campus_plan_name . ".pdf", undef, $institution_doc_obj->get_top_section());
                }
                else {
                    # A missing plan is reported but does not stop the institution being processed
                    $report_failure->($institution_campus_plan_pdf_file_path, "Does not exist", "File $institution_campus_plan_pdf_file_path does not exist.");
                }
            }
            else {
                my $institution_campus_plan_image_large_file_href = $self->generate_place_image_variant($institution_doc_obj, $institution_campus_plan_image_location, "large");

                # Create a new section for each institution campus plan image
                my $institution_campus_plan_image_section = $institution_doc_obj->insert_section($institution_doc_obj->get_end_child($institution_doc_obj->get_top_section()));
                $institution_doc_obj->add_utf8_text($institution_campus_plan_image_section, "_");  # This is necessary
                $institution_doc_obj->add_utf8_metadata($institution_campus_plan_image_section, "DocumentType", "Image");
                $institution_doc_obj->add_utf8_metadata($institution_campus_plan_image_section, "Title", $institution_campus_plan_name);
                $institution_doc_obj->add_utf8_metadata($institution_campus_plan_image_section, "ImagePath", $institution_campus_plan_image_large_file_href);

                $institution_campus_plans_list_html .= "<a href=\"_gwcgi_?a=d&d=i$institution_id.$institution_campus_plan_image_section\">$institution_campus_plan_name</a><br />";
            }
        }
        &new_metadata_entry($institution_doc_obj, "InstitutionCampusPlansListHTML", $institution_campus_plans_list_html);

        # Add each field from the table as metadata
        foreach my $key (keys(%$row_hashref)) {
            my $value = $row_hashref->{$key};
            if (defined($value)) {
                &new_metadata_entry($institution_doc_obj, $key, $value);
            }
        }

        $institution_doc_obj->add_utf8_text($institution_doc_obj->get_top_section(), "Some dummy text.");
        $self->{'processor'}->process($institution_doc_obj);
        $self->{'num_processed'}++;

        # Build mappings for creating the static macrofiles
        # (State holds the full state name by now, see above)
        my $institution_name = $row_hashref->{"Institution_Name"};
        $institution_id_to_name_mapping{$institution_doc_obj->get_OID()} = $institution_name;
        my $institution_state = $row_hashref->{"State"};
        push(@{$institution_state_to_ids_mapping{$institution_state}}, $institution_doc_obj->get_OID());
    }

    # Write the institutions.dm macrofile
    &write_static_browser_macrofile("institutions", \%institution_id_to_name_mapping);

    # Write the states.dm macrofile
    &write_state_browser_macrofile("states", \%institution_state_to_ids_mapping, \%institution_id_to_name_mapping);
}
496 |
|
---|
497 |
|
---|
498 | sub process_places
|
---|
499 | {
|
---|
500 | my $self = shift(@_);
|
---|
501 | my $dbh = shift(@_);
|
---|
502 | my $fail_log_handle = $self->{'failhandle'};
|
---|
503 |
|
---|
504 | # Prepare SQL statement for getting everything from the Place table
|
---|
505 | my $place_sql_statement = "SELECT * FROM tblPlace"; # WHERE Entry_ID<100";
|
---|
506 | my $place_sql_handle = $dbh->prepare($place_sql_statement);
|
---|
507 | $place_sql_handle->{LongReadLen} = 65536;
|
---|
508 | $place_sql_handle->execute() or die "Could not execute SQL statement.";
|
---|
509 |
|
---|
510 | # Prepare SQL statement for getting the Place institution
|
---|
511 | my $place_institution_sql_statement = "SELECT Institution_Name FROM tblInstitution,tblPlace WHERE tblInstitution.Institution_ID=tblPlace.Institution_ID and tblPlace.Entry_ID=?";
|
---|
512 | my $place_institution_sql_handle = $dbh->prepare($place_institution_sql_statement);
|
---|
513 |
|
---|
514 | # Prepare SQL statement for getting the Place construction dates
|
---|
515 | my $place_construction_dates_sql_statement = "SELECT Prefix,Date,Note,Architect_Name FROM tblConstruction_and_Dates WHERE Entry_ID=? ORDER BY Date";
|
---|
516 | my $place_construction_dates_sql_handle = $dbh->prepare($place_construction_dates_sql_statement);
|
---|
517 |
|
---|
518 | # Prepare SQL statement for getting the Place images
|
---|
519 | my $place_images_sql_statement = "SELECT FileName,Location FROM tblImages WHERE FileType=1 AND Entry_ID=? ORDER BY Image_Order";
|
---|
520 | my $place_images_sql_handle = $dbh->prepare($place_images_sql_statement);
|
---|
521 | $place_images_sql_handle->{LongReadLen} = 65536;
|
---|
522 |
|
---|
523 | # Prepare SQL statement for getting the Place materials
|
---|
524 | my $place_materials_sql_statement = "SELECT * FROM tblDescription_building WHERE Entry_ID=?";
|
---|
525 | my $place_materials_sql_handle = $dbh->prepare($place_materials_sql_statement);
|
---|
526 | $place_materials_sql_handle->{LongReadLen} = 65536;
|
---|
527 |
|
---|
528 | # Prepare SQL statement for getting the Place building styles
|
---|
529 | my $place_styles_sql_statement = "SELECT ArchType_ID FROM ArchPlace WHERE Entry_ID=?";
|
---|
530 | my $place_styles_sql_handle = $dbh->prepare($place_styles_sql_statement);
|
---|
531 | $place_styles_sql_handle->{LongReadLen} = 65536;
|
---|
532 |
|
---|
533 | # Prepare SQL statement for getting the Place functions
|
---|
534 | my $place_functions_sql_statement = "SELECT Function,Year,Prefix FROM tblFunction_and_dates WHERE Entry_ID=? ORDER BY Year";
|
---|
535 | my $place_functions_sql_handle = $dbh->prepare($place_functions_sql_statement);
|
---|
536 | $place_functions_sql_handle->{LongReadLen} = 65536;
|
---|
537 |
|
---|
538 | # Prepare SQL statement for getting the Place significance
|
---|
539 | my $place_significance_sql_statement = "SELECT SigType FROM tblSigTypes,SigPlace WHERE tblSigTypes.SigTypes_ID=SigPlace.SigType_ID+1 AND SigPlace.Entry_ID=?";
|
---|
540 | my $place_significance_sql_handle = $dbh->prepare($place_significance_sql_statement);
|
---|
541 | $place_significance_sql_handle->{LongReadLen} = 65536;
|
---|
542 |
|
---|
543 | # Prepare SQL statement for getting the Place references
|
---|
544 | my $place_references_sql_statement = "SELECT Bibliography FROM tblReferences WHERE Entry_ID=?";
|
---|
545 | my $place_references_sql_handle = $dbh->prepare($place_references_sql_statement);
|
---|
546 | $place_references_sql_handle->{LongReadLen} = 65536;
|
---|
547 |
|
---|
548 | # Prepare SQL statement for getting the Place designations
|
---|
549 | my $place_designations_sql_statement = "SELECT National_Register,Federal_Agency,HABS,HAER,Local_Designation FROM tblReferences WHERE Entry_ID=?";
|
---|
550 | my $place_designations_sql_handle = $dbh->prepare($place_designations_sql_statement);
|
---|
551 | $place_designations_sql_handle->{LongReadLen} = 65536;
|
---|
552 |
|
---|
553 | # Prepare SQL statement for getting the Place narrative
|
---|
554 | my $place_narrative_sql_statement = "SELECT Narrative FROM tblSignificance_Narrative WHERE Entry_ID=?";
|
---|
555 | my $place_narrative_sql_handle = $dbh->prepare($place_narrative_sql_statement);
|
---|
556 | $place_narrative_sql_handle->{LongReadLen} = 65536;
|
---|
557 |
|
---|
558 | # Prepare SQL statement for getting the Place state
|
---|
559 | my $place_state_sql_statement = "SELECT State FROM tblInstitution,tblPlace WHERE tblInstitution.Institution_ID=tblPlace.Institution_ID AND Entry_ID=?";
|
---|
560 | my $place_state_sql_handle = $dbh->prepare($place_state_sql_statement);
|
---|
561 | $place_state_sql_handle->{LongReadLen} = 65536;
|
---|
562 |
|
---|
563 | # Create a document object for each place
|
---|
564 | my %place_type_to_ids_mapping;
|
---|
565 | my %place_style_to_ids_mapping;
|
---|
566 | my %place_date_to_ids_mapping;
|
---|
567 | my %place_function_to_ids_mapping;
|
---|
568 | my %place_id_to_name_mapping;
|
---|
569 | my %place_id_to_institution_name_mapping;
|
---|
570 | while (my $row_hashref = $place_sql_handle->fetchrow_hashref) {
|
---|
571 | my $place_id = $row_hashref->{"Entry_ID"};
|
---|
572 | # print STDERR " Place $place_id\n";
|
---|
573 | my $place_doc_obj = new doc($self->{'filename'} . "-", "indexed_doc");
|
---|
574 | $place_doc_obj->set_OID("p$place_id");
|
---|
575 | &new_metadata_entry($place_doc_obj, "DocumentType", "Place");
|
---|
576 |
|
---|
577 | # Convert the place type ID into a name
|
---|
578 | $row_hashref->{"PlaceType"} = $place_type_id_to_name_mapping->{$row_hashref->{"PlaceType"}};
|
---|
579 |
|
---|
580 | # Add each field from the table as metadata
|
---|
581 | foreach my $key (keys(%$row_hashref)) {
|
---|
582 | my $value = $row_hashref->{$key};
|
---|
583 | if (defined($value)) {
|
---|
584 | &new_metadata_entry($place_doc_obj, $key, $value);
|
---|
585 | }
|
---|
586 | }
|
---|
587 |
|
---|
588 | # Get place name
|
---|
589 | my $place_name = $row_hashref->{"Current_name"};
|
---|
590 | if (!defined($place_name)) {
|
---|
591 | print STDERR "<ProcessingError n='Place $place_id' p='CICPlug' r='Missing place name'>\n" if ($self->{'gli'});
|
---|
592 | print STDERR "Error: Place $place_id -- Missing place name.\n";
|
---|
593 | print $fail_log_handle "Error: Place $place_id -- Missing place name.\n";
|
---|
594 | $self->{'num_not_processed'}++;
|
---|
595 | next;
|
---|
596 | }
|
---|
597 |
|
---|
598 | # Get place type
|
---|
599 | my $place_type = $row_hashref->{"PlaceType"};
|
---|
600 | if (!defined($place_type)) {
|
---|
601 | print STDERR "<ProcessingError n='Place $place_id' p='CICPlug' r='Missing place type'>\n" if ($self->{'gli'});
|
---|
602 | print STDERR "Error: Place $place_id -- Missing place type.\n";
|
---|
603 | print $fail_log_handle "Error: Place $place_id -- Missing place type.\n";
|
---|
604 | $self->{'num_not_processed'}++;
|
---|
605 | next;
|
---|
606 | }
|
---|
607 |
|
---|
608 | # Create place styles mapping
|
---|
609 | $place_styles_sql_handle->execute($place_id) or die "Could not execute SQL statement.";
|
---|
610 | while (my $place_styles_match_hashref = $place_styles_sql_handle->fetchrow_hashref()) {
|
---|
611 | # The ArchType_ID is actually an index into the tblArchType table, NOT a link
|
---|
612 | my $place_style_index = $place_styles_match_hashref->{"ArchType_ID"};
|
---|
613 | my $place_style = $place_styles_array[$place_style_index];
|
---|
614 | push(@{$place_style_to_ids_mapping{$place_style}}, $place_doc_obj->get_OID());
|
---|
615 | &new_metadata_entry($place_doc_obj, "Style", $place_style);
|
---|
616 | }
|
---|
617 |
|
---|
618 | # Get place institution
|
---|
619 | $place_institution_sql_handle->execute($place_id) or die "Could not execute SQL statement.";
|
---|
620 | my $place_institution = $place_institution_sql_handle->fetchrow();
|
---|
621 | &new_metadata_entry($place_doc_obj, "Institution_name", $place_institution);
|
---|
622 | $place_id_to_institution_name_mapping{$place_doc_obj->get_OID()} = ", " . $place_institution;
|
---|
623 |
|
---|
624 | # Get place state and area (for searching)
|
---|
625 | $place_state_sql_handle->execute($place_id) or die "Could not execute SQL statement.";
|
---|
626 | my $place_state_abbr = $place_state_sql_handle->fetchrow();
|
---|
627 | &new_metadata_entry($place_doc_obj, "State", $place_state_abbr . " " . $state_abbr_to_name_mapping->{$place_state_abbr} . " " . $state_abbr_to_area_mapping->{$place_state_abbr});
|
---|
628 |
|
---|
629 | # Get place construction dates
|
---|
630 | my $place_construction_dates_table_html = "";
|
---|
631 | $place_construction_dates_sql_handle->execute($place_id) or die "Could not execute SQL statement.";
|
---|
632 | while (my $place_construction_dates_match_hashref = $place_construction_dates_sql_handle->fetchrow_hashref()) {
|
---|
633 | my $place_construction_date = $place_construction_dates_match_hashref->{"Date"};
|
---|
634 | if (!defined($place_construction_date)) {
|
---|
635 | # Landscape sites are allowed to have no construction information
|
---|
636 | next if ($place_type eq "Landscape site");
|
---|
637 |
|
---|
638 | print STDERR "<ProcessingError n='Place $place_id' p='CICPlug' r='Missing construction date'>\n" if ($self->{'gli'});
|
---|
639 | print STDERR "Error: Place $place_id -- Missing construction date.\n";
|
---|
640 | print $fail_log_handle "Error: Place $place_id -- Missing construction date.\n";
|
---|
641 | next;
|
---|
642 | }
|
---|
643 |
|
---|
644 | # Convert the construction date to a time period (for searching and browsing)
|
---|
645 | if ($place_construction_date =~ /^(\d{1,4}).*$/) {
|
---|
646 | my $place_construction_year = $1;
|
---|
647 |
|
---|
648 | my $place_time_period;
|
---|
649 | if ($place_construction_year < 1800) { $place_time_period = "pre-1800"; }
|
---|
650 | elsif ($place_construction_year < 1850) { $place_time_period = "1800-1850"; }
|
---|
651 | elsif ($place_construction_year < 1900) { $place_time_period = "1850-1900"; }
|
---|
652 | elsif ($place_construction_year < 1945) { $place_time_period = "1900-1945"; }
|
---|
653 | elsif ($place_construction_year <= 1995) { $place_time_period = "1945-1995"; }
|
---|
654 | elsif ($place_construction_year > 1995) { $place_time_period = "post-1995"; }
|
---|
655 | push(@{$place_date_to_ids_mapping{$place_time_period}}, $place_doc_obj->get_OID());
|
---|
656 | &new_metadata_entry($place_doc_obj, "Time_period", $place_time_period);
|
---|
657 | }
|
---|
658 | elsif ($place_construction_date ne "n.d." && $place_construction_date ne "unknown" && $place_construction_date ne "various") {
|
---|
659 | print STDERR "<ProcessingError n='Place $place_id' p='CICPlug' r='Warning: Unknown construction date: $place_construction_date'>\n" if ($self->{'gli'});
|
---|
660 | print STDERR "Warning: Place $place_id -- Unknown construction date: $place_construction_date.\n";
|
---|
661 | print $fail_log_handle "Warning: Place $place_id -- Unknown construction date: $place_construction_date.\n";
|
---|
662 | }
|
---|
663 |
|
---|
664 | my $place_construction_note = $place_construction_dates_match_hashref->{"Note"};
|
---|
665 | if (!defined($place_construction_note)) {
|
---|
666 | # "No date" entries are allowed to have no construction note
|
---|
667 | next if ($place_construction_date eq "n.d");
|
---|
668 |
|
---|
669 | print STDERR "<ProcessingError n='Place $place_id' p='CICPlug' r='Missing construction note'>\n" if ($self->{'gli'});
|
---|
670 | print STDERR "Error: Place $place_id -- Missing construction note.\n";
|
---|
671 | print $fail_log_handle "Error: Place $place_id -- Missing construction note.\n";
|
---|
672 | next;
|
---|
673 | }
|
---|
674 |
|
---|
675 | # Get the architects for this construction and remember them for later for the designer objects
|
---|
676 | my $place_construction_architect = $place_construction_dates_match_hashref->{"Architect_Name"} || "";
|
---|
677 | my $place_construction_architect_links = "";
|
---|
678 | foreach my $designer_name (split(/;/, $place_construction_architect)) {
|
---|
679 | $designer_name =~ s/\(.*?\)//g;
|
---|
680 | $designer_name =~ s/^\s*//;
|
---|
681 | $designer_name =~ s/\s*$//;
|
---|
682 |
|
---|
683 | my $designer_id = $designer_name_to_id_mapping{$designer_name};
|
---|
684 | if (!defined($designer_id)) {
|
---|
685 | $designer_id = scalar(keys(%designer_name_to_id_mapping)) + 1;
|
---|
686 | $designer_name_to_id_mapping{$designer_name} = $designer_id;
|
---|
687 | }
|
---|
688 | $place_construction_architect_links .= "<a href=\"_gwcgi_?a=d&d=d$designer_id\">$designer_name</a> ";
|
---|
689 | push(@{$designer_name_to_place_ids_mapping{$designer_name}}, $place_id);
|
---|
690 | }
|
---|
691 |
|
---|
692 | my $place_construction_date_prefix = $place_construction_dates_match_hashref->{"Prefix"} || "";
|
---|
693 | $place_construction_dates_table_html .= "<tr><td class=\"cicplaceconstructiondatetd\" valign=\"top\">$place_construction_date_prefix $place_construction_date</td><td valign=\"top\">$place_construction_note $place_construction_architect_links</td></tr>";
|
---|
694 | }
|
---|
695 | &new_metadata_entry($place_doc_obj, "PlaceConstructionDatesTableHTML", "<table id=\"cicplaceconstructiondatestable\" cellpadding=\"0\" cellspacing=\"0\">" . $place_construction_dates_table_html . "</table>");
|
---|
696 |
|
---|
697 | # Get place materials (individual buildings only)
|
---|
698 | if ($row_hashref->{"PlaceType"} eq "Individual building") {
|
---|
699 | $place_materials_sql_handle->execute($place_id) or die "Could not execute SQL statement.";
|
---|
700 | my $place_materials_match_hashref = $place_materials_sql_handle->fetchrow_hashref();
|
---|
701 | &new_metadata_entry($place_doc_obj, "MaterialFoundation", $place_materials_match_hashref->{"foundation"} || "");
|
---|
702 | &new_metadata_entry($place_doc_obj, "MaterialRoof", $place_materials_match_hashref->{"roof"} || "");
|
---|
703 | &new_metadata_entry($place_doc_obj, "MaterialWalls", $place_materials_match_hashref->{"walls"} || "");
|
---|
704 |
|
---|
705 | &new_metadata_entry($place_doc_obj, "Materials", $place_materials_match_hashref->{"foundation"} || "");
|
---|
706 | &new_metadata_entry($place_doc_obj, "Materials", $place_materials_match_hashref->{"roof"} || "");
|
---|
707 | &new_metadata_entry($place_doc_obj, "Materials", $place_materials_match_hashref->{"walls"} || "");
|
---|
708 | }
|
---|
709 |
|
---|
710 | # Get place functions
|
---|
711 | my $place_functions = "";
|
---|
712 | my $place_functions_table_html = "";
|
---|
713 | $place_functions_sql_handle->execute($place_id) or die "Could not execute SQL statement.";
|
---|
714 | while (my $place_functions_match_hashref = $place_functions_sql_handle->fetchrow_hashref()) {
|
---|
715 | my $place_function = $place_functions_match_hashref->{"Function"};
|
---|
716 | if (!defined($place_function)) {
|
---|
717 | print STDERR "<ProcessingError n='Place $place_id' p='CICPlug' r='Missing function'>\n" if ($self->{'gli'});
|
---|
718 | print STDERR "Error: Place $place_id -- Missing function.\n";
|
---|
719 | print $fail_log_handle "Error: Place $place_id -- Missing function.\n";
|
---|
720 | next;
|
---|
721 | }
|
---|
722 | # Check for multiline values (these are errors)
|
---|
723 | if ($place_function =~ /\n/) {
|
---|
724 | print STDERR "<ProcessingError n='Place $place_id' p='CICPlug' r='Multiline function'>\n" if ($self->{'gli'});
|
---|
725 | print STDERR "Error: Place $place_id -- Multiline function.\n";
|
---|
726 | print $fail_log_handle "Error: Place $place_id -- Multiline function.\n";
|
---|
727 | next;
|
---|
728 | }
|
---|
729 | my $place_year = $place_functions_match_hashref->{"Year"};
|
---|
730 | if (!defined($place_year)) {
|
---|
731 | print STDERR "<ProcessingError n='Place $place_id' p='CICPlug' r='Missing function year'>\n" if ($self->{'gli'});
|
---|
732 | print STDERR "Error: Place $place_id -- Missing function year.\n";
|
---|
733 | print $fail_log_handle "Error: Place $place_id -- Missing function year.\n";
|
---|
734 | next;
|
---|
735 | }
|
---|
736 | my $place_year_prefix = $place_functions_match_hashref->{"Prefix"} || "";
|
---|
737 | $place_functions_table_html .= "<tr><td class=\"cicplacefunctionyeartd\" valign=\"top\">$place_year_prefix $place_year</td><td valign=\"top\">$place_function</td></tr>";
|
---|
738 |
|
---|
739 | # Prepare function metadata for browsing and searching
|
---|
740 | my $place_function_to_index = lc($place_function); # Casefold
|
---|
741 | $place_function_to_index =~ s/^\s*//; # Remove whitespace from the start
|
---|
742 | if ($place_function_to_index =~ /^master plan/) {
|
---|
743 | $place_function_to_index =~ s/ \(campus,.*/ \(campus\)/;
|
---|
744 | $place_function_to_index =~ s/ \(campus:.*/ \(campus\)/;
|
---|
745 | $place_function_to_index =~ s/ \(landscape,.*/ \(landscape\)/;
|
---|
746 | }
|
---|
747 | else {
|
---|
748 | $place_function_to_index =~ s/\(.*\)//g; # Remove anything in parentheses
|
---|
749 | }
|
---|
750 | $place_function_to_index =~ s/\s*$//; # Remove whitespace from the end
|
---|
751 |
|
---|
752 | # Deal with common plural cases
|
---|
753 | $place_function_to_index =~ s/classroom$/classrooms/;
|
---|
754 | $place_function_to_index =~ s/department buildings$/department building/;
|
---|
755 | $place_function_to_index =~ s/faculty office$/faculty offices/;
|
---|
756 | $place_function_to_index =~ s/garden$/gardens/;
|
---|
757 | $place_function_to_index =~ s/residence halls$/residence hall/;
|
---|
758 | $place_function_to_index =~ s/private residences$/private residence/;
|
---|
759 |
|
---|
760 | # Check it is one of the valid function values
|
---|
761 | if (!defined($place_functions_mapping->{$place_function_to_index})) {
|
---|
762 | print STDERR "<ProcessingError n='Place $place_id' p='CICPlug' r='Warning: Unknown function: $place_function_to_index'>\n" if ($self->{'gli'});
|
---|
763 | print STDERR "Warning: Place $place_id -- Unknown function: $place_function_to_index.\n";
|
---|
764 | print $fail_log_handle "Warning: Place $place_id -- Unknown function: $place_function_to_index.\n";
|
---|
765 | next;
|
---|
766 | }
|
---|
767 | push(@{$place_function_to_ids_mapping{$place_function_to_index}}, $place_doc_obj->get_OID());
|
---|
768 | $place_functions .= "$place_function_to_index ";
|
---|
769 | }
|
---|
770 | &new_metadata_entry($place_doc_obj, "Functions", $place_functions);
|
---|
771 | &new_metadata_entry($place_doc_obj, "PlaceFunctionsTableHTML", "<table id=\"cicplacefunctionstable\" cellpadding=\"0\" cellspacing=\"0\">" . $place_functions_table_html . "</table>");
|
---|
772 |
|
---|
773 | # Get place significance
|
---|
774 | $place_significance_sql_handle->execute($place_id) or die "Could not execute SQL statement.";
|
---|
775 | while (my $place_significance_match_hashref = $place_significance_sql_handle->fetchrow_hashref()) {
|
---|
776 | my $place_significance = $place_significance_match_hashref->{"SigType"};
|
---|
777 | &new_metadata_entry($place_doc_obj, "Significance", lc($place_significance));
|
---|
778 | }
|
---|
779 |
|
---|
780 | # Get place references
|
---|
781 | $place_references_sql_handle->execute($place_id) or die "Could not execute SQL statement.";
|
---|
782 | my $place_references = $place_references_sql_handle->fetchrow();
|
---|
783 | if (defined($place_references)) {
|
---|
784 | $self->process_place_references($place_doc_obj, $place_id, $place_name, $place_institution, $place_references);
|
---|
785 | }
|
---|
786 |
|
---|
787 | # Get place designations
|
---|
788 | $place_designations_sql_handle->execute($place_id) or die "Could not execute SQL statement.";
|
---|
789 | my $place_designations_match_hashref = $place_designations_sql_handle->fetchrow_hashref();
|
---|
790 | if ($place_designations_match_hashref->{"National_Register"} eq "1") {
|
---|
791 | &new_metadata_entry($place_doc_obj, "Designation", "National Register");
|
---|
792 | }
|
---|
793 | if ($place_designations_match_hashref->{"Federal_Agency"} eq "1") {
|
---|
794 | &new_metadata_entry($place_doc_obj, "Designation", "National Historic Landmark");
|
---|
795 | }
|
---|
796 | if ($place_designations_match_hashref->{"HABS"} eq "1" || $place_designations_match_hashref->{"HAER"} eq "1") {
|
---|
797 | &new_metadata_entry($place_doc_obj, "Designation", "HABS/HAER");
|
---|
798 | }
|
---|
799 | if ($place_designations_match_hashref->{"Local_Designation"} eq "1") {
|
---|
800 | &new_metadata_entry($place_doc_obj, "Designation", "Local/State");
|
---|
801 | }
|
---|
802 |
|
---|
803 | # Get place narrative
|
---|
804 | $place_narrative_sql_handle->execute($place_id) or die "Could not execute SQL statement.";
|
---|
805 | my $place_narrative = $place_narrative_sql_handle->fetchrow();
|
---|
806 | if (defined($place_narrative)) {
|
---|
807 | my $place_narrative_html = &rtf_to_html($place_narrative);
|
---|
808 | $place_narrative_html =~ s/(<br \/>(\s|\n)*)*$//; # Remove any trailing <br /> tags
|
---|
809 | &new_metadata_entry($place_doc_obj, "PlaceNarrativeHTML", $place_narrative_html);
|
---|
810 | }
|
---|
811 |
|
---|
812 | # Get place images
|
---|
813 | my $place_images_html = "";
|
---|
814 | $place_images_sql_handle->execute($place_id) or die "Could not execute SQL statement.";
|
---|
815 | while (my $place_images_match_hashref = $place_images_sql_handle->fetchrow_hashref) {
|
---|
816 | my $place_image_location = $place_images_match_hashref->{"Location"};
|
---|
817 | my $place_image_name = $place_images_match_hashref->{"FileName"};
|
---|
818 |
|
---|
819 | # Deal with PDF files separately: don't convert, just associate
|
---|
820 | if ($place_image_location =~ /\.pdf$/i) {
|
---|
821 | # Convert the server location of the PDF file into the local location of the file
|
---|
822 | my $place_pdf_file_path = $place_image_location;
|
---|
823 | $place_pdf_file_path =~ s/^[A-Z]:/$self->{'images_directory'}/;
|
---|
824 |
|
---|
825 | if (-f $place_pdf_file_path) {
|
---|
826 | my ($place_pdf_file_name) = ($place_pdf_file_path =~ /^.+\\(.+)$/);
|
---|
827 | $place_doc_obj->associate_file($place_pdf_file_path, $place_pdf_file_name, undef, $place_doc_obj->get_top_section());
|
---|
828 |
|
---|
829 | $place_pdf_file_name =~ s/ /%20/g;
|
---|
830 | my $place_pdf_file_href = "_httpcollection_/index/assoc/[assocfilepath]/$place_pdf_file_name";
|
---|
831 | $place_images_html .= "<div class=\"cicplacepdf\"><a href=\"$place_pdf_file_href\">_iconpdf_</a><br /><a class=\"ciccaption\" href=\"$place_pdf_file_href\">$place_image_name (PDF)</a></div>\n";
|
---|
832 | }
|
---|
833 | else {
|
---|
834 | print STDERR "<ProcessingError n='$place_pdf_file_path' p='CICPlug' r='Does not exist'>\n" if ($self->{'gli'});
|
---|
835 | print STDERR "Error: File $place_pdf_file_path does not exist.\n";
|
---|
836 | print $fail_log_handle "Error: File $place_pdf_file_path does not exist.\n";
|
---|
837 | }
|
---|
838 | }
|
---|
839 | else {
|
---|
840 | my $place_image_small_file_href = $self->generate_place_image_variant($place_doc_obj, $place_image_location, "small");
|
---|
841 | my $place_image_large_file_href = $self->generate_place_image_variant($place_doc_obj, $place_image_location, "large");
|
---|
842 |
|
---|
843 | # Create a new section for each place image
|
---|
844 | my $place_image_section = $place_doc_obj->insert_section($place_doc_obj->get_end_child($place_doc_obj->get_top_section()));
|
---|
845 | $place_doc_obj->add_utf8_text($place_image_section, "_"); # This is necessary
|
---|
846 | $place_doc_obj->add_utf8_metadata($place_image_section, "DocumentType", "Image");
|
---|
847 | $place_doc_obj->add_utf8_metadata($place_image_section, "Title", $place_image_name);
|
---|
848 | $place_doc_obj->add_utf8_metadata($place_image_section, "ImagePath", $place_image_large_file_href);
|
---|
849 |
|
---|
850 | $place_images_html .= "<div class=\"cicplaceimage\"><a href=\"_gwcgi_?a=d&d=p$place_id.$place_image_section\"><img alt=\"$place_image_name\" src=\"$place_image_small_file_href\"/></a><br /><a class=\"ciccaption\" href=\"_gwcgi_?a=d&d=p$place_id.$place_image_section\">$place_image_name</a></div>\n";
|
---|
851 | }
|
---|
852 | }
|
---|
853 |
|
---|
854 | &new_metadata_entry($place_doc_obj, "PlaceImagesHTML", $place_images_html);
|
---|
855 |
|
---|
856 | $place_doc_obj->add_utf8_text($place_doc_obj->get_top_section(), "Some dummy text.");
|
---|
857 | $self->{'processor'}->process($place_doc_obj);
|
---|
858 | $self->{'num_processed'}++;
|
---|
859 |
|
---|
860 | # Build mappings for creating the static macrofiles
|
---|
861 | $place_id_to_name_mapping{$place_doc_obj->get_OID()} = $place_name;
|
---|
862 | push(@{$place_type_to_ids_mapping{$place_type}}, $place_doc_obj->get_OID());
|
---|
863 | }
|
---|
864 |
|
---|
865 | &write_bilevel_static_browser_macrofile("types", \%place_type_to_ids_mapping, \%place_id_to_name_mapping, \%place_id_to_institution_name_mapping);
|
---|
866 | &write_bilevel_static_browser_macrofile("styles", \%place_style_to_ids_mapping, \%place_id_to_name_mapping, \%place_id_to_institution_name_mapping);
|
---|
867 | &write_bilevel_static_browser_macrofile("dates", \%place_date_to_ids_mapping, \%place_id_to_name_mapping, \%place_id_to_institution_name_mapping);
|
---|
868 | &write_bilevel_static_browser_macrofile("functions", \%place_function_to_ids_mapping, \%place_id_to_name_mapping, \%place_id_to_institution_name_mapping);
|
---|
869 | }
|
---|
870 |
|
---|
871 |
|
---|
# Converts the RTF reference list of one place into HTML, stores that HTML on
# the place document as "PlaceReferencesHTML", and creates one separate
# "PlaceReference" document for every reference whose author/title could be
# determined (or that is deliberately left unparsed, see Case 0 below).
#
# Parameters (positional):
#   $self                        - plugin object; reads its 'failhandle', 'gli',
#                                  'file' and 'processor' entries and increments
#                                  'num_processed'
#   $place_doc_obj               - place document receiving "PlaceReferencesHTML"
#   $place_id                    - place identifier, used in messages and metadata
#   $place_name                  - copied to each reference document
#   $place_institution_name      - copied to each reference document
#   $place_references_rtf_string - RTF text containing all references of the place
#
# Side effects: writes warnings to STDERR and the fail log, passes each new
# reference document to $self->{'processor'}, and advances the counter
# $place_reference_id, which is declared outside this sub.
sub process_place_references
{
    my ($self, $place_doc_obj, $place_id, $place_name,
        $place_institution_name, $place_references_rtf_string) = @_;
    my $fail_log_handle = $self->{'failhandle'};

    # Convert the place references from RTF to HTML
    my $place_references_html_raw = rtf_to_html($place_references_rtf_string);

    # "http" or "www." directly after a space or '<' is reported as a web
    # address that was probably entered without a hyperlink tag
    if ($place_references_html_raw =~ /[ <](http|www\.)/) {
        print STDERR "<ProcessingError n='Place $place_id' p='CICPlug' r='Warning: Found possible web address in references without hyperlink tag.'>\n" if ($self->{'gli'});
        print STDERR "Warning: Place $place_id -- Found possible web address in references without hyperlink tag.\n";
        print {$fail_log_handle} "Warning: Place $place_id -- Found possible web address in references without hyperlink tag.\n";
    }

    # Split the references and try to parse title and author
    my $place_references_html = "";
    $place_references_html_raw =~ s/(\r|\n)//g;  # Remove all newlines
    $place_references_html_raw =~ s/<br \/><i><br \/>/<br \/><br \/><i>/g;  # Move italic tags

    # References are separated by two consecutive line breaks
    my @place_references = split(/<br \/>\s*<br \/>/, $place_references_html_raw);
    foreach my $place_reference (@place_references) {
        # Strip leading and trailing line breaks, then skip blank entries
        $place_reference =~ s/^(<br \/>\s*)*//;
        $place_reference =~ s/(<br \/>\s*)*$//;
        next if ($place_reference !~ /\w/);
        $place_references_html .= "<p class=\"cicreference\">" . $place_reference . "</p>\n";

        my $place_reference_author = "";
        my $place_reference_title = "";

        # The "first sentence" is everything before the first full stop, or
        # the whole reference when it contains no full stop
        my $place_reference_first_sentence = $place_reference;
        if ($place_reference =~ /^(.*?)\./) {
            $place_reference_first_sentence = $1;
        }

        # Case 0: A magic word in the first sentence
        if ($place_reference_first_sentence =~ /\b(collection|collections|papers|archives|database|letter|memo|inventory|photographs|minutes|reports|records)\b/i) {
            # Don't bother trying to parse the reference; it is still indexed
            # below, just without author and title
        }
        # Case 1: Author (possibly empty), then title in italics or quotes
        # NOTE(review): the quote alternative uses a greedy (.*), so with
        # several quoted strings the LAST one becomes the title, unlike the
        # italic alternative which takes the first -- confirm this is intended.
        elsif ($place_reference =~ /^(.*?)<i>(.*?)<\/i>/ || $place_reference =~ /^(.*)"(.*?)"/) {
            $place_reference_author = $1;
            $place_reference_title = $2;
        }
        # Case 2: Zero or one fullstops, assume no author and title is complete text
        elsif ($place_reference =~ /^[^\.]*\.[^\.]*$/ || $place_reference !~ /\./) {
            $place_reference_title = $place_reference;
        }
        else {
            # NOTE(review): the reference text is HTML and is placed unescaped
            # inside the single-quoted r='...' attribute -- confirm the GLI
            # copes with quotes and tags in it.
            print STDERR "<ProcessingError n='Place $place_id' p='CICPlug' r='Could not parse reference: $place_reference'>\n" if ($self->{'gli'});
            # print STDERR "Warning: Place $place_id -- Could not parse reference: $place_reference\n";
            print {$fail_log_handle} "Warning: Place $place_id -- Could not parse reference: $place_reference\n";
            next;
        }

        # Create a new Reference document for this place reference
        # NOTE(review): this reads $self->{'file'} while process_designers
        # reads $self->{'filename'} -- confirm which key is intended.
        my $place_reference_doc_obj = doc->new($self->{'file'} . "-", "indexed_doc");
        $place_reference_doc_obj->set_OID("pr$place_reference_id");
        new_metadata_entry($place_reference_doc_obj, "DocumentType", "PlaceReference");
        new_metadata_entry($place_reference_doc_obj, "PlaceID", $place_id);
        new_metadata_entry($place_reference_doc_obj, "PlaceName", $place_name);
        new_metadata_entry($place_reference_doc_obj, "PlaceInstitutionName", $place_institution_name);
        new_metadata_entry($place_reference_doc_obj, "Reference", $place_reference);
        new_metadata_entry($place_reference_doc_obj, "ReferenceAuthor", $place_reference_author);
        new_metadata_entry($place_reference_doc_obj, "ReferenceTitle", $place_reference_title);

        $place_reference_doc_obj->add_utf8_text($place_reference_doc_obj->get_top_section(), "Some dummy text.");
        $self->{'processor'}->process($place_reference_doc_obj);
        $self->{'num_processed'}++;
        $place_reference_id++;
    }

    new_metadata_entry($place_doc_obj, "PlaceReferencesHTML", $place_references_html);
}
|
---|
947 |
|
---|
948 |
|
---|
# Creates one "Designer" document per entry in %designer_name_to_id_mapping
# and writes the "designers" static browser macrofile.
#
# Each designer document gets an HTML table ("DesignerPlacesListHTML") listing
# the places recorded for that designer in %designer_name_to_place_ids_mapping,
# with a thumbnail, the place name, institution and first construction date.
#
# Parameters (positional):
#   $self - plugin object; reads its 'filename' and 'processor' entries and
#           increments 'num_processed'
#   $dbh  - DBI database handle used to prepare the lookup statements
#
# Dies with "Could not execute SQL statement." when any lookup fails to execute.
sub process_designers
{
    my ($self, $dbh) = @_;

    # Prepare SQL statement for getting the Place name
    my $place_name_sql_statement = "SELECT Current_name FROM tblPlace WHERE Entry_ID=?";
    my $place_name_sql_handle = $dbh->prepare($place_name_sql_statement);

    # Prepare SQL statement for getting the Place institution
    my $place_institution_sql_statement = "SELECT Institution_Name FROM tblInstitution,tblPlace WHERE tblInstitution.Institution_ID=tblPlace.Institution_ID and tblPlace.Entry_ID=?";
    my $place_institution_sql_handle = $dbh->prepare($place_institution_sql_statement);

    # Prepare SQL statement for getting the Place "date of construction"
    my $place_construction_date_sql_statement = "SELECT Date FROM tblConstruction_and_Dates WHERE Entry_ID=?";
    my $place_construction_date_sql_handle = $dbh->prepare($place_construction_date_sql_statement);

    # Prepare SQL statement for getting the Place non-PDF images
    my $place_images_sql_statement = "SELECT Location,FileName FROM tblImages WHERE FileType=1 AND Location NOT LIKE '%.pdf' AND Entry_ID=? ORDER BY Image_Order";
    my $place_images_sql_handle = $dbh->prepare($place_images_sql_statement);
    # NOTE(review): the DBI documentation says changing LongReadLen on a
    # statement handle after prepare typically has no effect -- confirm whether
    # this should be set on $dbh before the prepare call instead.
    $place_images_sql_handle->{LongReadLen} = 65536;

    # Create a document object for each designer
    my %designer_id_to_name_mapping;
    foreach my $designer_name (keys %designer_name_to_id_mapping) {
        my $designer_id = $designer_name_to_id_mapping{$designer_name};
        # print STDERR "  Designer $designer_id\n";
        my $designer_doc_obj = doc->new($self->{'filename'} . "-", "indexed_doc");
        $designer_doc_obj->set_OID("d$designer_id");
        new_metadata_entry($designer_doc_obj, "DocumentType", "Designer");

        new_metadata_entry($designer_doc_obj, "Designer_name", $designer_name);

        # Get designer places; the default sort is a string sort, which is
        # enough to bring equal place IDs next to each other
        my $designer_places_list_html = "";
        my $last_designer_place_id = "";
        foreach my $designer_place_id (sort(@{$designer_name_to_place_ids_mapping{$designer_name}})) {
            # The designer may have worked on a place multiple times, so check for this
            next if ($designer_place_id eq $last_designer_place_id);
            $last_designer_place_id = $designer_place_id;

            # Get place name (single-column query, first row only)
            $place_name_sql_handle->execute($designer_place_id) or die "Could not execute SQL statement.";
            my $designer_place_name = $place_name_sql_handle->fetchrow_array();

            # Get place institution name
            $place_institution_sql_handle->execute($designer_place_id) or die "Could not execute SQL statement.";
            my $designer_place_institution_name = $place_institution_sql_handle->fetchrow_array();

            # Get place date of construction (empty when there is none)
            $place_construction_date_sql_handle->execute($designer_place_id) or die "Could not execute SQL statement.";
            my $designer_place_construction_date_value = $place_construction_date_sql_handle->fetchrow_array() || "";

            # Get the first non-PDF image for this place
            my $designer_place_image_small_file_href;
            $place_images_sql_handle->execute($designer_place_id) or die "Could not execute SQL statement.";
            my $designer_place_images_match_hashref = $place_images_sql_handle->fetchrow_hashref();
            my $designer_place_image_location = $designer_place_images_match_hashref->{"Location"};
            my $designer_place_image_name = $designer_place_images_match_hashref->{"FileName"} || "No image";
            if (defined($designer_place_image_location)) {
                $designer_place_image_small_file_href = $self->generate_place_image_variant($designer_doc_obj, $designer_place_image_location, "small");
            }
            else {
                # There is no non-PDF image for this place
                $designer_place_image_small_file_href = "_httpcollection_/images/no_image-small.jpg";
            }

            # One table row per place: thumbnail linking to the place, then
            # the place name, institution and date of construction
            $designer_places_list_html .= "<tr><td valign=\"top\"><a href=\"_gwcgi_?a=d&d=p$designer_place_id\"><img alt=\"$designer_place_image_name\" src=\"$designer_place_image_small_file_href\"/></a></td><td valign=\"top\"><a href=\"_gwcgi_?a=d&d=p$designer_place_id\">$designer_place_name</a>, $designer_place_institution_name<br /><b>Date of construction:</b> $designer_place_construction_date_value</td></tr>\n";
        }

        new_metadata_entry($designer_doc_obj, "DesignerPlacesListHTML", "<table>" . $designer_places_list_html . "</table>");

        $designer_doc_obj->add_utf8_text($designer_doc_obj->get_top_section(), "Some dummy text.");
        $self->{'processor'}->process($designer_doc_obj);
        $self->{'num_processed'}++;

        $designer_id_to_name_mapping{$designer_doc_obj->get_OID()} = $designer_name;
    }

    # Write the designers.dm macrofile
    write_static_browser_macrofile("designers", \%designer_id_to_name_mapping);
}
|
---|
1032 |
|
---|
1033 |
|
---|
# Adds a single metadata value to the top section of a document object.
#
# Parameters (positional):
#   $doc_obj        - object providing get_top_section() and add_utf8_metadata()
#   $metadata_name  - metadata element name; spaces are replaced by underscores
#   $metadata_value - raw value; an undefined or empty value is skipped
#
# The value is converted with unicode::ascii2utf8, has '[' and ']' replaced by
# numeric entities (except in the two assocfilepath references handled below),
# and has every '&' that does not start a numeric entity escaped as "&amp;".
#
# Returns nothing when the value is skipped; otherwise whatever
# add_utf8_metadata() returns.
sub new_metadata_entry
{
    my ($doc_obj, $metadata_name, $metadata_value) = @_;

    # Don't bother with empty (or missing) metadata
    return if (!defined($metadata_value) || $metadata_value eq "");

    # Spaces aren't allowed in metadata names
    $metadata_name =~ s/ /_/g;

    # Anything from the database is ISO 8859-1 encoded, so convert to UTF-8
    $metadata_value = unicode::ascii2utf8(\$metadata_value);

    # Escape any '[' and ']' characters so Greenstone doesn't try to treat the text as metadata...
    $metadata_value =~ s/\[/&\#91;/g;
    $metadata_value =~ s/\]/&\#93;/g;

    # ...but don't mess up real metadata references!
    $metadata_value =~ s/&\#91;assocfilepath&\#93;/\[assocfilepath\]/g;
    $metadata_value =~ s/&\#91;parent\(Top\)\:assocfilepath&\#93;/\[parent(Top):assocfilepath\]/g;

    # Escape any '&' characters so the metadata is HTML 4 compliant when
    # displayed. Numeric entities ("&#...") are left alone, which also protects
    # the bracket entities produced above. A lookahead is used instead of
    # consuming the following character so that a trailing '&' and runs such
    # as "&&" are escaped too.
    $metadata_value =~ s/&(?!\#)/&amp;/g;

    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), $metadata_name, $metadata_value);
}
|
---|
1060 |
|
---|
1061 |
|
---|
# Converts a fragment of RTF into simple HTML using regular expressions only.
#
# Parameter:
#   $rtf_string - the RTF text to convert
#
# Returns the converted string. Brace groups are dropped (a group containing
# "HYPERLINK " becomes an <a> element), quote control words become ' or ",
# \par becomes <br />, underline and italic control words become <i>...</i>,
# any other control word is removed, and \'hh escapes become numeric
# character references.
sub rtf_to_html
{
    my $rtf_string = shift(@_);

    # Remove everything in curly braces, but keep any hyperlinks. Each pass
    # handles the first "{...}" that ends at the nearest '}' on the same line.
    while ($rtf_string =~ /\{(.*?)\}/) {
        my $group_text = $1;
        my $group_replacement = "";
        if ($group_text =~ /HYPERLINK (.*)/) {
            my $link_url = $1;
            # Trim before unquoting so that a target such as '"url" ' loses
            # its quotes as well, then trim again for spaces inside the quotes
            $link_url =~ s/^\s+//;
            $link_url =~ s/\s+$//;
            $link_url =~ s/^\"(.*?)\"$/$1/;  # Remove surrounding quotes
            $link_url =~ s/^\s+//;
            $link_url =~ s/\s+$//;
            $group_replacement = "<a href=\"$link_url\">$link_url</a>";
        }
        $rtf_string =~ s/\{.*?\}/$group_replacement/;
    }

    # Quote control words; one following whitespace character is consumed
    # when present, otherwise the word must end at a word boundary
    $rtf_string =~ s/\\ldblquote(?:\s|\b)/\"/g;
    $rtf_string =~ s/\\rdblquote(?:\s|\b)/\"/g;
    $rtf_string =~ s/\\lquote(?:\s|\b)/\'/g;
    $rtf_string =~ s/\\rquote(?:\s|\b)/\'/g;

    # Paragraph control words. \pard must go first because \par is a prefix
    # of it; neither pattern requires a word boundary.
    $rtf_string =~ s/\\pard//g;
    $rtf_string =~ s/\\par/<br \/>/g;

    # Underline and italics are both rendered as <i>...</i>
    $rtf_string =~ s/\\ul(?:\s|\b)/<i>/g;
    $rtf_string =~ s/\\ulnone(?:\s|\b)/<\/i>/g;
    $rtf_string =~ s/\\i(?:\s|\b)/<i>/g;
    $rtf_string =~ s/\\i0(?:\s|\b)/<\/i>/g;

    $rtf_string =~ s/\\super //g;
    $rtf_string =~ s/\\nosupersub //g;
    $rtf_string =~ s/\\~/ /g;

    # Drop every remaining control word and any unmatched closing brace
    $rtf_string =~ s/\\([A-Za-z0-9\-]+)//g;
    $rtf_string =~ s/\}//g;

    # Assume non-ASCII is ISO 8859-1, and convert into HTML entities. Only
    # genuine two-digit hex escapes are converted, in either letter case.
    $rtf_string =~ s/\\'([0-9a-fA-F]{2})/"&#" . hex($1) . ";"/ge;

    # Remove extra less-than and greater-than symbols
    $rtf_string =~ s/< </</g;
    $rtf_string =~ s/<<+/</g;
    $rtf_string =~ s/> >/>/g;
    $rtf_string =~ s/>>+/>/g;

    # Remove empty tags for HTML 4 compliance
    $rtf_string =~ s/<i>\s*<\/i>/ /g;

    return $rtf_string;
}
|
---|
1119 |
|
---|
1120 |
|
---|
# Determines the pixel dimensions of a place image.
#
# Arguments:
#   $self                  - the plugin object; its 'failhandle', 'gli' and
#                            'verbosity' entries are read
#   $place_image_file_path - local path of the image file
#
# Returns the list (width, height). Returns an empty list when the image file
# does not exist or cannot be identified; in both cases an error is printed to
# STDERR and to the fail log handle.
#
# Results are cached next to the image in "<image path>.info", a three line
# file holding the image's modification time, its width and its height. The
# cache is only trusted when all three values are present, numeric, and the
# stored modification time equals the image's current one. A missing, stale,
# unreadable or malformed cache simply causes the image to be identified again.
# Failure to write the cache is reported as a warning but is not an error.
sub get_place_image_dimensions
{
    my ($self, $place_image_file_path) = @_;
    my $fail_log_handle = $self->{'failhandle'};

    # Make sure the place image file actually exists
    if (!-f $place_image_file_path) {
        print STDERR "<ProcessingError n='$place_image_file_path' p='CICPlug' r='Does not exist'>\n" if ($self->{'gli'});
        print STDERR "Error: Image $place_image_file_path does not exist.\n";
        print $fail_log_handle "Error: Image $place_image_file_path does not exist.\n";
        return;
    }
    my $place_image_file_date = (stat($place_image_file_path))[9];

    # Check if this place image has already been identified by looking for a ".info" file in the same directory
    my $place_image_info_file_path = $place_image_file_path . ".info";
    if (-f $place_image_info_file_path) {
        if (open(my $info_in, '<', $place_image_info_file_path)) {
            my @place_image_info = <$info_in>;
            close($info_in);

            # Strip line terminators (including a stray CR from a file written on another platform)
            foreach my $info_line (@place_image_info) {
                $info_line =~ s/[\r\n]+\z//;
            }
            my ($cached_date, $cached_width, $cached_height) = @place_image_info;

            # Only use the cache if it is complete, well-formed and describes this exact version of the image
            if (defined($cached_height)
                && $cached_date =~ /^\d+$/
                && $cached_width =~ /^\d+$/
                && $cached_height =~ /^\d+$/
                && $cached_date == $place_image_file_date) {
                return ($cached_width, $cached_height);
            }
        }
        else {
            print STDERR "Warning: Could not read $place_image_info_file_path ($!); re-identifying image.\n";
        }
    }

    # We haven't already identified the place image, so do it now
    # NOTE(review): the path is interpolated into a shell command line; this is
    # only safe as long as image paths come from a trusted source.
    print STDERR "Identifying place image $place_image_file_path...\n";
    my $identify_command = "identify -format \"%w %h\" \"$place_image_file_path\"";
    my $identify_result = `$identify_command`;
    $identify_result = "" if (!defined($identify_result));   # command could not be run at all
    print "Identify result: $identify_result\n" if ($self->{'verbosity'} > 2);

    # Check that the output is what we're expecting, and parse the width and height from it
    my ($place_image_width, $place_image_height) = ($identify_result =~ /(\d+) (\d+)/);
    if (!defined($place_image_width)) {
        print STDERR "<ProcessingError n='$place_image_file_path' p='CICPlug' r='Could not identify'>\n" if ($self->{'gli'});
        print STDERR "Error: Place image $place_image_file_path could not be identified.\n";
        print $fail_log_handle "Error: Place image $place_image_file_path could not be identified.\n";
        return;
    }

    # Write the place image info file so we don't have to identify this exact image again in the future
    if (open(my $info_out, '>', $place_image_info_file_path)) {
        print $info_out "$place_image_file_date\n";
        print $info_out "$place_image_width\n";
        print $info_out "$place_image_height\n";
        close($info_out)
            or print STDERR "Warning: Could not finish writing $place_image_info_file_path ($!).\n";
    }
    else {
        print STDERR "Warning: Could not write $place_image_info_file_path ($!).\n";
    }
    return ($place_image_width, $place_image_height);
}
|
---|
1183 |
|
---|
1184 |
|
---|
# Makes a size variant (the size name is used to look up the
# '<size>_image_type', '<size>_image_width' and '<size>_image_options' entries
# of the plugin object) of a place image available to a document.
#
# The image location is expected to start with a drive letter prefix ("X:"),
# which is swapped for the plugin's 'images_directory' to find the source image
# and for its 'cache_directory' to place the generated variant.
#
# If the source image is wider than the desired width, a scaled copy is created
# with the external "convert" command, unless an up-to-date copy already
# exists. Otherwise the source image itself is used. The chosen file is
# associated with the top section of $doc_obj.
#
# Returns the href under which the associated file can be referenced, or
# nothing if the dimensions of the source or variant image could not be
# obtained (get_place_image_dimensions() reports the error).
sub generate_place_image_variant
{
    my ($self, $doc_obj, $place_image_location, $place_image_variant_size) = @_;

    # Convert the server location of the file into the local location of the file
    (my $source_path = $place_image_location) =~ s/^[A-Z]:/$self->{'images_directory'}/;

    # The source dimensions decide whether any scaling is needed
    my ($source_width, $source_height) = $self->get_place_image_dimensions($source_path);
    return unless (defined($source_width) && defined($source_height));
    my $source_mtime = (stat($source_path))[9];

    # Work out where the variant lives in the cache directory and what it is called
    my $variant_suffix = "-" . $place_image_variant_size . "." . $self->{$place_image_variant_size . '_image_type'};
    (my $variant_path = $place_image_location) =~ s/^[A-Z]:/$self->{'cache_directory'}/;
    $variant_path =~ s/^(.+)(\..*)/$1$variant_suffix/;
    my ($variant_file_name) = ($variant_path =~ /^.+\\(.+)$/);

    my $desired_width = $self->{$place_image_variant_size . '_image_width'};
    if ($source_width > $desired_width) {
        # Regenerate only when the variant is missing or older than the source image
        my $variant_is_stale = (!-f $variant_path || $source_mtime > (stat($variant_path))[9]);
        if ($variant_is_stale) {
            print STDERR "Generating place image variant $variant_path...\n";
            my ($variant_directory) = ($variant_path =~ /^(.+)\\.+$/);
            &util::mk_all_dir($variant_directory);
            my $convert_options = "-scale $desired_width " . $self->{$place_image_variant_size . '_image_options'};
            my $convert_command = "convert $convert_options \"$source_path\" \"$variant_path\"";
            my $convert_output = `$convert_command`;
        }
    }
    else {
        # The source is no wider than requested, so the original file is used as the variant
        $variant_path = $source_path;
    }

    # Confirm the file we are about to associate is a usable image
    my ($variant_width, $variant_height) = $self->get_place_image_dimensions($variant_path);
    return unless (defined($variant_width) && defined($variant_height));

    # Associate the place image variant file
    $doc_obj->associate_file($variant_path, $variant_file_name, undef, $doc_obj->get_top_section());

    # Build the href for the associated file, encoding spaces
    my $variant_href = "_httpcollection_/index/assoc/{Or}{[parent(Top):assocfilepath],[assocfilepath]}/" . $variant_file_name;
    $variant_href =~ s/ /%20/g;
    return $variant_href;
}
|
---|
1243 |
|
---|
1244 |
|
---|
# Creates the macro file "<package name>.dm" in the cic-hcap collection's
# macros directory (under GSDLHOME) and fills it with a single static browser
# package built from the given id-to-name mapping.
#
# Dies if the macro file cannot be opened for writing. The file is written
# through the package-wide BROWSER_MACROFILE handle, which
# write_static_browser_macros() prints to.
sub write_static_browser_macrofile
{
    my ($static_browser_package_name, $id_to_name_mapping) = @_;

    my $macrofile_path = $ENV{'GSDLHOME'} . "\\collect\\cic-hcap\\macros\\" . $static_browser_package_name . ".dm";
    print STDERR "Writing $macrofile_path...\n";
    if (!open(BROWSER_MACROFILE, ">$macrofile_path")) {
        die "Error: Could not write to $macrofile_path.\n";
    }

    # No "extra" mapping is supplied for single-level browsers
    write_static_browser_macros($static_browser_package_name, $id_to_name_mapping);
    close(BROWSER_MACROFILE);
}
|
---|
1256 |
|
---|
1257 |
|
---|
# Prints one static browser package to the BROWSER_MACROFILE handle, which the
# caller must already have opened.
#
# Arguments:
#   $static_browser_package_name - name written on the "package" line
#   $id_to_name_mapping          - hash reference mapping ids to display names
#   $id_to_extra_mapping         - optional hash reference mapping ids to extra
#                                  text; passed through to
#                                  get_static_browser_macro_chunk()
#
# Two macros are written: _cicstaticbrowserquicklinks_ (the letters A to Z,
# linked when at least one name is grouped under that letter) and
# _cicstaticbrowser_ (a table with one chunk per group). Names are grouped by
# the first ASCII letter or digit they contain, with case preserved; only
# groups named by a single upper-case letter get an anchor.
sub write_static_browser_macros
{
    my ($static_browser_package_name, $id_to_name_mapping, $id_to_extra_mapping) = @_;

    print BROWSER_MACROFILE "package $static_browser_package_name\n\n";
    print BROWSER_MACROFILE "_cicstaticbrowserquicklinks_ {\n";

    # Group the ids by the first letter or digit appearing in their name
    my %ids_for_letter;
    foreach my $id (keys %$id_to_name_mapping) {
        my ($first_char) = ($id_to_name_mapping->{$id} =~ /([A-Za-z0-9])/);
        push(@{$ids_for_letter{$first_char}}, $id);
    }

    # Quick links: a letter is only a link when there is something to jump to
    my @quicklinks = map {
        defined($ids_for_letter{$_}) ? "<a href=\"#$_\">$_</a> " : "$_ "
    } ('A' .. 'Z');
    print BROWSER_MACROFILE "<b>" . join("", @quicklinks) . "</b>\n" . "}\n\n";

    # The browser itself: one chunk of table rows per group, in sorted group order
    print BROWSER_MACROFILE "_cicstaticbrowser_ {\n";
    print BROWSER_MACROFILE "<table cellpadding=\"0\" cellspacing=\"0\" width=\"_pagewidth_\">\n";
    foreach my $group (sort(keys %ids_for_letter)) {
        my @group_ids = @{$ids_for_letter{$group}};
        my $anchor_name = ($group =~ /^[A-Z]$/) ? $group : undef;
        print BROWSER_MACROFILE &get_static_browser_macro_chunk($group, $anchor_name, \@group_ids, $id_to_name_mapping, $id_to_extra_mapping);
    }
    print BROWSER_MACROFILE "</table>\n";

    print BROWSER_MACROFILE "}\n";
}
|
---|
1300 |
|
---|
1301 |
|
---|
# Builds the HTML table rows for one titled group of a static browser.
#
# Arguments:
#   $chunk_title         - heading text of the group
#   $anchor_name         - name for an <a name="..."> anchor placed before the
#                          heading; no anchor is written if undefined or empty
#   $chunk_ids_ref       - array reference of the ids belonging to the group
#   $id_to_name_mapping  - hash reference mapping ids to display names
#   $id_to_extra_mapping - optional hash reference mapping ids to extra text
#                          shown after the link (may be undefined)
#
# Returns the rows as one string: a heading row followed by the entries laid
# out in two columns, the first half of the sorted entries on the left and the
# remainder on the right.
#
# Entries are ordered case-insensitively by name (followed by a space and the
# extra text when an extra mapping is given). Every id gets its own entry, so
# ids that share the same name are all listed; ties are broken by the exact
# sort key and then by id to keep the output stable.
sub get_static_browser_macro_chunk
{
    my ($chunk_title, $anchor_name, $chunk_ids_ref, $id_to_name_mapping, $id_to_extra_mapping) = @_;

    # Heading row, optionally carrying the anchor
    my $static_browser_macro_chunk = "<tr><td width=\"50%\"><br />";
    if (defined($anchor_name) && $anchor_name ne "") {
        $static_browser_macro_chunk .= "<a name=\"$anchor_name\"/>";
    }
    $static_browser_macro_chunk .= "<span style=\"color: black;\"><b>$chunk_title</b></span></td><td width=\"50%\"></td></tr>";

    # Pair each id with its sort key. Keeping (key, id) pairs rather than a
    # hash keyed by name means two ids with the same name cannot overwrite
    # each other.
    my @entries;
    foreach my $id (@{$chunk_ids_ref}) {
        my $name = $id_to_name_mapping->{$id};
        my $sort_key = defined($name) ? $name : "";
        if (defined($id_to_extra_mapping)) {
            my $raw_extra = $id_to_extra_mapping->{$id};
            $sort_key .= " " . (defined($raw_extra) ? $raw_extra : "");
        }
        push(@entries, { 'id' => $id, 'sort_key' => $sort_key });
    }
    my @sorted_ids = map { $_->{'id'} }
                     sort {
                         lc($a->{'sort_key'}) cmp lc($b->{'sort_key'})
                             || $a->{'sort_key'} cmp $b->{'sort_key'}
                             || $a->{'id'} cmp $b->{'id'}
                     } @entries;

    # Renders the link (and trailing extra text) for a single id
    my $render_entry = sub {
        my ($id) = @_;
        my $name = $id_to_name_mapping->{$id};
        my $extra = (defined($id_to_extra_mapping) && $id_to_extra_mapping->{$id}) || "";
        return "<a href=\"_gwcgi_?a=d&d=$id\">" . &html_safe($name) . "</a>" . &html_safe($extra);
    };

    # Left column holds the first half (rounded up), right column the rest
    my $half_point = int((scalar(@sorted_ids) + 1) / 2);
    for (my $i = 0; $i < $half_point; $i++) {
        $static_browser_macro_chunk .= "<tr>";
        $static_browser_macro_chunk .= "<td valign=\"top\">" . $render_entry->($sorted_ids[$i]) . "</td>";

        # The right cell stays empty on the last row when the entry count is odd
        $static_browser_macro_chunk .= "<td valign=\"top\">";
        if (defined($sorted_ids[$i + $half_point])) {
            $static_browser_macro_chunk .= $render_entry->($sorted_ids[$i + $half_point]);
        }
        $static_browser_macro_chunk .= "</td>";

        $static_browser_macro_chunk .= "</tr>";
    }

    return $static_browser_macro_chunk;
}
|
---|
1349 |
|
---|
1350 |
|
---|
# Creates the macro file "<package root>.dm" in the cic-hcap collection's
# macros directory (under GSDLHOME) and writes one static browser package into
# it per category.
#
# Arguments:
#   $static_browser_package_root - macro file base name and prefix of every
#                                  package name
#   $category_to_ids_mapping     - hash reference mapping a category to an
#                                  array reference of ids
#   $id_to_name_mapping          - hash reference mapping ids to display names
#   $id_to_extra_mapping         - hash reference mapping ids to extra text,
#                                  handed on to write_static_browser_macros()
#
# Each package is named by appending the category to the root and removing all
# non-word characters. Dies if the macro file cannot be opened for writing.
sub write_bilevel_static_browser_macrofile
{
    my ($static_browser_package_root, $category_to_ids_mapping, $id_to_name_mapping, $id_to_extra_mapping) = @_;

    my $macrofile_path = $ENV{'GSDLHOME'} . "\\collect\\cic-hcap\\macros\\" . $static_browser_package_root . ".dm";
    print STDERR "Writing $macrofile_path...\n";
    if (!open(BROWSER_MACROFILE, ">$macrofile_path")) {
        die "Error: Could not write to $macrofile_path.\n";
    }

    foreach my $category (keys(%{$category_to_ids_mapping})) {
        (my $package_name = $static_browser_package_root . $category) =~ s/\W//g;

        # Restrict the name mapping to the ids of this category
        my %names_in_category = ();
        foreach my $id (@{$category_to_ids_mapping->{$category}}) {
            $names_in_category{$id} = $id_to_name_mapping->{$id};
        }
        write_static_browser_macros($package_name, \%names_in_category, $id_to_extra_mapping);
    }

    close(BROWSER_MACROFILE);
}
|
---|
1375 |
|
---|
1376 |
|
---|
# Escapes text for inclusion in the generated HTML macros.
#
#   - A bare ampersand becomes &amp;. An ampersand that already starts a
#     character reference (numeric such as &#233; or &#xE9;, or named such as
#     &nbsp;) is left alone so existing entities are not double-escaped.
#   - An apostrophe becomes &#39;.
#
# Returns the escaped copy of the argument. An undefined argument is returned
# unchanged.
sub html_safe
{
    my $text = shift(@_);
    return $text if (!defined($text));

    $text =~ s/&(?!(?:#[0-9]+|#[xX][0-9A-Fa-f]+|[A-Za-z][A-Za-z0-9]*);)/&amp;/g;
    $text =~ s/\'/&\#39;/g; # Apostrophes mess up Javascript on the Search by State page
    return $text;
}
|
---|
1384 |
|
---|
1385 |
|
---|
# Creates the macro file "<package name>.dm" in the cic-hcap collection's
# macros directory (under GSDLHOME) for the browse-by-state page.
#
# Arguments:
#   $static_browser_package_name - macro file base name and package name
#   $state_to_ids_mapping        - hash reference mapping a state name to an
#                                  array reference of ids
#   $id_to_name_mapping          - hash reference mapping ids to display names
#
# The states are taken from the file-level %state_name_to_abbr_mapping, in
# sorted name order. The _cicstaticbrowser_ macro is a table that references
# one _cicstate<abbr>_ macro per state. Each of those macros is then written
# twice: as plain HTML, and as _cicstate<abbr>js_ after passing the same HTML
# through javascript_safe(). A state without any ids gets a "No institutions
# for this state" row. Dies if the macro file cannot be opened for writing.
sub write_state_browser_macrofile
{
    my ($static_browser_package_name, $state_to_ids_mapping, $id_to_name_mapping) = @_;

    my $macrofile_path = $ENV{'GSDLHOME'} . "\\collect\\cic-hcap\\macros\\" . $static_browser_package_name . ".dm";
    print STDERR "Writing $macrofile_path...\n";
    if (!open(BROWSER_MACROFILE, ">$macrofile_path")) {
        die "Error: Could not write to $macrofile_path.\n";
    }
    print BROWSER_MACROFILE "package $static_browser_package_name\n\n";
    print BROWSER_MACROFILE "_cicstaticbrowser_ {\n";

    my @state_names = sort(keys(%state_name_to_abbr_mapping));

    # The overall browser is just the per-state macros stacked inside one table
    print BROWSER_MACROFILE "<table cellpadding=\"0\" cellspacing=\"0\" width=\"_pagewidth_\">\n";
    foreach my $state (@state_names) {
        print BROWSER_MACROFILE "_cicstate" . $state_name_to_abbr_mapping{$state} . "_\n";
    }
    print BROWSER_MACROFILE "</table>\n";
    print BROWSER_MACROFILE "}\n";

    foreach my $state (@state_names) {
        # The anchor is the state name with its spaces removed
        (my $state_anchor = $state) =~ s/ //g;
        my $state_abbr = $state_name_to_abbr_mapping{$state};

        my $has_institutions = defined($state_to_ids_mapping->{$state});
        my @state_ids = ();
        @state_ids = @{$state_to_ids_mapping->{$state}} if ($has_institutions);

        my $state_chunk = &get_static_browser_macro_chunk($state, $state_anchor, \@state_ids, $id_to_name_mapping, undef);
        if (!$has_institutions) {
            $state_chunk .= "<tr><td colspan=\"2\" valign=\"top\">No institutions for this state</td></tr>";
        }

        # Write out the normal macro chunk
        print BROWSER_MACROFILE "\n_cicstate" . $state_abbr . "_ {" . $state_chunk . "}\n";

        # Write out a Javascript safe version
        print BROWSER_MACROFILE "\n_cicstate" . $state_abbr . "js_ {" . &javascript_safe($state_chunk) . "}\n";
    }

    close(BROWSER_MACROFILE);
}
|
---|
1433 |
|
---|
1434 |
|
---|
# Prepares a piece of HTML for embedding in Javascript within a macro:
# every carriage return and newline is removed, and each "</" is rewritten
# as "<" followed by two backslashes and "/".
# Returns the converted copy of the argument.
sub javascript_safe
{
    my ($text) = @_;
    $text =~ tr/\r\n//d;       # No newlines allowed
    $text =~ s{</}{<\\\\/}g;   # Two literal backslashes go in front of the slash
    return $text;
}
|
---|
1442 |
|
---|
1443 |
|
---|
1444 | 1;
|
---|