source: trunk/cic-hcap/perllib/plugins/CICPlug.pm@ 13091

Last change on this file since 13091 was 13091, checked in by mdewsnip, 18 years ago

Changes to the PlaceImagesHTML, PlaceConstructionDatesTableHTML and PlaceFunctionsTableHTML metadata for the new place page design.

  • Property svn:keywords set to Author Date Id Revision
File size: 63.0 KB
Line 
1###########################################################################
2#
3# CICPlug.pm
4#
5# Copyright (C) 2005 New Zealand Digital Library Project
6#
7# This program is free software; you can redistribute it and/or modify
8# it under the terms of the GNU General Public License as published by
9# the Free Software Foundation; either version 2 of the License, or
10# (at your option) any later version.
11#
12# This program is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15# GNU General Public License for more details.
16#
17# You should have received a copy of the GNU General Public License
18# along with this program; if not, write to the Free Software
19# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20#
21###########################################################################
22
23package CICPlug;
24
25
26use BasPlug;
27use DBI;
28use strict;
29no strict 'refs';
30
31
# Declare the inheritance chain at compile time: CICPlug is a BasPlug.
BEGIN {
    @CICPlug::ISA = qw(BasPlug);
}
35
36
# Plugin-specific arguments, in the order they are appended to the shared
# "ArgList" by new(): the two directory arguments first, then the
# options/type/width triple for each generated image variant.
my $arguments =
    [
     +{ 'name' => "images_directory",
        'type' => "string",
        'deft' => "",
        'reqd' => "yes" },
     +{ 'name' => "cache_directory",
        'type' => "string",
        'deft' => &util::filename_cat($ENV{'GSDLHOME'}, "tmp"),
        'reqd' => "no" },
     # One (options, type, width) group per image variant; only the name
     # prefix and the default width differ between variants.
     (map {
         my ($variant, $default_width) = @{$_};
         (
          +{ 'name' => "${variant}_image_options",
             'type' => "string",
             'deft' => "",
             'reqd' => "no" },
          +{ 'name' => "${variant}_image_type",
             'type' => "string",
             'deft' => "jpg",
             'reqd' => "no" },
          +{ 'name' => "${variant}_image_width",
             'type' => "string",
             'deft' => $default_width,
             'reqd' => "no" },
         );
      } (["large", "800"], ["medium", "375"], ["small", "125"])),
    ];
84
# Plugin description record appended to the shared "OptList" by new().
my $options = {};
@{$options}{qw(name desc abstract inherits)} =
    ('CICPlug', '{CICPlug.desc}', 'no', 'yes');
89
90
# Returns the default regular expression (as a string) selecting the files
# this plugin processes: names ending in ".mdb", matched case-insensitively.
sub get_default_process_exp
{
    my $mdb_file_pattern = '(?i)\.mdb$';
    return $mdb_file_pattern;
}
95
96
# Constructor.
#
# Arguments:
#   $class           - class name being constructed
#   $pluginlist      - array ref; $class is appended to it
#   $inputargs       - passed through unchanged to the BasPlug constructor
#   $hashArgOptLists - hash ref; this plugin's argument definitions are
#                      appended to its "ArgList" entry and its option record
#                      to its "OptList" entry
#
# Returns the object built by the BasPlug constructor, blessed into $class.
sub new
{
    my ($class) = shift (@_);
    my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    if (defined $arguments) { push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments}); }
    if (defined $options) { push(@{$hashArgOptLists->{"OptList"}}, $options); }

    # Use an explicit class-method call rather than the indirect object form
    # ("new BasPlug(...)"), which is ambiguous inside a package that defines
    # its own new()
    my $self = BasPlug->new($pluginlist, $inputargs, $hashArgOptLists);

    return bless $self, $class;
}
110
111
# Two-letter postal abbreviation => full name, for the 50 states plus DC
# (grouped below by the first letter of the abbreviation).
my $state_abbr_to_name_mapping = {
    'AL' => 'Alabama', 'AK' => 'Alaska', 'AZ' => 'Arizona', 'AR' => 'Arkansas',
    'CA' => 'California', 'CO' => 'Colorado', 'CT' => 'Connecticut',
    'DC' => 'District of Columbia', 'DE' => 'Delaware',
    'FL' => 'Florida',
    'GA' => 'Georgia',
    'HI' => 'Hawaii',
    'ID' => 'Idaho', 'IL' => 'Illinois', 'IN' => 'Indiana', 'IA' => 'Iowa',
    'KS' => 'Kansas', 'KY' => 'Kentucky',
    'LA' => 'Louisiana',
    'ME' => 'Maine', 'MD' => 'Maryland', 'MA' => 'Massachusetts', 'MI' => 'Michigan',
    'MN' => 'Minnesota', 'MS' => 'Mississippi', 'MO' => 'Missouri', 'MT' => 'Montana',
    'NE' => 'Nebraska', 'NV' => 'Nevada', 'NH' => 'New Hampshire', 'NJ' => 'New Jersey',
    'NM' => 'New Mexico', 'NY' => 'New York', 'NC' => 'North Carolina', 'ND' => 'North Dakota',
    'OH' => 'Ohio', 'OK' => 'Oklahoma', 'OR' => 'Oregon',
    'PA' => 'Pennsylvania',
    'RI' => 'Rhode Island',
    'SC' => 'South Carolina', 'SD' => 'South Dakota',
    'TN' => 'Tennessee', 'TX' => 'Texas',
    'UT' => 'Utah',
    'VT' => 'Vermont', 'VA' => 'Virginia',
    'WA' => 'Washington', 'WV' => 'West Virginia', 'WI' => 'Wisconsin', 'WY' => 'Wyoming',
};

# Inverse lookup: full name => abbreviation (names are unique, so reversing
# the key/value list loses nothing).
my %state_name_to_abbr_mapping = reverse %$state_abbr_to_name_mapping;
167
# State abbreviation => area name. The table is written area by area and
# inverted, so each area's member states can be read off a single line.
my $state_abbr_to_area_mapping = {
    map {
        my ($area_name, $area_state_abbrs) = @{$_};
        map { ($_ => $area_name) } @{$area_state_abbrs};
    } (
        [ "Southeast" => [ qw(AL AR FL GA KY LA MS NC SC TN VA WV) ] ],
        [ "West"      => [ qw(AK CA HI NV OR WA) ] ],
        [ "Southwest" => [ qw(AZ NM OK TX) ] ],
        [ "Mountain"  => [ qw(CO ID MT UT WY) ] ],
        [ "Northeast" => [ qw(CT DC DE ME MD MA NH NJ NY PA RI VT) ] ],
        [ "Midwest"   => [ qw(IL IN IA KS MI MN MO NE ND OH SD WI) ] ],
    )
};
221
222
# Place type ID => place type name; the IDs are consecutive, starting at 1.
my $place_type_id_to_name_mapping = {};
{
    my $next_place_type_id = 1;
    foreach my $place_type_name ("Individual building",
                                 "Landscape site",
                                 "Campus arrangement",
                                 "Building group") {
        $place_type_id_to_name_mapping->{$next_place_type_id} = $place_type_name;
        $next_place_type_id++;
    }
}
229
230
# Architectural style names, looked up by position.
# This array must match the values in the tblArchTypes table
my @place_styles_array = (
    'American colonial', 'Federal', 'Greek revival', 'Italianate',
    'Gothic revival', 'Romanesque revival', 'Victorian', 'Beaux-Arts classicism',
    'Colonial revival', 'Mission/Mission revival', 'Modern/pre-WWII', 'Modern/post-WWII',
    'Postmodern', 'Contemporary', 'Regionalist/Vernacular', 'Other',
);
250
251
# Place function names (lower case), each mapped to an empty string.
my $place_functions_mapping = {
    map { ($_ => "") } (
        "academic department building",
        "administration",
        "admissions office",
        "alumni center",
        "arboretum",
        "archaeological site",
        "auditorium",
        "bell tower",
        "chapel",
        "classrooms",
        "debating society",
        "dining hall",
        "facility management building",
        "faculty offices",
        "gardens",
        "greek letter society",
        "gymnasium",
        "infirmary",
        "library",
        "master plan (campus)",
        "master plan (landscape)",
        "memorial site",
        "museum",
        "observatory",
        "old main",
        "outdoor space",
        "president's house",
        "private residence",
        "residence hall",
        "stadium",
        "student union",
        "theater",
        "other",
    )
};
287
288
# File-level state shared between the processing subs: designer name => ID,
# designer name => list of place IDs, and a counter that starts at 1.
my (%designer_name_to_id_mapping, %designer_name_to_place_ids_mapping);
my $place_reference_id = 1;
292
293
# Plugin entry point: if the given file matches this plugin's process
# expression, connects to the "CIC-HCAP" ODBC data source and processes the
# institutions, places and designers it contains.
#
# Arguments ($pluginfo, $metadata, $maxdocs and $total_count are accepted
# but not used here):
#   $base_dir, $file - joined with util::filename_cat to form the filename
#   $processor       - document processor, stored in $self->{'processor'}
#   $gli             - stored in $self->{'gli'}; when true the processing
#                      subs also emit <ProcessingError> lines on STDERR
#
# Returns undef if the file does not match the process expression or is not
# a plain file, and 1 once processing has finished. Dies if the database
# connection cannot be opened.
sub read
{
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;

    $self->{'filename'} = &util::filename_cat($base_dir, $file);
    if ($self->{'filename'} !~ /$self->{'process_exp'}/ || !-f $self->{'filename'}) {
        return undef;
    }
    $self->{'processor'} = $processor;
    $self->{'gli'} = $gli;

    # Open connection to Access database. Note that the connection goes
    # through the fixed ODBC data source name, not through the matched file.
    # Fail here with the driver's message rather than later with "Can't call
    # method prepare on an undefined value".
    my $dbh = DBI->connect('dbi:ODBC:CIC-HCAP')
        or die "Could not connect to database: $DBI::errstr\n";

    $self->process_institutions($dbh);
    $self->process_places($dbh);
    $self->process_designers($dbh);

    # All statement handles are local to the subs above, so the connection
    # can be released now
    $dbh->disconnect();

    return 1;
}
315
316
# Creates one document object per row of tblInstitution, passes it to the
# document processor, and finally writes the "institutions" and "states"
# static browser macrofiles.
#
# Arguments:
#   $self - the plugin object; this sub reads 'failhandle', 'gli',
#           'filename', 'images_directory' and 'processor', and increments
#           'num_processed' / 'num_not_processed'
#   $dbh  - an open DBI database handle
#
# Institutions with no City value are skipped silently. Institutions whose
# best image is missing, cannot be matched to a file, or is a PDF are
# reported (STDERR and the fail log) and skipped.
#
# Dies if an SQL statement cannot be prepared or executed.
#
# NOTE(review): database values are interpolated into the generated HTML
# without escaping; confirm the database content is trusted.
sub process_institutions
{
    my $self = shift(@_);
    my $dbh = shift(@_);
    my $fail_log_handle = $self->{'failhandle'};

    # Prepares a statement and sets its LongReadLen. Dies straight away if
    # prepare fails, instead of letting the LongReadLen assignment turn an
    # undefined handle into a plain hash that only fails at execute time.
    my $prepare_statement = sub {
        my ($sql_statement) = @_;
        my $sql_handle = $dbh->prepare($sql_statement) or die "Could not prepare SQL statement.";
        $sql_handle->{LongReadLen} = 65536;
        return $sql_handle;
    };

    # Reports a problem on STDERR (with an extra <ProcessingError> line when
    # running under GLI) and in the fail log
    my $report_error = sub {
        my ($gli_name, $gli_reason, $log_message) = @_;
        print STDERR "<ProcessingError n='$gli_name' p='CICPlug' r='$gli_reason'>\n" if ($self->{'gli'});
        print STDERR "$log_message\n";
        print {$fail_log_handle} "$log_message\n";
    };

    # SQL statement for getting everything from the Institution table
    my $institution_sql_handle = $prepare_statement->("SELECT * FROM tblInstitution");  # WHERE Institution_ID<200";
    $institution_sql_handle->execute() or die "Could not execute SQL statement.";

    # SQL statement for getting the Institution places
    my $institution_places_sql_handle = $prepare_statement->("SELECT Entry_ID,Current_name FROM tblPlace WHERE PlaceType>0 AND Institution_ID=?");

    # SQL statement for getting the Institution best place image location
    my $institution_best_place_image_location_sql_handle = $prepare_statement->("SELECT Location,Entry_ID FROM tblImages WHERE FileType=1 AND FileName=?");

    # SQL statement for getting the Institution places images (only used to check if an institution has some images)
    my $institution_places_images_sql_handle = $prepare_statement->("SELECT FileName FROM tblImages,tblPlace WHERE tblImages.FileType=1 AND tblImages.Entry_ID=tblPlace.Entry_ID AND tblPlace.Institution_ID=?");

    # SQL statement for getting the Institution campus plans
    my $institution_campus_plans_sql_handle = $prepare_statement->("SELECT * FROM tblCampusMaps WHERE Electronic=1 AND Institution_ID=?");

    # Create a document object for each institution
    my %institution_id_to_name_mapping;
    my %institution_state_to_ids_mapping;
    while (my $row_hashref = $institution_sql_handle->fetchrow_hashref) {
        # Skip any institutions that didn't respond
        next if !defined($row_hashref->{"City"});

        my $institution_id = $row_hashref->{"Institution_ID"};
        # print STDERR " Institution $institution_id\n";
        my $institution_doc_obj = doc->new($self->{'filename'} . "-", "indexed_doc");
        $institution_doc_obj->set_OID("i$institution_id");
        &new_metadata_entry($institution_doc_obj, "DocumentType", "Institution");

        # For some reason the hyphen seems to be lost from the Zip field, so add it back in
        my $institution_zip = $row_hashref->{"Zip"};
        if (defined($institution_zip) && $institution_zip =~ /^(\d\d\d\d\d)(\d\d\d\d)$/) {
            $row_hashref->{"Zip"} = $1 . "-" . $2;
        }

        # Fix up the links to the institution webpage (keep only the part between the first pair of # characters)
        my $institution_webpage = $row_hashref->{"Institution_webpage"};
        if (defined($institution_webpage) && $institution_webpage =~ /\#(.*?)\#/) {
            $row_hashref->{"Institution_webpage"} = $1;
        }

        # Map state to full name
        # NOTE(review): a State value missing from the mapping becomes undef here,
        # so no State metadata is added and the institution is filed under an
        # empty state name below -- confirm this cannot occur in the data
        $row_hashref->{"State"} = $state_abbr_to_name_mapping->{$row_hashref->{"State"}};

        # Get the places in this institution
        my $institution_places_list_html = "";
        $institution_places_sql_handle->execute($institution_id) or die "Could not execute SQL statement.";
        while (my $institution_places_match_hashref = $institution_places_sql_handle->fetchrow_hashref) {
            my $institution_place_id = $institution_places_match_hashref->{"Entry_ID"};
            my $institution_place_name = $institution_places_match_hashref->{"Current_name"};
            $institution_places_list_html .= "<a href=\"_gwcgi_?a=d&d=p$institution_place_id\">$institution_place_name</a><br />\n";
        }
        &new_metadata_entry($institution_doc_obj, "InstitutionPlacesListHTML", $institution_places_list_html);

        # Get the best place image for this institution
        my $institution_best_place_image_name = $row_hashref->{"Best_image"};
        if (!defined($institution_best_place_image_name) || $institution_best_place_image_name eq "") {
            # Some institutions have no electronic images, and thus have no best image
            $institution_places_images_sql_handle->execute($institution_id) or die "Could not execute SQL statement.";
            my $institution_has_images = defined($institution_places_images_sql_handle->fetchrow_hashref());
            # Only the first row is needed, so release the cursor before the next execute
            $institution_places_images_sql_handle->finish();
            if ($institution_has_images) {
                $report_error->("Institution $institution_id", "No best image", "Error: Institution $institution_id -- No best image.");
                $self->{'num_not_processed'}++;
                next;
            }
            &new_metadata_entry($institution_doc_obj, "InstitutionBestPlaceImageHTML", "");
        }
        else {
            # Get the file location of the best place image for this institution
            $institution_best_place_image_location_sql_handle->execute($institution_best_place_image_name) or die "Could not execute SQL statement.";
            my $institution_best_place_image_hashref = $institution_best_place_image_location_sql_handle->fetchrow_hashref();
            # Only the first match is used, so release the cursor before the next execute
            $institution_best_place_image_location_sql_handle->finish();

            my $institution_best_place_image_location = defined($institution_best_place_image_hashref) ? $institution_best_place_image_hashref->{"Location"} : undef;
            if (!defined($institution_best_place_image_location) || $institution_best_place_image_location eq "") {
                $report_error->("Institution $institution_id", "Could not match best image $institution_best_place_image_name to a file", "Error: Institution $institution_id -- Could not match best image $institution_best_place_image_name to a file.");
                $self->{'num_not_processed'}++;
                next;
            }

            # PDFs are not allowed for institution best place images
            # (the dot is escaped so that only a real ".pdf" extension matches)
            if ($institution_best_place_image_location =~ /\.pdf$/i) {
                $report_error->("Institution $institution_id", "PDF not allowed for best image", "Error: Institution $institution_id -- PDF not allowed for best image.");
                $self->{'num_not_processed'}++;
                next;
            }

            my $institution_best_place_id = $institution_best_place_image_hashref->{"Entry_ID"};
            my $institution_best_place_image_medium_file_href = $self->generate_place_image_variant($institution_doc_obj, $institution_best_place_image_location, "medium");
            &new_metadata_entry($institution_doc_obj, "InstitutionBestPlaceImageHTML", "<a href=\"_gwcgi_?a=d&d=p$institution_best_place_id\"><img alt=\"$institution_best_place_image_name\" src=\"$institution_best_place_image_medium_file_href\"/><br />$institution_best_place_image_name</a>");
        }

        # Get institution campus plans
        my $institution_campus_plans_list_html = "";
        $institution_campus_plans_sql_handle->execute($institution_id) or die "Could not execute SQL statement.";
        while (my $institution_campus_plans_match_hashref = $institution_campus_plans_sql_handle->fetchrow_hashref) {
            my $institution_campus_plan_name = $institution_campus_plans_match_hashref->{"NameAndFormat"};
            my $institution_campus_plan_image_location = $institution_campus_plans_match_hashref->{"Location_electronic"};

            # Deal with PDF files separately: don't convert, just associate
            if ($institution_campus_plan_image_location =~ /\.pdf$/i) {
                # Convert the server location of the file into the local location of the file
                # (the leading drive letter is replaced by the images directory)
                my $institution_campus_plan_pdf_file_path = $institution_campus_plan_image_location;
                $institution_campus_plan_pdf_file_path =~ s/^[A-Z]:/$self->{'images_directory'}/;

                if (-f $institution_campus_plan_pdf_file_path) {
                    # The link uses the name with spaces encoded; the file is associated under the plain name
                    my $institution_campus_plan_pdf_file_name = $institution_campus_plan_name . ".pdf";
                    $institution_campus_plan_pdf_file_name =~ s/ /%20/g;
                    my $institution_campus_plan_pdf_file_href = "_httpcollection_/index/assoc/[assocfilepath]/$institution_campus_plan_pdf_file_name";
                    $institution_campus_plans_list_html .= "<a href=\"$institution_campus_plan_pdf_file_href\">$institution_campus_plan_name (PDF)</a><br />";

                    $institution_doc_obj->associate_file($institution_campus_plan_pdf_file_path, $institution_campus_plan_name . ".pdf", undef, $institution_doc_obj->get_top_section());
                }
                else {
                    # A missing PDF is reported but does not stop the institution from being processed
                    $report_error->($institution_campus_plan_pdf_file_path, "Does not exist", "Error: File $institution_campus_plan_pdf_file_path does not exist.");
                }
            }
            else {
                my $institution_campus_plan_image_large_file_href = $self->generate_place_image_variant($institution_doc_obj, $institution_campus_plan_image_location, "large");

                # Create a new section for each institution campus plan image
                my $institution_campus_plan_image_section = $institution_doc_obj->insert_section($institution_doc_obj->get_end_child($institution_doc_obj->get_top_section()));
                $institution_doc_obj->add_utf8_text($institution_campus_plan_image_section, "_");  # This is necessary
                $institution_doc_obj->add_utf8_metadata($institution_campus_plan_image_section, "DocumentType", "Image");
                $institution_doc_obj->add_utf8_metadata($institution_campus_plan_image_section, "Title", $institution_campus_plan_name);
                $institution_doc_obj->add_utf8_metadata($institution_campus_plan_image_section, "ImagePath", $institution_campus_plan_image_large_file_href);

                $institution_campus_plans_list_html .= "<a href=\"_gwcgi_?a=d&d=i$institution_id.$institution_campus_plan_image_section\">$institution_campus_plan_name</a><br />";
            }
        }
        &new_metadata_entry($institution_doc_obj, "InstitutionCampusPlansListHTML", $institution_campus_plans_list_html);

        # Add each field from the table as metadata (including the Zip, webpage and State values adjusted above)
        foreach my $key (keys(%$row_hashref)) {
            my $value = $row_hashref->{$key};
            if (defined($value)) {
                &new_metadata_entry($institution_doc_obj, $key, $value);
            }
        }

        $institution_doc_obj->add_utf8_text($institution_doc_obj->get_top_section(), "Some dummy text.");
        $self->{'processor'}->process($institution_doc_obj);
        $self->{'num_processed'}++;

        # Build mappings for creating the static macrofiles
        my $institution_name = $row_hashref->{"Institution_Name"};
        $institution_id_to_name_mapping{$institution_doc_obj->get_OID()} = $institution_name;
        my $institution_state = $row_hashref->{"State"};
        push(@{$institution_state_to_ids_mapping{$institution_state}}, $institution_doc_obj->get_OID());
    }

    # Write the institutions.dm macrofile
    &write_static_browser_macrofile("institutions", \%institution_id_to_name_mapping);

    # Write the states.dm macrofile
    &write_state_browser_macrofile("states", \%institution_state_to_ids_mapping, \%institution_id_to_name_mapping);
}
496
497
498sub process_places
499{
500 my $self = shift(@_);
501 my $dbh = shift(@_);
502 my $fail_log_handle = $self->{'failhandle'};
503
504 # Prepare SQL statement for getting everything from the Place table
505 my $place_sql_statement = "SELECT * FROM tblPlace"; # WHERE Entry_ID<100";
506 my $place_sql_handle = $dbh->prepare($place_sql_statement);
507 $place_sql_handle->{LongReadLen} = 65536;
508 $place_sql_handle->execute() or die "Could not execute SQL statement.";
509
510 # Prepare SQL statement for getting the Place institution
511 my $place_institution_sql_statement = "SELECT Institution_Name FROM tblInstitution,tblPlace WHERE tblInstitution.Institution_ID=tblPlace.Institution_ID and tblPlace.Entry_ID=?";
512 my $place_institution_sql_handle = $dbh->prepare($place_institution_sql_statement);
513
514 # Prepare SQL statement for getting the Place construction dates
515 my $place_construction_dates_sql_statement = "SELECT Prefix,Date,Note,Architect_Name FROM tblConstruction_and_Dates WHERE Entry_ID=?";
516 my $place_construction_dates_sql_handle = $dbh->prepare($place_construction_dates_sql_statement);
517
518 # Prepare SQL statement for getting the Place images
519 my $place_images_sql_statement = "SELECT FileName,Location FROM tblImages WHERE FileType=1 AND Entry_ID=? ORDER BY Image_Order";
520 my $place_images_sql_handle = $dbh->prepare($place_images_sql_statement);
521 $place_images_sql_handle->{LongReadLen} = 65536;
522
523 # Prepare SQL statement for getting the Place materials
524 my $place_materials_sql_statement = "SELECT * FROM tblDescription_building WHERE Entry_ID=?";
525 my $place_materials_sql_handle = $dbh->prepare($place_materials_sql_statement);
526 $place_materials_sql_handle->{LongReadLen} = 65536;
527
528 # Prepare SQL statement for getting the Place building styles
529 my $place_styles_sql_statement = "SELECT ArchType_ID FROM ArchPlace WHERE Entry_ID=?";
530 my $place_styles_sql_handle = $dbh->prepare($place_styles_sql_statement);
531 $place_styles_sql_handle->{LongReadLen} = 65536;
532
533 # Prepare SQL statement for getting the Place functions
534 my $place_functions_sql_statement = "SELECT Function,Year,Prefix FROM tblFunction_and_dates WHERE Entry_ID=?";
535 my $place_functions_sql_handle = $dbh->prepare($place_functions_sql_statement);
536 $place_functions_sql_handle->{LongReadLen} = 65536;
537
538 # Prepare SQL statement for getting the Place significance
539 my $place_significance_sql_statement = "SELECT SigType FROM tblSigTypes,SigPlace WHERE tblSigTypes.SigTypes_ID=SigPlace.SigType_ID+1 AND SigPlace.Entry_ID=?";
540 my $place_significance_sql_handle = $dbh->prepare($place_significance_sql_statement);
541 $place_significance_sql_handle->{LongReadLen} = 65536;
542
543 # Prepare SQL statement for getting the Place references
544 my $place_references_sql_statement = "SELECT Bibliography FROM tblReferences WHERE Entry_ID=?";
545 my $place_references_sql_handle = $dbh->prepare($place_references_sql_statement);
546 $place_references_sql_handle->{LongReadLen} = 65536;
547
548 # Prepare SQL statement for getting the Place designations
549 my $place_designations_sql_statement = "SELECT National_Register,Federal_Agency,HABS,HAER,Local_Designation FROM tblReferences WHERE Entry_ID=?";
550 my $place_designations_sql_handle = $dbh->prepare($place_designations_sql_statement);
551 $place_designations_sql_handle->{LongReadLen} = 65536;
552
553 # Prepare SQL statement for getting the Place narrative
554 my $place_narrative_sql_statement = "SELECT Narrative FROM tblSignificance_Narrative WHERE Entry_ID=?";
555 my $place_narrative_sql_handle = $dbh->prepare($place_narrative_sql_statement);
556 $place_narrative_sql_handle->{LongReadLen} = 65536;
557
558 # Prepare SQL statement for getting the Place state
559 my $place_state_sql_statement = "SELECT State FROM tblInstitution,tblPlace WHERE tblInstitution.Institution_ID=tblPlace.Institution_ID AND Entry_ID=?";
560 my $place_state_sql_handle = $dbh->prepare($place_state_sql_statement);
561 $place_state_sql_handle->{LongReadLen} = 65536;
562
563 # Create a document object for each place
564 my %place_type_to_ids_mapping;
565 my %place_style_to_ids_mapping;
566 my %place_date_to_ids_mapping;
567 my %place_function_to_ids_mapping;
568 my %place_id_to_name_mapping;
569 my %place_id_to_institution_name_mapping;
570 while (my $row_hashref = $place_sql_handle->fetchrow_hashref) {
571 my $place_id = $row_hashref->{"Entry_ID"};
572 # print STDERR " Place $place_id\n";
573 my $place_doc_obj = new doc($self->{'filename'} . "-", "indexed_doc");
574 $place_doc_obj->set_OID("p$place_id");
575 &new_metadata_entry($place_doc_obj, "DocumentType", "Place");
576
577 # Convert the place type ID into a name
578 $row_hashref->{"PlaceType"} = $place_type_id_to_name_mapping->{$row_hashref->{"PlaceType"}};
579
580 # Add each field from the table as metadata
581 foreach my $key (keys(%$row_hashref)) {
582 my $value = $row_hashref->{$key};
583 if (defined($value)) {
584 &new_metadata_entry($place_doc_obj, $key, $value);
585 }
586 }
587
588 # Get place name
589 my $place_name = $row_hashref->{"Current_name"};
590 if (!defined($place_name)) {
591 print STDERR "<ProcessingError n='Place $place_id' p='CICPlug' r='Missing place name'>\n" if ($self->{'gli'});
592 print STDERR "Error: Place $place_id -- Missing place name.\n";
593 print $fail_log_handle "Error: Place $place_id -- Missing place name.\n";
594 $self->{'num_not_processed'}++;
595 next;
596 }
597
598 # Get place type
599 my $place_type = $row_hashref->{"PlaceType"};
600 if (!defined($place_type)) {
601 print STDERR "<ProcessingError n='Place $place_id' p='CICPlug' r='Missing place type'>\n" if ($self->{'gli'});
602 print STDERR "Error: Place $place_id -- Missing place type.\n";
603 print $fail_log_handle "Error: Place $place_id -- Missing place type.\n";
604 $self->{'num_not_processed'}++;
605 next;
606 }
607
608 # Create place styles mapping
609 $place_styles_sql_handle->execute($place_id) or die "Could not execute SQL statement.";
610 while (my $place_styles_match_hashref = $place_styles_sql_handle->fetchrow_hashref()) {
611 # The ArchType_ID is actually an index into the tblArchType table, NOT a link
612 my $place_style_index = $place_styles_match_hashref->{"ArchType_ID"};
613 my $place_style = $place_styles_array[$place_style_index];
614 push(@{$place_style_to_ids_mapping{$place_style}}, $place_doc_obj->get_OID());
615 &new_metadata_entry($place_doc_obj, "Style", $place_style);
616 }
617
618 # Get place institution
619 $place_institution_sql_handle->execute($place_id) or die "Could not execute SQL statement.";
620 my $place_institution = $place_institution_sql_handle->fetchrow();
621 &new_metadata_entry($place_doc_obj, "Institution_name", $place_institution);
622 $place_id_to_institution_name_mapping{$place_doc_obj->get_OID()} = ", " . $place_institution;
623
624 # Get place state and area (for searching)
625 $place_state_sql_handle->execute($place_id) or die "Could not execute SQL statement.";
626 my $place_state_abbr = $place_state_sql_handle->fetchrow();
627 &new_metadata_entry($place_doc_obj, "State", $place_state_abbr . " " . $state_abbr_to_name_mapping->{$place_state_abbr} . " " . $state_abbr_to_area_mapping->{$place_state_abbr});
628
629 # Get place construction dates
630 my $place_construction_dates_table_html = "";
631 $place_construction_dates_sql_handle->execute($place_id) or die "Could not execute SQL statement.";
632 while (my $place_construction_dates_match_hashref = $place_construction_dates_sql_handle->fetchrow_hashref()) {
633 my $place_construction_date = $place_construction_dates_match_hashref->{"Date"};
634 if (!defined($place_construction_date)) {
635 # Landscape sites are allowed to have no construction information
636 next if ($place_type eq "Landscape site");
637
638 print STDERR "<ProcessingError n='Place $place_id' p='CICPlug' r='Missing construction date'>\n" if ($self->{'gli'});
639 print STDERR "Error: Place $place_id -- Missing construction date.\n";
640 print $fail_log_handle "Error: Place $place_id -- Missing construction date.\n";
641 next;
642 }
643
644 # Convert the construction date to a time period (for searching and browsing)
645 if ($place_construction_date =~ /^(\d{1,4}).*$/) {
646 my $place_construction_year = $1;
647
648 my $place_time_period;
649 if ($place_construction_year < 1800) { $place_time_period = "pre-1800"; }
650 elsif ($place_construction_year < 1850) { $place_time_period = "1800-1850"; }
651 elsif ($place_construction_year < 1900) { $place_time_period = "1850-1900"; }
652 elsif ($place_construction_year < 1945) { $place_time_period = "1900-1945"; }
653 elsif ($place_construction_year <= 1995) { $place_time_period = "1945-1995"; }
654 elsif ($place_construction_year > 1995) { $place_time_period = "post-1995"; }
655 push(@{$place_date_to_ids_mapping{$place_time_period}}, $place_doc_obj->get_OID());
656 &new_metadata_entry($place_doc_obj, "Time_period", $place_time_period);
657 }
658 elsif ($place_construction_date ne "n.d." && $place_construction_date ne "unknown" && $place_construction_date ne "various") {
659 print STDERR "<ProcessingError n='Place $place_id' p='CICPlug' r='Warning: Unknown construction date: $place_construction_date'>\n" if ($self->{'gli'});
660 print STDERR "Warning: Place $place_id -- Unknown construction date: $place_construction_date.\n";
661 print $fail_log_handle "Warning: Place $place_id -- Unknown construction date: $place_construction_date.\n";
662 }
663
664 my $place_construction_note = $place_construction_dates_match_hashref->{"Note"};
665 if (!defined($place_construction_note)) {
666 # "No date" entries are allowed to have no construction note
667 next if ($place_construction_date eq "n.d");
668
669 print STDERR "<ProcessingError n='Place $place_id' p='CICPlug' r='Missing construction note'>\n" if ($self->{'gli'});
670 print STDERR "Error: Place $place_id -- Missing construction note.\n";
671 print $fail_log_handle "Error: Place $place_id -- Missing construction note.\n";
672 next;
673 }
674
675 # Get the architects for this construction and remember them for later for the designer objects
676 my $place_construction_architect = $place_construction_dates_match_hashref->{"Architect_Name"} || "";
677 my $place_construction_architect_links = "";
678 foreach my $designer_name (split(/;/, $place_construction_architect)) {
679 $designer_name =~ s/\(.*?\)//g;
680 $designer_name =~ s/^\s*//;
681 $designer_name =~ s/\s*$//;
682
683 my $designer_id = $designer_name_to_id_mapping{$designer_name};
684 if (!defined($designer_id)) {
685 $designer_id = scalar(keys(%designer_name_to_id_mapping)) + 1;
686 $designer_name_to_id_mapping{$designer_name} = $designer_id;
687 }
688 $place_construction_architect_links .= "<a href=\"_gwcgi_?a=d&d=d$designer_id\">$designer_name</a> ";
689 push(@{$designer_name_to_place_ids_mapping{$designer_name}}, $place_id);
690 }
691
692 my $place_construction_date_prefix = $place_construction_dates_match_hashref->{"Prefix"} || "";
693 $place_construction_dates_table_html .= "<tr><td class=\"cicplaceconstructiondatetd\" valign=\"top\">$place_construction_date_prefix $place_construction_date</td><td valign=\"top\">$place_construction_note $place_construction_architect_links</td></tr>";
694 }
695 &new_metadata_entry($place_doc_obj, "PlaceConstructionDatesTableHTML", "<table id=\"cicplaceconstructiondatestable\" cellpadding=\"0\" cellspacing=\"0\">" . $place_construction_dates_table_html . "</table>");
696
697 # Get place materials (individual buildings only)
698 if ($row_hashref->{"PlaceType"} eq "Individual building") {
699 $place_materials_sql_handle->execute($place_id) or die "Could not execute SQL statement.";
700 my $place_materials_match_hashref = $place_materials_sql_handle->fetchrow_hashref();
701 &new_metadata_entry($place_doc_obj, "MaterialFoundation", $place_materials_match_hashref->{"foundation"} || "");
702 &new_metadata_entry($place_doc_obj, "MaterialRoof", $place_materials_match_hashref->{"roof"} || "");
703 &new_metadata_entry($place_doc_obj, "MaterialWalls", $place_materials_match_hashref->{"walls"} || "");
704
705 &new_metadata_entry($place_doc_obj, "Materials", $place_materials_match_hashref->{"foundation"} || "");
706 &new_metadata_entry($place_doc_obj, "Materials", $place_materials_match_hashref->{"roof"} || "");
707 &new_metadata_entry($place_doc_obj, "Materials", $place_materials_match_hashref->{"walls"} || "");
708 }
709
710 # Get place functions
711 my $place_functions = "";
712 my $place_functions_table_html = "";
713 $place_functions_sql_handle->execute($place_id) or die "Could not execute SQL statement.";
714 while (my $place_functions_match_hashref = $place_functions_sql_handle->fetchrow_hashref()) {
715 my $place_function = $place_functions_match_hashref->{"Function"};
716 if (!defined($place_function)) {
717 print STDERR "<ProcessingError n='Place $place_id' p='CICPlug' r='Missing function'>\n" if ($self->{'gli'});
718 print STDERR "Error: Place $place_id -- Missing function.\n";
719 print $fail_log_handle "Error: Place $place_id -- Missing function.\n";
720 next;
721 }
722 # Check for multiline values (these are errors)
723 if ($place_function =~ /\n/) {
724 print STDERR "<ProcessingError n='Place $place_id' p='CICPlug' r='Multiline function'>\n" if ($self->{'gli'});
725 print STDERR "Error: Place $place_id -- Multiline function.\n";
726 print $fail_log_handle "Error: Place $place_id -- Multiline function.\n";
727 next;
728 }
729 my $place_year = $place_functions_match_hashref->{"Year"};
730 if (!defined($place_year)) {
731 print STDERR "<ProcessingError n='Place $place_id' p='CICPlug' r='Missing function year'>\n" if ($self->{'gli'});
732 print STDERR "Error: Place $place_id -- Missing function year.\n";
733 print $fail_log_handle "Error: Place $place_id -- Missing function year.\n";
734 next;
735 }
736 my $place_year_prefix = $place_functions_match_hashref->{"Prefix"} || "";
737 $place_functions_table_html .= "<tr><td class=\"cicplacefunctionyeartd\" valign=\"top\">$place_year_prefix $place_year</td><td valign=\"top\">$place_function</td></tr>";
738
739 # Prepare function metadata for browsing and searching
740 my $place_function_to_index = lc($place_function); # Casefold
741 $place_function_to_index =~ s/^\s*//; # Remove whitespace from the start
742 if ($place_function_to_index =~ /^master plan/) {
743 $place_function_to_index =~ s/ \(campus,.*/ \(campus\)/;
744 $place_function_to_index =~ s/ \(campus:.*/ \(campus\)/;
745 $place_function_to_index =~ s/ \(landscape,.*/ \(landscape\)/;
746 }
747 else {
748 $place_function_to_index =~ s/\(.*\)//g; # Remove anything in parentheses
749 }
750 $place_function_to_index =~ s/\s*$//; # Remove whitespace from the end
751
752 # Deal with common plural cases
753 $place_function_to_index =~ s/classroom$/classrooms/;
754 $place_function_to_index =~ s/department buildings$/department building/;
755 $place_function_to_index =~ s/faculty office$/faculty offices/;
756 $place_function_to_index =~ s/garden$/gardens/;
757 $place_function_to_index =~ s/residence halls$/residence hall/;
758 $place_function_to_index =~ s/private residences$/private residence/;
759
760 # Check it is one of the valid function values
761 if (!defined($place_functions_mapping->{$place_function_to_index})) {
762 print STDERR "<ProcessingError n='Place $place_id' p='CICPlug' r='Warning: Unknown function: $place_function_to_index'>\n" if ($self->{'gli'});
763 print STDERR "Warning: Place $place_id -- Unknown function: $place_function_to_index.\n";
764 print $fail_log_handle "Warning: Place $place_id -- Unknown function: $place_function_to_index.\n";
765 next;
766 }
767 push(@{$place_function_to_ids_mapping{$place_function_to_index}}, $place_doc_obj->get_OID());
768 $place_functions .= "$place_function_to_index ";
769 }
770 &new_metadata_entry($place_doc_obj, "Functions", $place_functions);
771 &new_metadata_entry($place_doc_obj, "PlaceFunctionsTableHTML", "<table id=\"cicplacefunctionstable\" cellpadding=\"0\" cellspacing=\"0\">" . $place_functions_table_html . "</table>");
772
773 # Get place significance
774 $place_significance_sql_handle->execute($place_id) or die "Could not execute SQL statement.";
775 while (my $place_significance_match_hashref = $place_significance_sql_handle->fetchrow_hashref()) {
776 my $place_significance = $place_significance_match_hashref->{"SigType"};
777 &new_metadata_entry($place_doc_obj, "Significance", lc($place_significance));
778 }
779
780 # Get place references
781 $place_references_sql_handle->execute($place_id) or die "Could not execute SQL statement.";
782 my $place_references = $place_references_sql_handle->fetchrow();
783 if (defined($place_references)) {
784 $self->process_place_references($place_doc_obj, $place_id, $place_name, $place_institution, $place_references);
785 }
786
787 # Get place designations
788 $place_designations_sql_handle->execute($place_id) or die "Could not execute SQL statement.";
789 my $place_designations_match_hashref = $place_designations_sql_handle->fetchrow_hashref();
790 if ($place_designations_match_hashref->{"National_Register"} eq "1") {
791 &new_metadata_entry($place_doc_obj, "Designation", "National Register");
792 }
793 if ($place_designations_match_hashref->{"Federal_Agency"} eq "1") {
794 &new_metadata_entry($place_doc_obj, "Designation", "National Historic Landmark");
795 }
796 if ($place_designations_match_hashref->{"HABS"} eq "1" || $place_designations_match_hashref->{"HAER"} eq "1") {
797 &new_metadata_entry($place_doc_obj, "Designation", "HABS/HAER");
798 }
799 if ($place_designations_match_hashref->{"Local_Designation"} eq "1") {
800 &new_metadata_entry($place_doc_obj, "Designation", "Local/State");
801 }
802
803 # Get place narrative
804 $place_narrative_sql_handle->execute($place_id) or die "Could not execute SQL statement.";
805 my $place_narrative = $place_narrative_sql_handle->fetchrow();
806 if (defined($place_narrative)) {
807 my $place_narrative_html = &rtf_to_html($place_narrative);
808 $place_narrative_html =~ s/(<br \/>(\s|\n)*)*$//; # Remove any trailing <br /> tags
809 &new_metadata_entry($place_doc_obj, "PlaceNarrativeHTML", $place_narrative_html);
810 }
811
812 # Get place images
813 my $place_images_html = "";
814 $place_images_sql_handle->execute($place_id) or die "Could not execute SQL statement.";
815 while (my $place_images_match_hashref = $place_images_sql_handle->fetchrow_hashref) {
816 my $place_image_location = $place_images_match_hashref->{"Location"};
817 my $place_image_name = $place_images_match_hashref->{"FileName"};
818
819 # Deal with PDF files separately: don't convert, just associate
820 if ($place_image_location =~ /\.pdf$/i) {
821 # Convert the server location of the PDF file into the local location of the file
822 my $place_pdf_file_path = $place_image_location;
823 $place_pdf_file_path =~ s/^[A-Z]:/$self->{'images_directory'}/;
824
825 if (-f $place_pdf_file_path) {
826 my ($place_pdf_file_name) = ($place_pdf_file_path =~ /^.+\\(.+)$/);
827 $place_doc_obj->associate_file($place_pdf_file_path, $place_pdf_file_name, undef, $place_doc_obj->get_top_section());
828
829 $place_pdf_file_name =~ s/ /%20/g;
830 my $place_pdf_file_href = "_httpcollection_/index/assoc/[assocfilepath]/$place_pdf_file_name";
831 $place_images_html .= "<div class=\"cicplacepdf\"><a href=\"$place_pdf_file_href\">_iconpdf_</a><br /><a class=\"ciccaption\" href=\"$place_pdf_file_href\">$place_image_name (PDF)</a></div>\n";
832 }
833 else {
834 print STDERR "<ProcessingError n='$place_pdf_file_path' p='CICPlug' r='Does not exist'>\n" if ($self->{'gli'});
835 print STDERR "Error: File $place_pdf_file_path does not exist.\n";
836 print $fail_log_handle "Error: File $place_pdf_file_path does not exist.\n";
837 }
838 }
839 else {
840 my $place_image_small_file_href = $self->generate_place_image_variant($place_doc_obj, $place_image_location, "small");
841 my $place_image_large_file_href = $self->generate_place_image_variant($place_doc_obj, $place_image_location, "large");
842
843 # Create a new section for each place image
844 my $place_image_section = $place_doc_obj->insert_section($place_doc_obj->get_end_child($place_doc_obj->get_top_section()));
845 $place_doc_obj->add_utf8_text($place_image_section, "_"); # This is necessary
846 $place_doc_obj->add_utf8_metadata($place_image_section, "DocumentType", "Image");
847 $place_doc_obj->add_utf8_metadata($place_image_section, "Title", $place_image_name);
848 $place_doc_obj->add_utf8_metadata($place_image_section, "ImagePath", $place_image_large_file_href);
849
850 $place_images_html .= "<div class=\"cicplaceimage\"><a href=\"_gwcgi_?a=d&d=p$place_id.$place_image_section\"><img alt=\"$place_image_name\" src=\"$place_image_small_file_href\"/></a><br /><a class=\"ciccaption\" href=\"_gwcgi_?a=d&d=p$place_id.$place_image_section\">$place_image_name</a></div>\n";
851 }
852 }
853
854 &new_metadata_entry($place_doc_obj, "PlaceImagesHTML", $place_images_html);
855
856 $place_doc_obj->add_utf8_text($place_doc_obj->get_top_section(), "Some dummy text.");
857 $self->{'processor'}->process($place_doc_obj);
858 $self->{'num_processed'}++;
859
860 # Build mappings for creating the static macrofiles
861 $place_id_to_name_mapping{$place_doc_obj->get_OID()} = $place_name;
862 push(@{$place_type_to_ids_mapping{$place_type}}, $place_doc_obj->get_OID());
863 }
864
865 &write_bilevel_static_browser_macrofile("types", \%place_type_to_ids_mapping, \%place_id_to_name_mapping, \%place_id_to_institution_name_mapping);
866 &write_bilevel_static_browser_macrofile("styles", \%place_style_to_ids_mapping, \%place_id_to_name_mapping, \%place_id_to_institution_name_mapping);
867 &write_bilevel_static_browser_macrofile("dates", \%place_date_to_ids_mapping, \%place_id_to_name_mapping, \%place_id_to_institution_name_mapping);
868 &write_bilevel_static_browser_macrofile("functions", \%place_function_to_ids_mapping, \%place_id_to_name_mapping, \%place_id_to_institution_name_mapping);
869}
870
871
# Converts the RTF references of a place into HTML, stores the result as the
# place's "PlaceReferencesHTML" metadata, and builds a separate
# "PlaceReference" document for every reference that can be parsed.
#
# Arguments: the plugin object, the place's document object, the place ID,
# the place name, the place institution name and the references (RTF string).
# Each generated reference document gets the OID "pr<n>", where <n> comes from
# the file-level $place_reference_id counter, which is incremented here.
sub process_place_references
{
    my ($self, $place_doc_obj, $place_id, $place_name, $place_institution_name, $place_references_rtf_string) = @_;
    my $fail_log_handle = $self->{'failhandle'};

    # Convert the place references from RTF to HTML
    my $references_html_raw = &rtf_to_html($place_references_rtf_string);

    # Text that looks like a web address here was not turned into a hyperlink by rtf_to_html()
    if ($references_html_raw =~ / (http|www\.)/) {
        my $address_warning = "Warning: Place $place_id -- Found possible web address in references without hyperlink tag.\n";
        if ($self->{'gli'}) {
            print STDERR "<ProcessingError n='Place $place_id' p='CICPlug' r='Warning: Found possible web address in references without hyperlink tag.'>\n";
        }
        print STDERR $address_warning;
        print {$fail_log_handle} $address_warning;
    }

    # References are separated by blank lines, i.e. two consecutive <br /> tags
    $references_html_raw =~ s/(\r|\n)//g; # Remove all newlines
    $references_html_raw =~ s/<br \/><i><br \/>/<br \/><br \/><i>/g; # Move italic tags
    my @place_references = split(/<br \/>\s*<br \/>/, $references_html_raw);

    my @reference_paragraphs = ();
    foreach my $place_reference (@place_references) {
        # Strip leading and trailing <br /> tags, and ignore references without any word characters
        $place_reference =~ s/^(<br \/>\s*)*//;
        $place_reference =~ s/(<br \/>\s*)*$//;
        next unless ($place_reference =~ /\w/);

        # Every non-empty reference is displayed, whether or not it can be parsed below
        push(@reference_paragraphs, "<p class=\"cicreference\">" . $place_reference . "</p>\n");

        # The first sentence is everything up to the first fullstop (or the whole reference)
        my ($first_sentence) = ($place_reference =~ /^(.*?)\./);
        $first_sentence = $place_reference if (!defined($first_sentence));

        my ($reference_author, $reference_title) = ("", "");

        # Case 0: A magic word in the first sentence -- don't bother trying to parse the reference
        if ($first_sentence =~ /\b(collection|collections|papers|archives|database|letter|memo|inventory|photographs|minutes|reports)\b/i) {
            # Author and title deliberately stay empty
        }
        # Case 1: Author (possibly empty), then title in italics or quotes
        elsif ($place_reference =~ /^(.*?)<i>(.*?)<\/i>/ || $place_reference =~ /^(.*)"(.*?)"/) {
            ($reference_author, $reference_title) = ($1, $2);
        }
        # Case 2: Zero or one fullstops, assume no author and title is complete text
        elsif ($place_reference !~ /\./ || $place_reference =~ /^[^\.]*\.[^\.]*$/) {
            $reference_title = $place_reference;
        }
        # Otherwise the reference cannot be parsed: log it and create no document for it.
        # The plain-text warning goes to the fail log only, not to STDERR.
        else {
            if ($self->{'gli'}) {
                print STDERR "<ProcessingError n='Place $place_id' p='CICPlug' r='Could not parse reference: $place_reference'>\n";
            }
            print {$fail_log_handle} "Warning: Place $place_id -- Could not parse reference: $place_reference\n";
            next;
        }

        # Create a new Reference document for this place reference
        my $reference_doc_obj = doc->new($self->{'file'} . "-", "indexed_doc");
        $reference_doc_obj->set_OID("pr$place_reference_id");

        my @reference_metadata = (
            [ "DocumentType",         "PlaceReference" ],
            [ "PlaceID",              $place_id ],
            [ "PlaceName",            $place_name ],
            [ "PlaceInstitutionName", $place_institution_name ],
            [ "Reference",            $place_reference ],
            [ "ReferenceAuthor",      $reference_author ],
            [ "ReferenceTitle",       $reference_title ],
        );
        foreach my $metadata_pair (@reference_metadata) {
            &new_metadata_entry($reference_doc_obj, $metadata_pair->[0], $metadata_pair->[1]);
        }

        $reference_doc_obj->add_utf8_text($reference_doc_obj->get_top_section(), "Some dummy text.");
        $self->{'processor'}->process($reference_doc_obj);
        $self->{'num_processed'}++;
        $place_reference_id++;
    }

    &new_metadata_entry($place_doc_obj, "PlaceReferencesHTML", join("", @reference_paragraphs));
}
947
948
# Creates one "Designer" document for every entry in the file-level
# %designer_name_to_id_mapping, listing the places that designer worked on
# (taken from %designer_name_to_place_ids_mapping), and then writes the
# "designers" static browser macrofile.
#
# Arguments: the plugin object and an open DBI database handle.
# Dies if an SQL statement cannot be prepared or executed.
sub process_designers
{
    my $self = shift(@_);
    my $dbh = shift(@_);

    # Prepare SQL statement for getting the Place name
    my $place_name_sql_statement = "SELECT Current_name FROM tblPlace WHERE Entry_ID=?";
    my $place_name_sql_handle = $dbh->prepare($place_name_sql_statement)
        or die "Could not prepare SQL statement.";

    # Prepare SQL statement for getting the Place institution
    my $place_institution_sql_statement = "SELECT Institution_Name FROM tblInstitution,tblPlace WHERE tblInstitution.Institution_ID=tblPlace.Institution_ID and tblPlace.Entry_ID=?";
    my $place_institution_sql_handle = $dbh->prepare($place_institution_sql_statement)
        or die "Could not prepare SQL statement.";

    # Prepare SQL statement for getting the Place "date of construction"
    my $place_construction_date_sql_statement = "SELECT Date FROM tblConstruction_and_Dates WHERE Entry_ID=?";
    my $place_construction_date_sql_handle = $dbh->prepare($place_construction_date_sql_statement)
        or die "Could not prepare SQL statement.";

    # Prepare SQL statement for getting the Place non-PDF images
    my $place_images_sql_statement = "SELECT Location FROM tblImages WHERE FileType=1 AND Location NOT LIKE '%.pdf' AND Entry_ID=? ORDER BY Image_Order";
    my $place_images_sql_handle = $dbh->prepare($place_images_sql_statement)
        or die "Could not prepare SQL statement.";
    $place_images_sql_handle->{LongReadLen} = 65536;

    # Shown when a place has no usable non-PDF image
    my $no_image_small_file_href = "_httpcollection_/images/no_image-small.jpg";

    # Create a document object for each designer
    my %designer_id_to_name_mapping;
    foreach my $designer_name (keys %designer_name_to_id_mapping) {
        my $designer_id = $designer_name_to_id_mapping{$designer_name};
        # NOTE(review): process_place_references() uses $self->{'file'} for the same
        # purpose where this uses $self->{'filename'} -- confirm which one is intended.
        my $designer_doc_obj = new doc($self->{'filename'} . "-", "indexed_doc");
        $designer_doc_obj->set_OID("d$designer_id");
        &new_metadata_entry($designer_doc_obj, "DocumentType", "Designer");

        &new_metadata_entry($designer_doc_obj, "Designer_name", $designer_name);

        # Get designer places
        my $designer_places_list_html = "";
        my $last_designer_place_id = "";
        foreach my $designer_place_id (sort(@{$designer_name_to_place_ids_mapping{$designer_name}})) {
            # The designer may have worked on a place multiple times, so check for this
            # (sorting puts identical IDs next to each other)
            next if ($designer_place_id eq $last_designer_place_id);
            $last_designer_place_id = $designer_place_id;

            # Get place name
            $place_name_sql_handle->execute($designer_place_id) or die "Could not execute SQL statement.";
            my $designer_place_name = $place_name_sql_handle->fetchrow();
            $designer_place_name = "" if (!defined($designer_place_name));

            # Get place institution name
            $place_institution_sql_handle->execute($designer_place_id) or die "Could not execute SQL statement.";
            my $designer_place_institution_name = $place_institution_sql_handle->fetchrow();
            $designer_place_institution_name = "" if (!defined($designer_place_institution_name));

            # Get place date of construction
            $place_construction_date_sql_handle->execute($designer_place_id) or die "Could not execute SQL statement.";
            my $designer_place_construction_date_value = $place_construction_date_sql_handle->fetchrow() || "";

            # Get the first non-PDF image for this place
            my $designer_place_image_small_file_href;
            $place_images_sql_handle->execute($designer_place_id) or die "Could not execute SQL statement.";
            my $designer_place_image_location = $place_images_sql_handle->fetchrow();
            if (defined($designer_place_image_location)) {
                $designer_place_image_small_file_href = $self->generate_place_image_variant($designer_doc_obj, $designer_place_image_location, "small");
            }

            # Either there is no non-PDF image for this place, or its small variant
            # could not be generated (generate_place_image_variant() returns nothing
            # in that case); use the placeholder rather than an empty image source
            if (!defined($designer_place_image_small_file_href)) {
                $designer_place_image_small_file_href = $no_image_small_file_href;
            }

            $designer_places_list_html .= "<tr><td valign=\"top\"><a href=\"_gwcgi_?a=d&d=p$designer_place_id\"><img src=\"$designer_place_image_small_file_href\"/></a></td><td valign=\"top\"><a href=\"_gwcgi_?a=d&d=p$designer_place_id\">$designer_place_name</a>, $designer_place_institution_name<br /><b>Date of construction:</b> $designer_place_construction_date_value</td></tr>\n";
        }

        &new_metadata_entry($designer_doc_obj, "DesignerPlacesListHTML", "<table>" . $designer_places_list_html . "</table>");

        $designer_doc_obj->add_utf8_text($designer_doc_obj->get_top_section(), "Some dummy text.");
        $self->{'processor'}->process($designer_doc_obj);
        $self->{'num_processed'}++;

        $designer_id_to_name_mapping{$designer_doc_obj->get_OID()} = $designer_name;
    }

    # Write the designers.dm macrofile
    &write_static_browser_macrofile("designers", \%designer_id_to_name_mapping);
}
1030
1031
# Adds one metadata value to the top section of a document object.
#
# Arguments: the document object, the metadata name and the metadata value.
# Empty or undefined values are ignored. Spaces in the name become
# underscores, the value is passed through unicode::ascii2utf8(), and every
# '&' that does not start a numeric character reference ("&#...") is escaped.
sub new_metadata_entry
{
    my ($doc_obj, $metadata_name, $metadata_value) = (@_);

    # Don't bother with empty metadata
    return if (!defined($metadata_value) || $metadata_value eq "");

    # Spaces aren't allowed in metadata names
    $metadata_name =~ s/ /_/g;

    # Anything from the database is ISO 8859-1 encoded, so convert to UTF-8
    $metadata_value = &unicode::ascii2utf8(\$metadata_value);

    # Escape any '&' characters so the metadata is HTML 4 compliant when displayed.
    # A lookahead is used (rather than consuming the following character) so that
    # a trailing '&' and each of several consecutive '&' characters are escaped too.
    $metadata_value =~ s/&(?!\#)/&amp;/g;

    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), $metadata_name, $metadata_value);
}
1050
1051
# Converts a (simple) RTF string into an HTML fragment and returns it.
#
# Brace-delimited groups are dropped, except that a group containing a
# HYPERLINK field becomes an <a> element. Quote, paragraph, underline and
# italic control words are mapped to their HTML equivalents (underline is
# rendered as italics), all other control words are removed, and \'hh hex
# escapes become numeric character references.
sub rtf_to_html
{
    my $rtf_string = shift(@_);

    # Remove everything in curly braces, but keep any hyperlinks
    while ($rtf_string =~ /\{(.*?)\}/) {
        my $group_replacement = "";
        if ($1 =~ /HYPERLINK (.*)/) {
            my $link_url = $1;
            # Trim before removing the quotes: the field text may have whitespace
            # after the closing quote, which would otherwise leave the quotes in place
            $link_url =~ s/^\s+//;
            $link_url =~ s/\s+$//;
            $link_url =~ s/^\"(.*?)\"$/$1/; # Remove surrounding quotes
            # Trim again in case there was whitespace inside the quotes
            $link_url =~ s/^\s+//;
            $link_url =~ s/\s+$//;
            $group_replacement = "<a href=\"$link_url\">$link_url</a>";
        }
        $rtf_string =~ s/\{(.*?)\}/$group_replacement/;
    }

    # Control word translations, applied in this order. The order matters:
    # \pard must go before \par, and the catch-all removal must come last.
    # "(?:\s|\b)" swallows the single whitespace character that terminates a
    # control word, if there is one.
    my @control_word_rules = (
        [ qr/\\ldblquote(?:\s|\b)/, "\"" ],
        [ qr/\\rdblquote(?:\s|\b)/, "\"" ],
        [ qr/\\rquote(?:\s|\b)/,    "'" ],
        [ qr/\\pard/,               "" ],
        [ qr/\\par/,                "<br />" ],
        [ qr/\\ul(?:\s|\b)/,        "<i>" ],
        [ qr/\\ulnone(?:\s|\b)/,    "</i>" ],
        [ qr/\\i(?:\s|\b)/,         "<i>" ],
        [ qr/\\i0(?:\s|\b)/,        "</i>" ],
        [ qr/\\~/,                  " " ],
        [ qr/\\([A-Za-z0-9\-]+)/,   "" ],  # Any other control word
        [ qr/\}/,                   "" ],  # Unmatched closing braces
    );
    foreach my $control_word_rule (@control_word_rules) {
        my ($pattern, $html) = @{$control_word_rule};
        $rtf_string =~ s/$pattern/$html/g;
    }

    # Assume non-ASCII is ISO 8859-1, and convert into HTML entities
    # (the two hex digits may be in either case)
    $rtf_string =~ s/\\'([0-9A-Fa-f]{2})/"&#" . hex($1) . ";"/ge;

    # Remove empty tags for HTML 4 compliance
    $rtf_string =~ s/<i>\s*<\/i>/ /g;

    return $rtf_string;
}
1099
1100
# Returns the (width, height) of an image file, or an empty list on failure.
#
# Arguments: the plugin object and the local path of the image file.
# The dimensions are obtained by running the external "identify" command and
# are cached in a "<image path>.info" file holding three lines: the image's
# modification time, its width and its height. The cache is used only when it
# is complete and its modification time matches the image's. Failures are
# reported on STDERR and in the plugin's fail log.
sub get_place_image_dimensions
{
    my $self = shift(@_);
    my $place_image_file_path = shift(@_);
    my $fail_log_handle = $self->{'failhandle'};

    # Make sure the place image file actually exists
    if (!-f $place_image_file_path) {
        print STDERR "<ProcessingError n='$place_image_file_path' p='CICPlug' r='Does not exist'>\n" if ($self->{'gli'});
        print STDERR "Error: Image $place_image_file_path does not exist.\n";
        print $fail_log_handle "Error: Image $place_image_file_path does not exist.\n";
        return;
    }
    my $place_image_file_date = (stat($place_image_file_path))[9];

    # Check if this place image has already been identified by looking for a ".info" file in the same directory
    my $place_image_info_file_path = $place_image_file_path . ".info";
    if (-f $place_image_info_file_path && open(my $info_read_handle, "<", $place_image_info_file_path)) {
        # A readable ".info" file exists, so read the cached place image information from it
        my @place_image_info = <$info_read_handle>;
        close($info_read_handle);
        foreach my $info_line (@place_image_info) {
            $info_line =~ s/\s+$//;
        }
        my ($cached_place_image_file_date, $cached_place_image_width, $cached_place_image_height) = @place_image_info;

        # Only trust the cache if all three values are present and numeric; a truncated
        # or corrupt file falls through so the image is identified again
        my $cached_values_are_valid = 1;
        foreach my $cached_value ($cached_place_image_file_date, $cached_place_image_width, $cached_place_image_height) {
            $cached_values_are_valid = 0 if (!defined($cached_value) || $cached_value !~ /^\d+$/);
        }

        # Use the cached information only if the image has not changed since it was cached
        if ($cached_values_are_valid && $cached_place_image_file_date == $place_image_file_date) {
            return ($cached_place_image_width, $cached_place_image_height);
        }
    }

    # We haven't already identified the place image, so do it now
    print STDERR "Identifying place image $place_image_file_path...\n";
    my $identify_command = "identify -format \"%w %h\" \"$place_image_file_path\"";
    my $identify_result = `$identify_command`;
    $identify_result = "" if (!defined($identify_result)); # The command could not be run at all
    print "Identify result: $identify_result\n" if ($self->{'verbosity'} > 2);

    # Check that the output is what we're expecting, and parse the width and height from it
    my ($place_image_width, $place_image_height) = ($identify_result =~ /(\d+) (\d+)/);
    if (!defined($place_image_width) || !defined($place_image_height)) {
        print STDERR "<ProcessingError n='$place_image_file_path' p='CICPlug' r='Could not identify'>\n" if ($self->{'gli'});
        print STDERR "Error: Place image $place_image_file_path could not be identified.\n";
        print $fail_log_handle "Error: Place image $place_image_file_path could not be identified.\n";
        return;
    }

    # Write the place image info file so we don't have to identify this exact image again in the future.
    # This is only a cache, so failing to write it is reported but is not an error.
    if (open(my $info_write_handle, ">", $place_image_info_file_path)) {
        print $info_write_handle "$place_image_file_date\n";
        print $info_write_handle "$place_image_width\n";
        print $info_write_handle "$place_image_height\n";
        close($info_write_handle);
    }
    else {
        print STDERR "Warning: Could not write $place_image_info_file_path: $!\n";
    }
    return ($place_image_width, $place_image_height);
}
1163
1164
# Produces the "small" or "large" variant of a place image, associates it with
# a document, and returns the href to use for it (nothing on failure).
#
# Arguments: the plugin object, the document object to associate the file
# with, the server location of the image, and the variant size name, which
# selects the "<size>_image_type", "<size>_image_width" and
# "<size>_image_options" plugin settings.
sub generate_place_image_variant
{
    my ($self, $doc_obj, $place_image_location, $place_image_variant_size) = @_;

    # Convert the server location of the file into the local location of the file
    (my $source_file_path = $place_image_location) =~ s/^[A-Z]:/$self->{'images_directory'}/;

    # Get the width and height of the place image
    # (on failure the error message is generated by get_place_image_dimensions())
    my ($source_width, $source_height) = $self->get_place_image_dimensions($source_file_path);
    return unless (defined($source_width) && defined($source_height));
    my $source_file_date = (stat($source_file_path))[9];

    # Generate the path of the place image variant (in the cache directory):
    # the drive prefix is replaced and the extension becomes "-<size>.<type>"
    my $variant_file_suffix = "-$place_image_variant_size." . $self->{$place_image_variant_size . '_image_type'};
    (my $variant_file_path = $place_image_location) =~ s/^[A-Z]:/$self->{'cache_directory'}/;
    $variant_file_path =~ s/^(.+)(\..*)/$1$variant_file_suffix/;
    my ($variant_file_name) = ($variant_file_path =~ /^.+\\(.+)$/);

    # Only scale down the place image if it is bigger than the desired width
    my $desired_width = $self->{$place_image_variant_size . '_image_width'};
    my $needs_scaling = ($source_width > $desired_width);
    if (!$needs_scaling) {
        # The desired width is bigger than the place image, so we just use the original
        $variant_file_path = $source_file_path;
    }
    elsif (!-f $variant_file_path || $source_file_date > (stat($variant_file_path))[9]) {
        # The variant doesn't exist yet, or the place image is newer, so (re)generate it
        print STDERR "Generating place image variant $variant_file_path...\n";
        my ($variant_directory) = ($variant_file_path =~ /^(.+)\\.+$/);
        &util::mk_all_dir($variant_directory);
        my $convert_options = "-scale $desired_width " . $self->{$place_image_variant_size . '_image_options'};
        my $convert_command = "convert $convert_options \"$source_file_path\" \"$variant_file_path\"";
        # The command's output is captured but not used; a failed conversion is
        # detected by the dimensions check below
        my $convert_result = `$convert_command`;
    }

    # Make sure the file we are about to associate really is an identifiable image
    # (on failure the error message is generated by get_place_image_dimensions())
    my ($variant_width, $variant_height) = $self->get_place_image_dimensions($variant_file_path);
    return unless (defined($variant_width) && defined($variant_height));

    # Associate the place image variant file
    $doc_obj->associate_file($variant_file_path, $variant_file_name, undef, $doc_obj->get_top_section());

    # Build the href of the place image variant, with spaces URL-encoded
    my $variant_href = "_httpcollection_/index/assoc/{Or}{[parent(Top):assocfilepath],[assocfilepath]}/" . $variant_file_name;
    $variant_href =~ s/ /%20/g;
    return $variant_href;
}
1222
1223
# Writes a single-package static browser macrofile to
# $GSDLHOME\collect\cic-hcap\macros\<package name>.dm.
#
# Arguments: the macro package name (also the file's base name) and a
# reference to a hash mapping document IDs to display names.
# Dies if the macrofile cannot be opened or completely written.
sub write_static_browser_macrofile
{
    my $static_browser_package_name = shift(@_);
    my $id_to_name_mapping = shift(@_);

    my $static_browser_macrofile_path = "$ENV{'GSDLHOME'}\\collect\\cic-hcap\\macros\\$static_browser_package_name.dm";
    print STDERR "Writing $static_browser_macrofile_path...\n";

    # BROWSER_MACROFILE stays a package filehandle because write_static_browser_macros() prints to it
    open(BROWSER_MACROFILE, ">", $static_browser_macrofile_path) or die "Error: Could not write to $static_browser_macrofile_path.\n";
    &write_static_browser_macros($static_browser_package_name, $id_to_name_mapping);

    # Buffered write errors only show up when the file is closed
    close(BROWSER_MACROFILE) or die "Error: Could not finish writing $static_browser_macrofile_path.\n";
}
1235
1236
# Prints one macro package to the (already open) BROWSER_MACROFILE handle:
# a "_cicstaticbrowserquicklinks_" macro with A-Z quick links and a
# "_cicstaticbrowser_" macro with the entries grouped by initial character.
#
# Arguments: the macro package name, a reference to a hash mapping IDs to
# names, and an optional reference to a hash mapping IDs to extra text that
# is passed through to get_static_browser_macro_chunk().
sub write_static_browser_macros
{
    my $static_browser_package_name = shift(@_);
    my $id_to_name_mapping = shift(@_);
    my $id_to_extra_mapping = shift(@_);

    print BROWSER_MACROFILE "package $static_browser_package_name\n\n";
    print BROWSER_MACROFILE "_cicstaticbrowserquicklinks_ {\n";

    # Group the IDs by the first ASCII letter or digit of their name. The letter is
    # upper-cased so that names starting with a lower-case letter share the bucket
    # (and the quick link anchor) of the corresponding capital letter. Names with
    # no letter or digit at all go in a bucket with an empty title.
    my %letter_to_ids_mapping;
    foreach my $id (keys %$id_to_name_mapping) {
        my $name = $id_to_name_mapping->{$id};
        my $letter = "";
        if (defined($name) && $name =~ /([A-Za-z0-9])/) {
            $letter = uc($1);
        }
        push(@{$letter_to_ids_mapping{$letter}}, $id);
    }

    # Quick links: a letter is only a link when there are entries for it
    print BROWSER_MACROFILE "<b>";
    foreach my $letter ("A" .. "Z") {
        if (defined($letter_to_ids_mapping{$letter})) {
            print BROWSER_MACROFILE "<a href=\"#$letter\">$letter</a>&nbsp;";
        }
        else {
            print BROWSER_MACROFILE "$letter&nbsp;";
        }
    }
    print BROWSER_MACROFILE "</b>\n";
    print BROWSER_MACROFILE "}\n\n";

    print BROWSER_MACROFILE "_cicstaticbrowser_ {\n";
    print BROWSER_MACROFILE "<table cellpadding=\"0\" cellspacing=\"0\" width=\"_pagewidth_\">\n";
    foreach my $letter (sort(keys %letter_to_ids_mapping)) {
        my @letter_ids = @{$letter_to_ids_mapping{$letter}};

        # Only the letter buckets get an anchor, since only they have quick links
        my $anchor_name;
        if ($letter =~ /^[A-Z]$/) {
            $anchor_name = $letter;
        }
        print BROWSER_MACROFILE &get_static_browser_macro_chunk($letter, $anchor_name, \@letter_ids, $id_to_name_mapping, $id_to_extra_mapping);
    }
    print BROWSER_MACROFILE "</table>\n";

    print BROWSER_MACROFILE "}\n";
}
1279
1280
# Returns the HTML table rows for one chunk of a static browser: a title row
# followed by the chunk's entries laid out in two columns (the first half of
# the sorted entries on the left, the second half on the right).
#
# Arguments: the chunk title, an optional anchor name for the title, a
# reference to the list of IDs in the chunk, a reference to a hash mapping IDs
# to names, and an optional reference to a hash mapping IDs to extra text
# displayed after each name.
# Entries are sorted case-insensitively by name plus extra text. Each distinct
# ID appears exactly once, even when several IDs share the same name.
sub get_static_browser_macro_chunk
{
    my $chunk_title = shift(@_);
    my $anchor_name = shift(@_);
    my $chunk_ids_ref = shift(@_);
    my $id_to_name_mapping = shift(@_);
    my $id_to_extra_mapping = shift(@_);

    my $static_browser_macro_chunk = "<tr><td width=\"50%\"><br />";
    if (defined($anchor_name) && $anchor_name ne "") {
        $static_browser_macro_chunk .= "<a name=\"$anchor_name\"/>";
    }
    $static_browser_macro_chunk .= "<span style=\"color: black;\"><b>$chunk_title</b></span></td><td width=\"50%\"></td></tr>";

    # Work out the sort key (name plus extra text) of each ID. Keying by ID rather than
    # by name removes repeated IDs without losing different IDs that have the same name.
    my %id_to_full_name_mapping;
    foreach my $id (@{$chunk_ids_ref}) {
        my $full_name = $id_to_name_mapping->{$id};
        $full_name = "" if (!defined($full_name));
        if (defined($id_to_extra_mapping)) {
            my $extra = $id_to_extra_mapping->{$id};
            $full_name .= " " . (defined($extra) ? $extra : "");
        }
        $id_to_full_name_mapping{$id} = $full_name;
    }

    # Sort case-insensitively, falling back on the exact name and then the ID so the
    # order of equal names is always the same
    my @sorted_ids = sort {
        lc($id_to_full_name_mapping{$a}) cmp lc($id_to_full_name_mapping{$b})
            || $id_to_full_name_mapping{$a} cmp $id_to_full_name_mapping{$b}
            || $a cmp $b
    } (keys(%id_to_full_name_mapping));

    # Generates the link (plus extra text) for one entry
    my $get_entry_html = sub {
        my $id = shift(@_);
        my $name = $id_to_name_mapping->{$id};
        my $extra = (defined($id_to_extra_mapping) && $id_to_extra_mapping->{$id}) || "";
        return "<a href=\"_gwcgi_?a=d&amp;d=$id\">" . &html_safe($name) . "</a>" . &html_safe($extra);
    };

    # With an odd number of entries the left column gets the extra one
    my $half_point = int((scalar(@sorted_ids) + 1) / 2);
    for (my $i = 0; $i < $half_point; $i++) {
        $static_browser_macro_chunk .= "<tr>";

        $static_browser_macro_chunk .= "<td valign=\"top\">" . $get_entry_html->($sorted_ids[$i]) . "</td>";

        $static_browser_macro_chunk .= "<td valign=\"top\">";
        if ($i + $half_point < scalar(@sorted_ids)) {
            $static_browser_macro_chunk .= $get_entry_html->($sorted_ids[$i + $half_point]);
        }
        $static_browser_macro_chunk .= "</td>";

        $static_browser_macro_chunk .= "</tr>";
    }

    return $static_browser_macro_chunk;
}
1328
1329
# Writes a static browser macrofile containing one macro package per category
# to $GSDLHOME\collect\cic-hcap\macros\<package root>.dm.
#
# Arguments: the package root (also the file's base name), a reference to a
# hash mapping each category to a list of IDs, a reference to a hash mapping
# IDs to names, and a reference to a hash mapping IDs to extra text.
# Each package is named after the package root followed by the category, with
# all non-word characters removed.
# Dies if the macrofile cannot be opened or completely written.
sub write_bilevel_static_browser_macrofile
{
    my $static_browser_package_root = shift(@_);
    my $category_to_ids_mapping = shift(@_);
    my $id_to_name_mapping = shift(@_);
    my $id_to_extra_mapping = shift(@_);

    my $static_browser_macrofile_path = "$ENV{'GSDLHOME'}\\collect\\cic-hcap\\macros\\$static_browser_package_root.dm";
    print STDERR "Writing $static_browser_macrofile_path...\n";

    # BROWSER_MACROFILE stays a package filehandle because write_static_browser_macros() prints to it
    open(BROWSER_MACROFILE, ">", $static_browser_macrofile_path) or die "Error: Could not write to $static_browser_macrofile_path.\n";

    # The categories are sorted so that the macrofile is identical from one build to the next
    foreach my $category (sort(keys(%{$category_to_ids_mapping}))) {
        my $static_browser_package_name = $static_browser_package_root . $category;
        $static_browser_package_name =~ s/\W//g;

        # Restrict the name mapping to the IDs in this category
        my %id_to_name_mapping_for_category = ();
        foreach my $id (@{$category_to_ids_mapping->{$category}}) {
            $id_to_name_mapping_for_category{$id} = $id_to_name_mapping->{$id};
        }
        &write_static_browser_macros($static_browser_package_name, \%id_to_name_mapping_for_category, $id_to_extra_mapping);
    }

    # Buffered write errors only show up when the file is closed
    close(BROWSER_MACROFILE) or die "Error: Could not finish writing $static_browser_macrofile_path.\n";
}
1354
1355
# Returns a copy of the given text with "&" replaced by "&amp;" and "'"
# replaced by "&#39;". Every other character is returned untouched.
# Apostrophes are escaped because they mess up the Javascript on the
# Search by State page.
sub html_safe
{
    my ($text) = @_;

    # Both replacements happen in one pass; the "&" of a freshly inserted
    # "&#39;" is never escaped again
    my %entity_for = ("&" => "&amp;", "'" => "&#39;");
    $text =~ s/([&'])/$entity_for{$1}/g;

    return $text;
}
1363
1364
# Writes the "browse by state" macro file, "<static_browser_package_name>.dm",
# into the cic-hcap collection's macros directory.
#
# The file contains:
#   - a _cicstaticbrowser_ macro: a table that references one _cicstateXX_
#     macro per state in %state_name_to_abbr_mapping, in sorted state order
#   - for each state, a _cicstateXX_ macro holding that state's table rows
#   - for each state, a _cicstateXXjs_ macro holding the same rows passed
#     through javascript_safe
#
# Parameters:
#   $static_browser_package_name - macro package name; also names the macro file
#   $state_to_ids_mapping        - hashref: state name => arrayref of ids
#   $id_to_name_mapping          - hashref: id => name
#
# Dies if the macro file cannot be opened, or if closing it reports an error.
sub write_state_browser_macrofile
{
    my $static_browser_package_name = shift(@_);
    my $state_to_ids_mapping = shift(@_);
    my $id_to_name_mapping = shift(@_);

    # Build the path with util::filename_cat (as the plugin's argument defaults
    # do) rather than hard-coding Windows directory separators
    my $static_browser_macrofile_path = &util::filename_cat($ENV{'GSDLHOME'}, "collect", "cic-hcap", "macros", "$static_browser_package_name.dm");
    print STDERR "Writing $static_browser_macrofile_path...\n";

    # The package filehandle is kept to match the other macro file writers in
    # this file. Three-argument open keeps the mode separate from the path.
    open(BROWSER_MACROFILE, ">", $static_browser_macrofile_path)
        or die "Error: Could not write to $static_browser_macrofile_path: $!\n";

    # Both passes below walk the states in the same sorted order
    my @state_names = sort(keys(%state_name_to_abbr_mapping));

    # The top-level macro just pulls in each state's macro
    print BROWSER_MACROFILE "package $static_browser_package_name\n\n";
    print BROWSER_MACROFILE "_cicstaticbrowser_ {\n";
    print BROWSER_MACROFILE "<table cellpadding=\"0\" cellspacing=\"0\" width=\"_pagewidth_\">\n";
    foreach my $state (@state_names) {
        my $state_abbr = $state_name_to_abbr_mapping{$state};
        print BROWSER_MACROFILE "_cicstate" . $state_abbr . "_\n";
    }
    print BROWSER_MACROFILE "</table>\n";
    print BROWSER_MACROFILE "}\n";

    foreach my $state (@state_names) {
        my $state_sans_spaces = $state;
        $state_sans_spaces =~ s/ //g;
        my $state_abbr = $state_name_to_abbr_mapping{$state};

        # States that are missing from the mapping are treated as having no ids
        my @state_ids = ();
        if (defined($state_to_ids_mapping->{$state})) {
            @state_ids = @{$state_to_ids_mapping->{$state}};
        }

        my $state_static_browser_macro_chunk = &get_static_browser_macro_chunk($state, $state_sans_spaces, \@state_ids, $id_to_name_mapping, undef);
        # An empty id list gets the placeholder row too, not just a missing entry
        if (!@state_ids) {
            $state_static_browser_macro_chunk .= "<tr><td colspan=\"2\" valign=\"top\">No institutions for this state</td></tr>";
        }

        # Write out the normal macro chunk
        print BROWSER_MACROFILE "\n_cicstate" . $state_abbr . "_ {";
        print BROWSER_MACROFILE $state_static_browser_macro_chunk;
        print BROWSER_MACROFILE "}\n";

        # Write out a Javascript safe version
        print BROWSER_MACROFILE "\n_cicstate" . $state_abbr . "js_ {";
        print BROWSER_MACROFILE &javascript_safe($state_static_browser_macro_chunk);
        print BROWSER_MACROFILE "}\n";
    }

    # Buffered write errors (eg. a full disk) only surface when the file is closed
    close(BROWSER_MACROFILE)
        or die "Error: Could not finish writing $static_browser_macrofile_path: $!\n";
}
1412
1413
# Returns a copy of the given text in which every "</" has been rewritten as
# "<" followed by two backslashes and a "/". Nothing else is changed.
sub javascript_safe
{
    my ($escaped) = @_;

    # Brace delimiters avoid having to escape the "/" characters
    $escaped =~ s{</}{<\\\\/}g;

    return $escaped;
}
1420
1421
14221;
Note: See TracBrowser for help on using the repository browser.