source: trunk/cic-hcap/perllib/plugins/CICPlug.pm@ 12941

Last change on this file since 12941 was 12941, checked in by mdewsnip, 18 years ago

Now removes extra <br /> tags from end of place narrative, and removes empty <i> </i> tags in HTML generated from RTF.

  • Property svn:keywords set to Author Date Id Revision
File size: 62.1 KB
Line 
1###########################################################################
2#
3# CICPlug.pm
4#
5# Copyright (C) 2005 New Zealand Digital Library Project
6#
7# This program is free software; you can redistribute it and/or modify
8# it under the terms of the GNU General Public License as published by
9# the Free Software Foundation; either version 2 of the License, or
10# (at your option) any later version.
11#
12# This program is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15# GNU General Public License for more details.
16#
17# You should have received a copy of the GNU General Public License
18# along with this program; if not, write to the Free Software
19# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20#
21###########################################################################
22
23package CICPlug;
24
25
26use BasPlug;
27use DBI;
28use strict;
29no strict 'refs';
30
31
# Establish the inheritance chain at compile time: CICPlug derives from BasPlug.
BEGIN {
    @CICPlug::ISA = qw(BasPlug);
}
35
36
# Plugin-specific argument declarations; new() appends these to the shared
# "ArgList".  Every argument is declared with type "string", and only
# images_directory is marked 'reqd' => "yes".
# NOTE(review): the large/medium/small settings presumably control the image
# variants generated for places and campus plans; the code that reads them is
# outside this block -- confirm there.
my $arguments = [
    # Directories
    { 'name' => "images_directory",
      'type' => "string", 'deft' => "", 'reqd' => "yes" },
    { 'name' => "cache_directory",
      'type' => "string", 'deft' => &util::filename_cat($ENV{'GSDLHOME'}, "tmp"), 'reqd' => "no" },

    # Large image variant
    { 'name' => "large_image_options",
      'type' => "string", 'deft' => "", 'reqd' => "no" },
    { 'name' => "large_image_type",
      'type' => "string", 'deft' => "jpg", 'reqd' => "no" },
    { 'name' => "large_image_width",
      'type' => "string", 'deft' => "800", 'reqd' => "no" },

    # Medium image variant
    { 'name' => "medium_image_options",
      'type' => "string", 'deft' => "", 'reqd' => "no" },
    { 'name' => "medium_image_type",
      'type' => "string", 'deft' => "jpg", 'reqd' => "no" },
    { 'name' => "medium_image_width",
      'type' => "string", 'deft' => "375", 'reqd' => "no" },

    # Small image variant
    { 'name' => "small_image_options",
      'type' => "string", 'deft' => "", 'reqd' => "no" },
    { 'name' => "small_image_type",
      'type' => "string", 'deft' => "jpg", 'reqd' => "no" },
    { 'name' => "small_image_width",
      'type' => "string", 'deft' => "125", 'reqd' => "no" },
];
84
# Plugin description record; new() appends it to the shared "OptList".
my $options = {
    'name'     => "CICPlug",
    'desc'     => "{CICPlug.desc}",
    'abstract' => "no",
    'inherits' => "yes",
};
89
90
# Returns the default filename pattern (regular expression source text) for
# files this plugin handles: names ending in ".mdb", matched case-insensitively.
sub get_default_process_exp
{
    my $mdb_file_pattern = q^(?i)\.mdb$^;
    return $mdb_file_pattern;
}
95
96
# Constructor.  Registers this class in the plugin list, contributes the
# plugin's argument and option declarations to the shared lists, then lets
# BasPlug build the object and re-blesses the result into $class.
#
# Arguments:
#   $class           - class name being constructed
#   $pluginlist      - array reference; $class is appended to it
#   $inputargs       - passed through to the BasPlug constructor unchanged
#   $hashArgOptLists - hash reference holding the "ArgList" and "OptList" arrays
sub new
{
    my ($class, $pluginlist, $inputargs, $hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments}) if (defined $arguments);
    push(@{$hashArgOptLists->{"OptList"}}, $options) if (defined $options);

    my $self = BasPlug->new($pluginlist, $inputargs, $hashArgOptLists);

    return bless($self, $class);
}
110
111
# Two-letter postal abbreviation -> full state name (the 50 states plus DC).
my $state_abbr_to_name_mapping = {
    'AL' => 'Alabama',
    'AK' => 'Alaska',
    'AZ' => 'Arizona',
    'AR' => 'Arkansas',
    'CA' => 'California',
    'CO' => 'Colorado',
    'CT' => 'Connecticut',
    'DC' => 'District of Columbia',
    'DE' => 'Delaware',
    'FL' => 'Florida',
    'GA' => 'Georgia',
    'HI' => 'Hawaii',
    'ID' => 'Idaho',
    'IL' => 'Illinois',
    'IN' => 'Indiana',
    'IA' => 'Iowa',
    'KS' => 'Kansas',
    'KY' => 'Kentucky',
    'LA' => 'Louisiana',
    'ME' => 'Maine',
    'MD' => 'Maryland',
    'MA' => 'Massachusetts',
    'MI' => 'Michigan',
    'MN' => 'Minnesota',
    'MS' => 'Mississippi',
    'MO' => 'Missouri',
    'MT' => 'Montana',
    'NE' => 'Nebraska',
    'NV' => 'Nevada',
    'NH' => 'New Hampshire',
    'NJ' => 'New Jersey',
    'NM' => 'New Mexico',
    'NY' => 'New York',
    'NC' => 'North Carolina',
    'ND' => 'North Dakota',
    'OH' => 'Ohio',
    'OK' => 'Oklahoma',
    'OR' => 'Oregon',
    'PA' => 'Pennsylvania',
    'RI' => 'Rhode Island',
    'SC' => 'South Carolina',
    'SD' => 'South Dakota',
    'TN' => 'Tennessee',
    'TX' => 'Texas',
    'UT' => 'Utah',
    'VT' => 'Vermont',
    'VA' => 'Virginia',
    'WA' => 'Washington',
    'WV' => 'West Virginia',
    'WI' => 'Wisconsin',
    'WY' => 'Wyoming',
};

# Inverse lookup: full state name -> abbreviation.  Flattening the hash to a
# key/value list and reversing it swaps every pair.
my %state_name_to_abbr_mapping = reverse %{$state_abbr_to_name_mapping};
167
# Two-letter state abbreviation -> geographic area name.  The table is written
# area-first (one row per area) and inverted into the per-state lookup that
# the rest of the file uses.
my $state_abbr_to_area_mapping = do {
    my @states_by_area = (
        [ "Northeast", qw(CT DC DE ME MD MA NH NJ NY PA RI VT) ],
        [ "Southeast", qw(AL AR FL GA KY LA MS NC SC TN VA WV) ],
        [ "Midwest",   qw(IL IN IA KS MI MN MO NE ND OH SD WI) ],
        [ "Southwest", qw(AZ NM OK TX) ],
        [ "Mountain",  qw(CO ID MT UT WY) ],
        [ "West",      qw(AK CA HI NV OR WA) ],
    );

    my %area_for_state;
    foreach my $area_row (@states_by_area) {
        my ($area_name, @state_abbrs) = @{$area_row};
        foreach my $state_abbr (@state_abbrs) {
            $area_for_state{$state_abbr} = $area_name;
        }
    }
    \%area_for_state;
};
221
222
# Value of the PlaceType column in tblPlace -> human-readable place type name.
my $place_type_id_to_name_mapping = {
    '1' => 'Individual building',
    '2' => 'Landscape site',
    '3' => 'Campus arrangement',
    '4' => 'Building group',
};
229
230
# Architectural style names, looked up by position using the ArchType_ID read
# from the ArchPlace table.  The order of this array must match the values in
# the tblArchTypes table -- do not sort or reorder it.
my @place_styles_array = (
    'American colonial',
    'Federal',
    'Greek revival',
    'Italianate',
    'Gothic revival',
    'Romanesque revival',
    'Victorian',
    'Beaux-Arts classicism',
    'Colonial revival',
    'Mission/Mission revival',
    'Modern/pre-WWII',
    'Modern/post-WWII',
    'Postmodern',
    'Contemporary',
    'Regionalist/Vernacular',
    'Other',
);
250
251
# Lower-case place function names, each mapped to an empty string.
# NOTE(review): presumably the set of recognised function names used when
# indexing place functions; the code that consults it is not in view here.
my $place_functions_mapping = {
    map { ($_ => "") } (
        "academic department building",
        "administration",
        "admissions office",
        "alumni center",
        "arboretum",
        "archaeological site",
        "auditorium",
        "bell tower",
        "chapel",
        "classrooms",
        "debating society",
        "dining hall",
        "facility management building",
        "faculty offices",
        "gardens",
        "greek letter society",
        "gymnasium",
        "infirmary",
        "library",
        "master plan (campus)",
        "master plan (landscape)",
        "memorial site",
        "museum",
        "observatory",
        "old main",
        "outdoor space",
        "president's house",
        "private residence",
        "residence hall",
        "stadium",
        "student union",
        "theater",
        "other",
    )
};
287
288
# File-scoped designer bookkeeping, filled in by process_places while it reads
# construction records:
#   %designer_name_to_id_mapping         designer name -> sequentially assigned id
#   %designer_name_to_place_ids_mapping  designer name -> list of place Entry_IDs
# NOTE(review): presumably consumed afterwards by process_designers -- confirm.
my (%designer_name_to_id_mapping, %designer_name_to_place_ids_mapping);
291
292
# Plugin entry point for a single file.
#
# The file itself is only used as a trigger: if its full path matches the
# plugin's 'process_exp' and it is a regular file, the data is read from the
# fixed ODBC data source "CIC-HCAP" and turned into institution, place and
# designer documents.
#
# Arguments (after $self): $pluginfo, $base_dir, $file, $metadata, $processor,
# $maxdocs, $total_count, $gli.  Only $base_dir, $file, $processor and $gli
# are used here.
#
# Returns undef when the file is not one this plugin handles, 1 once the
# database has been processed.  Dies with the DBI error text if the database
# connection cannot be opened.
sub read
{
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;

    $self->{'filename'} = &util::filename_cat($base_dir, $file);
    if ($self->{'filename'} !~ /$self->{'process_exp'}/ || !-f $self->{'filename'}) {
        return undef;
    }
    $self->{'processor'} = $processor;
    $self->{'gli'} = $gli;

    # Open connection to Access database.  DBI->connect returns undef on
    # failure, so stop here with a meaningful message rather than failing
    # later with "Can't call method "prepare" on an undefined value".
    my $dbh = DBI->connect('dbi:ODBC:CIC-HCAP')
        or die "CICPlug: could not connect to ODBC data source CIC-HCAP: "
            . (defined($DBI::errstr) ? $DBI::errstr : "unknown error") . "\n";

    $self->process_institutions($dbh);
    $self->process_places($dbh);
    $self->process_designers($dbh);

    # All three passes are finished, so release the connection explicitly
    $dbh->disconnect();

    return 1;
}
314
315
# Creates one document per responding institution in tblInstitution, passes
# each to the processor, and finally writes the "institutions" and "states"
# browser macrofiles.
#
# Arguments:
#   $self - the plugin object.  Reads 'failhandle', 'gli', 'filename',
#           'images_directory' and 'processor'; increments 'num_processed'
#           and 'num_not_processed'.
#   $dbh  - open DBI database handle.
#
# Dies if any SQL statement fails to execute.  An institution whose best
# image is missing, cannot be matched to a file, or is a PDF is reported and
# skipped (no document is produced for it).
sub process_institutions
{
    my $self = shift(@_);
    my $dbh = shift(@_);
    my $fail_log_handle = $self->{'failhandle'};

    # Reports one problem everywhere it needs to go: a <ProcessingError> tag
    # on STDERR when running under GLI, a plain line on STDERR, and the same
    # plain line in the fail log.
    my $report_error = sub {
        my ($error_subject, $error_reason, $error_message) = @_;
        print STDERR "<ProcessingError n='$error_subject' p='CICPlug' r='$error_reason'>\n" if ($self->{'gli'});
        print STDERR "Error: $error_message\n";
        print $fail_log_handle "Error: $error_message\n";
    };

    # Prepare SQL statement for getting everything from the Institution table
    my $institution_sql_statement = "SELECT * FROM tblInstitution";  # WHERE Institution_ID<200";
    my $institution_sql_handle = $dbh->prepare($institution_sql_statement);
    $institution_sql_handle->{LongReadLen} = 65536;
    $institution_sql_handle->execute() or die "Could not execute SQL statement.";

    # Prepare SQL statement for getting the Institution places
    my $institution_places_sql_statement = "SELECT Entry_ID,Current_name FROM tblPlace WHERE PlaceType>0 AND Institution_ID=?";
    my $institution_places_sql_handle = $dbh->prepare($institution_places_sql_statement);
    $institution_places_sql_handle->{LongReadLen} = 65536;

    # Prepare SQL statement for getting the Institution best place image location
    my $institution_best_place_image_location_sql_statement = "SELECT Location,Entry_ID FROM tblImages WHERE FileType=1 AND FileName=?";
    my $institution_best_place_image_location_sql_handle = $dbh->prepare($institution_best_place_image_location_sql_statement);
    $institution_best_place_image_location_sql_handle->{LongReadLen} = 65536;

    # Prepare SQL statement for getting the Institution places images (only used to check if an institution has some images)
    my $institution_places_images_sql_statement = "SELECT FileName FROM tblImages,tblPlace WHERE tblImages.FileType=1 AND tblImages.Entry_ID=tblPlace.Entry_ID AND tblPlace.Institution_ID=?";
    my $institution_places_images_sql_handle = $dbh->prepare($institution_places_images_sql_statement);
    $institution_places_images_sql_handle->{LongReadLen} = 65536;

    # Prepare SQL statement for getting the Institution campus plans
    my $institution_campus_plans_sql_statement = "SELECT * FROM tblCampusMaps WHERE Electronic=1 AND Institution_ID=?";
    my $institution_campus_plans_sql_handle = $dbh->prepare($institution_campus_plans_sql_statement);
    $institution_campus_plans_sql_handle->{LongReadLen} = 65536;

    # Create a document object for each institution
    my %institution_id_to_name_mapping;
    my %institution_state_to_ids_mapping;
    while (my $row_hashref = $institution_sql_handle->fetchrow_hashref) {
        # Skip any institutions that didn't respond
        next if !defined($row_hashref->{"City"});

        my $institution_id = $row_hashref->{"Institution_ID"};
        # print STDERR "  Institution $institution_id\n";
        my $institution_doc_obj = doc->new($self->{'filename'} . "-", "indexed_doc");
        $institution_doc_obj->set_OID("i$institution_id");
        &new_metadata_entry($institution_doc_obj, "DocumentType", "Institution");

        # The row is edited in place below (Zip, Institution_webpage, State) so
        # that the corrected values are what get stored as metadata later on.

        # For some reason the hyphen seems to be lost from the Zip field, so add it back in
        my $institution_zip = $row_hashref->{"Zip"};
        if ($institution_zip =~ /^(\d\d\d\d\d)(\d\d\d\d)$/) {
            $row_hashref->{"Zip"} = $1 . "-" . $2;
        }

        # Fix up the links to the institution webpage (keep only the text between the first pair of '#')
        if ($row_hashref->{"Institution_webpage"} =~ /\#(.*?)\#/) {
            $row_hashref->{"Institution_webpage"} = $1;
        }

        # Map state to full name
        $row_hashref->{"State"} = $state_abbr_to_name_mapping->{$row_hashref->{"State"}};

        # Get the places in this institution
        my $institution_places_list_html = "";
        $institution_places_sql_handle->execute($institution_id) or die "Could not execute SQL statement.";
        while (my $institution_places_match_hashref = $institution_places_sql_handle->fetchrow_hashref) {
            my $institution_place_id = $institution_places_match_hashref->{"Entry_ID"};
            my $institution_place_name = $institution_places_match_hashref->{"Current_name"};
            $institution_places_list_html .= "<a href=\"_gwcgi_?a=d&d=p$institution_place_id\">$institution_place_name</a><br />\n";
        }
        &new_metadata_entry($institution_doc_obj, "InstitutionPlacesListHTML", $institution_places_list_html);

        # Get the best place image for this institution
        my $institution_best_place_image_name = $row_hashref->{"Best_image"};
        if (!defined($institution_best_place_image_name) || $institution_best_place_image_name eq "") {
            # Some institutions have no electronic images, and thus have no best image.
            # It is only an error when the institution does have images to choose from.
            $institution_places_images_sql_handle->execute($institution_id) or die "Could not execute SQL statement.";
            if (defined($institution_places_images_sql_handle->fetchrow_hashref())) {
                $report_error->("Institution $institution_id",
                                "No best image",
                                "Institution $institution_id -- No best image.");
                $self->{'num_not_processed'}++;
                next;
            }
            &new_metadata_entry($institution_doc_obj, "InstitutionBestPlaceImageHTML", "");
        }
        else {
            # Get the file location of the best place image for this institution
            $institution_best_place_image_location_sql_handle->execute($institution_best_place_image_name) or die "Could not execute SQL statement.";
            my $institution_best_place_image_hashref = $institution_best_place_image_location_sql_handle->fetchrow_hashref();

            my $institution_best_place_image_location = $institution_best_place_image_hashref->{"Location"};
            if (!defined($institution_best_place_image_location) || $institution_best_place_image_location eq "") {
                $report_error->("Institution $institution_id",
                                "Could not match best image $institution_best_place_image_name to a file",
                                "Institution $institution_id -- Could not match best image $institution_best_place_image_name to a file.");
                $self->{'num_not_processed'}++;
                next;
            }

            # PDFs are not allowed for institution best place images.  The dot is
            # escaped so that only a real ".pdf" extension is rejected, matching
            # the campus plan check further down.
            if ($institution_best_place_image_location =~ /\.pdf$/i) {
                $report_error->("Institution $institution_id",
                                "PDF not allowed for best image",
                                "Institution $institution_id -- PDF not allowed for best image.");
                $self->{'num_not_processed'}++;
                next;
            }

            # The image links to the place it belongs to; the value returned by
            # generate_place_image_variant is used as the <img> source.
            my $institution_best_place_id = $institution_best_place_image_hashref->{"Entry_ID"};
            my $institution_best_place_image_medium_file_href = $self->generate_place_image_variant($institution_doc_obj, $institution_best_place_image_location, "medium");
            &new_metadata_entry($institution_doc_obj, "InstitutionBestPlaceImageHTML", "<a href=\"_gwcgi_?a=d&d=p$institution_best_place_id\"><img alt=\"$institution_best_place_image_name\" src=\"$institution_best_place_image_medium_file_href\"/><br />$institution_best_place_image_name</a>");
        }

        # Get institution campus plans
        my $institution_campus_plans_list_html = "";
        $institution_campus_plans_sql_handle->execute($institution_id) or die "Could not execute SQL statement.";
        while (my $institution_campus_plans_match_hashref = $institution_campus_plans_sql_handle->fetchrow_hashref) {
            my $institution_campus_plan_name = $institution_campus_plans_match_hashref->{"NameAndFormat"};
            my $institution_campus_plan_image_location = $institution_campus_plans_match_hashref->{"Location_electronic"};

            # Deal with PDF files separately: don't convert, just associate
            if ($institution_campus_plan_image_location =~ /\.pdf$/i) {
                # Convert the server location of the file into the local location of the file
                # (the leading drive letter is swapped for the images directory)
                my $institution_campus_plan_pdf_file_path = $institution_campus_plan_image_location;
                $institution_campus_plan_pdf_file_path =~ s/^[A-Z]:/$self->{'images_directory'}/;

                if (-f $institution_campus_plan_pdf_file_path) {
                    # Spaces are escaped in the link only; the associated file keeps its plain name
                    my $institution_campus_plan_pdf_file_name = $institution_campus_plan_name . ".pdf";
                    $institution_campus_plan_pdf_file_name =~ s/ /%20/g;
                    my $institution_campus_plan_pdf_file_href = "_httpcollection_/index/assoc/[assocfilepath]/$institution_campus_plan_pdf_file_name";
                    $institution_campus_plans_list_html .= "<a href=\"$institution_campus_plan_pdf_file_href\">$institution_campus_plan_name (PDF)</a><br />";

                    $institution_doc_obj->associate_file($institution_campus_plan_pdf_file_path, $institution_campus_plan_name . ".pdf", undef, $institution_doc_obj->get_top_section());
                }
                else {
                    # A missing plan file is reported, but the institution is still processed
                    $report_error->($institution_campus_plan_pdf_file_path,
                                    "Does not exist",
                                    "File $institution_campus_plan_pdf_file_path does not exist.");
                }
            }
            else {
                my $institution_campus_plan_image_large_file_href = $self->generate_place_image_variant($institution_doc_obj, $institution_campus_plan_image_location, "large");

                # Create a new section for each institution campus plan image
                my $institution_campus_plan_image_section = $institution_doc_obj->insert_section($institution_doc_obj->get_end_child($institution_doc_obj->get_top_section()));
                $institution_doc_obj->add_utf8_text($institution_campus_plan_image_section, "_");  # This is necessary
                $institution_doc_obj->add_utf8_metadata($institution_campus_plan_image_section, "DocumentType", "Image");
                $institution_doc_obj->add_utf8_metadata($institution_campus_plan_image_section, "Title", $institution_campus_plan_name);
                $institution_doc_obj->add_utf8_metadata($institution_campus_plan_image_section, "ImagePath", $institution_campus_plan_image_large_file_href);

                $institution_campus_plans_list_html .= "<a href=\"_gwcgi_?a=d&d=i$institution_id.$institution_campus_plan_image_section\">$institution_campus_plan_name</a><br />";
            }
        }
        &new_metadata_entry($institution_doc_obj, "InstitutionCampusPlansListHTML", $institution_campus_plans_list_html);

        # Add each field from the table as metadata
        foreach my $key (keys(%$row_hashref)) {
            my $value = $row_hashref->{$key};
            if (defined($value)) {
                &new_metadata_entry($institution_doc_obj, $key, $value);
            }
        }

        $institution_doc_obj->add_utf8_text($institution_doc_obj->get_top_section(), "Some dummy text.");
        $self->{'processor'}->process($institution_doc_obj);
        $self->{'num_processed'}++;

        # Build mappings for creating the static macrofiles
        # ("State" holds the full state name by this point)
        my $institution_name = $row_hashref->{"Institution_Name"};
        $institution_id_to_name_mapping{$institution_doc_obj->get_OID()} = $institution_name;
        my $institution_state = $row_hashref->{"State"};
        push(@{$institution_state_to_ids_mapping{$institution_state}}, $institution_doc_obj->get_OID());
    }

    # Write the institutions.dm macrofile
    &write_static_browser_macrofile("institutions", \%institution_id_to_name_mapping);

    # Write the states.dm macrofile
    &write_state_browser_macrofile("states", \%institution_state_to_ids_mapping, \%institution_id_to_name_mapping);
}
495
496
497sub process_places
498{
499 my $self = shift(@_);
500 my $dbh = shift(@_);
501 my $fail_log_handle = $self->{'failhandle'};
502
503 # Prepare SQL statement for getting everything from the Place table
504 my $place_sql_statement = "SELECT * FROM tblPlace"; # WHERE Entry_ID<100";
505 my $place_sql_handle = $dbh->prepare($place_sql_statement);
506 $place_sql_handle->{LongReadLen} = 65536;
507 $place_sql_handle->execute() or die "Could not execute SQL statement.";
508
509 # Prepare SQL statement for getting the Place institution
510 my $place_institution_sql_statement = "SELECT Institution_Name FROM tblInstitution,tblPlace WHERE tblInstitution.Institution_ID=tblPlace.Institution_ID and tblPlace.Entry_ID=?";
511 my $place_institution_sql_handle = $dbh->prepare($place_institution_sql_statement);
512
513 # Prepare SQL statement for getting the Place construction dates
514 my $place_construction_dates_sql_statement = "SELECT Prefix,Date,Note,Architect_Name FROM tblConstruction_and_Dates WHERE Entry_ID=?";
515 my $place_construction_dates_sql_handle = $dbh->prepare($place_construction_dates_sql_statement);
516
517 # Prepare SQL statement for getting the Place images
518 my $place_images_sql_statement = "SELECT FileName,Location FROM tblImages WHERE FileType=1 AND Entry_ID=? ORDER BY Image_Order";
519 my $place_images_sql_handle = $dbh->prepare($place_images_sql_statement);
520 $place_images_sql_handle->{LongReadLen} = 65536;
521
522 # Prepare SQL statement for getting the Place materials
523 my $place_materials_sql_statement = "SELECT * FROM tblDescription_building WHERE Entry_ID=?";
524 my $place_materials_sql_handle = $dbh->prepare($place_materials_sql_statement);
525 $place_materials_sql_handle->{LongReadLen} = 65536;
526
527 # Prepare SQL statement for getting the Place building styles
528 my $place_styles_sql_statement = "SELECT ArchType_ID FROM ArchPlace WHERE Entry_ID=?";
529 my $place_styles_sql_handle = $dbh->prepare($place_styles_sql_statement);
530 $place_styles_sql_handle->{LongReadLen} = 65536;
531
532 # Prepare SQL statement for getting the Place functions
533 my $place_functions_sql_statement = "SELECT Function,Year,Prefix FROM tblFunction_and_dates WHERE Entry_ID=?";
534 my $place_functions_sql_handle = $dbh->prepare($place_functions_sql_statement);
535 $place_functions_sql_handle->{LongReadLen} = 65536;
536
537 # Prepare SQL statement for getting the Place significance
538 my $place_significance_sql_statement = "SELECT SigType FROM tblSigTypes,SigPlace WHERE tblSigTypes.SigTypes_ID=SigPlace.SigType_ID+1 AND SigPlace.Entry_ID=?";
539 my $place_significance_sql_handle = $dbh->prepare($place_significance_sql_statement);
540 $place_significance_sql_handle->{LongReadLen} = 65536;
541
542 # Prepare SQL statement for getting the Place references
543 my $place_references_sql_statement = "SELECT Bibliography FROM tblReferences WHERE Entry_ID=?";
544 my $place_references_sql_handle = $dbh->prepare($place_references_sql_statement);
545 $place_references_sql_handle->{LongReadLen} = 65536;
546
547 # Prepare SQL statement for getting the Place designations
548 my $place_designations_sql_statement = "SELECT National_Register,Federal_Agency,HABS,HAER,Local_Designation FROM tblReferences WHERE Entry_ID=?";
549 my $place_designations_sql_handle = $dbh->prepare($place_designations_sql_statement);
550 $place_designations_sql_handle->{LongReadLen} = 65536;
551
552 # Prepare SQL statement for getting the Place narrative
553 my $place_narrative_sql_statement = "SELECT Narrative FROM tblSignificance_Narrative WHERE Entry_ID=?";
554 my $place_narrative_sql_handle = $dbh->prepare($place_narrative_sql_statement);
555 $place_narrative_sql_handle->{LongReadLen} = 65536;
556
557 # Prepare SQL statement for getting the Place state
558 my $place_state_sql_statement = "SELECT State FROM tblInstitution,tblPlace WHERE tblInstitution.Institution_ID=tblPlace.Institution_ID AND Entry_ID=?";
559 my $place_state_sql_handle = $dbh->prepare($place_state_sql_statement);
560 $place_state_sql_handle->{LongReadLen} = 65536;
561
562 # Create a document object for each place
563 my %place_type_to_ids_mapping;
564 my %place_style_to_ids_mapping;
565 my %place_date_to_ids_mapping;
566 my %place_function_to_ids_mapping;
567 my %place_id_to_name_mapping;
568 my %place_id_to_institution_name_mapping;
569 while (my $row_hashref = $place_sql_handle->fetchrow_hashref) {
570 my $place_id = $row_hashref->{"Entry_ID"};
571 # print STDERR " Place $place_id\n";
572 my $place_doc_obj = new doc($self->{'filename'} . "-", "indexed_doc");
573 $place_doc_obj->set_OID("p$place_id");
574 &new_metadata_entry($place_doc_obj, "DocumentType", "Place");
575
576 # Convert the place type ID into a name
577 $row_hashref->{"PlaceType"} = $place_type_id_to_name_mapping->{$row_hashref->{"PlaceType"}};
578
579 # Add each field from the table as metadata
580 foreach my $key (keys(%$row_hashref)) {
581 my $value = $row_hashref->{$key};
582 if (defined($value)) {
583 &new_metadata_entry($place_doc_obj, $key, $value);
584 }
585 }
586
587 # Get place name
588 my $place_name = $row_hashref->{"Current_name"};
589 if (!defined($place_name)) {
590 print STDERR "<ProcessingError n='Place $place_id' p='CICPlug' r='Missing place name'>\n" if ($self->{'gli'});
591 print STDERR "Error: Place $place_id -- Missing place name.\n";
592 print $fail_log_handle "Error: Place $place_id -- Missing place name.\n";
593 $self->{'num_not_processed'}++;
594 next;
595 }
596
597 # Get place type
598 my $place_type = $row_hashref->{"PlaceType"};
599 if (!defined($place_type)) {
600 print STDERR "<ProcessingError n='Place $place_id' p='CICPlug' r='Missing place type'>\n" if ($self->{'gli'});
601 print STDERR "Error: Place $place_id -- Missing place type.\n";
602 print $fail_log_handle "Error: Place $place_id -- Missing place type.\n";
603 $self->{'num_not_processed'}++;
604 next;
605 }
606
607 # Create place styles mapping
608 $place_styles_sql_handle->execute($place_id) or die "Could not execute SQL statement.";
609 while (my $place_styles_match_hashref = $place_styles_sql_handle->fetchrow_hashref()) {
610 # The ArchType_ID is actually an index into the tblArchType table, NOT a link
611 my $place_style_index = $place_styles_match_hashref->{"ArchType_ID"};
612 my $place_style = $place_styles_array[$place_style_index];
613 push(@{$place_style_to_ids_mapping{$place_style}}, $place_doc_obj->get_OID());
614 &new_metadata_entry($place_doc_obj, "Style", $place_style);
615 }
616
617 # Get place institution
618 $place_institution_sql_handle->execute($place_id) or die "Could not execute SQL statement.";
619 my $place_institution = $place_institution_sql_handle->fetchrow();
620 &new_metadata_entry($place_doc_obj, "Institution_name", $place_institution);
621 $place_id_to_institution_name_mapping{$place_doc_obj->get_OID()} = ", " . $place_institution;
622
623 # Get place state and area (for searching)
624 $place_state_sql_handle->execute($place_id) or die "Could not execute SQL statement.";
625 my $place_state_abbr = $place_state_sql_handle->fetchrow();
626 &new_metadata_entry($place_doc_obj, "State", $place_state_abbr . " " . $state_abbr_to_name_mapping->{$place_state_abbr} . " " . $state_abbr_to_area_mapping->{$place_state_abbr});
627
628 # Get place construction dates
629 my $place_construction_dates_table_html = "";
630 $place_construction_dates_sql_handle->execute($place_id) or die "Could not execute SQL statement.";
631 while (my $place_construction_dates_match_hashref = $place_construction_dates_sql_handle->fetchrow_hashref()) {
632 my $place_construction_date = $place_construction_dates_match_hashref->{"Date"};
633 if (!defined($place_construction_date)) {
634 # Landscape sites are allowed to have no construction information
635 next if ($place_type eq "Landscape site");
636
637 print STDERR "<ProcessingError n='Place $place_id' p='CICPlug' r='Missing construction date'>\n" if ($self->{'gli'});
638 print STDERR "Error: Place $place_id -- Missing construction date.\n";
639 print $fail_log_handle "Error: Place $place_id -- Missing construction date.\n";
640 next;
641 }
642
643 # Convert the construction date to a time period (for searching and browsing)
644 if ($place_construction_date =~ /^(\d{1,4}).*$/) {
645 my $place_construction_year = $1;
646
647 my $place_time_period;
648 if ($place_construction_year < 1800) { $place_time_period = "pre-1800"; }
649 elsif ($place_construction_year < 1850) { $place_time_period = "1800-1850"; }
650 elsif ($place_construction_year < 1900) { $place_time_period = "1850-1900"; }
651 elsif ($place_construction_year < 1945) { $place_time_period = "1900-1945"; }
652 elsif ($place_construction_year <= 1995) { $place_time_period = "1945-1995"; }
653 elsif ($place_construction_year > 1995) { $place_time_period = "post-1995"; }
654 push(@{$place_date_to_ids_mapping{$place_time_period}}, $place_doc_obj->get_OID());
655 &new_metadata_entry($place_doc_obj, "Time_period", $place_time_period);
656 }
657 elsif ($place_construction_date ne "n.d." && $place_construction_date ne "unknown" && $place_construction_date ne "various") {
658 print STDERR "<ProcessingError n='Place $place_id' p='CICPlug' r='Warning: Unknown construction date: $place_construction_date'>\n" if ($self->{'gli'});
659 print STDERR "Warning: Place $place_id -- Unknown construction date: $place_construction_date.\n";
660 print $fail_log_handle "Warning: Place $place_id -- Unknown construction date: $place_construction_date.\n";
661 }
662
663 my $place_construction_note = $place_construction_dates_match_hashref->{"Note"};
664 if (!defined($place_construction_note)) {
665 # "No date" entries are allowed to have no construction note
666 next if ($place_construction_date eq "n.d");
667
668 print STDERR "<ProcessingError n='Place $place_id' p='CICPlug' r='Missing construction note'>\n" if ($self->{'gli'});
669 print STDERR "Error: Place $place_id -- Missing construction note.\n";
670 print $fail_log_handle "Error: Place $place_id -- Missing construction note.\n";
671 next;
672 }
673
674 # Get the architects for this construction and remember them for later for the designer objects
675 my $place_construction_architect = $place_construction_dates_match_hashref->{"Architect_Name"} || "";
676 my $place_construction_architect_links = "";
677 foreach my $designer_name (split(/;/, $place_construction_architect)) {
678 $designer_name =~ s/\(.*?\)//g;
679 $designer_name =~ s/^\s*//;
680 $designer_name =~ s/\s*$//;
681
682 my $designer_id = $designer_name_to_id_mapping{$designer_name};
683 if (!defined($designer_id)) {
684 $designer_id = scalar(keys(%designer_name_to_id_mapping)) + 1;
685 $designer_name_to_id_mapping{$designer_name} = $designer_id;
686 }
687 $place_construction_architect_links .= "<a href=\"_gwcgi_?a=d&d=d$designer_id\">$designer_name</a> ";
688 push(@{$designer_name_to_place_ids_mapping{$designer_name}}, $place_id);
689 }
690
691 my $place_construction_date_prefix = $place_construction_dates_match_hashref->{"Prefix"} || "";
692 $place_construction_dates_table_html .= "<tr><td class=\"cicplaceconstructiondatetd\" valign=\"top\">$place_construction_date_prefix $place_construction_date</td><td valign=\"top\">$place_construction_note $place_construction_architect_links</td></tr>";
693 }
694 &new_metadata_entry($place_doc_obj, "PlaceConstructionDatesTableHTML", "<table cellpadding=\"0\" cellspacing=\"0\">" . $place_construction_dates_table_html . "</table>");
695
696 # Get place materials (individual buildings only)
697 if ($row_hashref->{"PlaceType"} eq "Individual building") {
698 $place_materials_sql_handle->execute($place_id) or die "Could not execute SQL statement.";
699 my $place_materials_match_hashref = $place_materials_sql_handle->fetchrow_hashref();
700 &new_metadata_entry($place_doc_obj, "MaterialFoundation", $place_materials_match_hashref->{"foundation"} || "");
701 &new_metadata_entry($place_doc_obj, "MaterialRoof", $place_materials_match_hashref->{"roof"} || "");
702 &new_metadata_entry($place_doc_obj, "MaterialWalls", $place_materials_match_hashref->{"walls"} || "");
703
704 &new_metadata_entry($place_doc_obj, "Materials", $place_materials_match_hashref->{"foundation"} || "");
705 &new_metadata_entry($place_doc_obj, "Materials", $place_materials_match_hashref->{"roof"} || "");
706 &new_metadata_entry($place_doc_obj, "Materials", $place_materials_match_hashref->{"walls"} || "");
707 }
708
709 # Get place functions
710 my $place_functions = "";
711 my $place_functions_table_html = "";
712 $place_functions_sql_handle->execute($place_id) or die "Could not execute SQL statement.";
713 while (my $place_functions_match_hashref = $place_functions_sql_handle->fetchrow_hashref()) {
714 my $place_function = $place_functions_match_hashref->{"Function"};
715 if (!defined($place_function)) {
716 print STDERR "<ProcessingError n='Place $place_id' p='CICPlug' r='Missing function'>\n" if ($self->{'gli'});
717 print STDERR "Error: Place $place_id -- Missing function.\n";
718 print $fail_log_handle "Error: Place $place_id -- Missing function.\n";
719 next;
720 }
721 # Check for multiline values (these are errors)
722 if ($place_function =~ /\n/) {
723 print STDERR "<ProcessingError n='Place $place_id' p='CICPlug' r='Multiline function'>\n" if ($self->{'gli'});
724 print STDERR "Error: Place $place_id -- Multiline function.\n";
725 print $fail_log_handle "Error: Place $place_id -- Multiline function.\n";
726 next;
727 }
728 my $place_year = $place_functions_match_hashref->{"Year"};
729 if (!defined($place_year)) {
730 print STDERR "<ProcessingError n='Place $place_id' p='CICPlug' r='Missing function year'>\n" if ($self->{'gli'});
731 print STDERR "Error: Place $place_id -- Missing function year.\n";
732 print $fail_log_handle "Error: Place $place_id -- Missing function year.\n";
733 next;
734 }
735 my $place_year_prefix = $place_functions_match_hashref->{"Prefix"} || "";
736 $place_functions_table_html .= "<tr><td class=\"cicplacefunctionyeartd\" valign=\"top\">$place_year_prefix $place_year</td><td valign=\"top\">$place_function</td></tr>";
737
738 # Prepare function metadata for browsing and searching
739 my $place_function_to_index = lc($place_function); # Casefold
740 $place_function_to_index =~ s/^\s*//; # Remove whitespace from the start
741 if ($place_function_to_index =~ /^master plan/) {
742 $place_function_to_index =~ s/ \(campus,.*/ \(campus\)/;
743 $place_function_to_index =~ s/ \(campus:.*/ \(campus\)/;
744 $place_function_to_index =~ s/ \(landscape,.*/ \(landscape\)/;
745 }
746 else {
747 $place_function_to_index =~ s/\(.*\)//g; # Remove anything in parentheses
748 }
749 $place_function_to_index =~ s/\s*$//; # Remove whitespace from the end
750
751 # Deal with common plural cases
752 $place_function_to_index =~ s/classroom$/classrooms/;
753 $place_function_to_index =~ s/department buildings$/department building/;
754 $place_function_to_index =~ s/faculty office$/faculty offices/;
755 $place_function_to_index =~ s/garden$/gardens/;
756 $place_function_to_index =~ s/residence halls$/residence hall/;
757 $place_function_to_index =~ s/private residences$/private residence/;
758
759 # Check it is one of the valid function values
760 if (!defined($place_functions_mapping->{$place_function_to_index})) {
761 print STDERR "<ProcessingError n='Place $place_id' p='CICPlug' r='Warning: Unknown function: $place_function_to_index'>\n" if ($self->{'gli'});
762 print STDERR "Warning: Place $place_id -- Unknown function: $place_function_to_index.\n";
763 print $fail_log_handle "Warning: Place $place_id -- Unknown function: $place_function_to_index.\n";
764 next;
765 }
766 push(@{$place_function_to_ids_mapping{$place_function_to_index}}, $place_doc_obj->get_OID());
767 $place_functions .= "$place_function_to_index ";
768 }
769 &new_metadata_entry($place_doc_obj, "Functions", $place_functions);
770 &new_metadata_entry($place_doc_obj, "PlaceFunctionsTableHTML", "<table cellpadding=\"0\" cellspacing=\"0\">" . $place_functions_table_html . "</table>");
771
772 # Get place significance
773 $place_significance_sql_handle->execute($place_id) or die "Could not execute SQL statement.";
774 while (my $place_significance_match_hashref = $place_significance_sql_handle->fetchrow_hashref()) {
775 my $place_significance = $place_significance_match_hashref->{"SigType"};
776 &new_metadata_entry($place_doc_obj, "Significance", lc($place_significance));
777 }
778
779 # Get place references
780 $place_references_sql_handle->execute($place_id) or die "Could not execute SQL statement.";
781 my $place_references = $place_references_sql_handle->fetchrow();
782 if (defined($place_references)) {
783 $self->add_place_references_metadata($place_doc_obj, $place_id, $place_references);
784 }
785
786 # Get place designations
787 $place_designations_sql_handle->execute($place_id) or die "Could not execute SQL statement.";
788 my $place_designations_match_hashref = $place_designations_sql_handle->fetchrow_hashref();
789 if ($place_designations_match_hashref->{"National_Register"} eq "1") {
790 &new_metadata_entry($place_doc_obj, "Designation", "National Register");
791 }
792 if ($place_designations_match_hashref->{"Federal_Agency"} eq "1") {
793 &new_metadata_entry($place_doc_obj, "Designation", "National Historic Landmark");
794 }
795 if ($place_designations_match_hashref->{"HABS"} eq "1" || $place_designations_match_hashref->{"HAER"} eq "1") {
796 &new_metadata_entry($place_doc_obj, "Designation", "HABS/HAER");
797 }
798 if ($place_designations_match_hashref->{"Local_Designation"} eq "1") {
799 &new_metadata_entry($place_doc_obj, "Designation", "Local/State");
800 }
801
802 # Get place narrative
803 $place_narrative_sql_handle->execute($place_id) or die "Could not execute SQL statement.";
804 my $place_narrative = $place_narrative_sql_handle->fetchrow();
805 if (defined($place_narrative)) {
806 my $place_narrative_html = &rtf_to_html($place_narrative);
807 $place_narrative_html =~ s/(<br \/>(\s|\n)*)*$//; # Remove any trailing <br /> tags
808 &new_metadata_entry($place_doc_obj, "PlaceNarrativeHTML", $place_narrative_html);
809 }
810
811 # Get place images
812 my $place_images_html = "";
813 $place_images_sql_handle->execute($place_id) or die "Could not execute SQL statement.";
814 while (my $place_images_match_hashref = $place_images_sql_handle->fetchrow_hashref) {
815 my $place_image_location = $place_images_match_hashref->{"Location"};
816 my $place_image_name = $place_images_match_hashref->{"FileName"};
817
818 # Deal with PDF files separately: don't convert, just associate
819 if ($place_image_location =~ /\.pdf$/i) {
820 # Convert the server location of the PDF file into the local location of the file
821 my $place_pdf_file_path = $place_image_location;
822 $place_pdf_file_path =~ s/^[A-Z]:/$self->{'images_directory'}/;
823
824 if (-f $place_pdf_file_path) {
825 my ($place_pdf_file_name) = ($place_pdf_file_path =~ /^.+\\(.+)$/);
826 $place_doc_obj->associate_file($place_pdf_file_path, $place_pdf_file_name, undef, $place_doc_obj->get_top_section());
827
828 $place_pdf_file_name =~ s/ /%20/g;
829 my $place_pdf_file_href = "_httpcollection_/index/assoc/[assocfilepath]/$place_pdf_file_name";
830 $place_images_html .= "<tr><td align=\"right\" valign=\"top\"><a href=\"$place_pdf_file_href\">_iconpdf_</a></td><td valign=\"top\"><a href=\"$place_pdf_file_href\">$place_image_name (PDF)</a></td></tr>\n";
831 }
832 else {
833 print STDERR "<ProcessingError n='$place_pdf_file_path' p='CICPlug' r='Does not exist'>\n" if ($self->{'gli'});
834 print STDERR "Error: File $place_pdf_file_path does not exist.\n";
835 print $fail_log_handle "Error: File $place_pdf_file_path does not exist.\n";
836 }
837 }
838 else {
839 my $place_image_small_file_href = $self->generate_place_image_variant($place_doc_obj, $place_image_location, "small");
840 my $place_image_large_file_href = $self->generate_place_image_variant($place_doc_obj, $place_image_location, "large");
841
842 if (defined($place_image_name)) {
843 # Create a new section for each place image
844 my $place_image_section = $place_doc_obj->insert_section($place_doc_obj->get_end_child($place_doc_obj->get_top_section()));
845 $place_doc_obj->add_utf8_text($place_image_section, "_"); # This is necessary
846 $place_doc_obj->add_utf8_metadata($place_image_section, "DocumentType", "Image");
847 $place_doc_obj->add_utf8_metadata($place_image_section, "Title", $place_image_name);
848 $place_doc_obj->add_utf8_metadata($place_image_section, "ImagePath", $place_image_large_file_href);
849
850 $place_images_html .= "<tr><td valign=\"top\"><a href=\"_gwcgi_?a=d&d=p$place_id.$place_image_section\"><img alt=\"$place_image_name\" src=\"$place_image_small_file_href\"/></a></td><td valign=\"top\"><a href=\"_gwcgi_?a=d&d=p$place_id.$place_image_section\">$place_image_name</a></td></tr>\n";
851 }
852 else {
853 $place_images_html .= "<tr><td valign=\"top\"><img alt=\"No image\" src=\"_httpcollection_/images/no_image-small.jpg\"/></td><td valign=\"top\">&nbsp;</td></tr>\n";
854 }
855 }
856 }
857
858 &new_metadata_entry($place_doc_obj, "PlaceImagesHTML", "<table>" . $place_images_html . "</table>");
859
860 $place_doc_obj->add_utf8_text($place_doc_obj->get_top_section(), "Some dummy text.");
861 $self->{'processor'}->process($place_doc_obj);
862 $self->{'num_processed'}++;
863
864 # Build mappings for creating the static macrofiles
865 $place_id_to_name_mapping{$place_doc_obj->get_OID()} = $place_name;
866 push(@{$place_type_to_ids_mapping{$place_type}}, $place_doc_obj->get_OID());
867 }
868
869 &write_bilevel_static_browser_macrofile("types", \%place_type_to_ids_mapping, \%place_id_to_name_mapping, \%place_id_to_institution_name_mapping);
870 &write_bilevel_static_browser_macrofile("styles", \%place_style_to_ids_mapping, \%place_id_to_name_mapping, \%place_id_to_institution_name_mapping);
871 &write_bilevel_static_browser_macrofile("dates", \%place_date_to_ids_mapping, \%place_id_to_name_mapping, \%place_id_to_institution_name_mapping);
872 &write_bilevel_static_browser_macrofile("functions", \%place_function_to_ids_mapping, \%place_id_to_name_mapping, \%place_id_to_institution_name_mapping);
873}
874
875
# Creates one document object per designer found in the file-level
# %designer_name_to_id_mapping hash, hands each one to the document
# processor, and finally writes the "designers" static browser macrofile.
#
# Arguments:
#   $self - the plugin object ('filename', 'processor' and 'num_processed' are used)
#   $dbh  - DBI database handle used to prepare the place lookups
sub process_designers
{
    my ($self, $dbh) = @_;
    my $fail_log_handle = $self->{'failhandle'};

    # Prepared statements for the place details shown on a designer page; each takes the place Entry_ID
    my $place_name_sql_handle = $dbh->prepare("SELECT Current_name FROM tblPlace WHERE Entry_ID=?");
    my $place_institution_sql_handle = $dbh->prepare("SELECT Institution_Name FROM tblInstitution,tblPlace WHERE tblInstitution.Institution_ID=tblPlace.Institution_ID and tblPlace.Entry_ID=?");
    my $place_construction_date_sql_handle = $dbh->prepare("SELECT Date FROM tblConstruction_and_Dates WHERE Entry_ID=?");

    # Non-PDF images only, in display order
    my $place_images_sql_handle = $dbh->prepare("SELECT Location FROM tblImages WHERE FileType=1 AND Location NOT LIKE '%.pdf' AND Entry_ID=? ORDER BY Image_Order");
    $place_images_sql_handle->{LongReadLen} = 65536;

    # Executes a prepared statement for one place and fetches from it in scalar context
    my $fetch_for_place = sub {
        my ($sql_handle, $entry_id) = @_;
        $sql_handle->execute($entry_id) or die "Could not execute SQL statement.";
        return scalar($sql_handle->fetchrow());
    };

    my %designer_id_to_name_mapping;
    foreach my $designer_name (keys(%designer_name_to_id_mapping)) {
        my $designer_id = $designer_name_to_id_mapping{$designer_name};

        # Each designer becomes a document with OID "d<id>"
        my $designer_doc_obj = doc->new($self->{'filename'} . "-", "indexed_doc");
        $designer_doc_obj->set_OID("d$designer_id");
        &new_metadata_entry($designer_doc_obj, "DocumentType", "Designer");
        &new_metadata_entry($designer_doc_obj, "Designer_name", $designer_name);

        # Build one table row per distinct place the designer worked on
        my $designer_places_list_html = "";
        my $previous_designer_place_id = "";
        foreach my $designer_place_id (sort(@{$designer_name_to_place_ids_mapping{$designer_name}})) {
            # The ids are sorted, so repeats of the same place are adjacent and can be skipped
            next if ($designer_place_id eq $previous_designer_place_id);
            $previous_designer_place_id = $designer_place_id;

            my $designer_place_name = $fetch_for_place->($place_name_sql_handle, $designer_place_id);
            my $designer_place_institution_name = $fetch_for_place->($place_institution_sql_handle, $designer_place_id);
            my $designer_place_construction_date_value = $fetch_for_place->($place_construction_date_sql_handle, $designer_place_id) || "";

            # Use the first non-PDF image as the thumbnail, or the placeholder if there is none
            my $designer_place_image_location = $fetch_for_place->($place_images_sql_handle, $designer_place_id);
            my $designer_place_image_small_file_href = defined($designer_place_image_location)
                ? $self->generate_place_image_variant($designer_doc_obj, $designer_place_image_location, "small")
                : "_httpcollection_/images/no_image-small.jpg";

            $designer_places_list_html .= "<tr><td valign=\"top\"><a href=\"_gwcgi_?a=d&d=p$designer_place_id\"><img src=\"$designer_place_image_small_file_href\"/></a></td><td valign=\"top\"><a href=\"_gwcgi_?a=d&d=p$designer_place_id\">$designer_place_name</a>, $designer_place_institution_name<br /><b>Date of construction:</b> $designer_place_construction_date_value</td></tr>\n";
        }
        &new_metadata_entry($designer_doc_obj, "DesignerPlacesListHTML", "<table>" . $designer_places_list_html . "</table>");

        # Give the document some text, then pass it on for processing
        $designer_doc_obj->add_utf8_text($designer_doc_obj->get_top_section(), "Some dummy text.");
        $self->{'processor'}->process($designer_doc_obj);
        $self->{'num_processed'}++;

        $designer_id_to_name_mapping{$designer_doc_obj->get_OID()} = $designer_name;
    }

    # Write the designers.dm macrofile
    &write_static_browser_macrofile("designers", \%designer_id_to_name_mapping);
}
957
958
# Adds one metadata value to the top section of a document object.
#
# Arguments:
#   $doc_obj        - document object (get_top_section and add_utf8_metadata are called on it)
#   $metadata_name  - metadata name; spaces are replaced with underscores
#   $metadata_value - metadata value; undefined or empty values are ignored
#
# The value is passed through unicode::ascii2utf8 and every '&' that does not
# start a numeric character reference ("&#...") is escaped as "&amp;".
sub new_metadata_entry
{
    my ($doc_obj, $metadata_name, $metadata_value) = (@_);

    # Don't bother with empty (or missing) metadata
    return if (!defined($metadata_value) || $metadata_value eq "");

    # Spaces aren't allowed in metadata names
    $metadata_name =~ s/ /_/g;

    # Anything from the database is ISO 8859-1 encoded, so convert to UTF-8
    $metadata_value = &unicode::ascii2utf8(\$metadata_value);

    # Escape any '&' characters so the metadata is HTML 4 compliant when displayed.
    # A lookahead is used so the character after the '&' is not consumed: this way
    # consecutive ampersands ("&&") and an ampersand at the very end of the value
    # are escaped too, while "&#NNN;" references are still left alone.
    $metadata_value =~ s/&(?!\#)/&amp;/g;

    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), $metadata_name, $metadata_value);
}
977
978
# Converts the RTF references of a place into HTML, splits them into
# individual references (separated by a blank line, i.e. two <br /> tags) and
# adds "Reference", "ReferenceAuthor", "ReferenceTitle" and
# "PlaceReferencesHTML" metadata to the place document.
#
# Arguments:
#   $self                        - the plugin object ('failhandle' and 'gli' are used)
#   $place_doc_obj               - document object of the place
#   $place_id                    - place id, used in warning messages only
#   $place_references_rtf_string - the references as an RTF string
sub add_place_references_metadata
{
    my ($self, $place_doc_obj, $place_id, $place_references_rtf_string) = @_;
    my $fail_log_handle = $self->{'failhandle'};

    # Convert the place references from RTF to HTML
    my $place_references_html_raw = &rtf_to_html($place_references_rtf_string);

    # Anything that looks like a web address at this point was not wrapped in a hyperlink
    if ($place_references_html_raw =~ / (http|www\.)/) {
        print STDERR "<ProcessingError n='Place $place_id' p='CICPlug' r='Warning: Found possible web address in references without hyperlink tag.'>\n" if ($self->{'gli'});
        print STDERR "Warning: Place $place_id -- Found possible web address in references without hyperlink tag.\n";
        print $fail_log_handle "Warning: Place $place_id -- Found possible web address in references without hyperlink tag.\n";
    }

    $place_references_html_raw =~ tr/\r\n//d; # Remove all newlines
    $place_references_html_raw =~ s/<br \/><i><br \/>/<br \/><br \/><i>/g; # Move italic tags

    # Split the references and try to parse title and author
    my $place_references_html = "";
    my @place_references = split(/<br \/>\s*<br \/>/, $place_references_html_raw);
    foreach my $place_reference (@place_references) {
        # Trim leading and trailing line breaks, and ignore references with no real content
        $place_reference =~ s/^(<br \/>\s*)*//;
        $place_reference =~ s/(<br \/>\s*)*$//;
        next if ($place_reference !~ /\w/);

        &new_metadata_entry($place_doc_obj, "Reference", $place_reference);
        $place_references_html .= "<p class=\"cicreference\">" . $place_reference . "</p>\n";

        # Case 0: A magic word in the first sentence -- don't bother trying to parse the reference
        my ($place_reference_first_sentence) = ($place_reference =~ /^(.*?)\./);
        $place_reference_first_sentence = $place_reference if (!defined($place_reference_first_sentence));
        next if ($place_reference_first_sentence =~ /\b(collection|collections|papers|archives|database|letter|memo|inventory|photographs)\b/i);

        # Case 1: Author (possibly empty), then title in italics or quotes
        my ($reference_author, $reference_title) = ($place_reference =~ /^(.*?)<i>(.*?)<\/i>/);
        if (!defined($reference_title)) {
            ($reference_author, $reference_title) = ($place_reference =~ /^(.*)"(.*?)"/);
        }
        if (defined($reference_title)) {
            &new_metadata_entry($place_doc_obj, "ReferenceAuthor", $reference_author);
            &new_metadata_entry($place_doc_obj, "ReferenceTitle", $reference_title);
            next;
        }

        # Case 2: Zero or one fullstops, assume no author and title is complete text
        if (($place_reference =~ tr/.//) <= 1) {
            &new_metadata_entry($place_doc_obj, "ReferenceTitle", $place_reference);
            next;
        }

        # Otherwise the reference could not be parsed (reported to GLI and the fail log only)
        print STDERR "<ProcessingError n='Place $place_id' p='CICPlug' r='Could not parse reference: $place_reference'>\n" if ($self->{'gli'});
        print $fail_log_handle "Warning: Place $place_id -- Could not parse reference: $place_reference\n";
    }

    &new_metadata_entry($place_doc_obj, "PlaceReferencesHTML", $place_references_html);
}
1031
1032
# Converts an RTF string into a simple HTML fragment.
#
# Argument: the RTF string.
# Returns:  the HTML string. Paragraph breaks become <br />, italic and
#           underlined runs become <i>...</i>, double quotes and right single
#           quotes become plain quote characters, HYPERLINK fields become
#           <a href="..."> links, \'hh escapes become numeric character
#           references, and all other groups and control words are dropped.
sub rtf_to_html
{
    my $rtf_string = shift(@_);

    # Remove everything in curly braces, but keep any hyperlinks. The replacement
    # never contains a '}', so every pass removes one closing brace and the loop ends.
    while ($rtf_string =~ /\{(.*?)\}/) {
        my $group_contents = $1;
        my $group_replacement = "";
        if ($group_contents =~ /HYPERLINK (.*)/) {
            my $link_url = $1;
            # Trim whitespace before AND after removing the quotes, so that both
            # '"url" ' and '" url "' end up as a bare URL
            $link_url =~ s/^\s+//;
            $link_url =~ s/\s+$//;
            $link_url =~ s/^\"(.*?)\"$/$1/; # Remove surrounding quotes
            $link_url =~ s/^\s+//;
            $link_url =~ s/\s+$//;
            $group_replacement = "<a href=\"$link_url\">$link_url</a>";
        }
        $rtf_string =~ s/\{.*?\}/$group_replacement/;
    }

    # For each control word a following whitespace character (the delimiter) is
    # consumed when present; otherwise the word must end at a word boundary
    $rtf_string =~ s/\\ldblquote(?:\s|\b)/"/g;
    $rtf_string =~ s/\\rdblquote(?:\s|\b)/"/g;
    $rtf_string =~ s/\\rquote(?:\s|\b)/'/g;

    # Only the exact control words \pard and \par are handled here; longer words that
    # merely start with "par" (e.g. \pararsid..., \pardirnatural) must fall through to
    # the generic control word removal below instead of leaving debris in the text
    $rtf_string =~ s/\\pard\b//g;
    $rtf_string =~ s/\\par\b/<br \/>/g;

    # Underline and italics are both shown as italics
    $rtf_string =~ s/\\ul(?:\s|\b)/<i>/g;
    $rtf_string =~ s/\\ulnone(?:\s|\b)/<\/i>/g;
    $rtf_string =~ s/\\i(?:\s|\b)/<i>/g;
    $rtf_string =~ s/\\i0(?:\s|\b)/<\/i>/g;

    $rtf_string =~ s/\\~/ /g;                  # Non-breaking space
    $rtf_string =~ s/\\([A-Za-z0-9\-]+)//g;    # Any other control word
    $rtf_string =~ s/\}//g;                    # Closing braces left over from nested groups

    # Assume non-ASCII is ISO 8859-1, and convert into HTML entities. Only real
    # hexadecimal digits are accepted, in either case.
    $rtf_string =~ s/\\'([0-9a-fA-F]{2})/"&#" . hex($1) . ";"/ge;

    # Remove empty tags for HTML 4 compliance
    $rtf_string =~ s/<i>\s*<\/i>/ /g;

    return $rtf_string;
}
1080
1081
# Returns the (width, height) of a place image file, or an empty list on error.
#
# Arguments:
#   $self                  - the plugin object ('failhandle', 'gli' and 'verbosity' are used)
#   $place_image_file_path - path of the image file
#
# The dimensions are cached in a "<image path>.info" file holding three lines:
# the image modification time, the width and the height. The cache is used only
# when it is complete and its modification time matches the image; otherwise the
# image is identified with the "identify" command and the cache is rewritten.
# Problems with the cache file itself never cause a failure.
sub get_place_image_dimensions
{
    my $self = shift(@_);
    my $place_image_file_path = shift(@_);
    my $fail_log_handle = $self->{'failhandle'};

    # Make sure the place image file actually exists
    if (!-f $place_image_file_path) {
        print STDERR "<ProcessingError n='$place_image_file_path' p='CICPlug' r='Does not exist'>\n" if ($self->{'gli'});
        print STDERR "Error: Image $place_image_file_path does not exist.\n";
        print $fail_log_handle "Error: Image $place_image_file_path does not exist.\n";
        return;
    }
    my $place_image_file_date = (stat($place_image_file_path))[9];

    # Check if this place image has already been identified by looking for a ".info" file in the same directory
    my $place_image_info_file_path = $place_image_file_path . ".info";
    if (-f $place_image_info_file_path && open(my $place_image_info_in, "<", $place_image_info_file_path)) {
        my @place_image_info = <$place_image_info_in>;
        close($place_image_info_in);
        s/\r?\n$// foreach (@place_image_info);

        # Trust the cache only if all three values are present, numeric, and the date matches;
        # a truncated or corrupt cache file simply causes the image to be re-identified
        my ($cached_place_image_file_date, $cached_place_image_width, $cached_place_image_height) = @place_image_info;
        if (defined($cached_place_image_height)
            && $cached_place_image_file_date =~ /^\d+$/
            && $cached_place_image_width =~ /^\d+$/
            && $cached_place_image_height =~ /^\d+$/
            && $cached_place_image_file_date == $place_image_file_date) {
            return ($cached_place_image_width, $cached_place_image_height);
        }
    }

    # We haven't already identified the place image, so do it now
    print STDERR "Identifying place image $place_image_file_path...\n";
    my $identify_command = "identify -format \"%w %h\" \"$place_image_file_path\"";
    my $identify_result = `$identify_command`;
    $identify_result = "" if (!defined($identify_result)); # The command could not be run at all
    print "Identify result: $identify_result\n" if ($self->{'verbosity'} > 2);

    # Check that the output is what we're expecting
    if ($identify_result !~ /(\d+) (\d+)/) {
        print STDERR "<ProcessingError n='$place_image_file_path' p='CICPlug' r='Could not identify'>\n" if ($self->{'gli'});
        print STDERR "Error: Place image $place_image_file_path could not be identified.\n";
        print $fail_log_handle "Error: Place image $place_image_file_path could not be identified.\n";
        return;
    }

    # Parse the place image width and height from the output
    my $place_image_width = $1;
    my $place_image_height = $2;

    # Write the place image info file so we don't have to identify this exact image again in the future.
    # The cache is only an optimisation, so failing to write it is reported but is not an error.
    if (open(my $place_image_info_out, ">", $place_image_info_file_path)) {
        print $place_image_info_out "$place_image_file_date\n";
        print $place_image_info_out "$place_image_width\n";
        print $place_image_info_out "$place_image_height\n";
        close($place_image_info_out) or print STDERR "Warning: Could not finish writing $place_image_info_file_path: $!\n";
    }
    else {
        print STDERR "Warning: Could not write $place_image_info_file_path: $!\n";
    }
    return ($place_image_width, $place_image_height);
}
1144
1145
# Makes sure a size variant ("small", "large", ...) of a place image exists,
# associates it with the given document, and returns the href used to display
# it. Returns nothing if the image (or its variant) cannot be measured.
#
# Arguments:
#   $self                     - the plugin object; the 'images_directory', 'cache_directory' and
#                               '<size>_image_type' / '<size>_image_width' / '<size>_image_options'
#                               settings are used
#   $doc_obj                  - document object the variant file is associated with
#   $place_image_location     - location of the image as stored in the database
#   $place_image_variant_size - name of the variant size
sub generate_place_image_variant
{
    my ($self, $doc_obj, $place_image_location, $place_image_variant_size) = @_;
    my $fail_log_handle = $self->{'failhandle'};

    # Convert the server location of the file into the local location of the file
    (my $source_file_path = $place_image_location) =~ s/^[A-Z]:/$self->{'images_directory'}/;

    # Get the width and height of the place image
    # (on failure the error message has already been generated by get_place_image_dimensions())
    my ($source_width, $source_height) = $self->get_place_image_dimensions($source_file_path);
    return unless (defined($source_width) && defined($source_height));
    my $source_file_date = (stat($source_file_path))[9];

    # Work out where the variant lives in the cache directory: same location, but with the
    # extension replaced by "-<size>.<type>"
    my $variant_file_suffix = "-$place_image_variant_size." . $self->{$place_image_variant_size . '_image_type'};
    (my $variant_file_path = $place_image_location) =~ s/^[A-Z]:/$self->{'cache_directory'}/;
    $variant_file_path =~ s/^(.+)(\..*)/$1$variant_file_suffix/;
    my ($variant_file_name) = ($variant_file_path =~ /^.+\\(.+)$/);

    my $desired_width = $self->{$place_image_variant_size . '_image_width'};
    if ($source_width <= $desired_width) {
        # The place image is no wider than the desired width, so we just use the original
        $variant_file_path = $source_file_path;
    }
    elsif (!-f $variant_file_path || $source_file_date > (stat($variant_file_path))[9]) {
        # The variant doesn't exist yet, or the place image is newer than it: scale the image down
        print STDERR "Generating place image variant $variant_file_path...\n";
        my ($variant_directory) = ($variant_file_path =~ /^(.+)\\.+$/);
        &util::mk_all_dir($variant_directory);
        my $scale_options = "-scale $desired_width " . $self->{$place_image_variant_size . '_image_options'};
        my $scale_command = "convert $scale_options \"$source_file_path\" \"$variant_file_path\"";
        my $scale_output = `$scale_command`;
    }

    # Measuring the file that will be used also checks that it exists and is a readable image
    # (again, any error message has been generated by get_place_image_dimensions())
    my ($variant_width, $variant_height) = $self->get_place_image_dimensions($variant_file_path);
    return unless (defined($variant_width) && defined($variant_height));

    # Associate the place image variant file
    $doc_obj->associate_file($variant_file_path, $variant_file_name, undef, $doc_obj->get_top_section());

    # Build the href of the associated file, with spaces encoded
    my $variant_href = "_httpcollection_/index/assoc/{Or}{[parent(Top):assocfilepath],[assocfilepath]}/" . $variant_file_name;
    $variant_href =~ s/ /%20/g;
    return $variant_href;
}
1203
1204
# Writes a single-package static browser macrofile,
# <GSDLHOME>\collect\cic-hcap\macros\<package name>.dm.
#
# Arguments:
#   $static_browser_package_name - macro package name, also used as the file name
#   $id_to_name_mapping          - hash reference mapping document ids to names
#
# Dies if the macrofile cannot be opened or cannot be completely written.
sub write_static_browser_macrofile
{
    my $static_browser_package_name = shift(@_);
    my $id_to_name_mapping = shift(@_);

    my $static_browser_macrofile_path = "$ENV{'GSDLHOME'}\\collect\\cic-hcap\\macros\\$static_browser_package_name.dm";
    print STDERR "Writing $static_browser_macrofile_path...\n";

    # Three-argument open, so the path can never be mistaken for part of the open mode.
    # The bareword handle is kept because write_static_browser_macros() prints to it.
    open(BROWSER_MACROFILE, ">", $static_browser_macrofile_path) or die "Error: Could not write to $static_browser_macrofile_path.\n";
    &write_static_browser_macros($static_browser_package_name, $id_to_name_mapping);

    # Buffered write errors only show up when the handle is closed
    close(BROWSER_MACROFILE) or die "Error: Could not finish writing $static_browser_macrofile_path: $!\n";
}
1216
1217
# Prints one macro package to the (already open) BROWSER_MACROFILE handle:
# a _cicstaticbrowserquicklinks_ macro with the A-Z quick links, and a
# _cicstaticbrowser_ macro with one chunk of entries per initial.
#
# Arguments:
#   $static_browser_package_name - macro package name
#   $id_to_name_mapping          - hash reference mapping document ids to names
#   $id_to_extra_mapping         - optional hash reference mapping document ids to extra
#                                  text, passed on to get_static_browser_macro_chunk()
sub write_static_browser_macros
{
    my ($static_browser_package_name, $id_to_name_mapping, $id_to_extra_mapping) = @_;

    print BROWSER_MACROFILE "package $static_browser_package_name\n\n";
    print BROWSER_MACROFILE "_cicstaticbrowserquicklinks_ {\n";

    # Group the ids by the first letter or digit of their name. The letter is upper-cased
    # so that names starting with a lowercase letter join the matching A-Z group (and
    # therefore get an anchor and a quick link) instead of forming a separate group.
    my %letter_to_ids_mapping;
    foreach my $id (keys %$id_to_name_mapping) {
        my $name = $id_to_name_mapping->{$id};
        $name = "" if (!defined($name));
        my ($letter) = ($name =~ /([A-Za-z0-9])/);
        # Names without any letter or digit are grouped under the empty string
        $letter = defined($letter) ? uc($letter) : "";
        push(@{$letter_to_ids_mapping{$letter}}, $id);
    }

    # Quick links: a link for every letter that has entries, plain text for the others
    print BROWSER_MACROFILE "<b>";
    foreach my $letter ("A" .. "Z") {
        if (defined($letter_to_ids_mapping{$letter})) {
            print BROWSER_MACROFILE "<a href=\"#$letter\">$letter</a>&nbsp;";
        }
        else {
            print BROWSER_MACROFILE "$letter&nbsp;";
        }
    }
    print BROWSER_MACROFILE "</b>\n";
    print BROWSER_MACROFILE "}\n\n";

    print BROWSER_MACROFILE "_cicstaticbrowser_ {\n";
    print BROWSER_MACROFILE "<table cellpadding=\"0\" cellspacing=\"0\" width=\"_pagewidth_\">\n";
    foreach my $letter (sort(keys %letter_to_ids_mapping)) {
        my @letter_ids = @{$letter_to_ids_mapping{$letter}};

        # Only the A-Z groups get an anchor, since only they have quick links
        my $anchor_name;
        if ($letter =~ /^[A-Z]$/) {
            $anchor_name = $letter;
        }
        print BROWSER_MACROFILE &get_static_browser_macro_chunk($letter, $anchor_name, \@letter_ids, $id_to_name_mapping, $id_to_extra_mapping);
    }
    print BROWSER_MACROFILE "</table>\n";

    print BROWSER_MACROFILE "}\n";
}
1260
1261
# Returns the HTML table rows for one chunk of a static browser: a heading row
# followed by the entries laid out in two columns, in case-insensitive
# alphabetical order reading down the left column and then down the right.
#
# Arguments:
#   $chunk_title         - heading text of the chunk
#   $anchor_name         - name of the anchor put before the heading (may be undefined or empty)
#   $chunk_ids_ref       - array reference of the document ids in this chunk
#   $id_to_name_mapping  - hash reference mapping document ids to names
#   $id_to_extra_mapping - optional hash reference mapping document ids to extra text that
#                          is shown after the link and also used when sorting
#
# Every distinct id gets its own entry: an id that is listed more than once
# appears only once, but different ids that share the same name are all kept.
sub get_static_browser_macro_chunk
{
    my ($chunk_title, $anchor_name, $chunk_ids_ref, $id_to_name_mapping, $id_to_extra_mapping) = @_;

    my $static_browser_macro_chunk = "<tr><td width=\"50%\"><br />";
    if (defined($anchor_name) && $anchor_name ne "") {
        $static_browser_macro_chunk .= "<a name=\"$anchor_name\"/>";
    }
    $static_browser_macro_chunk .= "<span style=\"color: black;\"><b>$chunk_title</b></span></td><td width=\"50%\"></td></tr>";

    # Work out the text each id is sorted by (name, plus the extra text if there is any).
    # This is keyed by id rather than by name, so two documents with the same name can
    # never overwrite each other.
    my %id_to_full_name_mapping;
    foreach my $id (@{$chunk_ids_ref}) {
        next if (exists($id_to_full_name_mapping{$id}));
        my $full_name = $id_to_name_mapping->{$id};
        $full_name = "" if (!defined($full_name));
        if (defined($id_to_extra_mapping)) {
            my $extra = $id_to_extra_mapping->{$id};
            $full_name .= " " . (defined($extra) ? $extra : "");
        }
        $id_to_full_name_mapping{$id} = $full_name;
    }

    # Sort case-insensitively by full name; the id breaks ties so the order is stable
    my @sorted_ids = sort {
        lc($id_to_full_name_mapping{$a}) cmp lc($id_to_full_name_mapping{$b}) || $a cmp $b
    } (keys(%id_to_full_name_mapping));

    # Builds the link (plus extra text) shown for one id
    my $entry_html_for = sub {
        my ($id) = @_;
        my $name = $id_to_name_mapping->{$id};
        my $extra = (defined($id_to_extra_mapping) && $id_to_extra_mapping->{$id}) || "";
        return "<a href=\"_gwcgi_?a=d&amp;d=$id\">" . &html_safe($name) . "</a>" . &html_safe($extra);
    };

    # The left column holds the first half of the entries (rounded up), the right column the rest
    my $half_point = int((scalar(@sorted_ids) + 1) / 2);
    for (my $i = 0; $i < $half_point; $i++) {
        $static_browser_macro_chunk .= "<tr>";
        $static_browser_macro_chunk .= "<td valign=\"top\">" . $entry_html_for->($sorted_ids[$i]) . "</td>";

        # The last row has an empty right cell when the number of entries is odd
        $static_browser_macro_chunk .= "<td valign=\"top\">";
        if (defined($sorted_ids[$i+$half_point])) {
            $static_browser_macro_chunk .= $entry_html_for->($sorted_ids[$i+$half_point]);
        }
        $static_browser_macro_chunk .= "</td>";

        $static_browser_macro_chunk .= "</tr>";
    }

    return $static_browser_macro_chunk;
}
1309
1310
# Writes a two-level static browser macrofile,
# <GSDLHOME>\collect\cic-hcap\macros\<package root>.dm, containing one macro
# package per category. Each package is named after the package root followed
# by the category, with all non-word characters removed.
#
# Arguments:
#   $static_browser_package_root - prefix of the package names, also used as the file name
#   $category_to_ids_mapping     - hash reference mapping each category to an array
#                                  reference of the document ids in it
#   $id_to_name_mapping          - hash reference mapping document ids to names
#   $id_to_extra_mapping         - hash reference mapping document ids to extra text
#
# Dies if the macrofile cannot be opened or cannot be completely written.
sub write_bilevel_static_browser_macrofile
{
    my ($static_browser_package_root, $category_to_ids_mapping, $id_to_name_mapping, $id_to_extra_mapping) = @_;

    my $static_browser_macrofile_path = "$ENV{'GSDLHOME'}\\collect\\cic-hcap\\macros\\$static_browser_package_root.dm";
    print STDERR "Writing $static_browser_macrofile_path...\n";

    # Three-argument open, so the path can never be mistaken for part of the open mode.
    # The bareword handle is kept because write_static_browser_macros() prints to it.
    open(BROWSER_MACROFILE, ">", $static_browser_macrofile_path) or die "Error: Could not write to $static_browser_macrofile_path.\n";

    # The categories are sorted so that the generated file is identical from run to run
    foreach my $category (sort(keys(%{$category_to_ids_mapping}))) {
        my $static_browser_package_name = $static_browser_package_root . $category;
        $static_browser_package_name =~ s/\W//g;

        # Restrict the id to name mapping to the ids in this category
        my %id_to_name_mapping_for_category = ();
        foreach my $id (@{$category_to_ids_mapping->{$category}}) {
            $id_to_name_mapping_for_category{$id} = $id_to_name_mapping->{$id};
        }
        &write_static_browser_macros($static_browser_package_name, \%id_to_name_mapping_for_category, $id_to_extra_mapping);
    }

    # Buffered write errors only show up when the handle is closed
    close(BROWSER_MACROFILE) or die "Error: Could not finish writing $static_browser_macrofile_path: $!\n";
}
1335
1336
# Returns the given text with every '&' replaced by "&amp;" and every
# apostrophe replaced by "&#39;".
sub html_safe
{
    my ($text) = @_;

    # Apostrophes mess up Javascript on the Search by State page.
    # Both characters are handled in one pass, so the '&' of "&#39;" is never re-escaped.
    my %entity_for = ("&" => "&amp;", "'" => "&#39;");
    $text =~ s/([&'])/$entity_for{$1}/g;
    return $text;
}
1344
1345
# Writes the static "browse by state" macrofile,
# <GSDLHOME>\collect\cic-hcap\macros\<package name>.dm. For every state in the
# file-level %state_name_to_abbr_mapping hash it contains a _cicstate<abbr>_
# macro with the entries for that state and a _cicstate<abbr>js_ macro with
# the same content passed through javascript_safe(); the _cicstaticbrowser_
# macro is a table made up of all the per-state macros.
#
# Arguments:
#   $static_browser_package_name - macro package name, also used as the file name
#   $state_to_ids_mapping        - hash reference mapping state names to array
#                                  references of document ids
#   $id_to_name_mapping          - hash reference mapping document ids to names
#
# Dies if the macrofile cannot be opened or cannot be completely written.
sub write_state_browser_macrofile
{
    my ($static_browser_package_name, $state_to_ids_mapping, $id_to_name_mapping) = @_;

    my $static_browser_macrofile_path = "$ENV{'GSDLHOME'}\\collect\\cic-hcap\\macros\\$static_browser_package_name.dm";
    print STDERR "Writing $static_browser_macrofile_path...\n";

    # Three-argument open, so the path can never be mistaken for part of the open mode
    open(BROWSER_MACROFILE, ">", $static_browser_macrofile_path) or die "Error: Could not write to $static_browser_macrofile_path.\n";
    print BROWSER_MACROFILE "package $static_browser_package_name\n\n";
    print BROWSER_MACROFILE "_cicstaticbrowser_ {\n";

    my @state_names = sort(keys(%state_name_to_abbr_mapping));

    # The main macro just strings the per-state macros together
    print BROWSER_MACROFILE "<table cellpadding=\"0\" cellspacing=\"0\" width=\"_pagewidth_\">\n";
    foreach my $state (@state_names) {
        my $state_abbr = $state_name_to_abbr_mapping{$state};
        print BROWSER_MACROFILE "_cicstate" . $state_abbr . "_\n";
    }
    print BROWSER_MACROFILE "</table>\n";
    print BROWSER_MACROFILE "}\n";

    foreach my $state (@state_names) {
        my $state_abbr = $state_name_to_abbr_mapping{$state};

        # The anchor name is the state name without spaces
        my $state_sans_spaces = $state;
        $state_sans_spaces =~ s/ //g;

        my $state_has_ids = defined($state_to_ids_mapping->{$state});
        my @state_ids = ();
        if ($state_has_ids) {
            @state_ids = @{$state_to_ids_mapping->{$state}};
        }

        my $state_static_browser_macro_chunk = &get_static_browser_macro_chunk($state, $state_sans_spaces, \@state_ids, $id_to_name_mapping, undef);
        if (!$state_has_ids) {
            $state_static_browser_macro_chunk .= "<tr><td colspan=\"2\" valign=\"top\">No institutions for this state</td></tr>";
        }

        # Write out the normal macro chunk
        print BROWSER_MACROFILE "\n_cicstate" . $state_abbr . "_ {";
        print BROWSER_MACROFILE $state_static_browser_macro_chunk;
        print BROWSER_MACROFILE "}\n";

        # Write out a Javascript safe version
        print BROWSER_MACROFILE "\n_cicstate" . $state_abbr . "js_ {";
        print BROWSER_MACROFILE &javascript_safe($state_static_browser_macro_chunk);
        print BROWSER_MACROFILE "}\n";
    }

    # Buffered write errors only show up when the handle is closed
    close(BROWSER_MACROFILE) or die "Error: Could not finish writing $static_browser_macrofile_path: $!\n";
}
1393
1394
# Returns the given text with every "</" (the start of an HTML closing tag)
# replaced by "<", two backslashes and "/".
sub javascript_safe
{
    my ($text) = @_;
    $text =~ s{</}{<\\\\/}g;
    return $text;
}
1401
1402
14031;
Note: See TracBrowser for help on using the repository browser.