###########################################################################
#
# CICPlug.pm
#
# Copyright (C) 2005 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

package CICPlug;

use BasPlug;
use DBI;

use strict;
no strict 'refs';

sub BEGIN {
    @CICPlug::ISA = ('BasPlug');
}


# Plugin arguments; new() appends these to the shared "ArgList".
my $arguments = [
    { 'name' => "images_directory",
      'type' => "string",
      'deft' => "",
      'reqd' => "yes" },
    { 'name' => "cache_directory",
      'type' => "string",
      'deft' => &util::filename_cat($ENV{'GSDLHOME'}, "tmp"),
      'reqd' => "no" },
    { 'name' => "large_image_options",
      'type' => "string",
      'deft' => "",
      'reqd' => "no" },
    { 'name' => "large_image_type",
      'type' => "string",
      'deft' => "jpg",
      'reqd' => "no" },
    { 'name' => "large_image_width",
      'type' => "string",
      'deft' => "800",
      'reqd' => "no" },
    { 'name' => "medium_image_options",
      'type' => "string",
      'deft' => "",
      'reqd' => "no" },
    { 'name' => "medium_image_type",
      'type' => "string",
      'deft' => "jpg",
      'reqd' => "no" },
    { 'name' => "medium_image_width",
      'type' => "string",
      'deft' => "375",
      'reqd' => "no" },
    { 'name' => "small_image_options",
      'type' => "string",
      'deft' => "",
      'reqd' => "no" },
    { 'name' => "small_image_type",
      'type' => "string",
      'deft' => "jpg",
      'reqd' => "no" },
    { 'name' => "small_image_width",
      'type' => "string",
      'deft' => "225",
      'reqd' => "no" }
];

# Plugin description; new() appends this to the shared "OptList".
my $options = {
    'name'     => "CICPlug",
    'desc'     => "{CICPlug.desc}",
    'abstract' => "no",
    'inherits' => "yes"
};


# Returns the default pattern for files this plugin processes:
# names ending in ".mdb", matched case-insensitively.
sub get_default_process_exp {
    return q^(?i)\.mdb$^;
}


# Constructor.  Registers this class in $pluginlist, contributes the
# argument and option definitions above to $hashArgOptLists, builds the
# object through BasPlug and re-blesses it into $class.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    if (defined $arguments) {
        push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    }
    if (defined $options) {
        push(@{$hashArgOptLists->{"OptList"}}, $options);
    }

    # Direct method-call syntax: the indirect form "new BasPlug(...)" relies
    # on parser heuristics that are fragile in a package defining its own new().
    my $self = BasPlug->new($pluginlist, $inputargs, $hashArgOptLists);

    return bless $self, $class;
}


# Two-letter state abbreviation -> full state name.
my $state_abbr_to_name_mapping = {
    "AL" => "Alabama",
    "AK" => "Alaska",
    "AZ" => "Arizona",
    "AR" => "Arkansas",
    "CA" => "California",
    "CO" => "Colorado",
    "CT" => "Connecticut",
    "DC" => "District of Columbia",
    "DE" => "Delaware",
    "FL" => "Florida",
    "GA" => "Georgia",
    "HI" => "Hawaii",
    "ID" => "Idaho",
    "IL" => "Illinois",
    "IN" => "Indiana",
    "IA" => "Iowa",
    "KS" => "Kansas",
    "KY" => "Kentucky",
    "LA" => "Louisiana",
    "ME" => "Maine",
    "MD" => "Maryland",
    "MA" => "Massachusetts",
    "MI" => "Michigan",
    "MN" => "Minnesota",
    "MS" => "Mississippi",
    "MO" => "Missouri",
    "MT" => "Montana",
    "NE" => "Nebraska",
    "NV" => "Nevada",
    "NH" => "New Hampshire",
    "NJ" => "New Jersey",
    "NM" => "New Mexico",
    "NY" => "New York",
    "NC" => "North Carolina",
    "ND" => "North Dakota",
    "OH" => "Ohio",
    "OK" => "Oklahoma",
    "OR" => "Oregon",
    "PA" => "Pennsylvania",
    "RI" => "Rhode Island",
    "SC" => "South Carolina",
    "SD" => "South Dakota",
    "TN" => "Tennessee",
    "TX" => "Texas",
    "UT" => "Utah",
    "VT" => "Vermont",
    "VA" => "Virginia",
    "WA" => "Washington",
    "WV" => "West Virginia",
    "WI" => "Wisconsin",
    "WY" => "Wyoming"
};

# Full state name -> two-letter abbreviation (inverse of the table above).
my %state_name_to_abbr_mapping = reverse(%{$state_abbr_to_name_mapping});

# Two-letter state abbreviation -> geographic area name.
my $state_abbr_to_area_mapping = {
    "AL" => "Southeast",
    "AK" => "West",
    "AZ" => "Southwest",
    "AR" => "Southeast",
    "CA" => "West",
    "CO" => "Mountain",
    "CT" => "Northeast",
    "DC" => "Northeast",
    "DE" => "Northeast",
    "FL" => "Southeast",
    "GA" => "Southeast",
    "HI" => "West",
    "ID" => "Mountain",
    "IL" => "Midwest",
    "IN" => "Midwest",
    "IA" => "Midwest",
    "KS" => "Midwest",
    "KY" => "Southeast",
    "LA" => "Southeast",
    "ME" => "Northeast",
    "MD" => "Northeast",
    "MA" => "Northeast",
    "MI" => "Midwest",
    "MN" => "Midwest",
    "MS" => "Southeast",
    "MO" => "Midwest",
    "MT" => "Mountain",
    "NE" => "Midwest",
    "NV" => "West",
    "NH" => "Northeast",
    "NJ" => "Northeast",
    "NM" => "Southwest",
    "NY" => "Northeast",
    "NC" => "Southeast",
    "ND" => "Midwest",
    "OH" => "Midwest",
    "OK" => "Southwest",
    "OR" => "West",
    "PA" => "Northeast",
    "RI" => "Northeast",
    "SC" => "Southeast",
    "SD" => "Midwest",
    "TN" => "Southeast",
    "TX" => "Southwest",
    "UT" => "Mountain",
    "VT" => "Northeast",
    "VA" => "Southeast",
    "WA" => "West",
    "WV" => "Southeast",
    "WI" => "Midwest",
    "WY" => "Mountain"
};

# PlaceType ID (as stored in tblPlace) -> display name.
my $place_type_id_to_name_mapping = {
    "1" => "Individual building",
    "2" => "Landscape site",
    "3" => "Campus arrangement",
    "4" => "Building group",
};

# This array must match the values in the tblArchTypes table
my @place_styles_array = (
    "American colonial",
    "Federal",
    "Greek revival",
    "Italianate",
    "Gothic revival",
    "Romanesque revival",
    "Victorian",
    "Beaux-Arts classicism",
    "Colonial revival",
    "Mission/Mission revival",
    "Modern/pre-WWII",
    "Modern/post-WWII",
    "Postmodern",
    "Contemporary",
    "Regionalist/Vernacular",
    "Other"
);

# Set of valid (normalised, lower-case) place function names.  Only the
# keys matter: membership is tested with defined(), so the empty-string
# values are deliberate.
my $place_functions_mapping = {
    "academic department building" => "",
    "administration" => "",
    "admissions office" => "",
    "alumni center" => "",
    "arboretum" => "",
    "archaeological site" => "",
    "auditorium" => "",
    "bell tower" => "",
    "chapel" => "",
    "classrooms" => "",
    "debating society" => "",
    "dining hall" => "",
    "facility management building" => "",
    "faculty offices" => "",
    "gardens" => "",
    "greek letter society" => "",
    "gymnasium" => "",
    "infirmary" => "",
    "library" => "",
    "master plan (campus)" => "",
    "master plan (landscape)" => "",
    "memorial site" => "",
    "museum" => "",
    "observatory" => "",
    "old main" => "",
    "outdoor space" => "",
    "president's house" => "",
    "private residence" => "",
    "residence hall" => "",
    "stadium" => "",
    "student union" => "",
    "theater" => "",
    "other" => "",
};

# File-level state shared between the process_* subroutines.
my %designer_name_to_id_mapping;
my %designer_name_to_place_ids_mapping;
my $place_reference_id = 1;


# Prepares $sql on $dbh and returns the statement handle, dying if the
# prepare fails.  The handle's LongReadLen is raised to 65536, as the
# code did originally after every prepare.
sub _cic_prepare_statement {
    my ($dbh, $sql) = @_;

    my $sth = $dbh->prepare($sql)
        or die "Could not prepare SQL statement: " . ($dbh->errstr || "unknown error");
    $sth->{LongReadLen} = 65536;

    return $sth;
}


# Reports $message (which must carry its own trailing newline) on STDERR
# and in the fail log ($self->{'failhandle'}).  When $self->{'gli'} is set
# a blank line is written to STDERR first.
sub _cic_report_problem {
    my ($self, $message) = @_;

    my $fail_log_handle = $self->{'failhandle'};
    print STDERR "\n" if ($self->{'gli'});
    print STDERR $message;
    print {$fail_log_handle} $message;
}


# Plugin entry point.  Returns undef when $base_dir/$file does not match
# the plugin's process_exp or is not a plain file.  Otherwise connects to
# the 'CIC-HCAP' ODBC data source, processes institutions, places and
# designers in that order, and returns 1.  Dies if the connection cannot
# be established.
sub read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;

    $self->{'filename'} = &util::filename_cat($base_dir, $file);
    if ($self->{'filename'} !~ /$self->{'process_exp'}/ || !-f $self->{'filename'}) {
        return undef;
    }

    $self->{'processor'} = $processor;
    $self->{'gli'} = $gli;

    # Open connection to Access database
    my $dbh = DBI->connect('dbi:ODBC:CIC-HCAP')
        or die "Could not connect to database: $DBI::errstr";

    # DBI documents that LongReadLen is normally only honoured when set on
    # the database handle before prepare(), so set it here as well as on the
    # individual statement handles.
    $dbh->{LongReadLen} = 65536;

    $self->process_institutions($dbh);
    $self->process_places($dbh);
    $self->process_designers($dbh);

    # NOTE(review): assumes none of the process_* subroutines keeps $dbh or a
    # statement handle beyond its own return -- confirm for those not shown here.
    $dbh->disconnect();

    return 1;
}


# Creates one document per row of tblInstitution that has a City value.
# Each document gets the OID "i<Institution_ID>", metadata for every
# defined column, a places list, a best place image and the campus plans,
# and is handed to $self->{'processor'}.  Institutions whose best image is
# missing, unmatched or a PDF are reported, counted in
# $self->{'num_not_processed'} and skipped.  Finally the "institutions" and
# "states" macrofiles are written.
#
# Parameters: $dbh -- open DBI database handle.
# Dies if a statement cannot be prepared or executed.
sub process_institutions {
    my $self = shift(@_);
    my $dbh = shift(@_);

    # Statement for getting everything from the Institution table
    my $institution_sql_statement = "SELECT * FROM tblInstitution";  # WHERE Institution_ID<200";
    my $institution_sql_handle = _cic_prepare_statement($dbh, $institution_sql_statement);
    $institution_sql_handle->execute() or die "Could not execute SQL statement.";

    # Statement for getting the Institution places
    my $institution_places_sql_statement = "SELECT Entry_ID,Current_name FROM tblPlace WHERE PlaceType>0 AND Institution_ID=?";
    my $institution_places_sql_handle = _cic_prepare_statement($dbh, $institution_places_sql_statement);

    # Statement for getting the Institution best place image location
    my $institution_best_place_image_location_sql_statement = "SELECT Location,Entry_ID FROM tblImages WHERE FileType=1 AND FileName=?";
    my $institution_best_place_image_location_sql_handle = _cic_prepare_statement($dbh, $institution_best_place_image_location_sql_statement);

    # Statement for getting the Institution places images (only used to check if an institution has some images)
    my $institution_places_images_sql_statement = "SELECT FileName FROM tblImages,tblPlace WHERE tblImages.FileType=1 AND tblImages.Entry_ID=tblPlace.Entry_ID AND tblPlace.Institution_ID=?";
    my $institution_places_images_sql_handle = _cic_prepare_statement($dbh, $institution_places_images_sql_statement);

    # Statement for getting the Institution campus plans
    my $institution_campus_plans_sql_statement = "SELECT * FROM tblCampusMaps WHERE Electronic=1 AND Institution_ID=?";
    my $institution_campus_plans_sql_handle = _cic_prepare_statement($dbh, $institution_campus_plans_sql_statement);

    # Create a document object for each institution
    my %institution_id_to_name_mapping;
    my %institution_state_to_ids_mapping;
    while (my $row_hashref = $institution_sql_handle->fetchrow_hashref) {
        # Skip any institutions that didn't respond
        next if !defined($row_hashref->{"City"});

        my $institution_id = $row_hashref->{"Institution_ID"};
        # print STDERR " Institution $institution_id\n";

        my $institution_doc_obj = doc->new($self->{'filename'} . "-", "indexed_doc");
        $institution_doc_obj->set_OID("i$institution_id");
        &new_metadata_entry($institution_doc_obj, "DocumentType", "Institution");

        # For some reason the hyphen seems to be lost from the Zip field, so add it back in
        my $institution_zip = $row_hashref->{"Zip"};
        if (defined($institution_zip) && $institution_zip =~ /^(\d\d\d\d\d)(\d\d\d\d)$/) {
            $row_hashref->{"Zip"} = $1 . "-" . $2;
        }

        # Fix up the links to the institution webpage: keep only the text
        # between the first pair of '#' characters
        if (defined($row_hashref->{"Institution_webpage"}) && $row_hashref->{"Institution_webpage"} =~ /\#(.*?)\#/) {
            $row_hashref->{"Institution_webpage"} = $1;
        }

        # Map state to full name (undef when the abbreviation is not in the table)
        $row_hashref->{"State"} = $state_abbr_to_name_mapping->{$row_hashref->{"State"}};

        # Get the places in this institution
        # NOTE(review): the "...HTML" literals in this subroutine contain no
        # markup and span line breaks; the markup has presumably been lost
        # from this copy, and locals that look unused here (place id, best
        # place id, file hrefs) are presumably referenced by it -- confirm
        # against the canonical source.  The literals are kept as found.
        my $institution_places_list_html = "";
        $institution_places_sql_handle->execute($institution_id) or die "Could not execute SQL statement.";
        while (my $institution_places_match_hashref = $institution_places_sql_handle->fetchrow_hashref) {
            my $institution_place_id = $institution_places_match_hashref->{"Entry_ID"};
            my $institution_place_name = $institution_places_match_hashref->{"Current_name"};
            $institution_places_list_html .= "$institution_place_name
\n";
        }
        &new_metadata_entry($institution_doc_obj, "InstitutionPlacesListHTML", $institution_places_list_html);

        # Get the best place image for this institution
        my $institution_best_place_image_name = $row_hashref->{"Best_image"};
        if (!defined($institution_best_place_image_name) || $institution_best_place_image_name eq "") {
            # Some institutions have no electronic images, and thus have no best image;
            # it is only an error when the institution does have images
            $institution_places_images_sql_handle->execute($institution_id) or die "Could not execute SQL statement.";
            if (defined($institution_places_images_sql_handle->fetchrow_hashref())) {
                $self->_cic_report_problem("Error: Institution $institution_id -- No best image.\n");
                $self->{'num_not_processed'}++;
                next;
            }

            &new_metadata_entry($institution_doc_obj, "InstitutionBestPlaceImageHTML", "");
        }
        else {
            # Get the file location of the best place image for this institution
            $institution_best_place_image_location_sql_handle->execute($institution_best_place_image_name) or die "Could not execute SQL statement.";
            my $institution_best_place_image_hashref = $institution_best_place_image_location_sql_handle->fetchrow_hashref();
            my $institution_best_place_image_location = $institution_best_place_image_hashref->{"Location"};
            if (!defined($institution_best_place_image_location) || $institution_best_place_image_location eq "") {
                $self->_cic_report_problem("Error: Institution $institution_id -- Could not match best image $institution_best_place_image_name to a file.\n");
                $self->{'num_not_processed'}++;
                next;
            }

            # PDFs are not allowed for institution best place images
            # (the dot is escaped so that only a real ".pdf" extension matches)
            if ($institution_best_place_image_location =~ /\.pdf$/i) {
                $self->_cic_report_problem("Error: Institution $institution_id -- PDF not allowed for best image.\n");
                $self->{'num_not_processed'}++;
                next;
            }

            my $institution_best_place_id = $institution_best_place_image_hashref->{"Entry_ID"};
            # generate_place_image_variant is defined outside this view; it must
            # still be called even though its result is not used in the literal below
            my $institution_best_place_image_medium_file_href = $self->generate_place_image_variant($institution_doc_obj, $institution_best_place_image_location, "medium");
            &new_metadata_entry($institution_doc_obj, "InstitutionBestPlaceImageHTML", "\"$institution_best_place_image_name\"
$institution_best_place_image_name
");
        }

        # Get institution campus plans
        my $institution_campus_plans_list_html = "";
        $institution_campus_plans_sql_handle->execute($institution_id) or die "Could not execute SQL statement.";
        while (my $institution_campus_plans_match_hashref = $institution_campus_plans_sql_handle->fetchrow_hashref) {
            my $institution_campus_plan_name = $institution_campus_plans_match_hashref->{"NameAndFormat"};
            my $institution_campus_plan_image_location = $institution_campus_plans_match_hashref->{"Location_electronic"};

            # Deal with PDF files separately: don't convert, just associate
            if ($institution_campus_plan_image_location =~ /\.pdf$/i) {
                # Convert the server location of the file into the local location of the file
                my $institution_campus_plan_pdf_file_path = $institution_campus_plan_image_location;
                $institution_campus_plan_pdf_file_path =~ s/^[A-Z]:/$self->{'images_directory'}/;
                if (-f $institution_campus_plan_pdf_file_path) {
                    my $institution_campus_plan_pdf_file_name = $institution_campus_plan_name . ".pdf";
                    $institution_campus_plan_pdf_file_name =~ s/ /%20/g;
                    my $institution_campus_plan_pdf_file_href = "_httpcollection_/index/assoc/[assocfilepath]/$institution_campus_plan_pdf_file_name";
                    $institution_campus_plans_list_html .= "$institution_campus_plan_name (PDF)
";

                    $institution_doc_obj->associate_file($institution_campus_plan_pdf_file_path, $institution_campus_plan_name . ".pdf", undef, $institution_doc_obj->get_top_section());
                }
                else {
                    $self->_cic_report_problem("Error: File $institution_campus_plan_pdf_file_path does not exist.\n");
                }
            }
            else {
                my $institution_campus_plan_image_large_file_href = $self->generate_place_image_variant($institution_doc_obj, $institution_campus_plan_image_location, "large");

                # Create a new section for each institution campus plan image
                my $institution_campus_plan_image_section = $institution_doc_obj->insert_section($institution_doc_obj->get_end_child($institution_doc_obj->get_top_section()));
                $institution_doc_obj->add_utf8_text($institution_campus_plan_image_section, "_");  # This is necessary
                $institution_doc_obj->add_utf8_metadata($institution_campus_plan_image_section, "DocumentType", "Image");
                $institution_doc_obj->add_utf8_metadata($institution_campus_plan_image_section, "Title", $institution_campus_plan_name);
                $institution_doc_obj->add_utf8_metadata($institution_campus_plan_image_section, "ImagePath", $institution_campus_plan_image_large_file_href);

                $institution_campus_plans_list_html .= "$institution_campus_plan_name
";
            }
        }
        &new_metadata_entry($institution_doc_obj, "InstitutionCampusPlansListHTML", $institution_campus_plans_list_html);

        # Add each field from the table as metadata
        foreach my $key (keys(%$row_hashref)) {
            my $value = $row_hashref->{$key};
            if (defined($value)) {
                &new_metadata_entry($institution_doc_obj, $key, $value);
            }
        }

        $institution_doc_obj->add_utf8_text($institution_doc_obj->get_top_section(), "Some dummy text.");
        $self->{'processor'}->process($institution_doc_obj);
        $self->{'num_processed'}++;

        # Build mappings for creating the static macrofiles
        my $institution_name = $row_hashref->{"Institution_Name"};
        $institution_id_to_name_mapping{$institution_doc_obj->get_OID()} = $institution_name;
        my $institution_state = $row_hashref->{"State"};
        push(@{$institution_state_to_ids_mapping{$institution_state}}, $institution_doc_obj->get_OID());
    }

    # Write the institutions.dm macrofile
    &write_static_browser_macrofile("institutions", \%institution_id_to_name_mapping);

    # Write the states.dm macrofile
    &write_state_browser_macrofile("states", \%institution_state_to_ids_mapping, \%institution_id_to_name_mapping);
}


sub process_places {
    my $self = shift(@_);
    my $dbh = shift(@_);

    my $fail_log_handle = $self->{'failhandle'};

    # Prepare SQL statement for getting everything from the Place table
    my $place_sql_statement = "SELECT * FROM tblPlace";  # WHERE Entry_ID<100";
    my $place_sql_handle = $dbh->prepare($place_sql_statement);
    $place_sql_handle->{LongReadLen} = 65536;
    $place_sql_handle->execute() or die "Could not execute SQL statement.";

    # Prepare SQL statement for getting the Place institution
    my $place_institution_sql_statement = "SELECT Institution_Name FROM tblInstitution,tblPlace WHERE tblInstitution.Institution_ID=tblPlace.Institution_ID and tblPlace.Entry_ID=?";
    my $place_institution_sql_handle = $dbh->prepare($place_institution_sql_statement);

    # Prepare SQL statement for getting the Place construction dates
    my $place_construction_dates_sql_statement = "SELECT 
Prefix,Date,Note,Architect_Name FROM tblConstruction_and_Dates WHERE Entry_ID=? ORDER BY Date"; my $place_construction_dates_sql_handle = $dbh->prepare($place_construction_dates_sql_statement); # Prepare SQL statement for getting the Place images my $place_images_sql_statement = "SELECT FileName,Location FROM tblImages WHERE FileType=1 AND Entry_ID=? ORDER BY Image_Order"; my $place_images_sql_handle = $dbh->prepare($place_images_sql_statement); $place_images_sql_handle->{LongReadLen} = 65536; # Prepare SQL statement for getting the Place materials my $place_materials_sql_statement = "SELECT * FROM tblDescription_building WHERE Entry_ID=?"; my $place_materials_sql_handle = $dbh->prepare($place_materials_sql_statement); $place_materials_sql_handle->{LongReadLen} = 65536; # Prepare SQL statement for getting the Place building styles my $place_styles_sql_statement = "SELECT ArchType_ID FROM ArchPlace WHERE Entry_ID=?"; my $place_styles_sql_handle = $dbh->prepare($place_styles_sql_statement); $place_styles_sql_handle->{LongReadLen} = 65536; # Prepare SQL statement for getting the Place functions my $place_functions_sql_statement = "SELECT Function,Year,Prefix FROM tblFunction_and_dates WHERE Entry_ID=? 
ORDER BY Year"; my $place_functions_sql_handle = $dbh->prepare($place_functions_sql_statement); $place_functions_sql_handle->{LongReadLen} = 65536; # Prepare SQL statement for getting the Place significance my $place_significance_sql_statement = "SELECT SigType FROM tblSigTypes,SigPlace WHERE tblSigTypes.SigTypes_ID=SigPlace.SigType_ID+1 AND SigPlace.Entry_ID=?"; my $place_significance_sql_handle = $dbh->prepare($place_significance_sql_statement); $place_significance_sql_handle->{LongReadLen} = 65536; # Prepare SQL statement for getting the Place references my $place_references_sql_statement = "SELECT Bibliography FROM tblReferences WHERE Entry_ID=?"; my $place_references_sql_handle = $dbh->prepare($place_references_sql_statement); $place_references_sql_handle->{LongReadLen} = 65536; # Prepare SQL statement for getting the Place designations my $place_designations_sql_statement = "SELECT National_Register,Federal_Agency,HABS,HAER,Local_Designation FROM tblReferences WHERE Entry_ID=?"; my $place_designations_sql_handle = $dbh->prepare($place_designations_sql_statement); $place_designations_sql_handle->{LongReadLen} = 65536; # Prepare SQL statement for getting the Place narrative my $place_narrative_sql_statement = "SELECT Narrative FROM tblSignificance_Narrative WHERE Entry_ID=?"; my $place_narrative_sql_handle = $dbh->prepare($place_narrative_sql_statement); $place_narrative_sql_handle->{LongReadLen} = 65536; # Prepare SQL statement for getting the Place state my $place_state_sql_statement = "SELECT State FROM tblInstitution,tblPlace WHERE tblInstitution.Institution_ID=tblPlace.Institution_ID AND Entry_ID=?"; my $place_state_sql_handle = $dbh->prepare($place_state_sql_statement); $place_state_sql_handle->{LongReadLen} = 65536; # Create a document object for each place my %place_type_to_ids_mapping; my %place_style_to_ids_mapping; my %place_date_to_ids_mapping; my %place_function_to_ids_mapping; my %place_id_to_name_mapping; my %place_id_to_institution_name_mapping; 
while (my $row_hashref = $place_sql_handle->fetchrow_hashref) { my $place_id = $row_hashref->{"Entry_ID"}; # print STDERR " Place $place_id\n"; my $place_doc_obj = new doc($self->{'filename'} . "-", "indexed_doc"); $place_doc_obj->set_OID("p$place_id"); &new_metadata_entry($place_doc_obj, "DocumentType", "Place"); # Convert the place type ID into a name $row_hashref->{"PlaceType"} = $place_type_id_to_name_mapping->{$row_hashref->{"PlaceType"}}; # Add each field from the table as metadata foreach my $key (keys(%$row_hashref)) { my $value = $row_hashref->{$key}; if (defined($value)) { &new_metadata_entry($place_doc_obj, $key, $value); } } # Get place name my $place_name = $row_hashref->{"Current_name"}; if (!defined($place_name)) { print STDERR "\n" if ($self->{'gli'}); print STDERR "Error: Place $place_id -- Missing place name.\n"; print $fail_log_handle "Error: Place $place_id -- Missing place name.\n"; $self->{'num_not_processed'}++; next; } # Get place type my $place_type = $row_hashref->{"PlaceType"}; if (!defined($place_type)) { print STDERR "\n" if ($self->{'gli'}); print STDERR "Error: Place $place_id -- Missing place type.\n"; print $fail_log_handle "Error: Place $place_id -- Missing place type.\n"; $self->{'num_not_processed'}++; next; } # Create place styles mapping $place_styles_sql_handle->execute($place_id) or die "Could not execute SQL statement."; while (my $place_styles_match_hashref = $place_styles_sql_handle->fetchrow_hashref()) { # The ArchType_ID is actually an index into the tblArchType table, NOT a link my $place_style_index = $place_styles_match_hashref->{"ArchType_ID"}; my $place_style = $place_styles_array[$place_style_index]; push(@{$place_style_to_ids_mapping{$place_style}}, $place_doc_obj->get_OID()); &new_metadata_entry($place_doc_obj, "Style", $place_style); } # Get place institution $place_institution_sql_handle->execute($place_id) or die "Could not execute SQL statement."; my $place_institution = 
$place_institution_sql_handle->fetchrow(); &new_metadata_entry($place_doc_obj, "Institution_name", $place_institution); $place_id_to_institution_name_mapping{$place_doc_obj->get_OID()} = ", " . $place_institution; # Get place state and area (for searching) $place_state_sql_handle->execute($place_id) or die "Could not execute SQL statement."; my $place_state_abbr = $place_state_sql_handle->fetchrow(); &new_metadata_entry($place_doc_obj, "State", $place_state_abbr . " " . $state_abbr_to_name_mapping->{$place_state_abbr} . " " . $state_abbr_to_area_mapping->{$place_state_abbr}); # Get place construction dates my $place_construction_dates_table_html = ""; $place_construction_dates_sql_handle->execute($place_id) or die "Could not execute SQL statement."; while (my $place_construction_dates_match_hashref = $place_construction_dates_sql_handle->fetchrow_hashref()) { my $place_construction_date = $place_construction_dates_match_hashref->{"Date"}; if (!defined($place_construction_date)) { # Landscape sites are allowed to have no construction information next if ($place_type eq "Landscape site"); print STDERR "\n" if ($self->{'gli'}); print STDERR "Error: Place $place_id -- Missing construction date.\n"; print $fail_log_handle "Error: Place $place_id -- Missing construction date.\n"; next; } # Convert the construction date to a time period (for searching and browsing) if ($place_construction_date =~ /^(\d{1,4}).*$/) { my $place_construction_year = $1; my $place_time_period; if ($place_construction_year < 1800) { $place_time_period = "pre-1800"; } elsif ($place_construction_year < 1850) { $place_time_period = "1800-1850"; } elsif ($place_construction_year < 1900) { $place_time_period = "1850-1900"; } elsif ($place_construction_year < 1945) { $place_time_period = "1900-1945"; } elsif ($place_construction_year <= 1995) { $place_time_period = "1945-1995"; } elsif ($place_construction_year > 1995) { $place_time_period = "post-1995"; } 
push(@{$place_date_to_ids_mapping{$place_time_period}}, $place_doc_obj->get_OID()); &new_metadata_entry($place_doc_obj, "Time_period", $place_time_period); } elsif ($place_construction_date ne "n.d." && $place_construction_date ne "unknown" && $place_construction_date ne "various") { print STDERR "\n" if ($self->{'gli'}); print STDERR "Warning: Place $place_id -- Unknown construction date: $place_construction_date.\n"; print $fail_log_handle "Warning: Place $place_id -- Unknown construction date: $place_construction_date.\n"; } my $place_construction_note = $place_construction_dates_match_hashref->{"Note"}; if (!defined($place_construction_note)) { # "No date" entries are allowed to have no construction note next if ($place_construction_date eq "n.d"); print STDERR "\n" if ($self->{'gli'}); print STDERR "Error: Place $place_id -- Missing construction note.\n"; print $fail_log_handle "Error: Place $place_id -- Missing construction note.\n"; next; } # Get the architects for this construction and remember them for later for the designer objects my $place_construction_architect = $place_construction_dates_match_hashref->{"Architect_Name"} || ""; my $place_construction_architect_links = ""; foreach my $designer_name (split(/;/, $place_construction_architect)) { $designer_name =~ s/\(.*?\)//g; $designer_name =~ s/^\s*//; $designer_name =~ s/\s*$//; my $designer_id = $designer_name_to_id_mapping{$designer_name}; if (!defined($designer_id)) { $designer_id = scalar(keys(%designer_name_to_id_mapping)) + 1; $designer_name_to_id_mapping{$designer_name} = $designer_id; } $place_construction_architect_links .= "$designer_name "; push(@{$designer_name_to_place_ids_mapping{$designer_name}}, $place_id); } my $place_construction_date_prefix = $place_construction_dates_match_hashref->{"Prefix"} || ""; $place_construction_dates_table_html .= "$place_construction_date_prefix $place_construction_date$place_construction_note $place_construction_architect_links"; } 
&new_metadata_entry($place_doc_obj, "PlaceConstructionDatesTableHTML", "" . $place_construction_dates_table_html . "
"); # Get place materials (individual buildings only) if ($row_hashref->{"PlaceType"} eq "Individual building") { $place_materials_sql_handle->execute($place_id) or die "Could not execute SQL statement."; my $place_materials_match_hashref = $place_materials_sql_handle->fetchrow_hashref(); &new_metadata_entry($place_doc_obj, "MaterialFoundation", $place_materials_match_hashref->{"foundation"} || ""); &new_metadata_entry($place_doc_obj, "MaterialRoof", $place_materials_match_hashref->{"roof"} || ""); &new_metadata_entry($place_doc_obj, "MaterialWalls", $place_materials_match_hashref->{"walls"} || ""); &new_metadata_entry($place_doc_obj, "Materials", $place_materials_match_hashref->{"foundation"} || ""); &new_metadata_entry($place_doc_obj, "Materials", $place_materials_match_hashref->{"roof"} || ""); &new_metadata_entry($place_doc_obj, "Materials", $place_materials_match_hashref->{"walls"} || ""); } # Get place functions my $place_functions = ""; my $place_functions_table_html = ""; $place_functions_sql_handle->execute($place_id) or die "Could not execute SQL statement."; while (my $place_functions_match_hashref = $place_functions_sql_handle->fetchrow_hashref()) { my $place_function = $place_functions_match_hashref->{"Function"}; if (!defined($place_function)) { print STDERR "\n" if ($self->{'gli'}); print STDERR "Error: Place $place_id -- Missing function.\n"; print $fail_log_handle "Error: Place $place_id -- Missing function.\n"; next; } # Check for multiline values (these are errors) if ($place_function =~ /\n/) { print STDERR "\n" if ($self->{'gli'}); print STDERR "Error: Place $place_id -- Multiline function.\n"; print $fail_log_handle "Error: Place $place_id -- Multiline function.\n"; next; } my $place_year = $place_functions_match_hashref->{"Year"}; if (!defined($place_year)) { print STDERR "\n" if ($self->{'gli'}); print STDERR "Error: Place $place_id -- Missing function year.\n"; print $fail_log_handle "Error: Place $place_id -- Missing function year.\n"; 
next; } my $place_year_prefix = $place_functions_match_hashref->{"Prefix"} || ""; $place_functions_table_html .= "$place_year_prefix $place_year$place_function"; # Prepare function metadata for browsing and searching my $place_function_to_index = lc($place_function); # Casefold $place_function_to_index =~ s/^\s*//; # Remove whitespace from the start if ($place_function_to_index =~ /^master plan/) { $place_function_to_index =~ s/ \(campus,.*/ \(campus\)/; $place_function_to_index =~ s/ \(campus:.*/ \(campus\)/; $place_function_to_index =~ s/ \(landscape,.*/ \(landscape\)/; } else { $place_function_to_index =~ s/\(.*\)//g; # Remove anything in parentheses } $place_function_to_index =~ s/\s*$//; # Remove whitespace from the end # Deal with common plural cases $place_function_to_index =~ s/classroom$/classrooms/; $place_function_to_index =~ s/department buildings$/department building/; $place_function_to_index =~ s/faculty office$/faculty offices/; $place_function_to_index =~ s/garden$/gardens/; $place_function_to_index =~ s/residence halls$/residence hall/; $place_function_to_index =~ s/private residences$/private residence/; # Check it is one of the valid function values if (!defined($place_functions_mapping->{$place_function_to_index})) { print STDERR "\n" if ($self->{'gli'}); print STDERR "Warning: Place $place_id -- Unknown function: $place_function_to_index.\n"; print $fail_log_handle "Warning: Place $place_id -- Unknown function: $place_function_to_index.\n"; next; } push(@{$place_function_to_ids_mapping{$place_function_to_index}}, $place_doc_obj->get_OID()); $place_functions .= "$place_function_to_index "; } &new_metadata_entry($place_doc_obj, "Functions", $place_functions); &new_metadata_entry($place_doc_obj, "PlaceFunctionsTableHTML", "" . $place_functions_table_html . "
"); # Get place significance $place_significance_sql_handle->execute($place_id) or die "Could not execute SQL statement."; while (my $place_significance_match_hashref = $place_significance_sql_handle->fetchrow_hashref()) { my $place_significance = $place_significance_match_hashref->{"SigType"}; &new_metadata_entry($place_doc_obj, "Significance", lc($place_significance)); } # Get place references $place_references_sql_handle->execute($place_id) or die "Could not execute SQL statement."; my $place_references = $place_references_sql_handle->fetchrow(); if (defined($place_references)) { $self->process_place_references($place_doc_obj, $place_id, $place_name, $place_institution, $place_references); } # Get place designations $place_designations_sql_handle->execute($place_id) or die "Could not execute SQL statement."; my $place_designations_match_hashref = $place_designations_sql_handle->fetchrow_hashref(); if ($place_designations_match_hashref->{"National_Register"} eq "1") { &new_metadata_entry($place_doc_obj, "Designation", "National Register"); } if ($place_designations_match_hashref->{"Federal_Agency"} eq "1") { &new_metadata_entry($place_doc_obj, "Designation", "National Historic Landmark"); } if ($place_designations_match_hashref->{"HABS"} eq "1" || $place_designations_match_hashref->{"HAER"} eq "1") { &new_metadata_entry($place_doc_obj, "Designation", "HABS/HAER"); } if ($place_designations_match_hashref->{"Local_Designation"} eq "1") { &new_metadata_entry($place_doc_obj, "Designation", "Local/State"); } # Get place narrative $place_narrative_sql_handle->execute($place_id) or die "Could not execute SQL statement."; my $place_narrative = $place_narrative_sql_handle->fetchrow(); if (defined($place_narrative)) { my $place_narrative_html = &rtf_to_html($place_narrative); $place_narrative_html =~ s/(
(\s|\n)*)*$//; # Remove any trailing
tags &new_metadata_entry($place_doc_obj, "PlaceNarrativeHTML", $place_narrative_html); } # Get place images my $place_images_html = ""; $place_images_sql_handle->execute($place_id) or die "Could not execute SQL statement."; while (my $place_images_match_hashref = $place_images_sql_handle->fetchrow_hashref) { my $place_image_location = $place_images_match_hashref->{"Location"}; my $place_image_name = $place_images_match_hashref->{"FileName"}; # Deal with PDF files separately: don't convert, just associate if ($place_image_location =~ /\.pdf$/i) { # Convert the server location of the PDF file into the local location of the file my $place_pdf_file_path = $place_image_location; $place_pdf_file_path =~ s/^[A-Z]:/$self->{'images_directory'}/; if (-f $place_pdf_file_path) { my ($place_pdf_file_name) = ($place_pdf_file_path =~ /^.+\\(.+)$/); $place_doc_obj->associate_file($place_pdf_file_path, $place_pdf_file_name, undef, $place_doc_obj->get_top_section()); $place_pdf_file_name =~ s/ /%20/g; my $place_pdf_file_href = "_httpcollection_/index/assoc/[assocfilepath]/$place_pdf_file_name"; $place_images_html .= "\n"; } else { print STDERR "\n" if ($self->{'gli'}); print STDERR "Error: File $place_pdf_file_path does not exist.\n"; print $fail_log_handle "Error: File $place_pdf_file_path does not exist.\n"; } } else { my $place_image_small_file_href = $self->generate_place_image_variant($place_doc_obj, $place_image_location, "small"); my $place_image_large_file_href = $self->generate_place_image_variant($place_doc_obj, $place_image_location, "large"); # Create a new section for each place image my $place_image_section = $place_doc_obj->insert_section($place_doc_obj->get_end_child($place_doc_obj->get_top_section())); $place_doc_obj->add_utf8_text($place_image_section, "_"); # This is necessary $place_doc_obj->add_utf8_metadata($place_image_section, "DocumentType", "Image"); $place_doc_obj->add_utf8_metadata($place_image_section, "Title", $place_image_name); 
$place_doc_obj->add_utf8_metadata($place_image_section, "ImagePath", $place_image_large_file_href); $place_images_html .= "\n"; } } &new_metadata_entry($place_doc_obj, "PlaceImagesHTML", $place_images_html); $place_doc_obj->add_utf8_text($place_doc_obj->get_top_section(), "Some dummy text."); $self->{'processor'}->process($place_doc_obj); $self->{'num_processed'}++; # Build mappings for creating the static macrofiles $place_id_to_name_mapping{$place_doc_obj->get_OID()} = $place_name; push(@{$place_type_to_ids_mapping{$place_type}}, $place_doc_obj->get_OID()); } &write_bilevel_static_browser_macrofile("types", \%place_type_to_ids_mapping, \%place_id_to_name_mapping, \%place_id_to_institution_name_mapping); &write_bilevel_static_browser_macrofile("styles", \%place_style_to_ids_mapping, \%place_id_to_name_mapping, \%place_id_to_institution_name_mapping); &write_bilevel_static_browser_macrofile("dates", \%place_date_to_ids_mapping, \%place_id_to_name_mapping, \%place_id_to_institution_name_mapping); &write_bilevel_static_browser_macrofile("functions", \%place_function_to_ids_mapping, \%place_id_to_name_mapping, \%place_id_to_institution_name_mapping); }


# NOTE(review): the routines below contain string and regex literals whose
# HTML markup appears to have been lost (empty "" literals, bare "\n" where a
# tag would be expected). Markup has been restored only where a surviving
# closing tag makes the missing opener unambiguous (<i>...</i>, <a>...</a>)
# and where "&" escaping had clearly degraded into a no-op. Every other
# literal keeps exactly the runtime value it had and should be compared
# against a pristine copy of this file.


# Converts a place's RTF reference list to HTML, stores it on the place
# document as "PlaceReferencesHTML", and builds one "PlaceReference" document
# per reference with a best-effort author/title split.
#
#   $place_doc_obj                - document object for the place
#   $place_id                     - place identifier (used in messages/metadata)
#   $place_name                   - stored as "PlaceName" on each reference
#   $place_institution_name       - stored as "PlaceInstitutionName"
#   $place_references_rtf_string  - raw RTF text of the references
#
# Reads $self->{'failhandle'}, $self->{'gli'}, $self->{'file'} and
# $self->{'processor'}; increments $self->{'num_processed'} and the
# file-level counter $place_reference_id once per reference document.
sub process_place_references {
    my $self = shift(@_);
    my ($place_doc_obj, $place_id, $place_name, $place_institution_name,
        $place_references_rtf_string) = @_;

    my $fail_log_handle = $self->{'failhandle'};

    # Convert the place references from RTF to HTML
    my $place_references_html_raw = &rtf_to_html($place_references_rtf_string);
    if ($place_references_html_raw =~ /[ <](http|www\.)/) {
        print STDERR "\n" if ($self->{'gli'});
        print STDERR "Warning: Place $place_id -- Found possible web address in references without hyperlink tag.\n";
        print $fail_log_handle "Warning: Place $place_id -- Found possible web address in references without hyperlink tag.\n";
    }

    # Split the references and try to parse title and author
    my $place_references_html = "";
    $place_references_html_raw =~ s/(\r|\n)//g;  # Remove all newlines

    # NOTE(review): the next four patterns and the wrapper strings further
    # down only contain newlines where markup is expected. As written, the
    # split can never match because all newlines were removed just above.
    $place_references_html_raw =~ s/\n\n/\n\n/g;  # Move italic tags
    my @place_references = split(/\n\s*\n/, $place_references_html_raw);

    foreach my $place_reference (@place_references) {
        $place_reference =~ s/^(\n\s*)*//;
        $place_reference =~ s/(\n\s*)*$//;
        next if ($place_reference !~ /\w/);

        $place_references_html .= "\n\n" . $place_reference . "\n\n\n";

        my $place_reference_author = "";
        my $place_reference_title = "";

        # Case 0: A magic word in the first sentence
        my $place_reference_first_sentence = $place_reference;
        if ($place_reference =~ /^(.*?)\./) {
            $place_reference_first_sentence = $1;
        }

        if ($place_reference_first_sentence =~ /\b(collection|collections|papers|archives|database|letter|memo|inventory|photographs|minutes|reports|records)\b/i) {
            # Don't bother trying to parse the reference
        }
        # Case 1: Author (possibly empty), then title in italics or quotes.
        # The opening <i> is required here; without it the first group can
        # never capture an author.
        elsif ($place_reference =~ /^(.*?)<i>(.*?)<\/i>/ || $place_reference =~ /^(.*)"(.*?)"/) {
            $place_reference_author = $1;
            $place_reference_title = $2;
        }
        # Case 2: Zero or one fullstops, assume no author and title is complete text
        elsif ($place_reference =~ /^[^\.]*\.[^\.]*$/ || $place_reference !~ /\./) {
            $place_reference_title = $place_reference;
        }
        else {
            print STDERR "\n" if ($self->{'gli'});
            # print STDERR "Warning: Place $place_id -- Could not parse reference: $place_reference\n";
            print $fail_log_handle "Warning: Place $place_id -- Could not parse reference: $place_reference\n";
            next;
        }

        # Create a new Reference document for this place reference.
        # NOTE(review): this uses $self->{'file'} while process_designers()
        # uses $self->{'filename'} -- confirm which key is intended.
        my $place_reference_doc_obj = doc->new($self->{'file'} . "-", "indexed_doc");
        $place_reference_doc_obj->set_OID("pr$place_reference_id");
        &new_metadata_entry($place_reference_doc_obj, "DocumentType", "PlaceReference");
        &new_metadata_entry($place_reference_doc_obj, "PlaceID", $place_id);
        &new_metadata_entry($place_reference_doc_obj, "PlaceName", $place_name);
        &new_metadata_entry($place_reference_doc_obj, "PlaceInstitutionName", $place_institution_name);
        &new_metadata_entry($place_reference_doc_obj, "Reference", $place_reference);
        &new_metadata_entry($place_reference_doc_obj, "ReferenceAuthor", $place_reference_author);
        &new_metadata_entry($place_reference_doc_obj, "ReferenceTitle", $place_reference_title);

        $place_reference_doc_obj->add_utf8_text($place_reference_doc_obj->get_top_section(), "Some dummy text.");
        $self->{'processor'}->process($place_reference_doc_obj);
        $self->{'num_processed'}++;
        $place_reference_id++;
    }

    &new_metadata_entry($place_doc_obj, "PlaceReferencesHTML", $place_references_html);
}


# Builds one "Designer" document per entry of the file-level
# %designer_name_to_id_mapping, listing the places the designer worked on
# (taken from %designer_name_to_place_ids_mapping), then writes the
# "designers" static browser macrofile.
#
#   $dbh - database handle used to prepare the place look-up statements
#
# Dies if any prepared statement fails to execute.
sub process_designers {
    my $self = shift(@_);
    my $dbh = shift(@_);

    my $fail_log_handle = $self->{'failhandle'};

    # Prepare SQL statement for getting the Place name
    my $place_name_sql_statement = "SELECT Current_name FROM tblPlace WHERE Entry_ID=?";
    my $place_name_sql_handle = $dbh->prepare($place_name_sql_statement);

    # Prepare SQL statement for getting the Place institution
    my $place_institution_sql_statement = "SELECT Institution_Name FROM tblInstitution,tblPlace WHERE tblInstitution.Institution_ID=tblPlace.Institution_ID and tblPlace.Entry_ID=?";
    my $place_institution_sql_handle = $dbh->prepare($place_institution_sql_statement);

    # Prepare SQL statement for getting the Place "date of construction"
    my $place_construction_date_sql_statement = "SELECT Date FROM tblConstruction_and_Dates WHERE Entry_ID=?";
    my $place_construction_date_sql_handle = $dbh->prepare($place_construction_date_sql_statement);

    # Prepare SQL statement for getting the Place non-PDF images
    my $place_images_sql_statement = "SELECT Location,FileName FROM tblImages WHERE FileType=1 AND Location NOT LIKE '%.pdf' AND Entry_ID=? ORDER BY Image_Order";
    my $place_images_sql_handle = $dbh->prepare($place_images_sql_statement);
    $place_images_sql_handle->{LongReadLen} = 65536;

    # Create a document object for each designer
    my %designer_id_to_name_mapping;
    foreach my $designer_name (keys %designer_name_to_id_mapping) {
        my $designer_id = $designer_name_to_id_mapping{$designer_name};
        # print STDERR "  Designer $designer_id\n";

        my $designer_doc_obj = doc->new($self->{'filename'} . "-", "indexed_doc");
        $designer_doc_obj->set_OID("d$designer_id");
        &new_metadata_entry($designer_doc_obj, "DocumentType", "Designer");
        &new_metadata_entry($designer_doc_obj, "Designer_name", $designer_name);

        # Get designer places
        my $designer_places_list_html = "";
        my $last_designer_place_id = "";
        foreach my $designer_place_id (sort(@{$designer_name_to_place_ids_mapping{$designer_name}})) {
            # The designer may have worked on a place multiple times, so check
            # for this (the ids are sorted, so duplicates are adjacent)
            next if ($designer_place_id eq $last_designer_place_id);
            $last_designer_place_id = $designer_place_id;

            # Get place name
            $place_name_sql_handle->execute($designer_place_id) or die "Could not execute SQL statement.";
            my $designer_place_name = $place_name_sql_handle->fetchrow();

            # Get place institution name
            $place_institution_sql_handle->execute($designer_place_id) or die "Could not execute SQL statement.";
            my $designer_place_institution_name = $place_institution_sql_handle->fetchrow();

            # Get place date of construction
            $place_construction_date_sql_handle->execute($designer_place_id) or die "Could not execute SQL statement.";
            my $designer_place_construction_date_value = $place_construction_date_sql_handle->fetchrow() || "";

            # Get the first non-PDF image for this place. An empty hash stands
            # in for "no row" so the look-ups below simply yield undef.
            $place_images_sql_handle->execute($designer_place_id) or die "Could not execute SQL statement.";
            my $designer_place_images_match_hashref = $place_images_sql_handle->fetchrow_hashref() || {};
            my $designer_place_image_location = $designer_place_images_match_hashref->{"Location"};
            my $designer_place_image_name = $designer_place_images_match_hashref->{"FileName"} || "No image";

            # NOTE(review): the href computed here is not interpolated into
            # the list HTML below. The call is kept because
            # generate_place_image_variant() also associates the image file
            # with the designer document.
            my $designer_place_image_small_file_href;
            if (defined($designer_place_image_location)) {
                $designer_place_image_small_file_href = $self->generate_place_image_variant($designer_doc_obj, $designer_place_image_location, "small");
            }
            else {
                # There is no non-PDF image for this place
                $designer_place_image_small_file_href = "_httpcollection_/images/no_image-small.jpg";
            }

            $designer_places_list_html .= "\"$designer_place_image_name\"$designer_place_name, $designer_place_institution_name\nDate of construction: $designer_place_construction_date_value\n";
        }
        &new_metadata_entry($designer_doc_obj, "DesignerPlacesListHTML", "" . $designer_places_list_html . "\n");

        $designer_doc_obj->add_utf8_text($designer_doc_obj->get_top_section(), "Some dummy text.");
        $self->{'processor'}->process($designer_doc_obj);
        $self->{'num_processed'}++;

        $designer_id_to_name_mapping{$designer_doc_obj->get_OID()} = $designer_name;
    }

    # Write the designers.dm macrofile
    &write_static_browser_macrofile("designers", \%designer_id_to_name_mapping);
}


# Adds one metadata value to the top section of $doc_obj.
#
#   $doc_obj        - document object receiving the metadata
#   $metadata_name  - metadata name; spaces are replaced by underscores
#   $metadata_value - value; nothing is added when it is undefined or empty
sub new_metadata_entry {
    my ($doc_obj, $metadata_name, $metadata_value) = (@_);

    # Don't bother with empty metadata (an undefined value counts as empty)
    return if (!defined($metadata_value) || $metadata_value eq "");

    # Spaces aren't allowed in metadata names
    $metadata_name =~ s/ /_/g;

    # Anything from the database is ISO 8859-1 encoded, so convert to UTF-8
    $metadata_value = &unicode::ascii2utf8(\$metadata_value);

    # Escape any '[' and ']' characters so Greenstone doesn't try to treat the text as metadata...
    $metadata_value =~ s/\[/&\#91;/g;
    $metadata_value =~ s/\]/&\#93;/g;

    # ...but don't mess up real metadata references!
    $metadata_value =~ s/&\#91;assocfilepath&\#93;/\[assocfilepath\]/g;
    $metadata_value =~ s/&\#91;parent\(Top\)\:assocfilepath&\#93;/\[parent(Top):assocfilepath\]/g;

    # Escape any '&' characters so the metadata is HTML 4 compliant when
    # displayed. Numeric character references ("&#...") are left alone.
    $metadata_value =~ s/&([^\#])/&amp;$1/g;

    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), $metadata_name, $metadata_value);
}


# Converts a fragment of RTF into simple HTML and returns the result.
#
# Brace groups are dropped (a HYPERLINK group becomes an anchor), quote
# control words become plain quotes, underline/italic control words become
# <i>...</i>, \'xx escapes become numeric character references, and all
# remaining control words are removed.
sub rtf_to_html {
    my $rtf_string = shift(@_);

    # Remove everything in curly braces, but keep any hyperlinks
    while ($rtf_string =~ /\{(.*?)\}/) {
        my $group_text = $1;
        if ($group_text =~ /HYPERLINK (.*)/) {
            my $link_url = $1;
            $link_url =~ s/^\"(.*?)\"$/$1/;      # Remove surrounding quotes
            $link_url =~ s/^\s*(.*?)\s*$/$1/;    # Remove surrounding whitespace
            $rtf_string =~ s/\{(.*?)\}/<a href=\"$link_url\">$link_url<\/a>/;
        }
        else {
            $rtf_string =~ s/\{(.*?)\}//;
        }
    }

    # Control-word rewrites, applied strictly in this order. Each word is
    # tried with a trailing whitespace character first (which is consumed),
    # then at a word boundary.
    my @control_word_rewrites = (
        [ qr/\\ldblquote\s/,        "\""    ],
        [ qr/\\ldblquote\b/,        "\""    ],
        [ qr/\\rdblquote\s/,        "\""    ],
        [ qr/\\rdblquote\b/,        "\""    ],
        [ qr/\\lquote\s/,           "'"     ],
        [ qr/\\lquote\b/,           "'"     ],
        [ qr/\\rquote\s/,           "'"     ],
        [ qr/\\rquote\b/,           "'"     ],
        [ qr/\\pard/,               ""      ],
        # NOTE(review): \par maps to a bare newline here; a line-break tag
        # may have been lost from this replacement.
        [ qr/\\par/,                "\n"    ],
        # Underline and italic both open <i>, matching the </i> emitted for
        # \ulnone and \i0 below.
        [ qr/\\ul\s/,               "<i>"   ],
        [ qr/\\ul\b/,               "<i>"   ],
        [ qr/\\ulnone\s/,           "</i>"  ],
        [ qr/\\ulnone\b/,           "</i>"  ],
        [ qr/\\i\s/,                "<i>"   ],
        [ qr/\\i\b/,                "<i>"   ],
        [ qr/\\i0\s/,               "</i>"  ],
        [ qr/\\i0\b/,               "</i>"  ],
        [ qr/\\super /,             ""      ],
        [ qr/\\nosupersub /,        ""      ],
        [ qr/\\~/,                  " "     ],
        [ qr/\\([A-Za-z0-9\-]+)/,   ""      ],   # Any other control word
        [ qr/\}/,                   ""      ],
    );
    foreach my $rewrite (@control_word_rewrites) {
        my ($pattern, $replacement) = @{$rewrite};
        $rtf_string =~ s/$pattern/$replacement/g;
    }

    # Assume non-ASCII is ISO 8859-1, and convert into HTML entities
    $rtf_string =~ s/\\'([a-z0-9][a-z0-9])/"&#" . hex($1) . ";"/ge;

    # Remove extra less-than and greater-than symbols
    $rtf_string =~ s/< >/>/g;
    $rtf_string =~ s/>(>+)/>/g;

    # Remove empty tags for HTML 4 compliance (only an <i></i> pair with
    # nothing but whitespace inside, not every closing tag)
    $rtf_string =~ s/<i>\s*<\/i>/ /g;

    return $rtf_string;
}


# Returns (width, height) of an image file, or an empty list on failure
# (after reporting the problem on STDERR and the fail log).
#
# Results are cached in "<image path>.info" as three lines: the image's
# modification time, its width and its height. The cache is used only when
# the stored modification time equals the file's current one; otherwise the
# external "identify" command is run and the cache rewritten.
sub get_place_image_dimensions {
    my $self = shift(@_);
    my $place_image_file_path = shift(@_);

    my $fail_log_handle = $self->{'failhandle'};

    # Make sure the place image file actually exists
    if (!-f $place_image_file_path) {
        print STDERR "\n" if ($self->{'gli'});
        print STDERR "Error: Image $place_image_file_path does not exist.\n";
        print $fail_log_handle "Error: Image $place_image_file_path does not exist.\n";
        return;
    }
    my $place_image_file_date = (stat($place_image_file_path))[9];

    # Check if this place image has already been identified by looking for a ".info" file in the same directory
    my $place_image_info_file_path = $place_image_file_path . ".info";
    if (-f $place_image_info_file_path) {
        # A ".info" file exists, so read the cached place image information
        # from it. An unreadable or malformed cache is ignored and the image
        # is simply identified again.
        if (open(my $info_in_handle, "<", $place_image_info_file_path)) {
            my @place_image_info = <$info_in_handle>;
            close($info_in_handle);
            chomp(@place_image_info);

            my ($cached_place_image_file_date, $cached_width, $cached_height) = @place_image_info;
            my $cache_is_well_formed = (defined($cached_height)
                                        && $cached_place_image_file_date =~ /^\d+$/
                                        && $cached_width =~ /^\d+$/
                                        && $cached_height =~ /^\d+$/);

            # Use the cached information only if the file date still matches
            if ($cache_is_well_formed && $cached_place_image_file_date == $place_image_file_date) {
                return ($cached_width, $cached_height);
            }
        }
    }

    # We haven't already identified the place image, so do it now
    print STDERR "Identifying place image $place_image_file_path...\n";
    my $identify_command = "identify -format \"%w %h\" \"$place_image_file_path\"";
    my $identify_result = `$identify_command`;
    $identify_result = "" if (!defined($identify_result));
    print "Identify result: $identify_result\n" if ($self->{'verbosity'} > 2);

    # Check that the output is what we're expecting, and parse the place
    # image width and height from it
    my ($place_image_width, $place_image_height) = ($identify_result =~ /(\d+) (\d+)/);
    if (!defined($place_image_width) || !defined($place_image_height)) {
        print STDERR "\n" if ($self->{'gli'});
        print STDERR "Error: Place image $place_image_file_path could not be identified.\n";
        print $fail_log_handle "Error: Place image $place_image_file_path could not be identified.\n";
        return;
    }

    # Write the place image info file so we don't have to identify this exact
    # image again in the future. Failing to write the cache is not fatal: the
    # dimensions are still returned.
    if (open(my $info_out_handle, ">", $place_image_info_file_path)) {
        print $info_out_handle "$place_image_file_date\n";
        print $info_out_handle "$place_image_width\n";
        print $info_out_handle "$place_image_height\n";
        close($info_out_handle);
    }
    else {
        print STDERR "\n" if ($self->{'gli'});
        print STDERR "Warning: Could not write $place_image_info_file_path: $!\n";
        print $fail_log_handle "Warning: Could not write $place_image_info_file_path: $!\n";
    }

    return ($place_image_width, $place_image_height);
}


# Produces the "small", "medium" or "large" variant of a place image,
# associates it with $doc_obj, and returns the href to use for it. Returns
# nothing if the image (or its variant) could not be measured.
#
#   $doc_obj                  - document the variant file is associated with
#   $place_image_location     - image location as stored in the database; a
#                               leading drive letter is replaced by the
#                               images/cache directory
#   $place_image_variant_size - prefix of the "<size>_image_type",
#                               "<size>_image_width" and
#                               "<size>_image_options" plugin settings
sub generate_place_image_variant {
    my $self = shift(@_);
    my ($doc_obj, $place_image_location, $place_image_variant_size) = (@_);

    my $fail_log_handle = $self->{'failhandle'};

    # Convert the server location of the file into the local location of the file
    my $place_image_file_path = $place_image_location;
    $place_image_file_path =~ s/^[A-Z]:/$self->{'images_directory'}/;

    # Get the width and height of the place image
    my ($place_image_width, $place_image_height) = $self->get_place_image_dimensions($place_image_file_path);
    if (!defined($place_image_width) || !defined($place_image_height)) {
        # An error has occurred (error message generated by get_place_image_dimensions())
        return;
    }
    my $place_image_file_date = (stat($place_image_file_path))[9];

    # Generate the path of the place image variant (in the cache directory)
    my $place_image_variant_file_suffix = "-$place_image_variant_size." . $self->{$place_image_variant_size . '_image_type'};
    my $place_image_variant_file_path = $place_image_location;
    $place_image_variant_file_path =~ s/^[A-Z]:/$self->{'cache_directory'}/;
    $place_image_variant_file_path =~ s/^(.+)(\..*)/$1$place_image_variant_file_suffix/;
    my ($place_image_variant_file_name) = ($place_image_variant_file_path =~ /^.+\\(.+)$/);

    # Only scale down the place image if it is bigger than the desired width
    my $place_image_variant_desired_width = $self->{$place_image_variant_size . '_image_width'};
    if ($place_image_width > $place_image_variant_desired_width) {
        # Only generate the place image variant if it doesn't already exist, or if the place image is newer
        my $variant_is_stale = (!-f $place_image_variant_file_path
                                || $place_image_file_date > (stat($place_image_variant_file_path))[9]);
        if ($variant_is_stale) {
            print STDERR "Generating place image variant $place_image_variant_file_path...\n";
            my ($place_image_variant_directory) = ($place_image_variant_file_path =~ /^(.+)\\.+$/);
            &util::mk_all_dir($place_image_variant_directory);

            my $place_image_variant_options = "-scale $place_image_variant_desired_width " . $self->{$place_image_variant_size . '_image_options'};
            my $convert_command = "convert $place_image_variant_options \"$place_image_file_path\" \"$place_image_variant_file_path\"";
            # Backticks are used so the command's standard output is captured
            # rather than printed; a failed conversion is detected by the
            # dimension check below.
            my $convert_result = `$convert_command`;
        }
    }
    else {
        # The desired width is bigger than the place image, so we just use the original
        $place_image_variant_file_path = $place_image_file_path;
    }

    my ($place_image_variant_width, $place_image_variant_height) = $self->get_place_image_dimensions($place_image_variant_file_path);
    if (!defined($place_image_variant_width) || !defined($place_image_variant_height)) {
        # An error has occurred (error message generated by get_place_image_dimensions())
        return;
    }

    # Associate the place image variant file
    $doc_obj->associate_file($place_image_variant_file_path, $place_image_variant_file_name, undef, $doc_obj->get_top_section());

    # Build the href for the place image variant
    my $place_image_variant_href = "_httpcollection_/index/assoc/{Or}{[parent(Top):assocfilepath],[assocfilepath]}/" . $place_image_variant_file_name;
    $place_image_variant_href =~ s/ /%20/g;

    return $place_image_variant_href;
}


# Writes "<package name>.dm" in the collection's macros directory, containing
# a single-level static browser for the given id -> name mapping.
# Dies if the macrofile cannot be opened for writing.
sub write_static_browser_macrofile {
    my $static_browser_package_name = shift(@_);
    my $id_to_name_mapping = shift(@_);

    my $static_browser_macrofile_path = "$ENV{'GSDLHOME'}\\collect\\cic-hcap\\macros\\$static_browser_package_name.dm";
    print STDERR "Writing $static_browser_macrofile_path...\n";

    # BROWSER_MACROFILE is a package-level handle because
    # write_static_browser_macros() prints to it directly.
    open(BROWSER_MACROFILE, ">", $static_browser_macrofile_path) or die "Error: Could not write to $static_browser_macrofile_path.\n";
    &write_static_browser_macros($static_browser_package_name, $id_to_name_mapping);
    close(BROWSER_MACROFILE);
}


# Prints one macro package to the already-open BROWSER_MACROFILE handle: an
# A-Z quick-links macro followed by a browser macro with one chunk per
# initial character.
#
#   $static_browser_package_name - name written on the "package" line
#   $id_to_name_mapping          - hashref of document id -> display name
#   $id_to_extra_mapping         - optional hashref of document id -> extra text
sub write_static_browser_macros {
    my $static_browser_package_name = shift(@_);
    my $id_to_name_mapping = shift(@_);
    my $id_to_extra_mapping = shift(@_);

    print BROWSER_MACROFILE "package $static_browser_package_name\n\n";
    print BROWSER_MACROFILE "_cicstaticbrowserquicklinks_ {\n";

    # Group the ids by the first alphanumeric character of their name
    my %letter_to_ids_mapping;
    foreach my $id (keys %$id_to_name_mapping) {
        my $name = $id_to_name_mapping->{$id};
        my ($letter) = ($name =~ /([A-Za-z0-9])/);
        $letter = "" if (!defined($letter));  # Names with no alphanumerics share the "" group
        push(@{$letter_to_ids_mapping{$letter}}, $id);
    }

    print BROWSER_MACROFILE "";
    foreach my $letter (split(//, "ABCDEFGHIJKLMNOPQRSTUVWXYZ")) {
        if (defined($letter_to_ids_mapping{$letter})) {
            print BROWSER_MACROFILE "\n$letter ";
        }
        else {
            print BROWSER_MACROFILE "$letter ";
        }
    }
    print BROWSER_MACROFILE "\n";
    print BROWSER_MACROFILE "}\n\n";

    print BROWSER_MACROFILE "_cicstaticbrowser_ {\n";
    print BROWSER_MACROFILE "\n";
    foreach my $letter (sort(keys %letter_to_ids_mapping)) {
        my @letter_ids = @{$letter_to_ids_mapping{$letter}};

        # Only upper-case letters get an anchor name
        my $anchor_name;
        if ($letter =~ /^[A-Z]$/) {
            $anchor_name = $letter;
        }

        print BROWSER_MACROFILE &get_static_browser_macro_chunk($letter, $anchor_name, \@letter_ids, $id_to_name_mapping, $id_to_extra_mapping);
    }
    print BROWSER_MACROFILE "\n\n";
    print BROWSER_MACROFILE "}\n";
}


# Returns the text of one browser chunk: a title followed by the chunk's
# entries, sorted case-insensitively by "name extra" and emitted in pairs
# (entry i together with entry i + half) for a two-column layout.
#
#   $chunk_title         - title of the chunk
#   $anchor_name         - optional anchor name (may be undef or "")
#   $chunk_ids_ref       - arrayref of the ids belonging to this chunk
#   $id_to_name_mapping  - hashref of id -> display name
#   $id_to_extra_mapping - optional hashref of id -> extra text (may be undef)
#
# NOTE(review): the empty "" literals below mark places where markup appears
# to be missing; they are kept so the layout can be restored.
sub get_static_browser_macro_chunk {
    my $chunk_title = shift(@_);
    my $anchor_name = shift(@_);
    my $chunk_ids_ref = shift(@_);
    my $id_to_name_mapping = shift(@_);
    my $id_to_extra_mapping = shift(@_);

    my $static_browser_macro_chunk = "\n";
    if (defined($anchor_name) && $anchor_name ne "") {
        $static_browser_macro_chunk .= "";
    }
    $static_browser_macro_chunk .= "$chunk_title";

    # Entries with an identical full name collapse to a single entry
    my %full_name_to_id_mapping;
    foreach my $id (@{$chunk_ids_ref}) {
        my $full_name = $id_to_name_mapping->{$id};
        if (defined($id_to_extra_mapping)) {
            my $extra_text = $id_to_extra_mapping->{$id};
            $full_name .= " " . (defined($extra_text) ? $extra_text : "");
        }
        $full_name_to_id_mapping{$full_name} = $id;
    }

    my @full_names = sort { lc($a) cmp lc($b) } (keys(%full_name_to_id_mapping));
    my $half_point = int((scalar(@full_names) + 1) / 2);  # Half, rounded up

    foreach my $i (0 .. $half_point - 1) {
        $static_browser_macro_chunk .= "";

        # Left-hand entry
        my $id = $full_name_to_id_mapping{$full_names[$i]};
        my $name = $id_to_name_mapping->{$id};
        my $extra = (defined($id_to_extra_mapping) && $id_to_extra_mapping->{$id}) || "";
        $static_browser_macro_chunk .= "" . &html_safe($name) . "" . &html_safe($extra) . "";

        $static_browser_macro_chunk .= "";

        # Right-hand entry, if there is one
        if (defined($full_names[$i + $half_point])) {
            $id = $full_name_to_id_mapping{$full_names[$i + $half_point]};
            $name = $id_to_name_mapping->{$id};
            $extra = (defined($id_to_extra_mapping) && $id_to_extra_mapping->{$id}) || "";
            $static_browser_macro_chunk .= "" . &html_safe($name) . "" . &html_safe($extra);
        }

        $static_browser_macro_chunk .= "";
        $static_browser_macro_chunk .= "";
    }

    return $static_browser_macro_chunk;
}


# Writes "<package root>.dm" in the collection's macros directory, containing
# one static browser package per category. Each package is named
# "<package root><category>" with non-word characters removed.
#
#   $static_browser_package_root - macrofile base name and package name prefix
#   $category_to_ids_mapping     - hashref of category -> arrayref of ids
#   $id_to_name_mapping          - hashref of id -> display name
#   $id_to_extra_mapping         - hashref of id -> extra text
#
# Dies if the macrofile cannot be opened for writing.
sub write_bilevel_static_browser_macrofile {
    my $static_browser_package_root = shift(@_);
    my $category_to_ids_mapping = shift(@_);
    my $id_to_name_mapping = shift(@_);
    my $id_to_extra_mapping = shift(@_);

    my $static_browser_macrofile_path = "$ENV{'GSDLHOME'}\\collect\\cic-hcap\\macros\\$static_browser_package_root.dm";
    print STDERR "Writing $static_browser_macrofile_path...\n";

    open(BROWSER_MACROFILE, ">", $static_browser_macrofile_path) or die "Error: Could not write to $static_browser_macrofile_path.\n";

    # Sorted so that the generated macrofile is the same from build to build
    foreach my $category (sort(keys(%{$category_to_ids_mapping}))) {
        my $static_browser_package_name = $static_browser_package_root . $category;
        $static_browser_package_name =~ s/\W//g;

        my %id_to_name_mapping_for_category = ();
        foreach my $id (@{$category_to_ids_mapping->{$category}}) {
            $id_to_name_mapping_for_category{$id} = $id_to_name_mapping->{$id};
        }

        &write_static_browser_macros($static_browser_package_name, \%id_to_name_mapping_for_category, $id_to_extra_mapping);
    }

    close(BROWSER_MACROFILE);
}


# Returns $text with '&' escaped as "&amp;" and apostrophes as "&#39;".
# The ampersand is handled first so the apostrophe entity is not re-escaped.
sub html_safe {
    my $text = shift(@_);

    $text =~ s/&/&amp;/g;
    $text =~ s/\'/&\#39;/g;  # Apostrophes mess up Javascript on the Search by State page
    return $text;
}


# Writes "<package name>.dm" in the collection's macros directory for the
# browse-by-state page: a top-level macro that references one "_cicstateXX_"
# macro per state in the file-level %state_name_to_abbr_mapping, followed by
# a normal and a Javascript-safe ("_cicstateXXjs_") macro for every state.
#
#   $static_browser_package_name - macrofile base name and package name
#   $state_to_ids_mapping        - hashref of state name -> arrayref of ids
#   $id_to_name_mapping          - hashref of id -> display name
#
# Dies if the macrofile cannot be opened for writing.
sub write_state_browser_macrofile {
    my $static_browser_package_name = shift(@_);
    my $state_to_ids_mapping = shift(@_);
    my $id_to_name_mapping = shift(@_);

    my $static_browser_macrofile_path = "$ENV{'GSDLHOME'}\\collect\\cic-hcap\\macros\\$static_browser_package_name.dm";
    print STDERR "Writing $static_browser_macrofile_path...\n";

    open(BROWSER_MACROFILE, ">", $static_browser_macrofile_path) or die "Error: Could not write to $static_browser_macrofile_path.\n";

    my @state_names = sort(keys(%state_name_to_abbr_mapping));

    print BROWSER_MACROFILE "package $static_browser_package_name\n\n";
    print BROWSER_MACROFILE "_cicstaticbrowser_ {\n";
    print BROWSER_MACROFILE "\n";
    foreach my $state (@state_names) {
        my $state_abbr = $state_name_to_abbr_mapping{$state};
        print BROWSER_MACROFILE "_cicstate" . $state_abbr . "_\n";
    }
    print BROWSER_MACROFILE "\n\n";
    print BROWSER_MACROFILE "}\n";

    foreach my $state (@state_names) {
        my $state_sans_spaces = $state;
        $state_sans_spaces =~ s/ //g;

        my $state_has_ids = defined($state_to_ids_mapping->{$state});
        my @state_ids = ();
        if ($state_has_ids) {
            @state_ids = @{$state_to_ids_mapping->{$state}};
        }

        my $state_abbr = $state_name_to_abbr_mapping{$state};
        my $state_static_browser_macro_chunk = &get_static_browser_macro_chunk($state, $state_sans_spaces, \@state_ids, $id_to_name_mapping, undef);
        if (!$state_has_ids) {
            $state_static_browser_macro_chunk .= "No institutions for this state";
        }

        # Write out the normal macro chunk
        print BROWSER_MACROFILE "\n_cicstate" . $state_abbr . "_ {";
        print BROWSER_MACROFILE $state_static_browser_macro_chunk;
        print BROWSER_MACROFILE "}\n";

        # Write out a Javascript safe version
        print BROWSER_MACROFILE "\n_cicstate" . $state_abbr . "js_ {";
        print BROWSER_MACROFILE &javascript_safe($state_static_browser_macro_chunk);
        print BROWSER_MACROFILE "}\n";
    }

    close(BROWSER_MACROFILE);
}


# Returns $text with all carriage returns and newlines removed and every
# "</" rewritten as "<" followed by two backslashes and "/".
sub javascript_safe {
    my $text = shift(@_);

    $text =~ s/(\r|\n)//g;  # No newlines allowed
    $text =~ s/<\//<\\\\\//g;
    return $text;
}


1;