Changeset 12202
- Timestamp: 2006-07-13T12:45:37+12:00 (18 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/cic-hcap/perllib/plugins/CICPlug.pm
# r12185 -> r12202

# --- file lines 710-714 (enclosing sub not shown in this changeset view) ---
    my $place_references = $place_references_sql_handle->fetchrow();
    if (defined($place_references)) {
        # r12185 (removed): &new_metadata_entry($place_doc_obj, "References", &rtf_to_html($place_references));
        $self->add_place_references_metadata($place_doc_obj, $place_id, $place_references);
    }

# ... unchanged lines omitted ...

# --- file lines 930-965: sub added in r12202 ---

# Record a place's references as metadata on its document object.
#
# Arguments:
#   $place_doc_obj               - document object handed to new_metadata_entry()
#   $place_id                    - place identifier; used only in warning messages
#   $place_references_rtf_string - the references text, in RTF
#
# The whole text, converted to HTML by rtf_to_html(), is stored as
# "PlaceReferences".  It is then split wherever two <br /> tags are adjacent,
# and every piece containing at least one word character is stored as
# "Reference".  For each piece an attempt is made to also store
# "ReferenceAuthor" and/or "ReferenceTitle"; pieces that cannot be parsed are
# reported on STDERR and, when a fail log handle is available, in the fail log.
sub add_place_references_metadata
{
    my $self = shift(@_);
    my ($place_doc_obj, $place_id, $place_references_rtf_string) = (@_);
    my $fail_log_handle = $self->{'failhandle'};

    # Convert the place references from RTF to HTML
    my $place_references_html_string = &rtf_to_html($place_references_rtf_string);
    &new_metadata_entry($place_doc_obj, "PlaceReferences", $place_references_html_string);

    # Split the references and try to parse title and author
    $place_references_html_string =~ tr/\r\n//d;
    my @place_references = split(/<br \/><br \/>/, $place_references_html_string);
    foreach my $place_reference (@place_references) {
        # Drop trailing line breaks, then ignore pieces with no word characters
        $place_reference =~ s/(<br \/>\s*)*$//;
        next if ($place_reference !~ /\w/);
        &new_metadata_entry($place_doc_obj, "Reference", $place_reference);

        # Case 1: Author (possibly empty), then title in italics or quotes
        if ($place_reference =~ /^(.*?)<i>(.*?)<\/i>/ || $place_reference =~ /^(.*)"(.*?)"/) {
            # Copy the captures before calling out: @_ aliases $1/$2, so a
            # regex match inside the callee could otherwise change the value
            # it was handed.
            my ($reference_author, $reference_title) = ($1, $2);
            &new_metadata_entry($place_doc_obj, "ReferenceAuthor", $reference_author);
            &new_metadata_entry($place_doc_obj, "ReferenceTitle", $reference_title);
        }
        # Case 2: Zero or one fullstops, assume no author and title is complete text
        elsif (($place_reference =~ tr/.//) <= 1) {
            &new_metadata_entry($place_doc_obj, "ReferenceTitle", $place_reference);
        }
        else {
            # NOTE(review): $place_reference is HTML and may contain ' or <i>;
            # it is interpolated unescaped into the single-quoted r attribute.
            # Confirm the consumer of this line tolerates that.
            print STDERR "<ProcessingError n='Place $place_id' p='CICPlug' r='Could not parse reference: $place_reference'>\n" if ($self->{'gli'});
            print STDERR "Warning: Place $place_id -- Could not parse reference: $place_reference\n";

            # Printing to an undefined handle is fatal, so only log when one exists
            if (defined($fail_log_handle)) {
                print {$fail_log_handle} "Warning: Place $place_id -- Could not parse reference: $place_reference\n";
            }
        }
    }
}


# --- file lines 966-982 (r12185 lines 930-940); the rest of this sub is not
# --- shown in this changeset view ---
sub rtf_to_html
{
    my $rtf_string = shift(@_);
    $rtf_string =~ s/\{(.*?)\}//g;
    $rtf_string =~ s/\\ldblquote /"/g;      # added in r12202
    $rtf_string =~ s/\\rdblquote /"/g;      # added in r12202
    $rtf_string =~ s/\\rquote /'/g; # ' # (for Emacs)
    $rtf_string =~ s/\\pard//g;
    $rtf_string =~ s/\\par/<br \/>/g;
    $rtf_string =~ s/\\ul /<i>/g;           # added in r12202
    $rtf_string =~ s/\\ulnone /<\/i>/g;     # added in r12202
    $rtf_string =~ s/\\i0 /<\/i>/g;
    $rtf_string =~ s/\\i0\\/<\/i>\\/g;      # added in r12202
    $rtf_string =~ s/\\i /<i>/g;
    $rtf_string =~ s/\\i\\/<i>\\/g;         # added in r12202
    $rtf_string =~ s/\\~/ /g;
    $rtf_string =~ s/\\([A-Za-z0-9]+)//g;
    # ... remainder of rtf_to_html not shown ...
Note: See TracChangeset for help on using the changeset viewer.