source: main/trunk/greenstone2/perllib/cpan/Image/ExifTool/DarwinCore.pm@ 34921

Last change on this file since 34921 was 34921, checked in by anupama, 3 years ago

Committing the improvements to EmbeddedMetaPlugin's processing of Keywords vs other metadata fields. Keywords were literally stored as arrays of words rather than phrases in PDFs (at least in Diego's sample PDF), whereas other meta fields like Subjects and Creators were stored as arrays of phrases. To get both to work, Kathy updated EXIF to a newer version, to retrieve the actual EXIF values stored in the PDF. Kathy and Dr Bainbridge also came up with a new option, which I added, called apply_join_before_split_to_metafields: a regex that lists the metadata fields to apply join_before_split to, which previously always got applied to all metadata fields. Now it's applied to any *Keywords metafields by default, as that's the metafield we have experience of that behaves differently to the others, since it stores by word instead of by phrase. Tested on Diego's sample PDF. Diego has double-checked that it works on his sample PDF too, setting the split char to ; and turning on join_before_split and leaving apply_join_before_split_to_metafields at its default of .*Keywords. File changes are strings.properties for the tooltip, the plugin introducing the option and working with it, and Kathy's EXIF updates affecting cpan/File and cpan/Image.

File size: 15.4 KB
Line 
1#------------------------------------------------------------------------------
2# File: DarwinCore.pm
3#
4# Description: Darwin Core XMP tags
5#
6# Revisions: 2013-01-28 - P. Harvey Created
7#
8# References: 1) http://rs.tdwg.org/dwc/index.htm
9# 2) https://exiftool.org/forum/index.php/topic,4442.0/all.html
10#------------------------------------------------------------------------------
11
12package Image::ExifTool::DarwinCore;
13
14use strict;
15use vars qw($VERSION);
16use Image::ExifTool::XMP;
17
18$VERSION = '1.05';
19
# Attributes common to the date/time tags in this file; the hash is
# flattened into each tag definition that needs it.
# NOTE: Do NOT put "Groups" here because Groups hash must not be common!
my %dateTimeInfo = (
    Writable     => 'date',
    Shift        => 'Time',
    PrintConv    => '$self->ConvertDateTime($val)',
    PrintConvInv => '$self->InverseDateTime($val,undef,1)',
);
27
# Structure definition referenced by the FossilSpecimen, LivingSpecimen,
# MaterialSample and PreservedSpecimen tags
my %materialSample = (
    STRUCT_NAME      => 'DarwinCore MaterialSample',
    NAMESPACE        => 'dwc',
    materialSampleID => { },
);
33
# Structure definition referenced by the Event, HumanObservation and
# MachineObservation tags
my %event = (
    STRUCT_NAME => 'DarwinCore Event',
    NAMESPACE   => 'dwc',
    day             => { Writable => 'integer', Groups => { 2 => 'Time' } },
    earliestDate    => { %dateTimeInfo, Groups => { 2 => 'Time' } },
    endDayOfYear    => { Writable => 'integer', Groups => { 2 => 'Time' } },
    eventDate       => { %dateTimeInfo, Groups => { 2 => 'Time' } },
    eventID         => { },
    eventRemarks    => { Writable => 'lang-alt' },
    eventTime => {
        Groups => { 2 => 'Time' },
        Writable => 'string', # (so we can format this ourself)
        Shift => 'Time',
        # (allow date/time or just time value)
        ValueConv => 'Image::ExifTool::XMP::ConvertXMPDate($val)',
        PrintConv => '$self->ConvertDateTime($val)',
        ValueConvInv => 'Image::ExifTool::XMP::FormatXMPDate($val) or $val',
        # Try the value as a full date/time first, then as a time-only value.
        # $v is declared once and reassigned for the second attempt, so that
        # compiling this string does not raise a '"my" variable masks earlier
        # declaration' warning.
        PrintConvInv => q{
            my $v = $self->InverseDateTime($val,undef,1);
            undef $Image::ExifTool::evalWarning;
            return $v if $v;
            # allow time-only values by adding dummy date (thanks Herb)
            $v = $self->InverseDateTime("2000:01:01 $val",undef,1);
            undef $Image::ExifTool::evalWarning;
            return $v if $v and $v =~ s/.* //; # strip off dummy date
            $Image::ExifTool::evalWarning = 'Invalid date/time or time-only value (use HH:MM:SS[.ss][+/-HH:MM|Z])';
            return undef;
        },
    },
    fieldNotes      => { },
    fieldNumber     => { },
    habitat         => { },
    latestDate      => { %dateTimeInfo, Groups => { 2 => 'Time' } },
    month           => { Writable => 'integer', Groups => { 2 => 'Time' } },
    parentEventID   => { },
    samplingEffort  => { },
    samplingProtocol => { },
    sampleSizeValue => { },
    sampleSizeUnit  => { },
    startDayOfYear  => { Writable => 'integer', Groups => { 2 => 'Time' } },
    verbatimEventDate => { Groups => { 2 => 'Time' } },
    year            => { Writable => 'integer', Groups => { 2 => 'Time' } },
);
77
# Darwin Core tag table: the structures defined in the dwc namespace, plus
# entries (marked Flat) which rename some of the flattened structure tags
%Image::ExifTool::DarwinCore::Main = (
    GROUPS    => { 0 => 'XMP', 1 => 'XMP-dwc', 2 => 'Other' },
    NAMESPACE => 'dwc',
    WRITABLE  => 'string',
    NOTES => q{
        Tags defined in the Darwin Core (dwc) XMP namespace.  See
        L<http://rs.tdwg.org/dwc/index.htm> for the official specification.
    },
    Event => {
        Name     => 'DCEvent', # (avoid conflict with XMP-iptcExt:Event)
        FlatName => 'Event',
        Struct   => \%event,
    },
    # tweak a few of the flattened tag names
    EventEventDate    => { Name => 'EventDate',    Flat => 1 },
    EventEventID      => { Name => 'EventID',      Flat => 1 },
    EventEventRemarks => { Name => 'EventRemarks', Flat => 1 },
    EventEventTime    => { Name => 'EventTime',    Flat => 1 },
    FossilSpecimen    => { Struct => \%materialSample },
    GeologicalContext => {
        FlatName => '', # ('GeologicalContext' is too long)
        Struct => {
            STRUCT_NAME => 'DarwinCore GeologicalContext',
            NAMESPACE   => 'dwc',
            bed                          => { },
            earliestAgeOrLowestStage     => { },
            earliestEonOrLowestEonothem  => { },
            earliestEpochOrLowestSeries  => { },
            earliestEraOrLowestErathem   => { },
            earliestPeriodOrLowestSystem => { },
            formation                    => { },
            geologicalContextID          => { },
            group                        => { },
            highestBiostratigraphicZone  => { },
            latestAgeOrHighestStage      => { },
            latestEonOrHighestEonothem   => { },
            latestEpochOrHighestSeries   => { },
            latestEraOrHighestErathem    => { },
            latestPeriodOrHighestSystem  => { },
            lithostratigraphicTerms      => { },
            lowestBiostratigraphicZone   => { },
            member                       => { },
        },
    },
    GeologicalContextBed       => { Name => 'GeologicalContextBed',       Flat => 1 },
    GeologicalContextFormation => { Name => 'GeologicalContextFormation', Flat => 1 },
    GeologicalContextGroup     => { Name => 'GeologicalContextGroup',     Flat => 1 },
    GeologicalContextMember    => { Name => 'GeologicalContextMember',    Flat => 1 },
    HumanObservation => { Struct => \%event },
    Identification => {
        FlatName => '', # ('Identification' is redundant)
        Struct => {
            STRUCT_NAME => 'DarwinCore Identification',
            NAMESPACE   => 'dwc',
            dateIdentified                   => { %dateTimeInfo, Groups => { 2 => 'Time' } },
            identificationID                 => { },
            identificationQualifier          => { },
            identificationReferences         => { },
            identificationRemarks            => { },
            identificationVerificationStatus => { },
            identifiedBy                     => { },
            typeStatus                       => { },
        },
    },
    LivingSpecimen     => { Struct => \%materialSample },
    MachineObservation => { Struct => \%event },
    MaterialSample     => { Struct => \%materialSample },
    MaterialSampleMaterialSampleID => { Name => 'MaterialSampleID', Flat => 1 },
    MeasurementOrFact => {
        FlatName => '', # ('MeasurementOrFact' is redundant and too long)
        Struct => {
            STRUCT_NAME => 'DarwinCore MeasurementOrFact',
            NAMESPACE   => 'dwc',
            measurementAccuracy       => { Format => 'real' },
            measurementDeterminedBy   => { },
            measurementDeterminedDate => { %dateTimeInfo, Groups => { 2 => 'Time' } },
            measurementID             => { },
            measurementMethod         => { },
            measurementRemarks        => { },
            measurementType           => { },
            measurementUnit           => { },
            measurementValue          => { },
        },
    },
    Occurrence => {
        Struct => {
            STRUCT_NAME => 'DarwinCore Occurrence',
            NAMESPACE   => 'dwc',
            associatedMedia         => { },
            associatedOccurrences   => { },
            associatedReferences    => { },
            associatedSequences     => { },
            associatedTaxa          => { },
            behavior                => { },
            catalogNumber           => { },
            disposition             => { },
            establishmentMeans      => { },
            individualCount         => { },
            individualID            => { },
            lifeStage               => { },
            occurrenceDetails       => { },
            occurrenceID            => { },
            occurrenceRemarks       => { },
            occurrenceStatus        => { },
            organismQuantity        => { },
            organismQuantityType    => { },
            otherCatalogNumbers     => { },
            preparations            => { },
            previousIdentifications => { },
            recordedBy              => { },
            recordNumber            => { },
            reproductiveCondition   => { },
            sex                     => { },
        },
    },
    OccurrenceOccurrenceDetails => { Name => 'OccurrenceDetails', Flat => 1 },
    OccurrenceOccurrenceID      => { Name => 'OccurrenceID',      Flat => 1 },
    OccurrenceOccurrenceRemarks => { Name => 'OccurrenceRemarks', Flat => 1 },
    OccurrenceOccurrenceStatus  => { Name => 'OccurrenceStatus',  Flat => 1 },
    Organism => {
        Struct => {
            STRUCT_NAME => 'DarwinCore Organism',
            NAMESPACE   => 'dwc',
            associatedOccurrences   => { },
            associatedOrganisms     => { },
            organismID              => { },
            organismName            => { },
            organismRemarks         => { },
            organismScope           => { },
            previousIdentifications => { },
        },
    },
    OrganismOrganismID      => { Name => 'OrganismID',      Flat => 1 },
    OrganismOrganismName    => { Name => 'OrganismName',    Flat => 1 },
    OrganismOrganismRemarks => { Name => 'OrganismRemarks', Flat => 1 },
    OrganismOrganismScope   => { Name => 'OrganismScope',   Flat => 1 },
    PreservedSpecimen => { Struct => \%materialSample },
    Record => {
        Struct => {
            STRUCT_NAME => 'DarwinCore Record',
            NAMESPACE   => 'dwc',
            basisOfRecord        => { },
            collectionCode       => { },
            collectionID         => { },
            dataGeneralizations  => { },
            datasetID            => { },
            datasetName          => { },
            dynamicProperties    => { },
            informationWithheld  => { },
            institutionCode      => { },
            institutionID        => { },
            ownerInstitutionCode => { },
        },
    },
    ResourceRelationship => {
        FlatName => '', # ('ResourceRelationship' is redundant and too long)
        Struct => {
            STRUCT_NAME => 'DarwinCore ResourceRelationship',
            NAMESPACE   => 'dwc',
            relatedResourceID           => { },
            relationshipAccordingTo     => { },
            relationshipEstablishedDate => { %dateTimeInfo, Groups => { 2 => 'Time' } },
            relationshipOfResource      => { },
            relationshipRemarks         => { },
            resourceID                  => { },
            resourceRelationshipID      => { },
        },
    },
    Taxon => {
        Struct => {
            STRUCT_NAME => 'DarwinCore Taxon',
            NAMESPACE   => 'dwc',
            acceptedNameUsage        => { },
            acceptedNameUsageID      => { },
            class                    => { },
            family                   => { },
            genus                    => { },
            higherClassification     => { },
            infraspecificEpithet     => { },
            kingdom                  => { },
            nameAccordingTo          => { },
            nameAccordingToID        => { },
            namePublishedIn          => { },
            namePublishedInID        => { },
            namePublishedInYear      => { },
            nomenclaturalCode        => { },
            nomenclaturalStatus      => { },
            order                    => { },
            originalNameUsage        => { },
            originalNameUsageID      => { },
            parentNameUsage          => { },
            parentNameUsageID        => { },
            phylum                   => { },
            scientificName           => { },
            scientificNameAuthorship => { },
            scientificNameID         => { },
            specificEpithet          => { },
            subgenus                 => { },
            taxonConceptID           => { },
            taxonID                  => { },
            taxonRank                => { },
            taxonRemarks             => { },
            taxonomicStatus          => { },
            verbatimTaxonRank        => { },
            vernacularName           => { Writable => 'lang-alt' },
        },
    },
    TaxonTaxonConceptID => { Name => 'TaxonConceptID', Flat => 1 },
    TaxonTaxonID        => { Name => 'TaxonID',        Flat => 1 },
    TaxonTaxonRank      => { Name => 'TaxonRank',      Flat => 1 },
    TaxonTaxonRemarks   => { Name => 'TaxonRemarks',   Flat => 1 },
    dctermsLocation => {
        Name     => 'DCTermsLocation',
        Groups   => { 2 => 'Location' },
        FlatName => 'DC', # ('dctermsLocation' is too long)
        Struct => {
            STRUCT_NAME => 'DarwinCore DCTermsLocation',
            NAMESPACE   => 'dwc',
            continent                           => { },
            coordinatePrecision                 => { },
            coordinateUncertaintyInMeters       => { },
            country                             => { },
            countryCode                         => { },
            county                              => { },
            decimalLatitude                     => { },
            decimalLongitude                    => { },
            footprintSpatialFit                 => { },
            footprintSRS                        => { },
            footprintWKT                        => { },
            geodeticDatum                       => { },
            georeferencedBy                     => { },
            georeferencedDate                   => { },
            georeferenceProtocol                => { },
            georeferenceRemarks                 => { },
            georeferenceSources                 => { },
            georeferenceVerificationStatus      => { },
            higherGeography                     => { },
            higherGeographyID                   => { },
            island                              => { },
            islandGroup                         => { },
            locality                            => { },
            locationAccordingTo                 => { },
            locationID                          => { },
            locationRemarks                     => { },
            maximumDepthInMeters                => { },
            maximumDistanceAboveSurfaceInMeters => { },
            maximumElevationInMeters            => { },
            minimumDepthInMeters                => { },
            minimumDistanceAboveSurfaceInMeters => { },
            minimumElevationInMeters            => { },
            municipality                        => { },
            pointRadiusSpatialFit               => { },
            stateProvince                       => { },
            verbatimCoordinates                 => { },
            verbatimCoordinateSystem            => { },
            verbatimDepth                       => { },
            verbatimElevation                   => { },
            verbatimLatitude                    => { },
            verbatimLocality                    => { },
            verbatimLongitude                   => { },
            verbatimSRS                         => { },
            waterBody                           => { },
        },
    },
);
344
3451; #end
346
347__END__
348
349=head1 NAME
350
351Image::ExifTool::DarwinCore - Darwin Core XMP tags
352
353=head1 SYNOPSIS
354
355This module is used by Image::ExifTool
356
357=head1 DESCRIPTION
358
359This file contains tag definitions for the Darwin Core XMP namespace.
360
361=head1 AUTHOR
362
363Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)
364
365This library is free software; you can redistribute it and/or modify it
366under the same terms as Perl itself.
367
368=head1 REFERENCES
369
370=over 4
371
372=item L<http://rs.tdwg.org/dwc/index.htm>
373
374=back
375
376=head1 SEE ALSO
377
378L<Image::ExifTool::TagNames/XMP Tags>,
379L<Image::ExifTool(3pm)|Image::ExifTool>
380
381=cut
Note: See TracBrowser for help on using the repository browser.