source: gs3-installations/whakatohea-dl/trunk/sites/wmtb/collect/tipple-waiata/perllib/plugins/TippleExportJSONPlugin.pm@ 37183

Last change on this file since 37183 was 37183, checked in by davidb, 15 months ago

Further refinement of idea, with emphasis on using plugins arguments rather than having things hardwired

File size: 14.6 KB
Line 
1###########################################################################
2#
3# TippleExportJSONPlugin.pm
4# -- A plugin for processing the JSON format exported from a Tipple server
5#
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright 2023 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28package TippleExportJSONPlugin;
29
30
31use SplitJSONFile;
32
33use strict;
34no strict 'refs';
35#use multiread;
36
37use Encode;
38use JSON;
39
# Inherit from SplitJSONFile.  Were more than one parent listed, methods
# with identical signatures would be resolved in the order the parents
# appear in the ISA list.
BEGIN {
    @TippleExportJSONPlugin::ISA = qw(SplitJSONFile);
}
44
45
46
# Argument specifications contributed by this plugin.  new() appends
# them to the "ArgList" it hands to the SplitJSONFile constructor.
#
#   split_exp    -- defaults to "contentItems"
#   metadata_exp -- comma separated list of Tipple 'type' values, each
#                   optionally written as TYPE->GreenstoneName.  The
#                   default is the empty string, which process() treats
#                   as "accept every type".
#
# Retired defaults are kept as commented-out lines so that only one
# 'deft' key is live per entry (a repeated hash key silently overrides
# the earlier value).
my $arguments = [
#    { 'name' => "process_exp",
#      'desc' => "{BaseImporter.process_exp}",
#      'type' => "regexp",
#      'reqd' => "no",
#      'deft' => &get_default_process_exp() },
    { 'name' => "split_exp",
      'desc' => "{SplitJSONFile.split_exp}",
      'type' => "string",
#      'deft' => "contentGroups,contentItems",
      'deft' => "contentItems",
      'reqd' => "no" },
    { 'name' => "metadata_exp",
      'desc' => "{SplitJSONFile.metadata_exp}",
      'type' => "string",
#      'deft' => "WAIATA",
      'deft' => "",
      'reqd' => "no" },
];
66
67# Other document-level metadata types to consider:
68#
69# .contentGroups:
70# COMPOSER
71# GENRE
72# HAPU
73# OCCASION
74# TOPIC
75# WRITER
76#
77# .contentItems:
78# CONTENT_PAGE
79# TK_LABEL
80
81# =>
82# 'deft' => "COMPOSER,GENRE,HAPU,OCCASION,TOPIC,WRITER , WAIATA,CONTENT_PAGE,TK_LABEL",
83
84
# Description record for this plugin.  new() pushes it onto the
# "OptList" passed to the SplitJSONFile constructor; 'args' points at
# the argument specifications declared in $arguments.
my $options = {
    name     => "TippleExportJSONPlugin",
    desc     => "{TippleExportJSONPlugin.desc}",
    abstract => "no",
    inherits => "yes",
    args     => $arguments,
};
90
91
# Constructor.
#
# Registers this plugin's argument/option descriptions, delegates the
# actual construction to SplitJSONFile, re-blesses the result into
# $class, and then parses the 'metadata_exp' argument into
# $self->{'metadata_exp_lookup'} for use by process().
#
# 'metadata_exp' is a comma separated list.  Each entry is either
#     TYPE              (for example  WAIATA)
# or  TYPE->GSName      (for example  COMPOSER->Creator)
# and produces
#     $self->{'metadata_exp_lookup'}->{TYPE} =
#         { 'exists' => 1, 'gs_metadata_name' => GSName-or-undef }
#
# Parameters:
#   $pluginlist, $inputargs, $hashArgOptLists -- passed through to the
#       SplitJSONFile constructor after this class has added itself to
#       $pluginlist and its specs to ArgList/OptList.
# Returns:
#   the plugin object, blessed into $class.
sub new
{
    my ($class) = shift (@_);
    my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
    push(@{$hashArgOptLists->{"OptList"}},$options);

    # Arrow form rather than 'new SplitJSONFile(...)': this package also
    # defines a sub called 'new', which is exactly the situation where
    # indirect object syntax can be mis-parsed.
    my $self = SplitJSONFile->new($pluginlist, $inputargs, $hashArgOptLists);
    bless $self, $class;

    # Work on a trimmed local copy; $self->{'metadata_exp'} itself is
    # left untouched because process() compares it against "".
    my $metadata_exp = $self->{'metadata_exp'};
    $metadata_exp = "" unless defined $metadata_exp;
    $metadata_exp =~ s/^\s+|\s+$//g;

    $self->{'metadata_exp_lookup'} = {};
    foreach my $md_exp_and_opt_mapping (split(/\s*,\s*/,$metadata_exp)) {
        # e.g. a doubled comma in the argument
        next if ($md_exp_and_opt_mapping eq "");

        # The '->GSName' part is optional.  Without the trailing '?' a
        # bare entry such as "WAIATA" would not match at all and would
        # be recorded under the empty key.
        my ($md_exp,$opt_mapping)
            = ($md_exp_and_opt_mapping =~ m/^(.+?)\s*(?:->\s*(.+))?$/);
        next unless defined $md_exp;

        # note: $opt_mapping is undef when no '->GSName' was given
        $self->{'metadata_exp_lookup'}->{$md_exp}
            = { 'exists' => 1, 'gs_metadata_name' => $opt_mapping };
    }

    return $self;
}
116
117
118#sub get_default_process_exp
119#{
120# return q^(?i)\.json$^;
121#}
122
123
124####
125# Example 1: Document Section of Tipple (minimal)
126####
127#
128# {
129# "documents": [],
130# "id": 3786,
131# "mediaItems": [],
132# "name": "Te Kooti Arikirangi Te Turuki",
133# "type": "COMPOSER"
134# },
135
136####
137# Example 2: Document Section of Tipple (audio)
138####
139#
140# {
141# "documents": [
142# {
143# "locale": {
144# "code": "en"
145# },
146# "roles": [
147# {
148# "code": "DESCRIPTION",
149# "type": "DESCRIPTION"
150# }
151# ],
152# "sections": [
153# {
154# "caption": "Lyrics",
155# "collapsed": false,
156# "content": "Dark cloudy night, good for eeling\nHold the eeling rod at Waitangi. Waitangi Tahi.\nI prepare my kete of fern roots, when it is filled\nThen turn around, go south to the mountain Titiwa\nHave to be like a paua to climb it's steepness\nThen carry on past going south to the boundaries of Gisborne.\nThen turn and go west to Uenuku\nThis area is acknowledged by a rock, Te Karoro a Tamatea,\nThe rock where Tamatea hit and split his canoe,\nAlso in the same incident, bumped his nose and it bled\nI then jump the shores of K\u0101napanapa\nTo collect water to take to Te Pua-ki-Te-Reinga\nI then grasp my kete of belongings by the handles\nI look back with sadness and yearning to my land region\nNo one sees the sadness within me\nAs I bow and turn my head with sorrow",
157# "type": "text"
158# },
159# {
160# "caption": "Explanation",
161# "collapsed": false,
162# "content": "This Te Whakat\u014dhea waiata tangi sung at Waioweka, the hap\u016b of Ng\u0101ti Ira, captures the plight of it's people in a war ravaged environment. It reflects the pain and suffering our people have endured for the past 155 years.\n\nSome of the singers that can be heard are our koroua and kuia, Kaiora Tai, Himiona Kahika, Rea Rewiri and Heeni Tawhara. This recording was done at Te Rere p\u0101, 13 Tihema 1971, following the Tekau m\u0101 Rua.\n\nThe explanation given was in regards to a woman named Moa, who composed this waiata, after his lover deserted her.",
163# "type": "text"
164# }
165# ]
166# },
167# {
168# "locale": {
169# "code": "en"
170# },
171# "roles": [
172# {
173# "code": "TEASER",
174# "type": "TEASER"
175# }
176# ],
177# "sections": [
178# {
179# "collapsed": false,
180# "content": "A traditional chant of Ng\u0101ti Ira. In this recording you can hear the voices of our pakeke.",
181# "type": "text"
182# }
183# ]
184# },
185# {
186# "locale": {
187# "code": "mi_NZ"
188# },
189# "roles": [
190# {
191# "code": "TEASER",
192# "type": "TEASER"
193# }
194# ],
195# "sections": [
196# {
197# "collapsed": false,
198# "content": "He m\u014dteatea t\u0113nei o Ng\u0101ti Ira. Ka rongo ki te te reo waiata o r\u0101tou m\u0101",
199# "type": "text"
200# }
201# ]
202# },
203# {
204# "locale": {
205# "code": "mi_NZ"
206# },
207# "roles": [
208# {
209# "code": "DESCRIPTION",
210# "type": "DESCRIPTION"
211# }
212# ],
213# "sections": [
214# {
215# "caption": "Kupu",
216# "collapsed": false,
217# "content": "T\u0113r\u0101 te p\u014d pango\nPuritia mai te tautara ki Waitangi r\u0101 e\nKo te rite i taku tarai i k\u012ba mai nei\nTahuri ka Titiwa tonu\nTitiwa mai ana a roto he paua piri ki te toka\nKia m\u0101m\u0101 tonu atu ko te t\u014dnga atu o te r\u0101\nKi \u014dna papaihoretanga ki a Uenuku\nKo te toka i a Parirau ko te Karoro a Tamatea\nMe he ako ia nei te waka nei ka pakaru ki taku tinana\nPenei i reia e au ki te ihu o te tio hakia e\nPeke ana au ki te tai ng\u0101 riu o K\u0101napanapa\nHei kawe i ng\u0101 wai e ahau ki Te Pua-ki-Te-Reinga\nKia hopu nei \u014d akap\u016b te taunga atu ki t\u0101whiti e\nTe pae ki taiao mai\nKo wai ka kite i au e?\nKo te whakam\u0101 hoki rawa t\u0113nei ka t\u014dtope i taku rae e i...",
218# "type": "text"
219# },
220# {
221# "caption": "Whakam\u0101rama",
222# "collapsed": false,
223# "content": "He waiata aroha t\u0113nei. He k\u014drero e p\u0101 ana ki \u0113tahi whenua o Ng\u0101ti Ira, o Waioweka. Ka rite an\u014d te k\u014drero nei m\u014d te rohe whenua o Ng\u0101ti Ira o Waioweka \"Waitangi Tahi ki te raki, Titiwa ki te rawhiti, Waitangi Rua ki te Tonga, Matiti ki te hauauru. He k\u014drero tuku iho m\u014d te rohe whenua o Ng\u0101ti Ira. He k\u014drero an\u014d e whakaatu mai i te mamae me te p\u0101katokato o te iwi nei, o Te Whakat\u014dhea.\n\nKo \u0113tahi kaiwaiata e rangona nei, ko Kaiora Tai, Himiona Kahika, ko Rea Rewiri, ko Heeni Tawhara me ng\u0101 toenga o ng\u0101 tamariki a Iharaira. I te 13 TIhema 1971, katahi ka mutu te Tekau m\u0101 Rua ki runga o Te Rere p\u0101, n\u0101 ki reira ka hopungia e Mervyn McLean i te waiata nei.\nKi t\u0101 te whakam\u0101rama o ng\u0101 kaiwaiata, n\u0101 t\u0113tahi wahine, ko Moa t\u014dna ingoa, i whakar\u0113rea atu i a ia e tana whai\u0101ip\u014d.",
224# "type": "text"
225# }
226# ]
227# }
228# ],
229# "id": 836,
230# "mediaItems": [
231# {
232# "displayOrder": 1000,
233# "file": {
234# "contentType": "audio/mp3",
235# "size": 1838394,
236# "sourceUri": "https://koicarp.cms.waikato.ac.nz/td/v1/public/files/d9c6c7de-4591-41d3-b079-191f8ce37bd0.mp3",
237# "uri": "~/assets/audio/content/d9c6c7de_4591_41d3_b079_191f8ce37bd0.mp3"
238# },
239# "id": 9760,
240# "roles": [
241# {
242# "code": "READ_ALOUD",
243# "type": "READ_ALOUD"
244# }
245# ]
246# }
247# ],
248# "name": "T\u0113r\u0101 te p\u014d pango",
249# "type": "WAIATA"
250# },
251
252
253
# Plugin specific processing of doc_obj.
#
# Decodes one JSON segment of a Tipple export (passed by reference in
# $textref) and, when its 'type' is accepted by the 'metadata_exp'
# argument, copies selected fields onto the top section of $doc_obj:
#
#   name                                   -> Title
#   type (lower-cased, first letter upper) -> Type
#   documents[].sections[].caption/content -> <prefix>_caption / <prefix>_content
#       where <prefix> is the document's locale code, followed by
#       "_<type of first role>" when that role type is defined
#   mediaItems[].file.sourceUri            -> sourceUri
#   mediaItems[].file.contentType          -> contentType
#
# A type is accepted when 'metadata_exp' is the empty string, or when it
# has an entry in $self->{'metadata_exp_lookup'}.
#
# Whether or not the type was accepted, the pretty-printed JSON wrapped
# in <pre>...</pre> is added as the text of the top section.
#
# Always returns 1.
sub process {

    my $self = shift (@_);
    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;

    my $outhandle = $self->{'outhandle'};
    my $verbosity = $self->{'verbosity'};

    my $metadata_exp        = $self->{'metadata_exp'};
    my $metadata_exp_lookup = $self->{'metadata_exp_lookup'};

    my $top_section = $doc_obj->get_top_section();

    my $json_rec = JSON::from_json(${$textref}); # expects unicode string

    my $pretty_printer = JSON->new()->pretty();
    my $pretty_json    = $pretty_printer->encode($json_rec); # expects unicode string

    if ($verbosity >= 4) {
        # Encode to UTF-8 bytes only for printing; the unicode string is
        # what gets stored in the document further down.
        my $printable_json = Encode::encode("utf8",$pretty_json);
        my @report_lines = (
            "TippleExportJSONPlugin.pm::processing segment\n",
            "===========\n",
            "$printable_json\n",
            "===========\n",
        );
        foreach my $report_line (@report_lines) {
            print $outhandle $report_line;
        }
    }

    # tipple name -> Title
    # tipple type (e.g. genre, writer, composer ) -> Type

    my $tipple_name = $json_rec->{'name'};
    my $tipple_type = $json_rec->{'type'};
    my $type_label  = ucfirst(lc($tipple_type));

    # An empty metadata_exp accepts every type; otherwise the type must
    # have an entry in the lookup table.
    my $accept_all_types = ($metadata_exp eq "");
    my $lookup_entry     = $accept_all_types ? undef : $metadata_exp_lookup->{$tipple_type};
    my $is_metadata_name_match = ($accept_all_types || defined $lookup_entry) ? 1 : 0;

    # NOTE(review): $gs_metadata_name is worked out here but nothing
    # below reads it yet.
    my $gs_metadata_name;
    if ($is_metadata_name_match) {
        my $mapped_name = (defined $lookup_entry) ? $lookup_entry->{'gs_metadata_name'} : undef;
        $gs_metadata_name = (defined $mapped_name) ? $mapped_name : $type_label;
    }

    if ($is_metadata_name_match) {

        $doc_obj->add_utf8_metadata($top_section, "Title", $tipple_name);
        $doc_obj->add_utf8_metadata($top_section, "Type",  $type_label);

        # .documents
        #   .locale
        #     .code
        #   .roles
        #     .type
        #   .sections
        #     .caption + .content
        #
        # 'documents' in tipple corresponds to 'section of document' in greenstone

        my $tipple_documents = $json_rec->{'documents'};
        foreach my $tipple_document (@$tipple_documents) {

            my $locale_rec   = $tipple_document->{'locale'};
            my $role_list    = $tipple_document->{'roles'};
            my $section_list = $tipple_document->{'sections'};

            my $md_name_prefix = $locale_rec->{'code'};
            my $first_role_type = $role_list->[0]->{'type'};
            if (defined $first_role_type) {
                $md_name_prefix .= "_".$first_role_type;
            }

            foreach my $tipple_section (@$section_list) {
                # caption first, then content; absent fields are skipped
                foreach my $field_name ('caption', 'content') {
                    my $field_value = $tipple_section->{$field_name};
                    next unless defined $field_value;

                    my $md_name = "${md_name_prefix}_${field_name}";
                    $doc_obj->add_utf8_metadata($top_section,$md_name,$field_value);
                }
            }
        }

        # .mediaItems
        #   .file
        #     .sourceUri
        #     .contentType

        my $tipple_media_items = $json_rec->{'mediaItems'};
        foreach my $tipple_media_item (@$tipple_media_items) {
            my $file_rec = $tipple_media_item->{'file'};
            next unless defined $file_rec;

            my $source_uri   = $file_rec->{'sourceUri'};
            my $content_type = $file_rec->{'contentType'};

            $doc_obj->add_utf8_metadata($top_section,"sourceUri",  $source_uri);
            $doc_obj->add_utf8_metadata($top_section,"contentType",$content_type);
        }
    }

    # The pretty-printed form is stored rather than the raw $$textref.
    $doc_obj->add_utf8_text($top_section, "<pre>\n$pretty_json\n</pre>");

    return 1;
}
378
379
380
381# sub print_error
382# {
383
384# my $self = shift(@_);
385# my ($outhandle, $failhandle, $gli, $file, $error) = @_;
386
387# print $outhandle "TippleExportJSONPlugin Error: $file: $error\n";
388# print $failhandle "TippleExportJSONPlugin Error: $file: $error\n";
389# print STDERR "<ProcessingError n='$file' r='$error'/>\n" if ($gli);
390# }
391
3921;
Note: See TracBrowser for help on using the repository browser.