1 | ###########################################################################
|
---|
2 | #
|
---|
3 | # TippleExportJSONPlugin.pm
|
---|
4 | # -- A plugin for processing the JSON format exported from a Tipple server
|
---|
5 | #
|
---|
6 | # A component of the Greenstone digital library software
|
---|
7 | # from the New Zealand Digital Library Project at the
|
---|
8 | # University of Waikato, New Zealand.
|
---|
9 | #
|
---|
10 | # Copyright 2023 New Zealand Digital Library Project
|
---|
11 | #
|
---|
12 | # This program is free software; you can redistribute it and/or modify
|
---|
13 | # it under the terms of the GNU General Public License as published by
|
---|
14 | # the Free Software Foundation; either version 2 of the License, or
|
---|
15 | # (at your option) any later version.
|
---|
16 | #
|
---|
17 | # This program is distributed in the hope that it will be useful,
|
---|
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
20 | # GNU General Public License for more details.
|
---|
21 | #
|
---|
22 | # You should have received a copy of the GNU General Public License
|
---|
23 | # along with this program; if not, write to the Free Software
|
---|
24 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
25 | #
|
---|
26 | ###########################################################################
|
---|
27 |
|
---|
28 | package TippleExportJSONPlugin;
|
---|
29 |
|
---|
30 |
|
---|
31 | use SplitJSONFile;
|
---|
32 |
|
---|
33 | use strict;
|
---|
34 | no strict 'refs';
|
---|
35 | #use multiread;
|
---|
36 |
|
---|
37 | use Encode;
|
---|
38 | use JSON;
|
---|
39 |
|
---|
40 | # methods with identical signatures take precedence in the order given in the ISA list.
|
---|
# Set up inheritance at compile time: SplitJSONFile is the only parent class.
BEGIN {
    @TippleExportJSONPlugin::ISA = qw(SplitJSONFile);
}
|
---|
44 |
|
---|
45 |
|
---|
46 |
|
---|
# Arguments declared by this plugin; new() appends them to the "ArgList"
# of the hash it is given.
my $arguments = [
#    { 'name' => "process_exp",
#      'desc' => "{BaseImporter.process_exp}",
#      'type' => "regexp",
#      'reqd' => "no",
#      'deft' => &get_default_process_exp() },
    { 'name' => "split_exp",
      'desc' => "{SplitJSONFile.split_exp}",
      'type' => "string",
#     'deft' => "contentGroups,contentItems",
      'deft' => "contentItems",
      'reqd' => "no" },
    { 'name' => "metadata_exp",
      'desc' => "{SplitJSONFile.metadata_exp}",
      'type' => "string",
      # Only one 'deft' may be live: Perl keeps the last value given for a
      # repeated hash key.  With the empty default, process() accepts every
      # Tipple 'type'.  Alternative default naming a single Tipple type:
#     'deft' => "WAIATA",
      'deft' => "",
      'reqd' => "no" },
];
|
---|
66 |
|
---|
67 | # Other document-level metadata types to consider:
|
---|
68 | #
|
---|
69 | # .contentGroups:
|
---|
70 | # COMPOSER
|
---|
71 | # GENRE
|
---|
72 | # HAPU
|
---|
73 | # OCCASION
|
---|
74 | # TOPIC
|
---|
75 | # WRITER
|
---|
76 | #
|
---|
77 | # .contentItems:
|
---|
78 | # CONTENT_PAGE
|
---|
79 | # TK_LABEL
|
---|
80 |
|
---|
81 | # =>
|
---|
82 | # 'deft' => "COMPOSER,GENRE,HAPU,OCCASION,TOPIC,WRITER , WAIATA,CONTENT_PAGE,TK_LABEL",
|
---|
83 |
|
---|
84 |
|
---|
# Descriptor for this plugin; new() pushes it onto the "OptList" of the
# hash it is given.
my $options = {
    'name'     => 'TippleExportJSONPlugin',
    'desc'     => '{TippleExportJSONPlugin.desc}',
    'abstract' => 'no',
    'inherits' => 'yes',
    'args'     => $arguments,
};
|
---|
90 |
|
---|
91 |
|
---|
# Constructor.
#
# Parameters (positional, after the class name):
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - handed on unchanged to the SplitJSONFile constructor
#   $hashArgOptLists - hash ref; its "ArgList" array receives this plugin's
#                      $arguments and its "OptList" array receives $options
#
# Returns the object built by SplitJSONFile, blessed into $class, with an
# additional 'metadata_exp_lookup' hash derived from the 'metadata_exp'
# setting.
#
# 'metadata_exp' is a comma-separated list whose entries look like
#     TYPE          e.g.  WAIATA
#     TYPE->Name    e.g.  WAIATA->Song
# Each TYPE becomes a key of 'metadata_exp_lookup', mapped to
#     { 'exists' => 1, 'gs_metadata_name' => Name }
# where Name is undef when the entry has no "->Name" part.
sub new
{
    my ($class) = shift (@_);
    my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
    push(@{$hashArgOptLists->{"OptList"}},$options);

    # Explicit class-method call: the indirect-object form "new Class(...)"
    # is fragile inside a package that defines its own new().
    my $self = SplitJSONFile->new($pluginlist, $inputargs, $hashArgOptLists);
    $self = bless $self, $class;

    # Work on a trimmed copy; the stored 'metadata_exp' value is left as given.
    my $metadata_exp = $self->{'metadata_exp'};
    $metadata_exp = "" unless defined $metadata_exp;
    $metadata_exp =~ s/^\s+//;
    $metadata_exp =~ s/\s+$//;

    $self->{'metadata_exp_lookup'} = {};
    foreach my $md_exp_and_opt_mapping (split(/\s*,\s*/,$metadata_exp)) {
        # The "->Name" group must be optional ("?"): without it a bare TYPE
        # entry does not match at all and would be filed under the key "".
        my ($md_exp,$opt_mapping) = ($md_exp_and_opt_mapping =~ m/^(.+?)(?:->(.+))?$/);

        # Empty entries (e.g. from "A,,B") do not match; skip them.
        next unless defined $md_exp;

        $self->{'metadata_exp_lookup'}->{$md_exp} = {
            'exists'           => 1,
            'gs_metadata_name' => $opt_mapping,   # undef when no "->Name" given
        };
    }

    return $self;
}
|
---|
116 |
|
---|
117 |
|
---|
118 | #sub get_default_process_exp
|
---|
119 | #{
|
---|
120 | # return q^(?i)\.json$^;
|
---|
121 | #}
|
---|
122 |
|
---|
123 |
|
---|
124 | ####
|
---|
125 | # Example 1: Document Section of Tipple (minimal)
|
---|
126 | ####
|
---|
127 | #
|
---|
128 | # {
|
---|
129 | # "documents": [],
|
---|
130 | # "id": 3786,
|
---|
131 | # "mediaItems": [],
|
---|
132 | # "name": "Te Kooti Arikirangi Te Turuki",
|
---|
133 | # "type": "COMPOSER"
|
---|
134 | # },
|
---|
135 |
|
---|
136 | ####
|
---|
137 | # Example 2: Document Section of Tipple (audio)
|
---|
138 | ####
|
---|
139 | #
|
---|
140 | # {
|
---|
141 | # "documents": [
|
---|
142 | # {
|
---|
143 | # "locale": {
|
---|
144 | # "code": "en"
|
---|
145 | # },
|
---|
146 | # "roles": [
|
---|
147 | # {
|
---|
148 | # "code": "DESCRIPTION",
|
---|
149 | # "type": "DESCRIPTION"
|
---|
150 | # }
|
---|
151 | # ],
|
---|
152 | # "sections": [
|
---|
153 | # {
|
---|
154 | # "caption": "Lyrics",
|
---|
155 | # "collapsed": false,
|
---|
156 | # "content": "Dark cloudy night, good for eeling\nHold the eeling rod at Waitangi. Waitangi Tahi.\nI prepare my kete of fern roots, when it is filled\nThen turn around, go south to the mountain Titiwa\nHave to be like a paua to climb it's steepness\nThen carry on past going south to the boundaries of Gisborne.\nThen turn and go west to Uenuku\nThis area is acknowledged by a rock, Te Karoro a Tamatea,\nThe rock where Tamatea hit and split his canoe,\nAlso in the same incident, bumped his nose and it bled\nI then jump the shores of K\u0101napanapa\nTo collect water to take to Te Pua-ki-Te-Reinga\nI then grasp my kete of belongings by the handles\nI look back with sadness and yearning to my land region\nNo one sees the sadness within me\nAs I bow and turn my head with sorrow",
|
---|
157 | # "type": "text"
|
---|
158 | # },
|
---|
159 | # {
|
---|
160 | # "caption": "Explanation",
|
---|
161 | # "collapsed": false,
|
---|
162 | # "content": "This Te Whakat\u014dhea waiata tangi sung at Waioweka, the hap\u016b of Ng\u0101ti Ira, captures the plight of it's people in a war ravaged environment. It reflects the pain and suffering our people have endured for the past 155 years.\n\nSome of the singers that can be heard are our koroua and kuia, Kaiora Tai, Himiona Kahika, Rea Rewiri and Heeni Tawhara. This recording was done at Te Rere p\u0101, 13 Tihema 1971, following the Tekau m\u0101 Rua.\n\nThe explanation given was in regards to a woman named Moa, who composed this waiata, after his lover deserted her.",
|
---|
163 | # "type": "text"
|
---|
164 | # }
|
---|
165 | # ]
|
---|
166 | # },
|
---|
167 | # {
|
---|
168 | # "locale": {
|
---|
169 | # "code": "en"
|
---|
170 | # },
|
---|
171 | # "roles": [
|
---|
172 | # {
|
---|
173 | # "code": "TEASER",
|
---|
174 | # "type": "TEASER"
|
---|
175 | # }
|
---|
176 | # ],
|
---|
177 | # "sections": [
|
---|
178 | # {
|
---|
179 | # "collapsed": false,
|
---|
180 | # "content": "A traditional chant of Ng\u0101ti Ira. In this recording you can hear the voices of our pakeke.",
|
---|
181 | # "type": "text"
|
---|
182 | # }
|
---|
183 | # ]
|
---|
184 | # },
|
---|
185 | # {
|
---|
186 | # "locale": {
|
---|
187 | # "code": "mi_NZ"
|
---|
188 | # },
|
---|
189 | # "roles": [
|
---|
190 | # {
|
---|
191 | # "code": "TEASER",
|
---|
192 | # "type": "TEASER"
|
---|
193 | # }
|
---|
194 | # ],
|
---|
195 | # "sections": [
|
---|
196 | # {
|
---|
197 | # "collapsed": false,
|
---|
198 | # "content": "He m\u014dteatea t\u0113nei o Ng\u0101ti Ira. Ka rongo ki te te reo waiata o r\u0101tou m\u0101",
|
---|
199 | # "type": "text"
|
---|
200 | # }
|
---|
201 | # ]
|
---|
202 | # },
|
---|
203 | # {
|
---|
204 | # "locale": {
|
---|
205 | # "code": "mi_NZ"
|
---|
206 | # },
|
---|
207 | # "roles": [
|
---|
208 | # {
|
---|
209 | # "code": "DESCRIPTION",
|
---|
210 | # "type": "DESCRIPTION"
|
---|
211 | # }
|
---|
212 | # ],
|
---|
213 | # "sections": [
|
---|
214 | # {
|
---|
215 | # "caption": "Kupu",
|
---|
216 | # "collapsed": false,
|
---|
217 | # "content": "T\u0113r\u0101 te p\u014d pango\nPuritia mai te tautara ki Waitangi r\u0101 e\nKo te rite i taku tarai i k\u012ba mai nei\nTahuri ka Titiwa tonu\nTitiwa mai ana a roto he paua piri ki te toka\nKia m\u0101m\u0101 tonu atu ko te t\u014dnga atu o te r\u0101\nKi \u014dna papaihoretanga ki a Uenuku\nKo te toka i a Parirau ko te Karoro a Tamatea\nMe he ako ia nei te waka nei ka pakaru ki taku tinana\nPenei i reia e au ki te ihu o te tio hakia e\nPeke ana au ki te tai ng\u0101 riu o K\u0101napanapa\nHei kawe i ng\u0101 wai e ahau ki Te Pua-ki-Te-Reinga\nKia hopu nei \u014d akap\u016b te taunga atu ki t\u0101whiti e\nTe pae ki taiao mai\nKo wai ka kite i au e?\nKo te whakam\u0101 hoki rawa t\u0113nei ka t\u014dtope i taku rae e i...",
|
---|
218 | # "type": "text"
|
---|
219 | # },
|
---|
220 | # {
|
---|
221 | # "caption": "Whakam\u0101rama",
|
---|
222 | # "collapsed": false,
|
---|
223 | # "content": "He waiata aroha t\u0113nei. He k\u014drero e p\u0101 ana ki \u0113tahi whenua o Ng\u0101ti Ira, o Waioweka. Ka rite an\u014d te k\u014drero nei m\u014d te rohe whenua o Ng\u0101ti Ira o Waioweka \"Waitangi Tahi ki te raki, Titiwa ki te rawhiti, Waitangi Rua ki te Tonga, Matiti ki te hauauru. He k\u014drero tuku iho m\u014d te rohe whenua o Ng\u0101ti Ira. He k\u014drero an\u014d e whakaatu mai i te mamae me te p\u0101katokato o te iwi nei, o Te Whakat\u014dhea.\n\nKo \u0113tahi kaiwaiata e rangona nei, ko Kaiora Tai, Himiona Kahika, ko Rea Rewiri, ko Heeni Tawhara me ng\u0101 toenga o ng\u0101 tamariki a Iharaira. I te 13 TIhema 1971, katahi ka mutu te Tekau m\u0101 Rua ki runga o Te Rere p\u0101, n\u0101 ki reira ka hopungia e Mervyn McLean i te waiata nei.\nKi t\u0101 te whakam\u0101rama o ng\u0101 kaiwaiata, n\u0101 t\u0113tahi wahine, ko Moa t\u014dna ingoa, i whakar\u0113rea atu i a ia e tana whai\u0101ip\u014d.",
|
---|
224 | # "type": "text"
|
---|
225 | # }
|
---|
226 | # ]
|
---|
227 | # }
|
---|
228 | # ],
|
---|
229 | # "id": 836,
|
---|
230 | # "mediaItems": [
|
---|
231 | # {
|
---|
232 | # "displayOrder": 1000,
|
---|
233 | # "file": {
|
---|
234 | # "contentType": "audio/mp3",
|
---|
235 | # "size": 1838394,
|
---|
236 | # "sourceUri": "https://koicarp.cms.waikato.ac.nz/td/v1/public/files/d9c6c7de-4591-41d3-b079-191f8ce37bd0.mp3",
|
---|
237 | # "uri": "~/assets/audio/content/d9c6c7de_4591_41d3_b079_191f8ce37bd0.mp3"
|
---|
238 | # },
|
---|
239 | # "id": 9760,
|
---|
240 | # "roles": [
|
---|
241 | # {
|
---|
242 | # "code": "READ_ALOUD",
|
---|
243 | # "type": "READ_ALOUD"
|
---|
244 | # }
|
---|
245 | # ]
|
---|
246 | # }
|
---|
247 | # ],
|
---|
248 | # "name": "T\u0113r\u0101 te p\u014d pango",
|
---|
249 | # "type": "WAIATA"
|
---|
250 | # },
|
---|
251 |
|
---|
252 |
|
---|
253 |
|
---|
# Do plugin-specific processing of doc_obj for one Tipple JSON record.
#
# Parameters (after $self):
#   $textref - reference to the JSON text of a single record; it is parsed
#              with JSON::from_json
#   $doc_obj - document object that receives the metadata and the text
#   $pluginfo, $base_dir, $file, $metadata, $gli - accepted but not used here
#
# Metadata is added only when the record's 'type' is selected by the
# 'metadata_exp' setting (an empty setting selects every type):
#   Title, Type                  from .name and .type
#   <locale>[_<role>]_caption    from .documents[].sections[].caption
#   <locale>[_<role>]_content    from .documents[].sections[].content
#   sourceUri, contentType       from .mediaItems[].file
# The pretty-printed JSON is always stored as the text of the top section,
# HTML-escaped and wrapped in <pre>.
#
# Returns 1.  JSON::from_json croaks on malformed JSON; that error is not
# caught here.
sub process {

    my $self = shift (@_);
    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;

    my $outhandle = $self->{'outhandle'};
    my $verbosity = $self->{'verbosity'};

    my $metadata_exp = $self->{'metadata_exp'};
    my $metadata_exp_lookup = $self->{'metadata_exp_lookup'};

    my $cursection = $doc_obj->get_top_section();

    my $json_rec = JSON::from_json($$textref); # expects unicode string

    # The 'utf8' option is not enabled, so encode() yields a unicode string.
    my $json_pretty = JSON->new()->pretty();
    my $json_unicode_str = $json_pretty->encode($json_rec);

    if ($verbosity>=4) {

        my $json_utf8_printable_str = Encode::encode("utf8",$json_unicode_str);
        print $outhandle "TippleExportJSONPlugin.pm::processing segment\n";
        print $outhandle "===========\n";
        print $outhandle "$json_utf8_printable_str\n";
        print $outhandle "===========\n";

    }

    # tipple name -> Title
    # tipple type (e.g. genre, writer, composer ) -> Type

    my $tipple_name = $json_rec->{'name'};
    my $tipple_type = $json_rec->{'type'};
    # A missing type behaves like the empty string in lc() and as a hash key;
    # make that explicit.
    $tipple_type = "" unless defined $tipple_type;

    my $tipple_type_formatted = ucfirst(lc($tipple_type));

    my $is_metadata_name_match = 0;
    # NOTE(review): $gs_metadata_name is worked out here but nothing below
    # reads it yet -- presumably the mapped name was meant to label a
    # metadata field; confirm the intent before removing or wiring it in.
    my $gs_metadata_name;

    if ($metadata_exp eq "") {
        # No filter configured: every record type is accepted.
        $is_metadata_name_match = 1;
        $gs_metadata_name = $tipple_type_formatted;
    }
    elsif (defined $metadata_exp_lookup->{$tipple_type}) {
        $is_metadata_name_match = 1;
        if (defined $metadata_exp_lookup->{$tipple_type}->{'gs_metadata_name'}) {
            $gs_metadata_name = $metadata_exp_lookup->{$tipple_type}->{'gs_metadata_name'};
        }
        else {
            $gs_metadata_name = $tipple_type_formatted;
        }
    }

    if ($is_metadata_name_match) {

        $doc_obj->add_utf8_metadata($cursection, "Title",$tipple_name);
        $doc_obj->add_utf8_metadata($cursection, "Type", $tipple_type_formatted);

        # .documents
        #   .locale
        #     .code
        #   .roles
        #     .type
        #   .sections
        #     .caption + .content
        #

        my $tipple_documents = $json_rec->{'documents'};
        foreach my $tipple_document (@$tipple_documents) {

            # 'documents' in tipple corresponds to 'section of document' in greenstone

            my $tipple_locale   = $tipple_document->{'locale'};
            my $tipple_roles    = $tipple_document->{'roles'};
            my $tipple_sections = $tipple_document->{'sections'};

            # Metadata names are prefixed "<locale code>[_<type of first role>]".
            my $md_name_prefix = $tipple_locale->{'code'};
            $md_name_prefix = "" unless defined $md_name_prefix;

            # Look at the first role without autovivifying an entry in an
            # empty or missing roles list.
            my $first_role = (defined $tipple_roles) ? $tipple_roles->[0] : undef;
            if (defined $first_role && defined $first_role->{'type'}) {
                $md_name_prefix .= "_".$first_role->{'type'};
            }

            foreach my $tipple_section (@$tipple_sections) {
                my $md_val_caption = $tipple_section->{'caption'};
                my $md_val_content = $tipple_section->{'content'};

                if (defined $md_val_caption) {
                    my $md_name_caption = "${md_name_prefix}_caption";
                    $doc_obj->add_utf8_metadata($cursection,$md_name_caption,$md_val_caption);
                }

                if (defined $md_val_content) {
                    my $md_name_content = "${md_name_prefix}_content";
                    $doc_obj->add_utf8_metadata($cursection,$md_name_content,$md_val_content);
                }
            }

        }

        # .mediaItems
        #   .file
        #     .sourceUri
        #     .contentType

        my $tipple_media_items = $json_rec->{'mediaItems'};
        foreach my $tipple_media_item (@$tipple_media_items) {
            my $tipple_file = $tipple_media_item->{'file'};
            if (defined $tipple_file) {

                my $tipple_source_uri   = $tipple_file->{'sourceUri'};
                my $tipple_content_type = $tipple_file->{'contentType'};

                # Same rule as caption/content above: only record values
                # that are actually present.
                if (defined $tipple_source_uri) {
                    $doc_obj->add_utf8_metadata($cursection,"sourceUri", $tipple_source_uri);
                }
                if (defined $tipple_content_type) {
                    $doc_obj->add_utf8_metadata($cursection,"contentType",$tipple_content_type);
                }

            }
        }
    }

    # The text is stored as HTML, so escape the characters that are special
    # in HTML ('&' first, so the entities produced below are not re-escaped);
    # otherwise any markup inside the record would be interpreted rather than
    # displayed.
    my $json_html_str = $json_unicode_str;
    $json_html_str =~ s/&/&amp;/g;
    $json_html_str =~ s/</&lt;/g;
    $json_html_str =~ s/>/&gt;/g;

    # $doc_obj->add_utf8_text($cursection, $$textref);
    $doc_obj->add_utf8_text($cursection, "<pre>\n$json_html_str\n</pre>");

    return 1;
}
|
---|
378 |
|
---|
379 |
|
---|
380 |
|
---|
381 | # sub print_error
|
---|
382 | # {
|
---|
383 |
|
---|
384 | # my $self = shift(@_);
|
---|
385 | # my ($outhandle, $failhandle, $gli, $file, $error) = @_;
|
---|
386 |
|
---|
387 | # print $outhandle "TippleExportJSONPlugin Error: $file: $error\n";
|
---|
388 | # print $failhandle "TippleExportJSONPlugin Error: $file: $error\n";
|
---|
389 | # print STDERR "<ProcessingError n='$file' r='$error'/>\n" if ($gli);
|
---|
390 | # }
|
---|
391 |
|
---|
392 | 1;
|
---|