source: main/trunk/model-sites-dev/atea/collect/digital-nz/perllib/plugins/DNZJSONPlugin.pm@ 33166

Last change on this file since 33166 was 33166, checked in by davidb, 5 years ago

Collection config files and initial programming work for atea collections

File size: 4.1 KB
Line 
1###########################################################################
2#
3# DNZJSONPlugin.pm -- A plugin for JSON files resulting from downloading
4# records using the Digital NZ API
5#
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright 2016 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28package DNZJSONPlugin;
29
30
31use BaseImporter;
32use ReadTextFile;
33
34use strict;
35no strict 'refs';
36use multiread;
37
38use Encode;
39use JSON;
40
41# methods with identical signatures take precedence in the order given in the ISA list.
# Establish the inheritance chain at compile time.  The plugin derives
# from ReadTextFile only; an alternative chain that also lists
# MetadataRead is kept below, commented out.
BEGIN {
    # @DNZJSONPlugin::ISA = ('MetadataRead', 'ReadTextFile');
    @DNZJSONPlugin::ISA = qw(ReadTextFile);
}
46
47
# Argument descriptions specific to this plugin.  The single entry,
# process_exp, takes its default from get_default_process_exp().
my $arguments = [
    {
        'name' => "process_exp",
        'desc' => "{BaseImporter.process_exp}",
        'type' => "regexp",
        'reqd' => "no",
        'deft' => get_default_process_exp(),
    },
];
56
57
# Top-level description of the plugin; new() appends it to the caller's
# "OptList".  'args' points at the argument list declared for this plugin.
my $options = {
    'name'     => "DNZJSONPlugin",
    'desc'     => "{DNZJSONPlugin.desc}",
    'abstract' => "no",
    'inherits' => "yes",
    'args'     => $arguments,
};
63
64
# Constructor.
#
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - handed on unchanged to the ReadTextFile constructor
#   $hashArgOptLists - hash ref whose "ArgList" and "OptList" arrays
#                      receive this plugin's argument and option
#                      descriptions
#
# Returns the object built by ReadTextFile, re-blessed into $class.
sub new
{
    my $class = shift(@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;

    push(@$pluginlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    # Explicit class-method call: the indirect-object form
    # "new ReadTextFile(...)" is parse-ambiguous and discouraged.
    my $self = ReadTextFile->new($pluginlist, $inputargs, $hashArgOptLists);

    return bless $self, $class;
}
78
79
# Default value for the process_exp argument: a pattern that matches,
# case-insensitively, any filename ending in ".json".
sub get_default_process_exp
{
    my $json_suffix_exp = '(?i)\.json$';
    return $json_suffix_exp;
}
84
# Marks a JSON file this plugin recognises as a metadata file.
#
# $file is resolved against $base_dir via util::get_full_filenames().
# If the resulting path is not a plain file, or can_process_this_file()
# rejects it, undef is returned.  Otherwise the full path is recorded
# under $block_hash->{'metadata_files'} and 1 is returned.
# $pluginfo, $metadata and $gli are accepted but not used here.
sub file_block_read {
    my ($self, $pluginfo, $base_dir, $file, $block_hash, $metadata, $gli) = @_;

    # Only the full path (first element of the returned list) is needed.
    my ($full_path) = &util::get_full_filenames($base_dir, $file);

    unless (-f $full_path && $self->can_process_this_file($full_path)) {
        return undef; # can't recognise
    }

    # Flag this as a metadata file - needed for incremental build:
    # if the file changes, everything has to be reimported.
    $block_hash->{'metadata_files'}->{$full_path} = 1;

    return 1;
}
102
103
104
# Reads $filename through the inherited read_file() and then parses the
# resulting text as JSON, caching the decoded structure in
# $self->{'json_metadata'} for process() to use.
#
#   $filename, $encoding, $language - passed straight to SUPER::read_file
#   $textref                        - scalar ref that receives the file text
#
# Dies, naming the file, if the text is not valid JSON.
sub read_file
{
    my $self = shift (@_);
    my ($filename, $encoding, $language, $textref) = @_;

    # Read in the file the usual ReadTextFile way.
    # This ensures that $textref is a unicode aware string.
    $self->SUPER::read_file(@_);

    #
    # Now top-up the processing of the text with what this plugin
    # needs
    #

    # JSON's utf8 mode expects UTF-8 *octets*, but the text is a character
    # string at this point.  Handing it over as-is croaks ("Wide
    # character") or mis-decodes as soon as a record contains non-ASCII
    # text, so turn a character string back into octets on a private copy.
    # Strings that are still plain bytes are left alone.
    my $json_octets = $$textref;
    utf8::encode($json_octets) if utf8::is_utf8($json_octets);

    my $json_metadata;
    my $parsed_ok = eval {
        $json_metadata = JSON->new->utf8->decode($json_octets);
        1;
    };
    if (!$parsed_ok) {
        # Re-throw with the filename so a bad record can be located.
        my $parse_error = $@ || "unknown error\n";
        die "DNZJSONPlugin: failed to parse JSON in $filename: $parse_error";
    }

    $self->{'json_metadata'} = $json_metadata;
}
123
# Turns the JSON record cached in $self->{'json_metadata'} into document
# content.
#
#   $textref - scalar ref to the raw file text; added verbatim as the text
#              of the document's top section
#   $file    - only used in the diagnostic message below
#   $doc_obj - document object that receives the text and metadata
#   $pluginfo, $base_dir, $metadata, $gli - accepted but not used here
#
# Each top-level key K of the JSON object becomes "dnz.K" metadata: one
# entry for a plain value, one entry per element for an array.  Undefined
# and empty-string values are skipped.  Nested objects and arrays are
# skipped as well, since stringifying them would only store a reference
# address such as "HASH(0x...)"; their content is still present in the
# document text.
#
# Returns 1.
sub process
{
    my $self = shift (@_);
    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
    my $outhandle = $self->{'outhandle'};

    my $section = $doc_obj->get_top_section();

    # Add the raw text as the document text
    $doc_obj->add_utf8_text($section, $$textref);

    my $json_metadata = $self->{'json_metadata'};

    # A top-level JSON array or scalar has no keys to map onto metadata
    # names; dereferencing it as a hash would abort the import.
    if (ref($json_metadata) ne "HASH") {
        if (defined $outhandle) {
            my $shown_file = defined $file ? $file : "(unknown file)";
            print {$outhandle} "DNZJSONPlugin: top-level JSON value in $shown_file is not an object; no dnz.* metadata added\n";
        }
        return 1;
    }

    # Process each metadata entry
    foreach my $k (keys %$json_metadata) {
        my $md_vals = $json_metadata->{$k};

        # Treat a plain value as a one-element list so both shapes share
        # the same filtering below.
        my @candidates = (ref($md_vals) eq "ARRAY") ? @{$md_vals} : ($md_vals);

        foreach my $md_val (@candidates) {
            next unless defined $md_val;

            # Unblessed containers carry no usable string form.  Blessed
            # values (e.g. JSON booleans) are let through unchanged.
            my $container_type = ref($md_val);
            next if $container_type eq "HASH" || $container_type eq "ARRAY";

            next if $md_val eq "";

            $doc_obj->add_utf8_metadata($section, "dnz.$k", $md_val);
        }
    }

    # Record was processed successfully
    return 1;
}
160
161
1621;
Note: See TracBrowser for help on using the repository browser.