source: main/trunk/model-sites-dev/heritage-nz/collect/reports-2019/perllib/plugins/CSVFieldSeparator.pm@ 32818

Last change on this file since 32818 was 32818, checked in by davidb, 5 years ago

New option added to allow metadata values within CSV to be split into separate values

File size: 3.1 KB
Line 
1##########################################################################
2#
3# CSVFieldSeparator -- helper plugin that automatically works out which
4# character separates the fields in a line of CSV-style text
5#
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 2019 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28package CSVFieldSeparator;
29
30use PrintInfo;
31
32use strict;
33no strict 'refs'; # make an exception so we can use variables as filehandles
34
# Establish the inheritance chain at compile time: CSVFieldSeparator is a
# PrintInfo, so method lookups fall through to that class.
BEGIN {
    @CSVFieldSeparator::ISA = qw(PrintInfo);
}
38
# Argument specifications contributed by this helper.  Each entry is a hash
# with 'name', 'desc', 'type', 'deft' (default value) and 'reqd' keys.
# NOTE(review): the brace-wrapped 'desc' values look like lookup keys for a
# string/resource table rather than literal text -- confirm against PrintInfo.

# Field separator; the default "auto" pairs with resolve_auto() below.
my $csv_field_separator_arg = {
    'name' => 'csv_field_separator',
    'desc' => '{CSVFieldSeparator.csv_field_separator}',
    'type' => 'string',
    'deft' => 'auto',
    'reqd' => 'no',
};

# Separator used to split a single metadata value into several values;
# defaults to the empty string.
my $metadata_value_separator_arg = {
    'name' => 'metadata_value_separator',
    'desc' => '{CSVFieldSeparator.metadata_value_separator}',
    'type' => 'string',
    'deft' => '',
    'reqd' => 'no',
};

my $arguments = [ $csv_field_separator_arg, $metadata_value_separator_arg ];
52
# Option block describing this helper as a whole.  It is marked abstract and
# inheriting, and carries the argument specifications declared above; new()
# pushes it onto the caller's "OptList".
my $options = {
    'name'     => 'CSVFieldSeparator',
    'desc'     => '{CSVFieldSeparator.desc}',
    'abstract' => 'yes',
    'inherits' => 'yes',
    'args'     => $arguments,
};
58
# Constructor.
#
# Parameters:
#   $class           - class name the object is blessed into
#   $pluginlist      - array ref; $class is appended to it
#   $inputargs       - passed through untouched to PrintInfo's constructor
#   $hashArgOptLists - hash ref; this helper's argument specs are appended to
#                      its "ArgList" entry and its option block to "OptList"
#
# Returns the object produced by PrintInfo's constructor, re-blessed into
# $class.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
    push(@{$hashArgOptLists->{"OptList"}},$options);

    # Call the parent constructor with explicit method syntax; the
    # indirect-object form ("new PrintInfo(...)") is parsed heuristically
    # and can be misread if a sub called 'new' is ever in scope.
    # NOTE(review): the trailing 1 presumably flags that this is being
    # invoked on behalf of a subclass -- confirm against PrintInfo::new.
    my $self = PrintInfo->new($pluginlist, $inputargs, $hashArgOptLists, 1);

    return bless $self, $class;
}
72
73
# resolve_auto($line, $plugin_name, $outhandle, $verbosity)
#
# Work out which character most likely separates the fields of $line by
# counting occurrences of the common separators ',' ';' tab and '|' and
# picking the most frequent one.
#
# Parameters:
#   $line        - text to inspect; undef is treated as an empty string
#   $plugin_name - prefix for the log message
#   $outhandle   - filehandle for the log message; nothing is printed when
#                  it is false
#   $verbosity   - the message is printed only when this is greater than 1;
#                  undef is treated as 0
#
# Returns the chosen separator character.  Ties go to the earlier candidate
# in the order ',' ';' tab '|', so a line containing none of them yields ','.
sub resolve_auto
{
    my ($self) = shift @_;
    my ($line,$plugin_name,$outhandle,$verbosity) = @_;

    # A missing line behaves like an empty one: all counts are zero.
    $line = "" unless defined $line;

    # Tally every candidate separator in a single pass over the line.
    my %count_of;
    foreach my $char ($line =~ m/[,;\t|]/g) {
        $count_of{$char}++;
    }

    # Candidates in priority order; a later one wins only with a strictly
    # higher count, so ',' remains the fallback.
    my @candidates = (",", ";", "\t", "|");
    my $separate_char = $candidates[0];
    my $max_count = $count_of{$separate_char} || 0;

    foreach my $candidate (@candidates[1 .. $#candidates]) {
        my $count = $count_of{$candidate} || 0;
        if ($count > $max_count) {
            $separate_char = $candidate;
            $max_count = $count;
        }
    }

    if ($outhandle && ($verbosity || 0) > 1) {
        # Show a tab as \t so the choice is visible in the log, and name the
        # option as it is declared in this plugin's argument list.
        my $shown_char = ($separate_char eq "\t") ? "\\t" : $separate_char;
        print $outhandle "$plugin_name: Auto selecting '$shown_char' as -csv_field_separator\n";
    }

    return $separate_char;
}
109
1101;
Note: See TracBrowser for help on using the repository browser.