source: main/trunk/model-sites-dev/heritage-nz/collect/reports-2019/perllib/plugins/CSVFieldSeparator.pm@ 33020

Last change on this file since 33020 was 33020, checked in by davidb, 5 years ago

Refactored

File size: 3.1 KB
Line 
1##########################################################################
2#
3# CSVFieldSeparator -- helper plugin that, when set to 'auto', works out
4# which character separates the fields of a CSV file
5#
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 2019 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28package CSVFieldSeparator;
29
30use PrintInfo;
31
32use strict;
33no strict 'refs'; # make an exception so we can use variables as filehandles
34
# Declare PrintInfo as the parent class. Doing this inside BEGIN means
# @ISA is populated while the file is still being compiled.
BEGIN {
    @CSVFieldSeparator::ISA = qw(PrintInfo);
}
38
# Command-line style arguments this plugin contributes. Both are optional
# strings; the "desc" values are brace-wrapped keys rather than literal text.
my $arguments = [
    {
        name => 'csv_field_separator',
        desc => '{CSVFieldSeparator.csv_field_separator}',
        type => 'string',
        deft => 'auto',
        reqd => 'no',
    },
    {
        name => 'metadata_value_separator',
        desc => '{CSVFieldSeparator.metadata_value_separator}',
        type => 'string',
        deft => '',
        reqd => 'no',
    },
];

# Summary record for this plugin; it carries the argument list above.
my $options = {
    name     => 'CSVFieldSeparator',
    desc     => '{CSVFieldSeparator.desc}',
    abstract => 'yes',
    inherits => 'yes',
    args     => $arguments,
};
58
# Constructor.
#
# Parameters:
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - handed unchanged to PrintInfo's constructor
#   $hashArgOptLists - hash ref; this plugin's argument definitions are
#                      appended to its "ArgList" entry and its option
#                      summary to its "OptList" entry
#
# Returns the object built by PrintInfo's constructor, re-blessed into
# $class.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    # Explicit class-method call. The indirect-object spelling
    # "new PrintInfo(...)" is resolved by parser heuristics and is easy to
    # misparse inside a package that defines its own new().
    # NOTE(review): the meaning of the trailing 1 is defined by PrintInfo,
    # which is not visible here -- confirm against that class.
    my $self = PrintInfo->new($pluginlist, $inputargs, $hashArgOptLists, 1);

    return bless $self, $class;
}
72
73
# Guess which character separates the fields of a CSV line.
#
# Parameters:
#   $line        - a sample line from the file; undef is treated as an
#                  empty line
#   $plugin_name - prefix for the verbose log message; defaults to
#                  "CSVFieldSeparator" when not supplied
#
# Returns whichever of ',' ';' "\t" '|' occurs most often in $line. Ties go
# to the earliest candidate in that order, so a line containing none of
# them yields ','.
#
# Separators inside double-quoted fields are data rather than delimiters,
# so balanced "..." sections are skipped when counting.
#
# When $self->{'outhandle'} is set and $self->{'verbosity'} is greater than
# 1, the choice is reported on that handle.
sub resolve_auto
{
    my ($self) = shift @_;
    my ($line, $plugin_name) = @_;

    my $outhandle = $self->{'outhandle'};
    my $verbosity = $self->{'verbosity'};

    $line        = ""                  unless defined $line;
    $plugin_name = "CSVFieldSeparator" unless defined $plugin_name;

    # Drop balanced double-quoted sections before counting. An escaped
    # quote ("") inside a field simply looks like two adjacent quoted
    # sections, so it is handled by the same pattern. An unbalanced quote
    # is left in place and the text after it is still counted.
    (my $unquoted = $line) =~ s/"[^"]*"//g;

    # Candidates in priority order; only a strictly larger count replaces
    # the current choice, which keeps ',' as the default.
    my @candidates    = (",", ";", "\t", "|");
    my $separate_char = $candidates[0];
    my $max_count     = -1;

    foreach my $candidate (@candidates) {
        # List-context match assigned through () yields the match count.
        my $count = () = ($unquoted =~ m/\Q$candidate\E/g);
        if ($count > $max_count) {
            $separate_char = $candidate;
            $max_count     = $count;
        }
    }

    if ($outhandle && defined $verbosity && $verbosity > 1) {
        # Show a tab as \t so the log line stays readable.
        my $display_char = ($separate_char eq "\t") ? '\t' : $separate_char;
        print $outhandle "$plugin_name: Auto selecting '$display_char' as -csv_field_separator\n";
    }

    return $separate_char;
}
111
1121;
Note: See TracBrowser for help on using the repository browser.