source: main/trunk/greenstone2/perllib/plugins/CSVFieldSeparator.pm@ 37047

Last change on this file since 37047 was 37047, checked in by davidb, 16 months ago

Introduction of 'metadata_separate_fields', a plugin option that controls which fields have the value-separator split applied to them. By default, all fields are split when the value split character is specified; however, there are situations where you want to split on (say) ',' for a Keyword field but not for an Abstract field that happens to contain commas.

File size: 3.3 KB
Line 
1##########################################################################
2#
3# CSVFieldSeparator -- helper plugin that 'auto' works out what the
4# comma-separated field character is
5#
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 2019 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28package CSVFieldSeparator;
29
30use PrintInfo;
31
32use strict;
33no strict 'refs'; # make an exception so we can use variables as filehandles
34
# Declare PrintInfo as the parent class.  The assignment sits in a BEGIN
# block so that @ISA is already populated while the rest of this file is
# still being compiled.
BEGIN {
    @CSVFieldSeparator::ISA = qw(PrintInfo);
}
38
# Command-line style options contributed by this helper.  Each entry gives:
#   name - option name
#   desc - a "{Package.key}" placeholder (presumably resolved to display
#          text elsewhere -- not visible in this file)
#   type - value type
#   deft - default value
#   reqd - whether the option is mandatory ("yes"/"no")
my $arguments = [
    {
        name => 'csv_field_separator',
        desc => '{CSVFieldSeparator.csv_field_separator}',
        type => 'string',
        deft => 'auto',
        reqd => 'no',
    },
    {
        name => 'metadata_value_separator',
        desc => '{CSVFieldSeparator.metadata_value_separator}',
        type => 'string',
        deft => '',
        reqd => 'no',
    },
    {
        name => 'metadata_separate_fields',
        desc => '{CSVFieldSeparator.metadata_separate_fields}',
        type => 'string',
        deft => '',
        reqd => 'no',
    },
];

# Descriptor for this package as a whole; 'args' points at the very same
# array as $arguments (it is not a copy).
my $options = {
    name     => 'CSVFieldSeparator',
    desc     => '{CSVFieldSeparator.desc}',
    abstract => 'yes',
    inherits => 'yes',
    args     => $arguments,
};
63
# Constructor.
#
# Arguments (after the class name):
#   $pluginlist      - array ref; $class is appended to it
#   $inputargs       - handed to PrintInfo's constructor unchanged
#   $hashArgOptLists - hash ref; its "ArgList" array gains this package's
#                      argument descriptors and its "OptList" array gains
#                      this package's option descriptor
#
# Returns the object created by PrintInfo's constructor, blessed into $class.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    # Explicit Class->method call instead of the indirect-object form
    # ("new PrintInfo(...)"), which Perl can mis-parse when a sub called
    # new() or PrintInfo() happens to be in scope.
    # NOTE(review): the meaning of the trailing 1 is defined by PrintInfo's
    # constructor and is not visible from this file.
    my $self = PrintInfo->new($pluginlist, $inputargs, $hashArgOptLists, 1);

    return bless $self, $class;
}
77
78
# Guess which character separates the fields of a CSV-style line.
#
# Arguments:
#   $line        - a sample line of the file (an undefined value is treated
#                  as an empty line)
#   $plugin_name - label used as the prefix of the progress message
#
# Reads $self->{'outhandle'} and $self->{'verbosity'}; when an outhandle is
# set and verbosity is greater than 1, the chosen character is reported there.
#
# Returns whichever of ',' ';' "\t" '|' occurs most often in $line outside
# double-quoted fields.  Ties go to the candidate listed first, so a line
# containing none of them yields ','.
sub resolve_auto
{
    my ($self) = shift @_;
    my ($line, $plugin_name) = @_;

    my $outhandle = $self->{'outhandle'};
    my $verbosity = $self->{'verbosity'};

    $line = "" unless defined $line;

    # A candidate character inside a double-quoted field is data, not a
    # separator (e.g. "Last, First";"Age"), so drop complete quoted spans
    # before counting.  A doubled quote ("") inside a span is an escaped
    # quote.  An unterminated quote is left untouched.
    (my $unquoted = $line) =~ s/"(?:[^"]|"")*"//g;

    # Candidates in priority order: a later one only wins with a strictly
    # higher count than everything before it.
    my @candidates = (",", ";", "\t", "|");

    my $separate_char = $candidates[0];
    my $max_count = -1;
    foreach my $candidate (@candidates) {
        my $count = () = ($unquoted =~ m/\Q$candidate\E/g);
        if ($count > $max_count) {
            $separate_char = $candidate;
            $max_count = $count;
        }
    }

    # NOTE(review): the message names "-separate_char", whereas the option
    # declared in this file is "csv_field_separator" -- confirm which name
    # users should see before changing the text.
    if ($outhandle && defined $verbosity && $verbosity > 1) {
        print $outhandle "$plugin_name: Auto selecting '$separate_char' as -separate_char\n";
    }

    return $separate_char;
}
116
1171;
Note: See TracBrowser for help on using the repository browser.