source: main/trunk/greenstone2/perllib/plugins/CSVFieldSeparator.pm@ 34249

Last change on this file since 34249 was 34249, checked in by ak19, 4 years ago

Dr Bainbridge, in his commit 32810, had expressed that he intended to commit his MetadataCSVPlugin-related work for dlheritage to the main GS after the then-upcoming GS3 release. His plugin changes support multiple values for a metadata field, and these changes work for me in the GS3tutorials collection, which uses a metadata.csv file. Like dlheritage, I also use the pipe symbol to separate multiple meta values for a meta field/column. Kathy had made a bugfix to MetadataCSVPlugin since Dr Bainbridge branched the code off for dlheritage. I will incorporate her bugfix into Dr Bainbridge's work, test that things still work, and commit that separately next. Committing from a uni machine, as there is something weird about the WMTB VM where I tested these plugin changes and additions: svn committing hasn't been working there for a few days now, but freezes while trying to transmit data.

File size: 3.1 KB
Line 
1##########################################################################
2#
3# CSVFieldSeparator -- helper plugin that 'auto' works out what the
4# comma-separated field character is
5#
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 2019 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28package CSVFieldSeparator;
29
30use PrintInfo;
31
32use strict;
33no strict 'refs'; # make an exception so we can use variables as filehandles
34
35BEGIN {
36 @CSVFieldSeparator::ISA = ('PrintInfo');
37}
38
# Command-line style arguments contributed by this helper.  The 'desc'
# values are placeholder keys in curly braces, not literal descriptions
# (presumably resolved through a strings/resource lookup elsewhere -- confirm).
my $arguments = [
    {
        'name' => 'csv_field_separator',
        'desc' => '{CSVFieldSeparator.csv_field_separator}',
        'type' => 'string',
        'deft' => 'auto',
        'reqd' => 'no',
    },
    {
        'name' => 'metadata_value_separator',
        'desc' => '{CSVFieldSeparator.metadata_value_separator}',
        'type' => 'string',
        'deft' => '',
        'reqd' => 'no',
    },
];

# Option record describing this helper; it is marked abstract and carries
# the argument list declared above.
my $options = {
    'name'     => 'CSVFieldSeparator',
    'desc'     => '{CSVFieldSeparator.desc}',
    'abstract' => 'yes',
    'inherits' => 'yes',
    'args'     => $arguments,
};
58
# Constructor.
#
# Parameters:
#   $class           - class name this constructor was invoked on
#   $pluginlist      - array ref; $class is appended to it
#   $inputargs       - passed through untouched to PrintInfo's constructor
#   $hashArgOptLists - hash ref; this module's argument list is appended to
#                      its "ArgList" entry and its option record to "OptList"
#
# Returns the object built by PrintInfo's constructor, re-blessed into $class.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
    push(@{$hashArgOptLists->{"OptList"}},$options);

    # Explicit Class->method call rather than the indirect-object form
    # "new PrintInfo(...)": the indirect form is parsed heuristically and is
    # especially fragile inside a package that defines its own sub new.
    # NOTE(review): the trailing 1 is an opaque flag to PrintInfo's
    # constructor -- its meaning is not visible from this file.
    my $self = PrintInfo->new($pluginlist, $inputargs, $hashArgOptLists, 1);

    return bless $self, $class;
}
72
73
# Private helper: returns (separator, count) for the candidate separator
# that occurs most often in $text.  Candidates are tried in the order
# ',' ';' "\t" '|' and a later candidate only wins with a strictly higher
# count, so ties (including "nothing found") resolve to the earlier one,
# with ',' as the overall default.
sub _most_frequent_separator
{
    my ($text) = @_;

    my $best_char  = ",";
    my $best_count = 0;

    foreach my $candidate (",", ";", "\t", "|") {
	my $count = () = ($text =~ m/\Q$candidate\E/g);
	if ($count > $best_count) {
	    $best_char  = $candidate;
	    $best_count = $count;
	}
    }

    return ($best_char, $best_count);
}

# Works out which field separator a line of CSV text most likely uses.
#
# Parameters:
#   $line        - one line of the CSV file (typically the header row);
#                  undef is treated as an empty line
#   $plugin_name - name used as the prefix of the verbose message; falls
#                  back to "CSVFieldSeparator" when undef
#
# Reads $self->{'outhandle'} and $self->{'verbosity'}: when an outhandle is
# set and verbosity is greater than 1, the chosen separator is reported on
# that handle.
#
# Returns one of ',' ';' "\t" '|' -- whichever occurs most often, with ','
# winning ties and being the default when none of them occurs.
sub resolve_auto
{
    my ($self) = shift @_;
    my ($line,$plugin_name) = @_;

    my $outhandle = $self->{'outhandle'};
    my $verbosity = $self->{'verbosity'};

    $line        = ""                  unless defined $line;
    $plugin_name = "CSVFieldSeparator" unless defined $plugin_name;

    # Separator characters inside double-quoted fields are data, not
    # delimiters, so drop quoted fields ("" is an escaped quote) before
    # counting.  An unterminated quote simply does not match and is left
    # in place.
    (my $unquoted = $line) =~ s/"[^"]*(?:""[^"]*)*"//g;

    my ($separate_char, $max_count) = _most_frequent_separator($unquoted);
    if ($max_count == 0) {
	# Nothing left outside quotes (or the quote marks were ordinary
	# text rather than field quoting): fall back to counting over the
	# raw line.
	($separate_char, $max_count) = _most_frequent_separator($line);
    }

    if ($outhandle) {
	# The option declared by this module is csv_field_separator, so
	# report the choice under that name.
	print $outhandle "$plugin_name: Auto selecting '$separate_char' as -csv_field_separator\n"
	    if ($verbosity && $verbosity > 1);
    }

    return $separate_char;
}
111
1121;
Note: See TracBrowser for help on using the repository browser.