root/main/trunk/greenstone2/perllib/gsmysql.pm @ 32640

Revision 32640, 36.5 KB (checked in by ak19, 6 months ago)

Important changes (and commented out debugging statements) to get charset encodings (utf8 or specifically utf8mb4 in perl mysql case) to work with GS SQL Plugs on Windows. First, Strawberry Perl 5.22 was required and hence committed before this since Strawberry Perl 5.18 came with older mysql DBD/DBI packages that didn't decode utf8 content in the database when content was retrieved. Strawberry Perl 5.22 came with newer versions of DBD and DBI that do this automatically, as has been the case in Linux testing where Ubuntu had Perl 5.22 with sufficiently new versions of the DBI/DBD mysql perl packages. The newer Perl and specifically the newer MySQL DBD/DBI packages required some important changes to the gsmysql.pm code in the way charset encoding stuff is configured. This should work on Linux too, as that already allowed 2 ways to configure DB encoding stuff. I chose the single-line version on Linux, no longer supported with the DBI/DBD upgrade that comes with our new Strawberry Perl 5.22, so opting for the 2 line version to setup the DB encoding stuff which works on Windows and should continue to work on Linux too (where I had tested it before settling on the single-line variant).

Line 
1###########################################################################
2#
3# gsmysql.pm -- Uses DBI for MySQL related utility functions used by
4# GreenstoneSQLPlugout and GreenstoneSQLPlugin too.
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 1999 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27package gsmysql;
28
29use strict;
30no strict 'refs';
31no strict 'subs';
32
33use DBI; # the central package for this module used by GreenstoneSQL Plugout and Plugin
34use FileUtils;
35use gsprintf;
36
37#################
38# Database functions that use the perl DBI module (with the DBD driver module for mysql)
39# https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm
40# https://metacpan.org/pod/DBD::mysql
41#################
42
43
44# singleton connection
45my $_dbh_instance = undef; # calls undef() function. See https://perlmaven.com/undef-and-defined-in-perl
46my $ref_count = 0;
47
48
49# Add signal handlers to cleanup and disconnect from db on sudden termination, incl cancel build
50# https://www.perl.com/article/37/2013/8/18/Catch-and-Handle-Signals-in-Perl/
51$SIG{INT}  = \&finish_signal_handler;
52$SIG{TERM}  = \&finish_signal_handler;
53$SIG{KILL}  = \&finish_signal_handler;
54
55sub finish_signal_handler {
56    my ($sig) = @_; # one of INT|KILL|TERM
57
58    if ($_dbh_instance) { # database handle (note, using singleton) still active.
59   
60    # If autocommit wasn't set, then this is a cancel operation.
61    # If we've not disconnected from the sql db yet and if we've not committed
62    # transactions yet, then cancel means we do a rollback here
63   
64    if($_dbh_instance->{AutoCommit} == 0) {
65        print STDERR "   User cancelled: rolling back SQL database transaction.\n";
66        $_dbh_instance->rollback(); # will warn on failure, nothing more we can/want to do,
67
68        print STDERR "****************************\n";
69        &gsprintf::gsprintf(STDERR, "{gsmysql.restore_backups_on_build_cancel_msg}\n");
70        print STDERR "****************************\n";
71
72        # TODO?
73        # Since we'll be disconnecting (cancel -> die() -> dbi::DESTROY() -> dbi::disconnect()),
74        # ensure $sth->finish() called on statement handles if any fetch remnants remain
75    }
76    }
77   
78    die "Caught a $sig signal $!"; # die() will always call destructor (sub DESTROY)
79}
80
81
82# Need to pass in to constructor for creating member variables:
83# - collection_name
84# - verbosity
85
86# For connection to MySQL, need:
87#  - db_driver, db_client_user, db_client_pwd, db_host, (db_port not used at present)
88# So these will be parameterised, but in a hashmap, for just the connect() method.
89
90# Parameterise (one or more methods may use them):
91# - db_name (which is the GS3 sitename, or "greenstone2" for GS2)
92
93# Don't need to parameterise the db_encoding (db content encoding)
94# This is for now an internal variable, as the Greenstone db contents are always going to be utf8
95# reflecting how their doc.xml counterparts should only contain utf8.
96#
97# - MySQL can set the desired db_encoding at server, db, table levels.
98# Not sure whether other DBs support it at the same levels.
99# For MySQL we set the enc during connect at server level.
100#
101sub new
102
103    my $class = shift(@_);
104   
105    my ($params_map) = @_;
106   
107   
108    # https://stackoverflow.com/questions/7083453/copying-a-hashref-in-perl
109    # Making a shallow copy works, and can handle unknown params:
110    #my $self = $params_map;
111
112    # but being explicit for class params needed for MySQL:
113    my $self = {
114    'collection_name' => $params_map->{'collection_name'},
115    'verbosity' => $params_map->{'verbosity'} || 1
116    };
117
118    # The db_encoding option is presently not passed in to this constructor as parameter.
119    # Placed here to indicate it's sort of optional.
120    # Since docxml are all in utf8, the contents of the GS SQL database should be too,
121    # So making utf8 the hidden default at present.
122    $self->{'db_encoding'} = $params_map->{'db_encoding'} || "utf8";
123   
124    $self = bless($self, $class);   
125
126    $self->{'tablename_prefix'} = $self->sanitize_name($params_map->{'collection_name'});
127   
128    return $self;
129}
130
131# On die(), an object's destructor is called.
132# See https://www.perl.com/article/37/2013/8/18/Catch-and-Handle-Signals-in-Perl/
133# We want to ensure we've closed the db connection in such cases.
134# "It’s common to call die when handling SIGINT and SIGTERM. die is useful because it will ensure that Perl stops correctly: for example Perl will execute a destructor method if present when die is called, but the destructor method will not be called if a SIGINT or SIGTERM is received and no signal handler calls die."
135#
136# Useful: https://perldoc.perl.org/perlobj.html#Destructors
137# For more on when destroy is called, see https://www.perlmonks.org/?node_id=1020920
138#
139# However, database is automatically disconnected on DBI DESTROY method called by perl on
140# a perl process' termination:
141#
142# https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#disconnect
143# 'Disconnects the database from the database handle. disconnect is typically only used before exiting the program. The handle is of little use after disconnecting.
144#
145# The transaction behaviour of the disconnect method is, sadly, undefined. Some database systems (such as Oracle and Ingres) will automatically commit any outstanding changes, but others (such as Informix) will rollback any outstanding changes. Applications not using AutoCommit should explicitly call commit or rollback before calling disconnect.
146#
147# The database is automatically disconnected by the DESTROY method if still connected when there are no longer any references to the handle. The DESTROY method for each driver should implicitly call rollback to undo any uncommitted changes. This is vital behaviour to ensure that incomplete transactions don't get committed simply because Perl calls DESTROY on every object before exiting. Also, do not rely on the order of object destruction during "global destruction", as it is undefined.
148#
149# Generally, if you want your changes to be committed or rolled back when you disconnect, then you should explicitly call "commit" or "rollback" before disconnecting.
150#
151# If you disconnect from a database while you still have active statement handles (e.g., SELECT statement handles that may have more data to fetch), you will get a warning. The warning may indicate that a fetch loop terminated early, perhaps due to an uncaught error. To avoid the warning call the finish method on the active handles.'
152#
153#
154sub DESTROY {
155    my $self = shift;
156   
157    if (${^GLOBAL_PHASE} eq 'DESTRUCT') {
158
159    if ($_dbh_instance) { # database handle still active. Use singleton handle!
160                          # dbh instance being active implies build was cancelled
161
162        # rollback code has moved to finish_signal_handler() where it belongs
163        # as rollback() should only happen on cancel/unnatural termination
164        # vs commit() happening in finished() before disconnect, which is natural termination.
165
166       
167        # We're now finally ready to disconnect, as is required for both natural and premature termination
168        # (Though natural termination would have disconnected already)
169        # We now leave DBI's own destructor to do the disconnection when perl calls its DESTROY()
170        # We'll just print a message to stop anyone from worrying whether cancelling build
171        # will ensure disconnection still happens. It happens, but silently.
172        print STDERR "   Global Destruct Phase: DBI's own destructor will disconnect database\n";
173
174        # When we did the disconnection manually on premature termination:
175        #$_dbh_instance->disconnect or warn $_dbh_instance->errstr;
176        #$_dbh_instance = undef;
177        #$ref_count = 0;
178    }
179    return;
180    }
181
182    # "Always include a call to $self->SUPER::DESTROY in our destructors (even if we don't yet have any base/parent classes). (p. 145)"
183    # Superclass and destroy, call to SUPER: https://www.perlmonks.org/?node_id=879920
184    # discussion also covers multiple-inheritance (MI)   
185    $self->SUPER::DESTROY if $self->can("SUPER::DESTROY");
186
187}
188
189
190
191################### BASIC DB OPERATIONS ##################
192
193# NOTE: FULLTEXT is a reserved keyword in (My)SQL. So we can't name a table or any of its columns "fulltext".
194# https://dev.mysql.com/doc/refman/5.5/en/keywords.html
195
196
197
198# SINGLETON / GET INSTANCE PATTERN
199# https://stackoverflow.com/questions/16655603/perl-objects-class-variable-initialization
200# https://stackoverflow.com/questions/7587157/how-can-i-set-a-static-variable-that-can-be-accessed-by-all-subclasses-of-the-sa
201# Singleton without Moose: https://www.perl.com/article/52/2013/12/11/Implementing-the-singleton-pattern-in-Perl/
202
203sub connect_to_db
204{
205    my $self= shift (@_);
206    my ($params_map) = @_; # map instead of named vars with an eye on gssql inheritance
207   
208    $params_map->{'db_encoding'} = $self->{'db_encoding'};
209    $params_map->{'verbosity'} = $self->{'verbosity'};
210   
211    $self->{'db_handle'} = &_get_connection_instance($params_map); # getting singleton (class method)
212    if($self->{'db_handle'}) {
213    $ref_count++; # if successful, keep track of the number of refs to the single db connection
214    return $self->{'db_handle'};
215    }
216    return undef;
217}
218
219# SINGLETON METHOD #
220sub _get_connection_instance
221{
222    #my $self= shift (@_); # singleton method doesn't use self, but callers don't need to know that
223    my ($params_map) = @_;   
224   
225   
226    return $_dbh_instance if($_dbh_instance);
227    # or else make the connection, as happens below
228
229
230    # some useful user messages first
231    if(!defined $params_map->{'autocommit'} && $params_map->{'verbosity'}) {
232    print STDERR "  Autocommit parameter not defined\n";
233    }
234    if($params_map->{'autocommit'}) {
235    print STDERR "   SQL DB CANCEL SUPPORT OFF.\n" if($params_map->{'verbosity'} > 2);
236    } else { # rollback on cancel support on   
237    &issue_backup_on_build_message();
238    }   
239
240   
241    # For proper utf8 support in MySQL, encoding should be 'utf8mb4' as 'utf8' is insufficient
242    my $db_enc = ($params_map->{'db_encoding'} eq "utf8") ? "utf8mb4" : $params_map->{'db_encoding'};
243
244    # Params for connecting to MySQL
245    # These params are ensured default/fallback values by the GS SQL Plugs
246    # so no need to set it here
247    my $db_driver = $params_map->{'db_driver'};
248    my $db_host = $params_map->{'db_host'};
249    my $db_user = $params_map->{'db_client_user'};
250
251    # params that can be undef are db_client_pwd and db_port
252    my $db_pwd = $params_map->{'db_client_pwd'}; # even if undef and password was necessary,
253                                     # we'll see a sensible error message when connect fails
254        # localhost doesn't work for us, but 127.0.0.1 works
255        # https://metacpan.org/pod/DBD::mysql
256        # "The hostname, if not specified or specified as '' or 'localhost', will default to a MySQL server
257        # running on the local machine using the default for the UNIX socket. To connect to a MySQL server
258        # on the local machine via TCP, you must specify the loopback IP address (127.0.0.1) as the host."
259    my $db_port = $params_map->{'db_port'}; # leave as undef if unspecified,
260                 # as our tests never used port anyway (must have internally
261                 # defaulted to whatever default port is used for MySQL)
262
263   
264    #my $connect_str = "dbi:$db_driver:database=$db_name;host=$db_host";
265    # But don't provide db now - this allows checking the db exists later when loading the db
266    my $connect_str = "dbi:$db_driver:host=$db_host";
267    $connect_str .= ";port=$db_port" if $db_port;
268
269    if($params_map->{'verbosity'}) {
270    print STDERR "Away to make connection to $db_driver database with:\n";
271    print STDERR " - hostname $db_host; username: $db_user";
272    print STDERR "; and the provided password" if $db_pwd;
273    print STDERR "\nAssuming the mysql server has been started with: --character_set_server=utf8mb4\n" if $db_driver eq "mysql";
274    }
275
276    # DBI AutoCommit connection param is on/1 by default, so if a value for this is not defined
277    # as a method parameter to _get_connection_instance, then fallback to the default of on/1
278    # More: https://www.oreilly.com/library/view/programming-the-perl/1565926994/re44.html
279    my $autocommit = (defined $params_map->{'autocommit'}) ? $params_map->{'autocommit'} : 1;
280
281    # Useful: https://www.effectiveperlprogramming.com/2010/07/set-custom-dbi-error-handlers/
282   
283    my $dbh = DBI->connect("$connect_str", $db_user, $db_pwd,
284               {
285                   ShowErrorStatement => 1, # more informative as DBI will append failed SQL stmt to error message
286                   PrintError => 1, # on by default, but being explicit
287                   RaiseError => 0, # off by default, but being explicit
288                   AutoCommit => $autocommit,
289                   #mysql_enable_utf8mb4 => 1 # Unrecognised in Windows' Strawberry Perl 5.22. On Ubuntu perl 5.22
290                   # if mysql_enable_utf8mb4 was set to 1 *during connect()*, this tells MySQL to use (4 byte) UTF-8 for
291                   # communication AND tells DBD::mysql to use it to decode the data,
292                   # see https://stackoverflow.com/questions/46727362/perl-mysql-utf8mb4-issue-possible-bug
293               });
294
295    if(!$dbh) {
296    # NOTE, despite handle dbh being undefined, error code will be in DBI->err (note caps)
297    return 0;   
298    }
299
300    # set encoding https://metacpan.org/pod/DBD::mysql
301    # https://dev.mysql.com/doc/refman/5.7/en/charset.html
302    # https://dev.mysql.com/doc/refman/5.7/en/charset-conversion.html
303    # Setting the encoding at db server level: $dbh->do("set NAMES '" . $db_enc . "'");
304    # HOWEVER:
305    # It turned out insufficient setting the encoding to utf8, as that only supports utf8 chars that
306    # need up to 3 bytes. We may need up to 4 bytes per utf8 character, e.g. chars with macron,
307    # and for that, we need the encoding to be set to utf8mb4.
308    # To set up a MySQL db to use utf8mb4 requires configuration on the server side too.
309    # https://stackoverflow.com/questions/10957238/incorrect-string-value-when-trying-to-insert-utf-8-into-mysql-via-jdbc
310    # https://stackoverflow.com/questions/46727362/perl-mysql-utf8mb4-issue-possible-bug
311    # To set up the db for utf8mb4, therefore,
312    # the MySQL server needs to be configured for that char encoding by running the server as:
313    # mysql/mysql-5.7.23-linux-glibc2.12-x86_64/bin>./mysqld_safe --datadir=/PATHTO/mysql/data --character_set_server=utf8mb4
314    # AND when connecting to the server, we can can either set mysql_enable_utf8mb4 => 1
315    # as a connection option
316    # OR we need to do both "set NAMES utf8mb4" AND "$dbh->{mysql_enable_utf8mb4} = 1;" after connecting
317    #
318    # Search results for DBI Set Names imply the "SET NAMES '<enc>'" command is mysql specific too,
319    # so setting the mysql specific option during connection above as "mysql_enable_utf8mb4 => 1"
320    # is no more objectionable. It has the advantage of cutting out the 2 extra lines of doing
321    # set NAMES '<enc>' and $dbh->{mysql_enable_utf8mb4} = 1 here.
322    # These lines may be preferred if more db_driver options are to be supported in future?
323    # (see https://www.perlmonks.org/?node_id=259456)
324   
325    # For Strawberry Perl 5.22, need to the following 2 steps here to take care of encoding issues
326    # The 2 steps are: set names utf8mb4; AND setting mysql_enable_utf8mb4 = 1
327    # Doing this in 2 steps here also works on Ubuntu perl 5.22,
328    # but that also supported setting mysql_enable_utf8mb4 to 1 during connect()
329    # to do both these steps in one go
330    my $stmt = "set NAMES '" . $db_enc . "'";
331    $dbh->do($stmt) || warn("Unable to set charset encoding at db server level to: " . $db_enc . "\n"); # tells MySQL to use UTF-8 for communication
332    $dbh->{mysql_enable_utf8mb4} = 1; # tells DBD::mysql to decode the data
333   
334    # if we're here, then connection succeeded, store handle
335    $_dbh_instance = $dbh;
336    return $_dbh_instance;
337   
338}
339
340# Will disconnect if this instance of gsmysql holds the last reference to the db connection
341# If disconnecting and autocommit is off, then this will commit before disconnecting
342sub finished {
343    my $self= shift (@_);
344    my $dbh = $self->{'db_handle'};
345   
346    my $rc = 1; # return code: everything went fine, regardless of whether we needed to commit
347                # (AutoCommit on or off)
348   
349    $ref_count--;
350    if($ref_count == 0) { # Only commit transaction when we're about to actually disconnect, not before
351   
352    # If AutoCommit was off, meaning transactions were on/enabled,
353    # then here is where we commit our one long transaction.
354    # https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#commit
355    if($dbh->{AutoCommit} == 0) {
356        print STDERR "   Committing transaction to SQL database now.\n" if $self->{'verbosity'};
357        $rc = $dbh->commit() or warn("SQL DB COMMIT FAILED: " . $dbh->errstr); # important problem
358        # worth embellishing error message
359    }
360    # else if autocommit was on, then we'd have committed after every db operation, so nothing to do
361   
362    $self->_force_disconnect_from_db();
363    }
364
365    return $rc;
366}
367
368
369# Call this method on die(), so that you're sure the perl process has disconnected from SQL db
370# Disconnect from db - https://metacpan.org/pod/DBI#disconnect
371# Make sure to have committed or rolled back before disconnect
372sub _force_disconnect_from_db {
373    my $self= shift (@_);
374
375    if($_dbh_instance) {
376    # make sure any active stmt handles are finished
377    # NO: "When all the data has been fetched from a SELECT statement, the driver will automatically call finish for you. So you should not call it explicitly except when you know that you've not fetched all the data from a statement handle and the handle won't be destroyed soon."
378   
379    print STDERR "    GSMySQL disconnecting from database\n" if $self->{'verbosity'};
380    # Just go through the singleton db handle to disconnect
381    $_dbh_instance->disconnect or warn $_dbh_instance->errstr;
382    $_dbh_instance = undef;
383    }
384    # Number of gsmysql objects that share a live connection is now 0, as the connection's dead
385    # either because the last gsmysql object finished() or because connection was killed (force)
386    $ref_count = 0;
387}
388
389
390# Loads the designated database, i.e. 'use <dbname>;'.
391# If the database doesn't yet exist, creates it and loads it.
392# (Don't create the collection's tables yet, though)
393# At the end it will have loaded the requested database (in MySQL: "use <db>;") on success.
394# As usual, returns success or failure value that can be evaluated in a boolean context.
395sub use_db {
396    my $self= shift (@_);
397    my ($db_name) = @_;
398    my $dbh = $self->{'db_handle'};
399    $db_name = $self->sanitize_name($db_name);
400   
401    print STDERR "Attempting to use database $db_name\n" if($self->{'verbosity'});
402   
403    # perl DBI switch database: https://www.perlmonks.org/?node_id=995434
404    # do() returns undef on error.
405    # connection succeeded, try to load our database. If that didn't work, attempt to create db
406    my $success = $dbh->do("use $db_name");
407   
408    if(!$success && $dbh->err == 1049) { # "Unknown database" error has code 1049 (mysql only?) meaning db doesn't exist yet
409
410    print STDERR "Database $db_name didn't exist, creating it along with the tables for the current collection...\n" if($self->{'verbosity'});
411   
412    # attempt to create the db and its tables
413    $self->create_db($db_name) || return 0;
414
415    print STDERR "   Created database $db_name\n" if($self->{'verbosity'} > 1);
416   
417    # once more attempt to use db, now that it exists
418    $dbh->do("use $db_name") || return 0;
419    #$dbh->do("use $db_name") or die "Error (code" . $dbh->err ."): " . $dbh->errstr . "\n";
420
421    $success = 1;
422    }
423    elsif($success) { # database existed and loaded successfully, but
424    # before proceeding check that the current collection's tables exist
425
426    print STDERR "@@@ DATABASE $db_name EXISTED\n" if($self->{'verbosity'} > 2);
427    }
428   
429    return $success; # could still return 0, if database failed to load with an error code != 1049
430}
431
432
433# We should already have done "use <database>;" if this gets called.
434# Just load this collection's metatable
435sub ensure_meta_table_exists {
436    my $self = shift (@_);
437   
438    my $tablename = $self->get_metadata_table_name();
439    # if(!$self->table_exists($tablename)) {
440    #   $self->create_metadata_table() || return 0;
441    # } else {
442    #   print STDERR "@@@ Meta table exists\n" if($self->{'verbosity'} > 2);
443    # }
444    $self->create_metadata_table() || return 0; # will now only create it if it doesn't already exist
445    return 1;
446}
447
448# We should already have done "use <database>;" if this gets called.
449# Just load this collection's metatable
450sub ensure_fulltxt_table_exists {
451    my $self = shift (@_);
452   
453    my $tablename = $self->get_fulltext_table_name();   
454    # if(!$self->table_exists($tablename)) {
455    #   $self->create_fulltext_table() || return 0;
456    # } else {
457    #   print STDERR "@@@ Fulltxt table exists\n" if($self->{'verbosity'} > 2);
458    # }
459    $self->create_fulltext_table() || return 0; # will now only create it if it doesn't already exist
460    return 1;
461}
462
463
464sub create_db {
465    my $self= shift (@_);
466    my ($db_name) = @_;
467    my $dbh = $self->{'db_handle'};
468    $db_name = $self->sanitize_name($db_name);
469   
470    # https://stackoverflow.com/questions/5025768/how-can-i-create-a-mysql-database-from-a-perl-script
471    return $dbh->do("create database $db_name"); # do() will return undef on fail, https://metacpan.org/pod/DBI#do
472}
473
474## NOTE: these 2 create_table methods use mysql specific "CREATE TABLE IF NOT EXISTS" syntax
475## vs general SQL CREATE TABLE syntax which would produce an error message if the table
476## already existed
477## And unless do() fails, these two create methods will now always return true,
478## even if table existed and didn't need to be created.
479sub create_metadata_table {
480    my $self= shift (@_);
481    my $dbh = $self->{'db_handle'};
482   
483    my $table_name = $self->get_metadata_table_name();
484    print STDERR "   Will create table $table_name if it doesn't exist\n" if($self->{'verbosity'} > 2);
485   
486    # If using an auto incremented primary key:
487    my $stmt = "CREATE TABLE IF NOT EXISTS $table_name (id INT NOT NULL AUTO_INCREMENT, did VARCHAR(63) NOT NULL, sid VARCHAR(63) NOT NULL, metaname VARCHAR(127) NOT NULL, metavalue VARCHAR(1023) NOT NULL, PRIMARY KEY(id));";
488    return $dbh->do($stmt);
489}
490
491# TODO: Investigate: https://dev.mysql.com/doc/search/?d=10&p=1&q=FULLTEXT
492# 12.9.1 Natural Language Full-Text Searches
493# to see whether we have to index the 'fulltxt' column of the 'fulltext' tables
494# or let user edit this file, or add it as another option
495sub create_fulltext_table {
496    my $self= shift (@_);
497    my $dbh = $self->{'db_handle'};
498   
499    my $table_name = $self->get_fulltext_table_name();
500    print STDERR "   Will create table $table_name if it doesn't exist\n" if($self->{'verbosity'} > 2);
501   
502    # If using an auto incremented primary key:
503    my $stmt = "CREATE TABLE IF NOT EXISTS $table_name (id INT NOT NULL AUTO_INCREMENT, did VARCHAR(63) NOT NULL, sid VARCHAR(63) NOT NULL, fulltxt LONGTEXT, PRIMARY KEY(id));";
504    return $dbh->do($stmt);
505
506}
507
508## NOTE: this method uses mysql specific "DROP TABLE IF EXISTS" syntax vs general SQL DROP TABLE
509## syntax which would produce an error message if the table didn't exist
510sub delete_collection_tables {
511    my $self= shift (@_);
512    my $dbh = $self->{'db_handle'};
513
514    # drop table <tablename>
515    # my $table = $self->get_metadata_table_name();
516    # if($self->table_exists($table)) {
517    #   $dbh->do("drop table $table");
518    # }
519    # $table = $self->get_fulltext_table_name();
520    # if($self->table_exists($table)) {
521    #   $dbh->do("drop table $table");
522    # }
523    my $table = $self->get_metadata_table_name();   
524    $dbh->do("drop table if exists $table");
525
526    $table = $self->get_fulltext_table_name();
527    $dbh->do("drop table if exists $table");
528
529    # If prepared select statement handles already exist, would need to commit here
530    # so that future select statements using those prepared handles work.
531    # See https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#Transactions
532}
533
534# Don't call this: it will delete the meta and full text tables for ALL collections in $db_name (localsite by default)!
535# This method is just here for debugging (for testing creating a database when there is none)
536#
537# "IF EXISTS is used to prevent an error from occurring if the database does not exist. ... DROP DATABASE returns the number of tables that were removed. The DROP DATABASE statement removes from the given database directory those files and directories that MySQL itself may create during normal operation.Jun 20, 2012"
538# MySQL 8.0 Reference Manual :: 13.1.22 DROP DATABASE Syntax
539# https://dev.mysql.com/doc/en/drop-database.html
540sub _delete_database {
541    my $self= shift (@_);
542    my ($db_name) = @_;
543    my $dbh = $self->{'db_handle'};
544    $db_name = $self->sanitize_name($db_name);
545   
546    print STDERR "!!! Deleting database $db_name\n" if($self->{'verbosity'});
547   
548    # "drop database dbname"
549    $dbh->do("drop database $db_name") || return 0;
550
551    return 1;
552}
553
554
555########################### DB STATEMENTS ###########################
556
557# USEFUL: https://metacpan.org/pod/DBI
558# "Many methods have an optional \%attr parameter which can be used to pass information to the driver implementing the method. Except where specifically documented, the \%attr parameter can only be used to pass driver specific hints. In general, you can ignore \%attr parameters or pass it as undef."
559
560# More efficient to use prepare() to prepare an SQL statement once and then execute() it many times
561# (binding different values to placeholders) than running do() which will prepare each time and
562# execute each time. Also, do() is not useful with SQL select statements as it doesn't fetch rows.
563# Can prepare and cache prepared statements or retrieve prepared statements if cached in one step:
564# https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#prepare_cached
565
566# https://www.guru99.com/insert-into.html
567# and https://dev.mysql.com/doc/refman/8.0/en/example-auto-increment.html
568#     for inserting multiple rows at once
569# https://www.perlmonks.org/bare/?node_id=316183
570# https://metacpan.org/pod/DBI#do
571# https://www.quora.com/What-is-the-difference-between-prepare-and-do-statements-in-Perl-while-we-make-a-connection-to-the-database-for-executing-the-query
572# https://docstore.mik.ua/orelly/linux/dbi/ch05_05.htm
573
574# https://metacpan.org/pod/DBI#performance
575# 'The q{...} style quoting used in this example avoids clashing with quotes that may be used in the SQL statement. Use the double-quote like qq{...} operator if you want to interpolate variables into the string. See "Quote and Quote-like Operators" in perlop for more details.'
576#
577# Each insert method uses lazy loading to prepare the SQL insert stmts once for a table and
578# store it, then executes the (stored) statement each time it's needed for that table.
579sub insert_row_into_metadata_table {
580    my $self = shift (@_);
581    my ($doc_oid, $section_name, $meta_name, $escaped_meta_value, $debug_only) = @_;
582   
583    my $dbh = $self->{'db_handle'};
584   
585    my $tablename = $self->get_metadata_table_name();
586    my $sth = $dbh->prepare_cached(qq{INSERT INTO $tablename (did, sid, metaname, metavalue) VALUES (?, ?, ?, ?)});
587
588    # Now we're ready to execute the command, unless we're only debugging
589   
590    if($debug_only) {
591    # just print the statement we were going to execute
592    print STDERR $sth->{'Statement'} . "($doc_oid, $section_name, $meta_name, $escaped_meta_value)\n";
593    }
594    else {
595    print STDERR $sth->{'Statement'} . "($doc_oid, $section_name, $meta_name, $escaped_meta_value)\n" if $self->{'verbosity'} > 2;
596   
597    $sth->execute($doc_oid, $section_name, $meta_name, $escaped_meta_value)
598        || warn ("Unable to write metadata row to db:\n\tOID $doc_oid, section $section_name,\n\tmeta name: $meta_name, val: $escaped_meta_value");
599    # Execution failure will print out info anyway: since db connection sets PrintError
600    }
601}
602
603# As above. Likewise uses lazy loading to prepare the SQL insert stmt once for a table and store it,
604# then execute the (stored) statement each time it's needed for that table.
605sub insert_row_into_fulltxt_table {
606    my $self = shift (@_);
607    #my ($did, $sid, $fulltext) = @_;
608    my ($doc_oid, $section_name, $section_textref, $debug_only) = @_;
609   
610    my $dbh = $self->{'db_handle'};
611   
612    my $tablename = $self->get_fulltext_table_name();
613    my $sth = $dbh->prepare_cached(qq{INSERT INTO $tablename (did, sid, fulltxt) VALUES (?, ?, ?)});
614   
615    # Now we're ready to execute the command, unless we're only debugging
616
617    # don't display the fulltxt value as it could be too long
618    my $txt_repr = $$section_textref ? "<TXT>" : "NULL";   
619    if($debug_only) { # only print statement, don't execute it
620    print STDERR $sth->{'Statement'} . "($doc_oid, $section_name, $txt_repr)\n";
621    }
622    else { 
623    print STDERR $sth->{'Statement'} . "($doc_oid, $section_name, $txt_repr)\n" if $self->{'verbosity'} > 2;
624   
625    $sth->execute($doc_oid, $section_name, $$section_textref)
626        || warn ("Unable to write fulltxt row to db for row:\n\tOID $doc_oid, section $section_name"); # Execution failure will print out info anyway: since db connection sets PrintError
627    }
628}
629
630
631## The 2 select statements used by GreenstoneSQLPlugin
632
633# Using fetchall_arrayref on statement handle, to run on prepared and executed stmt
634#   https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#fetchall_arrayref
635# instead of selectall_arrayref on database handle which will prepare, execute and fetch
636#   https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#selectall_arrayref
637#
638# Returns the resulting records of preparing and executing
639# a "SELECT * FROM <COLL>_metadata WHERE did = $oid" SQL statement.
640# Have to use prepare() and execute() instead of do() since do() does
641# not allow for fetching result set thereafter:
642# do(): "This method  is typically most useful for non-SELECT statements that either cannot be prepared in advance (due to a limitation of the driver) or do not need to be executed repeatedly. It should not be used for SELECT statements because it does not return a statement handle (so you can't fetch any data)." https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#do
643sub select_from_metatable_matching_docid {
644    my $self= shift (@_);
645    my ($oid, $outhandle) = @_;
646   
647    my $dbh = $self->{'db_handle'};
648    my $tablename = $self->get_metadata_table_name();
649   
650    my $sth = $dbh->prepare_cached(qq{SELECT * FROM $tablename WHERE did = ?});
651    $sth->execute( $oid ); # will print msg on fail
652
653    print $outhandle "### SQL select stmt: ".$sth->{'Statement'}."\n"
654    if ($self->{'verbosity'} > 2);
655   
656    my $rows_ref = $sth->fetchall_arrayref();
657    # "If an error occurs, fetchall_arrayref returns the data fetched thus far, which may be none.
658    # You should check $sth->err afterwards (or use the RaiseError attribute) to discover if the
659    # data is complete or was truncated due to an error."
660    # https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#fetchall_arrayref
661    # https://www.oreilly.com/library/view/programming-the-perl/1565926994/ch04s05.html
662    warn("Data fetching from $tablename terminated early by error: " . $dbh->err) if $dbh->err;
663    return $rows_ref;
664}
665
666
667# See select_from_metatable_matching_docid() above.
668# Returns the resulting records from preparing and executing
669# a "SELECT * FROM <COLL>_fulltxt WHERE did = $oid" SQL statement.
670sub select_from_texttable_matching_docid {
671    my $self= shift (@_);
672    my ($oid, $outhandle) = @_;
673   
674    my $dbh = $self->{'db_handle'};
675    my $tablename = $self->get_fulltext_table_name();
676   
677    my $sth = $dbh->prepare_cached(qq{SELECT * FROM $tablename WHERE did = ?});
678    $sth->execute( $oid ); # will print msg on fail
679   
680    print $outhandle "### SQL select stmt: ".$sth->{'Statement'}."\n"
681    if ($self->{'verbosity'} > 2);
682   
683    my $rows_ref = $sth->fetchall_arrayref();
684    # Need explicit warning:
685    warn("Data fetching from $tablename terminated early by error: " . $dbh->err) if $dbh->err;
686    return $rows_ref;
687
688}
689
690# delete all records in metatable with specified docid
691# https://www.tutorialspoint.com/mysql/mysql-delete-query.htm
692# DELETE FROM table_name [WHERE Clause]
693# see example under 'do' at https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm
694sub delete_recs_from_metatable_with_docid {
695    my $self= shift (@_);
696    my ($oid) = @_;
697   
698    my $dbh = $self->{'db_handle'};
699
700    my $tablename = $self->get_metadata_table_name();
701    my $sth = $dbh->prepare_cached(qq{DELETE FROM $tablename WHERE did = ?});
702    $sth->execute( $oid ) or warn $dbh->errstr; # dbh set to print errors even without doing warn()
703}
704
705# delete all records in fulltxt table with the specified docid
706sub delete_recs_from_texttable_with_docid {
707    my $self= shift (@_);
708    my ($oid) = @_;
709   
710    my $dbh = $self->{'db_handle'};
711
712    my $tablename = $self->get_fulltext_table_name();
713    my $sth = $dbh->prepare_cached(qq{DELETE FROM $tablename WHERE did = ?});
714    $sth->execute( $oid ) or warn $dbh->errstr; # dbh set to print errors even without doing warn()
715}
716
717# Can call this after connection succeeded to get the database handle, dbh,
718# if any specific DB operation (SQL statement, create/delete)
719# needs to be executed that is not already provided as a method of this class.
720sub get_db_handle {
721    my $self= shift (@_);
722    return $self->{'db_handle'};
723}
724
725################ HELPER METHODS ##############
726
727# More basic helper methods
728sub get_metadata_table_name {
729    my $self= shift (@_);
730    my $table_name = $self->{'tablename_prefix'} . "_metadata";
731    return $table_name;
732}
733
734# FULLTEXT is a reserved keyword in (My)SQL. https://dev.mysql.com/doc/refman/5.5/en/keywords.html
735# So we can't name a table or any of its columns "fulltext". We use "fulltxt" instead.
736sub get_fulltext_table_name {
737    my $self= shift (@_);
738    my $table_name = $self->{'tablename_prefix'} . "_fulltxt";
739    return $table_name;
740}
741
742# Attempt to make sure the name parameter (for db or table name) is acceptable syntax
743# for the db in question, e.g. for mysql. For example, MySQL doesn't like tables or
744# databases with '-' (hyphens) in their names
745sub sanitize_name {
746    my $self= shift (@_);
747    my ($name) = @_;
748    $name =~ s/-/_/g;
749    return $name;
750}
751
752
753# MySQL has non-standard command to CREATE TABLE IF NOT EXISTS and DROP TABLE IF EXISTS, using that.
754# See https://www.perlmonks.org/bare/?node=DBI%20Recipes
755#    The page further has a table_exists function that could work with proper comparison
756# Couldn't get the first solution at https://www.perlmonks.org/bare/?node_id=500050 to work though
757# I can get my version of table_exists to work, but it's not so ideal
758sub table_exists {
759    my $self = shift (@_);
760    my $dbh = $self->{'db_handle'};
761    my ($table_name) = @_;
762
763    my @table_list = $dbh->tables;
764    #my $tables_str = @table_list[0];
765    foreach my $table (@table_list) {
766    return 1 if ($table =~ m/$table_name/);
767    }
768    return 0;
769}
770
771# regular function, not method
772# Called when rollback_on_cancel is on.
773# Warns they user to make backups of their archives and index dir
774# and sleeps for 5 seconds so they can do that
775sub issue_backup_on_build_message
776{       
777    # warn the user they'll need to backup their archives (and index?) folders
778    # plugout stores archivedir in $self->{'output_dir'}, but not available in plugin
779    # But we're only making an example copy command anyway:
780    my $archivesdir = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, "archives");
781    my $archives_rollbackdir = $archivesdir.".rollback";
782   
783    # Assume user knows what they're doing if a rollback directory already exists
784    # instead of wasting time waiting for sleep to terminate
785    return if FileUtils::directoryExists("$archives_rollbackdir");
786   
787    my $indexdir = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, "index");
788   
789   
790    # use rsync command on unix
791    my $example_copy_cmds = "rsync -pavH $archivesdir $archivesdir.rollback\n";
792    $example_copy_cmds .= "rsync -pavH $indexdir $indexdir.rollback\n";
793   
794    if (($ENV{'GSDLOS'} =~ /^windows$/i) && ($^O ne "cygwin")) {
795    # https://stackoverflow.com/questions/4601161/copying-all-contents-of-folder-to-another-folder-using-batch-file
796    $example_copy_cmds = "xcopy /EVI $archivesdir $archivesdir.rollback\n";
797    $example_copy_cmds .= "xcopy /EVI $indexdir $indexdir.rollback\n";
798   
799    }
800    print STDERR "****************************\n";
801    &gsprintf::gsprintf(STDERR, "{gsmysql.backup_on_build_msg}\n", $example_copy_cmds);
802    print STDERR "****************************\n";     
803    sleep 5; # 5s   
804}
805
8061;
Note: See TracBrowser for help on using the browser.