source: main/trunk/greenstone2/perllib/gssql.pm@ 32583

Last change on this file since 32583 was 32583, checked in by ak19, 5 years ago
  1. Some tidying up of the code. 2. Removing unnecessary calls to force_disconnect_from_db (now made 'private') after die() since on die() perl calls object destructor which will ensure disconnection anyway.
File size: 34.2 KB
Line 
1###########################################################################
2#
3# gssql.pm -- DBI for SQL related utility functions used by
4# GreenstoneSQLPlugout and GreenstoneSQLPlugin too.
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 1999 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27package gssql;
28
29use strict;
30no strict 'refs';
31no strict 'subs';
32
33use DBI; # the central package for this module used by GreenstoneSQL Plugout and Plugin
34
35
36##############################
37
38# TODO: add infrastructure for db_port, AutoCommit etc
39# For port, see https://stackoverflow.com/questions/2248665/perl-script-to-connect-to-mysql-server-port-3307
40
41# + TODO: remove unnecessary warn() since PrintError is active
42# https://perldoc.perl.org/perlobj.html#Destructors
43
44# TODO: drop table if exists and create table if exists are available in MySQL. Use those cmds
45# instead of always first checking for existence ourselves?
46##############################
47
48# singleton connection
49my $_dbh_instance = undef; # calls undef() function. See https://perlmaven.com/undef-and-defined-in-perl
50my $ref_count = 0;
51
52# Need params_map keys:
53# - collection_name
54# - db_encoding (db content encoding) - MySQL can set this at server, db, table levels. For MySQL
55# we set the enc during connect at server level. Not sure whether other DB's support it at the
56# same levels.
57
58# For connection to MySQL, need:
59# - db_driver, db_client_user, db_client_pwd, db_host, (db_port not used at present)
60# So these will be parameterised, but in a hashmap, for just the connect method.
61
62# Parameterise (one or more methods may use them):
63# - db_name (which is the GS3 sitename, or "greenstone2" for GS2)
64
65
66
67$SIG{INT} = \&finish_signal_handler;
68$SIG{TERM} = \&finish_signal_handler;
69$SIG{KILL} = \&finish_signal_handler;
70
71sub finish_signal_handler {
72 my ($sig) = @_; # one of INT|KILL|TERM
73
74 if ($_dbh_instance) { # database handle (note, using singleton) still active.
75
76 # TODO: If autocommit wasn't set, then this is a cancel operation.
77 # If we've not disconnected from the sql db yet and if we've not committed
78 # transactions yet, then cancel means we do a rollback here
79
80 if($_dbh_instance->{AutoCommit} == 0) {
81 print STDERR " User cancelled: rolling back SQL database transaction.\n";
82 $_dbh_instance->rollback(); # will warn on failure, nothing more we can/want to do,
83 }
84 }
85
86 die "Caught a $sig signal $!"; # die() will always call destructor (sub DESTROY)
87}
88
89sub new
90{
91 my $class = shift(@_);
92
93 my ($params_map) = @_;
94
95 # library_url: to be specified on the cmdline if not using a GS-included web server
96 # the GSDL_LIBRARY_URL env var is useful when running cmdline buildcol.pl in the linux package manager versions of GS3
97
98 # https://stackoverflow.com/questions/7083453/copying-a-hashref-in-perl
99 # Making a shallow copy works, and can handle unknown params:
100 #my $self = $params_map;
101
102 # but being explicit for class params needed for MySQL:
103 my $self = {
104 'collection_name' => $params_map->{'collection_name'},
105 'verbosity' => $params_map->{'verbosity'} || 1
106 };
107
108 # The db_encoding option is presently not passed in to this constructor as parameter.
109 # Placed here to indicate it's sort of optional.
110 # Since docxml are all in utf8, the contents of the GS SQL database should be too,
111 # So making utf8 the hidden default at present.
112 $self->{'db_encoding'} = $params_map->{'db_encoding'} || "utf8";
113
114 $self = bless($self, $class);
115
116 $self->{'tablename_prefix'} = $self->sanitize_name($params_map->{'collection_name'});
117
118 return $self;
119}
120
121# On die(), an object's destructor is called.
122# See https://www.perl.com/article/37/2013/8/18/Catch-and-Handle-Signals-in-Perl/
123# We want to ensure we've closed the db connection in such cases.
124# "It’s common to call die when handling SIGINT and SIGTERM. die is useful because it will ensure that Perl stops correctly: for example Perl will execute a destructor method if present when die is called, but the destructor method will not be called if a SIGINT or SIGTERM is received and no signal handler calls die."
125#
126# Useful: https://perldoc.perl.org/perlobj.html#Destructors
127#
128# https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#disconnect
129# "Disconnects the database from the database handle. disconnect is typically only used before exitin# g the program. The handle is of little use after disconnecting.
130#
131# The transaction behaviour of the disconnect method is, sadly, undefined. Some database systems (such as Oracle and Ingres) will automatically commit any outstanding changes, but others (such as Informix) will rollback any outstanding changes. Applications not using AutoCommit should explicitly call commit or rollback before calling disconnect.
132#
133# The database is automatically disconnected by the DESTROY method if still connected when there are no longer any references to the handle. The DESTROY method for each driver should implicitly call rollback to undo any uncommitted changes. This is vital behaviour to ensure that incomplete transactions don't get committed simply because Perl calls DESTROY on every object before exiting. Also, do not rely on the order of object destruction during "global destruction", as it is undefined.
134#
135# Generally, if you want your changes to be committed or rolled back when you disconnect, then you should explicitly call "commit" or "rollback" before disconnecting.
136#
137# If you disconnect from a database while you still have active statement handles (e.g., SELECT statement handles that may have more data to fetch), you will get a warning. The warning may indicate that a fetch loop terminated early, perhaps due to an uncaught error. To avoid the warning call the finish method on the active handles."
138#
139sub DESTROY {
140 my $self = shift;
141
142 if (${^GLOBAL_PHASE} eq 'DESTRUCT') {
143
144 if ($_dbh_instance) { # database handle still active. Use singleton handle!
145
146 # rollback code has moved to finish_signal_handler() where it belongs?
147
148 # NOTE: if RaiseError is set on dbi connection, then on any error, perl process will die()
149 # which will end up calling this DESTROY. If it was a die() that called DESTROY
150 # then need to rollback the db here. However, if it was not a die() but natural termination
151 # of the perl process, destroy() will also get called. In that case we don't want to rollback
152 # but do a commit() to the DB instead.
153 # Perhaps detecting the difference may be accomplished by checking ref_count:
154 # - If ref_count not 0 it may require a rollback?
155 # - If ref_count 0 it may be a natural termination and require a commit? Except that ref_count
156 # is set back to 0 in finished(), which will do the commit when ref_count becomes 0. So shouldn't
157 # (have to) do that here.
158
159 # We're now finally ready to disconnect, as is required for both natural and premature termination
160 print STDERR "XXXXXXXX Global Destruct: Disconnecting from database\n";
161 $_dbh_instance->disconnect or warn $_dbh_instance->errstr;
162 $_dbh_instance = undef;
163 $ref_count = 0;
164 }
165 return;
166 }
167
168}
169
170#################################
171
172# Database access related functions
173# http://g2pc1.bu.edu/~qzpeng/manual/MySQL%20Commands.htm
174# https://www.guru99.com/insert-into.html
175
176# TODO Q: What on cancelling a build: delete table? But what if it was a rebuild and the rebuild is cancelled (not the original build)?
177# Do we create a copy of the orig database as backup, then start populating current db, and if cancelled, delete current db and RENAME backup table to current?
178# https://stackoverflow.com/questions/3280006/duplicating-a-mysql-table-indexes-and-data
179# BUT what if the table is HUGE? (Think of a collection with millions of docs.) Huge overhead in copying?
180# The alternative is we just quit on cancel, but then: cancel could leave the table in a partial committed state, with no way of rolling back.
181# Unless they do a full rebuild, which will recreate the table from scratch?
182# SOLUTION-> rollback transaction on error, see https://www.effectiveperlprogramming.com/2010/07/set-custom-dbi-error-handlers/
183# But then should set AutoCommit to off on connection, and remember to commit every time
184
185#################
186# Database functions that use the perl DBI module (with the DBD driver module for mysql)
187#################
188
189################### BASIC DB OPERATIONS ##################
190
191# THE NEW DB FUNCTIONS
192# NOTE: FULLTEXT is a reserved keyword in (My)SQL. So we can't name a table or any of its columns "fulltext".
193# https://dev.mysql.com/doc/refman/5.5/en/keywords.html
194
195# TODO: Consider AutoCommit status (and Autocommit off allowing commit or rollback for GS coll build cancel) later
196
197
198
199# SINGLETON / GET INSTANCE PATTERN
200# https://stackoverflow.com/questions/16655603/perl-objects-class-variable-initialization
201# https://stackoverflow.com/questions/7587157/how-can-i-set-a-static-variable-that-can-be-accessed-by-all-subclasses-of-the-sa
202# Singleton without Moose: https://www.perl.com/article/52/2013/12/11/Implementing-the-singleton-pattern-in-Perl/
203
204sub connect_to_db
205{
206 my $self= shift (@_);
207 my ($params_map) = @_;
208
209 $params_map->{'db_encoding'} = $self->{'db_encoding'};
210 $params_map->{'verbosity'} = $self->{'verbosity'};
211
212 $self->{'db_handle'} = &_get_connection_instance($params_map); # getting singleton (class method)
213 if($self->{'db_handle'}) {
214 $ref_count++; # if successful, keep track of the number of refs to the single db connection
215 return $self->{'db_handle'};
216 }
217 return undef;
218}
219
220# SINGLETON METHOD #
221# TODO: where should the defaults for these params be, here or in GS-SQLPlugin/Plugout?
222sub _get_connection_instance
223{
224 #my $self= shift (@_); # singleton method doesn't use self, but callers don't need to know that
225 my ($params_map) = @_;
226
227 if($params_map->{'verbosity'}) {
228 if(!defined $params_map->{'autocommit'}) {
229 print STDERR " Autocommit parameter not defined\n";
230 }
231 if($params_map->{'autocommit'}) {
232 print STDERR " SQL DB CANCEL SUPPORT OFF.\n";
233 } else {
234 print STDERR " SQL DB CANCEL SUPPORT ON.\n";
235 }
236 }
237
238 return $_dbh_instance if($_dbh_instance);
239
240 # or make the connection
241
242 # For proper utf8 support in MySQL, encoding should be 'utf8mb4' as 'utf8' is insufficient
243 my $db_enc = "utf8mb4" if $params_map->{'db_encoding'} eq "utf8";
244
245 # these are the params for connecting to MySQL
246 my $db_driver = $params_map->{'db_driver'} || "mysql";
247 my $db_user = $params_map->{'db_client_user'} || "root";
248 my $db_pwd = $params_map->{'db_client_pwd'}; # even if undef and password was necessary,
249 # we'll see a sensible error message when connect fails
250 my $db_host = $params_map->{'db_host'} || "127.0.0.1";
251 # localhost doesn't work for us, but 127.0.0.1 works
252 # https://metacpan.org/pod/DBD::mysql
253 # "The hostname, if not specified or specified as '' or 'localhost', will default to a MySQL server
254 # running on the local machine using the default for the UNIX socket. To connect to a MySQL server
255 # on the local machine via TCP, you must specify the loopback IP address (127.0.0.1) as the host."
256 my $db_port = $params_map->{'db_port'}; # leave as undef if unspecified,
257 # as our tests never used port anyway (must have internally
258 # defaulted to whatever default port is used for MySQL)
259
260
261 #my $connect_str = "dbi:$db_driver:database=$db_name;host=$db_host";
262 # But don't provide db now - this allows checking the db exists later when loading the db
263 my $connect_str = "dbi:$db_driver:host=$db_host";
264 $connect_str .= ";port=$db_port" if $db_port;
265
266 if($params_map->{'verbosity'}) {
267 print STDERR "Away to make connection to $db_driver database with:\n";
268 print STDERR " - hostname $db_host; username: $db_user";
269 print STDERR "; and the provided password" if $db_pwd;
270 print STDERR "\nAssuming the mysql server has been started with: --character_set_server=utf8mb4\n" if $db_driver eq "mysql";
271 }
272
273 # DBI AutoCommit connection param is on/1 by default, so if a value for this is not defined
274 # as a method parameter to _get_connection_instance, then fallback to the default of on/1
275 my $autocommit = (defined $params_map->{'autocommit'}) ? $params_map->{'autocommit'} : 1;
276
277 my $dbh = DBI->connect("$connect_str", $db_user, $db_pwd,
278 {
279 ShowErrorStatement => 1, # more informative as DBI will append failed SQL stmt to error message
280 PrintError => 1, # on by default, but being explicit
281 RaiseError => 0, # off by default, but being explicit
282 AutoCommit => $autocommit,
283 mysql_enable_utf8mb4 => 1 # tells MySQL to use UTF-8 for communication and tells DBD::mysql to decode the data, see https://stackoverflow.com/questions/46727362/perl-mysql-utf8mb4-issue-possible-bug
284 });
285
286 if(!$dbh) {
287 # NOTE, despite handle dbh being undefined, error code will be in DBI->err (note caps)
288 return 0;
289 }
290
291 # set encoding https://metacpan.org/pod/DBD::mysql
292 # https://dev.mysql.com/doc/refman/5.7/en/charset.html
293 # https://dev.mysql.com/doc/refman/5.7/en/charset-conversion.html
294 # Setting the encoding at db server level: $dbh->do("set NAMES '" . $db_enc . "'");
295 # HOWEVER:
296 # It turned out insufficient setting the encoding to utf8, as that only supports utf8 chars that
297 # need up to 3 bytes. We may need up to 4 bytes per utf8 character, e.g. chars with macron,
298 # and for that, we need the encoding to be set to utf8mb4.
299 # To set up a MySQL db to use utf8mb4 requires configuration on the server side too.
300 # https://stackoverflow.com/questions/10957238/incorrect-string-value-when-trying-to-insert-utf-8-into-mysql-via-jdbc
301 # https://stackoverflow.com/questions/46727362/perl-mysql-utf8mb4-issue-possible-bug
302 # To set up the db for utf8mb4, therefore,
303 # the MySQL server needs to be configured for that char encoding by running the server as:
304 # mysql-5.7.23-linux-glibc2.12-x86_64/bin>./mysqld_safe --datadir=/Scratch/ak19/mysql/data --character_set_server=utf8mb4
305 # AND when connecting to the server, we can can either set mysql_enable_utf8mb4 => 1
306 # as a connection option
307 # OR we need to do both "set NAMES utf8mb4" AND "$dbh->{mysql_enable_utf8mb4} = 1;" after connecting
308 #
309 # Search results for DBI Set Names imply the "SET NAMES '<enc>'" command is mysql specific too,
310 # so setting the mysql specific option during connection above as "mysql_enable_utf8mb4 => 1"
311 # is no more objectionable. It has the advantage of cutting out the 2 extra lines of doing
312 # set NAMES '<enc>' and $dbh->{mysql_enable_utf8mb4} = 1 here.
313 # These lines may be preferred if more db_driver options are to be supported in future:
314 # then a separate method called set_db_encoding($enc) can work out what db_driver we're using
315 # and if mysql and enc=utfy, then it can do the following whereas it will issue other do stmts
316 # for other db_drivers, see https://www.perlmonks.org/?node_id=259456:
317
318 #my $stmt = "set NAMES '" . $db_enc . "'";
319 #$dbh->do($stmt) || warn("Unable to set charset encoding at db server level to: " . $db_enc . "\n"); # tells MySQL to use UTF-8 for communication
320 #$dbh->{mysql_enable_utf8mb4} = 1; # tells DBD::mysql to decode the data
321
322 # if we're here, then connection succeeded, store handle
323 $_dbh_instance = $dbh;
324 return $_dbh_instance;
325
326}
327
328# Will disconnect if this instance of gssql holds the last reference to the db connection
329# If disconnecting and autocommit is off, then this will commit before disconnecting
330sub finished {
331 my $self= shift (@_);
332 my $dbh = $self->{'db_handle'};
333
334 my $rc = 1; # return code: everything went fine, regardless of whether we needed to commit
335 # (AutoCommit on or off)
336
337 $ref_count--;
338 if($ref_count == 0) { # Only commit transaction when we're about to actually disconnect, not before
339
340 # TODO: If AutoCommit was off, meaning transactions were on/enabled,
341 # then here is where we commit our one long transaction.
342 # https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#commit
343 if($dbh->{AutoCommit} == 0) {
344 print STDERR " Committing transaction to SQL database now.\n" if $self->{'verbosity'};
345 $rc = $dbh->commit() or warn("SQL DB COMMIT FAILED: " . $dbh->errstr); # important problem
346 # worth embellishing error message
347 }
348 # else if autocommit was on, then we'd have committed after every db operation, so nothing to do
349
350 $self->_force_disconnect_from_db();
351 }
352
353 return $rc;
354}
355
356
357# Call this method on die(), so that you're sure the perl process has disconnected from SQL db
358# Disconnect from db - https://metacpan.org/pod/DBI#disconnect
359# TODO: make sure to have committed or rolled back before disconnect
360# and that you've call finish() on statement handles if any fetch remnants remain
361sub _force_disconnect_from_db {
362 my $self= shift (@_);
363
364 if($_dbh_instance) {
365 # make sure any active stmt handles are finished
366 # NO: "When all the data has been fetched from a SELECT statement, the driver will automatically call finish for you. So you should not call it explicitly except when you know that you've not fetched all the data from a statement handle and the handle won't be destroyed soon."
367
368 print STDERR " GSSQL disconnecting from database\n";
369 # Just go through the singleton db handle to disconnect
370 $_dbh_instance->disconnect or warn $_dbh_instance->errstr;
371 $_dbh_instance = undef;
372 }
373 # Number of gssql objects that share a live connection is now 0, as the connection's dead
374 # either because the last gssql object finished() or because connection was killed (force)
375 $ref_count = 0;
376}
377
378
379# Load the designated database, i.e. 'use <dbname>;'.
380# If the database doesn't yet exist, creates it and loads it.
381# (Don't create the collection's tables yet, though)
382# At the end it will have loaded the requested database (in MySQL: "use <db>;") on success.
383# As usual, returns success or failure value that can be evaluated in a boolean context.
384sub use_db {
385 my $self= shift (@_);
386 my ($db_name) = @_;
387 my $dbh = $self->{'db_handle'};
388 $db_name = $self->sanitize_name($db_name);
389
390 print STDERR "Attempting to use database $db_name\n" if($self->{'verbosity'});
391
392 # perl DBI switch database: https://www.perlmonks.org/?node_id=995434
393 # do() returns undef on error.
394 # connection succeeded, try to load our database. If that didn't work, attempt to create db
395 my $success = $dbh->do("use $db_name");
396
397 if(!$success && $dbh->err == 1049) { # "Unknown database" error has code 1049 (mysql only?) meaning db doesn't exist yet
398
399 print STDERR "Database $db_name didn't exist, creating it along with the tables for the current collection...\n" if($self->{'verbosity'});
400
401 # attempt to create the db and its tables
402 $self->create_db($db_name) || return 0;
403
404 print STDERR " Created database $db_name\n" if($self->{'verbosity'} > 1);
405
406 # once more attempt to use db, now that it exists
407 $dbh->do("use $db_name") || return 0;
408 #$dbh->do("use $db_name") or die "Error (code" . $dbh->err ."): " . $dbh->errstr . "\n";
409
410 $success = 1;
411 }
412 elsif($success) { # database existed and loaded successfully, but
413 # before proceeding check that the current collection's tables exist
414
415 print STDERR "@@@ DATABASE $db_name EXISTED\n" if($self->{'verbosity'} > 2);
416 }
417
418 return $success; # could still return 0, if database failed to load with an error code != 1049
419}
420
421
422# We should already have done "use <database>;" if this gets called.
423# Just load this collection's metatable
424sub ensure_meta_table_exists {
425 my $self = shift (@_);
426
427 my $tablename = $self->get_metadata_table_name();
428 if(!$self->table_exists($tablename)) {
429 #print STDERR " Creating metadata table $tablename\n" if($self->{'verbosity'} > 1);
430 $self->create_metadata_table() || return 0;
431 } else {
432 print STDERR "@@@ Meta table exists\n" if($self->{'verbosity'} > 2);
433 }
434 return 1;
435}
436
437# We should already have done "use <database>;" if this gets called.
438# Just load this collection's metatable
439sub ensure_fulltxt_table_exists {
440 my $self = shift (@_);
441
442 my $tablename = $self->get_fulltext_table_name();
443 if(!$self->table_exists($tablename)) {
444 #print STDERR " Creating fulltxt table $tablename\n" if($self->{'verbosity'} > 1);
445 $self->create_fulltext_table() || return 0;
446 } else {
447 print STDERR "@@@ Fulltxt table exists\n" if($self->{'verbosity'} > 2);
448 }
449 return 1;
450}
451
452
453sub create_db {
454 my $self= shift (@_);
455 my ($db_name) = @_;
456 my $dbh = $self->{'db_handle'};
457 $db_name = $self->sanitize_name($db_name);
458
459 # https://stackoverflow.com/questions/5025768/how-can-i-create-a-mysql-database-from-a-perl-script
460 return $dbh->do("create database $db_name"); # do() will return undef on fail, https://metacpan.org/pod/DBI#do
461}
462
463
464sub create_metadata_table {
465 my $self= shift (@_);
466 my $dbh = $self->{'db_handle'};
467
468 my $table_name = $self->get_metadata_table_name();
469 print STDERR " Creating table $table_name\n" if($self->{'verbosity'} > 1);
470
471 # If using an auto incremented primary key:
472 my $stmt = "CREATE TABLE $table_name (id INT NOT NULL AUTO_INCREMENT, did VARCHAR(63) NOT NULL, sid VARCHAR(63) NOT NULL, metaname VARCHAR(127) NOT NULL, metavalue VARCHAR(1023) NOT NULL, PRIMARY KEY(id));";
473 return $dbh->do($stmt);
474}
475
476# TODO: Investigate: https://dev.mysql.com/doc/search/?d=10&p=1&q=FULLTEXT
477# 12.9.1 Natural Language Full-Text Searches
478# to see whether we have to index the 'fulltxt' column of the 'fulltext' tables
479# or let user edit this file, or add it as another option
480sub create_fulltext_table {
481 my $self= shift (@_);
482 my $dbh = $self->{'db_handle'};
483
484 my $table_name = $self->get_fulltext_table_name();
485 print STDERR " Creating table $table_name\n" if($self->{'verbosity'} > 1);
486
487 # If using an auto incremented primary key:
488 my $stmt = "CREATE TABLE $table_name (id INT NOT NULL AUTO_INCREMENT, did VARCHAR(63) NOT NULL, sid VARCHAR(63) NOT NULL, fulltxt LONGTEXT, PRIMARY KEY(id));";
489 return $dbh->do($stmt);
490
491}
492
493
494sub delete_collection_tables {
495 my $self= shift (@_);
496 my $dbh = $self->{'db_handle'};
497
498 # drop table <tablename>
499 my $table = $self->get_metadata_table_name();
500 if($self->table_exists($table)) {
501 $dbh->do("drop table $table");# || warn("@@@ Couldn't delete $table");
502 }
503 $table = $self->get_fulltext_table_name();
504 if($self->table_exists($table)) {
505 $dbh->do("drop table $table");# || warn("@@@ Couldn't delete $table");
506 }
507
508 # TODO Q: commit here, so that future select statements work?
509 # See https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#Transactions
510}
511
512# Don't call this: it will delete the meta and full text tables for ALL collections in $db_name (localsite by default)!
513# This method is just here for debugging (for testing creating a database when there is none)
514#
515# "IF EXISTS is used to prevent an error from occurring if the database does not exist. ... DROP DATABASE returns the number of tables that were removed. The DROP DATABASE statement removes from the given database directory those files and directories that MySQL itself may create during normal operation.Jun 20, 2012"
516# MySQL 8.0 Reference Manual :: 13.1.22 DROP DATABASE Syntax
517# https://dev.mysql.com/doc/en/drop-database.html
518sub _delete_database {
519 my $self= shift (@_);
520 my ($db_name) = @_;
521 my $dbh = $self->{'db_handle'};
522 $db_name = $self->sanitize_name($db_name);
523
524 print STDERR "!!! Deleting database $db_name\n" if($self->{'verbosity'});
525
526 # "drop database dbname"
527 $dbh->do("drop database $db_name") || return 0;
528
529 return 1;
530}
531
532
533########################### DB STATEMENTS ###########################
534
535# USEFUL: https://metacpan.org/pod/DBI
536# "Many methods have an optional \%attr parameter which can be used to pass information to the driver implementing the method. Except where specifically documented, the \%attr parameter can only be used to pass driver specific hints. In general, you can ignore \%attr parameters or pass it as undef."
537
538# More efficient to use prepare() to prepare an SQL statement once and then execute() it many times
539# (binding different values to placeholders) than running do() which will prepare each time and
540# execute each time. Also, do() is not useful with SQL select statements as it doesn't fetch rows.
541# Can prepare and cache prepared statements or retrieve prepared statements if cached in one step:
542# https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#prepare_cached
543
544# https://www.guru99.com/insert-into.html
545# and https://dev.mysql.com/doc/refman/8.0/en/example-auto-increment.html
546# for inserting multiple rows at once
547# https://www.perlmonks.org/bare/?node_id=316183
548# https://metacpan.org/pod/DBI#do
549# https://www.quora.com/What-is-the-difference-between-prepare-and-do-statements-in-Perl-while-we-make-a-connection-to-the-database-for-executing-the-query
550# https://docstore.mik.ua/orelly/linux/dbi/ch05_05.htm
551
552# https://metacpan.org/pod/DBI#performance
553# 'The q{...} style quoting used in this example avoids clashing with quotes that may be used in the SQL statement. Use the double-quote like qq{...} operator if you want to interpolate variables into the string. See "Quote and Quote-like Operators" in perlop for more details.'
554#
555# This method uses lazy loading to prepare the SQL insert stmt once for a table and store it,
556# then execute the (stored) statement each time it's needed for that table.
557sub insert_row_into_metadata_table {
558 my $self = shift (@_);
559 my ($doc_oid, $section_name, $meta_name, $escaped_meta_value, $debug_only) = @_;
560
561 my $dbh = $self->{'db_handle'};
562
563 my $tablename = $self->get_metadata_table_name();
564 my $sth = $dbh->prepare_cached(qq{INSERT INTO $tablename (did, sid, metaname, metavalue) VALUES (?, ?, ?, ?)});# || warn("Could not prepare insert statement for metadata table\n");
565
566 # Now we're ready to execute the command, unless we're only debugging
567
568 if($debug_only) {
569 # just print the statement we were going to execute
570 print STDERR $sth->{'Statement'} . "($doc_oid, $section_name, $meta_name, $escaped_meta_value)\n";
571 }
572 else {
573 print STDERR $sth->{'Statement'} . "($doc_oid, $section_name, $meta_name, $escaped_meta_value)\n" if $self->{'verbosity'} > 2;
574
575 $sth->execute($doc_oid, $section_name, $meta_name, $escaped_meta_value)
576 || warn ("Unable to write metadata row to db:\n\tOID $doc_oid, section $section_name,\n\tmeta name: $meta_name, val: $escaped_meta_value");
577 # Execution failure will print out info anyway: since db connection sets PrintError
578 }
579}
580
581# As above. Likewise uses lazy loading to prepare the SQL insert stmt once for a table and store it,
582# then execute the (stored) statement each time it's needed for that table.
583sub insert_row_into_fulltxt_table {
584 my $self = shift (@_);
585 #my ($did, $sid, $fulltext) = @_;
586 my ($doc_oid, $section_name, $section_textref, $debug_only) = @_;
587
588 my $dbh = $self->{'db_handle'};
589
590 my $tablename = $self->get_fulltext_table_name();
591 my $sth = $dbh->prepare_cached(qq{INSERT INTO $tablename (did, sid, fulltxt) VALUES (?, ?, ?)});# || warn("Could not prepare insert statement for fulltxt table\n");
592
593 # Now we're ready to execute the command, unless we're only debugging
594
595 # don't display the fulltxt value as it could be too long
596 my $txt_repr = $$section_textref ? "<TXT>" : "NULL";
597 if($debug_only) { # only print statement, don't execute it
598 print STDERR $sth->{'Statement'} . "($doc_oid, $section_name, $txt_repr)\n";
599 }
600 else {
601 print STDERR $sth->{'Statement'} . "($doc_oid, $section_name, $txt_repr)\n" if $self->{'verbosity'} > 2;
602
603 $sth->execute($doc_oid, $section_name, $$section_textref)
604 || warn ("Unable to write fulltxt row to db for row:\n\tOID $doc_oid, section $section_name"); # Execution failure will print out info anyway: since db connection sets PrintError
605 }
606}
607
608
609## The 2 select statements used by GreenstoneSQLPlugin
610
611# Using fetchall_arrayref on statement handle, to run on prepared and executed stmt
612# https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#fetchall_arrayref
613# instead of selectall_arrayref on database handle which will prepare, execute and fetch
614# https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#selectall_arrayref
615#
616# Returns the statement handle that prepared and executed
617# a "SELECT * FROM <COLL>_metadata WHERE did = $oid" SQL statement.
618# Caller can call fetchrow_array() on returned statement handle, $sth
619# Have to use prepare() and execute() instead of do() since do() does
620# not allow for fetching result set thereafter:
621# do(): "This method is typically most useful for non-SELECT statements that either cannot be prepared in advance (due to a limitation of the driver) or do not need to be executed repeatedly. It should not be used for SELECT statements because it does not return a statement handle (so you can't fetch any data)." https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#do
622sub select_from_metatable_matching_docid {
623 my $self= shift (@_);
624 my ($oid, $outhandle) = @_;
625
626 my $dbh = $self->{'db_handle'};
627 my $tablename = $self->get_metadata_table_name();
628
629 my $sth = $dbh->prepare_cached(qq{SELECT * FROM $tablename WHERE did = ?});
630 $sth->execute( $oid ); # will print msg on fail
631
632 print $outhandle "### SQL select stmt: ".$sth->{'Statement'}."\n"
633 if ($self->{'verbosity'} > 2);
634
635 my $rows_ref = $sth->fetchall_arrayref();
636 # "If an error occurs, fetchall_arrayref returns the data fetched thus far, which may be none.
637 # You should check $sth->err afterwards (or use the RaiseError attribute) to discover if the
638 # data is complete or was truncated due to an error."
639 # https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#fetchall_arrayref
640 # https://www.oreilly.com/library/view/programming-the-perl/1565926994/ch04s05.html
641 warn("Data fetching from $tablename terminated early by error: " . $dbh->err) if $dbh->err;
642 return $rows_ref;
643}
644
645
646# See select_from_metatable_matching_docid() above.
647# Returns the statement handle that prepared and executed
648# a "SELECT * FROM <COLL>_metadata WHERE did = $oid" SQL statement.
649# Caller can call fetchrow_array() on returned statement handle, $sth
650sub select_from_texttable_matching_docid {
651 my $self= shift (@_);
652 my ($oid, $outhandle) = @_;
653
654 my $dbh = $self->{'db_handle'};
655 my $tablename = $self->get_fulltext_table_name();
656
657 my $sth = $dbh->prepare_cached(qq{SELECT * FROM $tablename WHERE did = ?});
658 $sth->execute( $oid ); # will print msg on fail
659
660 print $outhandle "### SQL select stmt: ".$sth->{'Statement'}."\n"
661 if ($self->{'verbosity'} > 2);
662
663 my $rows_ref = $sth->fetchall_arrayref();
664 # Need explicit warning:
665 warn("Data fetching from $tablename terminated early by error: " . $dbh->err) if $dbh->err;
666 return $rows_ref;
667
668}
669
670# delete all records in metatable with specified docid
671# https://www.tutorialspoint.com/mysql/mysql-delete-query.htm
672# DELETE FROM table_name [WHERE Clause]
673# see example under 'do' at https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm
674sub delete_recs_from_metatable_with_docid {
675 my $self= shift (@_);
676 my ($oid) = @_;
677
678 my $dbh = $self->{'db_handle'};
679
680 my $tablename = $self->get_metadata_table_name();
681 my $sth = $dbh->prepare_cached(qq{DELETE FROM $tablename WHERE did = ?});
682 $sth->execute( $oid ) or warn $dbh->errstr; # dbh set to print errors even without doing warn()
683}
684
685# delete all records in metatable with specified docid
686sub delete_recs_from_texttable_with_docid {
687 my $self= shift (@_);
688 my ($oid) = @_;
689
690 my $dbh = $self->{'db_handle'};
691
692 my $tablename = $self->get_fulltext_table_name();
693 my $sth = $dbh->prepare_cached(qq{DELETE FROM $tablename WHERE did = ?});
694 $sth->execute( $oid ) or warn $dbh->errstr; # dbh set to print errors even without doing warn()
695}
696
697# Can call this after connection succeeded to get the database handle, dbh,
698# if any specific DB operation (SQL statement, create/delete)
699# needs to be executed that is not already provided as a method of this class.
700sub get_db_handle {
701 my $self= shift (@_);
702 return $self->{'db_handle'};
703}
704
705################ HELPER METHODS ##############
706
707# More basic helper methods
708sub get_metadata_table_name {
709 my $self= shift (@_);
710 my $table_name = $self->{'tablename_prefix'} . "_metadata";
711 return $table_name;
712}
713
714# FULLTEXT is a reserved keyword in (My)SQL. https://dev.mysql.com/doc/refman/5.5/en/keywords.html
715# So we can't name a table or any of its columns "fulltext". We use "fulltxt" instead.
716sub get_fulltext_table_name {
717 my $self= shift (@_);
718 my $table_name = $self->{'tablename_prefix'} . "_fulltxt";
719 return $table_name;
720}
721
722# Attempt to make sure the name parameter (for db or table name) is acceptable syntax
723# for the db in question, e.g. for mysql. For example, (My)SQL doesn't like tables or
724# databases with '-' (hyphens) in their names
725sub sanitize_name {
726 my $self= shift (@_);
727 my ($name) = @_;
728 $name =~ s/-/_/g;
729 return $name;
730}
731
732
733# I can get my version of table_exists to work, but it's not so ideal
734# Interesting that MySQL has non-standard command to CREATE TABLE IF NOT EXISTS and DROP TABLE IF EXISTS,
735# see https://www.perlmonks.org/bare/?node=DBI%20Recipes
736# The page further has a table_exists function that could work with proper comparison
737# TODO Q: Couldn't get the first solution at https://www.perlmonks.org/bare/?node_id=500050 to work though
738sub table_exists {
739 my $self = shift (@_);
740 my $dbh = $self->{'db_handle'};
741 my ($table_name) = @_;
742
743 my @table_list = $dbh->tables;
744 #my $tables_str = @table_list[0];
745 foreach my $table (@table_list) {
746 return 1 if ($table =~ m/$table_name/);
747 }
748 return 0;
749}
750
7511;
Note: See TracBrowser for help on using the repository browser.