source: main/trunk/greenstone2/perllib/gssql.pm@ 32588

Last change on this file since 32588 was 32588, checked in by ak19, 2 years ago

Tidying up

File size: 35.2 KB
Line 
1###########################################################################
2#
3# gssql.pm -- DBI for SQL related utility functions used by
4# GreenstoneSQLPlugout and GreenstoneSQLPlugin too.
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 1999 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27package gssql;
28
29use strict;
30no strict 'refs';
31no strict 'subs';
32
33use DBI; # the central package for this module used by GreenstoneSQL Plugout and Plugin
34
35#################
36# Database functions that use the perl DBI module (with the DBD driver module for mysql)
37# https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm
38# https://metacpan.org/pod/DBD::mysql
39#################
40
41##############################
42
43# TODO Q: If disconnect is automatically called when object destroyed, what does that mean
44# for our file-global handle object, is disconnect only called at end of perl process?
45# Does that mean we don't need to explicitly call disconnect in gssql object's destroy during
46# the GLOBAL destruction phase?
47# https://perldoc.perl.org/perlobj.html#Destructors
48
49#+ TODO: add infrastructure for db_port, AutoCommit etc
50# For port, see https://stackoverflow.com/questions/2248665/perl-script-to-connect-to-mysql-server-port-3307
51
52# + TODO: remove unnecessary warn() since PrintError is active
53
54# TODO: drop table if exists and create table if exists are available in MySQL. Use those cmds
55# instead of always first checking for existence ourselves? Only when subclassing to specific
56# mysql class?
57
58
59# + TODO Q: What on cancelling a build: delete table? But what if it was a rebuild and the rebuild is cancelled (not the original build)?
60# Do we create a copy of the orig database as backup, then start populating current db, and if cancelled, delete current db and RENAME backup table to current?
61# https://stackoverflow.com/questions/3280006/duplicating-a-mysql-table-indexes-and-data
62# BUT what if the table is HUGE? (Think of a collection with millions of docs.) Huge overhead in copying?
63# The alternative is we just quit on cancel, but then: cancel could leave the table in a partial committed state, with no way of rolling back.
64# Unless they do a full rebuild, which will recreate the table from scratch?
65# SOLUTION-> rollback transaction on error, see https://www.effectiveperlprogramming.com/2010/07/set-custom-dbi-error-handlers/
66# In that case, should set AutoCommit to off on connection, and remember to commit at end.
67
68# + TODO: Consider AutoCommit status (and Autocommit off allowing commit or rollback for GS coll build cancel) later
69
70
71##############################
72
73# singleton connection
# File-scoped state shared by every gssql object in this process.
# A bare 'my' declaration already leaves the scalar undefined, so no explicit undef is needed.
my $_dbh_instance;  # the single shared DBI database handle; stays undefined until a connection is made
my $ref_count = 0;  # incremented per successful connect_to_db(), decremented by finished()
76
77# Need params_map keys:
78# - collection_name
79# - db_encoding (db content encoding) - MySQL can set this at server, db, table levels. For MySQL
80# we set the enc during connect at server level. Not sure whether other DB's support it at the
81# same levels.
82
83# For connection to MySQL, need:
84# - db_driver, db_client_user, db_client_pwd, db_host, (db_port not used at present)
85# So these will be parameterised, but in a hashmap, for just the connect method.
86
87# Parameterise (one or more methods may use them):
88# - db_name (which is the GS3 sitename, or "greenstone2" for GS2)
89
90# Database access related functions
91# http://g2pc1.bu.edu/~qzpeng/manual/MySQL%20Commands.htm
92# https://www.guru99.com/insert-into.html
93
94# Add signal handlers to cleanup and disconnect from db on sudden termination, incl cancel build
95# https://www.perl.com/article/37/2013/8/18/Catch-and-Handle-Signals-in-Perl/
# Install the cleanup handler for the termination signals that a process is able to trap.
# SIGKILL is deliberately not listed: the operating system never delivers it to a handler,
# so assigning $SIG{KILL} had no effect and only suggested protection that did not exist.
# NOTE: these assignments are process-wide and replace any INT/TERM handlers that were
# installed before this module was loaded.
$SIG{$_} = \&finish_signal_handler for qw(INT TERM);
99
sub finish_signal_handler {
    # Signal handler: undoes any uncommitted SQL work on the shared connection, then die()s.
    # Receives the name of the signal that was caught as its only argument.
    my $signal_name = shift;

    # A rollback is only meaningful when the singleton handle is still live and AutoCommit
    # is off, i.e. when an explicit transaction may still be pending (a cancelled build).
    if ($_dbh_instance && $_dbh_instance->{AutoCommit} == 0) {
        print STDERR " User cancelled: rolling back SQL database transaction.\n";
        # The connection is opened with PrintError on, so a failed rollback is reported by DBI.
        $_dbh_instance->rollback();
    }

    # Terminate via die() rather than exiting directly; the file's own notes rely on
    # die() so that destructors (sub DESTROY) still get run.
    die "Caught a $signal_name signal $!";
}
117
sub new
{
    # Constructor.
    # Takes a hashref ($params_map) from which these keys are read:
    #   - collection_name : stored as-is, and also sanitized into 'tablename_prefix'
    #   - verbosity       : optional, defaults to 1 only when not defined
    #   - db_encoding     : optional, defaults to "utf8"
    # Returns the blessed gssql object. No database connection is made here.
    my $class = shift(@_);
    my ($params_map) = @_;

    # Being explicit about the fields kept, rather than shallow-copying the whole params map.
    # Defined-or (//) is used for verbosity so that an explicit 0 (silent) is honoured;
    # with || a verbosity of 0 was silently replaced by 1.
    my $self = {
        'collection_name' => $params_map->{'collection_name'},
        'verbosity'       => $params_map->{'verbosity'} // 1,
    };

    # db_encoding is presently not passed in by callers; since docxml files are all utf8,
    # utf8 is the hidden default. An empty string also falls back to utf8, hence ||.
    $self->{'db_encoding'} = $params_map->{'db_encoding'} || "utf8";

    $self = bless($self, $class);

    # Needs the blessed object, as sanitize_name() is called as a method.
    $self->{'tablename_prefix'} = $self->sanitize_name($params_map->{'collection_name'});

    return $self;
}
149
150# On die(), an object's destructor is called.
151# See https://www.perl.com/article/37/2013/8/18/Catch-and-Handle-Signals-in-Perl/
152# We want to ensure we've closed the db connection in such cases.
153# "It’s common to call die when handling SIGINT and SIGTERM. die is useful because it will ensure that Perl stops correctly: for example Perl will execute a destructor method if present when die is called, but the destructor method will not be called if a SIGINT or SIGTERM is received and no signal handler calls die."
154#
155# Useful: https://perldoc.perl.org/perlobj.html#Destructors
156# For more on when destroy is called, see https://www.perlmonks.org/?node_id=1020920
157#
158# https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#disconnect
159# "Disconnects the database from the database handle. disconnect is typically only used before exiting the program. The handle is of little use after disconnecting.
160#
161# The transaction behaviour of the disconnect method is, sadly, undefined. Some database systems (such as Oracle and Ingres) will automatically commit any outstanding changes, but others (such as Informix) will rollback any outstanding changes. Applications not using AutoCommit should explicitly call commit or rollback before calling disconnect.
162#
163# The database is automatically disconnected by the DESTROY method if still connected when there are no longer any references to the handle. The DESTROY method for each driver should implicitly call rollback to undo any uncommitted changes. This is vital behaviour to ensure that incomplete transactions don't get committed simply because Perl calls DESTROY on every object before exiting. Also, do not rely on the order of object destruction during "global destruction", as it is undefined.
164#
165# Generally, if you want your changes to be committed or rolled back when you disconnect, then you should explicitly call "commit" or "rollback" before disconnecting.
166#
167# If you disconnect from a database while you still have active statement handles (e.g., SELECT statement handles that may have more data to fetch), you will get a warning. The warning may indicate that a fetch loop terminated early, perhaps due to an uncaught error. To avoid the warning call the finish method on the active handles."
168#
sub DESTROY {
    my ($self) = @_;

    if (${^GLOBAL_PHASE} eq 'DESTRUCT') {
        # The interpreter is shutting down: make sure the shared connection gets closed.
        # No commit or rollback happens here. finished() commits when the last user is done,
        # and finish_signal_handler() rolls back on a caught signal.

        # Singleton handle already gone (or never created): nothing left to close.
        return unless $_dbh_instance;

        print STDERR "XXXXXXXX Global Destruct: Disconnecting from database\n";
        unless ($_dbh_instance->disconnect) {
            warn $_dbh_instance->errstr;
        }

        # Forget the dead handle and reset the count of objects sharing it.
        undef $_dbh_instance;
        $ref_count = 0;
        return;
    }

    # Ordinary (non-global) destruction of one gssql object: the shared connection is left
    # alone, and destruction is chained to a parent class destructor if there is one.
    $self->SUPER::DESTROY if $self->can("SUPER::DESTROY");

}
204
205
206
207################### BASIC DB OPERATIONS ##################
208
209# THE NEW DB FUNCTIONS
210# NOTE: FULLTEXT is a reserved keyword in (My)SQL. So we can't name a table or any of its columns "fulltext".
211# https://dev.mysql.com/doc/refman/5.5/en/keywords.html
212
213
214
215# SINGLETON / GET INSTANCE PATTERN
216# https://stackoverflow.com/questions/16655603/perl-objects-class-variable-initialization
217# https://stackoverflow.com/questions/7587157/how-can-i-set-a-static-variable-that-can-be-accessed-by-all-subclasses-of-the-sa
218# Singleton without Moose: https://www.perl.com/article/52/2013/12/11/Implementing-the-singleton-pattern-in-Perl/
219
sub connect_to_db
{
    # Obtains the shared (singleton) database connection for this object.
    # Takes a hashref of connection parameters, which is handed on to
    # _get_connection_instance() together with this object's db_encoding and verbosity.
    # Returns the database handle on success, undef on failure.
    my $self = shift (@_);
    my ($params_map) = @_;

    # Work on a shallow copy, so the caller's hash is not modified as a side effect.
    my %conn_params = (
        %{ $params_map || {} },
        'db_encoding' => $self->{'db_encoding'},
        'verbosity'   => $self->{'verbosity'},
    );

    $self->{'db_handle'} = _get_connection_instance(\%conn_params); # getting singleton (class method)
    if($self->{'db_handle'}) {
        $ref_count++; # if successful, keep track of the number of refs to the single db connection
        return $self->{'db_handle'};
    }

    # Explicit undef kept as the failure value, matching what existing callers receive.
    return undef;
}
235
236# SINGLETON METHOD #
237# TODO: where should the defaults for these params be, here or in GS-SQLPlugin/Plugout?
sub _get_connection_instance
{
    # Singleton accessor: returns the one shared DBI database handle, connecting first if needed.
    # Called as a plain function (no $self) with a hashref that may contain:
    #   - verbosity      : if true, progress messages are printed to STDERR
    #   - autocommit     : DBI AutoCommit value; defaults to 1 (DBI's own default) when undefined
    #   - db_encoding    : content encoding requested by the caller
    #   - db_driver      : defaults to "mysql"
    #   - db_client_user : defaults to "root"
    #   - db_client_pwd  : may be undef; a failed connect then reports a sensible error
    #   - db_host        : defaults to "127.0.0.1"
    #   - db_port        : optional, only added to the connect string when set
    # Returns the database handle, or 0 if the connection attempt failed
    # (the error is then available through DBI->err / DBI->errstr).
    my ($params_map) = @_;

    # Work out the effective AutoCommit value first, so that the message below reports what
    # a new connection will really use. Previously an undefined autocommit parameter was
    # reported as "CANCEL SUPPORT ON" although the connection defaulted to AutoCommit on.
    # More: https://www.oreilly.com/library/view/programming-the-perl/1565926994/re44.html
    my $autocommit = (defined $params_map->{'autocommit'}) ? $params_map->{'autocommit'} : 1;

    if($params_map->{'verbosity'}) {
        if(!defined $params_map->{'autocommit'}) {
            print STDERR " Autocommit parameter not defined\n";
        }
        # Cancelling a build can only roll back when changes are not auto-committed.
        if($autocommit) {
            print STDERR " SQL DB CANCEL SUPPORT OFF.\n";
        } else {
            print STDERR " SQL DB CANCEL SUPPORT ON.\n";
        }
    }

    # Reuse the live connection if one has already been made.
    return $_dbh_instance if($_dbh_instance);

    # Otherwise make the connection.

    # For proper utf8 support in MySQL, encoding should be 'utf8mb4' as 'utf8' is insufficient:
    # it only covers characters needing up to 3 bytes, whereas up to 4 bytes may be needed.
    # Written as a ternary because "my $x = ... if COND" is a conditional declaration,
    # whose behaviour Perl leaves undefined.
    # Only the commented-out "set NAMES" alternative at the end of this sub uses the value.
    my $db_enc = ($params_map->{'db_encoding'} eq "utf8") ? "utf8mb4" : $params_map->{'db_encoding'};

    # These are the params for connecting to MySQL.
    my $db_driver = $params_map->{'db_driver'} || "mysql";
    my $db_user = $params_map->{'db_client_user'} || "root";
    my $db_pwd = $params_map->{'db_client_pwd'};
    # localhost doesn't work for us, but 127.0.0.1 works. See https://metacpan.org/pod/DBD::mysql
    # "The hostname, if not specified or specified as '' or 'localhost', will default to a MySQL server
    # running on the local machine using the default for the UNIX socket. To connect to a MySQL server
    # on the local machine via TCP, you must specify the loopback IP address (127.0.0.1) as the host."
    my $db_host = $params_map->{'db_host'} || "127.0.0.1";
    my $db_port = $params_map->{'db_port'}; # left undef if unspecified, so the driver's default port is used

    # Don't name a database in the connect string: this allows checking the db exists
    # later, when loading the db (see use_db).
    my $connect_str = "dbi:$db_driver:host=$db_host";
    $connect_str .= ";port=$db_port" if $db_port;

    if($params_map->{'verbosity'}) {
        print STDERR "Away to make connection to $db_driver database with:\n";
        print STDERR " - hostname $db_host; username: $db_user";
        print STDERR "; and the provided password" if $db_pwd;
        # Always terminate the summary line, not only when the driver is mysql.
        print STDERR "\n";
        print STDERR "Assuming the mysql server has been started with: --character_set_server=utf8mb4\n" if $db_driver eq "mysql";
    }

    my $dbh = DBI->connect($connect_str, $db_user, $db_pwd,
                           {
                               ShowErrorStatement => 1, # more informative as DBI will append failed SQL stmt to error message
                               PrintError => 1, # on by default, but being explicit
                               RaiseError => 0, # off by default, but being explicit
                               AutoCommit => $autocommit,
                               # Tells MySQL to use UTF-8 for communication and tells DBD::mysql to decode the data, see
                               # https://stackoverflow.com/questions/46727362/perl-mysql-utf8mb4-issue-possible-bug
                               mysql_enable_utf8mb4 => 1
                           });

    if(!$dbh) {
        # NOTE, despite handle dbh being undefined, error code will be in DBI->err (note caps)
        return 0;
    }

    # On encoding (https://metacpan.org/pod/DBD::mysql, https://dev.mysql.com/doc/refman/5.7/en/charset.html):
    # using utf8mb4 requires configuration on the server side too, i.e. running the server with
    #    mysqld_safe --datadir=<datadir> --character_set_server=utf8mb4
    # and, on the client side, EITHER passing mysql_enable_utf8mb4 => 1 as a connection option
    # (as done above) OR doing both "set NAMES utf8mb4" and "$dbh->{mysql_enable_utf8mb4} = 1"
    # after connecting.
    # https://stackoverflow.com/questions/10957238/incorrect-string-value-when-trying-to-insert-utf-8-into-mysql-via-jdbc
    # "SET NAMES" appears to be mysql specific as well, so the connection option is no more
    # objectionable and saves two statements. Should more db_drivers be supported in future,
    # a separate set_db_encoding($enc) method could issue the driver-specific statements
    # (see https://www.perlmonks.org/?node_id=259456), for mysql:

    #my $stmt = "set NAMES '" . $db_enc . "'";
    #$dbh->do($stmt) || warn("Unable to set charset encoding at db server level to: " . $db_enc . "\n"); # tells MySQL to use UTF-8 for communication
    #$dbh->{mysql_enable_utf8mb4} = 1; # tells DBD::mysql to decode the data

    # If we're here, then connection succeeded: store the handle as the singleton.
    $_dbh_instance = $dbh;
    return $_dbh_instance;

}
344
# Releases this object's share of the single db connection.
# Will disconnect if this instance of gssql holds the last reference to the db connection.
# If disconnecting and autocommit is off, then this will commit before disconnecting.
# Returns a true value if everything went fine (whether or not a commit was needed),
# or the false result of the commit if the commit failed.
sub finished {
    my $self = shift (@_);
    my $dbh = $self->{'db_handle'};

    my $rc = 1; # return code: everything went fine, regardless of whether we needed to commit
                # (AutoCommit on or off)

    # Nothing to release if this object never obtained a handle, or if the connection has
    # already been torn down. Without this guard an unbalanced call pushed the counter below
    # zero, after which it could not reach exactly 0 again at the right moment and the
    # connection would never be committed and closed.
    if(!$dbh || $ref_count <= 0) {
        $ref_count = 0 if $ref_count < 0;
        return $rc;
    }

    $ref_count--;

    # Other gssql objects still share the connection: only commit the transaction
    # when we're about to actually disconnect, not before.
    return $rc if $ref_count > 0;

    # If AutoCommit was off, meaning transactions were on/enabled,
    # then here is where we commit our one long transaction.
    # https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#commit
    if(!$dbh->{AutoCommit}) {
        print STDERR " Committing transaction to SQL database now.\n" if $self->{'verbosity'};
        # important problem, worth embellishing the error message
        $rc = $dbh->commit() or warn("SQL DB COMMIT FAILED: " . $dbh->errstr);
    }
    # else if autocommit was on, then we'd have committed after every db operation, so nothing to do

    $self->_force_disconnect_from_db();

    return $rc;
}
372
373
# Call this method on die(), so that you're sure the perl process has disconnected from SQL db
# Disconnect from db - https://metacpan.org/pod/DBI#disconnect
# TODO: make sure to have committed or rolled back before disconnect
# and that you've called finish() on statement handles if any fetch remnants remain
sub _force_disconnect_from_db {
    my ($self) = @_;

    if ($_dbh_instance) {
        # Active statement handles are deliberately NOT finished here. DBI docs:
        # "When all the data has been fetched from a SELECT statement, the driver will automatically call finish for you. So you should not call it explicitly except when you know that you've not fetched all the data from a statement handle and the handle won't be destroyed soon."

        print STDERR " GSSQL disconnecting from database\n" if $self->{'verbosity'};

        # Disconnect through the singleton handle rather than this object's own copy of it.
        unless ($_dbh_instance->disconnect) {
            warn $_dbh_instance->errstr;
        }
        undef $_dbh_instance;
    }

    # The connection is dead now, either because the last gssql object finished()
    # or because it was forcibly closed, so no gssql object shares a live connection.
    $ref_count = 0;
}
394
395
396# Load the designated database, i.e. 'use <dbname>;'.
397# If the database doesn't yet exist, creates it and loads it.
398# (Don't create the collection's tables yet, though)
399# At the end it will have loaded the requested database (in MySQL: "use <db>;") on success.
400# As usual, returns success or failure value that can be evaluated in a boolean context.
sub use_db {
    my ($self, $db_name) = @_;
    my $dbh = $self->{'db_handle'};
    $db_name = $self->sanitize_name($db_name);

    print STDERR "Attempting to use database $db_name\n" if($self->{'verbosity'});

    # perl DBI switch database: https://www.perlmonks.org/?node_id=995434
    # do() returns undef on error.
    my $loaded = $dbh->do("use $db_name");

    if ($loaded) {
        # The database already existed and is now selected.
        print STDERR "@@@ DATABASE $db_name EXISTED\n" if($self->{'verbosity'} > 2);
        return $loaded;
    }

    # Any failure other than "Unknown database" (code 1049, possibly mysql only) is passed
    # straight back to the caller as the false result of do().
    return $loaded unless $dbh->err == 1049;

    print STDERR "Database $db_name didn't exist, creating it along with the tables for the current collection...\n" if($self->{'verbosity'});

    # The database doesn't exist yet: attempt to create it.
    return 0 unless $self->create_db($db_name);

    print STDERR " Created database $db_name\n" if($self->{'verbosity'} > 1);

    # Once more attempt to use the db, now that it exists.
    return 0 unless $dbh->do("use $db_name");

    return 1;
}
437
438
439# We should already have done "use <database>;" if this gets called.
440# Just load this collection's metatable
sub ensure_meta_table_exists {
    my ($self) = @_;

    my $meta_table = $self->get_metadata_table_name();

    if ($self->table_exists($meta_table)) {
        print STDERR "@@@ Meta table exists\n" if($self->{'verbosity'} > 2);
        return 1;
    }

    # Table is missing: report 1 if it could be created, 0 otherwise.
    return $self->create_metadata_table() ? 1 : 0;
}
452
# We should already have done "use <database>;" if this gets called.
# Makes sure this collection's full text table exists, creating it if it doesn't.
# Returns 1 if the table exists or was created, 0 if creating it failed.
sub ensure_fulltxt_table_exists {
    my ($self) = @_;

    my $text_table = $self->get_fulltext_table_name();

    if ($self->table_exists($text_table)) {
        print STDERR "@@@ Fulltxt table exists\n" if($self->{'verbosity'} > 2);
        return 1;
    }

    # Table is missing: report 1 if it could be created, 0 otherwise.
    return $self->create_fulltext_table() ? 1 : 0;
}
466
467
sub create_db {
    # Creates the named database; the name is passed through sanitize_name() first.
    # Returns the result of DBI's do(), which is undef on failure (https://metacpan.org/pod/DBI#do).
    my ($self, $db_name) = @_;

    my $dbh = $self->{'db_handle'};
    my $safe_name = $self->sanitize_name($db_name);

    # https://stackoverflow.com/questions/5025768/how-can-i-create-a-mysql-database-from-a-perl-script
    return $dbh->do("create database $safe_name");
}
477
478
sub create_metadata_table {
    # Creates this collection's metadata table.
    # Returns the result of DBI's do(), which is undef on failure.
    my ($self) = @_;
    my $dbh = $self->{'db_handle'};

    my $meta_table = $self->get_metadata_table_name();
    print STDERR " Creating table $meta_table\n" if($self->{'verbosity'} > 1);

    # One row per metadata value, using an auto incremented primary key.
    my @column_defs = (
        "id INT NOT NULL AUTO_INCREMENT",
        "did VARCHAR(63) NOT NULL",
        "sid VARCHAR(63) NOT NULL",
        "metaname VARCHAR(127) NOT NULL",
        "metavalue VARCHAR(1023) NOT NULL",
        "PRIMARY KEY(id)",
    );
    my $create_sql = "CREATE TABLE $meta_table (" . join(", ", @column_defs) . ");";

    return $dbh->do($create_sql);
}
490
491# TODO: Investigate: https://dev.mysql.com/doc/search/?d=10&p=1&q=FULLTEXT
492# 12.9.1 Natural Language Full-Text Searches
493# to see whether we have to index the 'fulltxt' column of the 'fulltext' tables
494# or let user edit this file, or add it as another option
sub create_fulltext_table {
    # Creates this collection's full text table.
    # Returns the result of DBI's do(), which is undef on failure.
    my ($self) = @_;
    my $dbh = $self->{'db_handle'};

    my $text_table = $self->get_fulltext_table_name();
    print STDERR " Creating table $text_table\n" if($self->{'verbosity'} > 1);

    # One row per document section, using an auto incremented primary key.
    # Unlike the other columns, fulltxt has no NOT NULL constraint.
    my @column_defs = (
        "id INT NOT NULL AUTO_INCREMENT",
        "did VARCHAR(63) NOT NULL",
        "sid VARCHAR(63) NOT NULL",
        "fulltxt LONGTEXT",
        "PRIMARY KEY(id)",
    );
    my $create_sql = "CREATE TABLE $text_table (" . join(", ", @column_defs) . ");";

    return $dbh->do($create_sql);
}
507
508
sub delete_collection_tables {
    # Drops this collection's metadata table and full text table, each only if it exists.
    # Failures are not checked here; the connection is opened with PrintError on,
    # so DBI reports them itself.
    my ($self) = @_;
    my $dbh = $self->{'db_handle'};

    my $meta_table = $self->get_metadata_table_name();
    $dbh->do("drop table $meta_table") if $self->table_exists($meta_table);

    my $text_table = $self->get_fulltext_table_name();
    $dbh->do("drop table $text_table") if $self->table_exists($text_table);

    # TODO Q: commit here, so that future select statements work?
    # See https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#Transactions
}
526
527# Don't call this: it will delete the meta and full text tables for ALL collections in $db_name (localsite by default)!
528# This method is just here for debugging (for testing creating a database when there is none)
529#
530# "IF EXISTS is used to prevent an error from occurring if the database does not exist. ... DROP DATABASE returns the number of tables that were removed. The DROP DATABASE statement removes from the given database directory those files and directories that MySQL itself may create during normal operation.Jun 20, 2012"
531# MySQL 8.0 Reference Manual :: 13.1.22 DROP DATABASE Syntax
532# https://dev.mysql.com/doc/en/drop-database.html
sub _delete_database {
    # Debugging aid only: drops the whole named database (after sanitizing the name).
    # Returns 1 on success, 0 if the drop statement failed.
    my ($self, $db_name) = @_;
    my $dbh = $self->{'db_handle'};
    my $doomed_db = $self->sanitize_name($db_name);

    print STDERR "!!! Deleting database $doomed_db\n" if($self->{'verbosity'});

    return $dbh->do("drop database $doomed_db") ? 1 : 0;
}
546
547
548########################### DB STATEMENTS ###########################
549
550# USEFUL: https://metacpan.org/pod/DBI
551# "Many methods have an optional \%attr parameter which can be used to pass information to the driver implementing the method. Except where specifically documented, the \%attr parameter can only be used to pass driver specific hints. In general, you can ignore \%attr parameters or pass it as undef."
552
553# More efficient to use prepare() to prepare an SQL statement once and then execute() it many times
554# (binding different values to placeholders) than running do() which will prepare each time and
555# execute each time. Also, do() is not useful with SQL select statements as it doesn't fetch rows.
556# Can prepare and cache prepared statements or retrieve prepared statements if cached in one step:
557# https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#prepare_cached
558
559# https://www.guru99.com/insert-into.html
560# and https://dev.mysql.com/doc/refman/8.0/en/example-auto-increment.html
561# for inserting multiple rows at once
562# https://www.perlmonks.org/bare/?node_id=316183
563# https://metacpan.org/pod/DBI#do
564# https://www.quora.com/What-is-the-difference-between-prepare-and-do-statements-in-Perl-while-we-make-a-connection-to-the-database-for-executing-the-query
565# https://docstore.mik.ua/orelly/linux/dbi/ch05_05.htm
566
567# https://metacpan.org/pod/DBI#performance
568# 'The q{...} style quoting used in this example avoids clashing with quotes that may be used in the SQL statement. Use the double-quote like qq{...} operator if you want to interpolate variables into the string. See "Quote and Quote-like Operators" in perlop for more details.'
569#
570# This method uses lazy loading to prepare the SQL insert stmt once for a table and store it,
571# then execute the (stored) statement each time it's needed for that table.
sub insert_row_into_metadata_table {
    # Inserts one (did, sid, metaname, metavalue) row into this collection's metadata table.
    # If $debug_only is true, the statement and its values are only printed, not executed.
    my ($self, $doc_oid, $section_name, $meta_name, $escaped_meta_value, $debug_only) = @_;

    my $dbh = $self->{'db_handle'};
    my $meta_table = $self->get_metadata_table_name();

    # prepare_cached() prepares the statement on first use and returns the cached handle after that.
    my $insert_sth = $dbh->prepare_cached(qq{INSERT INTO $meta_table (did, sid, metaname, metavalue) VALUES (?, ?, ?, ?)});

    # Debugging: just show the statement we were going to execute.
    return print STDERR $insert_sth->{'Statement'} . "($doc_oid, $section_name, $meta_name, $escaped_meta_value)\n"
        if $debug_only;

    print STDERR $insert_sth->{'Statement'} . "($doc_oid, $section_name, $meta_name, $escaped_meta_value)\n"
        if $self->{'verbosity'} > 2;

    # Execution failure will print out info anyway, since the db connection sets PrintError;
    # the warning adds which row could not be written.
    return $insert_sth->execute($doc_oid, $section_name, $meta_name, $escaped_meta_value)
        || warn ("Unable to write metadata row to db:\n\tOID $doc_oid, section $section_name,\n\tmeta name: $meta_name, val: $escaped_meta_value");
}
595
596# As above. Likewise uses lazy loading to prepare the SQL insert stmt once for a table and store it,
597# then execute the (stored) statement each time it's needed for that table.
sub insert_row_into_fulltxt_table {
    # Inserts one (did, sid, fulltxt) row into this collection's full text table.
    # $section_textref is a reference to the section's text.
    # If $debug_only is true, the statement is only printed, not executed.
    my ($self, $doc_oid, $section_name, $section_textref, $debug_only) = @_;

    my $dbh = $self->{'db_handle'};
    my $text_table = $self->get_fulltext_table_name();

    # prepare_cached() prepares the statement on first use and returns the cached handle after that.
    my $insert_sth = $dbh->prepare_cached(qq{INSERT INTO $text_table (did, sid, fulltxt) VALUES (?, ?, ?)});

    # Don't display the fulltxt value itself as it could be too long: show a placeholder.
    my $text_marker = "NULL";
    $text_marker = "<TXT>" if $$section_textref;

    # Debugging: only print the statement, don't execute it.
    return print STDERR $insert_sth->{'Statement'} . "($doc_oid, $section_name, $text_marker)\n"
        if $debug_only;

    print STDERR $insert_sth->{'Statement'} . "($doc_oid, $section_name, $text_marker)\n"
        if $self->{'verbosity'} > 2;

    # Execution failure will print out info anyway, since the db connection sets PrintError.
    return $insert_sth->execute($doc_oid, $section_name, $$section_textref)
        || warn ("Unable to write fulltxt row to db for row:\n\tOID $doc_oid, section $section_name");
}
622
623
624## The 2 select statements used by GreenstoneSQLPlugin
625
626# Using fetchall_arrayref on statement handle, to run on prepared and executed stmt
627# https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#fetchall_arrayref
628# instead of selectall_arrayref on database handle which will prepare, execute and fetch
629# https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#selectall_arrayref
630#
631# Returns the statement handle that prepared and executed
632# a "SELECT * FROM <COLL>_metadata WHERE did = $oid" SQL statement.
633# Caller can call fetchrow_array() on returned statement handle, $sth
634# Have to use prepare() and execute() instead of do() since do() does
635# not allow for fetching result set thereafter:
636# do(): "This method is typically most useful for non-SELECT statements that either cannot be prepared in advance (due to a limitation of the driver) or do not need to be executed repeatedly. It should not be used for SELECT statements because it does not return a statement handle (so you can't fetch any data)." https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#do
sub select_from_metatable_matching_docid {
    # Fetches every row of the metadata table whose did column equals $oid.
    #
    # Params:
    #   $oid       - document id bound to the "did = ?" placeholder
    #   $outhandle - filehandle that the SQL statement is echoed to when verbosity > 2
    # Returns:
    #   the array ref produced by fetchall_arrayref() (one entry per fetched row).
    #   An empty array ref is returned when the statement could not be prepared
    #   or executed, so callers can always dereference the result.
    my $self= shift (@_);
    my ($oid, $outhandle) = @_;

    my $dbh = $self->{'db_handle'};
    my $tablename = $self->get_metadata_table_name();

    my $sth = $dbh->prepare_cached(qq{SELECT * FROM $tablename WHERE did = ?});
    if (!$sth) {
        # Without this guard a failed prepare ends in
        # "Can't call method execute on an undefined value".
        warn("Unable to prepare select statement for $tablename\n");
        return [];
    }

    # Echo the statement before running it, so it is visible even if execution fails
    print $outhandle "### SQL select stmt: ".$sth->{'Statement'}."\n"
        if ($self->{'verbosity'} > 2);

    # execute() returns undef on failure. Don't go on to fetch from a statement that
    # never ran: that would only raise a second, misleading "fetch without execute" error.
    if (!$sth->execute( $oid )) {
        warn("Unable to execute select statement on $tablename for OID $oid\n");
        return [];
    }

    my $rows_ref = $sth->fetchall_arrayref();
    # "If an error occurs, fetchall_arrayref returns the data fetched thus far, which may be none.
    # You should check $sth->err afterwards (or use the RaiseError attribute) to discover if the
    # data is complete or was truncated due to an error."
    # https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#fetchall_arrayref
    # https://www.oreilly.com/library/view/programming-the-perl/1565926994/ch04s05.html
    warn("Data fetching from $tablename terminated early by error: " . $sth->err) if $sth->err;
    return $rows_ref;
}
659
660
661# See select_from_metatable_matching_docid() above.
# Prepares and executes a "SELECT * FROM <COLL>_fulltxt WHERE did = $oid" SQL statement
# and returns the result of calling fetchall_arrayref() on the statement handle:
# a reference to an array holding all the fetched rows (not the statement handle itself).
sub select_from_texttable_matching_docid {
    # Fetches every row of the full-text table whose did column equals $oid.
    #
    # Params:
    #   $oid       - document id bound to the "did = ?" placeholder
    #   $outhandle - filehandle that the SQL statement is echoed to when verbosity > 2
    # Returns:
    #   the array ref produced by fetchall_arrayref() (one entry per fetched row).
    #   An empty array ref is returned when the statement could not be prepared
    #   or executed, so callers can always dereference the result.
    my $self= shift (@_);
    my ($oid, $outhandle) = @_;

    my $dbh = $self->{'db_handle'};
    my $tablename = $self->get_fulltext_table_name();

    my $sth = $dbh->prepare_cached(qq{SELECT * FROM $tablename WHERE did = ?});
    if (!$sth) {
        # Without this guard a failed prepare ends in
        # "Can't call method execute on an undefined value".
        warn("Unable to prepare select statement for $tablename\n");
        return [];
    }

    # Echo the statement before running it, so it is visible even if execution fails
    print $outhandle "### SQL select stmt: ".$sth->{'Statement'}."\n"
        if ($self->{'verbosity'} > 2);

    # execute() returns undef on failure. Don't go on to fetch from a statement that
    # never ran: that would only raise a second, misleading "fetch without execute" error.
    if (!$sth->execute( $oid )) {
        warn("Unable to execute select statement on $tablename for OID $oid\n");
        return [];
    }

    my $rows_ref = $sth->fetchall_arrayref();
    # fetchall_arrayref returns whatever was fetched before an error occurred, so an
    # explicit check of the statement handle's error state is needed to spot truncation.
    warn("Data fetching from $tablename terminated early by error: " . $sth->err) if $sth->err;
    return $rows_ref;
}
684
685# delete all records in metatable with specified docid
686# https://www.tutorialspoint.com/mysql/mysql-delete-query.htm
687# DELETE FROM table_name [WHERE Clause]
688# see example under 'do' at https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm
sub delete_recs_from_metatable_with_docid {
    # Removes all rows of the metadata table whose did column equals $oid.
    # The oid is passed as a bound parameter of a cached prepared statement.
    my ($self, $oid) = @_;

    my $dbh = $self->{'db_handle'};
    my $metatable = $self->get_metadata_table_name();

    my $delete_stmt = $dbh->prepare_cached(qq{DELETE FROM $metatable WHERE did = ?});

    # On failure, warn with the handle's error string
    # (the dbh is set to print errors even without the explicit warn).
    return ($delete_stmt->execute($oid) or warn $dbh->errstr);
}
699
# delete all records in texttable (the fulltxt table) with specified docid
sub delete_recs_from_texttable_with_docid {
    # Removes all rows of the full-text table whose did column equals $oid.
    # The oid is passed as a bound parameter of a cached prepared statement.
    my ($self, $oid) = @_;

    my $dbh = $self->{'db_handle'};
    my $texttable = $self->get_fulltext_table_name();

    my $delete_stmt = $dbh->prepare_cached(qq{DELETE FROM $texttable WHERE did = ?});

    # On failure, warn with the handle's error string
    # (the dbh is set to print errors even without the explicit warn).
    return ($delete_stmt->execute($oid) or warn $dbh->errstr);
}
711
712# Can call this after connection succeeded to get the database handle, dbh,
713# if any specific DB operation (SQL statement, create/delete)
714# needs to be executed that is not already provided as a method of this class.
sub get_db_handle {
    # Accessor: hands back whatever is stored under this object's 'db_handle' key.
    my ($self) = @_;
    my $handle = $self->{'db_handle'};
    return $handle;
}
719
720################ HELPER METHODS ##############
721
722# More basic helper methods
sub get_metadata_table_name {
    # Metadata table name: the object's 'tablename_prefix' followed by "_metadata".
    my ($self) = @_;
    return "$self->{'tablename_prefix'}_metadata";
}
728
729# FULLTEXT is a reserved keyword in (My)SQL. https://dev.mysql.com/doc/refman/5.5/en/keywords.html
730# So we can't name a table or any of its columns "fulltext". We use "fulltxt" instead.
sub get_fulltext_table_name {
    # Full-text table name: the object's 'tablename_prefix' followed by "_fulltxt"
    # ("fulltxt" rather than "fulltext", since FULLTEXT is a reserved (My)SQL keyword).
    my ($self) = @_;
    return "$self->{'tablename_prefix'}_fulltxt";
}
736
737# Attempt to make sure the name parameter (for db or table name) is acceptable syntax
738# for the db in question, e.g. for mysql. For example, (My)SQL doesn't like tables or
739# databases with '-' (hyphens) in their names
sub sanitize_name {
    # Returns a copy of $name in which every hyphen has been replaced by an underscore.
    # No other characters are altered.
    my ($self, $name) = @_;
    (my $sanitized = $name) =~ s/-/_/g;
    return $sanitized;
}
746
747
748# I can get my version of table_exists to work, but it's not so ideal
749# Interesting that MySQL has non-standard command to CREATE TABLE IF NOT EXISTS and DROP TABLE IF EXISTS,
750# see https://www.perlmonks.org/bare/?node=DBI%20Recipes
751# The page further has a table_exists function that could work with proper comparison
752# TODO Q: Couldn't get the first solution at https://www.perlmonks.org/bare/?node_id=500050 to work though
sub table_exists {
    # Returns 1 if the connected database has a table named exactly $table_name, else 0.
    #
    # The names returned by $dbh->tables may be quoted and qualified, e.g. `db`.`table`
    # for MySQL. Both those names and $table_name are therefore reduced to the bare table
    # name (qualifier and surrounding quote characters removed) and compared with 'eq'.
    # The previous unanchored regex match (m/$table_name/) treated the name as a pattern
    # and as a substring, so "demo_metadata" was reported as existing when only
    # "my_demo_metadata" or "demo_metadata_old" did, and an empty name matched any table.
    my $self = shift (@_);
    my $dbh = $self->{'db_handle'};
    my ($table_name) = @_;

    return 0 unless (defined $table_name && $table_name ne "");

    # Strips a leading "catalog.schema." qualifier and one pair of identifier quotes
    my $bare_name_of = sub {
        my ($identifier) = @_;
        $identifier =~ s/^.*\.//s;
        $identifier =~ s/^[`"]//;
        $identifier =~ s/[`"]$//;
        return $identifier;
    };

    my $wanted = $bare_name_of->($table_name);

    my @table_list = $dbh->tables;
    foreach my $table (@table_list) {
        next unless defined $table;
        return 1 if ($table eq $table_name);
        return 1 if ($bare_name_of->($table) eq $wanted);
    }
    return 0;
}
765
7661;
Note: See TracBrowser for help on using the repository browser.