root/main/trunk/greenstone2/perllib/gssql.pm @ 32581

Revision 32581, 29.7 KB (checked in by ak19, 10 months ago)

Adding sigint, sigterm and sigkill handlers to gssql, which all call finish_signal_handler which does a die(). When perl executes a die statement, it always calls the object destructor. Added an destructor sub DESTRUCT to gssql to close the connection to the db, on GLOBAL DESTRUCT PHASE, if singleton db_handle is not undef. I'm thinking that this is where autocommit rollbacks will need to be added as well.

Line 
1###########################################################################
2#
3# gssql.pm -- DBI for SQL related utility functions used by
4# GreenstoneSQLPlugout and hereafter by GreenstoneSQLPlugin too.
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 1999 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27package gssql;
28
29use strict;
30no strict 'refs';
31no strict 'subs';
32
33use DBI; # the central package for this module used by GreenstoneSQL Plugout and Plugin
34
35#$SIG{INT}  = sub { die "Caught a sigint $!" };
36#$SIG{TERM}  = sub { die "Caught a sigterm $!" };
37#$SIG{KILL}  = sub { die "Caught a sigkill $!" };
38
39
40$SIG{INT}  = \&finish_signal_handler;
41$SIG{TERM}  = \&finish_signal_handler;
42$SIG{KILL}  = \&finish_signal_handler;
43
44sub finish_signal_handler {
45    my ($sig) = @_; # one of INT|KILL|TERM
46    die "Caught a $sig signal $!"; # will call destructor
47}
48##############################
49
50# TODO: add infrastructure for db_port, AutoCommit etc
51# For port, see https://stackoverflow.com/questions/2248665/perl-script-to-connect-to-mysql-server-port-3307
52
53# + TODO: remove unnecessary warn() since PrintError is active
54# https://perldoc.perl.org/perlobj.html#Destructors
55
56# TODO: drop table if exists and create table if exists are available in MySQL. Use those cmds
57# instead of always first checking for existence ourselves?
58##############################
59
60# singleton connection
61my $_dbh_instance = undef; # calls undef() function. See https://perlmaven.com/undef-and-defined-in-perl
62my $ref_count = 0;
63
64# Need params_map keys:
65# - collection_name
66# - db_encoding (db content encoding) - MySQL can set this at server, db, table levels. For MySQL
67# we set the enc during connect at server level. Not sure whether other DB's support it at the
68# same levels.
69
70# For connection to MySQL, need:
71#  - db_driver, db_client_user, db_client_pwd, db_host, (db_port not used at present)
72# So these will be parameterised, but in a hashmap, for just the connect method.
73
74# Parameterise (one or more methods may use them):
75# - db_name (which is the GS3 sitename)
76
77sub new
78
79    my $class = shift(@_);
80   
81    my ($params_map) = @_;
82   
83    # library_url: to be specified on the cmdline if not using a GS-included web server
84    # the GSDL_LIBRARY_URL env var is useful when running cmdline buildcol.pl in the linux package manager versions of GS3
85   
86    # https://stackoverflow.com/questions/7083453/copying-a-hashref-in-perl
87    # Making a shallow copy works, and can handle unknown params:
88    #my $self = $params_map;
89
90    # but being explicit for class params needed for MySQL:
91    my $self = {
92    'collection_name' => $params_map->{'collection_name'},
93    'verbosity' => $params_map->{'verbosity'} || 1
94    };
95
96    # The db_encoding option is presently not passed in to this constructor as parameter.
97    # Placed here to indicate it's sort of optional.
98    # Since docxml are all in utf8, the contents of the GS SQL database should be too,
99    # So making utf8 the hidden default at present.
100    $self->{'db_encoding'} = $params_map->{'db_encoding'} || "utf8";
101   
102    $self = bless($self, $class);   
103
104    $self->{'tablename_prefix'} = $self->sanitize_name($params_map->{'collection_name'});
105   
106    return $self;
107}
108
109# On die(), an object's destructor is called.
110# See https://www.perl.com/article/37/2013/8/18/Catch-and-Handle-Signals-in-Perl/
111# We want to ensure we've closed the db connection in such cases.
112# "It’s common to call die when handling SIGINT and SIGTERM. die is useful because it will ensure that Perl stops correctly: for example Perl will execute a destructor method if present when die is called, but the destructor method will not be called if a SIGINT or SIGTERM is received and no signal handler calls die."
113# https://perldoc.perl.org/perlobj.html#Destructors
114sub DESTROY {
115    my $self = shift;
116
117    if (${^GLOBAL_PHASE} eq 'DESTRUCT') {
118    if ($_dbh_instance) {
119        print STDERR "XXXXXXXX Global Destruct: Disconnecting from database\n";
120        $_dbh_instance->disconnect or warn $_dbh_instance->errstr;
121        $_dbh_instance = undef;
122        $ref_count = 0;
123    }
124    return;
125    }
126}
127
128#################################
129
130# Database access related functions
131# http://g2pc1.bu.edu/~qzpeng/manual/MySQL%20Commands.htm
132# https://www.guru99.com/insert-into.html
133
134# TODO Q: What on cancelling a build: delete table? But what if it was a rebuild and the rebuild is cancelled (not the original build)?
135# Do we create a copy of the orig database as backup, then start populating current db, and if cancelled, delete current db and RENAME backup table to current?
136# https://stackoverflow.com/questions/3280006/duplicating-a-mysql-table-indexes-and-data
137# BUT what if the table is HUGE? (Think of a collection with millions of docs.) Huge overhead in copying?
138# The alternative is we just quit on cancel, but then: cancel could leave the table in a partial committed state, with no way of rolling back.
139# Unless they do a full rebuild, which will recreate the table from scratch?
140# SOLUTION-> rollback transaction on error, see https://www.effectiveperlprogramming.com/2010/07/set-custom-dbi-error-handlers/
141# But then should set AutoCommit to off on connection, and remember to commit every time
142
143#################
144# Database functions that use the perl DBI module (with the DBD driver module for mysql)
145#################
146
147################### BASIC DB OPERATIONS ##################
148
149# THE NEW DB FUNCTIONS
150# NOTE: FULLTEXT is a reserved keyword in (My)SQL. So we can't name a table or any of its columns "fulltext".
151# https://dev.mysql.com/doc/refman/5.5/en/keywords.html
152
153# TODO: Consider AutoCommit status (and Autocommit off allowing commit or rollback for GS coll build cancel) later
154
155
156
157# SINGLETON / GET INSTANCE PATTERN
158# https://stackoverflow.com/questions/16655603/perl-objects-class-variable-initialization
159# https://stackoverflow.com/questions/7587157/how-can-i-set-a-static-variable-that-can-be-accessed-by-all-subclasses-of-the-sa
160# Singleton without Moose: https://www.perl.com/article/52/2013/12/11/Implementing-the-singleton-pattern-in-Perl/
161
162sub connect_to_db
163{
164    my $self= shift (@_);
165    my ($params_map) = @_;
166   
167    $params_map->{'db_encoding'} = $self->{'db_encoding'};
168    $params_map->{'verbosity'} = $self->{'verbosity'};
169   
170    $self->{'db_handle'} = &_get_connection_instance($params_map); # getting singleton (class method)
171    if($self->{'db_handle'}) {
172    $ref_count++; # if successful, keep track of the number of refs to the single db connection
173    return $self->{'db_handle'};
174    }
175    return undef;
176}
177
178# SINGLETON METHOD #
179# TODO: where should the defaults for these params be, here or in GS-SQLPlugin/Plugout?
180sub _get_connection_instance
181{
182    #my $self= shift (@_); # singleton method doesn't use self, but callers don't need to know that
183    my ($params_map) = @_;
184
185    return $_dbh_instance if($_dbh_instance);
186
187    # or make the connection
188   
189    # For proper utf8 support in MySQL, encoding should be 'utf8mb4' as 'utf8' is insufficient
190    my $db_enc = "utf8mb4" if $params_map->{'db_encoding'} eq "utf8";
191
192    # these are the params for connecting to MySQL
193    my $db_driver = $params_map->{'db_driver'} || "mysql";
194    my $db_user = $params_map->{'db_client_user'} || "root";
195    my $db_pwd = $params_map->{'db_client_pwd'}; # even if undef and password was necessary,
196                                     # we'll see a sensible error message when connect fails
197    my $db_host = $params_map->{'db_host'} || "127.0.0.1";
198        # localhost doesn't work for us, but 127.0.0.1 works
199        # https://metacpan.org/pod/DBD::mysql
200        # "The hostname, if not specified or specified as '' or 'localhost', will default to a MySQL server
201        # running on the local machine using the default for the UNIX socket. To connect to a MySQL server
202        # on the local machine via TCP, you must specify the loopback IP address (127.0.0.1) as the host."
203    my $db_port = $params_map->{'db_port'}; # leave as undef if unspecified,
204                 # as our tests never used port anyway (must have internally
205                 # defaulted to whatever default port is used for MySQL)
206
207   
208    #my $connect_str = "dbi:$db_driver:database=$db_name;host=$db_host";
209    # But don't provide db now - this allows checking the db exists later when loading the db
210    my $connect_str = "dbi:$db_driver:host=$db_host";
211    $connect_str .= ";port=$db_port" if $db_port;
212
213    if($params_map->{'verbosity'}) {
214    print STDERR "Away to make connection to $db_driver database with:\n";
215    print STDERR " - hostname $db_host; username: $db_user";
216    print STDERR "; and the provided password" if $db_pwd;
217    print STDERR "\nAssuming the mysql server has been started with: --character_set_server=utf8mb4\n" if $db_driver eq "mysql";
218    }
219   
220    my $dbh = DBI->connect("$connect_str", $db_user, $db_pwd,
221               {
222                   ShowErrorStatement => 1, # more informative as DBI will append failed SQL stmt to error message
223                   PrintError => 1, # on by default, but being explicit
224                   RaiseError => 0, # off by default, but being explicit
225                   AutoCommit => 1, # on by default, but being explicit
226                   mysql_enable_utf8mb4 => 1 # tells MySQL to use UTF-8 for communication and tells DBD::mysql to decode the data, see https://stackoverflow.com/questions/46727362/perl-mysql-utf8mb4-issue-possible-bug
227               });
228
229    if(!$dbh) {
230    # NOTE, despite handle dbh being undefined, error code will be in DBI->err (note caps)
231    return 0;   
232    }
233
234    # set encoding https://metacpan.org/pod/DBD::mysql
235    # https://dev.mysql.com/doc/refman/5.7/en/charset.html
236    # https://dev.mysql.com/doc/refman/5.7/en/charset-conversion.html
237    # Setting the encoding at db server level: $dbh->do("set NAMES '" . $db_enc . "'");
238    # HOWEVER:
239    # It turned out insufficient setting the encoding to utf8, as that only supports utf8 chars that
240    # need up to 3 bytes. We may need up to 4 bytes per utf8 character, e.g. chars with macron,
241    # and for that, we need the encoding to be set to utf8mb4.
242    # To set up a MySQL db to use utf8mb4 requires configuration on the server side too.
243    # https://stackoverflow.com/questions/10957238/incorrect-string-value-when-trying-to-insert-utf-8-into-mysql-via-jdbc
244    # https://stackoverflow.com/questions/46727362/perl-mysql-utf8mb4-issue-possible-bug
245    # To set up the db for utf8mb4, therefore,
246    # the MySQL server needs to be configured for that char encoding by running the server as:
247    # mysql-5.7.23-linux-glibc2.12-x86_64/bin>./mysqld_safe --datadir=/Scratch/ak19/mysql/data --character_set_server=utf8mb4
248    # AND when connecting to the server, we can can either set mysql_enable_utf8mb4 => 1
249    # as a connection option
250    # OR we need to do both "set NAMES utf8mb4" AND "$dbh->{mysql_enable_utf8mb4} = 1;" after connecting
251    #
252    # Search results for DBI Set Names imply the "SET NAMES '<enc>'" command is mysql specific too,
253    # so setting the mysql specific option during connection above as "mysql_enable_utf8mb4 => 1"
254    # is no more objectionable. It has the advantage of cutting out the 2 extra lines of doing
255    # set NAMES '<enc>' and $dbh->{mysql_enable_utf8mb4} = 1 here.
256    # These lines may be preferred if more db_driver options are to be supported in future:
257    # then a separate method called set_db_encoding($enc) can work out what db_driver we're using
258    # and if mysql and enc=utfy, then it can do the following whereas it will issue other do stmts
259    # for other db_drivers, see https://www.perlmonks.org/?node_id=259456:
260   
261    #my $stmt = "set NAMES '" . $db_enc . "'";
262    #$dbh->do($stmt) || warn("Unable to set charset encoding at db server level to: " . $db_enc . "\n"); # tells MySQL to use UTF-8 for communication
263    #$dbh->{mysql_enable_utf8mb4} = 1; # tells DBD::mysql to decode the data
264   
265    # if we're here, then connection succeeded, store handle
266    $_dbh_instance = $dbh;
267    return $_dbh_instance;
268   
269}
270
271# Will disconnect if this instance of gssql holds the last reference to the db connection
272sub finished {
273    my $self= shift (@_);
274
275    $ref_count--;
276    if($ref_count == 0) {
277    $self->force_disconnect_from_db();
278    }   
279}
280
281# Call this method on die(), so that you're sure the perl process has disconnected from SQL db
282# Disconnect from db - https://metacpan.org/pod/DBI#disconnect
283# TODO: make sure to have committed or rolled back before disconnect
284# and that you've call finish() on statement handles if any fetch remnants remain
285sub force_disconnect_from_db {
286    my $self= shift (@_);
287
288    if($_dbh_instance) {
289    # make sure any active stmt handles are finished
290    # NO: "When all the data has been fetched from a SELECT statement, the driver will automatically call finish for you. So you should not call it explicitly except when you know that you've not fetched all the data from a statement handle and the handle won't be destroyed soon."
291   
292    print STDERR "    GSSQL disconnecting from database\n";
293    # Just go through the singleton db handle to disconnect
294    $_dbh_instance->disconnect or warn $_dbh_instance->errstr;
295    $_dbh_instance = undef;
296    }
297    # Number of gssql objects that share a live connection is now 0, as the connection's dead
298    # either because the last gssql object finished() or because connection was killed (force)
299    $ref_count = 0;
300}
301
302
303# Load the designated database, i.e. 'use <dbname>;'.
304# If the database doesn't yet exist, creates it and loads it.
305# (Don't create the collection's tables yet, though)
306# At the end it will have loaded the requested database (in MySQL: "use <db>;") on success.
307# As usual, returns success or failure value that can be evaluated in a boolean context.
308sub use_db {
309    my $self= shift (@_);
310    my ($db_name) = @_;
311    my $dbh = $self->{'db_handle'};
312    $db_name = $self->sanitize_name($db_name);
313   
314    print STDERR "Attempting to use database $db_name\n" if($self->{'verbosity'});
315   
316    # perl DBI switch database: https://www.perlmonks.org/?node_id=995434
317    # do() returns undef on error.
318    # connection succeeded, try to load our database. If that didn't work, attempt to create db
319    my $success = $dbh->do("use $db_name");
320   
321    if(!$success && $dbh->err == 1049) { # "Unknown database" error has code 1049 (mysql only?) meaning db doesn't exist yet
322
323    print STDERR "Database $db_name didn't exist, creating it along with the tables for the current collection...\n" if($self->{'verbosity'});
324   
325    # attempt to create the db and its tables
326    $self->create_db($db_name) || return 0;
327
328    print STDERR "   Created database $db_name\n" if($self->{'verbosity'} > 1);
329   
330    # once more attempt to use db, now that it exists
331    $dbh->do("use $db_name") || return 0;
332    #$dbh->do("use $db_name") or die "Error (code" . $dbh->err ."): " . $dbh->errstr . "\n";
333
334    $success = 1;
335    }
336    elsif($success) { # database existed and loaded successfully, but
337    # before proceeding check that the current collection's tables exist
338
339    print STDERR "@@@ DATABASE $db_name EXISTED\n" if($self->{'verbosity'} > 2);
340    }
341   
342    return $success; # could still return 0, if database failed to load with an error code != 1049
343}
344
345
346# We should already have done "use <database>;" if this gets called.
347# Just load this collection's metatable
348sub ensure_meta_table_exists {
349    my $self = shift (@_);
350   
351    my $tablename = $self->get_metadata_table_name();
352    if(!$self->table_exists($tablename)) {
353    #print STDERR "   Creating metadata table $tablename\n" if($self->{'verbosity'} > 1);
354    $self->create_metadata_table() || return 0;
355    } else {
356    print STDERR "@@@ Meta table exists\n" if($self->{'verbosity'} > 2);
357    }
358    return 1;
359}
360
361# We should already have done "use <database>;" if this gets called.
362# Just load this collection's metatable
363sub ensure_fulltxt_table_exists {
364    my $self = shift (@_);
365   
366    my $tablename = $self->get_fulltext_table_name();   
367    if(!$self->table_exists($tablename)) {
368    #print STDERR "   Creating fulltxt table $tablename\n" if($self->{'verbosity'} > 1);
369    $self->create_fulltext_table() || return 0;
370    } else {
371    print STDERR "@@@ Fulltxt table exists\n" if($self->{'verbosity'} > 2);
372    }
373    return 1;
374}
375
376
377sub create_db {
378    my $self= shift (@_);
379    my ($db_name) = @_;
380    my $dbh = $self->{'db_handle'};
381    $db_name = $self->sanitize_name($db_name);
382   
383    # https://stackoverflow.com/questions/5025768/how-can-i-create-a-mysql-database-from-a-perl-script
384    return $dbh->do("create database $db_name"); # do() will return undef on fail, https://metacpan.org/pod/DBI#do
385}
386
387
388sub create_metadata_table {
389    my $self= shift (@_);
390    my $dbh = $self->{'db_handle'};
391   
392    my $table_name = $self->get_metadata_table_name();
393    print STDERR "   Creating table $table_name\n" if($self->{'verbosity'} > 1);
394   
395    # If using an auto incremented primary key:
396    my $stmt = "CREATE TABLE $table_name (id INT NOT NULL AUTO_INCREMENT, did VARCHAR(63) NOT NULL, sid VARCHAR(63) NOT NULL, metaname VARCHAR(127) NOT NULL, metavalue VARCHAR(1023) NOT NULL, PRIMARY KEY(id));";
397    return $dbh->do($stmt);
398}
399
400# TODO: Investigate: https://dev.mysql.com/doc/search/?d=10&p=1&q=FULLTEXT
401# 12.9.1 Natural Language Full-Text Searches
402# to see whether we have to index the 'fulltxt' column of the 'fulltext' tables
403# or let user edit this file, or add it as another option
404sub create_fulltext_table {
405    my $self= shift (@_);
406    my $dbh = $self->{'db_handle'};
407   
408    my $table_name = $self->get_fulltext_table_name();
409    print STDERR "   Creating table $table_name\n" if($self->{'verbosity'} > 1);
410   
411    # If using an auto incremented primary key:
412    my $stmt = "CREATE TABLE $table_name (id INT NOT NULL AUTO_INCREMENT, did VARCHAR(63) NOT NULL, sid VARCHAR(63) NOT NULL, fulltxt LONGTEXT, PRIMARY KEY(id));";
413    return $dbh->do($stmt);
414
415}
416
417
418sub delete_collection_tables {
419    my $self= shift (@_);
420    my $dbh = $self->{'db_handle'};
421
422    # drop table <tablename>
423    my $table = $self->get_metadata_table_name();
424    if($self->table_exists($table)) {
425    $dbh->do("drop table $table");# || warn("@@@ Couldn't delete $table");
426    }
427    $table = $self->get_fulltext_table_name();
428    if($self->table_exists($table)) {
429    $dbh->do("drop table $table");# || warn("@@@ Couldn't delete $table");
430    }
431}
432
433# Don't call this: it will delete the meta and full text tables for ALL collections in $db_name (localsite by default)!
434# This method is just here for debugging (for testing creating a database when there is none)
435#
436# "IF EXISTS is used to prevent an error from occurring if the database does not exist. ... DROP DATABASE returns the number of tables that were removed. The DROP DATABASE statement removes from the given database directory those files and directories that MySQL itself may create during normal operation.Jun 20, 2012"
437# MySQL 8.0 Reference Manual :: 13.1.22 DROP DATABASE Syntax
438# https://dev.mysql.com/doc/en/drop-database.html
439sub _delete_database {
440    my $self= shift (@_);
441    my ($db_name) = @_;
442    my $dbh = $self->{'db_handle'};
443    $db_name = $self->sanitize_name($db_name);
444   
445    print STDERR "!!! Deleting database $db_name\n" if($self->{'verbosity'});
446   
447    # "drop database dbname"
448    $dbh->do("drop database $db_name") || return 0;
449
450    return 1;
451}
452
453
454########################### DB STATEMENTS ###########################
455
456# USEFUL: https://metacpan.org/pod/DBI
457# "Many methods have an optional \%attr parameter which can be used to pass information to the driver implementing the method. Except where specifically documented, the \%attr parameter can only be used to pass driver specific hints. In general, you can ignore \%attr parameters or pass it as undef."
458
459# More efficient to use prepare() to prepare an SQL statement once and then execute() it many times
460# (binding different values to placeholders) than running do() which will prepare each time and
461# execute each time. Also, do() is not useful with SQL select statements as it doesn't fetch rows.
462# Can prepare and cache prepared statements or retrieve prepared statements if cached in one step:
463# https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#prepare_cached
464
465# https://www.guru99.com/insert-into.html
466# and https://dev.mysql.com/doc/refman/8.0/en/example-auto-increment.html
467#     for inserting multiple rows at once
468# https://www.perlmonks.org/bare/?node_id=316183
469# https://metacpan.org/pod/DBI#do
470# https://www.quora.com/What-is-the-difference-between-prepare-and-do-statements-in-Perl-while-we-make-a-connection-to-the-database-for-executing-the-query
471# https://docstore.mik.ua/orelly/linux/dbi/ch05_05.htm
472
473# https://metacpan.org/pod/DBI#performance
474# 'The q{...} style quoting used in this example avoids clashing with quotes that may be used in the SQL statement. Use the double-quote like qq{...} operator if you want to interpolate variables into the string. See "Quote and Quote-like Operators" in perlop for more details.'
475#
476# This method uses lazy loading to prepare the SQL insert stmt once for a table and store it,
477# then execute the (stored) statement each time it's needed for that table.
478sub insert_row_into_metadata_table {
479    my $self = shift (@_);
480    my ($doc_oid, $section_name, $meta_name, $escaped_meta_value, $debug_only) = @_;
481   
482    my $dbh = $self->{'db_handle'};
483   
484    my $tablename = $self->get_metadata_table_name();
485    my $sth = $dbh->prepare_cached(qq{INSERT INTO $tablename (did, sid, metaname, metavalue) VALUES (?, ?, ?, ?)});# || warn("Could not prepare insert statement for metadata table\n");
486
487    # Now we're ready to execute the command, unless we're only debugging
488   
489    if($debug_only) {
490    # just print the statement we were going to execute
491    print STDERR $sth->{'Statement'} . "($doc_oid, $section_name, $meta_name, $escaped_meta_value)\n";
492    }
493    else {
494    print STDERR $sth->{'Statement'} . "($doc_oid, $section_name, $meta_name, $escaped_meta_value)\n" if $self->{'verbosity'} > 2;
495   
496    $sth->execute($doc_oid, $section_name, $meta_name, $escaped_meta_value)
497        || warn ("Unable to write metadata row to db:\n\tOID $doc_oid, section $section_name,\n\tmeta name: $meta_name, val: $escaped_meta_value");
498    # Execution failure will print out info anyway: since db connection sets PrintError
499    }
500}
501
502# As above. Likewise uses lazy loading to prepare the SQL insert stmt once for a table and store it,
503# then execute the (stored) statement each time it's needed for that table.
504sub insert_row_into_fulltxt_table {
505    my $self = shift (@_);
506    #my ($did, $sid, $fulltext) = @_;
507    my ($doc_oid, $section_name, $section_textref, $debug_only) = @_;
508   
509    my $dbh = $self->{'db_handle'};
510   
511    my $tablename = $self->get_fulltext_table_name();
512    my $sth = $dbh->prepare_cached(qq{INSERT INTO $tablename (did, sid, fulltxt) VALUES (?, ?, ?)});# || warn("Could not prepare insert statement for fulltxt table\n");
513   
514    # Now we're ready to execute the command, unless we're only debugging
515
516    # don't display the fulltxt value as it could be too long
517    my $txt_repr = $$section_textref ? "<TXT>" : "NULL";   
518    if($debug_only) { # only print statement, don't execute it
519    print STDERR $sth->{'Statement'} . "($doc_oid, $section_name, $txt_repr)\n";
520    }
521    else { 
522    print STDERR $sth->{'Statement'} . "($doc_oid, $section_name, $txt_repr)\n" if $self->{'verbosity'} > 2;
523   
524    $sth->execute($doc_oid, $section_name, $$section_textref)
525        || warn ("Unable to write fulltxt row to db for row:\n\tOID $doc_oid, section $section_name"); # Execution failure will print out info anyway: since db connection sets PrintError
526    }
527}
528
529
530## The 2 select statements used by GreenstoneSQLPlugin
531
532# Using fetchall_arrayref on statement handle, to run on prepared and executed stmt
533#   https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#fetchall_arrayref
534# instead of selectall_arrayref on database handle which will prepare, execute and fetch
535#   https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#selectall_arrayref
536#
537# Returns the statement handle that prepared and executed
538# a "SELECT * FROM <COLL>_metadata WHERE did = $oid" SQL statement.
539# Caller can call fetchrow_array() on returned statement handle, $sth
540# Have to use prepare() and execute() instead of do() since do() does
541# not allow for fetching result set thereafter:
542# do(): "This method  is typically most useful for non-SELECT statements that either cannot be prepared in advance (due to a limitation of the driver) or do not need to be executed repeatedly. It should not be used for SELECT statements because it does not return a statement handle (so you can't fetch any data)." https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#do
543sub select_from_metatable_matching_docid {
544    my $self= shift (@_);
545    my ($oid, $outhandle) = @_;
546   
547    my $dbh = $self->{'db_handle'};
548    my $tablename = $self->get_metadata_table_name();
549   
550    my $sth = $dbh->prepare_cached(qq{SELECT * FROM $tablename WHERE did = ?});
551    $sth->execute( $oid ); # will print msg on fail
552
553    print $outhandle "### SQL select stmt: ".$sth->{'Statement'}."\n"
554    if ($self->{'verbosity'} > 2);
555   
556    my $rows_ref = $sth->fetchall_arrayref();
557    # "If an error occurs, fetchall_arrayref returns the data fetched thus far, which may be none.
558    # You should check $sth->err afterwards (or use the RaiseError attribute) to discover if the
559    # data is complete or was truncated due to an error."
560    # https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#fetchall_arrayref
561    # https://www.oreilly.com/library/view/programming-the-perl/1565926994/ch04s05.html
562    warn("Data fetching from $tablename terminated early by error: " . $dbh->err) if $dbh->err;
563    return $rows_ref;
564}
565
566
567# See select_from_metatable_matching_docid() above.
568# Returns the statement handle that prepared and executed
569# a "SELECT * FROM <COLL>_metadata WHERE did = $oid" SQL statement.
570# Caller can call fetchrow_array() on returned statement handle, $sth
571sub select_from_texttable_matching_docid {
572    my $self= shift (@_);
573    my ($oid, $outhandle) = @_;
574   
575    my $dbh = $self->{'db_handle'};
576    my $tablename = $self->get_fulltext_table_name();
577   
578    my $sth = $dbh->prepare_cached(qq{SELECT * FROM $tablename WHERE did = ?});
579    $sth->execute( $oid ); # will print msg on fail
580   
581    print $outhandle "### SQL select stmt: ".$sth->{'Statement'}."\n"
582    if ($self->{'verbosity'} > 2);
583   
584    my $rows_ref = $sth->fetchall_arrayref();
585    # Need explicit warning:
586    warn("Data fetching from $tablename terminated early by error: " . $dbh->err) if $dbh->err;
587    return $rows_ref;
588
589}
590
591# delete all records in metatable with specified docid
592# https://www.tutorialspoint.com/mysql/mysql-delete-query.htm
593# DELETE FROM table_name [WHERE Clause]
594# see example under 'do' at https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm
595sub delete_recs_from_metatable_with_docid {
596    my $self= shift (@_);
597    my ($oid) = @_;
598   
599    my $dbh = $self->{'db_handle'};
600
601    my $tablename = $self->get_metadata_table_name();
602    my $sth = $dbh->prepare_cached(qq{DELETE FROM $tablename WHERE did = ?});
603    $sth->execute( $oid ) or warn $dbh->errstr; # dbh set to print errors even without doing warn()
604}
605
606# delete all records in metatable with specified docid
607sub delete_recs_from_texttable_with_docid {
608    my $self= shift (@_);
609    my ($oid) = @_;
610   
611    my $dbh = $self->{'db_handle'};
612
613    my $tablename = $self->get_fulltext_table_name();
614    my $sth = $dbh->prepare_cached(qq{DELETE FROM $tablename WHERE did = ?});
615    $sth->execute( $oid ) or warn $dbh->errstr; # dbh set to print errors even without doing warn()
616}
617
618# Can call this after connection succeeded to get the database handle, dbh,
619# if any specific DB operation (SQL statement, create/delete)
620# needs to be executed that is not already provided as a method of this class.
621sub get_db_handle {
622    my $self= shift (@_);
623    return $self->{'db_handle'};
624}
625
626################ HELPER METHODS ##############
627
628# More basic helper methods
629sub get_metadata_table_name {
630    my $self= shift (@_);
631    my $table_name = $self->{'tablename_prefix'} . "_metadata";
632    return $table_name;
633}
634
635# FULLTEXT is a reserved keyword in (My)SQL. https://dev.mysql.com/doc/refman/5.5/en/keywords.html
636# So we can't name a table or any of its columns "fulltext". We use "fulltxt" instead.
637sub get_fulltext_table_name {
638    my $self= shift (@_);
639    my $table_name = $self->{'tablename_prefix'} . "_fulltxt";
640    return $table_name;
641}
642
643# Attempt to make sure the name parameter (for db or table name) is acceptable syntax
644# for the db in question, e.g. for mysql. For example, (My)SQL doesn't like tables or
645# databases with '-' (hyphens) in their names
646sub sanitize_name {
647    my $self= shift (@_);
648    my ($name) = @_;
649    $name =~ s/-/_/g;
650    return $name;
651}
652
653
654# I can get my version of table_exists to work, but it's not so ideal
655# Interesting that MySQL has non-standard command to CREATE TABLE IF NOT EXISTS and DROP TABLE IF EXISTS,
656# see https://www.perlmonks.org/bare/?node=DBI%20Recipes
657#    The page further has a table_exists function that could work with proper comparison
658# TODO Q: Couldn't get the first solution at https://www.perlmonks.org/bare/?node_id=500050 to work though
659sub table_exists {
660    my $self = shift (@_);
661    my $dbh = $self->{'db_handle'};
662    my ($table_name) = @_;
663
664    my @table_list = $dbh->tables;
665    #my $tables_str = @table_list[0];
666    foreach my $table (@table_list) {
667    return 1 if ($table =~ m/$table_name/);
668    }
669    return 0;
670}
671
6721;
Note: See TracBrowser for help on using the browser.