root/main/trunk/greenstone2/perllib/gssql.pm @ 32530

Revision 32530, 15.8 KB (checked in by ak19, 8 weeks ago)

Some more tidying up: some params passed by GreenstoneSQLPlugout to gssql should be instance variables, others are connection specific and now restricted to the gssql::connect_to_db() method.

Line 
1###########################################################################
2#
3# gssql.pm -- DBI for SQL related utility functions used by
4# GreenstoneSQLPlugout and hereafter by GreenstoneSQLPlugin too.
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 1999 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27package gssql;
28
29use strict;
30no strict 'refs';
31no strict 'subs';
32
33use GreenstoneXMLPlugout;
34use docprint;
35
36use DBI; # the central package for this plugout
37
38# Need params_map keys:
39# - collection_name
40# - db_encoding (db content encoding) - MySQL can set this at server, db, table levels. For MySQL
41# we set the enc during connect at server level. Not sure whether other DB's support it at the
42# same levels.
43
44# For connection to MySQL, need:
45#  - db_driver, db_client_user, db_client_pwd, db_host, (db_port not used at present)
46# So these will be parameterised, but in a hashmap, for just the connect method.
47
48# Parameterise (one or more methods may use them):
49# - build_mode (like removeold)
50# - db_name (which is the GS3 sitename)
51
52# TODO: add infrastructure for db_port, AutoCommit etc
53# For port, see https://stackoverflow.com/questions/2248665/perl-script-to-connect-to-mysql-server-port-3307
54
# Constructor.
# Params:
#   $params_map - optional hashref of instance settings. Keys read elsewhere in
#                 this package include 'collection_name', 'db_encoding' and
#                 'db_name'.
# Returns a new object blessed into $class whose fields are a shallow copy of
# $params_map.
sub new
{
    my $class = shift(@_);
    my ($params_map) = @_;

    # Make a genuine shallow copy of the caller's hash (see
    # https://stackoverflow.com/questions/7083453/copying-a-hashref-in-perl).
    # Merely assigning the reference would bless the caller's own hash and let
    # later writes to this object (e.g. the 'db_handle' stored by
    # connect_to_db()) leak back into it. Falling back to an empty hash also
    # keeps bless() from dying when no params were passed at all.
    my $self = { %{ $params_map || {} } };

    return bless($self, $class);
}
74
75
76#################################
77
78# Database access related functions
79# http://g2pc1.bu.edu/~qzpeng/manual/MySQL%20Commands.htm
80# https://www.guru99.com/insert-into.html
81
82# TODO Q: What on cancelling a build: delete table? But what if it was a rebuild and the rebuild is cancelled (not the original build)?
83# Do we create a copy of the orig database as backup, then start populating current db, and if cancelled, delete current db and RENAME backup table to current?
84# https://stackoverflow.com/questions/3280006/duplicating-a-mysql-table-indexes-and-data
85# BUT what if the table is HUGE? (Think of a collection with millions of docs.) Huge overhead in copying?
86# The alternative is we just quit on cancel, but then: cancel could leave the table in a partial committed state, with no way of rolling back.
87# Unless they do a full rebuild, which will recreate the table from scratch?
88# SOLUTION-> rollback transaction on error, see https://www.effectiveperlprogramming.com/2010/07/set-custom-dbi-error-handlers/
89# But then should set AutoCommit to off on connection, and remember to commit every time
90
91#################
92# Database functions that use the perl DBI module (with the DBD driver module for mysql)
93#################
94
95# THE NEW DB FUNCTIONS
96# NOTE: FULLTEXT is a reserved keyword in (My)SQL. So we can't name a table or any of its columns "fulltext".
97# https://dev.mysql.com/doc/refman/5.5/en/keywords.html
98
99# TODO: Consider AutoCommit status (and Autocommit off allowing commit or rollback for GS coll build cancel) later
100
# Connects to the database server (without selecting a database, so callers
# can still check whether the db exists) and sets the connection encoding.
# Params:
#   $params_map - hashref of connection-specific settings:
#       db_driver      - DBI driver name, defaults to "mysql"
#       db_client_user - defaults to "root"
#       db_client_pwd  - may be undef; connect will then fail with a sensible
#                        error message
#       db_host        - defaults to "127.0.0.1"
#       db_port        - optional; when given and numeric it is appended to
#                        the connection string, otherwise the driver default
#                        is used
# The encoding comes from the instance variable 'db_encoding' (default "utf8").
# Returns 1 on success, after storing the handle in $self->{'db_handle'};
# returns 0 if the connection could not be made (error code is in DBI->err).
sub connect_to_db {
    my $self = shift(@_);
    my ($params_map) = @_;
    my $db_enc = $self->{'db_encoding'} || "utf8";

    # these are the params for connecting to MySQL
    my $db_driver = $params_map->{'db_driver'} || "mysql";
    my $db_user = $params_map->{'db_client_user'} || "root";
    my $db_pwd = $params_map->{'db_client_pwd'};
    my $db_host = $params_map->{'db_host'} || "127.0.0.1";
    my $db_port = $params_map->{'db_port'};

    # localhost doesn't work for us, but 127.0.0.1 works
    # https://metacpan.org/pod/DBD::mysql
    # "The hostname, if not specified or specified as '' or 'localhost', will
    # default to a MySQL server running on the local machine using the default
    # for the UNIX socket. To connect to a MySQL server on the local machine
    # via TCP, you must specify the loopback IP address (127.0.0.1) as the host."
    # Don't provide the db name here: that allows checking the db exists later.
    my $connect_str = "dbi:$db_driver:host=$db_host";

    # Only a purely numeric port is accepted, so that a malformed value cannot
    # smuggle further attributes into the connection string.
    if (defined($db_port) && $db_port ne "") {
        if ($db_port =~ m/\A\d+\z/) {
            $connect_str .= ";port=$db_port";
        } else {
            warn("Ignoring invalid db_port value: $db_port\n");
        }
    }

    my $dbh = DBI->connect("$connect_str", $db_user, $db_pwd,
                           {
                               ShowErrorStatement => 1, # more informative as DBI will append failed SQL stmt to error message
                               PrintError => 1, # on by default, but being explicit
                               RaiseError => 0, # off by default, but being explicit
                               AutoCommit => 1 # on by default, but being explicit
                           });

    if (!$dbh) {
        # NOTE, despite handle dbh being undefined, error code will be in DBI->err
        return 0;
    }

    # set encoding https://metacpan.org/pod/DBD::mysql
    # https://dev.mysql.com/doc/refman/5.7/en/charset.html
    # https://dev.mysql.com/doc/refman/5.7/en/charset-conversion.html
    # Setting the encoding at db server level.
    # Not sure if this command is mysql specific:
    my $stmt = "set NAMES '" . $db_enc . "'";
    $dbh->do($stmt) || warn("Unable to set charset encoding at db server level to: " . $db_enc . "\n");

    # if we're here, then connection succeeded, store handle
    $self->{'db_handle'} = $dbh;
    return 1;
}
144
# Selects database $db_name on the stored connection, creating the database
# and/or this collection's metadata and fulltext tables where they are missing.
# Params:
#   $db_name    - name of the database to use
#   $build_mode - when "removeold", the collection's existing tables are
#                 dropped and recreated
# Returns a true value on success. Returns 0 if creating the db or a table
# failed, and the false result of do() if the database could not be loaded
# for a reason other than not existing.
sub load_db_and_tables {
    my ($self, $db_name, $build_mode) = @_;
    my $dbh = $self->{'db_handle'};

    # perl DBI switch database: https://www.perlmonks.org/?node_id=995434
    # do() returns undef on error.
    my $success = $dbh->do("use $db_name");

    if (!$success) {
        # "Unknown database" has error code 1049 (mysql only?), meaning the db
        # doesn't exist yet. Any other failure is handed straight back.
        return $success if ($dbh->err != 1049);

        # db doesn't exist: create it, switch to it, then create its tables
        $self->create_db($db_name) || return 0;

        print STDERR "@@@ CREATED DATABASE $db_name\n";

        $dbh->do("use $db_name") || return 0;

        $self->create_metadata_table() || return 0;
        $self->create_fulltext_table() || return 0;

        return 1;
    }

    # database existed and loaded successfully, but before proceeding
    # make sure that the current collection's tables exist
    print STDERR "@@@ DATABASE $db_name EXISTED\n";

    my $removing_old = ($build_mode eq "removeold");
    $self->delete_collection_tables() if ($removing_old);

    # reuse existing tables if any, unless they were just removed
    if ($removing_old || !$self->table_exists($self->get_metadata_table_name())) {
        $self->create_metadata_table() || return 0;
    }
    else {
        print STDERR "@@@ Meta table exists\n";
    }

    if ($removing_old || !$self->table_exists($self->get_fulltext_table_name())) {
        $self->create_fulltext_table() || return 0;
    }
    else {
        print STDERR "@@@ Fulltxt table exists\n";
    }

    return $success;
}
198
# Switches the stored connection to database $db_name.
# Unlike load_db_and_tables(), this will not attempt to create the requested
# db (nor its tables) if it does not exist.
# The upcoming GreenstoneSQLPlugin can use this.
# Params:
#   $db_name - name of the database to use
# Returns the true result of do() on success. On failure a warning is issued
# and 0 is returned, so the caller can decide whether to terminate.
sub use_db {
    my $self = shift(@_);
    my ($db_name) = @_;
    my $dbh = $self->{'db_handle'};

    # perl DBI switch database: https://www.perlmonks.org/?node_id=995434
    # do() returns undef on error.
    my $success = $dbh->do("use $db_name");

    # Don't write this as "return do(...) || warn()": warn() itself returns a
    # true value, which would make a failure look like success to the caller.
    if (!$success) {
        warn("Unable to use database $db_name\n");
        return 0;
    }

    return $success;
}
212
213# disconnect from db - https://metacpan.org/pod/DBI#disconnect
214# TODO: make sure to have committed or rolled back before disconnect
215# and that you've called finish() on statement handles if any fetch remnants remain
# Disconnects the stored database handle.
# Returns the result of the handle's disconnect() call; when that result is
# false, the handle's error string is issued as a warning first.
sub disconnect_from_db {
    my ($self) = @_;
    my $dbh = $self->{'db_handle'};

    # Statement handles are deliberately not finished here. From the DBI docs:
    # "When all the data has been fetched from a SELECT statement, the driver
    # will automatically call finish for you. So you should not call it
    # explicitly except when you know that you've not fetched all the data
    # from a statement handle and the handle won't be destroyed soon."

    # The handle is of little use after disconnecting.
    my $rc = $dbh->disconnect;
    if (!$rc) {
        # Possibly PrintError already prints a warning and this duplicates it?
        warn $dbh->errstr;
    }

    return $rc;
}
231
# Creates a database on the stored connection.
# Params:
#   $db_name - optional name of the database to create. This is what
#              load_db_and_tables() passes in. When omitted, the instance
#              variable 'db_name' is used instead.
# Returns the result of do(), which is undef on failure
# (https://metacpan.org/pod/DBI#do). Returns 0 without touching the database
# when no name is available from either source.
sub create_db {
    my $self = shift(@_);
    my ($db_name) = @_;
    my $dbh = $self->{'db_handle'};

    # An explicitly passed name wins; fall back to the instance variable so
    # that calls without an argument behave as before.
    if (!defined($db_name) || $db_name eq "") {
        $db_name = $self->{'db_name'};
    }

    # Without a name the statement would be the malformed "create database ".
    if (!defined($db_name) || $db_name eq "") {
        warn("No database name provided, unable to create database\n");
        return 0;
    }

    # https://stackoverflow.com/questions/5025768/how-can-i-create-a-mysql-database-from-a-perl-script
    return $dbh->do("create database $db_name");
}
240
241
# Creates the current collection's metadata table, named by
# get_metadata_table_name(), with an auto incremented primary key.
# Returns the result of do(), which is undef on failure.
sub create_metadata_table {
    my ($self) = @_;

    # column definitions, in order, ending with the primary key constraint
    my @column_defs = (
        "id INT NOT NULL AUTO_INCREMENT",
        "did VARCHAR(63) NOT NULL",
        "sid VARCHAR(63) NOT NULL",
        "metaname VARCHAR(127) NOT NULL",
        "metavalue VARCHAR(1023) NOT NULL",
        "PRIMARY KEY(id)"
    );

    my $create_stmt = "CREATE TABLE " . $self->get_metadata_table_name()
        . " (" . join(", ", @column_defs) . ");";

    return $self->{'db_handle'}->do($create_stmt);
}
252
253# TODO: Investigate: https://dev.mysql.com/doc/search/?d=10&p=1&q=FULLTEXT
254# 12.9.1 Natural Language Full-Text Searches
255# to see whether we have to index the 'fulltxt' column of the 'fulltext' tables
256# or let user edit this file, or add it as another option
# Creates the current collection's full text table, named by
# get_fulltext_table_name(), with an auto incremented primary key.
# Returns the result of do(), which is undef on failure.
sub create_fulltext_table {
    my ($self) = @_;

    # column definitions, in order, ending with the primary key constraint
    my @column_defs = (
        "id INT NOT NULL AUTO_INCREMENT",
        "did VARCHAR(63) NOT NULL",
        "sid VARCHAR(63) NOT NULL",
        "fulltxt LONGTEXT",
        "PRIMARY KEY(id)"
    );

    my $create_stmt = "CREATE TABLE " . $self->get_fulltext_table_name()
        . " (" . join(", ", @column_defs) . ");";

    return $self->{'db_handle'}->do($create_stmt);
}
268
269
270# USEFUL: https://metacpan.org/pod/DBI
271# "Many methods have an optional \%attr parameter which can be used to pass information to the driver implementing the method. Except where specifically documented, the \%attr parameter can only be used to pass driver specific hints. In general, you can ignore \%attr parameters or pass it as undef."
272
273
274# https://www.guru99.com/insert-into.html
275# and https://dev.mysql.com/doc/refman/8.0/en/example-auto-increment.html
276#     for inserting multiple rows at once
277# https://www.perlmonks.org/bare/?node_id=316183
278# https://metacpan.org/pod/DBI#do
279# https://www.quora.com/What-is-the-difference-between-prepare-and-do-statements-in-Perl-while-we-make-a-connection-to-the-database-for-executing-the-query
280# https://docstore.mik.ua/orelly/linux/dbi/ch05_05.htm
281
282# https://metacpan.org/pod/DBI#performance
283# 'The q{...} style quoting used in this example avoids clashing with quotes that may be used in the SQL statement. Use the double-quote like qq{...} operator if you want to interpolate variables into the string. See "Quote and Quote-like Operators" in perlop for more details.'
# Prepares the statement used to insert one row (did, sid, metaname,
# metavalue) into the current collection's metadata table. The four values
# are left as placeholders to be bound when the statement is executed.
# Returns the prepared statement handle object, or undef (after a warning)
# if the statement could not be prepared.
sub prepare_insert_metadata_row_stmthandle {
    my $self = shift(@_);
    my $dbh = $self->{'db_handle'};

    my $tablename = $self->get_metadata_table_name();

    # using qq{} since we want the $tablename variable to be filled in
    my $sth = $dbh->prepare(qq{INSERT INTO $tablename (did, sid, metaname, metavalue) VALUES (?, ?, ?, ?)});

    # Check the handle separately rather than with "prepare(...) || warn(...)":
    # warn() returns a true value, which would otherwise be stored and handed
    # to the caller as if it were a statement handle.
    if (!$sth) {
        warn("Could not prepare insert statement for metadata table\n");
        return;
    }

    print STDERR "@@@@ Prepared meta insert statement: ".$sth->{'Statement'}."\n";

    return $sth;
}
301
# Prepares the statement used to insert one row (did, sid, fulltxt) into the
# current collection's full text table. The three values are left as
# placeholders to be bound when the statement is executed.
# Returns the prepared statement handle object, or undef (after a warning)
# if the statement could not be prepared.
sub prepare_insert_fulltxt_row_stmthandle {
    my $self = shift(@_);
    my $dbh = $self->{'db_handle'};

    my $tablename = $self->get_fulltext_table_name();

    # using qq{} since we want the $tablename variable to be filled in
    my $sth = $dbh->prepare(qq{INSERT INTO $tablename (did, sid, fulltxt) VALUES (?, ?, ?)});

    # Check the handle separately rather than with "prepare(...) || warn(...)":
    # warn() returns a true value, which would otherwise be stored and handed
    # to the caller as if it were a statement handle.
    if (!$sth) {
        warn("Could not prepare insert statement for fulltxt table\n");
        return;
    }

    print STDERR "@@@@ Prepared fulltext insert statement: ".$sth->{'Statement'}."\n";

    return $sth;
}
319
320# "IF EXISTS is used to prevent an error from occurring if the database does not exist. ... DROP DATABASE returns the number of tables that were removed. The DROP DATABASE statement removes from the given database directory those files and directories that MySQL itself may create during normal operation.Jun 20, 2012"
321#MySQL 8.0 Reference Manual :: 13.1.22 DROP DATABASE Syntax
322# https://dev.mysql.com/doc/en/drop-database.html
# Drops the current collection's metadata and full text tables. A warning is
# issued for each table that could not be dropped, and the second table is
# still attempted after the first one failed.
sub delete_collection_tables {
    my ($self) = @_;
    my $dbh = $self->{'db_handle'};

    print STDERR "### Build mode is removeold, so deleting tables for current collection\n";

    my $meta_table = $self->get_metadata_table_name();
    my $fulltxt_table = $self->get_fulltext_table_name();

    # drop table <tablename>
    $dbh->do("drop table $meta_table") || warn("@@@ Couldn't delete $meta_table");
    return $dbh->do("drop table $fulltxt_table") || warn("@@@ Couldn't delete $fulltxt_table");
}
335
336# Don't call this: it will delete the meta and full text tables for ALL collections in $db_name (localsite by default)!
337# this is just for debugging
# Debugging aid only: drops the entire database $db_name, and with it the
# tables of every collection stored in it.
# Returns 1 if the drop statement succeeded, 0 otherwise.
sub _delete_database {
    my ($self, $db_name) = @_;

    # "drop database dbname"
    my $dropped = $self->{'db_handle'}->do("drop database $db_name");

    return $dropped ? 1 : 0;
}
348
349# More basic helper methods
# Returns the name of the current collection's metadata table: the instance
# variable 'collection_name' followed by "_metadata".
sub get_metadata_table_name {
    my ($self) = @_;
    return sprintf("%s_metadata", $self->{'collection_name'});
}
355
356# FULLTEXT is a reserved keyword in (My)SQL. https://dev.mysql.com/doc/refman/5.5/en/keywords.html
357# So we can't name a table or any of its columns "fulltext". We use "fulltxt" instead.
# Returns the name of the current collection's full text table: the instance
# variable 'collection_name' followed by "_fulltxt".
sub get_fulltext_table_name {
    my ($self) = @_;
    return sprintf("%s_fulltxt", $self->{'collection_name'});
}
363
364# I can get my version of table_exists to work, but it's not so ideal
365# Interesting that MySQL has non-standard command to CREATE TABLE IF NOT EXISTS and DROP TABLE IF EXISTS,
366# see https://www.perlmonks.org/bare/?node=DBI%20Recipes
367#    The page further has a table_exists function that could work with proper comparison
368# Couldn't get the first solution at https://www.perlmonks.org/bare/?node_id=500050 to work though
# Checks whether a table called exactly $table_name is among the tables
# reported by the database handle.
# Params:
#   $table_name - unqualified, unquoted name of the table to look for
# Returns 1 if the table is listed, 0 otherwise (also for an undefined or
# empty name).
sub table_exists {
    my $self = shift(@_);
    my $dbh = $self->{'db_handle'};
    my ($table_name) = @_;

    return 0 if (!defined($table_name) || $table_name eq "");

    # The listed names may be qualified and quoted, e.g. `dbname`.`tablename`,
    # so compare only the final component, allowing it to be bare or wrapped
    # in a pair of backticks or double quotes. The name is escaped and the
    # pattern anchored: a plain unanchored match would report "mo_metadata"
    # as existing whenever "demo_metadata" does, and would treat characters
    # like "." in the name as wildcards.
    my $name = quotemeta($table_name);
    my $listed_as = qr/(?:\A|\.)(?:$name|`$name`|"$name")\z/;

    foreach my $table ($dbh->tables) {
        return 1 if ($table =~ $listed_as);
    }
    return 0;
}
381
3821;
Note: See TracBrowser for help on using the browser.