root/main/trunk/greenstone2/perllib/gssql.pm @ 32531

Revision 32531, 16.2 KB (checked in by ak19, 8 weeks ago)

1. bugfix to GS SQLPlugout: recursive call didn't go through self variable, noticed only when processing a doc with structure (subsections). 2. Being more explicit about the params passed to gssql.pm constructor. 3. MySQL didn't let me create a table with hyphens in the tablename. So collection names that contain hyphens need to first be adjusted for use in table names.

Line 
1###########################################################################
2#
3# gssql.pm -- DBI for SQL related utility functions used by
4# GreenstoneSQLPlugout and hereafter by GreenstoneSQLPlugin too.
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 1999 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27package gssql;
28
29use strict;
30no strict 'refs';
31no strict 'subs';
32
33use GreenstoneXMLPlugout;
34use docprint;
35
36use DBI; # the central package for this plugout
37
38# Need params_map keys:
39# - collection_name
40# - db_encoding (db content encoding) - MySQL can set this at server, db, table levels. For MySQL
41# we set the enc during connect at server level. Not sure whether other DB's support it at the
42# same levels.
43
44# For connection to MySQL, need:
45#  - db_driver, db_client_user, db_client_pwd, db_host, (db_port not used at present)
46# So these will be parameterised, but in a hashmap, for just the connect method.
47
48# Parameterise (one or more methods may use them):
49# - build_mode (like removeold)
50# - db_name (which is the GS3 sitename)
51
52# TODO: add infrastructure for db_port, AutoCommit etc
53# For port, see https://stackoverflow.com/questions/2248665/perl-script-to-connect-to-mysql-server-port-3307
54
55sub new
56{
57 
58    my $class = shift(@_);
59   
60    my ($params_map) = @_;
61   
62    # library_url: to be specified on the cmdline if not using a GS-included web server
63    # the GSDL_LIBRARY_URL env var is useful when running cmdline buildcol.pl in the linux package manager versions of GS3
64   
65    # https://stackoverflow.com/questions/7083453/copying-a-hashref-in-perl
66    # Making a shallow copy works, and can handle unknown params:
67    #my $self = $params_map;
68
69    # but being explicit for class params needed for MySQL:
70    my $self = {
71    'collection_name' => $params_map->{'collection_name'},
72    'db_encoding' => $params_map->{'db_encoding'}
73    };
74
75    # (My)SQL doesn't like tables with - (hyphens) in their names
76    my $coll_name = $params_map->{'collection_name'};
77    $coll_name =~ s/-/_/g;
78    $self->{'tablename_prefix'} = $coll_name;
79
80    return bless($self, $class);
81}
82
83
84#################################
85
86# Database access related functions
87# http://g2pc1.bu.edu/~qzpeng/manual/MySQL%20Commands.htm
88# https://www.guru99.com/insert-into.html
89
90# TODO Q: What on cancelling a build: delete table? But what if it was a rebuild and the rebuild is cancelled (not the original build)?
91# Do we create a copy of the orig database as backup, then start populating current db, and if cancelled, delete current db and RENAME backup table to current?
92# https://stackoverflow.com/questions/3280006/duplicating-a-mysql-table-indexes-and-data
93# BUT what if the table is HUGE? (Think of a collection with millions of docs.) Huge overhead in copying?
94# The alternative is we just quit on cancel, but then: cancel could leave the table in a partial committed state, with no way of rolling back.
95# Unless they do a full rebuild, which will recreate the table from scratch?
96# SOLUTION-> rollback transaction on error, see https://www.effectiveperlprogramming.com/2010/07/set-custom-dbi-error-handlers/
97# But then should set AutoCommit to off on connection, and remember to commit every time
98
99#################
100# Database functions that use the perl DBI module (with the DBD driver module for mysql)
101#################
102
103# THE NEW DB FUNCTIONS
104# NOTE: FULLTEXT is a reserved keyword in (My)SQL. So we can't name a table or any of its columns "fulltext".
105# https://dev.mysql.com/doc/refman/5.5/en/keywords.html
106
107# TODO: Consider AutoCommit status (and Autocommit off allowing commit or rollback for GS coll build cancel) later
108
109sub connect_to_db {
110    my $self= shift (@_);
111    my ($params_map) = @_;
112    my $db_enc = $self->{'db_encoding'} || "utf8";
113
114    # these are the params for connecting to MySQL
115    my $db_driver = $params_map->{'db_driver'} || "mysql";
116    my $db_user = $params_map->{'db_client_user'} || "root";
117    my $db_pwd = $params_map->{'db_client_pwd'}; # even if undef, we'll see a sensible error message
118                                           # when connect fails
119    my $db_host = $params_map->{'db_host'} || "127.0.0.1";
120    # localhost doesn't work for us, but 127.0.0.1 works
121    # https://metacpan.org/pod/DBD::mysql
122    # "The hostname, if not specified or specified as '' or 'localhost', will default to a MySQL server
123    # running on the local machine using the default for the UNIX socket. To connect to a MySQL server
124    # on the local machine via TCP, you must specify the loopback IP address (127.0.0.1) as the host."
125    #my $connect_str = "dbi:$db_driver:database=$db_name;host=$db_host";
126    my $connect_str = "dbi:$db_driver:host=$db_host"; # don't provide db - allows checking the db exists
127    my $dbh = DBI->connect("$connect_str", $db_user, $db_pwd,
128               {
129                   ShowErrorStatement => 1, # more informative as DBI will append failed SQL stmt to error message
130                   PrintError => 1, # on by default, but being explicit
131                   RaiseError => 0, # off by default, but being explicit
132                   AutoCommit => 1 # on by default, but being explicit
133               });
134
135    if(!$dbh) {
136    # NOTE, despite handle dbh being undefined, error code will be in DBI->err
137    return 0;   
138    }
139
140    # set encoding https://metacpan.org/pod/DBD::mysql
141    # https://dev.mysql.com/doc/refman/5.7/en/charset.html
142    # https://dev.mysql.com/doc/refman/5.7/en/charset-conversion.html
143    # Setting the encoding at db server level.
144    # Not sure if this command is mysql specific:
145    my $stmt = "set NAMES '" . $db_enc . "'";
146    $dbh->do($stmt) || warn("Unable to set charset encoding at db server level to: " . $db_enc . "\n");
147   
148    # if we're here, then connection succeeded, store handle
149    $self->{'db_handle'} = $dbh;
150    return 1;
151}
152
153sub load_db_and_tables {
154    my $self= shift (@_);
155    my ($db_name, $build_mode) = @_;
156    my $dbh = $self->{'db_handle'};
157   
158    # perl DBI switch database: https://www.perlmonks.org/?node_id=995434
159    # do() returns undef on error.
160    # connection succeeded, try to load our database. If that didn't work, attempt to create db
161    my $success = $dbh->do("use $db_name");
162   
163    if(!$success && $dbh->err == 1049) { # "Unknown database" error has code 1049 (mysql only?) meaning db doesn't exist yet
164    # attempt to create the db and its tables
165    $self->create_db($db_name) || return 0;
166
167    print STDERR "@@@ CREATED DATABASE $db_name\n";
168   
169    # once more attempt to use db, now that it exists
170    $dbh->do("use $db_name") || return 0;
171    #$dbh->do("use localsite") or die "Error (code" . $dbh->err ."): " . $dbh->errstr . "\n";
172
173    # attempt to create tables in current db
174    $self->create_metadata_table() || return 0;
175    $self->create_fulltext_table() || return 0;
176
177    $success = 1;
178    }
179    elsif($success) { # database existed and loaded successfully, but
180    # before proceeding check that the current collection's tables exist
181
182    print STDERR "@@@ DATABASE $db_name EXISTED\n";
183   
184
185    if($build_mode eq "removeold") {
186        $self->delete_collection_tables();
187    }
188
189    # use existing tables if any
190    # attempt to create tables in current db   
191    if($build_mode eq "removeold" || !$self->table_exists($self->get_metadata_table_name())) {
192        $self->create_metadata_table() || return 0;
193    } else {
194        print STDERR "@@@ Meta table exists\n";
195    }
196    if($build_mode eq "removeold" || !$self->table_exists($self->get_fulltext_table_name())) {
197        $self->create_fulltext_table() || return 0;
198    } else {
199        print STDERR "@@@ Fulltxt table exists\n";
200    }
201   
202    }
203   
204    return $success; # could still return 0, if database failed to load with an error code != 1049
205}
206
207# this will terminate if the db does not exist
208# it will not attempt to create the requested db (nor its tables)
209# The upcoming GreenstoneSQLPlugin can use this.
210sub use_db {
211    my $self= shift (@_);
212    my ($db_name) = @_;
213    my $dbh = $self->{'db_handle'};
214   
215    # perl DBI switch database: https://www.perlmonks.org/?node_id=995434
216    # do() returns undef on error.
217    # connection succeeded, try to load our database. If that didn't work, attempt to create db
218    return $dbh->do("use $db_name") || warn();
219}
220
221# disconnect from db - https://metacpan.org/pod/DBI#disconnect
222# TODO: make sure to have committed or rolled back before disconnect
223# and that you've call finish() on statement handles if any fetch remnants remain
224sub disconnect_from_db {
225    my $self= shift (@_);   
226    my $dbh = $self->{'db_handle'};
227
228    # make sure any active stmt handles are finished
229    # NO: "When all the data has been fetched from a SELECT statement, the driver will automatically call finish for you. So you should not call it explicitly except when you know that you've not fetched all the data from a statement handle and the handle won't be destroyed soon."
230   
231    #$meta_sth = $self->{'metadata_prepared_insert_statement_handle'};
232    #$txt_sth = $self->{'fulltxt_prepared_insert_statement_handle'};
233    #$meta_sth->finish() if($meta_sth);
234    #$txt_sth->finish() if($txt_sth);
235   
236    my $rc = $dbh->disconnect or warn $dbh->errstr; # The handle is of little use after disconnecting. Possibly PrintError already prints a warning and this duplicates it?
237    return $rc;
238}
239
240sub create_db {
241    my $self= shift (@_);
242    my $db_name = $self->{'db_name'};
243    my $dbh = $self->{'db_handle'};
244   
245    # https://stackoverflow.com/questions/5025768/how-can-i-create-a-mysql-database-from-a-perl-script
246    return $dbh->do("create database $db_name"); # do() will return undef on fail, https://metacpan.org/pod/DBI#do
247}
248
249
250sub create_metadata_table {
251    my $self= shift (@_);
252    my $dbh = $self->{'db_handle'};
253   
254    my $table_name = $self->get_metadata_table_name();
255
256    # If using an auto incremented primary key:
257    my $stmt = "CREATE TABLE $table_name (id INT NOT NULL AUTO_INCREMENT, did VARCHAR(63) NOT NULL, sid VARCHAR(63) NOT NULL, metaname VARCHAR(127) NOT NULL, metavalue VARCHAR(1023) NOT NULL, PRIMARY KEY(id));";
258    return $dbh->do($stmt);
259}
260
261# TODO: Investigate: https://dev.mysql.com/doc/search/?d=10&p=1&q=FULLTEXT
262# 12.9.1 Natural Language Full-Text Searches
263# to see whether we have to index the 'fulltxt' column of the 'fulltext' tables
264# or let user edit this file, or add it as another option
265sub create_fulltext_table {
266    my $self= shift (@_);
267    my $dbh = $self->{'db_handle'};
268   
269    my $table_name = $self->get_fulltext_table_name();
270
271    # If using an auto incremented primary key:
272    my $stmt = "CREATE TABLE $table_name (id INT NOT NULL AUTO_INCREMENT, did VARCHAR(63) NOT NULL, sid VARCHAR(63) NOT NULL, fulltxt LONGTEXT, PRIMARY KEY(id));";
273    return $dbh->do($stmt);
274
275}
276
277
278# USEFUL: https://metacpan.org/pod/DBI
279# "Many methods have an optional \%attr parameter which can be used to pass information to the driver implementing the method. Except where specifically documented, the \%attr parameter can only be used to pass driver specific hints. In general, you can ignore \%attr parameters or pass it as undef."
280
281
282# https://www.guru99.com/insert-into.html
283# and https://dev.mysql.com/doc/refman/8.0/en/example-auto-increment.html
284#     for inserting multiple rows at once
285# https://www.perlmonks.org/bare/?node_id=316183
286# https://metacpan.org/pod/DBI#do
287# https://www.quora.com/What-is-the-difference-between-prepare-and-do-statements-in-Perl-while-we-make-a-connection-to-the-database-for-executing-the-query
288# https://docstore.mik.ua/orelly/linux/dbi/ch05_05.htm
289
290# https://metacpan.org/pod/DBI#performance
291# 'The q{...} style quoting used in this example avoids clashing with quotes that may be used in the SQL statement. Use the double-quote like qq{...} operator if you want to interpolate variables into the string. See "Quote and Quote-like Operators" in perlop for more details.'
292sub prepare_insert_metadata_row_stmthandle {
293    my $self = shift (@_);   
294    #my ($did, $sid, $metaname, $metavalue) = @_;
295    my $dbh = $self->{'db_handle'};
296   
297    my $tablename = $self->get_metadata_table_name();
298
299    #my $stmt = "INSERT INTO $tablename (did, sid, metaname, metavalue) VALUES ('$did', '$sid', '$metaname', '$metavalue');"; # ?, ?, ?, ?
300
301    # using qq{} since we want $tablename placeholder to be filled in
302    # returns Statement Handle object!
303    my $sth = $dbh->prepare(qq{INSERT INTO $tablename (did, sid, metaname, metavalue) VALUES (?, ?, ?, ?)}) || warn("Could not prepare insert statement for metadata table\n");
304
305    print STDERR "@@@@ Prepared meta insert statement: ".$sth->{'Statement'}."\n";
306   
307    return $sth;
308}
309
310sub prepare_insert_fulltxt_row_stmthandle {
311    my $self = shift (@_);
312    #my ($did, $sid, $fulltext) = @_;
313    my $dbh = $self->{'db_handle'};
314   
315    my $tablename = $self->get_fulltext_table_name();
316
317    #my $stmt = "INSERT INTO $tablename (did, sid, fulltxt) VALUES ('$did', '$sid', '$fulltext');"; ?, ?, ?
318
319    # using qq{} since we want $tablename placeholder to be filled in
320    # returns Statement Handle object!
321    my $sth = $dbh->prepare(qq{INSERT INTO $tablename (did, sid, fulltxt) VALUES (?, ?, ?)}) || warn("Could not prepare insert statement for fulltxt table\n");
322   
323    print STDERR "@@@@ Prepared fulltext insert statement: ".$sth->{'Statement'}."\n";
324   
325    return $sth;
326}
327
328# "IF EXISTS is used to prevent an error from occurring if the database does not exist. ... DROP DATABASE returns the number of tables that were removed. The DROP DATABASE statement removes from the given database directory those files and directories that MySQL itself may create during normal operation.Jun 20, 2012"
329#MySQL 8.0 Reference Manual :: 13.1.22 DROP DATABASE Syntax
330# https://dev.mysql.com/doc/en/drop-database.html
331sub delete_collection_tables {
332    my $self= shift (@_);
333    my $dbh = $self->{'db_handle'};
334   
335    print STDERR "### Build mode is removeold, so deleting tables for current collection\n";
336   
337    # drop table <tablename>
338    my $table = $self->get_metadata_table_name();
339    $dbh->do("drop table $table") || warn("@@@ Couldn't delete $table");
340    $table = $self->get_fulltext_table_name();
341    $dbh->do("drop table $table") || warn("@@@ Couldn't delete $table");
342}
343
344# Don't call this: it will delete the meta and full text tables for ALL collections in $db_name (localsite by default)!
345# this is just for debugging
346sub _delete_database {
347    my $self= shift (@_);
348    my ($db_name) = @_;
349    my $dbh = $self->{'db_handle'};
350   
351    # "drop database dbname"
352    $dbh->do("drop database $db_name") || return 0;
353
354    return 1;
355}
356
357# More basic helper methods
358sub get_metadata_table_name {
359    my $self= shift (@_);
360    my $table_name = $self->{'tablename_prefix'} . "_metadata";
361    return $table_name;
362}
363
364# FULLTEXT is a reserved keyword in (My)SQL. https://dev.mysql.com/doc/refman/5.5/en/keywords.html
365# So we can't name a table or any of its columns "fulltext". We use "fulltxt" instead.
366sub get_fulltext_table_name {
367    my $self= shift (@_);
368    my $table_name = $self->{'tablename_prefix'} . "_fulltxt";
369    return $table_name;
370}
371
372
373# I can get my version of table_exists to work, but it's not so ideal
374# Interesting that MySQL has non-standard command to CREATE TABLE IF NOT EXISTS and DROP TABLE IF EXISTS,
375# see https://www.perlmonks.org/bare/?node=DBI%20Recipes
376#    The page further has a table_exists function that could work with proper comparison
377# Couldn't get the first solution at https://www.perlmonks.org/bare/?node_id=500050 to work though
378sub table_exists {
379    my $self = shift (@_);
380    my $dbh = $self->{'db_handle'};
381    my ($table_name) = @_;
382
383    my @table_list = $dbh->tables;
384    #my $tables_str = @table_list[0];
385    foreach my $table (@table_list) {
386    return 1 if ($table =~ m/$table_name/);
387    }
388    return 0;
389}
390
3911;
Note: See TracBrowser for help on using the browser.