source: main/trunk/greenstone2/perllib/gssql.pm@ 32538

Last change on this file since 32538 was 32538, checked in by ak19, 5 years ago

Previous commit message meant to be: string names of strings shared by GS SQL Plugin and Plugout have been changed in strings.properties to indicate both modules used them. Current commit: Some tidying up the new GreenstoneSQLPlugin and moving the select statements from there into gssql.pm.

File size: 18.7 KB
Line 
1###########################################################################
2#
3# gssql.pm -- DBI for SQL related utility functions used by
4# GreenstoneSQLPlugout and hereafter by GreenstoneSQLPlugin too.
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 1999 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27package gssql;
28
29use strict;
30no strict 'refs';
31no strict 'subs';
32
33use DBI; # the central package for this module used by GreenstoneSQL Plugout and Plugin
34
35# Need params_map keys:
36# - collection_name
37# - db_encoding (db content encoding) - MySQL can set this at server, db, table levels. For MySQL
38# we set the enc during connect at server level. Not sure whether other DB's support it at the
39# same levels.
40
41# For connection to MySQL, need:
42# - db_driver, db_client_user, db_client_pwd, db_host, (db_port not used at present)
43# So these will be parameterised, but in a hashmap, for just the connect method.
44
45# Parameterise (one or more methods may use them):
46# - build_mode (like removeold)
47# - db_name (which is the GS3 sitename)
48
49# TODO: add infrastructure for db_port, AutoCommit etc
50# For port, see https://stackoverflow.com/questions/2248665/perl-script-to-connect-to-mysql-server-port-3307
51
52sub new
53{
54
55 my $class = shift(@_);
56
57 my ($params_map) = @_;
58
59 # library_url: to be specified on the cmdline if not using a GS-included web server
60 # the GSDL_LIBRARY_URL env var is useful when running cmdline buildcol.pl in the linux package manager versions of GS3
61
62 # https://stackoverflow.com/questions/7083453/copying-a-hashref-in-perl
63 # Making a shallow copy works, and can handle unknown params:
64 #my $self = $params_map;
65
66 # but being explicit for class params needed for MySQL:
67 my $self = {
68 'collection_name' => $params_map->{'collection_name'},
69 'db_encoding' => $params_map->{'db_encoding'}
70 };
71
72 # (My)SQL doesn't like tables with - (hyphens) in their names
73 my $coll_name = $params_map->{'collection_name'};
74 $coll_name =~ s/-/_/g;
75 $self->{'tablename_prefix'} = $coll_name;
76
77 return bless($self, $class);
78}
79
80
81#################################
82
83# Database access related functions
84# http://g2pc1.bu.edu/~qzpeng/manual/MySQL%20Commands.htm
85# https://www.guru99.com/insert-into.html
86
87# TODO Q: What on cancelling a build: delete table? But what if it was a rebuild and the rebuild is cancelled (not the original build)?
88# Do we create a copy of the orig database as backup, then start populating current db, and if cancelled, delete current db and RENAME backup table to current?
89# https://stackoverflow.com/questions/3280006/duplicating-a-mysql-table-indexes-and-data
90# BUT what if the table is HUGE? (Think of a collection with millions of docs.) Huge overhead in copying?
91# The alternative is we just quit on cancel, but then: cancel could leave the table in a partial committed state, with no way of rolling back.
92# Unless they do a full rebuild, which will recreate the table from scratch?
93# SOLUTION-> rollback transaction on error, see https://www.effectiveperlprogramming.com/2010/07/set-custom-dbi-error-handlers/
94# But then should set AutoCommit to off on connection, and remember to commit every time
95
96#################
97# Database functions that use the perl DBI module (with the DBD driver module for mysql)
98#################
99
100################### BASIC DB OPERATIONS ##################
101
102# THE NEW DB FUNCTIONS
103# NOTE: FULLTEXT is a reserved keyword in (My)SQL. So we can't name a table or any of its columns "fulltext".
104# https://dev.mysql.com/doc/refman/5.5/en/keywords.html
105
106# TODO: Consider AutoCommit status (and Autocommit off allowing commit or rollback for GS coll build cancel) later
107
108sub connect_to_db {
109 my $self= shift (@_);
110 my ($params_map) = @_;
111 my $db_enc = $self->{'db_encoding'} || "utf8";
112
113 # these are the params for connecting to MySQL
114 my $db_driver = $params_map->{'db_driver'} || "mysql";
115 my $db_user = $params_map->{'db_client_user'} || "root";
116 my $db_pwd = $params_map->{'db_client_pwd'}; # even if undef, we'll see a sensible error message
117 # when connect fails
118 my $db_host = $params_map->{'db_host'} || "127.0.0.1";
119 # localhost doesn't work for us, but 127.0.0.1 works
120 # https://metacpan.org/pod/DBD::mysql
121 # "The hostname, if not specified or specified as '' or 'localhost', will default to a MySQL server
122 # running on the local machine using the default for the UNIX socket. To connect to a MySQL server
123 # on the local machine via TCP, you must specify the loopback IP address (127.0.0.1) as the host."
124 #my $connect_str = "dbi:$db_driver:database=$db_name;host=$db_host";
125 my $connect_str = "dbi:$db_driver:host=$db_host"; # don't provide db - allows checking the db exists
126 my $dbh = DBI->connect("$connect_str", $db_user, $db_pwd,
127 {
128 ShowErrorStatement => 1, # more informative as DBI will append failed SQL stmt to error message
129 PrintError => 1, # on by default, but being explicit
130 RaiseError => 0, # off by default, but being explicit
131 AutoCommit => 1 # on by default, but being explicit
132 });
133
134 if(!$dbh) {
135 # NOTE, despite handle dbh being undefined, error code will be in DBI->err
136 return 0;
137 }
138
139 # set encoding https://metacpan.org/pod/DBD::mysql
140 # https://dev.mysql.com/doc/refman/5.7/en/charset.html
141 # https://dev.mysql.com/doc/refman/5.7/en/charset-conversion.html
142 # Setting the encoding at db server level.
143 # Not sure if this command is mysql specific:
144 my $stmt = "set NAMES '" . $db_enc . "'";
145 $dbh->do($stmt) || warn("Unable to set charset encoding at db server level to: " . $db_enc . "\n");
146
147 # if we're here, then connection succeeded, store handle
148 $self->{'db_handle'} = $dbh;
149 return 1;
150}
151
152# will attempt to load the specified db and the <coll>_metadata and <coll>_fulltxt for this
153# collection, or create any of these (db, tables) that don't yet exist. At the end
154# it will have loaded the requested database (in MySQL: "use <db>;")
155sub load_db_and_tables {
156 my $self= shift (@_);
157 my ($db_name, $build_mode) = @_;
158 my $dbh = $self->{'db_handle'};
159
160 # perl DBI switch database: https://www.perlmonks.org/?node_id=995434
161 # do() returns undef on error.
162 # connection succeeded, try to load our database. If that didn't work, attempt to create db
163 my $success = $dbh->do("use $db_name");
164
165 if(!$success && $dbh->err == 1049) { # "Unknown database" error has code 1049 (mysql only?) meaning db doesn't exist yet
166 # attempt to create the db and its tables
167 $self->create_db($db_name) || return 0;
168
169 print STDERR "@@@ CREATED DATABASE $db_name\n";
170
171 # once more attempt to use db, now that it exists
172 $dbh->do("use $db_name") || return 0;
173 #$dbh->do("use localsite") or die "Error (code" . $dbh->err ."): " . $dbh->errstr . "\n";
174
175 # attempt to create tables in current db
176 $self->create_metadata_table() || return 0;
177 $self->create_fulltext_table() || return 0;
178
179 $success = 1;
180 }
181 elsif($success) { # database existed and loaded successfully, but
182 # before proceeding check that the current collection's tables exist
183
184 print STDERR "@@@ DATABASE $db_name EXISTED\n";
185
186
187 if($build_mode eq "removeold") {
188 $self->delete_collection_tables();
189 }
190
191 # use existing tables if any
192 # attempt to create tables in current db
193 if($build_mode eq "removeold" || !$self->table_exists($self->get_metadata_table_name())) {
194 $self->create_metadata_table() || return 0;
195 } else {
196 print STDERR "@@@ Meta table exists\n";
197 }
198 if($build_mode eq "removeold" || !$self->table_exists($self->get_fulltext_table_name())) {
199 $self->create_fulltext_table() || return 0;
200 } else {
201 print STDERR "@@@ Fulltxt table exists\n";
202 }
203
204 }
205
206 return $success; # could still return 0, if database failed to load with an error code != 1049
207}
208
209# GreenstoneSQLPlugin calls this method to load an existing db.
210# This will terminate if the db does not exist. Unlike load_db_and_tables() above, used by
211# GreenstoneSQLPlugout, this method will not attempt to create the requested db (nor its tables)
212sub use_db {
213 my $self= shift (@_);
214 my ($db_name) = @_;
215 my $dbh = $self->{'db_handle'};
216
217 # perl DBI switch database: https://www.perlmonks.org/?node_id=995434
218 # do() returns undef on error.
219 # connection succeeded, try to load our database. If that didn't work, attempt to create db
220 return $dbh->do("use $db_name") || warn();
221}
222
223# disconnect from db - https://metacpan.org/pod/DBI#disconnect
224# TODO: make sure to have committed or rolled back before disconnect
225# and that you've call finish() on statement handles if any fetch remnants remain
226sub disconnect_from_db {
227 my $self= shift (@_);
228 my $dbh = $self->{'db_handle'};
229
230 # make sure any active stmt handles are finished
231 # NO: "When all the data has been fetched from a SELECT statement, the driver will automatically call finish for you. So you should not call it explicitly except when you know that you've not fetched all the data from a statement handle and the handle won't be destroyed soon."
232
233 #$meta_sth = $self->{'metadata_prepared_insert_statement_handle'};
234 #$txt_sth = $self->{'fulltxt_prepared_insert_statement_handle'};
235 #$meta_sth->finish() if($meta_sth);
236 #$txt_sth->finish() if($txt_sth);
237
238 my $rc = $dbh->disconnect or warn $dbh->errstr; # The handle is of little use after disconnecting. Possibly PrintError already prints a warning and this duplicates it?
239 return $rc;
240}
241
242sub create_db {
243 my $self= shift (@_);
244 my $db_name = $self->{'db_name'};
245 my $dbh = $self->{'db_handle'};
246
247 # https://stackoverflow.com/questions/5025768/how-can-i-create-a-mysql-database-from-a-perl-script
248 return $dbh->do("create database $db_name"); # do() will return undef on fail, https://metacpan.org/pod/DBI#do
249}
250
251
252sub create_metadata_table {
253 my $self= shift (@_);
254 my $dbh = $self->{'db_handle'};
255
256 my $table_name = $self->get_metadata_table_name();
257
258 # If using an auto incremented primary key:
259 my $stmt = "CREATE TABLE $table_name (id INT NOT NULL AUTO_INCREMENT, did VARCHAR(63) NOT NULL, sid VARCHAR(63) NOT NULL, metaname VARCHAR(127) NOT NULL, metavalue VARCHAR(1023) NOT NULL, PRIMARY KEY(id));";
260 return $dbh->do($stmt);
261}
262
263# TODO: Investigate: https://dev.mysql.com/doc/search/?d=10&p=1&q=FULLTEXT
264# 12.9.1 Natural Language Full-Text Searches
265# to see whether we have to index the 'fulltxt' column of the 'fulltext' tables
266# or let user edit this file, or add it as another option
267sub create_fulltext_table {
268 my $self= shift (@_);
269 my $dbh = $self->{'db_handle'};
270
271 my $table_name = $self->get_fulltext_table_name();
272
273 # If using an auto incremented primary key:
274 my $stmt = "CREATE TABLE $table_name (id INT NOT NULL AUTO_INCREMENT, did VARCHAR(63) NOT NULL, sid VARCHAR(63) NOT NULL, fulltxt LONGTEXT, PRIMARY KEY(id));";
275 return $dbh->do($stmt);
276
277}
278
279# "IF EXISTS is used to prevent an error from occurring if the database does not exist. ... DROP DATABASE returns the number of tables that were removed. The DROP DATABASE statement removes from the given database directory those files and directories that MySQL itself may create during normal operation.Jun 20, 2012"
280#MySQL 8.0 Reference Manual :: 13.1.22 DROP DATABASE Syntax
281# https://dev.mysql.com/doc/en/drop-database.html
282sub delete_collection_tables {
283 my $self= shift (@_);
284 my $dbh = $self->{'db_handle'};
285
286 print STDERR "### Build mode is removeold, so deleting tables for current collection\n";
287
288 # drop table <tablename>
289 my $table = $self->get_metadata_table_name();
290 $dbh->do("drop table $table") || warn("@@@ Couldn't delete $table");
291 $table = $self->get_fulltext_table_name();
292 $dbh->do("drop table $table") || warn("@@@ Couldn't delete $table");
293}
294
295# Don't call this: it will delete the meta and full text tables for ALL collections in $db_name (localsite by default)!
296# this is just for debugging
297sub _delete_database {
298 my $self= shift (@_);
299 my ($db_name) = @_;
300 my $dbh = $self->{'db_handle'};
301
302 # "drop database dbname"
303 $dbh->do("drop database $db_name") || return 0;
304
305 return 1;
306}
307
308
309########################### DB STATEMENTS ###########################
310
311# USEFUL: https://metacpan.org/pod/DBI
312# "Many methods have an optional \%attr parameter which can be used to pass information to the driver implementing the method. Except where specifically documented, the \%attr parameter can only be used to pass driver specific hints. In general, you can ignore \%attr parameters or pass it as undef."
313
314
315# https://www.guru99.com/insert-into.html
316# and https://dev.mysql.com/doc/refman/8.0/en/example-auto-increment.html
317# for inserting multiple rows at once
318# https://www.perlmonks.org/bare/?node_id=316183
319# https://metacpan.org/pod/DBI#do
320# https://www.quora.com/What-is-the-difference-between-prepare-and-do-statements-in-Perl-while-we-make-a-connection-to-the-database-for-executing-the-query
321# https://docstore.mik.ua/orelly/linux/dbi/ch05_05.htm
322
323# https://metacpan.org/pod/DBI#performance
324# 'The q{...} style quoting used in this example avoids clashing with quotes that may be used in the SQL statement. Use the double-quote like qq{...} operator if you want to interpolate variables into the string. See "Quote and Quote-like Operators" in perlop for more details.'
325sub prepare_insert_metadata_row_stmthandle {
326 my $self = shift (@_);
327 #my ($did, $sid, $metaname, $metavalue) = @_;
328 my $dbh = $self->{'db_handle'};
329
330 my $tablename = $self->get_metadata_table_name();
331
332 #my $stmt = "INSERT INTO $tablename (did, sid, metaname, metavalue) VALUES ('$did', '$sid', '$metaname', '$metavalue');"; # ?, ?, ?, ?
333
334 # using qq{} since we want $tablename placeholder to be filled in
335 # returns Statement Handle object!
336 my $sth = $dbh->prepare(qq{INSERT INTO $tablename (did, sid, metaname, metavalue) VALUES (?, ?, ?, ?)}) || warn("Could not prepare insert statement for metadata table\n");
337
338 print STDERR "@@@@ Prepared meta insert statement: ".$sth->{'Statement'}."\n";
339
340 return $sth;
341}
342
343sub prepare_insert_fulltxt_row_stmthandle {
344 my $self = shift (@_);
345 #my ($did, $sid, $fulltext) = @_;
346 my $dbh = $self->{'db_handle'};
347
348 my $tablename = $self->get_fulltext_table_name();
349
350 #my $stmt = "INSERT INTO $tablename (did, sid, fulltxt) VALUES ('$did', '$sid', '$fulltext');"; ?, ?, ?
351
352 # using qq{} since we want $tablename placeholder to be filled in
353 # returns Statement Handle object!
354 my $sth = $dbh->prepare(qq{INSERT INTO $tablename (did, sid, fulltxt) VALUES (?, ?, ?)}) || warn("Could not prepare insert statement for fulltxt table\n");
355
356 print STDERR "@@@@ Prepared fulltext insert statement: ".$sth->{'Statement'}."\n";
357
358 return $sth;
359}
360
361
362## The 2 select statements used by GreenstoneSQLPlugin
363
364# Returns the statement handle that prepared and executed
365# a "SELECT * FROM <COLL>_metadata WHERE did = $oid" SQL statement.
366# Caller can call fetchrow_array() on returned statement handle, $sth
367# Have to use prepare() and execute() instead of do() since do() does
368# not allow for fetching result set thereafter:
369# do(): "This method is typically most useful for non-SELECT statements that either cannot be prepared in advance (due to a limitation of the driver) or do not need to be executed repeatedly. It should not be used for SELECT statements because it does not return a statement handle (so you can't fetch any data)." https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#do
370sub select_from_metatable_matching_docid {
371 my $self= shift (@_);
372 my ($oid) = @_;
373
374 my $dbh = $self->{'db_handle'};
375 my $meta_table = $self->get_metadata_table_name();
376
377 my $sth = $dbh->prepare(qq{SELECT * FROM $meta_table WHERE did = ?});
378 $sth->execute( $oid ); # will print msg on fail
379
380 return $sth; # caller can call fetchrow_array() on returned statement handle, sth
381}
382
383# Returns the statement handle that prepared and executed
384# a "SELECT * FROM <COLL>_metadata WHERE did = $oid" SQL statement.
385# Caller can call fetchrow_array() on returned statement handle, $sth
386sub select_from_texttable_matching_docid {
387 my $self= shift (@_);
388 my ($oid) = @_;
389
390 my $dbh = $self->{'db_handle'};
391 my $fulltxt_table = $self->get_fulltext_table_name();
392
393 my $sth = $dbh->prepare(qq{SELECT * FROM $fulltxt_table WHERE did = ?});
394 $sth->execute( $oid ); # will print msg on fail
395
396 return $sth; # caller can call fetchrow_array() on returned statement handle, sth
397}
398
399
400# Can call this after connection succeeded to get the database handle, dbh,
401# if any specific DB operation (SQL statement, create/delete)
402# needs to be executed that is not already provided as a method of this class.
403sub get_db_handle {
404 my $self= shift (@_);
405 return $self->{'db_handle'};
406}
407
408################ HELPER METHODS ##############
409
410# More basic helper methods
411sub get_metadata_table_name {
412 my $self= shift (@_);
413 my $table_name = $self->{'tablename_prefix'} . "_metadata";
414 return $table_name;
415}
416
417# FULLTEXT is a reserved keyword in (My)SQL. https://dev.mysql.com/doc/refman/5.5/en/keywords.html
418# So we can't name a table or any of its columns "fulltext". We use "fulltxt" instead.
419sub get_fulltext_table_name {
420 my $self= shift (@_);
421 my $table_name = $self->{'tablename_prefix'} . "_fulltxt";
422 return $table_name;
423}
424
425
426# I can get my version of table_exists to work, but it's not so ideal
427# Interesting that MySQL has non-standard command to CREATE TABLE IF NOT EXISTS and DROP TABLE IF EXISTS,
428# see https://www.perlmonks.org/bare/?node=DBI%20Recipes
429# The page further has a table_exists function that could work with proper comparison
430# Couldn't get the first solution at https://www.perlmonks.org/bare/?node_id=500050 to work though
431sub table_exists {
432 my $self = shift (@_);
433 my $dbh = $self->{'db_handle'};
434 my ($table_name) = @_;
435
436 my @table_list = $dbh->tables;
437 #my $tables_str = @table_list[0];
438 foreach my $table (@table_list) {
439 return 1 if ($table =~ m/$table_name/);
440 }
441 return 0;
442}
443
4441;
Note: See TracBrowser for help on using the repository browser.