source: main/trunk/greenstone2/perllib/gssql.pm@ 32529

Last change on this file since 32529 was 32529, checked in by ak19, 6 years ago

Split the database functions into their own file gssql.pm, so that GreenstoneSQLPlugin can share some db-related code used by GreenstoneSQLPlugout.

File size: 15.5 KB
Line 
1###########################################################################
2#
3# gssql.pm -- DBI for SQL related utility functions used by GreenstoneSQL
4# Plugin and Plugout.
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 1999 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27package gssql;
28
29use strict;
30no strict 'refs';
31no strict 'subs';
32
33use GreenstoneXMLPlugout;
34use docprint;
35
36use DBI; # the central package for this plugout
37
38# Need params_map keys:
39# - collection_name
40# X parameterise: - build_mode
41# For MySQL:
42# - db_encoding,
43# - db_driver, db_client_user, db_client_pwd, db_host, (db_port not used at present)
44# X parameterise: - db_name (which is the GS3 sitename),
45# TODO: add infrastructure for db_port, AutoCommit etc
46# For port, see https://stackoverflow.com/questions/2248665/perl-script-to-connect-to-mysql-server-port-3307
47
# Constructor.
#
# Parameters:
#   $params_map - hash reference of settings. Keys read by the methods of
#                 this package: collection_name, db_name, db_driver,
#                 db_client_user, db_client_pwd, db_host, db_encoding.
#
# Returns a new object of class $class that holds a shallow copy of
# $params_map. Because it is a copy, the caller's hash is not blessed and
# state stored on the object later (such as the database handle) does not
# leak back into the caller's hash.
sub new
{
    my $class = shift(@_);
    my ($params_map) = @_;

    # Copy the top level of the hash only (nested references stay shared).
    # A missing or undefined map yields an empty object rather than an error.
    # https://stackoverflow.com/questions/7083453/copying-a-hashref-in-perl
    my $self = { %{ $params_map || {} } };

    return bless($self, $class);
}
67
68
69#################################
70
71# Database access related functions
72# http://g2pc1.bu.edu/~qzpeng/manual/MySQL%20Commands.htm
73# https://www.guru99.com/insert-into.html
74
75# TODO Q: What on cancelling a build: delete table? But what if it was a rebuild and the rebuild is cancelled (not the original build)?
76# Do we create a copy of the orig database as backup, then start populating current db, and if cancelled, delete current db and RENAME backup table to current?
77# https://stackoverflow.com/questions/3280006/duplicating-a-mysql-table-indexes-and-data
78# BUT what if the table is HUGE? (Think of a collection with millions of docs.) Huge overhead in copying?
79# The alternative is we just quit on cancel, but then: cancel could leave the table in a partial committed state, with no way of rolling back.
80# Unless they do a full rebuild, which will recreate the table from scratch?
81# SOLUTION-> rollback transaction on error, see https://www.effectiveperlprogramming.com/2010/07/set-custom-dbi-error-handlers/
82# But then should set AutoCommit to off on connection, and remember to commit every time
83
84#################
85# Database functions that use the perl DBI module (with the DBD driver module for mysql)
86#################
87
88# THE NEW DB FUNCTIONS
89# NOTE: FULLTEXT is a reserved keyword in (My)SQL. So we can't name a table or any of its columns "fulltext".
90# https://dev.mysql.com/doc/refman/5.5/en/keywords.html
91
92# TODO: Consider AutoCommit status (and Autocommit off allowing commit or rollback for GS coll build cancel) later
93
# Connects to the database server (without selecting a database) and stores
# the resulting DBI handle in $self->{'db_handle'}.
#
# Settings read from $self, with their fallbacks:
#   db_driver      - defaults to "mysql"
#   db_client_user - defaults to "root"
#   db_client_pwd  - no default; if undef, the failed connect reports it
#   db_host        - defaults to "127.0.0.1"
#   db_encoding    - defaults to "utf8"
#
# Returns 1 on success, 0 if the connection could not be made. On failure the
# error code is available through DBI->err even though there is no handle.
sub connect_to_db {
    my ($self) = @_;

    my $db_driver = $self->{'db_driver'} || "mysql";
    my $db_user = $self->{'db_client_user'} || "root";
    my $db_pwd = $self->{'db_client_pwd'};
    my $db_host = $self->{'db_host'} || "127.0.0.1";
    my $db_enc = $self->{'db_encoding'} || "utf8";

    # localhost doesn't work for us, but 127.0.0.1 works
    # https://metacpan.org/pod/DBD::mysql
    # "The hostname, if not specified or specified as '' or 'localhost', will default to a MySQL server
    # running on the local machine using the default for the UNIX socket. To connect to a MySQL server
    # on the local machine via TCP, you must specify the loopback IP address (127.0.0.1) as the host."
    #
    # The database name is deliberately left out of the connection string, so
    # that the caller can check separately whether the database exists.
    my $connect_str = "dbi:$db_driver:host=$db_host";
    my $dbh = DBI->connect($connect_str, $db_user, $db_pwd,
                           {
                               ShowErrorStatement => 1, # more informative as DBI will append failed SQL stmt to error message
                               PrintError => 1, # on by default, but being explicit
                               RaiseError => 0, # off by default, but being explicit
                               AutoCommit => 1  # on by default, but being explicit
                           });

    if (!$dbh) {
        # NOTE, despite handle dbh being undefined, error code will be in DBI->err
        return 0;
    }

    # Set the encoding at db server level.
    # https://dev.mysql.com/doc/refman/5.7/en/charset.html
    # https://dev.mysql.com/doc/refman/5.7/en/charset-conversion.html
    # Not sure if this command is mysql specific.
    # The encoding name comes from configuration, so let the driver quote it
    # instead of pasting it between quote characters by hand.
    my $stmt = "set NAMES " . $dbh->quote($db_enc);
    $dbh->do($stmt) || warn("Unable to set charset encoding at db server level to: " . $db_enc . "\n");

    # if we're here, then connection succeeded, store handle
    $self->{'db_handle'} = $dbh;
    return 1;
}
139
# Selects database $db_name on the stored handle and makes sure the current
# collection's metadata and fulltext tables are there.
#
# Parameters:
#   $db_name    - name of the database to use (created if it is unknown)
#   $build_mode - when this is "removeold", the collection's tables are
#                 dropped and recreated; otherwise existing tables are reused
#
# Returns a true value on success. Returns 0 if creating the database or a
# table failed, and the false result of the "use" statement if the database
# could not be selected for a reason other than not existing.
sub load_db_and_tables {
    my ($self, $db_name, $build_mode) = @_;
    my $dbh = $self->{'db_handle'};

    # perl DBI switch database: https://www.perlmonks.org/?node_id=995434
    # do() returns undef on error.
    my $success = $dbh->do("use $db_name");

    if ($success) {
        # The database was already there: only the tables need checking.
        print STDERR "@@@ DATABASE $db_name EXISTED\n";

        my $remove_old = ($build_mode eq "removeold");
        $self->delete_collection_tables() if $remove_old;

        if ($remove_old || !$self->table_exists($self->get_metadata_table_name())) {
            return 0 unless $self->create_metadata_table();
        }
        else {
            print STDERR "@@@ Meta table exists\n";
        }

        if ($remove_old || !$self->table_exists($self->get_fulltext_table_name())) {
            return 0 unless $self->create_fulltext_table();
        }
        else {
            print STDERR "@@@ Fulltxt table exists\n";
        }

        return $success;
    }

    # "Unknown database" has error code 1049 (mysql only?). Any other failure
    # is handed back to the caller as is.
    return $success unless $dbh->err == 1049;

    # The database doesn't exist yet: create it, select it, create its tables.
    return 0 unless $self->create_db($db_name);

    print STDERR "@@@ CREATED DATABASE $db_name\n";

    return 0 unless $dbh->do("use $db_name");
    return 0 unless $self->create_metadata_table();
    return 0 unless $self->create_fulltext_table();

    return 1;
}
193
# Selects database $db_name on the stored handle.
# It will not attempt to create the requested db (nor its tables), and it
# does not terminate the program if the db cannot be selected: it warns and
# returns a false value, so the caller must check the result.
#
# Parameters:
#   $db_name - name of the database to switch to
#
# Returns the true result of do() on success, 0 on failure.
sub use_db {
    my ($self, $db_name) = @_;
    my $dbh = $self->{'db_handle'};

    # perl DBI switch database: https://www.perlmonks.org/?node_id=995434
    # do() returns undef on error.
    my $rc = $dbh->do("use $db_name");

    if (!$rc) {
        # warn() itself returns a true value, so it must not be part of the
        # returned expression or a failure would look like success.
        warn("Unable to use database $db_name\n");
        return 0;
    }

    return $rc;
}
206
# Disconnects the stored handle from the db - https://metacpan.org/pod/DBI#disconnect
# TODO: make sure to have committed or rolled back before disconnect
# and that you've called finish() on statement handles if any fetch remnants remain
#
# Returns the result of the handle's disconnect(); warns with the handle's
# error string if that result is false.
sub disconnect_from_db {
    my ($self) = @_;
    my $dbh = $self->{'db_handle'};

    # Statement handles are deliberately not finished here. DBI docs:
    # "When all the data has been fetched from a SELECT statement, the driver will automatically call finish for you. So you should not call it explicitly except when you know that you've not fetched all the data from a statement handle and the handle won't be destroyed soon."

    # The handle is of little use after disconnecting.
    # Possibly PrintError already prints a warning and this duplicates it?
    my $rc = $dbh->disconnect;
    warn $dbh->errstr unless $rc;

    return $rc;
}
225
# Creates a database on the connected server.
#
# Parameters:
#   $db_name - (optional) name of the database to create. When omitted or
#              empty, $self->{'db_name'} is used instead.
#
# Returns the result of do(), which is undef on failure
# (https://metacpan.org/pod/DBI#do).
sub create_db {
    my ($self, $db_name) = @_;
    my $dbh = $self->{'db_handle'};

    # Callers pass the name of the database they go on to "use", so the
    # argument takes precedence over the configured name.
    if (!defined $db_name || $db_name eq "") {
        $db_name = $self->{'db_name'};
    }

    # https://stackoverflow.com/questions/5025768/how-can-i-create-a-mysql-database-from-a-perl-script
    return $dbh->do("create database $db_name");
}
234
235
# Creates the current collection's metadata table, named by
# get_metadata_table_name(), with an auto incremented primary key.
#
# Returns the result of do(), which is undef on failure.
sub create_metadata_table {
    my ($self) = @_;

    my @column_defs = (
        "id INT NOT NULL AUTO_INCREMENT",
        "did VARCHAR(63) NOT NULL",
        "sid VARCHAR(63) NOT NULL",
        "metaname VARCHAR(127) NOT NULL",
        "metavalue VARCHAR(1023) NOT NULL",
        "PRIMARY KEY(id)",
    );

    my $sql = "CREATE TABLE " . $self->get_metadata_table_name()
        . " (" . join(", ", @column_defs) . ");";

    return $self->{'db_handle'}->do($sql);
}
246
247# TODO: Investigate: https://dev.mysql.com/doc/search/?d=10&p=1&q=FULLTEXT
248# 12.9.1 Natural Language Full-Text Searches
249# to see whether we have to index the 'fulltxt' column of the 'fulltext' tables
250# or let user edit this file, or add it as another option
# Creates the current collection's full text table, named by
# get_fulltext_table_name(), with an auto incremented primary key.
#
# Returns the result of do(), which is undef on failure.
sub create_fulltext_table {
    my ($self) = @_;

    my @column_defs = (
        "id INT NOT NULL AUTO_INCREMENT",
        "did VARCHAR(63) NOT NULL",
        "sid VARCHAR(63) NOT NULL",
        "fulltxt LONGTEXT",
        "PRIMARY KEY(id)",
    );

    my $sql = "CREATE TABLE " . $self->get_fulltext_table_name()
        . " (" . join(", ", @column_defs) . ");";

    return $self->{'db_handle'}->do($sql);
}
262
263
264# USEFUL: https://metacpan.org/pod/DBI
265# "Many methods have an optional \%attr parameter which can be used to pass information to the driver implementing the method. Except where specifically documented, the \%attr parameter can only be used to pass driver specific hints. In general, you can ignore \%attr parameters or pass it as undef."
266
267
268# https://www.guru99.com/insert-into.html
269# and https://dev.mysql.com/doc/refman/8.0/en/example-auto-increment.html
270# for inserting multiple rows at once
271# https://www.perlmonks.org/bare/?node_id=316183
272# https://metacpan.org/pod/DBI#do
273# https://www.quora.com/What-is-the-difference-between-prepare-and-do-statements-in-Perl-while-we-make-a-connection-to-the-database-for-executing-the-query
274# https://docstore.mik.ua/orelly/linux/dbi/ch05_05.htm
275
276# https://metacpan.org/pod/DBI#performance
277# 'The q{...} style quoting used in this example avoids clashing with quotes that may be used in the SQL statement. Use the double-quote like qq{...} operator if you want to interpolate variables into the string. See "Quote and Quote-like Operators" in perlop for more details.'
# Prepares the parameterised statement for inserting one row into the
# current collection's metadata table. The four placeholders are, in order:
# did, sid, metaname, metavalue.
#
# Returns the DBI statement handle on success. If the statement could not be
# prepared, a warning is issued and undef is returned.
sub prepare_insert_metadata_row_stmthandle {
    my ($self) = @_;
    my $dbh = $self->{'db_handle'};

    my $tablename = $self->get_metadata_table_name();

    # using qq{} since we want $tablename placeholder to be filled in
    my $sth = $dbh->prepare(qq{INSERT INTO $tablename (did, sid, metaname, metavalue) VALUES (?, ?, ?, ?)});

    if (!$sth) {
        # warn() returns a true value, so it must not end up in $sth where it
        # would be mistaken for a statement handle.
        warn("Could not prepare insert statement for metadata table\n");
        return undef; # explicit undef keeps the single-value return shape
    }

    print STDERR "@@@@ Prepared meta insert statement: ".$sth->{'Statement'}."\n";

    return $sth;
}
295
# Prepares the parameterised statement for inserting one row into the
# current collection's full text table. The three placeholders are, in
# order: did, sid, fulltxt.
#
# Returns the DBI statement handle on success. If the statement could not be
# prepared, a warning is issued and undef is returned.
sub prepare_insert_fulltxt_row_stmthandle {
    my ($self) = @_;
    my $dbh = $self->{'db_handle'};

    my $tablename = $self->get_fulltext_table_name();

    # using qq{} since we want $tablename placeholder to be filled in
    my $sth = $dbh->prepare(qq{INSERT INTO $tablename (did, sid, fulltxt) VALUES (?, ?, ?)});

    if (!$sth) {
        # warn() returns a true value, so it must not end up in $sth where it
        # would be mistaken for a statement handle.
        warn("Could not prepare insert statement for fulltxt table\n");
        return undef; # explicit undef keeps the single-value return shape
    }

    print STDERR "@@@@ Prepared fulltext insert statement: ".$sth->{'Statement'}."\n";

    return $sth;
}
313
314# "IF EXISTS is used to prevent an error from occurring if the database does not exist. ... DROP DATABASE returns the number of tables that were removed. The DROP DATABASE statement removes from the given database directory those files and directories that MySQL itself may create during normal operation.Jun 20, 2012"
315#MySQL 8.0 Reference Manual :: 13.1.22 DROP DATABASE Syntax
316# https://dev.mysql.com/doc/en/drop-database.html
# Drops the current collection's metadata table and then its full text
# table, warning about each table that could not be dropped.
#
# Returns the outcome for the table handled last: the true result of do(),
# or the value returned by warn() if that drop failed.
sub delete_collection_tables {
    my ($self) = @_;
    my $dbh = $self->{'db_handle'};

    print STDERR "### Build mode is removeold, so deleting tables for current collection\n";

    my @tables = ($self->get_metadata_table_name(), $self->get_fulltext_table_name());

    my $outcome;
    foreach my $table (@tables) {
        # drop table <tablename>
        $outcome = $dbh->do("drop table $table") || warn("@@@ Couldn't delete $table");
    }

    return $outcome;
}
329
330# Don't call this: it will delete the meta and full text tables for ALL collections in $db_name (localsite by default)!
331# this is just for debugging
# Debugging aid: drops the entire database $db_name.
#
# Parameters:
#   $db_name - name of the database to drop
#
# Returns 1 if the drop statement succeeded, 0 otherwise.
sub _delete_database {
    my ($self, $db_name) = @_;

    # "drop database dbname"
    my $dropped = $self->{'db_handle'}->do("drop database $db_name");

    return $dropped ? 1 : 0;
}
342
343# More basic helper methods
# Returns the name of the current collection's metadata table: the value of
# $self->{'collection_name'} followed by "_metadata".
sub get_metadata_table_name {
    my ($self) = @_;
    return join("", $self->{'collection_name'}, "_metadata");
}
349
350# FULLTEXT is a reserved keyword in (My)SQL. https://dev.mysql.com/doc/refman/5.5/en/keywords.html
351# So we can't name a table or any of its columns "fulltext". We use "fulltxt" instead.
# Returns the name of the current collection's full text table: the value of
# $self->{'collection_name'} followed by "_fulltxt".
sub get_fulltext_table_name {
    my ($self) = @_;
    return join("", $self->{'collection_name'}, "_fulltxt");
}
357
358# I can get my version of table_exists to work, but it's not so ideal
359# Interesting that MySQL has non-standard command to CREATE TABLE IF NOT EXISTS and DROP TABLE IF EXISTS,
360# see https://www.perlmonks.org/bare/?node=DBI%20Recipes
361# The page further has a table_exists function that could work with proper comparison
362# Couldn't get the first solution at https://www.perlmonks.org/bare/?node_id=500050 to work though
# Checks whether a table called exactly $table_name is among the tables
# reported by the db handle.
#
# Parameters:
#   $table_name - unquoted table name to look for
#
# Returns 1 if the table is listed, 0 otherwise (including when no name is
# given).
sub table_exists {
    my ($self, $table_name) = @_;
    my $dbh = $self->{'db_handle'};

    return 0 unless (defined $table_name && $table_name ne "");

    # The listed names may be quoted and qualified with the database name,
    # e.g. `localsite`.`demo_metadata`, so the name has to be the final
    # component of the entry: preceded by the start of the string, a dot or
    # a quote character, and followed by at most a closing quote.
    # \Q...\E stops characters in the name from acting as regex
    # metacharacters, and the anchoring stops "demo_fulltxt" from matching
    # "mydemo_fulltxt".
    my $name_at_end = qr/(?:\A|[.`"])\Q$table_name\E[`"]?\z/;

    foreach my $table ($dbh->tables) {
        return 1 if ($table =~ $name_at_end);
    }
    return 0;
}
375
3761;
Note: See TracBrowser for help on using the repository browser.