Changeset 32529

Show
Ignore:
Timestamp:
19.10.2018 20:42:08 (4 weeks ago)
Author:
ak19
Message:

Split the database functions into their own file gssql.pm, so that GreenstoneSQLPlugin can share some db related code used by GreenstoneSQLPlugout.

Location:
main/trunk/greenstone2/perllib
Files:
1 added
1 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugouts/GreenstoneSQLPlugout.pm

    r32527 r32529  
    3232no strict 'subs'; 
    3333 
    34 use util; 
    35 use FileUtils; 
    3634use GreenstoneXMLPlugout; 
    3735use docprint; 
     36use gssql; 
    3837 
    3938use DBI; # the central package for this plugout 
    4039 
    41 use IPC::Open2; 
    42 use POSIX ":sys_wait_h"; # for waitpid, http://perldoc.perl.org/functions/waitpid.html 
    43  
    4440 
    4541# TODO: SIGTERM rollback and disconnect? 
    46  
    47 # TODO: rename class to GreenstoneSQLPlugout 
    4842 
    4943 
     
    5347} 
    5448 
     49# NOTTODO: die() statements need to be replaced with premature_termination 
     50# which should ensure the GreenstoneXMLPlugin (group)'s stuff is closed and cleaned up SOMEHOW 
     51# It's fine: the die() stmts all take place before setting up the super class' begin 
    5552 
    5653# TODO: deal with -removeold and everything? Or type out instructions for user 
     
    118115    ############ LOAD NECESSARY OPTIONS ########### 
    119116    print STDERR "########## COLLECTION: ". $ENV{'GSDLCOLLECTION'}."\n"; 
    120     $self->{'collection_name'} = $ENV{'GSDLCOLLECTION'}; 
     117    #$self->{'collection_name'} = $ENV{'GSDLCOLLECTION'}; 
    121118    print STDERR "***** GreenstoneSQLPlugout process mode = \"", $self->{'process_mode'}, "\"\n"; 
    122      
    123     if(!$self->connect_to_db()) { 
     119 
     120    my $db_params = { 
     121    'collection_name' => $ENV{'GSDLCOLLECTION'}, 
     122    'db_driver' => $self->{'db_driver'}, 
     123    'db_client_user' => $self->{'db_client_user'}, 
     124    'db_client_pwd' => $self->{'db_client_pwd'}, 
     125    'db_host' => $self->{'db_host'}, 
     126    'db_encoding' => $self->{'db_encoding'} 
     127    #'db_name' => $self->{'site_name'}, 
     128    #'build_mode' => $self->{'build_mode'}, 
     129    }; 
     130 
     131    my $gs_sql = new gssql($db_params); 
     132     
     133    if(!$gs_sql->connect_to_db()) { 
    124134    # This is fatal for the plugout, let's terminate here 
    125135    # PrintError would already have displayed the warning message on connection fail     
     
    129139    my $db_name = $self->{'site_name'} || "localsite"; # one database per GS3 site 
    130140    my $build_mode = $self->{'build_mode'} || "removeold"; 
    131     if(!$self->load_db_and_tables($db_name, $build_mode)) { 
     141    if(!$gs_sql->load_db_and_tables($db_name, $build_mode)) { 
    132142     
    133143    # This is fatal for the plugout, let's terminate here 
     
    140150    my $proc_mode = $self->{'process_mode'}; 
    141151    if($proc_mode eq "all" || $proc_mode eq "meta_only" ) { 
    142     $self->{'metadata_prepared_insert_statement_handle'} = $self->prepare_insert_metadata_row_stmthandle(); 
     152    $self->{'metadata_prepared_insert_statement_handle'} = $gs_sql->prepare_insert_metadata_row_stmthandle(); 
    143153    } 
    144154    if($proc_mode eq "all" || $proc_mode eq "text_only" ) { 
    145     $self->{'fulltxt_prepared_insert_statement_handle'} = $self->prepare_insert_fulltxt_row_stmthandle(); 
    146     } 
    147  
     155    $self->{'fulltxt_prepared_insert_statement_handle'} = $gs_sql->prepare_insert_fulltxt_row_stmthandle(); 
     156    } 
     157 
     158    # store the DBI wrapper instance 
     159    $self->{'gs_sql'} = $gs_sql; 
     160     
    148161    print STDERR "#### Meta stmt: " . $self->{'metadata_prepared_insert_statement_handle'}->{'Statement'} . "\n"; 
    149162    print STDERR "#### Full stmt: " . $self->{'fulltxt_prepared_insert_statement_handle'}->{'Statement'} . "\n"; 
     
    164177    $self->SUPER::end(@_);     
    165178     
    166     $self->disconnect_from_db() || warn("Unable to disconnect from database " . $self->{'site_name'} . "\n"); # disconnect_from_db() will also issue a warning, but this may be clearer 
     179    $self->{'gs_sql'}->disconnect_from_db() || warn("Unable to disconnect from database " . $self->{'site_name'} . "\n"); # disconnect_from_db() will also issue a warning, but this may be clearer 
    167180} 
    168181  
     
    304317} 
    305318 
    306 ################################# 
    307  
    308 # Database access related functions 
    309 # http://g2pc1.bu.edu/~qzpeng/manual/MySQL%20Commands.htm 
    310 # https://www.guru99.com/insert-into.html 
    311  
    312 # TODO Q: What to do on cancelling a build: delete table? But what if it was a rebuild and the rebuild is cancelled (not the original build)? 
    313 # Do we create a copy of the orig database as backup, then start populating current db, and if cancelled, delete current db and RENAME backup table to current?  
    314 # https://stackoverflow.com/questions/3280006/duplicating-a-mysql-table-indexes-and-data 
    315 # BUT what if the table is HUGE? (Think of a collection with millions of docs.) Huge overhead in copying? 
    316 # The alternative is we just quit on cancel, but then: cancel could leave the table in a partial committed state, with no way of rolling back. 
    317 # Unless they do a full rebuild, which will recreate the table from scratch? 
    318 # SOLUTION-> rollback transaction on error, see https://www.effectiveperlprogramming.com/2010/07/set-custom-dbi-error-handlers/ 
    319 # But then should set AutoCommit to off on connection, and remember to commit every time 
    320  
    321 ################# 
    322 # Database functions that use the perl DBI module (with the DBD driver module for mysql) 
    323 ################# 
    324  
    325 # THE NEW DB FUNCTIONS 
    326 # NOTE: FULLTEXT is a reserved keyword in (My)SQL. So we can't name a table or any of its columns "fulltext". 
    327 # https://dev.mysql.com/doc/refman/5.5/en/keywords.html 
    328  
    329 # TODO: Consider AutoCommit status (and Autocommit off allowing commit or rollback for GS coll build cancel) later 
    330  
    331 sub connect_to_db { 
    332     my $self= shift (@_); 
    333      
    334     my $db_driver = $self->{'db_driver'}; 
    335     my $db_user = $self->{'db_client_user'} || "root"; 
    336     my $db_pwd = $self->{'db_client_pwd'}; 
    337     my $db_host = $self->{'db_host'} || "127.0.0.1"; 
    338     my $db_enc = $self->{'db_encoding'} || "utf8"; 
    339      
    340     #my $db_name = $self->{'site_name'}; 
    341      
    342     # try connecting to the mysql db, if that fails it will die 
    343     # so don't bother preparing GreenstoneXMLPlugout by calling superclass' begin() 
    344  
    345     # localhost doesn't work for us, but 127.0.0.1 works 
    346     # https://metacpan.org/pod/DBD::mysql 
    347     # "The hostname, if not specified or specified as '' or 'localhost', will default to a MySQL server 
    348     # running on the local machine using the default for the UNIX socket. To connect to a MySQL server 
    349     # on the local machine via TCP, you must specify the loopback IP address (127.0.0.1) as the host." 
    350     #my $connect_str = "dbi:$db_driver:database=$db_name;host=$db_host"; 
    351     my $connect_str = "dbi:$db_driver:host=$db_host"; # don't provide db, so we can check the db is there 
    352     my $dbh = DBI->connect("$connect_str", $db_user, $db_pwd, 
    353                { 
    354                    ShowErrorStatement => 1, # more informative as DBI will append failed SQL stmt to error message 
    355                    PrintError => 1, # on by default, but being explicit 
    356                    RaiseError => 0, # off by default, but being explicit 
    357                    AutoCommit => 1 # on by default, but being explicit 
    358                }); 
    359  
    360     if(!$dbh) { 
    361     # NOTE, despite handle dbh being undefined, error code will be in DBI->err 
    362     return 0;    
    363     } 
    364  
    365     # set encoding https://metacpan.org/pod/DBD::mysql 
    366     # https://dev.mysql.com/doc/refman/5.7/en/charset.html 
    367     # https://dev.mysql.com/doc/refman/5.7/en/charset-conversion.html 
    368     # Setting the encoding at db server level. 
    369     # Not sure if this command is mysql specific: 
    370     my $stmt = "set NAMES '" . $db_enc . "'"; 
    371     $dbh->do($stmt) || warn("Unable to set charset encoding at db server level to: " . $db_enc . "\n"); 
    372      
    373     # if we're here, then connection succeeded, store handle 
    374     $self->{'db_handle'} = $dbh; 
    375     return 1; 
    376 } 
    377  
    378 sub load_db_and_tables { 
    379     my $self= shift (@_); 
    380     my ($db_name, $build_mode) = @_; 
    381     my $dbh = $self->{'db_handle'}; 
    382      
    383     # perl DBI switch database: https://www.perlmonks.org/?node_id=995434 
    384     # do() returns undef on error. 
    385     # connection succeeded, try to load our database. If that didn't work, attempt to create db 
    386     my $success = $dbh->do("use $db_name"); 
    387      
    388     if(!$success && $dbh->err == 1049) { # "Unknown database" error has code 1049 (mysql only?) meaning db doesn't exist yet 
    389     # attempt to create the db and its tables 
    390     $self->create_db($db_name) || return 0; 
    391  
    392     print STDERR "@@@ CREATED DATABASE $db_name\n"; 
    393      
    394     # once more attempt to use db, now that it exists 
    395     $dbh->do("use $db_name") || return 0; 
    396     #$dbh->do("use localsite") or die "Error (code" . $dbh->err ."): " . $dbh->errstr . "\n"; 
    397  
    398     # attempt to create tables in current db 
    399     $self->create_metadata_table() || return 0; 
    400     $self->create_fulltext_table() || return 0;  
    401  
    402     $success = 1; 
    403     } 
    404     elsif($success) { # database existed and loaded successfully, but 
    405     # before proceeding check that the current collection's tables exist 
    406  
    407     print STDERR "@@@ DATABASE $db_name EXISTED\n"; 
    408      
    409  
    410     if($build_mode eq "removeold") { 
    411         $self->delete_collection_tables(); 
    412     } 
    413  
    414     # use existing tables if any 
    415     # attempt to create tables in current db     
    416     if($build_mode eq "removeold" || !$self->table_exists($self->get_metadata_table_name())) { 
    417         $self->create_metadata_table() || return 0; 
    418     } else { 
    419         print STDERR "@@@ Meta table exists\n"; 
    420     } 
    421     if($build_mode eq "removeold" || !$self->table_exists($self->get_fulltext_table_name())) { 
    422         $self->create_fulltext_table() || return 0; 
    423     } else { 
    424         print STDERR "@@@ Fulltxt table exists\n"; 
    425     } 
    426      
    427     } 
    428      
    429     return $success; # could still return 0, if database failed to load with an error code != 1049 
    430 } 
    431  
    432 # disconnect from db - https://metacpan.org/pod/DBI#disconnect 
    433 # TODO: make sure to have committed or rolled back before disconnect 
    434 # and that you've called finish() on statement handles if any fetch remnants remain 
    435 sub disconnect_from_db { 
    436     my $self= shift (@_);     
    437     my $dbh = $self->{'db_handle'}; 
    438  
    439     # make sure any active stmt handles are finished 
    440     # NO: "When all the data has been fetched from a SELECT statement, the driver will automatically call finish for you. So you should not call it explicitly except when you know that you've not fetched all the data from a statement handle and the handle won't be destroyed soon." 
    441      
    442     #$meta_sth = $self->{'metadata_prepared_insert_statement_handle'}; 
    443     #$txt_sth = $self->{'fulltxt_prepared_insert_statement_handle'}; 
    444     #$meta_sth->finish() if($meta_sth); 
    445     #$txt_sth->finish() if($txt_sth); 
    446      
    447     my $rc = $dbh->disconnect or warn $dbh->errstr; # The handle is of little use after disconnecting. Possibly PrintError already prints a warning and this duplicates it? 
    448     return $rc; 
    449 } 
    450  
    451 sub create_db { 
    452     my $self= shift (@_); 
    453     my $db_name = $self->{'site_name'}; 
    454     my $dbh = $self->{'db_handle'}; 
    455      
    456     # https://stackoverflow.com/questions/5025768/how-can-i-create-a-mysql-database-from-a-perl-script 
    457     return $dbh->do("create database $db_name"); # do() will return undef on fail, https://metacpan.org/pod/DBI#do 
    458 } 
    459  
    460  
    461 sub create_metadata_table { 
    462     my $self= shift (@_); 
    463     my $dbh = $self->{'db_handle'}; 
    464      
    465     my $table_name = $self->get_metadata_table_name(); 
    466  
    467     # If using an auto incremented primary key: 
    468     my $stmt = "CREATE TABLE $table_name (id INT NOT NULL AUTO_INCREMENT, did VARCHAR(63) NOT NULL, sid VARCHAR(63) NOT NULL, metaname VARCHAR(127) NOT NULL, metavalue VARCHAR(1023) NOT NULL, PRIMARY KEY(id));"; 
    469     return $dbh->do($stmt); 
    470 } 
    471  
    472 # TODO: Investigate: https://dev.mysql.com/doc/search/?d=10&p=1&q=FULLTEXT 
    473 # 12.9.1 Natural Language Full-Text Searches 
    474 # to see whether we have to index the 'fulltxt' column of the 'fulltext' tables 
    475 # or let user edit this file, or add it as another option 
    476 sub create_fulltext_table { 
    477     my $self= shift (@_); 
    478     my $dbh = $self->{'db_handle'}; 
    479      
    480     my $table_name = $self->get_fulltext_table_name(); 
    481  
    482     # If using an auto incremented primary key: 
    483     my $stmt = "CREATE TABLE $table_name (id INT NOT NULL AUTO_INCREMENT, did VARCHAR(63) NOT NULL, sid VARCHAR(63) NOT NULL, fulltxt LONGTEXT, PRIMARY KEY(id));"; 
    484     return $dbh->do($stmt); 
    485  
    486 } 
    487  
    488  
    489 # USEFUL: https://metacpan.org/pod/DBI 
    490 # "Many methods have an optional \%attr parameter which can be used to pass information to the driver implementing the method. Except where specifically documented, the \%attr parameter can only be used to pass driver specific hints. In general, you can ignore \%attr parameters or pass it as undef." 
    491  
    492  
    493 # https://www.guru99.com/insert-into.html 
    494 # and https://dev.mysql.com/doc/refman/8.0/en/example-auto-increment.html 
    495 #     for inserting multiple rows at once 
    496 # https://www.perlmonks.org/bare/?node_id=316183 
    497 # https://metacpan.org/pod/DBI#do 
    498 # https://www.quora.com/What-is-the-difference-between-prepare-and-do-statements-in-Perl-while-we-make-a-connection-to-the-database-for-executing-the-query 
    499 # https://docstore.mik.ua/orelly/linux/dbi/ch05_05.htm 
    500  
    501 # https://metacpan.org/pod/DBI#performance 
    502 # 'The q{...} style quoting used in this example avoids clashing with quotes that may be used in the SQL statement. Use the double-quote like qq{...} operator if you want to interpolate variables into the string. See "Quote and Quote-like Operators" in perlop for more details.' 
    503 sub prepare_insert_metadata_row_stmthandle { 
    504     my $self = shift (@_);     
    505     #my ($did, $sid, $metaname, $metavalue) = @_; 
    506     my $dbh = $self->{'db_handle'}; 
    507      
    508     my $tablename = $self->get_metadata_table_name(); 
    509  
    510     #my $stmt = "INSERT INTO $tablename (did, sid, metaname, metavalue) VALUES ('$did', '$sid', '$metaname', '$metavalue');"; # ?, ?, ?, ? 
    511  
    512     # using qq{} since we want $tablename placeholder to be filled in 
    513     # returns Statement Handle object! 
    514     my $sth = $dbh->prepare(qq{INSERT INTO $tablename (did, sid, metaname, metavalue) VALUES (?, ?, ?, ?)}) || warn("Could not prepare insert statement for metadata table\n"); 
    515  
    516     print STDERR "@@@@ Prepared meta insert statement: ".$sth->{'Statement'}."\n"; 
    517      
    518     return $sth; 
    519 } 
    520  
    521 sub prepare_insert_fulltxt_row_stmthandle { 
    522     my $self = shift (@_); 
    523     #my ($did, $sid, $fulltext) = @_; 
    524     my $dbh = $self->{'db_handle'}; 
    525      
    526     my $tablename = $self->get_fulltext_table_name(); 
    527  
    528     #my $stmt = "INSERT INTO $tablename (did, sid, fulltxt) VALUES ('$did', '$sid', '$fulltext');"; ?, ?, ? 
    529  
    530     # using qq{} since we want $tablename placeholder to be filled in 
    531     # returns Statement Handle object! 
    532     my $sth = $dbh->prepare(qq{INSERT INTO $tablename (did, sid, fulltxt) VALUES (?, ?, ?)}) || warn("Could not prepare insert statement for fulltxt table\n"); 
    533      
    534     print STDERR "@@@@ Prepared fulltext insert statement: ".$sth->{'Statement'}."\n"; 
    535      
    536     return $sth; 
    537 } 
    538  
    539 # "IF EXISTS is used to prevent an error from occurring if the database does not exist. ... DROP DATABASE returns the number of tables that were removed. The DROP DATABASE statement removes from the given database directory those files and directories that MySQL itself may create during normal operation." (Jun 20, 2012) 
    540 #MySQL 8.0 Reference Manual :: 13.1.22 DROP DATABASE Syntax 
    541 # https://dev.mysql.com/doc/en/drop-database.html 
    542 sub delete_collection_tables { 
    543     my $self= shift (@_); 
    544     my $dbh = $self->{'db_handle'}; 
    545      
    546     print STDERR "### Build mode is removeold, so deleting tables for current collection\n"; 
    547      
    548     # drop table <tablename> 
    549     my $table = $self->get_metadata_table_name(); 
    550     $dbh->do("drop table $table") || warn("@@@ Couldn't delete $table"); 
    551     $table = $self->get_fulltext_table_name(); 
    552     $dbh->do("drop table $table") || warn("@@@ Couldn't delete $table"); 
    553 } 
    554  
    555 # Don't call this: it will delete the meta and full text tables for ALL collections in $db_name (localsite by default)! 
    556 # this is just for debugging 
    557 sub _delete_database { 
    558     my $self= shift (@_); 
    559     my ($db_name) = @_; 
    560     my $dbh = $self->{'db_handle'}; 
    561      
    562     # "drop database dbname" 
    563     $dbh->do("drop database $db_name") || return 0; 
    564  
    565     return 1; 
    566 } 
    567  
    568 # More basic helper methods 
    569 sub get_metadata_table_name { 
    570     my $self= shift (@_); 
    571     my $table_name = $self->{'collection_name'} . "_metadata"; 
    572     return $table_name; 
    573 } 
    574  
    575 # FULLTEXT is a reserved keyword in (My)SQL. https://dev.mysql.com/doc/refman/5.5/en/keywords.html 
    576 # So we can't name a table or any of its columns "fulltext". We use "fulltxt" instead. 
    577 sub get_fulltext_table_name { 
    578     my $self= shift (@_); 
    579     my $table_name = $self->{'collection_name'} . "_fulltxt"; 
    580     return $table_name; 
    581 } 
    582  
    583 # I can get my version of table_exists to work, but it's not so ideal 
    584 # Interesting that MySQL has non-standard command to CREATE TABLE IF NOT EXISTS and DROP TABLE IF EXISTS,  
    585 # see https://www.perlmonks.org/bare/?node=DBI%20Recipes 
    586 #    The page further has a table_exists function that could work with proper comparison 
    587 # Couldn't get the first solution at https://www.perlmonks.org/bare/?node_id=500050 to work though 
    588 sub table_exists { 
    589     my $self = shift (@_); 
    590     my $dbh = $self->{'db_handle'}; 
    591     my ($table_name) = @_; 
    592  
    593     my @table_list = $dbh->tables; 
    594     #my $tables_str = @table_list[0]; 
    595     foreach my $table (@table_list) { 
    596     return 1 if ($table =~ m/$table_name/); 
    597     } 
    598     return 0; 
    599 } 
    600319 
    6013201;