Ignore:
Timestamp:
2018-11-09T22:33:51+13:00 (5 years ago)
Author:
ak19
Message:

Major tidying up: last remaining debug statements, lots of comments, removed TODO lists.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/gsmysql.pm

    r32594 r32595  
    4141#################
    4242
    43 ##############################
    44 
    45 # TODO Q: If disconnect is automatically called when object destroyed, what does that mean
    46 # for our file-global handle object, is disconnect only called at end of perl process?
    47 # Does that mean we don't need to explicitly call disconnect in gsmysql object's destroy during
    48 # the GLOBAL destruction phase?
    49 # https://perldoc.perl.org/perlobj.html#Destructors
    50 
    51 #+ TODO: add infrastructure for db_port, AutoCommit etc
    52 # For port, see https://stackoverflow.com/questions/2248665/perl-script-to-connect-to-mysql-server-port-3307
    53 
    54 # + TODO: remove unnecessary warn() since PrintError is active
    55 
    56 # + TODO: drop table if exists and create table if exists are available in MySQL. Use those cmds
    57 # instead of always first checking for existence ourselves? Only when subclassing to specific
    58 # mysql class?
    59 
    60 
    61 # + TODO Q: What on cancelling a build: delete table? But what if it was a rebuild and the rebuild is cancelled (not the original build)?
    62 # Do we create a copy of the orig database as backup, then start populating current db, and if cancelled, delete current db and RENAME backup table to current?
    63 # https://stackoverflow.com/questions/3280006/duplicating-a-mysql-table-indexes-and-data
    64 # BUT what if the table is HUGE? (Think of a collection with millions of docs.) Huge overhead in copying?
    65 # The alternative is we just quit on cancel, but then: cancel could leave the table in a partial committed state, with no way of rolling back.
    66 # Unless they do a full rebuild, which will recreate the table from scratch?
    67 # SOLUTION-> rollback transaction on error, see https://www.effectiveperlprogramming.com/2010/07/set-custom-dbi-error-handlers/
    68 # In that case, should set AutoCommit to off on connection, and remember to commit at end.
    69 
    70 # + TODO: Consider AutoCommit status (and Autocommit off allowing commit or rollback for GS coll build cancel) later
    71 
    72 
    73 ##############################
    7443
    7544# singleton connection
     
    7746my $ref_count = 0;
    7847
    79 # Need params_map keys:
    80 # - collection_name
    81 # - db_encoding (db content encoding) - MySQL can set this at server, db, table levels. For MySQL
    82 # we set the enc during connect at server level. Not sure whether other DB's support it at the
    83 # same levels.
    84 
    85 # For connection to MySQL, need:
    86 #  - db_driver, db_client_user, db_client_pwd, db_host, (db_port not used at present)
    87 # So these will be parameterised, but in a hashmap, for just the connect method.
    88 
    89 # Parameterise (one or more methods may use them):
    90 # - db_name (which is the GS3 sitename, or "greenstone2" for GS2)
    91 
    92 # Database access related functions
    93 # http://g2pc1.bu.edu/~qzpeng/manual/MySQL%20Commands.htm
    94 # https://www.guru99.com/insert-into.html
    9548
    9649# Add signal handlers to cleanup and disconnect from db on sudden termination, incl cancel build
     
    11669        &gsprintf::gsprintf(STDERR, "{gsmysql.restore_backups_on_build_cancel_msg}\n");
    11770        print STDERR "****************************\n";
     71
     72        # TODO?
     73        # Since we'll be disconnecting (cancel -> die() -> dbi::DESTROY() -> dbi::disconnect()),
     74        # ensure $sth->finish() called on statement handles if any fetch remnants remain
    11875    }
    11976    }
     
    12279}
    12380
     81
     82# Need to pass in to constructor for creating member variables:
     83# - collection_name
     84# - verbosity
     85
     86# For connection to MySQL, need:
     87#  - db_driver, db_client_user, db_client_pwd, db_host, (db_port not used at present)
     88# So these will be parameterised, but in a hashmap, for just the connect() method.
     89
     90# Parameterise (one or more methods may use them):
     91# - db_name (which is the GS3 sitename, or "greenstone2" for GS2)
     92
     93# Don't need to parameterise the db_encoding (db content encoding)
     94# This is for now an internal variable, as the Greenstone db contents are always going to be utf8
     95# reflecting how their doc.xml counterparts should only contain utf8.
     96#
     97# - MySQL can set the desired db_encoding at server, db, table levels.
     98# Not sure whether other DBs support it at the same levels.
     99# For MySQL we set the enc during connect at server level.
     100#
    124101sub new
    125102
     
    128105    my ($params_map) = @_;
    129106   
    130     # library_url: to be specified on the cmdline if not using a GS-included web server
    131     # the GSDL_LIBRARY_URL env var is useful when running cmdline buildcol.pl in the linux package manager versions of GS3
    132107   
    133108    # https://stackoverflow.com/questions/7083453/copying-a-hashref-in-perl
     
    162137# For more on when destroy is called, see https://www.perlmonks.org/?node_id=1020920
    163138#
     139# However, database is automatically disconnected on DBI DESTROY method called by perl on
     140# a perl process' termination:
     141#
    164142# https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#disconnect
    165 # "Disconnects the database from the database handle. disconnect is typically only used before exiting the program. The handle is of little use after disconnecting.
     143# 'Disconnects the database from the database handle. disconnect is typically only used before exiting the program. The handle is of little use after disconnecting.
    166144#
    167145# The transaction behaviour of the disconnect method is, sadly, undefined. Some database systems (such as Oracle and Ingres) will automatically commit any outstanding changes, but others (such as Informix) will rollback any outstanding changes. Applications not using AutoCommit should explicitly call commit or rollback before calling disconnect.
     
    171149# Generally, if you want your changes to be committed or rolled back when you disconnect, then you should explicitly call "commit" or "rollback" before disconnecting.
    172150#
    173 # If you disconnect from a database while you still have active statement handles (e.g., SELECT statement handles that may have more data to fetch), you will get a warning. The warning may indicate that a fetch loop terminated early, perhaps due to an uncaught error. To avoid the warning call the finish method on the active handles."
     151# If you disconnect from a database while you still have active statement handles (e.g., SELECT statement handles that may have more data to fetch), you will get a warning. The warning may indicate that a fetch loop terminated early, perhaps due to an uncaught error. To avoid the warning call the finish method on the active handles.'
    174152#
    175153#
     
    193171        # will ensure disconnection still happens. It happens, but silently.
    194172        print STDERR "   Global Destruct Phase: DBI's own destructor will disconnect database\n";
     173
     174        # When we did the disconnection manually on premature termination:
    195175        #$_dbh_instance->disconnect or warn $_dbh_instance->errstr;
    196176        #$_dbh_instance = undef;
     
    211191################### BASIC DB OPERATIONS ##################
    212192
    213 # THE NEW DB FUNCTIONS
    214193# NOTE: FULLTEXT is a reserved keyword in (My)SQL. So we can't name a table or any of its columns "fulltext".
    215194# https://dev.mysql.com/doc/refman/5.5/en/keywords.html
     
    239218
    240219# SINGLETON METHOD #
    241 # TODO: where should the defaults for these params be, here or in GS-SQLPlugin/Plugout?
    242220sub _get_connection_instance
    243221{
     
    247225   
    248226    return $_dbh_instance if($_dbh_instance);
    249     # or make the connection
     227    # or else make the connection, as happens below
    250228
    251229
     
    300278    # More: https://www.oreilly.com/library/view/programming-the-perl/1565926994/re44.html
    301279    my $autocommit = (defined $params_map->{'autocommit'}) ? $params_map->{'autocommit'} : 1;
     280
     281    # Useful: https://www.effectiveperlprogramming.com/2010/07/set-custom-dbi-error-handlers/
    302282   
    303283    my $dbh = DBI->connect("$connect_str", $db_user, $db_pwd,
     
    307287                   RaiseError => 0, # off by default, but being explicit
    308288                   AutoCommit => $autocommit,
    309                    mysql_enable_utf8mb4 => 1 # tells MySQL to use UTF-8 for communication and tells DBD::mysql to decode the data, see https://stackoverflow.com/questions/46727362/perl-mysql-utf8mb4-issue-possible-bug
     289                   mysql_enable_utf8mb4 => 1 # tells MySQL to use (4 byte) UTF-8 for
     290                   # communication and tells DBD::mysql to use it to decode the data,
     291                   # see https://stackoverflow.com/questions/46727362/perl-mysql-utf8mb4-issue-possible-bug
    310292               });
    311293
     
    328310    # To set up the db for utf8mb4, therefore,
    329311    # the MySQL server needs to be configured for that char encoding by running the server as:
    330     # mysql-5.7.23-linux-glibc2.12-x86_64/bin>./mysqld_safe --datadir=/Scratch/ak19/mysql/data --character_set_server=utf8mb4
     312    # mysql/mysql-5.7.23-linux-glibc2.12-x86_64/bin>./mysqld_safe --datadir=/PATHTO/mysql/data --character_set_server=utf8mb4
    331313    # AND when connecting to the server, we can can either set mysql_enable_utf8mb4 => 1
    332314    # as a connection option
     
    337319    # is no more objectionable. It has the advantage of cutting out the 2 extra lines of doing
    338320    # set NAMES '<enc>' and $dbh->{mysql_enable_utf8mb4} = 1 here.
    339     # These lines may be preferred if more db_driver options are to be supported in future:
    340     # then a separate method called set_db_encoding($enc) can work out what db_driver we're using
    341     # and if mysql and enc=utfy, then it can do the following whereas it will issue other do stmts
    342     # for other db_drivers, see https://www.perlmonks.org/?node_id=259456:
     321    # These lines may be preferred if more db_driver options are to be supported in future?
     322    # (see https://www.perlmonks.org/?node_id=259456)
    343323   
    344324    #my $stmt = "set NAMES '" . $db_enc . "'";
     
    364344    if($ref_count == 0) { # Only commit transaction when we're about to actually disconnect, not before
    365345   
    366     # + TODO: If AutoCommit was off, meaning transactions were on/enabled,
     346    # If AutoCommit was off, meaning transactions were on/enabled,
    367347    # then here is where we commit our one long transaction.
    368348    # https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#commit
     
    383363# Call this method on die(), so that you're sure the perl process has disconnected from SQL db
    384364# Disconnect from db - https://metacpan.org/pod/DBI#disconnect
    385 # TODO: make sure to have committed or rolled back before disconnect
    386 # and that you've call finish() on statement handles if any fetch remnants remain
     365# Make sure to have committed or rolled back before disconnect
    387366sub _force_disconnect_from_db {
    388367    my $self= shift (@_);
     
    403382
    404383
    405 # Load the designated database, i.e. 'use <dbname>;'.
     384# Loads the designated database, i.e. 'use <dbname>;'.
    406385# If the database doesn't yet exist, creates it and loads it.
    407386# (Don't create the collection's tables yet, though)
     
    590569# 'The q{...} style quoting used in this example avoids clashing with quotes that may be used in the SQL statement. Use the double-quote like qq{...} operator if you want to interpolate variables into the string. See "Quote and Quote-like Operators" in perlop for more details.'
    591570#
    592 # This method uses lazy loading to prepare the SQL insert stmt once for a table and store it,
    593 # then execute the (stored) statement each time it's needed for that table.
     571# Each insert method uses lazy loading to prepare the SQL insert stmts once for a table and
     572# store it, then executes the (stored) statement each time it's needed for that table.
    594573sub insert_row_into_metadata_table {
    595574    my $self = shift (@_);
     
    599578   
    600579    my $tablename = $self->get_metadata_table_name();
    601     my $sth = $dbh->prepare_cached(qq{INSERT INTO $tablename (did, sid, metaname, metavalue) VALUES (?, ?, ?, ?)});# || warn("Could not prepare insert statement for metadata table\n");
     580    my $sth = $dbh->prepare_cached(qq{INSERT INTO $tablename (did, sid, metaname, metavalue) VALUES (?, ?, ?, ?)});
    602581
    603582    # Now we're ready to execute the command, unless we're only debugging
     
    626605   
    627606    my $tablename = $self->get_fulltext_table_name();
    628     my $sth = $dbh->prepare_cached(qq{INSERT INTO $tablename (did, sid, fulltxt) VALUES (?, ?, ?)});# || warn("Could not prepare insert statement for fulltxt table\n");
     607    my $sth = $dbh->prepare_cached(qq{INSERT INTO $tablename (did, sid, fulltxt) VALUES (?, ?, ?)});
    629608   
    630609    # Now we're ready to execute the command, unless we're only debugging
     
    651630#   https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#selectall_arrayref
    652631#
    653 # Returns the statement handle that prepared and executed
     632# Returns the resulting records of preparing and executing
    654633# a "SELECT * FROM <COLL>_metadata WHERE did = $oid" SQL statement.
    655 # Caller can call fetchrow_array() on returned statement handle, $sth
    656634# Have to use prepare() and execute() instead of do() since do() does
    657635# not allow for fetching result set thereafter:
     
    682660
    683661# See select_from_metatable_matching_docid() above.
    684 # Returns the statement handle that prepared and executed
    685 # a "SELECT * FROM <COLL>_metadata WHERE did = $oid" SQL statement.
    686 # Caller can call fetchrow_array() on returned statement handle, $sth
     662# Returns the resulting records from preparing and executing
     663# a "SELECT * FROM <COLL>_fulltxt WHERE did = $oid" SQL statement.
    687664sub select_from_texttable_matching_docid {
    688665    my $self= shift (@_);
     
    720697}
    721698
    722 # delete all records in metatable with specified docid
     699# delete all records in fulltxt table with the specified docid
    723700sub delete_recs_from_texttable_with_docid {
    724701    my $self= shift (@_);
     
    758735
    759736# Attempt to make sure the name parameter (for db or table name) is acceptable syntax
    760 # for the db in question, e.g. for mysql. For example, (My)SQL doesn't like tables or
     737# for the db in question, e.g. for mysql. For example, MySQL doesn't like tables or
    761738# databases with '-' (hyphens) in their names
    762739sub sanitize_name {
     
    768745
    769746
     747# MySQL has non-standard command to CREATE TABLE IF NOT EXISTS and DROP TABLE IF EXISTS, using that.
     748# See https://www.perlmonks.org/bare/?node=DBI%20Recipes
     749#    The page further has a table_exists function that could work with proper comparison
     750# Couldn't get the first solution at https://www.perlmonks.org/bare/?node_id=500050 to work though
    770751# I can get my version of table_exists to work, but it's not so ideal
    771 # Interesting that MySQL has non-standard command to CREATE TABLE IF NOT EXISTS and DROP TABLE IF EXISTS,
    772 # see https://www.perlmonks.org/bare/?node=DBI%20Recipes
    773 #    The page further has a table_exists function that could work with proper comparison
    774 # TODO Q: Couldn't get the first solution at https://www.perlmonks.org/bare/?node_id=500050 to work though
    775752sub table_exists {
    776753    my $self = shift (@_);
Note: See TracChangeset for help on using the changeset viewer.