Changeset 32578

Show
Ignore:
Timestamp:
06.11.2018 19:26:08 (2 weeks ago)
Author:
ak19
Message:

Optimising. The gssql class internally has only one shared connection to the db, making the connection only the first time and disconnecting only when the last gssql is finished(). For keywords: this is implemented using the singleton coding (anti-) pattern. Now each perl process (import or buildcol) will connect to the SQL DB only once, not twice during import where it used to be once for GS SQL plugout and once for GSSQL plugin.

Location:
main/trunk/greenstone2/perllib
Files:
3 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/gssql.pm

    r32576 r32578  
    3333use DBI; # the central package for this module used by GreenstoneSQL Plugout and Plugin 
    3434 
     35# singleton connection 
     36my $_dbh_instance = undef; # calls undef() function. See https://perlmaven.com/undef-and-defined-in-perl 
     37my $ref_count = 0; 
     38 
    3539# Need params_map keys: 
    3640# - collection_name 
     
    4953# For port, see https://stackoverflow.com/questions/2248665/perl-script-to-connect-to-mysql-server-port-3307 
    5054 
     55# TODO: remove unnecessary warn() since PrintError is active 
     56# https://perldoc.perl.org/perlobj.html#Destructors 
     57 
     58# TODO: drop table if exists and create table if exists are available in MySQL. Use those cmds 
     59# instead of always first checking for existence ourselves? 
     60 
    5161sub new 
    5262 
     
    8090    return $self; 
    8191} 
     92 
    8293 
    8394 
     
    109120# TODO: Consider AutoCommit status (and Autocommit off allowing commit or rollback for GS coll build cancel) later 
    110121 
     122 
     123 
     124# SINGLETON / GET INSTANCE PATTERN 
     125# https://stackoverflow.com/questions/16655603/perl-objects-class-variable-initialization 
     126# https://stackoverflow.com/questions/7587157/how-can-i-set-a-static-variable-that-can-be-accessed-by-all-subclasses-of-the-sa 
     127# Singleton without Moose: https://www.perl.com/article/52/2013/12/11/Implementing-the-singleton-pattern-in-Perl/ 
     128 
     129sub connect_to_db 
     130{ 
     131    my $self= shift (@_); 
     132    my ($params_map) = @_; 
     133     
     134    $params_map->{'db_encoding'} = $self->{'db_encoding'}; 
     135    $params_map->{'verbosity'} = $self->{'verbosity'}; 
     136     
     137    $self->{'db_handle'} = &_get_connection_instance($params_map); # getting singleton (class method) 
     138    if($self->{'db_handle'}) { 
     139    $ref_count++; # if successful, keep track of the number of refs to the single db connection 
     140    return $self->{'db_handle'}; 
     141    } 
     142    return undef; 
     143} 
     144 
     145# SINGLETON METHOD # 
    111146# TODO: where should the defaults for these params be, here or in GS-SQLPlugin/Plugout? 
    112 sub connect_to_db { 
    113     my $self= shift (@_); 
     147sub _get_connection_instance 
     148{ 
     149    #my $self= shift (@_); # singleton method doesn't use self, but callers don't need to know that 
    114150    my ($params_map) = @_; 
     151 
     152    return $_dbh_instance if($_dbh_instance); 
     153 
     154    # or make the connection 
     155     
    115156    # For proper utf8 support in MySQL, encoding should be 'utf8mb4' as 'utf8' is insufficient 
    116     my $db_enc = "utf8mb4" if $self->{'db_encoding'} eq "utf8"; 
     157    my $db_enc = "utf8mb4" if $params_map->{'db_encoding'} eq "utf8"; 
    117158 
    118159    # these are the params for connecting to MySQL 
     
    130171    my $connect_str = "dbi:$db_driver:host=$db_host"; # don't provide db - allows checking the db exists later when loading the db 
    131172 
    132     if($self->{'verbosity'}) { 
     173    if($params_map->{'verbosity'}) { 
    133174    print STDERR "Away to make connection to $db_driver database with:\n"; 
    134175    print STDERR " - hostname $db_host; username: $db_user"; 
     
    183224     
    184225    # if we're here, then connection succeeded, store handle 
    185     $self->{'db_handle'} = $dbh; 
    186     return 1; 
    187 } 
     226    $_dbh_instance = $dbh; 
     227    return $_dbh_instance; 
     228     
     229} 
     230 
     231# Will disconnect if this instance of gssql holds the last reference to the db connection 
     232sub finished() { 
     233    my $self= shift (@_); 
     234 
     235    $ref_count--; 
     236    if($ref_count == 0) { 
     237    $self->force_disconnect_from_db(); 
     238    }     
     239} 
     240 
     241# Call this method on die(), so that you're sure the perl process has disconnected from SQL db 
     242# Disconnect from db - https://metacpan.org/pod/DBI#disconnect 
     243# TODO: make sure to have committed or rolled back before disconnect 
     244# and that you've call finish() on statement handles if any fetch remnants remain 
     245sub force_disconnect_from_db { 
     246    my $self= shift (@_); 
     247 
     248    if($_dbh_instance) { 
     249    # make sure any active stmt handles are finished 
     250    # NO: "When all the data has been fetched from a SELECT statement, the driver will automatically call finish for you. So you should not call it explicitly except when you know that you've not fetched all the data from a statement handle and the handle won't be destroyed soon." 
     251     
     252    print STDERR "    GSSQL disconnecting from database\n"; 
     253    # Just go through the singleton db handle to disconnect 
     254    $_dbh_instance->disconnect or warn $_dbh_instance->errstr; 
     255    $_dbh_instance = undef; 
     256    } 
     257    # Number of gssql objects that share a live connection is now 0, as the connection's dead 
     258    # either because the last gssql object finished() or because connection was killed (force) 
     259    $ref_count = 0; 
     260} 
     261 
    188262 
    189263# Load the designated database, i.e. 'use <dbname>;'. 
     
    260334} 
    261335 
    262 # disconnect from db - https://metacpan.org/pod/DBI#disconnect 
    263 # TODO: make sure to have committed or rolled back before disconnect 
    264 # and that you've call finish() on statement handles if any fetch remnants remain 
    265 sub disconnect_from_db { 
    266     my $self= shift (@_);     
    267     my $dbh = $self->{'db_handle'}; 
    268  
    269     # make sure any active stmt handles are finished 
    270     # NO: "When all the data has been fetched from a SELECT statement, the driver will automatically call finish for you. So you should not call it explicitly except when you know that you've not fetched all the data from a statement handle and the handle won't be destroyed soon." 
    271      
    272     print STDERR "Disconnecting from database\n" if($self->{'verbosity'} > 1); 
    273      
    274     my $rc = $dbh->disconnect or warn $dbh->errstr; # The handle is of little use after disconnecting. Possibly PrintError already prints a warning and this duplicates it? 
    275     return $rc; 
    276 } 
    277336 
    278337sub create_db { 
  • main/trunk/greenstone2/perllib/plugins/GreenstoneSQLPlugin.pm

    r32577 r32578  
    250250    #return undef unless $self->can_process_this_file($file); # NO, DON'T DO THIS (inherited remove_one behaviour) HERE:  
    251251           # WE DON'T CARE IF IT'S AN IMAGE FILE THAT WAS DELETED. 
    252            # WE CARE ABOUT REMOVING THE DOCOID OF THAT IMAGE FILE FROM THE SQL DB 
     252           # WE CARE ABOUT REMOVING THE DOC_OID OF THAT IMAGE FILE FROM THE SQL DB 
    253253           # SO DON'T RETURN IF CAN'T_PROCESS_THIS_FILE 
    254254     
     
    430430               }); 
    431431 
    432     # try connecting to the mysql db, if that fails it will die 
     432    # try connecting to the mysql db, die if that fails 
    433433    if(!$gs_sql->connect_to_db({ 
    434434    'db_driver' => $self->{'db_driver'}, 
     
    452452    # This is fatal for the plugout, let's terminate here after disconnecting again 
    453453    # PrintError would already have displayed the warning message on load fail 
    454     $gs_sql->disconnect_from_db() 
    455         || warn("Unable to disconnect from database.\n"); 
     454    $gs_sql->force_disconnect_from_db(); 
    456455    die("Could not use db $db_name. Can't proceed.\n"); 
    457456    } 
     
    475474    if($self->{'gs_sql'}) { # only want to work with sql db if buildcol.pl, gs_sql won't have 
    476475    # a value except during buildcol, so when processor =~ m/buildproc$/. 
    477     $self->{'gs_sql'}->disconnect_from_db() 
    478         || warn("Unable to disconnect from database " . $self->{'site_name'} . "\n"); 
    479  
    480     # explicitly delete gs_sql key (setting key to undef has a different meaning from deleting) 
    481     # so all future use has to make the connection again 
     476    $self->{'gs_sql'}->finished(); 
     477 
     478    # Clear gs_sql (setting key to undef has a different meaning from deleting: 
     479    # undef makes key still exist but its value is unded whereas delete deletes the key) 
     480    # So all future use has to make the connection again 
    482481    delete $self->{'gs_sql'}; 
    483482    } 
  • main/trunk/greenstone2/perllib/plugouts/GreenstoneSQLPlugout.pm

    r32573 r32578  
    143143    my $db_params = { 
    144144    'collection_name' => $ENV{'GSDLCOLLECTION'}, 
    145     'verbosity' => 1     
     145    'verbosity' => 1 
    146146    }; 
    147147 
    148148    my $gs_sql = new gssql($db_params); 
    149149     
    150     # try connecting to the mysql db, if that fails it will die 
    151     # so don't bother preparing GreenstoneXMLPlugout by calling superclass' begin() 
     150    # try connecting to the mysql db, die if that fails 
     151    # So don't bother preparing GreenstoneXMLPlugout by calling superclass' begin() yet 
    152152    if(!$gs_sql->connect_to_db({ 
    153153    'db_driver' => $self->{'db_driver'}, 
     
    181181    # This is fatal for the plugout, let's terminate here after disconnecting again 
    182182    # PrintError would already have displayed the warning message on load fail 
    183     $gs_sql->disconnect_from_db() 
    184         || warn("Unable to disconnect from database.\n"); 
     183    $gs_sql->force_disconnect_from_db(); # disconnect_from_db() will issue a warning on error 
    185184    die("Could not use db $db_name and/or prepare its tables. Can't proceed.\n"); 
    186185    } 
     
    205204    $self->SUPER::end(@_);     
    206205     
    207     $self->{'gs_sql'}->disconnect_from_db() || warn("Unable to disconnect from database " . $self->{'site_name'} . "\n"); # disconnect_from_db() will also issue a warning, but this may be clearer 
     206    $self->{'gs_sql'}->finished(); # will disconnect from db if last instance 
     207    delete $self->{'gs_sql'}; # key gs_sql no longer exists, not just the value being undef 
    208208} 
    209209