Changeset 32583


Ignore:
Timestamp:
2018-11-08T17:22:04+13:00 (5 years ago)
Author:
ak19
Message:
  1. Some tidying up of the code. 2. Removing unnecessary calls to force_disconnect_from_db (now made 'private') after die() since on die() perl calls object destructor which will ensure disconnection anyway.
Location:
main/trunk/greenstone2/perllib
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/gssql.pm

    r32582 r32583  
    22#
    33# gssql.pm -- DBI for SQL related utility functions used by
    4 # GreenstoneSQLPlugout and hereafter by GreenstoneSQLPlugin too.
     4# GreenstoneSQLPlugout and GreenstoneSQLPlugin too.
    55# A component of the Greenstone digital library software
    66# from the New Zealand Digital Library Project at the
     
    6161
    6262# Parameterise (one or more methods may use them):
    63 # - db_name (which is the GS3 sitename)
     63# - db_name (which is the GS3 sitename, or "greenstone2" for GS2)
    6464
    6565
     
    8383    }
    8484    }
    85 
    8685   
    8786    die "Caught a $sig signal $!"; # die() will always call destructor (sub DESTROY)
     
    125124# "It’s common to call die when handling SIGINT and SIGTERM. die is useful because it will ensure that Perl stops correctly: for example Perl will execute a destructor method if present when die is called, but the destructor method will not be called if a SIGINT or SIGTERM is received and no signal handler calls die."
    126125#
    127 # https://perldoc.perl.org/perlobj.html#Destructors
     126# Useful: https://perldoc.perl.org/perlobj.html#Destructors
    128127#
    129128# https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#disconnect
     
    145144    if ($_dbh_instance) { # database handle still active. Use singleton handle!
    146145
    147         # THIS CODE HAS MOVED TO finish_signal_handler() WHERE IT BELONGS
    148         # If autocommit wasn't set, then this is a cancel operation.
    149         # If we've not disconnected from the sql db yet and if we've not committed
    150         # transactions yet, then cancel means we do a rollback here
    151 
    152         # if($_dbh_instance->{AutoCommit} == 0) {
    153        
    154         #   $_dbh_instance->rollback(); # will warn on failure, nothing more we can/want to do,
    155             #     # don't do a die() here: possibility of infinite loop and we still want to disconnect
    156         # }
    157 
    158         # Either way, we're now finally ready to disconnect as is required for premature
    159         # termination too
     146        # rollback code has moved to finish_signal_handler() where it belongs?
     147       
     148        # NOTE: if RaiseError is set on dbi connection, then on any error, perl process will die()
     149        # which will end up calling this DESTROY. If it was a die() that called DESTROY
     150        # then need to rollback the db here. However, if it was not a die() but natural termination
     151        # of the perl process, destroy() will also get called. In that case we don't want to rollback
     152        # but do a commit() to the DB instead.
     153        # Perhaps detecting the difference may be accomplished by checking ref_count:
     154        # - If ref_count not 0 it may require a rollback?
     155        # - If ref_count 0 it may be a natural termination and require a commit? Except that ref_count
     156        # is set back to 0 in finished(), which will do the commit when ref_count becomes 0. So shouldn't
     157        # (have to) do that here.
     158       
     159        # We're now finally ready to disconnect, as is required for both natural and premature termination
    160160        print STDERR "XXXXXXXX Global Destruct: Disconnecting from database\n";
    161161        $_dbh_instance->disconnect or warn $_dbh_instance->errstr;
     
    165165    return;
    166166    }
     167
    167168}
    168169
     
    229230    }
    230231    if($params_map->{'autocommit'}) {
    231         print STDERR "   SQL DB UNDO SUPPORT OFF.\n";
     232        print STDERR "   SQL DB CANCEL SUPPORT OFF.\n";
    232233    } else {
    233         print STDERR "   SQL DB UNDO SUPPORT ON.\n";
     234        print STDERR "   SQL DB CANCEL SUPPORT ON.\n";
    234235    }
    235236    }
     
    326327
    327328# Will disconnect if this instance of gssql holds the last reference to the db connection
     329# If disconnecting and autocommit is off, then this will commit before disconnecting
    328330sub finished {
    329     my $self= shift (@_);
    330 
    331     # TODO: if AutoCommit was off, meaning transactions were on/enabled,
    332     # then here is where we commit our one long transaction.
    333     # https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#commit
    334     my $rc = 1;   
    335    
    336     $ref_count--;
    337     if($ref_count == 0) {
    338     # Only commit transaction when we're about to disconnect, not before
    339     # If autocommit was on, then we'd have committed after every db operation, so nothing to do
    340     $rc = $self->do_commit_if_on();
    341    
    342     $self->force_disconnect_from_db();
    343     }
    344 
    345     return $rc;
    346 }
    347 
    348 sub do_commit_if_on {
    349331    my $self= shift (@_);
    350332    my $dbh = $self->{'db_handle'};
     
    352334    my $rc = 1; # return code: everything went fine, regardless of whether we needed to commit
    353335                # (AutoCommit on or off)
    354 
    355     # https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#commit
    356     if($dbh->{AutoCommit} == 0) {
    357     print STDERR "   Committing transaction to SQL database now.\n" if $self->{'verbosity'};
    358     $rc = $dbh->commit() or warn("SQL DB COMMIT FAILED: " . $dbh->errstr); # important problem
    359                                                             # worth embellishing error message
    360     }
    361     # If autocommit was on, then we'd have committed after every db operation, so nothing to do
     336   
     337    $ref_count--;
     338    if($ref_count == 0) { # Only commit transaction when we're about to actually disconnect, not before
     339   
     340    # TODO: If AutoCommit was off, meaning transactions were on/enabled,
     341    # then here is where we commit our one long transaction.
     342    # https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#commit
     343    if($dbh->{AutoCommit} == 0) {
     344        print STDERR "   Committing transaction to SQL database now.\n" if $self->{'verbosity'};
     345        $rc = $dbh->commit() or warn("SQL DB COMMIT FAILED: " . $dbh->errstr); # important problem
     346        # worth embellishing error message
     347    }
     348    # else if autocommit was on, then we'd have committed after every db operation, so nothing to do
     349   
     350    $self->_force_disconnect_from_db();
     351    }
    362352
    363353    return $rc;
    364354}
     355
    365356
    366357# Call this method on die(), so that you're sure the perl process has disconnected from SQL db
     
    368359# TODO: make sure to have committed or rolled back before disconnect
    369360# and that you've call finish() on statement handles if any fetch remnants remain
    370 sub force_disconnect_from_db {
     361sub _force_disconnect_from_db {
    371362    my $self= shift (@_);
    372363
  • main/trunk/greenstone2/perllib/plugins/GreenstoneSQLPlugin.pm

    r32582 r32583  
    5050
    5151# DONE:
     52# + TODO: For on cancel, add a SIGTERM handler or so to call end()
     53# or to explicitly call gs_sql->close_connection if $gs_sql def
     54#
    5255# + TODO: Incremental delete can't work until GSSQLPlugout has implemented build_mode = incremental
    5356# (instead of tossing away db on every build)
     
    126129
    127130    return q^(?i)docsql(-\d+)?\.xml$^; # regex based on this method in GreenstoneXMLPlugin
    128     #return q^(?i)docsql(-.+)?\.xml$^; # no longer storing the OID embedded in docsql .xml filename
    129131}
    130132
     
    191193
    192194
    193 # TODO: For on cancel, add a SIGTERM handler or so to call end()
    194 # or to explicitly call gs_sql->close_connection if $gs_sql def
     195###### Methods called during buildcol and import #######
    195196
    196197sub new {
     
    217218}
    218219
    219 ###### Called during import.pl
     220# GS SQL Plugin::init() (and deinit()) is called by import.pl and also by buildcol.pl
     221# This means it connects and deconnects during import.pl as well. This is okay
     222# as removeold, which should drop the collection tables, happens during the import phase,
     223# calling GreenstoneSQLPlugin::and therefore also requires a db connection.
     224# TODO: Eventually can try moving get_gssql_instance into gssql.pm? That way both GS SQL Plugin
     225# and Plugout would be using one connection during import.pl phase when both plugs exist.
     226
     227# Call init() not begin() because there can be multiple plugin passes and begin() called for
     228# each pass (one for doc level and another for section level indexing), whereas init() should
     229# be called before any and all passes.
     230# This way, we can connect to the SQL database once per buildcol run.
     231sub init {
     232    my ($self) = shift (@_);
     233    ##print STDERR "@@@@@@@@@@ INIT CALLED\n";
     234   
     235    $self->SUPER::init(@_); # super (GreenstoneXMLPlugin) will not yet be trying to read from doc.xml (docsql .xml) files in init().
     236
     237    ####################
     238#    print "@@@ SITE NAME: ". $self->{'site_name'} . "\n" if defined $self->{'site_name'};
     239#    print "@@@ COLL NAME: ". $ENV{'GSDLCOLLECTION'} . "\n";
     240
     241#    print STDERR "@@@@ db_pwd: " . $self->{'db_client_pwd'} . "\n";
     242#    print STDERR "@@@@ user: " . $self->{'db_client_user'} . "\n";
     243#    print STDERR "@@@@ db_host: " . $self->{'db_host'} . "\n";
     244#    print STDERR "@@@@ db_driver: " . $self->{'db_driver'} . "\n";
     245    ####################
     246
     247    # create gssql object.
     248    # collection name will be used for naming tables (site name will be used for naming database)
     249    my $gs_sql = new gssql({
     250    'collection_name' => $ENV{'GSDLCOLLECTION'},
     251    'verbosity' => $self->{'verbosity'} || 0
     252               });
     253   
     254    # if autocommit is set, there's no rollback support
     255    my $autocommit = ($self->{'rollback_on_cancel'} eq "false") ? 1 : 0;
     256
     257    # try connecting to the mysql db, die if that fails
     258    if(!$gs_sql->connect_to_db({
     259    'db_driver' => $self->{'db_driver'},
     260    'db_client_user' => $self->{'db_client_user'},
     261    'db_client_pwd' => $self->{'db_client_pwd'},
     262    'db_host' => $self->{'db_host'},
     263    'autocommit' => $autocommit
     264                   })
     265    )
     266    {
     267    # This is fatal for the plugout, let's terminate here
     268    # PrintError would already have displayed the warning message on connection fail   
     269    die("Could not connect to db. Can't proceed.\n");
     270    }
     271   
     272    my $db_name = $self->{'site_name'} || "greenstone2"; # one database per GS3 site, for GS2 the db is called greenstone2
     273
     274    # Attempt to use the db, create it if it doesn't exist (but don't create the tables yet)
     275    # Bail if we can't use the database
     276    if(!$gs_sql->use_db($db_name)) {
     277   
     278    # This is fatal for the plugout, let's terminate here after disconnecting again
     279    # PrintError would already have displayed the warning message on load fail
     280    # And on die() perl will call gssql destroy which will ensure a disconnect() from db
     281    #$gs_sql->force_disconnect_from_db();
     282    die("Could not use db $db_name. Can't proceed.\n");
     283    }
     284   
     285   
     286    # store db handle now that we're connected
     287    $self->{'gs_sql'} = $gs_sql;   
     288}
     289
     290
     291# This method also runs on import.pl if gs_sql has a value. But we just want to run it on buildcol
     292# Call deinit() not end() because there can be multiple plugin passes:
     293# one for doc level and another for section level indexing
     294# and deinit() should be called before all passes
     295# This way, we can close the SQL database once per buildcol run.
     296sub deinit {
     297    my ($self) = shift (@_);
     298   
     299    ##print STDERR "@@@@@@@@@@ GreenstoneSQLPlugin::DEINIT CALLED\n";
     300   
     301    if($self->{'gs_sql'}) {
     302
     303    # Important to call finished():
     304    # it will disconnect from db if this is the last gssql instance,
     305    # and it will commit to db before disconnecting if rollbback_on_cancel turned on
     306    $self->{'gs_sql'}->finished();
     307
     308    # Clear gs_sql (setting key to undef has a different meaning from deleting:
     309    # undef makes key still exist but its value is unded whereas delete deletes the key)
     310    # So all future use has to make the connection again
     311    delete $self->{'gs_sql'};
     312    }
     313
     314    $self->SUPER::deinit(@_);
     315}
     316
     317
     318
     319###### Methods only called during import.pl #####
    220320
    221321# This is called once if removeold is set with import.pl. Most plugins will do
     
    246346    }
    247347
    248     # UNNECESSARY
    249     # The removeold related DB transaction (deleting collection tables) is complete
    250     # Don't let GS SQL PlugIN interfere with GS SQL PlugOUT's database transactions
    251     # during import.pl hereafter. Finish up.
    252     #$gs_sql->do_commit_if_on();
    253348}
    254349
     
    294389}
    295390
    296 #### Called during buildcol
     391##### Methods called only during buildcol #####
    297392
    298393sub xml_start_tag {
     
    417512}
    418513
    419 #### Called during buildcol and import
    420 
    421 # GS SQL Plugin::init() (and deinit()) is called by import.pl and also by buildcol.pl
    422 # This means it connects and deconnects during import.pl as well. This is okay
    423 # as removeold, which should drop the collection tables, happens during the import phase,
    424 # calling GreenstoneSQLPlugin::and therefore also requires a db connection.
    425 # TODO: Eventually can try moving get_gssql_instance into gssql.pm? That way both GS SQL Plugin
    426 # and Plugout would be using one connection during import.pl phase when both plugs exist.
    427 
    428 # Call init() not begin() because there can be multiple plugin passes and begin() called for
    429 # each pass (one for doc level and another for section level indexing), whereas init() should
    430 # be called before any and all passes.
    431 # This way, we can connect to the SQL database once per buildcol run.
    432 sub init {
    433     my ($self) = shift (@_);
    434     ##print STDERR "@@@@@@@@@@ INIT CALLED\n";
    435    
    436     $self->SUPER::init(@_); # super (GreenstoneXMLPlugin) will not yet be trying to read from doc.xml (docsql .xml) files in init().
    437 
    438     ####################
    439 #    print "@@@ SITE NAME: ". $self->{'site_name'} . "\n" if defined $self->{'site_name'};
    440 #    print "@@@ COLL NAME: ". $ENV{'GSDLCOLLECTION'} . "\n";
    441 
    442 #    print STDERR "@@@@ db_pwd: " . $self->{'db_client_pwd'} . "\n";
    443 #    print STDERR "@@@@ user: " . $self->{'db_client_user'} . "\n";
    444 #    print STDERR "@@@@ db_host: " . $self->{'db_host'} . "\n";
    445 #    print STDERR "@@@@ db_driver: " . $self->{'db_driver'} . "\n";
    446     ####################
    447 
    448     # create gssql object.
    449     # collection name will be used for naming tables (site name will be used for naming database)
    450     my $gs_sql = new gssql({
    451     'collection_name' => $ENV{'GSDLCOLLECTION'},
    452     'verbosity' => $self->{'verbosity'} || 0
    453                });
    454    
    455     # if autocommit is set, there's no rollback support
    456     my $autocommit = ($self->{'rollback_on_cancel'} eq "false") ? 1 : 0;
    457 
    458     # try connecting to the mysql db, die if that fails
    459     if(!$gs_sql->connect_to_db({
    460     'db_driver' => $self->{'db_driver'},
    461     'db_client_user' => $self->{'db_client_user'},
    462     'db_client_pwd' => $self->{'db_client_pwd'},
    463     'db_host' => $self->{'db_host'},
    464     'autocommit' => $autocommit
    465                    })
    466     )
    467     {
    468     # This is fatal for the plugout, let's terminate here
    469     # PrintError would already have displayed the warning message on connection fail   
    470     die("Could not connect to db. Can't proceed.\n");
    471     }
    472    
    473     my $db_name = $self->{'site_name'} || "greenstone2"; # one database per GS3 site, for GS2 the db is called greenstone2
    474 
    475     # Attempt to use the db, create it if it doesn't exist (but don't create the tables yet)
    476     # Bail if we can't use the database
    477     if(!$gs_sql->use_db($db_name)) {
    478    
    479     # This is fatal for the plugout, let's terminate here after disconnecting again
    480     # PrintError would already have displayed the warning message on load fail
    481     $gs_sql->force_disconnect_from_db();
    482     die("Could not use db $db_name. Can't proceed.\n");
    483     }
    484    
    485    
    486     # store db handle now that we're connected
    487     $self->{'gs_sql'} = $gs_sql;   
    488 }
    489 
    490 
    491 # This method also runs on import.pl if gs_sql has a value. But we just want to run it on buildcol
    492 # Call deinit() not end() because there can be multiple plugin passes:
    493 # one for doc level and another for section level indexing
    494 # and deinit() should be called before all passes
    495 # This way, we can close the SQL database once per buildcol run.
    496 sub deinit {
    497     my ($self) = shift (@_);
    498    
    499     ##print STDERR "@@@@@@@@@@ GreenstoneSQLPlugin::DEINIT CALLED\n";
    500    
    501     if($self->{'gs_sql'}) { # only want to work with sql db if buildcol.pl, gs_sql won't have
    502     # a value except during buildcol, so when processor =~ m/buildproc$/.
    503     $self->{'gs_sql'}->finished();
    504 
    505     # Clear gs_sql (setting key to undef has a different meaning from deleting:
    506     # undef makes key still exist but its value is unded whereas delete deletes the key)
    507     # So all future use has to make the connection again
    508     delete $self->{'gs_sql'};
    509     }
    510 
    511     $self->SUPER::deinit(@_);
    512 }
    513 
    514 
    515 
    516 
     514
     5151;
  • main/trunk/greenstone2/perllib/plugouts/GreenstoneSQLPlugout.pm

    r32582 r32583  
    3939
    4040
    41 # This entire class is called only during import.pl
    42 
    43 # TODO: SIGTERM rollback and disconnect?
    44 # TODO Q: what about verbosity for debugging, instead of current situation of printing out upon debug set at the expense of writing to db
     41# + TODO: SIGTERM rollback and disconnect?
     42# + TODO Q: what about verbosity for debugging, instead of current situation of printing out upon debug set at the expense of writing to db
    4543# TODO Q: introduced site_name param to plugins and plugouts. Did I do it right? And should they have hiddengli = "yes"
    4644# Did I do the pass by ref in docprint's escape and unescape textref functions correctly, and how they're called here?
     
    121119        'args'     => $arguments };
    122120
     121##### This entire class is called only during import.pl #####
     122
     123##### Overridden methods #####
     124
    123125sub new {
    124126    my ($class) = shift (@_);
     
    205207    # This is fatal for the plugout, let's terminate here after disconnecting again
    206208    # PrintError would already have displayed the warning message on load fail
    207     $gs_sql->force_disconnect_from_db(); # disconnect_from_db() will issue a warning on error
     209    # And on die() perl will call gssql destroy which will ensure a disconnect() from db
     210    #$gs_sql->force_disconnect_from_db(); # disconnect_from_db() will issue a warning on error
    208211    die("Could not use db $db_name and/or prepare its tables. Can't proceed.\n");
    209212    }
     
    227230    # do the superclass stuff first, as any sql db failures should not prevent superclass cleanup
    228231    $self->SUPER::end(@_);   
    229    
    230     $self->{'gs_sql'}->finished(); # will disconnect from db if last instance
     232
     233    # Important to call finished():
     234    # it will disconnect from db if this is the last gssql instance,
     235    # and it will commit to db before disconnecting if rollbback_on_cancel turned on
     236    $self->{'gs_sql'}->finished();
    231237    delete $self->{'gs_sql'}; # key gs_sql no longer exists, not just the value being undef
    232238}
     
    295301
    296302   
    297     # 3. post save out
    298     #$self->SUPER::post_saveas(@_);
     303    # 3. post save out
    299304    $self->SUPER::post_saveas($doc_obj, $doc_dir, $docxml_outhandler, $output_file);
    300305   
     
    304309}
    305310
     311##### New methods, not inherited  #####
    306312
    307313# write meta and/or text PER DOC out to DB
Note: See TracChangeset for help on using the changeset viewer.