Changeset 32521


Ignore:
Timestamp:
2018-10-18T20:25:28+13:00 (6 years ago)
Author:
ak19
Message:
  1. Calling superclass GreenstoneXMLPlugout's begin(), end(), close_group_output(), pre_saveas() and post_saveas() only if MySQLPlugout's process mode is not set to output ALL (meta and text) to sql db. 2. Finally removed the open2 versions of the database functions.
File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugouts/MySQLPlugout.pm

    r32520 r32521  
    3232use util;
    3333use FileUtils;
    34 #use BasePlugout;
    3534use GreenstoneXMLPlugout;
    3635use docprint;
     
    3837use IPC::Open2;
    3938use POSIX ":sys_wait_h"; # for waitpid, http://perldoc.perl.org/functions/waitpid.html
     39
     40# TODO: SIGTERM rollback and disconnect?
     41
    4042
    4143# this plugout does not output xml to a file, but outputs rows into a mysql table
     
    100102    $self->{'db_driver'} = "mysql";
    101103    $self->{'site_name'} = "localsite";   
    102     $self->{'client_user'} = "root";
    103     $self->{'client_pwd'} = "6reenstone3";
     104    $self->{'db_client_user'} = "root";
     105    $self->{'db_client_pwd'} = "6reenstone3";
    104106    #$self->{'db_host'} = "127.0.0.1";
    105107    #$self->{'db_encoding'} = "utf8";
     108    #TODO: proc_mode is also a saveas option
    106109   
    107110    ############ LOAD NECESSARY OPTIONS ###########
     
    121124
    122125    # prepare the shared/common HANDLES to SQL insert statements that contain placeholders
    123     # and which we will reuse repeatedly when executing the actual insert statements
     126    # and which we will reuse repeatedly when actually executing the insert statements
    124127    my $proc_mode = $self->{'process_mode'};
    125128    if($proc_mode eq "all" || $proc_mode eq "meta_only" ) {
     
    129132    $self->{'fulltxt_prepared_insert_statement_handle'} = $self->prepare_insert_fulltxt_row_stmthandle();
    130133    }
    131    
    132     # finally, call begin on super
    133     $self->GreenstoneXMLPlugout::begin(@_);   
     134
     135    # if setting up to work with sql db failed, we'd have terminated and wouldn't come up to here:
     136    # won't bother preparing GreenstoneXMLPlugout by calling superclass' begin()
     137    # finally, can call begin on super - important as doc.xml is opened as a group etc
     138
     139    if($proc_mode ne "all") { # TODO Q
     140    $self->GreenstoneXMLPlugout::begin(@_);
     141    }
    134142}
    135143
     
    139147    my $self = shift(@_);
    140148
    141     # do the superclass stuff
    142     $self->GreenstoneXMLPlugout::end(@_);
    143     $self->disconnect_from_db() || warn("Unable to disconnect from database " . $self->{'site_name'} . "\n");
    144     #my $success = $self->GreenstoneXMLPlugout::end(@_);
    145     #$success = $self->disconnect_from_db() && $success;   
    146     #return $success;
    147 }
    148    
     149    # do the superclass stuff first, as any sql db failures should not prevent superclass cleanup
     150   
     151    if($self->{'process_mode'} ne "all") {# TODO Q
     152    $self->GreenstoneXMLPlugout::end(@_);
     153    }
     154   
     155    $self->disconnect_from_db() || warn("Unable to disconnect from database " . $self->{'site_name'} . "\n"); # disconnect_from_db() will also issue a warning, but this may be clearer
     156}
     157 
     158sub close_group_output {
     159    my $self = shift (@_);
     160   
     161    if($proc_mode ne "all") { # TODO Q
     162    $self->GreenstoneXMLPlugout::close_group_output();
     163    }
     164}
     165   
    149166# TODO: check arc-inf.db for whether each entry is to be deleted/indexed/reindexed/been indexed
    150167sub saveas {
     
    152169    my ($doc_obj, $doc_dir) = @_;
    153170
    154     # pre save out
    155     my ($docxml_outhandler, $output_file) = $self->GreenstoneXMLPlugout::pre_saveas(@_);
    156    
    157 
    158     # saving customisation
    159     $self->{'debug_outhandle'} = $docxml_outhandler if ($self->{'debug'}); # STDOUT if debug
    160 
    161     # write the INVERSE into doc.xml as to what is written to the db
    162     my $proc_mode = $self->{'process_mode'};
    163     my $docxml_output_options = { 'output' => docprint::OUTPUT_NONE };
    164     if($proc_mode eq "meta_only" ) { # since only meta to go into MySQL db, text will go into docxml
    165     $docxml_output_options->{'output'} = docprint::OUTPUT_TEXT_ONLY;
    166     } elsif($proc_mode eq "text_only" ) { # since only full text to go into MySQL db, meta will go into docxml
    167     $docxml_output_options->{'output'} = docprint::OUTPUT_META_ONLY;
    168     }
    169 
    170     # now we've prepared to write out whatever is meant to go into docxml
    171     # and can do actual the steps superclass GreenstoneXMLPlugout carries out to write out docxml
    172     # So: write out the doc xml file for the current document
    173     my $section_text = &docprint::get_section_xml($doc_obj, $docxml_output_options);
    174     print $docxml_outhandler $section_text;   
    175 
     171    # 1. pre save out and saving debug handle
     172    if($proc_mode ne "all") { # TODO Q
     173    my ($docxml_outhandler, $output_file) = $self->GreenstoneXMLPlugout::pre_saveas(@_);
     174
     175    $self->{'debug_outhandle'} = $docxml_outhandler if ($self->{'debug'}); # STDOUT if debug
     176    }
     177    elsif ($self->{'debug'}) { # if we didn't call superclass::pre_saveas, then set debug handle
     178    $self->{'debug_outhandle'} = STDOUT; # superclass also debugs to STDOUT
     179    # TODO: also set debugging in begin()? Then stmts creating db and tables also sent to debug out and not executed
     180    }
     181   
     182   
     183    # 2. overriding saving behaviour to do what the superclass does PLUS saving to sql db   
     184
     185    if($proc_mode ne "all" ) { # if all, then nothing to go into doc.xml
     186    # write the INVERSE into doc.xml as to what is written to the db
     187    my $proc_mode = $self->{'process_mode'};
     188   
     189    my $docxml_output_options = { 'output' => docprint::OUTPUT_NONE }; # TODO Q
     190   
     191    if($proc_mode eq "meta_only" ) { # since only meta to go into MySQL db, text will go into docxml
     192        $docxml_output_options->{'output'} = docprint::OUTPUT_TEXT_ONLY;
     193    } elsif($proc_mode eq "text_only" ) { # since only full text to go into MySQL db, meta will go into docxml
     194        $docxml_output_options->{'output'} = docprint::OUTPUT_META_ONLY;
     195    }
     196
     197    # now we've prepared to write out whatever is meant to go into docxml
     198    # and can do actual the steps superclass GreenstoneXMLPlugout carries out to write out docxml
     199    # So: write out the doc xml file for the current document
     200    my $section_text = &docprint::get_section_xml($doc_obj, $docxml_output_options);
     201    print $docxml_outhandler $section_text;   
     202    }
     203   
    176204    # We also write out whatever needs to go into the MySQL database
    177205    $self->write_meta_and_text($doc_obj);
    178206
    179207   
    180     # post save out
    181     $self->GreenstoneXMLPlugout::post_saveas(@_);
    182 
    183     # TODO: close database connection here? Or do something like with groups
    184     # so we don't open and close over and over during a single build?
     208    # 3. post save out
     209    if($proc_mode ne "all" ) { # TODO Q
     210    #$self->GreenstoneXMLPlugout::post_saveas(@_);
     211    $self->GreenstoneXMLPlugout::post_saveas($doc_obj, $doc_dir, $docxml_outhandler, $output_file);
     212    }
     213   
     214    # database connection is closed in end() method
     215    # so we don't open and close over and over for each doc during a single build
    185216}
    186217
     
    193224    my $doc_oid = $doc_obj->get_OID(); # we're processing a single doc at a time, so single OID
    194225
    195     ##binmode($db_handle,":utf8"); ## WRONG FOR DB, NEED TO CREATE IN UTF8 MODE
    196    
    197     # TODO if $self->debug is on
    198    
    199     # Do we want to open and close a connection per doc?
    200     # Would we not rather want to open and close per collection rebuild?
    201    
    202     #$self->create_db_connection();
    203 
    204226    # load the prepared INSERT statement handles for both tables (can be undef for any table depending on whether meta_only or txt_only are set)
    205227    my $metadata_table_sth = $self->{'metadata_prepared_insert_statement_handle'};
     
    207229   
    208230    $self->recursive_write_meta_and_text($doc_obj, $root_section, $metadata_table_sth, $fulltxt_table_sth);
    209 
    210     #$self->close_db_connection();
    211 }
     231}
     232
    212233# Perl: Reading or Writing to Another Program
    213234# https://nnc3.com/mags/Perl3/cookbook/ch16_05.htm
     
    290311# Unless they do a full rebuild, which will recreate the table from scratch?
    291312# SOLUTION-> rollback transaction on error, see https://www.effectiveperlprogramming.com/2010/07/set-custom-dbi-error-handlers/
    292 
    293 # I'm using perl's open2 like in Z3950Download, as opposed to open3 like in WgetDownload
    294 # since I'm assuming each insert statement is atomic: it either does the insertion or fails
    295 # and we (may) get some message back. That means we should hopefully be able to terminate
    296 # as well if we get SIGTERM/SIGKILL: we're not blocking, but are responsive after every INSERT stmt.
    297 
    298 # Just runs the command without displaying it
    299 sub run_command_basic
    300 {
    301     my ($self,$strCMD) = @_;
    302    
    303    
    304     my $process_instr = $self->{'MYSQL_IN'};
    305 
    306     print $process_instr "$strCMD\n"; 
    307 }
    308 
    309 sub run_command
    310 {
    311     my ($self,$strCMD) = @_;   
    312 
    313     print STDERR "Running mysql command: $strCMD\n";
    314     $self->run_command_basic($strCMD);
    315    
    316 }
    317 
    318 sub response_line_contains {
    319     my ($self,$expected) = @_;
    320 
    321     my $out = $self->{'MYSQL_OUT'};
    322     my $opening_line = <$out>;
    323     if ($opening_line =~ m/$expected/i) {
    324     return 1;
    325     }
    326     return 0;
    327 }
    328 
    329 sub response_lines_contain {
    330     my ($self,$expected) = @_;
    331 
    332     my $found_expected = 0;
    333     my $out = $self->{'MYSQL_OUT'};
    334     while (my $line = <$out>) {
    335     print STDERR "$line\n";
    336     if($line =~ m/$expected/){
    337         #return 1;
    338         $found_expected = 1; #won't break out of loop: loop will consume all on child out at present
    339     }   
    340     }
    341     return $found_expected;
    342 }
    343 
    344 # based on Z3950Download.pm::start_yaz()
    345 # ./PATH/TO/mysql -u root -p
    346 # Returns:
    347 #   - 1 if client already quit after pwd fail (so can't send quit message to stopped client)
    348 #   - 0 if load_db failed (needs to be quit),
    349 #   - 1 if load_db succeeded (mysql client still running)
    350 sub create_db_connection {
    351     my $self = shift (@_);
    352     print STDERR "Opening connection to MySQL db\n";
    353 
    354     my $mysql_client = $self->{'client_path'};
    355     my $client_user = $self->{'client_user'} || "root";
    356    
    357    
    358     my $launch_cmd = "\"./$mysql_client\" -u $client_user -p";
    359     my $childpid = open2(*MYSQL_OUT, *MYSQL_IN, $launch_cmd)
    360     or (print STDERR "Done\n" and die "can't open2 pipe to mysql client: $!");
    361    
    362     $self->{'pid'} = $childpid;
    363     $self->{'MYSQL_OUT'} = *MYSQL_OUT;
    364     $self->{'MYSQL_IN'} = *MYSQL_IN;
    365    
    366     # connect with pwd and load the database for this site
    367     #my $conn_open = $self->open_connection(); 
    368     #
    369     #if (!$conn_open) {
    370     #    print STDERR "Cannot connect to mysql db with $launch_cmd\n";
    371     #    print STDERR "Done\n"; 
    372     #    return 0;
    373     #}
    374    
    375     # connect with pwd
    376     my $conn_success = $self->send_pwd();   
    377     if(!$conn_success) {
    378     return -1; # if pwd failed, then the program already exited by itself
    379                # (so don't send quit command after process terminated)
    380     }
    381     else {
    382     return $self->load_db();
    383     }   
    384 
    385     # return $conn_open;
    386     return $conn_success; # 1 if client already quit after pwd fail, 0 if load_db failed (needs to be quit), 1 if load_db succeeded (mysql client still running)
    387 }
    388 
    389 
    390 # Copied from Z3950Download.pm::quit_yaz()
    391 sub close_db_connection {
    392     my $self = shift (@_);
    393    
    394     $self->run_command("quit");
    395     close($self->{'MYSQL_IN'}); # close the input to yaz. It also flushes quit command to mysql client
    396 
    397     # make sure nothing is being output by mysql client
    398     # flush the mysql-client process' outputstream, else we'll be stuck in an infinite
    399     # loop waiting for the process to quit.
    400     my $output = $self->{'MYSQL_OUT'};
    401     my $line;
    402     while (defined ($line = <$output>)) {
    403     if($line !~ m/\w/s) { # print anything other than plain whitespace in case it is important
    404         print STDERR "***### $line";
    405     }
    406     }
    407 
    408     close($self->{'MYSQL_OUT'});
    409 
    410     # Is the following necessary? The PerlDoc on open2 (http://perldoc.perl.org/IPC/Open2.html)
    411     # says that waitpid must be called to "reap the child process", or otherwise it will hang
    412     # around like a zombie process in the background. Adding it here makes the code work as
    413     # before, but it is certainly necessary to call waitpid on wget (see WgetDownload.pm).
    414     # http://perldoc.perl.org/functions/waitpid.html
    415     my $kidpid;
    416     do {
    417     $kidpid = waitpid($self->{'pid'}, WNOHANG);
    418     } while $kidpid > 0; # waiting for pid to become -1
    419 }
    420 
    421 # should be called only once per site
    422 sub create_database {
    423     my $self = shift (@_);
    424     #my $sitename = shift(@_);
    425     my $sitename = $self->{'site_name'};
    426     my $cmd = "CREATE DATABASE $sitename;";
    427     $self->run_command($cmd);
    428 }
    429 
    430 
    431 sub send_pwd {
    432     my $self = shift (@_);
    433 
    434     my $client_pwd = $self->{'client_pwd'};
    435     my $out = $self->{'MYSQL_OUT'};
    436    
    437     # if connected, it's prompting for pwd. Write the pwd to the mysql client process:
    438     $self->run_command_basic($client_pwd);
    439    
    440     #my $opening_line = <$out>;
    441     #if ($opening_line =~ m/Access denied/i) {
    442     #print STDERR "Password not recognised. Got: $opening_line\n";
    443     #return 0;
    444     #}
    445    
    446     if($self->response_line_contains("Access denied")) {
    447     print STDERR "Password not recognised. Got: Access denied.\n";
    448     return 0;
    449     }
    450 
    451     return 1;
    452 }
    453 
    454 
    455 sub load_db {
    456     my $self = shift (@_);
    457 
    458     # attempt to load the db
    459     # use the database
    460     my $db_name = $self->{'site_name'}; # TODO Q: site_name only exists for GS3. What about GS2?
    461     $self->run_command("use " . $db_name . ";");
    462    
    463     my $db_found = 0;
    464     my $out = $self->{'MYSQL_OUT'};
    465     while (my $line = <$out>) {
    466     print STDERR "$line\n";
    467     if($line =~ m/Database changed/){
    468         # return 1; # TODO Q: consume all output of running command
    469         $db_found = 1;
    470     }
    471     elsif($line =~ m/Unknown database/){
    472         $db_found = -1;
    473     }
    474     }
    475 
    476     if($db_found == 1) {
    477     return $db_found;
    478     }
    479     elsif ($db_found == -1) { # a db for the current sitename didn't exist, create it
    480     $self->create_database();
    481 
    482     # attempt to load the newly created db
    483     if($self->_load_db()) { # recursive call!
    484         return 1;
    485     }
    486    
    487    
    488     #my $opening_line = <$out>;
    489     #if ($opening_line !~ m/Query OK/) {
    490     if(!$self->response_line_contains("Query OK")) {
    491         print STDERR "Could not create db\n";
    492         return 0; # couldn't even create the db
    493     } else { # success creating db
    494         # so let's create the metadata and fulltxt tables for the current coll while we're at it
    495         if($self->create_meta_table()) {       
    496         return $self->create_fulltxt_table();
    497         }
    498     }
    499     }
    500     else { # unknown error trying to load db, bail
    501     return 0;
    502     }
    503 }
    504 
    505 
    506 sub create_meta_table {
    507     my $self = shift (@_);
    508     my $table_name = $self->{'collection_name'} . "metadata";
    509     #my $cmd = "CREATE TABLE $table_name (id VARCHAR(255) NOT NULL UNIQUE, did VARCHAR(63) NOT NULL, sid VARCHAR(63) NOT NULL, metaname VARCHAR(127) NOT NULL, metavalue VARCHAR(1023) NOT NULL, PRIMARY KEY(id));";
    510 
    511     # If using an auto incremented primary key:
    512     my $cmd = "CREATE TABLE $table_name (id INT NOT NULL AUTO_INCREMENT, did VARCHAR(63) NOT NULL, sid VARCHAR(63) NOT NULL, metaname VARCHAR(127) NOT NULL, metavalue VARCHAR(1023) NOT NULL, PRIMARY KEY(id));";
    513 
    514     $self->run_command($cmd);
    515    
    516     if(!$self->response_lines_contain("Query OK")) {
    517     print STDERR "Could not create metadata table\n";
    518     return 0;
    519     } else {
    520     return 1;
    521     }
    522 }
    523 
    524 sub create_fulltxt_table {
    525     my $self = shift (@_);
    526     my $table_name = $self->{'collection_name'} . "fulltxt";
    527     #my $cmd = "CREATE TABLE $table_name (id VARCHAR(255) NOT NULL UNIQUE, did VARCHAR(63) NOT NULL, sid VARCHAR(63) NOT NULL, fulltxt LONGTEXT, PRIMARY KEY(id));";
    528 
    529     # If using an auto incremented primary key:
    530     my $cmd = "CREATE TABLE $table_name (id INT NOT NULL AUTO_INCREMENT, did VARCHAR(63) NOT NULL, sid VARCHAR(63) NOT NULL, fulltxt LONGTEXT, PRIMARY KEY(id));";
    531 
    532     $self->run_command($cmd);
    533    
    534     if(!$self->response_lines_contain("Query OK")) {
    535     print STDERR "Could not create table\n";
    536     return 0;
    537     } else {
    538     return 1;
    539     }
    540 }
    541 
    542 # https://www.guru99.com/insert-into.html
    543 # and https://dev.mysql.com/doc/refman/8.0/en/example-auto-increment.html
    544 #     for inserting multiple rows at once
    545 sub get_cmd_insert_new_row_into_meta_table {
    546     my $self = shift (@_);
    547     my ($did, $sid, $metaname, $metavalue) = @_;
    548     my $tablename = $self->{'colname'}."_metadata";
    549 
    550     my $cmd = "INSERT INTO $tablename (did, sid, metaname, metavalue) VALUES\n";
    551     $cmd .= "('$did', '$sid', '$metaname', '$metavalue');\n";
    552     return $cmd;
    553 }
    554 
    555 sub get_cmd_insert_new_row_into_txt_table {
    556     my $self = shift (@_);
    557     my ($did, $sid, $fulltext) = @_;
    558     my $tablename = $self->{'colname'}."_fulltxt";
    559 
    560     my $cmd = "INSERT INTO $tablename (did, sid, fulltxt) VALUES\n";
    561     $cmd .= "('$did', '$sid', '$fulltext');\n";
    562     return $cmd;
    563 }
    564 
    565 # TODO: later add edit and delete (and nothing for "been indexed" status?)
    566 
    567 ##################
    568 
    569 ## UNUSED
    570 sub X_open_connection()
    571 {
    572     my $self = shift (@_);
    573     # connect with pwd
    574     my $conn_success = $self->send_pwd();
    575     if(!$conn_success) {
    576     return -1; # if pwd failed, then the program already exists by itself (so don't send quit command after process terminated)
    577     }
    578    
    579     if($conn_success) {
    580     return $self->load_db();
    581     } else {
    582     return $conn_success;
    583     }
    584 }
     313# But then should set AutoCommit to off on connection, and remember to commit every time
    585314
    586315#################
     
    598327   
    599328    my $db_driver = $self->{'db_driver'};
    600     my $db_user = $self->{'client_user'} || "root";
    601     my $db_pwd = $self->{'client_pwd'};
     329    my $db_user = $self->{'db_client_user'} || "root";
     330    my $db_pwd = $self->{'db_client_pwd'};
    602331    my $db_host = $self->{'db_host'} || "127.0.0.1";
    603332    my $db_enc = $self->{'db_encoding'} || "utf8";
Note: See TracChangeset for help on using the changeset viewer.