Changeset 32573

Show
Ignore:
Timestamp:
05.11.2018 19:24:47 (2 weeks ago)
Author:
ak19
Message:

1. Caching the prepared SQL insert statements inside the gssql class rather than in the GS SQL Plugout. 2. docprint.pm provides an escape_textref function besides the existing escape_text function, to deal with large text (e.g. fulltxt). The old escape_text function has been rewritten to internally use the escape_textref function.

Location:
main/trunk/greenstone2/perllib
Files:
3 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/docprint.pm

    r32536 r32573  
    108108} 
    109109 
     110# pass by ref version: gets a ref to a string and returns ref to the modified string 
     111# so use this for large strings (fulltext) 
     112sub escape_textref { 
     113    my ($textref) = @_; 
     114    # special characters in the xml encoding 
     115    $$textref =~ s/&&/& &/g; 
     116    $$textref =~ s/&/&/g; # this has to be first... 
     117    $$textref =~ s/</&lt;/g; 
     118    $$textref =~ s/>/&gt;/g; 
     119    $$textref =~ s/\"/&quot;/g; 
     120 
     121    return $textref; 
     122} 
     123 
     124# pass by value - the behaviour of the escape_text method used so far is to pass copies of strings 
    110125sub escape_text { 
    111126    my ($text) = @_; 
    112     # special characters in the xml encoding 
    113     $text =~ s/&&/& &/g; 
    114     $text =~ s/&/&amp;/g; # this has to be first... 
    115     $text =~ s/</&lt;/g; 
    116     $text =~ s/>/&gt;/g; 
    117     $text =~ s/\"/&quot;/g; 
    118  
    119     return $text; 
     127    my $textref = &escape_textref(\$text); 
     128    return $$textref; 
    120129} 
    121130 
  • main/trunk/greenstone2/perllib/gssql.pm

    r32571 r32573  
    270270    # NO: "When all the data has been fetched from a SELECT statement, the driver will automatically call finish for you. So you should not call it explicitly except when you know that you've not fetched all the data from a statement handle and the handle won't be destroyed soon." 
    271271     
    272     #$meta_sth = $self->{'metadata_prepared_insert_statement_handle'}; 
    273     #$txt_sth = $self->{'fulltxt_prepared_insert_statement_handle'}; 
    274     #$meta_sth->finish() if($meta_sth); 
    275     #$txt_sth->finish() if($txt_sth); 
    276  
    277272    print STDERR "Disconnecting from database\n" if($self->{'verbosity'} > 1); 
    278273     
     
    372367# https://metacpan.org/pod/DBI#performance 
    373368# 'The q{...} style quoting used in this example avoids clashing with quotes that may be used in the SQL statement. Use the double-quote like qq{...} operator if you want to interpolate variables into the string. See "Quote and Quote-like Operators" in perlop for more details.' 
    374 sub prepare_insert_metadata_row_stmthandle { 
    375     my $self = shift (@_);     
     369# 
     370# This method uses lazy loading to prepare the SQL insert stmt once for a table and store it, 
     371# then execute the (stored) statement each time it's needed for that table. 
     372sub insert_row_into_metadata_table { 
     373    my $self = shift (@_); 
     374    my ($doc_oid, $section_name, $meta_name, $escaped_meta_value, $debug_only, $debug_out) = @_; 
     375     
    376376    #my ($did, $sid, $metaname, $metavalue) = @_; 
    377377    my $dbh = $self->{'db_handle'}; 
    378      
    379     my $tablename = $self->get_metadata_table_name(); 
    380  
    381     #my $stmt = "INSERT INTO $tablename (did, sid, metaname, metavalue) VALUES ('$did', '$sid', '$metaname', '$metavalue');"; # ?, ?, ?, ? 
    382  
    383     # using qq{} since we want $tablename placeholder to be filled in 
    384     # returns Statement Handle object! 
    385     my $sth = $dbh->prepare(qq{INSERT INTO $tablename (did, sid, metaname, metavalue) VALUES (?, ?, ?, ?)}) || warn("Could not prepare insert statement for metadata table\n"); 
    386  
    387     print STDERR "@@@@ Prepared meta insert statement: ".$sth->{'Statement'}."\n" if($self->{'verbosity'} > 2); 
    388      
    389     return $sth; 
    390 } 
    391  
    392 sub prepare_insert_fulltxt_row_stmthandle { 
     378 
     379    my $sth = $self->{"sth_insert_into_meta_table"}; 
     380    if(!$sth) { 
     381    my $tablename = $self->get_metadata_table_name(); 
     382     
     383    #my $stmt = "INSERT INTO $tablename (did, sid, metaname, metavalue) VALUES ('$did', '$sid', '$metaname', '$metavalue');"; # ?, ?, ?, ? 
     384     
     385    # using qq{} since we want $tablename placeholder to be filled in 
     386    # returns Statement Handle object! 
     387    $sth = $dbh->prepare(qq{INSERT INTO $tablename (did, sid, metaname, metavalue) VALUES (?, ?, ?, ?)}) || warn("Could not prepare insert statement for metadata table\n"); 
     388 
     389    print STDERR "@@@@ Prepared meta insert statement: ".$sth->{'Statement'}."\n" if($self->{'verbosity'} > 2); 
     390     
     391    $self->{"sth_insert_into_meta_table"} = $sth; # store it for future use 
     392    } 
     393 
     394    # Now we're ready to execute the command, unless we're only debugging 
     395     
     396    if($debug_only) { 
     397    # just print the statement we were going to execute 
     398     
     399    print $debug_out $sth->{'Statement'} . "($doc_oid, $section_name, $meta_name, $escaped_meta_value)\n"; 
     400    } else { 
     401    $sth->execute($doc_oid, $section_name, $meta_name, $escaped_meta_value) 
     402        || warn ("Unable to write metadata row to db:\n\tOID $doc_oid, section $section_name,\n\tmeta name: $meta_name, val: $escaped_meta_value"); 
     403    # Execution failure will print out info anyway: since db connection sets PrintError 
     404    } 
     405} 
     406 
     407# As above. Likewise uses lazy loading to prepare the SQL insert stmt once for a table and store it, 
     408# then execute the (stored) statement each time it's needed for that table. 
     409sub insert_row_into_fulltxt_table { 
    393410    my $self = shift (@_); 
    394411    #my ($did, $sid, $fulltext) = @_; 
    395     my $dbh = $self->{'db_handle'}; 
    396      
    397     my $tablename = $self->get_fulltext_table_name(); 
    398  
    399     #my $stmt = "INSERT INTO $tablename (did, sid, fulltxt) VALUES ('$did', '$sid', '$fulltext');"; ?, ?, ? 
    400  
    401     # using qq{} since we want $tablename placeholder to be filled in 
    402     # returns Statement Handle object! 
    403     my $sth = $dbh->prepare(qq{INSERT INTO $tablename (did, sid, fulltxt) VALUES (?, ?, ?)}) || warn("Could not prepare insert statement for fulltxt table\n"); 
    404      
    405     print STDERR "@@@@ Prepared fulltext insert statement: ".$sth->{'Statement'}."\n" if($self->{'verbosity'} > 2); 
    406      
    407     return $sth; 
     412    my ($doc_oid, $section_name, $section_textref, $debug_only, $debug_out) = @_; 
     413     
     414    my $dbh = $self->{'db_handle'}; 
     415     
     416    my $sth = $self->{"sth_insert_into_fulltxt_table"}; 
     417    if(!$sth) { 
     418    my $tablename = $self->get_fulltext_table_name(); 
     419 
     420    #my $stmt = "INSERT INTO $tablename (did, sid, fulltxt) VALUES ('$did', '$sid', '$fulltext');"; ?, ?, ? 
     421     
     422    # using qq{} since we want $tablename placeholder to be filled in 
     423    # returns Statement Handle object! 
     424    $sth = $dbh->prepare(qq{INSERT INTO $tablename (did, sid, fulltxt) VALUES (?, ?, ?)}) || warn("Could not prepare insert statement for fulltxt table\n"); 
     425     
     426    print STDERR "@@@@ Prepared fulltext insert statement: ".$sth->{'Statement'}."\n" if($self->{'verbosity'} > 2); 
     427 
     428    $self->{"sth_insert_into_fulltxt_table"} = $sth; # store it for future use 
     429    } 
     430 
     431    # Now we're ready to execute the command, unless we're only debugging 
     432 
     433    if($debug_only) { 
     434    # just print the statement we were going to execute, minus the fulltxt value 
     435    my $txt_repr = $$section_textref ? "<TXT>" : "NULL"; 
     436    print $debug_out $sth->{'Statement'} . "($doc_oid, $section_name, $txt_repr)\n"; 
     437    } else { 
     438    $sth->execute($doc_oid, $section_name, $$section_textref) 
     439        || warn ("Unable to write fulltxt row to db for row:\n\tOID $doc_oid, section $section_name"); 
     440    } 
    408441} 
    409442 
  • main/trunk/greenstone2/perllib/plugouts/GreenstoneSQLPlugout.pm

    r32563 r32573  
    177177    $success = $gs_sql->ensure_fulltxt_table_exists(); 
    178178    } 
    179     #if(!$gs_sql->load_db_and_tables($db_name, $proc_mode)) { 
     179     
    180180    if(!$success) {  
    181181    # This is fatal for the plugout, let's terminate here after disconnecting again 
     
    186186    } 
    187187 
    188     # prepare the shared/common HANDLES to SQL insert statements that contain placeholders 
    189     # and which we will reuse repeatedly when actually executing the insert statements 
    190      
    191     if($proc_mode eq "all" || $proc_mode eq "meta_only" ) { 
    192     $self->{'metadata_prepared_insert_statement_handle'} = $gs_sql->prepare_insert_metadata_row_stmthandle(); 
    193     } 
    194     if($proc_mode eq "all" || $proc_mode eq "text_only" ) { 
    195     $self->{'fulltxt_prepared_insert_statement_handle'} = $gs_sql->prepare_insert_fulltxt_row_stmthandle(); 
    196     } 
    197  
    198188    # store the DBI wrapper instance 
    199189    $self->{'gs_sql'} = $gs_sql; 
    200190     
    201     ##print STDERR "#### Meta stmt: " . $self->{'metadata_prepared_insert_statement_handle'}->{'Statement'} . "\n"; 
    202     ##print STDERR "#### Full stmt: " . $self->{'fulltxt_prepared_insert_statement_handle'}->{'Statement'} . "\n"; 
    203191     
    204192    # if setting up to work with sql db failed, we'd have terminated and wouldn't come up to here: 
     
    300288    my $root_section = $doc_obj->get_top_section(); 
    301289 
    302     # load the prepared INSERT statement handles for both tables (can be undef for any table depending on whether meta_only or txt_only are set) 
    303     my $metadata_table_sth = $self->{'metadata_prepared_insert_statement_handle'}; 
    304     my $fulltxt_table_sth = $self->{'fulltxt_prepared_insert_statement_handle'}; 
    305      
    306     $self->recursive_write_meta_and_text($doc_obj, $doc_oid, $root_section, $metadata_table_sth, $fulltxt_table_sth); 
     290    $self->recursive_write_meta_and_text($doc_obj, $doc_oid, $root_section); 
    307291} 
    308292 
     
    311295sub recursive_write_meta_and_text { 
    312296    my $self = shift (@_); 
    313     my ($doc_obj, $doc_oid, $section, $metadata_table_sth, $fulltxt_table_sth) = @_;     
     297    my ($doc_obj, $doc_oid, $section) = @_;     
    314298 
    315299    # If section=ROOT, write "root" as section name into table 
     
    321305 
    322306    my $debug_out = $self->{'debug_outhandle'}; 
    323 #    print STDERR "#### Meta stmt: " . $metadata_table_sth->{'Statement'} . "\n"; 
    324 #    print STDERR "#### Full stmt: " . $fulltxt_table_sth->{'Statement'} . "\n"; 
    325      
    326     #my $proc_mode = $self->{'process_mode'}; 
    327     #if($proc_mode eq "all" || $proc_mode eq "meta_only" ) { 
    328     if($metadata_table_sth) { # meta insert statement handle will be undef if not writing meta 
     307 
     308    my $gs_sql = $self->{'gs_sql'}; 
     309    my $proc_mode = $self->{'process_mode'}; 
     310    if($proc_mode eq "all" || $proc_mode eq "meta_only" ) { 
    329311     
    330312    foreach my $data (@{$section_ptr->{'metadata'}}) { 
     
    336318        # Write out the current section's meta to collection db's METADATA table         
    337319         
    338         # for each set of values to write to meta table, execute the prepared statement, filling in the values 
    339  
    340         if($self->{'debug'}) { 
    341         # just print the statement we were going to execute 
    342  
    343         print $debug_out $metadata_table_sth->{'Statement'} . "($doc_oid, $section_name, $meta_name, $escaped_meta_value)\n"; 
     320        # For each set of values to write to meta table, this next method call will 
     321        # efficiently execute an insert SQL statement (using a prepared insert statement),  
     322        # filling in the values 
     323        # OR if debugging, then it will print the SQL insert statement but not execute it 
     324 
     325        $gs_sql->insert_row_into_metadata_table($doc_oid, $section_name, $meta_name, $escaped_meta_value, $self->{'debug'}, $debug_out);         
     326 
    344327        } 
    345         else { 
    346          
    347         $metadata_table_sth->execute($doc_oid, $section_name, $meta_name, $escaped_meta_value) 
    348             || warn ("Unable to write metadata row to db:\n\tOID $doc_oid, section $section_name,\n\tmeta name: $meta_name, val: $escaped_meta_value"); 
    349         # Execution failure will print out info anyway: since db connection sets PrintError 
    350         } 
    351     } 
    352     } 
    353      
    354     #if($proc_mode eq "all" || $proc_mode eq "text_only" ) { 
    355     if($fulltxt_table_sth) { # fulltxt insert statement handle will be undef if not writing fulltxt 
    356  
    357     if($self->{'debug'}) { 
    358         # just print the statement we were going to execute, minus the fulltxt value 
    359         my $txt_repr = $section_ptr->{'text'} ? "<TXT>" : "NULL"; 
    360         print $debug_out $fulltxt_table_sth->{'Statement'} . "($doc_oid, $section_name, $txt_repr)\n"; 
    361     } else { 
    362         my $section_text = &docprint::escape_text($section_ptr->{'text'}); 
    363          
    364         # fulltxt column can be SQL NULL. undef value gets written out as NULL: 
    365         # https://stackoverflow.com/questions/12708633/which-one-represents-null-undef-or-empty-string 
    366          
    367         # Write out the current section's text to collection db's FULLTeXT table 
    368         $fulltxt_table_sth->execute($doc_oid, $section_name, $section_text) 
    369         || warn ("Unable to write fulltxt row to db for row:\n\tOID $doc_oid, section $section_name"); 
    370         # Execution failure will print out info anyway: since db connection sets PrintError 
    371     } 
     328    } 
     329     
     330     
     331    if($proc_mode eq "all" || $proc_mode eq "text_only" ) { 
     332     
     333    my $section_textref = &docprint::escape_textref(\$section_ptr->{'text'}); 
     334     
     335    # fulltxt column can be SQL NULL. undef value gets written out as NULL: 
     336    # https://stackoverflow.com/questions/12708633/which-one-represents-null-undef-or-empty-string 
     337    # The following will do the SQL insertion 
     338    # or if debug, the following will print the SQL insert stmt without executing it 
     339    $gs_sql->insert_row_into_fulltxt_table($doc_oid, $section_name, $section_textref, $self->{'debug'}, $debug_out); 
     340     
    372341    } 
    373342     
    374343    # output all subsections: RECURSIVE CALL 
    375344    foreach my $subsection (@{$section_ptr->{'subsection_order'}}) { 
    376     $self->recursive_write_meta_and_text($doc_obj, $doc_oid, "$section.$subsection", $metadata_table_sth, $fulltxt_table_sth); 
     345    $self->recursive_write_meta_and_text($doc_obj, $doc_oid, "$section.$subsection"); 
    377346    } 
    378347}