Changeset 32573


Ignore:
Timestamp:
2018-11-05T19:24:47+13:00 (5 years ago)
Author:
ak19
Message:
  1. Caching the prepared SQL insert statements inside the gssql class rather than in the GS SQL Plugout. 2. docprint.pm provides an escape_textref function besides the existing escape_text function, to deal with large text (e.g. fulltxt). The old escape_text function has been rewritten to internally use the escape_textref function.
Location:
main/trunk/greenstone2/perllib
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/docprint.pm

    r32536 r32573  
    108108}
    109109
     110# pass by ref version: gets a ref to a string and returns ref to the modified string
     111# so use this for large strings (fulltext)
     112sub escape_textref {
     113    my ($textref) = @_;
     114    # special characters in the xml encoding
     115    $$textref =~ s/&&/& &/g;
     116    $$textref =~ s/&/&/g; # this has to be first...
     117    $$textref =~ s/</&lt;/g;
     118    $$textref =~ s/>/&gt;/g;
     119    $$textref =~ s/\"/&quot;/g;
     120
     121    return $textref;
     122}
     123
     124# pass by value - the behaviour of the escape_text method used so far is to pass copies of strings
    110125sub escape_text {
    111126    my ($text) = @_;
    112     # special characters in the xml encoding
    113     $text =~ s/&&/& &/g;
    114     $text =~ s/&/&amp;/g; # this has to be first...
    115     $text =~ s/</&lt;/g;
    116     $text =~ s/>/&gt;/g;
    117     $text =~ s/\"/&quot;/g;
    118 
    119     return $text;
     127    my $textref = &escape_textref(\$text);
     128    return $$textref;
    120129}
    121130
  • main/trunk/greenstone2/perllib/gssql.pm

    r32571 r32573  
    270270    # NO: "When all the data has been fetched from a SELECT statement, the driver will automatically call finish for you. So you should not call it explicitly except when you know that you've not fetched all the data from a statement handle and the handle won't be destroyed soon."
    271271   
    272     #$meta_sth = $self->{'metadata_prepared_insert_statement_handle'};
    273     #$txt_sth = $self->{'fulltxt_prepared_insert_statement_handle'};
    274     #$meta_sth->finish() if($meta_sth);
    275     #$txt_sth->finish() if($txt_sth);
    276 
    277272    print STDERR "Disconnecting from database\n" if($self->{'verbosity'} > 1);
    278273   
     
    372367# https://metacpan.org/pod/DBI#performance
    373368# 'The q{...} style quoting used in this example avoids clashing with quotes that may be used in the SQL statement. Use the double-quote like qq{...} operator if you want to interpolate variables into the string. See "Quote and Quote-like Operators" in perlop for more details.'
    374 sub prepare_insert_metadata_row_stmthandle {
    375     my $self = shift (@_);   
     369#
     370# This method uses lazy loading to prepare the SQL insert stmt once for a table and store it,
     371# then execute the (stored) statement each time it's needed for that table.
     372sub insert_row_into_metadata_table {
     373    my $self = shift (@_);
     374    my ($doc_oid, $section_name, $meta_name, $escaped_meta_value, $debug_only, $debug_out) = @_;
     375   
    376376    #my ($did, $sid, $metaname, $metavalue) = @_;
    377377    my $dbh = $self->{'db_handle'};
    378    
    379     my $tablename = $self->get_metadata_table_name();
    380 
    381     #my $stmt = "INSERT INTO $tablename (did, sid, metaname, metavalue) VALUES ('$did', '$sid', '$metaname', '$metavalue');"; # ?, ?, ?, ?
    382 
    383     # using qq{} since we want $tablename placeholder to be filled in
    384     # returns Statement Handle object!
    385     my $sth = $dbh->prepare(qq{INSERT INTO $tablename (did, sid, metaname, metavalue) VALUES (?, ?, ?, ?)}) || warn("Could not prepare insert statement for metadata table\n");
    386 
    387     print STDERR "@@@@ Prepared meta insert statement: ".$sth->{'Statement'}."\n" if($self->{'verbosity'} > 2);
    388    
    389     return $sth;
    390 }
    391 
    392 sub prepare_insert_fulltxt_row_stmthandle {
     378
     379    my $sth = $self->{"sth_insert_into_meta_table"};
     380    if(!$sth) {
     381    my $tablename = $self->get_metadata_table_name();
     382   
     383    #my $stmt = "INSERT INTO $tablename (did, sid, metaname, metavalue) VALUES ('$did', '$sid', '$metaname', '$metavalue');"; # ?, ?, ?, ?
     384   
     385    # using qq{} since we want $tablename placeholder to be filled in
     386    # returns Statement Handle object!
     387    $sth = $dbh->prepare(qq{INSERT INTO $tablename (did, sid, metaname, metavalue) VALUES (?, ?, ?, ?)}) || warn("Could not prepare insert statement for metadata table\n");
     388
     389    print STDERR "@@@@ Prepared meta insert statement: ".$sth->{'Statement'}."\n" if($self->{'verbosity'} > 2);
     390   
     391    $self->{"sth_insert_into_meta_table"} = $sth; # store it for future use
     392    }
     393
     394    # Now we're ready to execute the command, unless we're only debugging
     395   
     396    if($debug_only) {
     397    # just print the statement we were going to execute
     398   
     399    print $debug_out $sth->{'Statement'} . "($doc_oid, $section_name, $meta_name, $escaped_meta_value)\n";
     400    } else {
     401    $sth->execute($doc_oid, $section_name, $meta_name, $escaped_meta_value)
     402        || warn ("Unable to write metadata row to db:\n\tOID $doc_oid, section $section_name,\n\tmeta name: $meta_name, val: $escaped_meta_value");
     403    # Execution failure will print out info anyway: since db connection sets PrintError
     404    }
     405}
     406
     407# As above. Likewise uses lazy loading to prepare the SQL insert stmt once for a table and store it,
     408# then execute the (stored) statement each time it's needed for that table.
     409sub insert_row_into_fulltxt_table {
    393410    my $self = shift (@_);
    394411    #my ($did, $sid, $fulltext) = @_;
    395     my $dbh = $self->{'db_handle'};
    396    
    397     my $tablename = $self->get_fulltext_table_name();
    398 
    399     #my $stmt = "INSERT INTO $tablename (did, sid, fulltxt) VALUES ('$did', '$sid', '$fulltext');"; ?, ?, ?
    400 
    401     # using qq{} since we want $tablename placeholder to be filled in
    402     # returns Statement Handle object!
    403     my $sth = $dbh->prepare(qq{INSERT INTO $tablename (did, sid, fulltxt) VALUES (?, ?, ?)}) || warn("Could not prepare insert statement for fulltxt table\n");
    404    
    405     print STDERR "@@@@ Prepared fulltext insert statement: ".$sth->{'Statement'}."\n" if($self->{'verbosity'} > 2);
    406    
    407     return $sth;
     412    my ($doc_oid, $section_name, $section_textref, $debug_only, $debug_out) = @_;
     413   
     414    my $dbh = $self->{'db_handle'};
     415   
     416    my $sth = $self->{"sth_insert_into_fulltxt_table"};
     417    if(!$sth) {
     418    my $tablename = $self->get_fulltext_table_name();
     419
     420    #my $stmt = "INSERT INTO $tablename (did, sid, fulltxt) VALUES ('$did', '$sid', '$fulltext');"; ?, ?, ?
     421   
     422    # using qq{} since we want $tablename placeholder to be filled in
     423    # returns Statement Handle object!
     424    $sth = $dbh->prepare(qq{INSERT INTO $tablename (did, sid, fulltxt) VALUES (?, ?, ?)}) || warn("Could not prepare insert statement for fulltxt table\n");
     425   
     426    print STDERR "@@@@ Prepared fulltext insert statement: ".$sth->{'Statement'}."\n" if($self->{'verbosity'} > 2);
     427
     428    $self->{"sth_insert_into_fulltxt_table"} = $sth; # store it for future use
     429    }
     430
     431    # Now we're ready to execute the command, unless we're only debugging
     432
     433    if($debug_only) {
     434    # just print the statement we were going to execute, minus the fulltxt value
     435    my $txt_repr = $$section_textref ? "<TXT>" : "NULL";
     436    print $debug_out $sth->{'Statement'} . "($doc_oid, $section_name, $txt_repr)\n";
     437    } else {
     438    $sth->execute($doc_oid, $section_name, $$section_textref)
     439        || warn ("Unable to write fulltxt row to db for row:\n\tOID $doc_oid, section $section_name");
     440    }
    408441}
    409442
  • main/trunk/greenstone2/perllib/plugouts/GreenstoneSQLPlugout.pm

    r32563 r32573  
    177177    $success = $gs_sql->ensure_fulltxt_table_exists();
    178178    }
    179     #if(!$gs_sql->load_db_and_tables($db_name, $proc_mode)) {
     179   
    180180    if(!$success) {
    181181    # This is fatal for the plugout, let's terminate here after disconnecting again
     
    186186    }
    187187
    188     # prepare the shared/common HANDLES to SQL insert statements that contain placeholders
    189     # and which we will reuse repeatedly when actually executing the insert statements
    190    
    191     if($proc_mode eq "all" || $proc_mode eq "meta_only" ) {
    192     $self->{'metadata_prepared_insert_statement_handle'} = $gs_sql->prepare_insert_metadata_row_stmthandle();
    193     }
    194     if($proc_mode eq "all" || $proc_mode eq "text_only" ) {
    195     $self->{'fulltxt_prepared_insert_statement_handle'} = $gs_sql->prepare_insert_fulltxt_row_stmthandle();
    196     }
    197 
    198188    # store the DBI wrapper instance
    199189    $self->{'gs_sql'} = $gs_sql;
    200190   
    201     ##print STDERR "#### Meta stmt: " . $self->{'metadata_prepared_insert_statement_handle'}->{'Statement'} . "\n";
    202     ##print STDERR "#### Full stmt: " . $self->{'fulltxt_prepared_insert_statement_handle'}->{'Statement'} . "\n";
    203191   
    204192    # if setting up to work with sql db failed, we'd have terminated and wouldn't come up to here:
     
    300288    my $root_section = $doc_obj->get_top_section();
    301289
    302     # load the prepared INSERT statement handles for both tables (can be undef for any table depending on whether meta_only or txt_only are set)
    303     my $metadata_table_sth = $self->{'metadata_prepared_insert_statement_handle'};
    304     my $fulltxt_table_sth = $self->{'fulltxt_prepared_insert_statement_handle'};
    305    
    306     $self->recursive_write_meta_and_text($doc_obj, $doc_oid, $root_section, $metadata_table_sth, $fulltxt_table_sth);
     290    $self->recursive_write_meta_and_text($doc_obj, $doc_oid, $root_section);
    307291}
    308292
     
    311295sub recursive_write_meta_and_text {
    312296    my $self = shift (@_);
    313     my ($doc_obj, $doc_oid, $section, $metadata_table_sth, $fulltxt_table_sth) = @_;   
     297    my ($doc_obj, $doc_oid, $section) = @_;   
    314298
    315299    # If section=ROOT, write "root" as section name into table
     
    321305
    322306    my $debug_out = $self->{'debug_outhandle'};
    323 #    print STDERR "#### Meta stmt: " . $metadata_table_sth->{'Statement'} . "\n";
    324 #    print STDERR "#### Full stmt: " . $fulltxt_table_sth->{'Statement'} . "\n";
    325    
    326     #my $proc_mode = $self->{'process_mode'};
    327     #if($proc_mode eq "all" || $proc_mode eq "meta_only" ) {
    328     if($metadata_table_sth) { # meta insert statement handle will be undef if not writing meta
     307
     308    my $gs_sql = $self->{'gs_sql'};
     309    my $proc_mode = $self->{'process_mode'};
     310    if($proc_mode eq "all" || $proc_mode eq "meta_only" ) {
    329311   
    330312    foreach my $data (@{$section_ptr->{'metadata'}}) {
     
    336318        # Write out the current section's meta to collection db's METADATA table       
    337319       
    338         # for each set of values to write to meta table, execute the prepared statement, filling in the values
    339 
    340         if($self->{'debug'}) {
    341         # just print the statement we were going to execute
    342 
    343         print $debug_out $metadata_table_sth->{'Statement'} . "($doc_oid, $section_name, $meta_name, $escaped_meta_value)\n";
     320        # For each set of values to write to meta table, this next method call will
     321        # efficiently execute an insert SQL statement (using a prepared insert statement),
     322        # filling in the values
     323        # OR if debugging, then it will print the SQL insert statement but not execute it
     324
     325        $gs_sql->insert_row_into_metadata_table($doc_oid, $section_name, $meta_name, $escaped_meta_value, $self->{'debug'}, $debug_out);       
     326
    344327        }
    345         else {
    346        
    347         $metadata_table_sth->execute($doc_oid, $section_name, $meta_name, $escaped_meta_value)
    348             || warn ("Unable to write metadata row to db:\n\tOID $doc_oid, section $section_name,\n\tmeta name: $meta_name, val: $escaped_meta_value");
    349         # Execution failure will print out info anyway: since db connection sets PrintError
    350         }
    351     }
    352     }
    353    
    354     #if($proc_mode eq "all" || $proc_mode eq "text_only" ) {
    355     if($fulltxt_table_sth) { # fulltxt insert statement handle will be undef if not writing fulltxt
    356 
    357     if($self->{'debug'}) {
    358         # just print the statement we were going to execute, minus the fulltxt value
    359         my $txt_repr = $section_ptr->{'text'} ? "<TXT>" : "NULL";
    360         print $debug_out $fulltxt_table_sth->{'Statement'} . "($doc_oid, $section_name, $txt_repr)\n";
    361     } else {
    362         my $section_text = &docprint::escape_text($section_ptr->{'text'});
    363        
    364         # fulltxt column can be SQL NULL. undef value gets written out as NULL:
    365         # https://stackoverflow.com/questions/12708633/which-one-represents-null-undef-or-empty-string
    366        
    367         # Write out the current section's text to collection db's FULLTeXT table
    368         $fulltxt_table_sth->execute($doc_oid, $section_name, $section_text)
    369         || warn ("Unable to write fulltxt row to db for row:\n\tOID $doc_oid, section $section_name");
    370         # Execution failure will print out info anyway: since db connection sets PrintError
    371     }
     328    }
     329   
     330   
     331    if($proc_mode eq "all" || $proc_mode eq "text_only" ) {
     332   
     333    my $section_textref = &docprint::escape_textref(\$section_ptr->{'text'});
     334   
     335    # fulltxt column can be SQL NULL. undef value gets written out as NULL:
     336    # https://stackoverflow.com/questions/12708633/which-one-represents-null-undef-or-empty-string
     337    # The following will do the SQL insertion
     338    # or if debug, the following will print the SQL insert stmt without executing it
     339    $gs_sql->insert_row_into_fulltxt_table($doc_oid, $section_name, $section_textref, $self->{'debug'}, $debug_out);
     340   
    372341    }
    373342   
    374343    # output all subsections: RECURSIVE CALL
    375344    foreach my $subsection (@{$section_ptr->{'subsection_order'}}) {
    376     $self->recursive_write_meta_and_text($doc_obj, $doc_oid, "$section.$subsection", $metadata_table_sth, $fulltxt_table_sth);
     345    $self->recursive_write_meta_and_text($doc_obj, $doc_oid, "$section.$subsection");
    377346    }
    378347}
Note: See TracChangeset for help on using the changeset viewer.