Changeset 32563
- Timestamp:
- 2018-11-02T19:07:16+13:00 (5 years ago)
- Location:
- main/trunk/greenstone2/perllib
- Files:
-
- 5 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/perllib/gssql.pm
r32561 r32563 44 44 45 45 # Parameterise (one or more methods may use them): 46 # - build_mode (like removeold) 47 46 # - db_name (which is the GS3 sitename) 48 47 … … 134 133 print STDERR "Away to make connection to $db_driver database with:\n"; 135 134 print STDERR " - hostname $db_host; username: $db_user"; 136 print STDERR "; and the password provided" if $db_pwd; 135 print STDERR "; and the provided password" if $db_pwd; 137 136 print STDERR "\nAssuming the mysql server has been started with: --character_set_server=utf8mb4\n" if $db_driver eq "mysql"; 138 137 } … … 188 187 } 189 188 190 # will attempt to load the specified db and the <coll>_metadata and <coll>_fulltxt for this 191 # collection, or create any of these (db, tables) that don't yet exist. At the end 192 # it will have loaded the requested database (in MySQL: "use <db>;") 193 sub load_db_and_tables { 194 my $self= shift (@_); 195 my ($db_name, $build_mode) = @_; 189 # Load the designated database, i.e. 'use <dbname>;'. 190 # If the database doesn't yet exist, creates it and loads it. 191 # (Don't create the collection's tables yet, though) 192 # At the end it will have loaded the requested database (in MySQL: "use <db>;") on success. 193 # As usual, returns success or failure value that can be evaluated in a boolean context. 194 sub use_db { 195 my $self= shift (@_); 196 my ($db_name) = @_; 196 197 my $dbh = $self->{'db_handle'}; 197 198 $db_name = $self->sanitize_name($db_name); … … 215 216 # once more attempt to use db, now that it exists 216 217 $dbh->do("use $db_name") || return 0; 217 #$dbh->do("use localsite") or die "Error (code" . $dbh->err ."): " . $dbh->errstr . "\n"; 218 219 # attempt to create tables in current db 220 $self->create_metadata_table() || return 0; 221 $self->create_fulltext_table() || return 0; 218 #$dbh->do("use $db_name") or die "Error (code" . $dbh->err ."): " . $dbh->errstr . 
"\n"; 222 219 223 220 $success = 1; … … 227 224 228 225 print STDERR "@@@ DATABASE $db_name EXISTED\n" if($self->{'verbosity'} > 2); 229 230 231 # build_mode can be removeold or incremental. We only do something special on removeold:232 # deleting the existing tables for this collection and recreating empty ones233 if($build_mode eq "removeold") {234 print STDERR " Building with removeold option set, so deleting current collection's tables if they exist\n" if($self->{'verbosity'});235 $self->delete_collection_tables();236 }237 238 # use existing tables if any239 # attempt to create tables in current db240 if($build_mode eq "removeold" || !$self->table_exists($self->get_metadata_table_name())) {241 $self->create_metadata_table() || return 0;242 } else {243 print STDERR "@@@ Meta table exists\n" if($self->{'verbosity'} > 2);244 }245 if($build_mode eq "removeold" || !$self->table_exists($self->get_fulltext_table_name())) {246 $self->create_fulltext_table() || return 0;247 } else {248 print STDERR "@@@ Fulltxt table exists\n" if($self->{'verbosity'} > 2);249 }250 251 226 } 252 227 … … 254 229 } 255 230 256 # GreenstoneSQLPlugin calls this method to load an existing db. 257 # This will terminate if the db does not exist. Unlike load_db_and_tables() above, used by 258 # GreenstoneSQLPlugout, this method will not attempt to create the requested db (nor its tables) 259 # TODO: GS SQLPlugin is called before GS SQLPlugout and attempts to use_db() - called in plugin's 260 # init() method. This will fail if the db does not exist. Ideally want the gssqlplugin only called 261 # during buildcol.pl 262 sub use_db { 263 my $self= shift (@_); 264 my ($db_name) = @_; 265 my $dbh = $self->{'db_handle'}; 266 $db_name = $self->sanitize_name($db_name); 267 268 269 print STDERR "Loading database $db_name\n" if($self->{'verbosity'} > 1); 270 271 # perl DBI switch database: https://www.perlmonks.org/?node_id=995434 272 # do() returns undef on error. 
273 # connection succeeded, try to load our database. If that didn't work, attempt to create db 274 return $dbh->do("use $db_name") || warn(); 231 # We should already have done "use <database>;" if this gets called. 232 # Just load this collection's metatable 233 sub ensure_meta_table_exists { 234 my $self = shift (@_); 235 236 my $tablename = $self->get_metadata_table_name(); 237 if(!$self->table_exists($tablename)) { 238 #print STDERR " Creating metadata table $tablename\n" if($self->{'verbosity'} > 1); 239 $self->create_metadata_table() || return 0; 240 } else { 241 print STDERR "@@@ Meta table exists\n" if($self->{'verbosity'} > 2); 242 } 243 return 1; 244 } 245 246 # We should already have done "use <database>;" if this gets called. 247 # Just load this collection's metatable 248 sub ensure_fulltxt_table_exists { 249 my $self = shift (@_); 250 251 my $tablename = $self->get_fulltext_table_name(); 252 if(!$self->table_exists($tablename)) { 253 #print STDERR " Creating fulltxt table $tablename\n" if($self->{'verbosity'} > 1); 254 $self->create_fulltext_table() || return 0; 255 } else { 256 print STDERR "@@@ Fulltxt table exists\n" if($self->{'verbosity'} > 2); 257 } 258 return 1; 275 259 } 276 260 -
main/trunk/greenstone2/perllib/inexport.pm
r32555 r32563 690 690 } 691 691 my $processor = &plugout::load_plugout($plugout); 692 $processor->set_incremental_options($removeold, $keepold, $incremental, $incremental_mode); 693 692 $processor->setoutputdir ($archivedir); 694 693 $processor->set_sortmeta ($sortmeta, $removeprefix, $removesuffix) if defined $sortmeta; -
main/trunk/greenstone2/perllib/plugins/GreenstoneSQLPlugin.pm
r32560 r32563 40 40 # TODO: 41 41 # - Run TODOs here, in Plugout and in gssql.pm by Dr Bainbridge. 42 # - "Courier" demo documents in lucene-sql collection: character (degree symbol) not preserved in title. Is this because we encode in utf8 when putting into db and reading back in?43 # Test doc with meta and text like macron in Maori text.44 42 # - Have not yet tested writing out just meta or just fulltxt to sql db and reading just that 45 43 # back in from the sql db while the remainder is to be read back in from the docsql .xml files. … … 47 45 # TODO: deal with incremental vs removeold. If docs removed from import folder, then import step 48 46 # won't delete it from archives but buildcol step will. Need to implement this with this database plugin or wherever the actual flow is 49 50 # TODO: Add public instructions on using this plugin and its plugout: start with installing mysql binary, changing pwd, running the server (and the client against it for checking, basic cmds like create and drop). Then discuss db name, table names (per coll), db cols and col types, and how the plugout and plugin work.51 # Discuss the plugin/plugout parameters.52 53 # + NOTTODO: when db is not running GLI is paralyzed -> can we set timeout on DBI connection attempt?54 # NOT A PROBLEM: Tested to find DBI connection attempt fails immediately when MySQL server not55 # running. The GLI "paralyzing" incident last time was not because of the gs sql connection code,56 # but because my computer was freezing on-and-off.57 47 58 48 # TODO Q: is "reindex" = del from db + add to db? … … 63 53 # be reimplemented in GSSQLPlugin to support the adding portion of reindexing. 64 54 65 66 # TODO Q: During import, the GS SQL Plugin is called before the GS SQL Plugout with undesirable side67 # effect that if the db doesn't exist, gssql::use_db() fails, as it won't create db. 
68 69 55 # TODO: Add public instructions on using this plugin and its plugout: start with installing mysql binary, changing pwd, running the server (and the client against it for checking: basic cmds like create and drop). Then discuss db name, table names (per coll), db cols and col types, and how the plugout and plugin work. 56 # Discuss the plugin/plugout parameters. 57 58 59 # DONE: 70 60 # + TODO: Incremental delete can't work until GSSQLPlugout has implemented build_mode = incremental 71 61 # (instead of tossing away db on every build) … … 79 69 # (sections) which in this case had made it easy to reconstruct the doc_obj in memory in the correct order. 80 70 # YES: Otherwise for later db types (drivers), can set order by primary key column and then order by did column 71 # + NOTTODO: when db is not running GLI is paralyzed -> can we set timeout on DBI connection attempt? 72 # NOT A PROBLEM: Tested to find DBI connection attempt fails immediately when MySQL server not 73 # running. The GLI "paralyzing" incident last time was not because of the gs sql connection code, 74 # but because my computer was freezing on-and-off. 75 # + "Courier" demo documents in lucene-sql collection: character (degree symbol) not preserved in title. Is this because we encode in utf8 when putting into db and reading back in? 76 # Test doc with meta and text like macron in Maori text. 77 # + TODO Q: During import, the GS SQL Plugin is called before the GS SQL Plugout with undesirable side 78 # effect that if the db doesn't exist, gssql::use_db() fails, as it won't create db. 79 # This got fixed when GSSQLPlugin stopped connecting on init(). 
81 80 82 81 … … 177 176 178 177 # do anything else that needs to be done here when not pluginfo 179 #$self->{'delete_docids'} = (); # list of doc oids to delete during deinit()180 178 181 179 return $self; 182 180 } 183 181 184 sub xml_start_tag { 185 my $self = shift(@_); 186 my ($expat, $element) = @_; 187 188 my $outhandle = $self->{'outhandle'}; 189 190 $self->{'element'} = $element; 191 if ($element eq "Archive") { # docsql.xml files contain a OID attribute on Archive element 192 # the element's attributes are in %_ as per ReadXMLFile::xml_start_tag() (while $_ 193 # contains the tag) 194 195 # Don't access %_{'docoid'} directly: keep getting a warning message to 196 # use $_{'docoid'} for scalar contexts, but %_ is the element's attr hashmap 197 # whereas $_ has the tag info. So we don't want to do $_{'docoid'}. 198 my %attr_hash = %_; # right way, see OAIPlugin.pm 199 $self->{'doc_oid'} = $attr_hash{'docoid'}; 200 print STDERR "XXXXXXXXXXXXXX in SQLPlugin::xml_start_tag()\n"; 201 print $outhandle "Extracted OID from docsql.xml: ".$self->{'doc_oid'}."\n" 202 if $self->{'verbosity'} > 2; 203 204 } 205 else { # let superclass GreenstoneXMLPlugin continue to process <Section> and <Metadata> elements 206 $self->SUPER::xml_start_tag(@_); 207 } 208 } 209 210 # TODO Q: Why are there 4 passes when we're only indexing at doc and section level (2 passes)? What's the dummy pass, why is there a pass for infodb? 211 212 # At the end of superclass GreenstoneXMLPlugin.pm's close_document() method, 213 # the doc_obj in memory is processed (indexed) and then made undef. 214 # So we have to work with doc_obj before superclass close_document() is finished. 
215 sub close_document { 216 my $self = shift(@_); 217 218 print STDERR "XXXXXXXXX in SQLPlugin::close_doc()\n"; 219 220 my $gs_sql = $self->get_gssql_instance(); 221 222 my $outhandle = $self->{'outhandle'}; 223 my $doc_obj = $self->{'doc_obj'}; 224 225 my $build_proc_mode = $self->{'processor'}->get_mode(); # can be "text" as per basebuildproc or infodb 226 my $oid = $self->{'doc_oid'}; # we stored current doc's OID during sub xml_start_tag() 182 # This is called once if removeold is set with import.pl. Most plugins will do 183 # nothing but if a plugin does any stuff outside of creating doc obj, then 184 # it may need to clear something. 185 # In the case of GreenstoneSQL plugs: this is the first time we have a chance 186 # to purge the tables of the current collection from the current site's database 187 sub remove_all { 188 my $self = shift (@_); 189 my ($pluginfo, $base_dir, $processor, $maxdocs) = @_; 190 191 print STDERR " Building with removeold option set, so deleting current collection's tables if they exist\n" if($self->{'verbosity'}); 192 193 # if we're in here, we'd already have run 'use database <site_name>;' during sub init() 194 # so we can go ahead and delete the collection's tables 195 my $gs_sql = $self->{'gs_sql'}; 196 $gs_sql->delete_collection_tables(); # will delete them if they exist 197 198 # and recreate tables? No. 
Tables' existence is ensured in GreenstoneSQLPlugout::begin() 227 199 my $proc_mode = $self->{'process_mode'}; 228 229 print $outhandle "++++ OID of document (meta|text) to be del or read in from DB: ".$self->{'doc_oid'}."\n" 230 if $self->{'verbosity'} > 2; 231 232 # For now, we have access to doc_obj (until just before super::close_document() terminates) 233 234 # no need to call $self->{'doc_obj'}->set_OID($oid); 235 # because either the OID is stored in the SQL db as meta 'Identifier' alongside other metadata 236 # or it's stored in the doc.xml as metadata 'Identifier' alongside other metadata 237 # Either way, Identifier meta will be read into the docobj automatically with other meta. 238 239 if ($self->{'verbosity'} > 2) { 240 print STDERR "+++++++++++ buildproc_mode: $build_proc_mode\n"; 241 print STDERR "+++++++++++ SQLPlug proc_mode: $proc_mode\n"; 242 } 243 244 # TODO: where does reindexing take place, GreenstoneSQL -Plugout or -Plugin? 245 #if($build_proc_mode =~ m/(delete|reindex)$/) { # doc denoted by current OID has been marked for deletion or reindexing (=delete + add) 246 if($build_proc_mode =~ m/(delete)$/) { # doc denoted by current OID has been marked for deletion or reindexing (=delete + add) 247 248 # build_proc_mode could be "(infodb|text)(delete|reindex)" 249 # "...delete" or "...reindex" as per ArchivesInfPlugin 250 251 print STDERR "@@@@ DELETING DOC FROM SQL DB\n"; 252 200 if($proc_mode ne "text_only") { 201 $gs_sql->ensure_meta_table_exists(); 202 } 203 if($proc_mode ne "meta_only") { 204 $gs_sql->ensure_fulltxt_table_exists(); 205 } 206 } 207 208 # This is called per document for docs that have been deleted from the 209 # collection. Most plugins will do nothing 210 # but if a plugin does any stuff outside of creating doc obj, then it may need 211 # to clear something. 
212 # remove the doc(s) denoted by oids from GS SQL db 213 # This takes care of incremental deletes (docs marked D by ArchivesInfPlugin when building 214 # incrementally) as well as cases of "Non-icremental Delete", see ArchivesInfPlugin.pm 215 sub remove_one { 216 my $self = shift (@_); 217 218 my ($file, $oids, $archivedir) = @_; 219 220 print STDERR "@@@ IN SQLPLUG::REMOVE_ONE: $file\n"; 221 222 #return undef unless $self->can_process_this_file($file); # NO, DON'T DO THIS: 223 # WE DON'T CARE IF IT'S AN IMAGE FILE THAT WAS DELETED. 224 # WE CARE ABOUT REMOVING THE DOCOID OF THAT IMAGE FILE FROM THE DB 225 226 print STDERR "*****************************\nAsked to remove_one oid\n***********************\n"; 227 228 my $gs_sql = $self->{'gs_sql'} || return 0; # couldn't make the connection or no db etc 229 if(scalar @$oids > 1) { 230 print STDERR "TODO: We now have reason to optimise GreenstoneSQLPlugin::remove_one() by using prepare and execute.\n"; 231 } 232 233 my $proc_mode = $self->{'process_mode'}; 234 foreach my $oid (@$oids) { 253 235 if($proc_mode eq "all" || $proc_mode eq "meta_only") { 254 236 print STDERR "@@@@@@@@ Deleting $oid from meta table\n" if $self->{'verbosity'} > 2; … … 259 241 $gs_sql->delete_recs_from_texttable_with_docid($oid); 260 242 } 261 262 # If we're reindexing the current doc, we will we want to continue: which 263 # will add this doc ID back into the db with the new meta/full txt values 264 # But if we're deleting, then we're done processing the document, so set doc_oid to undef 265 # to prevent adding it back into db 266 #undef $self->{'doc_oid'} if($build_proc_mode =~ m/delete$/); 267 268 } # done deleting doc from SQL db 269 270 else {#if($self->{'doc_oid'}) { # if loading doc from SQL db 271 print STDERR "@@@@ LOADING DOC FROM SQL DB\n"; 272 273 if($proc_mode eq "all" || $proc_mode eq "meta_only") { 274 # read in meta for the collection (i.e. 
select * from <col>_metadata table 243 } 244 return 1; 245 } 246 247 248 sub xml_start_tag { 249 my $self = shift(@_); 250 my ($expat, $element) = @_; 251 252 my $outhandle = $self->{'outhandle'}; 253 254 $self->{'element'} = $element; 255 if ($element eq "Archive") { # docsql.xml files contain a OID attribute on Archive element 256 # the element's attributes are in %_ as per ReadXMLFile::xml_start_tag() (while $_ 257 # contains the tag) 258 259 # Don't access %_{'docoid'} directly: keep getting a warning message to 260 # use $_{'docoid'} for scalar contexts, but %_ is the element's attr hashmap 261 # whereas $_ has the tag info. So we don't want to do $_{'docoid'}. 262 my %attr_hash = %_; # right way, see OAIPlugin.pm 263 $self->{'doc_oid'} = $attr_hash{'docoid'}; 264 ##print STDERR "XXXXXXXXXXXXXX in SQLPlugin::xml_start_tag()\n"; 265 print $outhandle "Extracted OID from docsql.xml: ".$self->{'doc_oid'}."\n" 266 if $self->{'verbosity'} > 2; 267 268 } 269 else { # let superclass GreenstoneXMLPlugin continue to process <Section> and <Metadata> elements 270 $self->SUPER::xml_start_tag(@_); 271 } 272 } 273 274 # TODO Q: Why are there 4 passes when we're only indexing at doc and section level (2 passes)? What's the dummy pass, why is there a pass for infodb? 275 276 # We should only ever get here during the buildcol.pl phase 277 # At the end of superclass GreenstoneXMLPlugin.pm's close_document() method, 278 # the doc_obj in memory is processed (indexed) and then made undef. 279 # So we have to work with doc_obj before superclass close_document() is finished. 
280 sub close_document { 281 my $self = shift(@_); 282 283 ##print STDERR "XXXXXXXXX in SQLPlugin::close_doc()\n"; 284 285 my $gs_sql = $self->{'gs_sql'}; 286 287 my $outhandle = $self->{'outhandle'}; 288 my $doc_obj = $self->{'doc_obj'}; 289 290 my $oid = $self->{'doc_oid'}; # we stored current doc's OID during sub xml_start_tag() 291 my $proc_mode = $self->{'process_mode'}; 292 293 # For now, we have access to doc_obj (until just before super::close_document() terminates) 294 295 # no need to call $self->{'doc_obj'}->set_OID($oid); 296 # because either the OID is stored in the SQL db as meta 'Identifier' alongside other metadata 297 # or it's stored in the doc.xml as metadata 'Identifier' alongside other metadata 298 # Either way, Identifier meta will be read into the docobj automatically with other meta. 299 300 print STDERR " GreenstoneSQLPlugin processing doc $oid (reading into docobj from SQL db)\n" 301 if $self->{'verbosity'} > 0; 302 303 if($proc_mode eq "all" || $proc_mode eq "meta_only") { 304 # read in meta for the collection (i.e. 
select * from <col>_metadata table 305 306 my $sth = $gs_sql->select_from_metatable_matching_docid($oid); 307 print $outhandle "### SQL select stmt: ".$sth->{'Statement'}."\n" 308 if $self->{'verbosity'} > 2; 309 310 print $outhandle "----------SQL DB contains meta-----------\n" if $self->{'verbosity'} > 2; 311 # https://www.effectiveperlprogramming.com/2010/07/set-custom-dbi-error-handlers/ 312 while( my @row = $sth->fetchrow_array() ) { 313 #print $outhandle "row: @row\n"; 314 my ($primary_key, $did, $sid, $metaname, $metaval) = @row; 275 315 276 my $sth = $gs_sql->select_from_metatable_matching_docid($oid); 277 print $outhandle "### SQL select stmt: ".$sth->{'Statement'}."\n" 316 # get rid of the artificial "root" introduced in section id when saving to sql db 317 $sid =~ s@^root@@; 318 $sid = $doc_obj->get_top_section() unless $sid; 319 print $outhandle "### did: $did, sid: |$sid|, meta: $metaname, val: $metaval\n" 278 320 if $self->{'verbosity'} > 2; 279 321 280 print $outhandle "----------SQL DB contains meta-----------\n" if $self->{'verbosity'} > 2; 281 # https://www.effectiveperlprogramming.com/2010/07/set-custom-dbi-error-handlers/ 282 while( my @row = $sth->fetchrow_array() ) { 283 #print $outhandle "row: @row\n"; 284 my ($primary_key, $did, $sid, $metaname, $metaval) = @row; 322 # TODO: we accessed the db in utf8 mode, so, we can call doc_obj->add_utf8_meta directly: 323 $doc_obj->add_utf8_metadata($sid, $metaname, &docprint::unescape_text($metaval)); 324 } 325 print $outhandle "----------FIN READING DOC's META FROM SQL DB------------\n" 326 if $self->{'verbosity'} > 2; 327 } 328 329 if($proc_mode eq "all" || $proc_mode eq "text_only") { 330 # read in fulltxt for the collection (i.e. 
select * from <col>_fulltxt table 331 332 my $fulltxt_table = $gs_sql->get_fulltext_table_name(); 333 334 335 my $sth = $gs_sql->select_from_texttable_matching_docid($oid); 336 print $outhandle "### stmt: ".$sth->{'Statement'}."\n" if $self->{'verbosity'} > 2; 337 338 print $outhandle "----------\nSQL DB contains txt entries for-----------\n" 339 if $self->{'verbosity'} > 2; 340 while( my ($primary_key, $did, $sid, $text) = $sth->fetchrow_array() ) { 285 341 286 # get rid of the artificial "root" introduced in section id when saving to sql db 287 $sid =~ s@^root@@; 288 $sid = $doc_obj->get_top_section() unless $sid; 289 print $outhandle "### did: $did, sid: |$sid|, meta: $metaname, val: $metaval\n" 290 if $self->{'verbosity'} > 2; 291 292 # TODO: we accessed the db in utf8 mode, so, we can call doc_obj->add_utf8_meta directly: 293 $doc_obj->add_utf8_metadata($sid, $metaname, &docprint::unescape_text($metaval)); 294 } 295 print $outhandle "----------FIN READING DOC's META FROM SQL DB------------\n" 342 # get rid of the artificial "root" introduced in section id when saving to sql db 343 #$sid =~ s@^root@@; 344 $sid = $doc_obj->get_top_section() if ($sid eq "root"); 345 print $outhandle "### did: $did, sid: |$sid|, fulltext: <TXT>\n" 296 346 if $self->{'verbosity'} > 2; 297 }298 299 if($proc_mode eq "all" || $proc_mode eq "text_only") {300 # read in fulltxt for the collection (i.e. 
select * from <col>_fulltxt table301 347 302 my $fulltxt_table = $gs_sql->get_fulltext_table_name(); 303 304 305 my $sth = $gs_sql->select_from_texttable_matching_docid($oid); 306 print $outhandle "### stmt: ".$sth->{'Statement'}."\n" if $self->{'verbosity'} > 2; 307 308 print $outhandle "----------\nSQL DB contains txt entries for-----------\n" 309 if $self->{'verbosity'} > 2; 310 while( my ($primary_key, $did, $sid, $text) = $sth->fetchrow_array() ) { 311 312 # get rid of the artificial "root" introduced in section id when saving to sql db 313 #$sid =~ s@^root@@; 314 $sid = $doc_obj->get_top_section() if ($sid eq "root"); 315 print $outhandle "### did: $did, sid: |$sid|, fulltext: <TXT>\n" 316 if $self->{'verbosity'} > 2; 317 318 # TODO - pass by ref? 319 # TODO: we accessed the db in utf8 mode, so, we can call doc_obj->add_utf8_text directly: 320 $doc_obj->add_utf8_text($sid, &docprint::unescape_text($text)); 321 } 322 print $outhandle "----------FIN READING DOC's TXT FROM SQL DB------------\n" 323 if $self->{'verbosity'} > 2; 324 } 325 326 } # done reading into docobj from SQL db 348 # TODO - pass by ref? 
349 # TODO: we accessed the db in utf8 mode, so, we can call doc_obj->add_utf8_text directly: 350 $doc_obj->add_utf8_text($sid, &docprint::unescape_text($text)); 351 } 352 print $outhandle "----------FIN READING DOC's TXT FROM SQL DB------------\n" 353 if $self->{'verbosity'} > 2; 354 } 355 356 # done reading into docobj from SQL db 327 357 328 358 # don't forget to clean up on close() in superclass … … 332 362 333 363 334 # We want SQLPlugin to connect to db only during buildcol.pl phase, not during import.pl 335 # This works out okay, as close_document() (called by read()) is only invoked during buildcol.pl 336 # 337 # Further, we want a single db connection for the GS SQL Plugin to be used for 338 # the multiple plugin passes: for "dummy" pass, and for doc level and for section level indexing 339 # By calling the lazy loading get_sql_instance() from close_document(), 340 # we connect to the SQL database once per GSSQLPlugin and only during the buildcol phase. 341 # 342 # get_gssql_instance() is a lazy loading method that returns singleton db connection for a GreenstoneSQLPlugin object. ("Code pattern" get instance vs singleton.) 343 # One instance of db connection that can be used for all the many doc_objects processed by this plugin 344 # 345 # Except in methods get_gssql_instance() and deinit(), don't access self->{'_gs_sql'} directly. 
346 # Instead, call method get_gssql_instance() and store return value in a local variable, my $gs_sql 347 # 348 sub get_gssql_instance 349 { 350 my $self = shift(@_); 351 352 # if we failed to successfully connect once before, don't bother attempting to connect again 353 #return undef if(defined $self->{'failed'}); # plugin/process would have terminated with die() 354 # if we couldn't succeed connecting on any connection attempt 355 356 return $self->{'_gs_sql'} if($self->{'_gs_sql'}); 357 358 # assume we'll fail to connect 359 $self->{'failed'} = 1; 360 361 print STDERR "@@@@@@@@@@ LAZY CONNECT CALLED\n"; 362 364 # TODO: only want to work with sql db if buildcol.pl. Unfortunately, also runs on import.pl. 365 # During import, the GS SQL Plugin is called before the GS SQL Plugout with undesirable side 366 # effect that if the db doesn't exist, gssql::use_db() fails, as it won't create db. 367 368 # GS SQL Plugin::init() (and deinit()) is called by import.pl and also by buildcol.pl 369 # This means it connects and deconnects during import.pl as well. This is okay 370 # as removeold, which should drop the collection tables, happens during the import phase 371 # and therefore also requires a db connection. 372 # TODO: Eventually can try moving get_gssql_instance into gssql.pm? That way both GS SQL Plugin 373 # and Plugout would be using one connection during import.pl phase when both plugs exist. 374 375 # Call init() not begin() because there can be multiple plugin passes and begin() called for 376 # each pass (one for doc level and another for section level indexing), whereas init() should 377 # be called before any and all passes. 378 # This way, we can connect to the SQL database once per buildcol run. 379 sub init { 380 my ($self) = shift (@_); 381 ##print STDERR "@@@@@@@@@@ INIT CALLED\n"; 382 383 $self->SUPER::init(@_); # super (GreenstoneXMLPlugin) will not yet be trying to read from doc.xml (docsql .xml) files in init(). 
384 363 385 #################### 364 386 # print "@@@ SITE NAME: ". $self->{'site_name'} . "\n" if defined $self->{'site_name'}; … … 393 415 394 416 my $db_name = $self->{'site_name'} || "greenstone2"; # one database per GS3 site, for GS2 the db is called greenstone2 395 #my $build_mode = $self->{'build_mode'} || "removeold"; 396 397 # the db and its tables should exist. Attempt to use the db:417 418 # Attempt to use the db, create it if it doesn't exist (but don't create the tables yet) 419 # Bail if we can't use the database 398 420 if(!$gs_sql->use_db($db_name)) { 399 421 … … 404 426 die("Could not use db $db_name. Can't proceed.\n"); 405 427 } 406 407 #undef $self->{'failed'}; 428 408 429 409 430 # store db handle now that we're connected 410 $self->{'_gs_sql'} = $gs_sql; 411 return $gs_sql; 412 413 } 431 $self->{'gs_sql'} = $gs_sql; 432 } 433 414 434 415 435 # This method also runs on import.pl if gs_sql has a value. But we just want to run it on buildcol … … 421 441 my ($self) = shift (@_); 422 442 423 print STDERR "@@@@@@@@@@ GreenstoneSQLPlugin::DEINIT CALLED\n";424 425 if($self->{' _gs_sql'}) { # only want to work with sql db if buildcol.pl, gs_sql won't have443 ##print STDERR "@@@@@@@@@@ GreenstoneSQLPlugin::DEINIT CALLED\n"; 444 445 if($self->{'gs_sql'}) { # only want to work with sql db if buildcol.pl, gs_sql won't have 426 446 # a value except during buildcol, so when processor =~ m/buildproc$/. 427 $self->{' _gs_sql'}->disconnect_from_db()447 $self->{'gs_sql'}->disconnect_from_db() 428 448 || warn("Unable to disconnect from database " . $self->{'site_name'} . "\n"); 429 449 430 # explicitly set to undef so all future use has to make the connection again 431 undef $self->{'_gs_sql'}; 450 # explicitly delete gs_sql key (setting key to undef has a different meaning from deleting) 451 # so all future use has to make the connection again 452 delete $self->{'gs_sql'}; 432 453 } 433 454 -
main/trunk/greenstone2/perllib/plugouts/BasePlugout.pm
r32555 r32563 347 347 } 348 348 349 # GreenstoneSQLPlugout needs to know whether we're doing removeold or not 350 sub set_incremental_options { 351 my $self= shift (@_); 352 my ($removeold, $keepold, $incremental, $incremental_mode) = @_; 353 354 $self->{'removeold'} = $removeold; 355 $self->{'keepold'} = $keepold; 356 $self->{'incremental'} = $incremental; 357 $self->{'incremental_mode'} = $incremental_mode; 358 } 359 349 360 350 # OIDtype may be "hash" or "hash_on_full_filename" or "incremental" or "filename" or "dirname" or "full_filename" or "assigned" -
main/trunk/greenstone2/perllib/plugouts/GreenstoneSQLPlugout.pm
r32560 r32563 53 53 # It's fine: the die() stmts all take place before setting up the super class' begin 54 54 55 # TODO Q: about build_mode: how to detect removeold 55 # TODO Q: about build_mode: how to detect removeold. Now handled by 56 # GreenstoneSQLPlugout::remove_all(), which is inherited from a base plugin. 56 57 # TODO: deal with -removeold and everything? Or type out instructions for user 57 58 … … 117 118 return bless $self, $class; 118 119 } 119 print STDERR "***** GreenstoneSQLPlugout process mode = \"", $self->{'process_mode'}, "\"\n";120 #print STDERR "***** GreenstoneSQLPlugout process mode = \"", $self->{'process_mode'}, "\"\n"; 120 121 121 122 return bless $self, $class; … … 163 164 164 165 my $db_name = $self->{'site_name'} || "greenstone2"; # one database per GS3 site, for GS2 the db is called greenstone2 165 my $build_mode = ($self->{'removeold'}) ? "removeold" : "incremental"; 166 print STDERR "@@@@@@@@@@@@ remove_old: $build_mode\n"; 167 168 if(!$gs_sql->load_db_and_tables($db_name, $build_mode)) { 169 166 my $proc_mode = $self->{'process_mode'}; 167 168 169 my $success = $gs_sql->use_db($db_name); 170 171 if($success && $proc_mode ne "text_only") { 172 ##print STDERR "@@@@ Ensuring meta table exists\n"; 173 $success = $gs_sql->ensure_meta_table_exists(); 174 } 175 if($success && $proc_mode ne "meta_only") { 176 ##print STDERR "@@@@ Ensuring fulltxt table exists\n"; 177 $success = $gs_sql->ensure_fulltxt_table_exists(); 178 } 179 #if(!$gs_sql->load_db_and_tables($db_name, $proc_mode)) { 180 if(!$success) { 170 181 # This is fatal for the plugout, let's terminate here after disconnecting again 171 182 # PrintError would already have displayed the warning message on load fail … … 177 188 # prepare the shared/common HANDLES to SQL insert statements that contain placeholders 178 189 # and which we will reuse repeatedly when actually executing the insert statements 179 my $proc_mode = $self->{'process_mode'};190 180 191 if($proc_mode eq "all" || 
$proc_mode eq "meta_only" ) { 181 192 $self->{'metadata_prepared_insert_statement_handle'} = $gs_sql->prepare_insert_metadata_row_stmthandle(); … … 188 199 $self->{'gs_sql'} = $gs_sql; 189 200 190 print STDERR "#### Meta stmt: " . $self->{'metadata_prepared_insert_statement_handle'}->{'Statement'} . "\n";191 print STDERR "#### Full stmt: " . $self->{'fulltxt_prepared_insert_statement_handle'}->{'Statement'} . "\n";201 ##print STDERR "#### Meta stmt: " . $self->{'metadata_prepared_insert_statement_handle'}->{'Statement'} . "\n"; 202 ##print STDERR "#### Full stmt: " . $self->{'fulltxt_prepared_insert_statement_handle'}->{'Statement'} . "\n"; 192 203 193 204 # if setting up to work with sql db failed, we'd have terminated and wouldn't come up to here:
Note:
See TracChangeset
for help on using the changeset viewer.