Changeset 32543 for main/trunk
- Timestamp:
- 2018-10-26T15:10:47+13:00 (5 years ago)
- Location:
- main/trunk/greenstone2/perllib
- Files:
-
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/perllib/gssql.pm
r32541 r32543 212 212 # GreenstoneSQLPlugout, this method will not attempt to create the requested db (nor its tables) 213 213 # TODO: GS SQLPlugin is called before GS SQLPlugout and attempts to use_db() - called in plugin's 214 # init() method. This will fail if the db does not exist. Ideally want ourplugin only called 214 # init() method. This will fail if the db does not exist. Ideally want the gssqlplugin only called 215 215 # during buildcol.pl 216 216 sub use_db { … … 282 282 283 283 # "IF EXISTS is used to prevent an error from occurring if the database does not exist. ... DROP DATABASE returns the number of tables that were removed. The DROP DATABASE statement removes from the given database directory those files and directories that MySQL itself may create during normal operation.Jun 20, 2012" 284 # MySQL 8.0 Reference Manual :: 13.1.22 DROP DATABASE Syntax 284 # MySQL 8.0 Reference Manual :: 13.1.22 DROP DATABASE Syntax 285 285 # https://dev.mysql.com/doc/en/drop-database.html 286 286 sub delete_collection_tables { … … 432 432 # see https://www.perlmonks.org/bare/?node=DBI%20Recipes 433 433 # The page further has a table_exists function that could work with proper comparison 434 # TODO : Couldn't get the first solution at https://www.perlmonks.org/bare/?node_id=500050 to work though 434 # TODO Q: Couldn't get the first solution at https://www.perlmonks.org/bare/?node_id=500050 to work though 435 435 sub table_exists { 436 436 my $self = shift (@_); -
main/trunk/greenstone2/perllib/plugins/GreenstoneSQLPlugin.pm
r32542 r32543 40 40 # TODO: 41 41 # - Run TODOs here, in Plugout and in gssql.pm by Dr Bainbridge. 42 # Ask about docsql naming convention adopted to identify OID. Better way?42 # + Ask about docsql naming convention adopted to identify OID. Better way? 43 43 # collection names -> table names: it seems hyphens not allowed. Changed to underscores. 44 44 # + Startup parameters (except removeold/build_mode) 45 # - how do we detect we're to do removeold during plugout in import.pl phase 45 46 # - incremental building: where do we need to add code to delete rows from our sql table after 46 47 # incrementally importing a coll with fewer docs (for instance)? What about deleted/modified meta? 47 # - Courier documents in lucene-sql collection: character (degree symbol) not preserved. Is this because we encode in utf8 when putting into db and reading back in?48 # - "Courier" demo documents in lucene-sql collection: character (degree symbol) not preserved in title. Is this because we encode in utf8 when putting into db and reading back in? 48 49 # - Have not yet tested writing out just meta or just fulltxt to sql db and reading just that 49 50 # back in from the sql db while the remainder is to be read back in from the docsql .xml files. … … 51 52 # (sections) which in this case had made it easy to reconstruct the doc_obj in memory in the correct order 52 53 54 # TODO: deal with incremental vs removeold. If docs removed from import folder, then import step 55 # won't delete it from archives but buildcol step will. Need to implement this with this database plugin or wherever the actual flow is 56 57 # TODO: Add public instructions on using this plugin and its plugout: start with installing mysql binary, changing pwd, running the server (and the client against it for checking, basic cmds like create and drop). Then discuss db name, table names (per coll), db cols and col types, and how the plugout and plugin work. 58 # Discuss the plugin/plugout parameters. 
59 60 53 61 # GreenstoneSQLPlugin inherits from GreenstoneXMLPlugin so that it if meta or fulltext 54 62 # is still written out to doc.xml (docsql .xml), that will be processed as usual, … … 56 64 # is written out by GreenstoneSQLPlugout into the SQL db). 57 65 58 # TODO:59 # no more docoid in docsql .xml filename, set OID as attribute of root element inside docsql.xml file instead60 # and parse it out61 62 # TODO: deal with incremental vs removeold. If docs removed from import folder, then import step63 # won't delete it from archives but buildcol step will. Need to implement this with this database plugin or wherever the actual flow is64 65 # TODO: Add public instructions on using this plugin and its plugout: start with installing mysql binary, changing pwd, running the server (and the client against it for checking, basic cmds like create and drop). Then discuss db name, table names (per coll), db cols and col types, and how the plugout and plugin work.66 # Discuss the plugin/plugout parameters.67 66 68 67 sub BEGIN { … … 201 200 202 201 my $oid = $self->{'doc_oid'}; # we stored current doc's OID during sub xml_start_tag() 203 print $outhandle " ====OID of document (meta|text) to be read in from DB: $oid\n"202 print $outhandle "++++ OID of document (meta|text) to be read in from DB: $oid\n" 204 203 if $self->{'verbosity'} > 1; 205 206 204 207 205 # For now, we have access to doc_obj (until just before super::close_document() terminates) 208 209 $self->{'doc_obj'}->set_OID($oid); # complex method. Is this necessary, since we just want to write meta and txt for the docobj to index? 210 211 # checking that complicated looking method set_OID() hasn't modified oid 212 if($oid ne $self->{'doc_obj'}->get_OID()) { 213 print STDERR "@@@@ WARNING: OID after setting on doc_obj = " . $self->{'doc_obj'}->get_OID() . 
" and is not the same as original OID $oid from docsqloid.xml filename\n"; 214 } 215 216 217 # TODO: This function is called on a per doc.xml file basis 218 # but we can process all docs of a collection in one go when dealing with the SQL tables for 219 # the collection. How and where should we read in the collection tables then? 220 # TODO: Perhaps MySQLPlugout could write out a token file (.gssql) into archives during import.pl 221 # and if that file is detected, then MySQLPlugin::read() is passed in that file during 222 # buildcol.pl. And that file will trigger reading the 2 tables for the collection??? 206 207 # no need to call $self->{'doc_obj'}->set_OID($oid); 208 # because either the OID is stored in the SQL db as meta 'Identifier' alongside other metadata 209 # or it's stored in the doc.xml as metadata 'Identifier' alongside other metadata 210 # Either way, Identifier meta will be read into the docobj automatically with other meta. 211 223 212 my $proc_mode = $self->{'process_mode'}; 224 213 if($proc_mode eq "all" || $proc_mode eq "meta_only") { … … 226 215 227 216 my $sth = $gs_sql->select_from_metatable_matching_docid($oid); 228 print $outhandle "### stmt: ".$sth->{'Statement'}."\n" if $self->{'verbosity'} > 1; 217 print $outhandle "### SQL select stmt: ".$sth->{'Statement'}."\n" 218 if $self->{'verbosity'} > 1; 229 219 230 220 print $outhandle "----------SQL DB contains meta-----------\n" if $self->{'verbosity'} > 1; … … 281 271 282 272 283 # TODO: only want to work with sql db if buildcol.pl. Unfortunately, also runs on import.pl 284 # call init() not begin() because there can be multiple plugin passes 273 # TODO: only want to work with sql db if buildcol.pl. Unfortunately, also runs on import.pl. 274 # During import, the GS SQL Plugin is called before the GS SQL Plugout with undesirable side 275 # effect that if the db doesn't exist, gssql::use_db() fails, as it won't create db. 
276 277 # Call init() not begin() because there can be multiple plugin passes 285 278 # and init() should be called before all passes: 286 279 # one for doc level and another for section level indexing -
main/trunk/greenstone2/perllib/plugouts/GreenstoneSQLPlugout.pm
r32542 r32543 40 40 41 41 # TODO: SIGTERM rollback and disconnect? 42 # TODO Q: what about verbosity for debugging 42 # TODO Q: what about verbosity for debugging, instead of current situation of printing out upon debug set at the expense of writing to db 43 43 # TODO Q: introduced site_name param to plugins and plugouts. Did I do it right? And should they have hiddengli = "yes" 44 44 45 # this plugout does not output xml to a file, but outputs rows into a mysql table 45 # this plugout does not output the metadata and/or fulltxt xml to a file, 46 # but outputs rows into a mysql table for metadata and/or a table for fulltxt 46 47 sub BEGIN { 47 48 @GreenstoneSQLPlugout::ISA = ('GreenstoneXMLPlugout'); … … 56 57 57 58 # TODO Q: what is "group" in GreenstoneXMLPlugout? 58 # TODO Q: site_name only exists for GS3. What about GS2?59 60 59 61 60 my $process_mode_list = … … 141 140 #print STDERR "@@@@ proc_mode: " . $self->{'process_mode'} . "\n"; 142 141 143 ########### TODO: these should be set from cmdline/GLI options to plugout#########142 ########### TODO: deal with build mode ######### 144 143 145 144 $self->{'build_mode'} = "removeold"; … … 236 235 } 237 236 238 # TODO: check arc-inf.db for whether each entry is to be deleted/indexed/reindexed/been indexed 237 # TODO: check arc-inf.db for whether each entry is to be deleted/indexed/reindexed/been indexed? 238 # That's only for indexing, not for this step which only generates the content in archives dir 239 239 sub saveas { 240 240 my $self = shift (@_); … … 253 253 254 254 # TODO: also set debugging in begin()? Then stmts creating db and tables also sent to debug out and not executed 255 256 # TODO: remove unused old_unused_saveas from GreenstoneXMLPlugout 257 258 259 # 2. 
overriding saving behaviour to do what the superclass does PLUS saving to sql db 260 261 #NOTE: if proc_mode == all, then "breadcrumbs" go into both meta and txt elements of doc.xml: 262 # statements pointing viewer to the sql db for contents 255 256 257 # 2. overriding saving behaviour to do what the superclass does (writing out doc.xml files, 258 # under new name of docsql.xml, with breadcrumbs pointing to sql db) PLUS saving to sql db 259 260 # NOTE: if proc_mode == all, then "breadcrumbs" (statements pointing viewer to the sql db 261 # for contents) go into both meta and txt elements of doc.xml (docsql.xml specifically): 263 262 264 # write the INVERSE into doc.xml as to what is written to the db263 # write the INVERSE into doc.xml as to what is written to the SQL db 265 264 my $docxml_output_options = { 'output' => docprint::OUTPUT_NONE }; 266 265 if($proc_mode eq "meta_only" ) { # since only meta to go into MySQL db, text will go into docxml … … 272 271 # now we've prepared to write out whatever is meant to go into docxml 273 272 # and can do actual the steps superclass GreenstoneXMLPlugout carries out to write out docxml 274 # So: write out the doc xml file for the current document273 # So: write out the doc xml file, "docsql.xml", for the current document 275 274 my $section_text = &docprint::get_section_xml($doc_obj, $docxml_output_options); 276 275 print $docxml_outhandler $section_text; -
main/trunk/greenstone2/perllib/plugouts/GreenstoneXMLPlugout.pm
r32542 r32543 76 76 } 77 77 78 sub old_unused_saveas {79 my $self = shift (@_);80 my ($doc_obj, $doc_dir) = @_;81 my $outhandler;82 my $output_file;83 if ($self->{'debug'}) {84 $outhandler = STDOUT;85 }86 else {87 88 $self->process_assoc_files($doc_obj, $doc_dir, '');89 $self->process_metafiles_metadata ($doc_obj);90 91 # open up the outhandler92 if ($self->is_group() && !$self->{'new_doc_dir'}) {93 # we already have a handle open ??94 $outhandler = $self->{'group_outhandler'};95 } else {96 $output_file = &FileUtils::filenameConcatenate(97 $self->{'output_dir'}, $doc_dir, $self->get_doc_xml_filename($doc_obj));98 # open the new handle99 $self->open_xslt_pipe($output_file, $self->{'xslt_file'});100 101 if (defined $self->{'xslt_writer'}){102 $outhandler = $self->{'xslt_writer'};103 }104 else{105 $outhandler = $self->get_output_handler($output_file);106 }107 108 if ($self->is_group()) {109 $self->{'group_outhandler'} = $outhandler;110 }111 }112 } # else not debug113 binmode($outhandler,":utf8");114 115 # only output the header if we have started a new doc116 if (!$self->is_group() || $self->{'new_doc_dir'}) {117 $self->output_xml_header($outhandler);118 }119 120 my $section_text = &docprint::get_section_xml($doc_obj);121 print $outhandler $section_text;122 123 # only output the footer if we are not doing group stuff. 
The group file will be finished in close_group_output124 if (!$self->is_group()) {125 $self->output_xml_footer($outhandler);126 }127 128 # close off the output - in a group process situation, this will be done by close_group_output129 if (!$self->is_group() && !$self->{'debug'}) {130 if (defined $self->{'xslt_writer'}){131 $self->close_xslt_pipe();132 }133 else {134 &FileUtils::closeFileHandle($output_file, \$outhandler) if defined $output_file;135 }136 }137 $self->{'short_doc_file'} = &FileUtils::filenameConcatenate(138 $doc_dir, $self->get_doc_xml_filename($doc_obj));139 140 $self->store_output_info_reference($doc_obj);141 142 }143 144 78 # can be overridden in subclasses, for instance by GreenstoneSQLPlugout, to produce a different filename 145 79 # like docsql.xml
Note:
See TracChangeset
for help on using the changeset viewer.