Changeset 32543


Ignore:
Timestamp:
2018-10-26T15:10:47+13:00 (5 years ago)
Author:
ak19
Message:

Tidying up and adjusting TODO statements

Location:
main/trunk/greenstone2/perllib
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/gssql.pm

    r32541 r32543  
    212212# GreenstoneSQLPlugout, this method will not attempt to create the requested db (nor its tables)
    213213# TODO: GS SQLPlugin is called before GS SQLPlugout and attempts to use_db() - called in plugin's
    214 # init() method. This will fail if the db does not exist. Ideally want our plugin only called
     214# init() method. This will fail if the db does not exist. Ideally want the gssqlplugin only called
    215215# during buildcol.pl
    216216sub use_db {
     
    282282
    283283# "IF EXISTS is used to prevent an error from occurring if the database does not exist. ... DROP DATABASE returns the number of tables that were removed. The DROP DATABASE statement removes from the given database directory those files and directories that MySQL itself may create during normal operation." (Jun 20, 2012)
    284 #MySQL 8.0 Reference Manual :: 13.1.22 DROP DATABASE Syntax
     284# MySQL 8.0 Reference Manual :: 13.1.22 DROP DATABASE Syntax
    285285# https://dev.mysql.com/doc/en/drop-database.html
    286286sub delete_collection_tables {
     
    432432# see https://www.perlmonks.org/bare/?node=DBI%20Recipes
    433433#    The page further has a table_exists function that could work with proper comparison
    434 # TODO: Couldn't get the first solution at https://www.perlmonks.org/bare/?node_id=500050 to work though
     434# TODO Q: Couldn't get the first solution at https://www.perlmonks.org/bare/?node_id=500050 to work though
    435435sub table_exists {
    436436    my $self = shift (@_);
  • main/trunk/greenstone2/perllib/plugins/GreenstoneSQLPlugin.pm

    r32542 r32543  
    4040# TODO:
    4141# - Run TODOs here, in Plugout and in gssql.pm by Dr Bainbridge.
    42 # Ask about docsql naming convention adopted to identify OID. Better way?
     42# + Ask about docsql naming convention adopted to identify OID. Better way?
    4343# collection names -> table names: it seems hyphens not allowed. Changed to underscores.
    4444# + Startup parameters (except removeold/build_mode)
     45# - how do we detect we're to do removeold during plugout in import.pl phase
    4546# - incremental building: where do we need to add code to delete rows from our sql table after
    4647# incrementally importing a coll with fewer docs (for instance)? What about deleted/modified meta?
    47 # - Courier documents in lucene-sql collection: character (degree symbol) not preserved. Is this because we encode in utf8 when putting into db and reading back in?
     48# - "Courier" demo documents in lucene-sql collection: character (degree symbol) not preserved in title. Is this because we encode in utf8 when putting into db and reading back in?
    4849# - Have not yet tested writing out just meta or just fulltxt to sql db and reading just that
    4950# back in from the sql db while the remainder is to be read back in from the docsql .xml files.
     
    5152# (sections) which in this case had made it easy to reconstruct the doc_obj in memory in the correct order
    5253
     54# TODO: deal with incremental vs removeold. If docs removed from import folder, then import step
     55# won't delete it from archives but buildcol step will. Need to implement this with this database plugin or wherever the actual flow is
     56
     57# TODO: Add public instructions on using this plugin and its plugout: start with installing mysql binary, changing pwd, running the server (and the client against it for checking, basic cmds like create and drop). Then discuss db name, table names (per coll), db cols and col types, and how the plugout and plugin work.
     58# Discuss the plugin/plugout parameters.
     59
     60
    5361# GreenstoneSQLPlugin inherits from GreenstoneXMLPlugin so that if meta or fulltext
    5462# is still written out to doc.xml (docsql .xml), that will be processed as usual,
     
    5664# is written out by GreenstoneSQLPlugout into the SQL db).
    5765
    58 # TODO:
    59 # no more docoid in docsql .xml filename, set OID as attribute of root element inside docsql.xml file instead
    60 # and parse it out
    61 
    62 # TODO: deal with incremental vs removeold. If docs removed from import folder, then import step
    63 # won't delete it from archives but buildcol step will. Need to implement this with this database plugin or wherever the actual flow is
    64 
    65 # TODO: Add public instructions on using this plugin and its plugout: start with installing mysql binary, changing pwd, running the server (and the client against it for checking, basic cmds like create and drop). Then discuss db name, table names (per coll), db cols and col types, and how the plugout and plugin work.
    66 # Discuss the plugin/plugout parameters.
    6766
    6867sub BEGIN {
     
    201200
    202201    my $oid = $self->{'doc_oid'}; # we stored current doc's OID during sub xml_start_tag()
    203     print $outhandle "==== OID of document (meta|text) to be read in from DB: $oid\n"
     202    print $outhandle "++++ OID of document (meta|text) to be read in from DB: $oid\n"
    204203    if $self->{'verbosity'} > 1;
    205 
    206204   
    207205    # For now, we have access to doc_obj (until just before super::close_document() terminates)
    208    
    209     $self->{'doc_obj'}->set_OID($oid); # complex method. Is this necessary, since we just want to write meta and txt for the docobj to index?
    210    
    211     # checking that complicated looking method set_OID() hasn't modified oid
    212     if($oid ne $self->{'doc_obj'}->get_OID()) {
    213     print STDERR "@@@@ WARNING: OID after setting on doc_obj = " . $self->{'doc_obj'}->get_OID() . " and is not the same as original OID $oid from docsqloid.xml filename\n";
    214     }
    215 
    216    
    217     # TODO: This function is called on a per doc.xml file basis
    218     # but we can process all docs of a collection in one go when dealing with the SQL tables for
    219     # the collection. How and where should we read in the collection tables then?
    220     # TODO: Perhaps MySQLPlugout could write out a token file (.gssql) into archives during import.pl
    221     # and if that file is detected, then MySQLPlugin::read() is passed in that file during
    222     # buildcol.pl. And that file will trigger reading the 2 tables for the collection???
     206
     207    # no need to call $self->{'doc_obj'}->set_OID($oid);
     208    # because either the OID is stored in the SQL db as meta 'Identifier' alongside other metadata
     209    # or it's stored in the doc.xml as metadata 'Identifier' alongside other metadata
     210    # Either way, Identifier meta will be read into the docobj automatically with other meta.
     211
    223212    my $proc_mode = $self->{'process_mode'};
    224213    if($proc_mode eq "all" || $proc_mode eq "meta_only") {
     
    226215
    227216    my $sth = $gs_sql->select_from_metatable_matching_docid($oid); 
    228     print $outhandle "### stmt: ".$sth->{'Statement'}."\n" if $self->{'verbosity'} > 1;
     217    print $outhandle "### SQL select stmt: ".$sth->{'Statement'}."\n"
     218        if $self->{'verbosity'} > 1;
    229219
    230220    print $outhandle "----------SQL DB contains meta-----------\n" if $self->{'verbosity'} > 1;
     
    281271
    282272
    283 # TODO: only want to work with sql db if buildcol.pl. Unfortunately, also runs on import.pl
    284 # call init() not begin() because there can be multiple plugin passes
     273# TODO: only want to work with sql db if buildcol.pl. Unfortunately, also runs on import.pl.
     274# During import, the GS SQL Plugin is called before the GS SQL Plugout with undesirable side
     275# effect that if the db doesn't exist, gssql::use_db() fails, as it won't create db.
     276
     277# Call init() not begin() because there can be multiple plugin passes
    285278# and init() should be called before all passes:
    286279# one for doc level and another for section level indexing
  • main/trunk/greenstone2/perllib/plugouts/GreenstoneSQLPlugout.pm

    r32542 r32543  
    4040
    4141# TODO: SIGTERM rollback and disconnect?
    42 # TODO Q: what about verbosity for debugging
     42# TODO Q: what about verbosity for debugging, instead of current situation of printing out upon debug set at the expense of writing to db
    4343# TODO Q: introduced site_name param to plugins and plugouts. Did I do it right? And should they have hiddengli = "yes"
    4444
    45 # this plugout does not output xml to a file, but outputs rows into a mysql table
     45# this plugout does not output the metadata and/or fulltxt xml to a file,
     46# but outputs rows into a mysql table for metadata and/or a table for fulltxt
    4647sub BEGIN {
    4748    @GreenstoneSQLPlugout::ISA = ('GreenstoneXMLPlugout');
     
    5657
    5758# TODO Q: what is "group" in GreenstoneXMLPlugout?
    58 # TODO Q: site_name only exists for GS3. What about GS2?
    59 
    6059
    6160my $process_mode_list =
     
    141140    #print STDERR "@@@@ proc_mode: " . $self->{'process_mode'} . "\n";
    142141   
    143     ########### TODO: these should be set from cmdline/GLI options to plugout #########
     142    ########### TODO: deal with build mode #########
    144143
    145144    $self->{'build_mode'} = "removeold";
     
    236235}
    237236 
    238 # TODO: check arc-inf.db for whether each entry is to be deleted/indexed/reindexed/been indexed
     237# TODO: check arc-inf.db for whether each entry is to be deleted/indexed/reindexed/been indexed?
     238# That's only for indexing, not for this step which only generates the content in archives dir
    239239sub saveas {
    240240    my $self = shift (@_);
     
    253253
    254254    # TODO: also set debugging in begin()? Then stmts creating db and tables also sent to debug out and not executed
    255 
    256     # TODO: remove unused old_unused_saveas from GreenstoneXMLPlugout
    257    
    258    
    259     # 2. overriding saving behaviour to do what the superclass does PLUS saving to sql db   
    260 
    261     #NOTE: if proc_mode == all, then "breadcrumbs" go into both meta and txt elements of doc.xml:
    262     # statements pointing viewer to the sql db for contents
     255   
     256   
     257    # 2. overriding saving behaviour to do what the superclass does (writing out doc.xml files,
     258    # under new name of docsql.xml, with breadcrumbs pointing to sql db) PLUS saving to sql db   
     259
     260    # NOTE: if proc_mode == all, then "breadcrumbs" (statements pointing viewer to the sql db
     261    # for contents) go into both meta and txt elements of doc.xml (docsql.xml specifically):
    263262   
    264     # write the INVERSE into doc.xml as to what is written to the db   
     263    # write the INVERSE into doc.xml as to what is written to the SQL db   
    265264    my $docxml_output_options = { 'output' => docprint::OUTPUT_NONE };
    266265    if($proc_mode eq "meta_only" ) { # since only meta to go into MySQL db, text will go into docxml
     
    272271    # now we've prepared to write out whatever is meant to go into docxml
    273272    # and can do the actual steps superclass GreenstoneXMLPlugout carries out to write out docxml
    274     # So: write out the doc xml file for the current document
     273    # So: write out the doc xml file, "docsql.xml", for the current document
    275274    my $section_text = &docprint::get_section_xml($doc_obj, $docxml_output_options);
    276275    print $docxml_outhandler $section_text;   
  • main/trunk/greenstone2/perllib/plugouts/GreenstoneXMLPlugout.pm

    r32542 r32543  
    7676}
    7777
    78 sub old_unused_saveas {
    79     my $self = shift (@_);
    80     my ($doc_obj, $doc_dir) = @_;
    81     my $outhandler;
    82     my $output_file;
    83     if ($self->{'debug'}) {
    84     $outhandler = STDOUT;
    85     }
    86     else {
    87        
    88     $self->process_assoc_files($doc_obj, $doc_dir, '');
    89     $self->process_metafiles_metadata ($doc_obj);
    90    
    91     # open up the outhandler   
    92     if ($self->is_group() && !$self->{'new_doc_dir'}) {
    93         # we already have a handle open ??
    94         $outhandler = $self->{'group_outhandler'};
    95     } else {
    96         $output_file = &FileUtils::filenameConcatenate(
    97         $self->{'output_dir'}, $doc_dir, $self->get_doc_xml_filename($doc_obj));
    98         # open the new handle
    99         $self->open_xslt_pipe($output_file, $self->{'xslt_file'});
    100 
    101         if (defined $self->{'xslt_writer'}){
    102         $outhandler = $self->{'xslt_writer'};
    103         }
    104         else{
    105         $outhandler = $self->get_output_handler($output_file);
    106         }
    107        
    108         if ($self->is_group()) {
    109         $self->{'group_outhandler'} = $outhandler;
    110         }
    111     }
    112     } # else not debug
    113     binmode($outhandler,":utf8");
    114 
    115     # only output the header if we have started a new doc
    116     if (!$self->is_group() || $self->{'new_doc_dir'}) {
    117     $self->output_xml_header($outhandler);
    118     }
    119 
    120     my $section_text = &docprint::get_section_xml($doc_obj);
    121     print $outhandler $section_text;
    122  
    123     # only output the footer if we are not doing group stuff. The group file will be finished in close_group_output
    124     if (!$self->is_group()) {
    125     $self->output_xml_footer($outhandler);
    126     }
    127 
    128     # close off the output - in a group process situation, this will be done by close_group_output
    129     if (!$self->is_group() && !$self->{'debug'}) {
    130     if (defined $self->{'xslt_writer'}){     
    131         $self->close_xslt_pipe();
    132     }
    133     else {
    134         &FileUtils::closeFileHandle($output_file, \$outhandler) if defined $output_file;
    135     }
    136     }
    137     $self->{'short_doc_file'} = &FileUtils::filenameConcatenate(
    138     $doc_dir, $self->get_doc_xml_filename($doc_obj)); 
    139    
    140     $self->store_output_info_reference($doc_obj);
    141    
    142 }
    143 
    14478# can be overridden in subclasses, for instance by GreenstoneSQLPlugout, to produce a different filename
    14579# like docsql.xml
Note: See TracChangeset for help on using the changeset viewer.