- Timestamp:
- 2018-10-25T20:12:42+13:00 (5 years ago)
- Location:
- main/trunk/greenstone2
- Files:
-
- 5 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/bin/script/import.pl
r32528 r32541 89 89 'desc' => "{export.saveas.GreenstoneMETS}"}, 90 90 { 'name' => "GreenstoneSQL", 91 'desc' => "{export.saveas.GreenstoneSQL}"}, 92 { 'name' => "GreenstoneSQL_metadata_only", 93 'desc' => "{export.saveas.GreenstoneSQL.meta_only}"}, 94 { 'name' => "GreenstoneSQL_fulltext_only", 95 'desc' => "{export.saveas.GreenstoneSQL.text_only}"}, 91 'desc' => "{export.saveas.GreenstoneSQL}"} 96 92 ]; 97 93 -
main/trunk/greenstone2/perllib/gssql.pm
r32538 r32541 106 106 # TODO: Consider AutoCommit status (and Autocommit off allowing commit or rollback for GS coll build cancel) later 107 107 108 # TODO: where should the defaults for these params be, here or in GS-SQLPlugin/Plugout? 108 109 sub connect_to_db { 109 110 my $self= shift (@_); … … 210 211 # This will terminate if the db does not exist. Unlike load_db_and_tables() above, used by 211 212 # GreenstoneSQLPlugout, this method will not attempt to create the requested db (nor its tables) 213 # TODO: GS SQLPlugin is called before GS SQLPlugout and attempts to use_db() - called in plugin's 214 # init() method. This will fail if the db does not exist. Ideally want our plugin only called 215 # during buildcol.pl 212 216 sub use_db { 213 217 my $self= shift (@_); … … 294 298 295 299 # Don't call this: it will delete the meta and full text tables for ALL collections in $db_name (localsite by default)! 296 # this is just for debugging300 # This method is just here for debugging (for testing creating a database when there is none) 297 301 sub _delete_database { 298 302 my $self= shift (@_); … … 428 432 # see https://www.perlmonks.org/bare/?node=DBI%20Recipes 429 433 # The page further has a table_exists function that could work with proper comparison 430 # Couldn't get the first solution at https://www.perlmonks.org/bare/?node_id=500050 to work though434 # TODO: Couldn't get the first solution at https://www.perlmonks.org/bare/?node_id=500050 to work though 431 435 sub table_exists { 432 436 my $self = shift (@_); -
main/trunk/greenstone2/perllib/plugins/GreenstoneSQLPlugin.pm
r32538 r32541 39 39 40 40 # TODO: 41 # - Run TODOs here and in plugout by Dr Bainbridge. Ask about docsql naming convention adopted42 # to identify OID. Better way?43 # collection names -> table names: hyphens not allowed?Changed to underscores.41 # - Run TODOs here, in Plugout and in gssql.pm by Dr Bainbridge. 42 # Ask about docsql naming convention adopted to identify OID. Better way? 43 # collection names -> table names: it seems hyphens not allowed. Changed to underscores. 44 44 # - Startup parameters 45 45 # - incremental building: where do we need to add code to delete rows from our sql table after … … 48 48 # - Have not yet tested writing out just meta or just fulltxt to sql db and reading just that 49 49 # back in from the sql db while the remainder is to be read back in from the docsql .xml files. 50 50 # - Ask if I can assume that all SQL dbs (not just MySQL) will preserve the order of inserted nodes 51 # (sections) which in this case had made it easy to reconstruct the doc_obj in memory in the correct order 51 52 52 53 # GreenstoneSQLPlugin inherits from GreenstoneXMLPlugin so that it if meta or fulltext … … 55 56 # is written out by GreenstoneSQLPlugout into the SQL db). 56 57 58 # TODO: 59 # no more docoid in docsql .xml filename, set OID as attribute of root element inside docsql.xml file instead 60 # and parse it out 61 62 # TODO: deal with incremental vs removeold. If docs removed from import folder, then import step 63 # won't delete it from archives but buildcol step will. Need to implement this with this database plugin or wherever the actual flow is 57 64 58 65 sub BEGIN { … … 83 90 'deft' => &get_default_process_exp(), 84 91 'reqd' => "no" }, 85 { 'name' => "process_mode", 86 'desc' => "{GreenstoneSQLPlug.process_mode}", 87 'type' => "enum", 88 'list' => $process_mode_list, 89 'deft' => "all", 90 'reqd' => "no"} 92 { 'name' => "process_mode", 93 'desc' => "{GreenstoneSQLPlug.process_mode}", 94 'type' => "enum", 95 'list' => $process_mode_list, 96 'deft' => "all", 97 'reqd' => "no"}, 98 { 'name' => "db_driver", 99 'desc' => "{GreenstoneSQLPlug.db_driver}", 100 'type' => "string", 101 'deft' => "mysql", 102 'reqd' => "yes"}, 103 { 'name' => "db_client_user", 104 'desc' => "{GreenstoneSQLPlug.db_client_user}", 105 'type' => "string", 106 'deft' => "root", 107 'reqd' => "yes"}, 108 { 'name' => "db_client_pwd", 109 'desc' => "{GreenstoneSQLPlug.db_client_pwd}", 110 'type' => "string", 111 'deft' => "", 112 'reqd' => "yes"}, # pwd required? 113 { 'name' => "db_host", 114 'desc' => "{GreenstoneSQLPlug.db_host}", 115 'type' => "string", 116 'deft' => "127.0.0.1", 117 'reqd' => "yes"}, 118 { 'name' => "db_encoding", 119 'desc' => "{GreenstoneSQLPlug.db_encoding}", 120 'type' => "string", 121 'deft' => "utf8", 122 'reqd' => "yes"} 91 123 ]; 92 124 … … 200 232 #$sid =~ s@^root@@; 201 233 $sid = $doc_obj->get_top_section() if ($sid eq "root"); 202 print $outhandle "### did: $did, sid: |$sid|, fulltext: <TXT NOT PRINTED>\n"234 print $outhandle "### did: $did, sid: |$sid|, fulltext: <TXT>\n" 203 235 if $self->{'verbosity'} > 1; 204 236 … … 229 261 $self->SUPER::init(@_); # super (GreenstoneXMLPlugin) will not yet be trying to read from doc.xml (docsql .xml) files in init(). 230 262 231 # TODO: how do we know what site we're dealing with unless this is passed in, by buildcol? 232 ########### 263 #################### 233 264 # print "@@@ SITE NAME: ". $self->{'site_name'} . "\n" if defined $self->{'site_name'}; 234 265 # print "@@@ COLL NAME: ". $ENV{'GSDLCOLLECTION'} . "\n"; 235 236 $self->{'db_driver'} = "mysql"; 237 $self->{'site_name'} = "localsite"; 238 $self->{'db_client_user'} = "root"; 239 $self->{'db_client_pwd'} = "6reenstone3"; 240 $self->{'build_mode'} = "removeold"; 241 $self->{'db_host'} = "127.0.0.1"; 242 $self->{'db_encoding'} = "utf8"; 243 ########### 266 267 # print STDERR "@@@@ db_pwd: " . $self->{'db_client_pwd'} . "\n"; 268 # print STDERR "@@@@ user: " . $self->{'db_client_user'} . "\n"; 269 # print STDERR "@@@@ db_host: " . $self->{'db_host'} . "\n"; 270 # print STDERR "@@@@ db_enc: " . $self->{'db_encoding'} . "\n"; 271 # print STDERR "@@@@ db_driver: " . $self->{'db_driver'} . "\n"; 272 #################### 244 273 245 274 my $gs_sql = new gssql({ 246 275 'collection_name' => $ENV{'GSDLCOLLECTION'}, 247 276 'db_encoding' => $self->{'db_encoding'} 248 #'db_name' => $self->{'site_name'},249 #'build_mode' => $self->{'build_mode'},250 277 } 251 278 ); … … 265 292 } 266 293 267 my $db_name = $self->{'site_name'} || " localsite"; # one database per GS3 site294 my $db_name = $self->{'site_name'} || "greenstone2"; # one database per GS3 site, for GS2 the db is called greenstone2 268 295 #my $build_mode = $self->{'build_mode'} || "removeold"; 269 296 … … 274 301 # PrintError would already have displayed the warning message on load fail 275 302 $gs_sql->disconnect_from_db() 276 || warn("Unable to disconnect from database " . $self->{'site_name'} . "\n");303 || warn("Unable to disconnect from database.\n"); 277 304 die("Could not use db $db_name. Can't proceed.\n"); 278 305 } -
main/trunk/greenstone2/perllib/plugouts/GreenstoneSQLPlugout.pm
r32537 r32541 41 41 # TODO: SIGTERM rollback and disconnect? 42 42 # TODO Q: what about verbosity for debugging 43 # TODO Q: introduced site_name param to plugins and plugouts. Did I do it right? And should they have hiddengli = "yes" 43 44 44 45 # this plugout does not output xml to a file, but outputs rows into a mysql table … … 64 65 'desc' => "{GreenstoneSQLPlug.process_mode.all}" } ]; 65 66 67 # The following are the saveas.options: 66 68 my $arguments = [ 67 { 'name' => "process_mode", 68 'desc' => "{GreenstoneSQLPlug.process_mode}", 69 'type' => "enum", 70 'list' => $process_mode_list, 71 'deft' => "all", 72 'reqd' => "no", 73 'hiddengli' => "no"} ]; 69 { 'name' => "process_mode", 70 'desc' => "{GreenstoneSQLPlug.process_mode}", 71 'type' => "enum", 72 'list' => $process_mode_list, 73 'deft' => "all", 74 'reqd' => "no", 75 'hiddengli' => "no"}, 76 { 'name' => "db_driver", 77 'desc' => "{GreenstoneSQLPlug.db_driver}", 78 'type' => "string", 79 'deft' => "mysql", 80 'reqd' => "yes"}, 81 { 'name' => "db_client_user", 82 'desc' => "{GreenstoneSQLPlug.db_client_user}", 83 'type' => "string", 84 'deft' => "root", 85 'reqd' => "yes"}, 86 { 'name' => "db_client_pwd", 87 'desc' => "{GreenstoneSQLPlug.db_client_pwd}", 88 'type' => "string", 89 'deft' => "", 90 'reqd' => "yes"}, # pwd required? 91 { 'name' => "db_host", 92 'desc' => "{GreenstoneSQLPlug.db_host}", 93 'type' => "string", 94 'deft' => "127.0.0.1", 95 'reqd' => "yes"}, 96 { 'name' => "db_encoding", 97 'desc' => "{GreenstoneSQLPlug.db_encoding}", 98 'type' => "string", 99 'deft' => "utf8", 100 'reqd' => "yes"} 101 ]; 74 102 75 103 my $options = { 'name' => "GreenstoneSQLPlugout", … … 103 131 my $self= shift (@_); 104 132 133 # The saveas.options 134 #print STDERR "@@@@ PLUGOUT db_pwd: " . $self->{'db_client_pwd'} . "\n"; 135 #print STDERR "@@@@ user: " . $self->{'db_client_user'} . "\n"; 136 #print STDERR "@@@@ db_host: " . $self->{'db_host'} . "\n"; 137 #print STDERR "@@@@ db_enc: " . $self->{'db_encoding'} . "\n"; 138 #print STDERR "@@@@ db_driver: " . $self->{'db_driver'} . "\n"; 139 #print STDERR "@@@@ proc_mode: " . $self->{'process_mode'} . "\n"; 140 105 141 ########### TODO: these should be set from cmdline/GLI options to plugout ######### 106 $self->{'db_driver'} = "mysql"; 107 $self->{'site_name'} = "localsite"; 108 $self->{'db_client_user'} = "root"; 109 $self->{'db_client_pwd'} = "6reenstone3"; 142 110 143 $self->{'build_mode'} = "removeold"; 111 #$self->{'db_host'} = "127.0.0.1"; 112 #$self->{'db_encoding'} = "utf8"; 113 #TODO: proc_mode is also a saveas option 114 144 115 145 ############ LOAD NECESSARY OPTIONS ########### 116 print STDERR "########## COLLECTION: ". $ENV{'GSDLCOLLECTION'}."\n"; 117 #$self->{'collection_name'} = $ENV{'GSDLCOLLECTION'}; 146 #print "@@@ plugout SITE NAME: ". $self->{'site_name'} . "\n" if defined $self->{'site_name'}; 147 #print STDERR "########## COLLECTION: ". $ENV{'GSDLCOLLECTION'}."\n"; 148 118 149 print STDERR "***** GreenstoneSQLPlugout process mode = \"", $self->{'process_mode'}, "\"\n"; 119 150 … … 121 152 'collection_name' => $ENV{'GSDLCOLLECTION'}, 122 153 'db_encoding' => $self->{'db_encoding'} 123 #'db_name' => $self->{'site_name'},124 #'build_mode' => $self->{'build_mode'},125 154 }; 126 155 … … 142 171 } 143 172 144 my $db_name = $self->{'site_name'} || " localsite"; # one database per GS3 site173 my $db_name = $self->{'site_name'} || "greenstone2"; # one database per GS3 site, for GS2 the db is called greenstone2 145 174 my $build_mode = $self->{'build_mode'} || "removeold"; 146 175 if(!$gs_sql->load_db_and_tables($db_name, $build_mode)) { … … 149 178 # PrintError would already have displayed the warning message on load fail 150 179 $gs_sql->disconnect_from_db() 151 || warn("Unable to disconnect from database " . $self->{'site_name'} . "\n");180 || warn("Unable to disconnect from database.\n"); 152 181 die("Could not use db $db_name and/or prepare its tables. Can't proceed.\n"); 153 182 } -
main/trunk/greenstone2/perllib/strings.properties
r32540 r32541 417 417 418 418 export.saveas.GreenstoneXML:Greenstone XML Archive format 419 export.saveas.GreenstoneSQL:Save metadata and text in MySQL database 420 export.saveas.GreenstoneSQL.meta_only:Store metadata in a MySQL database (and full text in Greenstone XML Archive format) 421 export.saveas.GreenstoneSQL.text_only:Store full text in a MySQL database (and metadata in Greenstone XML Archive format) 419 export.saveas.GreenstoneSQL:Store the metadata and/or full text in a MySQL database, and the converse in the Greenstone XML Archive format. 422 420 423 421 export.saveas.MARCXML:MARC XML format (an XML version of MARC 21) … … 1448 1446 METSPlugout.xslt_mets:Transform a mets's docmets.xml with the XSLT in the named file. 1449 1447 1450 GreenstoneSQLPlugout.desc:Output metadata and/or full text to a MySQL database (named after GS3 site name for GS3 or named greenstone2 for GS2) instead of doc.xml. For Greenstone 3, the database name is the GS3 site name. For Greenstone 2, the database name is greenstone2. 1448 GreenstoneSQLPlugout.desc:Output metadata and/or full text to a MySQL database (named after GS3 site name for GS3 or named greenstone2 for GS2) instead of doc.xml. For Greenstone 3, the database name is the GS3 site name. For Greenstone 2, the database name is greenstone2. The basic saveas.options for this Plugout are the same as the basic options for the matching GreenstoneSQLPlugin. 1451 1449 1452 1450 # 1453 1451 # GreenstoneSQLPlug strings are shared by both GreenstoneSQLPlugout and GreenstoneSQLPlugin 1454 1452 # 1455 GreenstoneSQLPlug.process_mode:Setting determines whether full text and/or metadata will be output to a MySQL database instead of to doc.xml during import. 1453 GreenstoneSQLPlug.process_mode:Setting determines whether full text and/or metadata will be output to a MySQL database instead of to doc.xml during import. Choose one of meta_only, text_only, or all (default). 1456 1454 GreenstoneSQLPlug.process_mode.all:Import stage outputs the full text and metadata to a MySQL database instead of to doc.xml. 1457 1455 GreenstoneSQLPlug.process_mode.meta_only:Import stage outputs the metadata to a MySQL database and any text to doc.xml.
Note:
See TracChangeset
for help on using the changeset viewer.