Changeset 32557 for main/trunk


Ignore:
Timestamp:
2018-10-31T18:25:03+13:00 (5 years ago)
Author:
ak19
Message:
  1. Bugfix to the create_db method: db_name is now a parameter to the method and no longer a member variable of the gssql object. 2. UTF-8 encoding finally made to work with GreenstoneSQL connections. Doing 'set NAMES utf8' was insufficient and, further, caused an error when building a collection with macrons, as the utf8 encoding thus specified only supports UTF-8 chars that take up to 3 bytes to encode. For UTF-8 chars that can take up to 4 bytes, we need to do two things. First, run the mysqld server with --character_set_server=utf8mb4. Second, EITHER set the mysql_enable_utf8mb4 => 1 option when using perl DBI to connect to the db (which in one step tells MySQL to use UTF-8 for communication AND DBD::mysql to decode the data) OR, after connecting, do BOTH set NAMES 'utf8mb4' (instead of utf8) to tell MySQL to use UTF-8 for communication AND $dbh->{mysql_enable_utf8mb4} = 1 to tell DBD::mysql to decode the data. See https://stackoverflow.com/questions/10957238/incorrect-string-value-when-trying-to-insert-utf-8-into-mysql-via-jdbc and https://stackoverflow.com/questions/46727362/perl-mysql-utf8mb4-issue-possible-bug
File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/gssql.pm

    r32555 r32557  
    110110    my $self= shift (@_);
    111111    my ($params_map) = @_;
    112     my $db_enc = $self->{'db_encoding'} || "utf8";
     112    #my $db_enc = $self->{'db_encoding'} || "utf8";
     113    my $db_enc = "utf8mb4";
    113114
    114115    # these are the params for connecting to MySQL
     
    130131                   PrintError => 1, # on by default, but being explicit
    131132                   RaiseError => 0, # off by default, but being explicit
    132                    AutoCommit => 1 # on by default, but being explicit
     133                   AutoCommit => 1, # on by default, but being explicit
     134                   mysql_enable_utf8mb4 => 1 # tells MySQL to use UTF-8 for communication and tells DBD::mysql to decode the data, see https://stackoverflow.com/questions/46727362/perl-mysql-utf8mb4-issue-possible-bug
    133135               });
    134136
    135137    if(!$dbh) {
    136     # NOTE, despite handle dbh being undefined, error code will be in DBI->err
     138    # NOTE, despite handle dbh being undefined, error code will be in DBI->err (note caps)
    137139    return 0;   
    138140    }
     
    141143    # https://dev.mysql.com/doc/refman/5.7/en/charset.html
    142144    # https://dev.mysql.com/doc/refman/5.7/en/charset-conversion.html
    143     # Setting the encoding at db server level.
    144     # Not sure if this command is mysql specific:
    145     my $stmt = "set NAMES '" . $db_enc . "'";
    146     $dbh->do($stmt) || warn("Unable to set charset encoding at db server level to: " . $db_enc . "\n");
     145    # Setting the encoding at db server level: $dbh->do("set NAMES '" . $db_enc . "'");
     146    # HOWEVER:
     147    # It turned out that setting the encoding to utf8 was insufficient, as that only supports utf8 chars that
     148    # need up to 3 bytes. We may need up to 4 bytes per utf8 character, e.g. chars with macron,
     149    # and for that, we need the encoding to be set to utf8mb4.
     150    # To set up a MySQL db to use utf8mb4 requires configuration on the server side too.
     151    # https://stackoverflow.com/questions/10957238/incorrect-string-value-when-trying-to-insert-utf-8-into-mysql-via-jdbc
     152    # https://stackoverflow.com/questions/46727362/perl-mysql-utf8mb4-issue-possible-bug
     153    # To set up the db for utf8mb4, therefore,
     154    # the MySQL server needs to be configured for that char encoding by running the server as:
     155    # mysql-5.7.23-linux-glibc2.12-x86_64/bin>./mysqld_safe --datadir=/Scratch/ak19/mysql/data --character_set_server=utf8mb4
     156    # AND when connecting to the server, we can either set mysql_enable_utf8mb4 => 1
     157    # as a connection option
     158    # OR we need to do both "set NAMES utf8mb4" AND "$dbh->{mysql_enable_utf8mb4} = 1;" after connecting
     159    #
     160    # Search results for DBI Set Names imply the "SET NAMES '<enc>'" command is mysql specific too,
     161    # so setting the mysql specific option during connection above as "mysql_enable_utf8mb4 => 1"
     162    # is no more objectionable. It has the advantage of cutting out the 2 extra lines of doing
     163    # set NAMES '<enc>' and $dbh->{mysql_enable_utf8mb4} = 1 here.
     164    # These lines may be preferred if more db_driver options are to be supported in future:
     165    # then a separate method called set_db_encoding($enc) can work out what db_driver we're using
     166    # and if mysql and enc=utf8, then it can do the following whereas it will issue other do stmts
     167    # for other db_drivers, see https://www.perlmonks.org/?node_id=259456:
     168   
     169    #my $stmt = "set NAMES '" . $db_enc . "'";
     170    #$dbh->do($stmt) || warn("Unable to set charset encoding at db server level to: " . $db_enc . "\n"); # tells MySQL to use UTF-8 for communication
     171    #$dbh->{mysql_enable_utf8mb4} = 1; # tells DBD::mysql to decode the data
    147172   
    148173    # if we're here, then connection succeeded, store handle
     
    248273sub create_db {
    249274    my $self= shift (@_);
    250     my $db_name = $self->{'db_name'};
     275    my ($db_name) = @_;
    251276    my $dbh = $self->{'db_handle'};
    252277   
     
    294319    # drop table <tablename>
    295320    my $table = $self->get_metadata_table_name();
    296     $dbh->do("drop table $table") || warn("@@@ Couldn't delete $table");
     321    if($self->table_exists($table)) {
     322    $dbh->do("drop table $table") || warn("@@@ Couldn't delete $table");
     323    }
    297324    $table = $self->get_fulltext_table_name();
    298     $dbh->do("drop table $table") || warn("@@@ Couldn't delete $table");
     325    if($self->table_exists($table)) {
     326    $dbh->do("drop table $table") || warn("@@@ Couldn't delete $table");
     327    }
    299328}
    300329
Note: See TracChangeset for help on using the changeset viewer.