Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added support for utf8mb4 (emoji support) #1

Merged
merged 1 commit into from
Feb 25, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions qa-config-example.php
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,15 @@

define('QA_MYSQL_TABLE_PREFIX', 'qa_');

/*
QA_USE_UTF8MB4 makes Q2A use the utf8mb4 character set instead of utf8. This is mainly needed to
store emojis and other 4-byte UTF-8 characters. Note that if you enable this setting after your
database has been created, you must also convert the charset of all existing tables in your
database (for example using an export and reimport). See this discussion for more info:
https://www.question2answer.org/qa/62412/unicode-10-characters-filtered-out
*/

// Must be a real boolean: the string 'false' is non-empty and therefore truthy in PHP,
// which would make every `defined('QA_USE_UTF8MB4') && QA_USE_UTF8MB4` check pass.
define('QA_USE_UTF8MB4', false);

/*
If you wish, you can define QA_MYSQL_USERS_PREFIX separately from QA_MYSQL_TABLE_PREFIX.
If so, tables containing information about user accounts (not including users' activity and points)
Expand Down
12 changes: 10 additions & 2 deletions qa-include/db/install.php
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,11 @@ function qa_db_table_definitions()
require_once QA_INCLUDE_DIR . 'db/maxima.php';
require_once QA_INCLUDE_DIR . 'app/users.php';

// Binary collation used in column definitions: utf8mb4_bin when QA_USE_UTF8MB4 is defined
// and truthy, otherwise utf8_bin.
if (defined('QA_USE_UTF8MB4') && QA_USE_UTF8MB4)
	$collation = 'utf8mb4_bin';
else
	$collation = 'utf8_bin';

/*
Important note on character encoding in database and PHP connection to MySQL

Expand Down Expand Up @@ -107,7 +112,7 @@ function qa_db_table_definitions()
'avatarheight' => 'SMALLINT UNSIGNED', // pixel height of stored avatar
'passsalt' => 'BINARY(16)', // salt used to calculate passcheck - null if no password set for direct login
'passcheck' => 'BINARY(20)', // checksum from password and passsalt - null if no password set for direct login
'passhash' => 'VARCHAR(255) CHARACTER SET utf8 COLLATE utf8_bin DEFAULT NULL', // password_hash
'passhash' => 'VARCHAR(255) CHARACTER SET ' . (defined('QA_USE_UTF8MB4') && QA_USE_UTF8MB4 ? 'utf8mb4' : 'utf8') . ' COLLATE ' . $collation . ' DEFAULT NULL', // password_hash - charset follows QA_USE_UTF8MB4 so it matches the collation family (MySQL rejects utf8 with a utf8mb4_* collation)
'level' => 'TINYINT UNSIGNED NOT NULL', // basic, editor, admin, etc...
'loggedin' => 'DATETIME NOT NULL', // time of last login
'loginip' => 'VARBINARY(16) NOT NULL', // INET6_ATON of IP address of last login
Expand Down Expand Up @@ -714,7 +719,10 @@ function qa_db_create_table_sql($rawname, $definition)
if (isset($coldef))
$querycols .= (strlen($querycols) ? ', ' : '') . (is_int($colname) ? $coldef : ($colname . ' ' . $coldef));

return 'CREATE TABLE ^' . $rawname . ' (' . $querycols . ') ENGINE=InnoDB CHARSET=utf8';
if (defined('QA_USE_UTF8MB4') && QA_USE_UTF8MB4)
return 'CREATE TABLE ^' . $rawname . ' (' . $querycols . ') ENGINE=InnoDB CHARSET=utf8mb4';
else
return 'CREATE TABLE ^' . $rawname . ' (' . $querycols . ') ENGINE=InnoDB CHARSET=utf8';
}


Expand Down
14 changes: 12 additions & 2 deletions qa-include/db/selects.php
Original file line number Diff line number Diff line change
Expand Up @@ -1216,8 +1216,13 @@ function qa_db_tag_recent_qs_selectspec($voteuserid, $tag, $start, $full = false

$selectspec = qa_db_posts_basic_selectspec($voteuserid, $full);

if (defined('QA_USE_UTF8MB4') && QA_USE_UTF8MB4)
$collation = 'utf8mb4_bin';
else
$collation = 'utf8_bin';

// use two tests here - one which can use the index, and the other which narrows it down exactly - then limit to 1 just in case
$selectspec['source'] .= " JOIN (SELECT postid FROM ^posttags WHERE wordid=(SELECT wordid FROM ^words WHERE word=$ AND word=$ COLLATE utf8_bin LIMIT 1) ORDER BY postcreated DESC LIMIT #,#) y ON ^posts.postid=y.postid";
$selectspec['source'] .= " JOIN (SELECT postid FROM ^posttags WHERE wordid=(SELECT wordid FROM ^words WHERE word=$ AND word=$ COLLATE $collation LIMIT 1) ORDER BY postcreated DESC LIMIT #,#) y ON ^posts.postid=y.postid";
array_push($selectspec['arguments'], $tag, qa_strtolower($tag), $start, $count);
$selectspec['sortdesc'] = 'created';

Expand All @@ -1232,9 +1237,14 @@ function qa_db_tag_recent_qs_selectspec($voteuserid, $tag, $start, $full = false
*/
function qa_db_tag_word_selectspec($tag)
{
if (defined('QA_USE_UTF8MB4') && QA_USE_UTF8MB4)
$collation = 'utf8mb4_bin';
else
$collation = 'utf8_bin';

return array(
'columns' => array('wordid', 'word', 'tagcount'),
'source' => '^words WHERE word=$ AND word=$ COLLATE utf8_bin',
'source' => '^words WHERE word=$ AND word=$ COLLATE ' . $collation,
'arguments' => array($tag, qa_strtolower($tag)),
'single' => true,
);
Expand Down
7 changes: 6 additions & 1 deletion qa-include/qa-db.php
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,12 @@ function qa_db_connect($failhandler = null)
// From Q2A 1.5, we explicitly set the character encoding of the MySQL connection, instead of using lots of "SELECT BINARY col"-style queries.
// Testing showed that overhead is minimal, so this seems worth trading off against the benefit of more straightforward queries, especially
// for plugin developers.
if (!$db->set_charset('utf8'))
// Connection character set: utf8mb4 when QA_USE_UTF8MB4 is defined and truthy, otherwise utf8.
// (This is a charset name passed to set_charset(), not a collation.)
$charset = (defined('QA_USE_UTF8MB4') && QA_USE_UTF8MB4) ? 'utf8mb4' : 'utf8';

if (!$db->set_charset($charset))
	qa_db_fail_error('set_charset', $db->errno, $db->error);

qa_report_process_stage('db_connected');
Expand Down
3 changes: 3 additions & 0 deletions qa-include/util/string.php
Original file line number Diff line number Diff line change
Expand Up @@ -601,6 +601,9 @@ function qa_shorten_string_line($string, $length, $ellipsis = ' ... ')
*/
function qa_remove_utf8mb4($string)
{
if (defined('QA_USE_UTF8MB4') && QA_USE_UTF8MB4)
return $string;

return preg_replace('%(?:
\xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
| [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
Expand Down