diff --git a/classes/ETL/DbModel/Table.php b/classes/ETL/DbModel/Table.php index 4329f22073..51b4c37617 100644 --- a/classes/ETL/DbModel/Table.php +++ b/classes/ETL/DbModel/Table.php @@ -48,10 +48,10 @@ class Table extends SchemaEntity implements iEntity, iDiscoverableEntity, iAlter 'engine' => null, // Optional table default character set - 'charset' => null, + 'charset' => 'utf8', // Optional table collation - 'collation' => null, + 'collation' => 'utf8_unicode_ci', // Associative array where the keys are column names and the values are Column objects 'columns' => array(), diff --git a/classes/ETL/Ingestor/pdoIngestor.php b/classes/ETL/Ingestor/pdoIngestor.php index 68e53875d4..40289b66fc 100644 --- a/classes/ETL/Ingestor/pdoIngestor.php +++ b/classes/ETL/Ingestor/pdoIngestor.php @@ -615,6 +615,7 @@ private function multiDatabaseIngest() if ( $this->options->force_load_data_infile_replace_into ) { $loadStatement = "LOAD DATA LOCAL INFILE '$infileName' replace into table $qualifiedDestTableName " + . "CHARACTER SET utf8 " . "FIELDS TERMINATED BY " . sprintf("0x%02x", ord($this->fieldSeparator)) . " OPTIONALLY ENCLOSED BY " . sprintf("0x%02x", ord($this->stringEnclosure)) . " ESCAPED BY " . sprintf("0x%02x", ord($this->escapeChar)) @@ -639,6 +640,7 @@ function ($s) { $loadStatement = "CREATE TABLE $tmpTable LIKE $qualifiedDestTableName; " . "ALTER TABLE $tmpTable DISABLE KEYS; " . "LOAD DATA LOCAL INFILE '$infileName' INTO TABLE $tmpTable " + . "CHARACTER SET utf8 " . "FIELDS TERMINATED BY " . sprintf("0x%02x", ord($this->fieldSeparator)) . " OPTIONALLY ENCLOSED BY " . sprintf("0x%02x", ord($this->stringEnclosure)) . " ESCAPED BY " . sprintf("0x%02x", ord($this->escapeChar)) diff --git a/classes/OpenXdmod/Shredder/Slurm.php b/classes/OpenXdmod/Shredder/Slurm.php index 51db30e47d..2572b1c3ed 100644 --- a/classes/OpenXdmod/Shredder/Slurm.php +++ b/classes/OpenXdmod/Shredder/Slurm.php @@ -295,9 +295,6 @@ public function shredLine($line) return; } - // Convert job name encoding. - $job['job_name'] = mb_convert_encoding($job['job_name'], 'ISO-8859-1', 'UTF-8'); - // Convert datetime strings into unix timestamps. $dateKeys = array( 'submit_time', diff --git a/configuration/etl/etl_tables.d/logger/log_level.json b/configuration/etl/etl_tables.d/logger/log_level.json index 83fab725bd..b4ec3fab3e 100644 --- a/configuration/etl/etl_tables.d/logger/log_level.json +++ b/configuration/etl/etl_tables.d/logger/log_level.json @@ -3,8 +3,8 @@ "schema": "mod_logger", "name": "log_level", "engine": "InnoDB", - "charset": "latin1", - "collation": "latin1_swedish_ci", + "charset": "utf8", + "collation": "utf8_unicode_ci", "columns": [ { "name": "log_level_id", diff --git a/configuration/etl/etl_tables.d/logger/log_table.json b/configuration/etl/etl_tables.d/logger/log_table.json index 78cfaa09cf..1d4ee648c6 100644 --- a/configuration/etl/etl_tables.d/logger/log_table.json +++ b/configuration/etl/etl_tables.d/logger/log_table.json @@ -3,8 +3,8 @@ "schema": "mod_logger", "name": "log_table", "engine": "InnoDB", - "charset": "latin1", - "collation": "latin1_swedish_ci", + "charset": "utf8", + "collation": "utf8_unicode_ci", "columns": [ { "name": "id", diff --git a/tests/artifacts/xdmod/etlv2/configuration/input/etl_tables_8.0.0.d/db_model_test_2.json b/tests/artifacts/xdmod/etlv2/configuration/input/etl_tables_8.0.0.d/db_model_test_2.json index f4bfd9f75e..6f5a0d8539 100644 --- a/tests/artifacts/xdmod/etlv2/configuration/input/etl_tables_8.0.0.d/db_model_test_2.json +++ b/tests/artifacts/xdmod/etlv2/configuration/input/etl_tables_8.0.0.d/db_model_test_2.json @@ -3,16 +3,22 @@ "table_definition": { "name": "modify_table_test", "engine": "MyISAM", + "charset": "latin1", + "collation": "latin1_swedish_ci", "columns": [ { "#": "New column at start of list", "name": "new_column_1", "type": "varchar(40)", + "charset": "latin1", + "collation": "latin1_swedish_ci", "nullable": true }, { "name": "resource", "type": "varchar(40)", + "charset": "latin1", + "collation": "latin1_swedish_ci", "nullable": true }, { diff --git a/tests/artifacts/xdmod/etlv2/configuration/input/etl_tables_8.0.0.d/db_model_test_3.json b/tests/artifacts/xdmod/etlv2/configuration/input/etl_tables_8.0.0.d/db_model_test_3.json index d23db2f8aa..56442a64e4 100644 --- a/tests/artifacts/xdmod/etlv2/configuration/input/etl_tables_8.0.0.d/db_model_test_3.json +++ b/tests/artifacts/xdmod/etlv2/configuration/input/etl_tables_8.0.0.d/db_model_test_3.json @@ -3,16 +3,22 @@ "table_definition": { "name": "modify_table_test", "engine": "MyISAM", + "charset": "latin1", + "collation": "latin1_swedish_ci", "columns": [ { "name": "resource", "type": "varchar(40)", + "charset": "latin1", + "collation": "latin1_swedish_ci", "nullable": true }, { "#": "This was the 1st column", "name": "new_column_1", "type": "varchar(40)", + "charset": "latin1", + "collation": "latin1_swedish_ci", "nullable": true }, { diff --git a/tests/artifacts/xdmod/etlv2/configuration/input/etl_tables_8.0.0.d/db_model_test_4.json b/tests/artifacts/xdmod/etlv2/configuration/input/etl_tables_8.0.0.d/db_model_test_4.json index 1f1c2ad20a..d1880e4c10 100644 --- a/tests/artifacts/xdmod/etlv2/configuration/input/etl_tables_8.0.0.d/db_model_test_4.json +++ b/tests/artifacts/xdmod/etlv2/configuration/input/etl_tables_8.0.0.d/db_model_test_4.json @@ -3,15 +3,21 @@ "table_definition": { "name": "modify_table_test", "engine": "MyISAM", + "charset": "latin1", + "collation": "latin1_swedish_ci", "columns": [ { "name": "resource", "type": "varchar(40)", + "charset": "latin1", + "collation": "latin1_swedish_ci", "nullable": true }, { "name": "new_column_1", "type": "varchar(40)", + "charset": "latin1", + "collation": "latin1_swedish_ci", "nullable": true }, { diff --git a/tests/artifacts/xdmod/etlv2/configuration/output/create_table.json b/tests/artifacts/xdmod/etlv2/configuration/output/create_table.json index d81e605cd2..838efd0e8c 100644 --- a/tests/artifacts/xdmod/etlv2/configuration/output/create_table.json +++ b/tests/artifacts/xdmod/etlv2/configuration/output/create_table.json @@ -1,13 +1,13 @@ { "comment": "", "engine": "myisam", - "charset": "latin1", - "collation": "latin1_swedish_ci", + "charset": "utf8", + "collation": "utf8_unicode_ci", "columns": [ { "type": "varchar(40)", - "charset": "latin1", - "collation": "latin1_swedish_ci", + "charset": "utf8", + "collation": "utf8_unicode_ci", "nullable": true, "default": null, "extra": null, diff --git a/tests/artifacts/xdmod/etlv2/dbmodel/output/resourceallocationfact_by.aggregation.sql b/tests/artifacts/xdmod/etlv2/dbmodel/output/resourceallocationfact_by.aggregation.sql index 4443919313..6470983385 100644 --- a/tests/artifacts/xdmod/etlv2/dbmodel/output/resourceallocationfact_by.aggregation.sql +++ b/tests/artifacts/xdmod/etlv2/dbmodel/output/resourceallocationfact_by.aggregation.sql @@ -15,4 +15,4 @@ CREATE TABLE IF NOT EXISTS `resourceallocationfact_by_quarter` ( INDEX `index_year` (`year`), INDEX `index_organization_id` (`organization_id`), INDEX `index_resource_id` (`resource_id`) -) ENGINE = myisam COMMENT = 'Resource Allocation facts aggregated by quarter.'; +) ENGINE = myisam CHARSET = utf8 COLLATE = utf8_unicode_ci COMMENT = 'Resource Allocation facts aggregated by quarter.'; diff --git a/tests/artifacts/xdmod/unit/etl/db-model/foreign-key-constraint/create-table.json b/tests/artifacts/xdmod/unit/etl/db-model/foreign-key-constraint/create-table.json index 6ff9a5222b..eb73dfbcd0 100644 --- a/tests/artifacts/xdmod/unit/etl/db-model/foreign-key-constraint/create-table.json +++ b/tests/artifacts/xdmod/unit/etl/db-model/foreign-key-constraint/create-table.json @@ -46,7 +46,7 @@ } }, [ - "CREATE TABLE IF NOT EXISTS `test` (\n `id` int(11) NOT NULL auto_increment,\n `other_id` int(11) NOT NULL,\n PRIMARY KEY (`id`),\n INDEX `idx_other_id` (`other_id`),\n CONSTRAINT `fk_other` FOREIGN KEY (`other_id`) REFERENCES `other` (`id`)\n) ENGINE = innodb;" + "CREATE TABLE IF NOT EXISTS `test` (\n `id` int(11) NOT NULL auto_increment,\n `other_id` int(11) NOT NULL,\n PRIMARY KEY (`id`),\n INDEX `idx_other_id` (`other_id`),\n CONSTRAINT `fk_other` FOREIGN KEY (`other_id`) REFERENCES `other` (`id`)\n) ENGINE = innodb CHARSET = utf8 COLLATE = utf8_unicode_ci;" ] ], "Table with complex foreign key constraint": [ @@ -99,7 +99,7 @@ } }, [ - "CREATE TABLE IF NOT EXISTS `test` (\n `id` int(11) NOT NULL auto_increment,\n `other_id` int(11) NOT NULL,\n PRIMARY KEY (`id`),\n INDEX `idx_other_id` (`other_id`),\n CONSTRAINT `fk_other` FOREIGN KEY (`other_id`) REFERENCES `mod_other`.`other` (`id`) ON DELETE SET NULL ON UPDATE CASCADE\n) ENGINE = innodb;" + "CREATE TABLE IF NOT EXISTS `test` (\n `id` int(11) NOT NULL auto_increment,\n `other_id` int(11) NOT NULL,\n PRIMARY KEY (`id`),\n INDEX `idx_other_id` (`other_id`),\n CONSTRAINT `fk_other` FOREIGN KEY (`other_id`) REFERENCES `mod_other`.`other` (`id`) ON DELETE SET NULL ON UPDATE CASCADE\n) ENGINE = innodb CHARSET = utf8 COLLATE = utf8_unicode_ci;" ] ] } diff --git a/tests/unit/lib/ETL/DbModel/DbModelTest.php b/tests/unit/lib/ETL/DbModel/DbModelTest.php index bf4443f1a0..7177fff738 100644 --- a/tests/unit/lib/ETL/DbModel/DbModelTest.php +++ b/tests/unit/lib/ETL/DbModel/DbModelTest.php @@ -108,7 +108,7 @@ public function testTableSchema() $expected = "CREATE TABLE IF NOT EXISTS `table_no_schema` ( `column1` int(11) NULL DEFAULT 0 COMMENT 'This is my comment', `column2` varchar(16) CHARSET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT 'Test Column' COMMENT 'No comment' -);"; +) CHARSET = utf8 COLLATE = utf8_unicode_ci;"; $this->assertEquals($expected, $generated); // SQL with schema @@ -117,7 +117,7 @@ public function testTableSchema() $expected = "CREATE TABLE IF NOT EXISTS `my_schema`.`table_no_schema` ( `column1` int(11) NULL DEFAULT 0 COMMENT 'This is my comment', `column2` varchar(16) CHARSET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT 'Test Column' COMMENT 'No comment' -);"; +) CHARSET = utf8 COLLATE = utf8_unicode_ci;"; $this->assertEquals($expected, $generated); } diff --git a/tests/unit/lib/OpenXdmod/Tests/Shredder/SlurmShredderTest.php b/tests/unit/lib/OpenXdmod/Tests/Shredder/SlurmShredderTest.php index a6b84098ab..1c8cd0db15 100644 --- a/tests/unit/lib/OpenXdmod/Tests/Shredder/SlurmShredderTest.php +++ b/tests/unit/lib/OpenXdmod/Tests/Shredder/SlurmShredderTest.php @@ -176,8 +176,6 @@ public function testUnknownJobStateHandling($line, $messages) */ public function testUtf8MultibyteCharsParsing($line, $job) { - $jobName = mb_convert_encoding($job['job_name'], 'ISO-8859-1', 'UTF-8'); - $shredder = $this ->getMockBuilder('\OpenXdmod\Shredder\Slurm') ->setConstructorArgs([$this->db]) @@ -186,7 +184,7 @@ public function testUtf8MultibyteCharsParsing($line, $job) $shredder ->expects($this->once()) ->method('insertRow') - ->with(new ArraySubset(['job_name' => $jobName])); + ->with(new ArraySubset(['job_name' => $job['job_name']])); $shredder->setLogger($this->logger); $shredder->shredLine($line);