Skip to content

Commit

Permalink
Add support for UTF8 to XDMoD
Browse files Browse the repository at this point in the history
  • Loading branch information
jpwhite4 committed Nov 27, 2024
1 parent f01fce4 commit 6085568
Show file tree
Hide file tree
Showing 13 changed files with 36 additions and 21 deletions.
4 changes: 2 additions & 2 deletions classes/ETL/DbModel/Table.php
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,10 @@ class Table extends SchemaEntity implements iEntity, iDiscoverableEntity, iAlter
'engine' => null,

// Optional table default character set
'charset' => null,
'charset' => 'utf8',

// Optional table collation
'collation' => null,
'collation' => 'utf8_unicode_ci',

// Associative array where the keys are column names and the values are Column objects
'columns' => array(),
Expand Down
2 changes: 2 additions & 0 deletions classes/ETL/Ingestor/pdoIngestor.php
Original file line number Diff line number Diff line change
Expand Up @@ -615,6 +615,7 @@ private function multiDatabaseIngest()

if ( $this->options->force_load_data_infile_replace_into ) {
$loadStatement = "LOAD DATA LOCAL INFILE '$infileName' replace into table $qualifiedDestTableName "
. "CHARACTER SET utf8 "
. "FIELDS TERMINATED BY " . sprintf("0x%02x", ord($this->fieldSeparator))
. " OPTIONALLY ENCLOSED BY " . sprintf("0x%02x", ord($this->stringEnclosure))
. " ESCAPED BY " . sprintf("0x%02x", ord($this->escapeChar))
Expand All @@ -639,6 +640,7 @@ function ($s) {
$loadStatement = "CREATE TABLE $tmpTable LIKE $qualifiedDestTableName; "
. "ALTER TABLE $tmpTable DISABLE KEYS; "
. "LOAD DATA LOCAL INFILE '$infileName' INTO TABLE $tmpTable "
. "CHARACTER SET utf8 "
. "FIELDS TERMINATED BY " . sprintf("0x%02x", ord($this->fieldSeparator))
. " OPTIONALLY ENCLOSED BY " . sprintf("0x%02x", ord($this->stringEnclosure))
. " ESCAPED BY " . sprintf("0x%02x", ord($this->escapeChar))
Expand Down
3 changes: 0 additions & 3 deletions classes/OpenXdmod/Shredder/Slurm.php
Original file line number Diff line number Diff line change
Expand Up @@ -295,9 +295,6 @@ public function shredLine($line)
return;
}

// Convert job name encoding.
$job['job_name'] = mb_convert_encoding($job['job_name'], 'ISO-8859-1', 'UTF-8');

// Convert datetime strings into unix timestamps.
$dateKeys = array(
'submit_time',
Expand Down
4 changes: 2 additions & 2 deletions configuration/etl/etl_tables.d/logger/log_level.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
"schema": "mod_logger",
"name": "log_level",
"engine": "InnoDB",
"charset": "latin1",
"collation": "latin1_swedish_ci",
"charset": "utf8",
"collation": "utf8_unicode_ci",
"columns": [
{
"name": "log_level_id",
Expand Down
4 changes: 2 additions & 2 deletions configuration/etl/etl_tables.d/logger/log_table.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
"schema": "mod_logger",
"name": "log_table",
"engine": "InnoDB",
"charset": "latin1",
"collation": "latin1_swedish_ci",
"charset": "utf8",
"collation": "utf8_unicode_ci",
"columns": [
{
"name": "id",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,22 @@
"table_definition": {
"name": "modify_table_test",
"engine": "MyISAM",
"charset": "latin1",
"collation": "latin1_swedish_ci",
"columns": [
{
"#": "New column at start of list",
"name": "new_column_1",
"type": "varchar(40)",
"charset": "latin1",
"collation": "latin1_swedish_ci",
"nullable": true
},
{
"name": "resource",
"type": "varchar(40)",
"charset": "latin1",
"collation": "latin1_swedish_ci",
"nullable": true
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,22 @@
"table_definition": {
"name": "modify_table_test",
"engine": "MyISAM",
"charset": "latin1",
"collation": "latin1_swedish_ci",
"columns": [
{
"name": "resource",
"type": "varchar(40)",
"charset": "latin1",
"collation": "latin1_swedish_ci",
"nullable": true
},
{
"#": "This was the 1st column",
"name": "new_column_1",
"type": "varchar(40)",
"charset": "latin1",
"collation": "latin1_swedish_ci",
"nullable": true
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,21 @@
"table_definition": {
"name": "modify_table_test",
"engine": "MyISAM",
"charset": "latin1",
"collation": "latin1_swedish_ci",
"columns": [
{
"name": "resource",
"type": "varchar(40)",
"charset": "latin1",
"collation": "latin1_swedish_ci",
"nullable": true
},
{
"name": "new_column_1",
"type": "varchar(40)",
"charset": "latin1",
"collation": "latin1_swedish_ci",
"nullable": true
},
{
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
{
"comment": "",
"engine": "myisam",
"charset": "latin1",
"collation": "latin1_swedish_ci",
"charset": "utf8",
"collation": "utf8_unicode_ci",
"columns": [
{
"type": "varchar(40)",
"charset": "latin1",
"collation": "latin1_swedish_ci",
"charset": "utf8",
"collation": "utf8_unicode_ci",
"nullable": true,
"default": null,
"extra": null,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@ CREATE TABLE IF NOT EXISTS `resourceallocationfact_by_quarter` (
INDEX `index_year` (`year`),
INDEX `index_organization_id` (`organization_id`),
INDEX `index_resource_id` (`resource_id`)
) ENGINE = myisam COMMENT = 'Resource Allocation facts aggregated by quarter.';
) ENGINE = myisam CHARSET = utf8 COLLATE = utf8_unicode_ci COMMENT = 'Resource Allocation facts aggregated by quarter.';
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
}
},
[
"CREATE TABLE IF NOT EXISTS `test` (\n `id` int(11) NOT NULL auto_increment,\n `other_id` int(11) NOT NULL,\n PRIMARY KEY (`id`),\n INDEX `idx_other_id` (`other_id`),\n CONSTRAINT `fk_other` FOREIGN KEY (`other_id`) REFERENCES `other` (`id`)\n) ENGINE = innodb;"
"CREATE TABLE IF NOT EXISTS `test` (\n `id` int(11) NOT NULL auto_increment,\n `other_id` int(11) NOT NULL,\n PRIMARY KEY (`id`),\n INDEX `idx_other_id` (`other_id`),\n CONSTRAINT `fk_other` FOREIGN KEY (`other_id`) REFERENCES `other` (`id`)\n) ENGINE = innodb CHARSET = utf8 COLLATE = utf8_unicode_ci;"
]
],
"Table with complex foreign key constraint": [
Expand Down Expand Up @@ -99,7 +99,7 @@
}
},
[
"CREATE TABLE IF NOT EXISTS `test` (\n `id` int(11) NOT NULL auto_increment,\n `other_id` int(11) NOT NULL,\n PRIMARY KEY (`id`),\n INDEX `idx_other_id` (`other_id`),\n CONSTRAINT `fk_other` FOREIGN KEY (`other_id`) REFERENCES `mod_other`.`other` (`id`) ON DELETE SET NULL ON UPDATE CASCADE\n) ENGINE = innodb;"
"CREATE TABLE IF NOT EXISTS `test` (\n `id` int(11) NOT NULL auto_increment,\n `other_id` int(11) NOT NULL,\n PRIMARY KEY (`id`),\n INDEX `idx_other_id` (`other_id`),\n CONSTRAINT `fk_other` FOREIGN KEY (`other_id`) REFERENCES `mod_other`.`other` (`id`) ON DELETE SET NULL ON UPDATE CASCADE\n) ENGINE = innodb CHARSET = utf8 COLLATE = utf8_unicode_ci;"
]
]
}
4 changes: 2 additions & 2 deletions tests/unit/lib/ETL/DbModel/DbModelTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ public function testTableSchema()
$expected = "CREATE TABLE IF NOT EXISTS `table_no_schema` (
`column1` int(11) NULL DEFAULT 0 COMMENT 'This is my comment',
`column2` varchar(16) CHARSET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT 'Test Column' COMMENT 'No comment'
);";
) CHARSET = utf8 COLLATE = utf8_unicode_ci;";
$this->assertEquals($expected, $generated);

// SQL with schema
Expand All @@ -117,7 +117,7 @@ public function testTableSchema()
$expected = "CREATE TABLE IF NOT EXISTS `my_schema`.`table_no_schema` (
`column1` int(11) NULL DEFAULT 0 COMMENT 'This is my comment',
`column2` varchar(16) CHARSET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT 'Test Column' COMMENT 'No comment'
);";
) CHARSET = utf8 COLLATE = utf8_unicode_ci;";
$this->assertEquals($expected, $generated);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,8 +176,6 @@ public function testUnknownJobStateHandling($line, $messages)
*/
public function testUtf8MultibyteCharsParsing($line, $job)
{
$jobName = mb_convert_encoding($job['job_name'], 'ISO-8859-1', 'UTF-8');

$shredder = $this
->getMockBuilder('\OpenXdmod\Shredder\Slurm')
->setConstructorArgs([$this->db])
Expand All @@ -186,7 +184,7 @@ public function testUtf8MultibyteCharsParsing($line, $job)
$shredder
->expects($this->once())
->method('insertRow')
->with(new ArraySubset(['job_name' => $jobName]));
->with(new ArraySubset(['job_name' => $job['job_name']]));

$shredder->setLogger($this->logger);
$shredder->shredLine($line);
Expand Down

0 comments on commit 6085568

Please sign in to comment.