From b380c85a15462e38e5f6e32539b878f78b4dca19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABl=20Berthaud-M=C3=BCller?= Date: Tue, 24 Oct 2023 15:10:02 +0200 Subject: [PATCH 1/8] use name normalization method --- lib/Zonemaster/Backend/DB.pm | 7 ++++--- lib/Zonemaster/Backend/RPCAPI.pm | 10 ++++------ lib/Zonemaster/Backend/Validator.pm | 31 +++++------------------------ 3 files changed, 13 insertions(+), 35 deletions(-) diff --git a/lib/Zonemaster/Backend/DB.pm b/lib/Zonemaster/Backend/DB.pm index c88cce8a7..8ef7c819e 100644 --- a/lib/Zonemaster/Backend/DB.pm +++ b/lib/Zonemaster/Backend/DB.pm @@ -16,6 +16,7 @@ use POSIX qw( strftime ); use Readonly; use Try::Tiny; +use Zonemaster::Engine::Normalization; use Zonemaster::Backend::Errors; use Zonemaster::Engine::Logger::Entry; @@ -874,10 +875,10 @@ sub process_dead_test { sub _normalize_domain { my ( $domain ) = @_; - $domain = lc( $domain ); - $domain =~ s/\.$// unless $domain eq '.'; + # Discard errors, at this point errors have already checked + my ( $_errors, $normalized_domain ) = normalize_name( $domain ); - return $domain; + return $normalized_domain; } sub _project_params { diff --git a/lib/Zonemaster/Backend/RPCAPI.pm b/lib/Zonemaster/Backend/RPCAPI.pm index 87290a12b..48d98830e 100644 --- a/lib/Zonemaster/Backend/RPCAPI.pm +++ b/lib/Zonemaster/Backend/RPCAPI.pm @@ -22,6 +22,7 @@ use Encode; # Zonemaster Modules use Zonemaster::Engine; +use Zonemaster::Engine::Normalization; use Zonemaster::Engine::Profile; use Zonemaster::Engine::Recursor; use Zonemaster::Backend; @@ -223,16 +224,17 @@ sub get_data_from_parent_zone { my $result = eval { my %result; my $domain = $params->{domain}; + my ( $_errors, $normalized_domain ) = normalize_name( $domain ); my @ns_list; my @ns_names; - my $zone = Zonemaster::Engine->zone( $domain ); + my $zone = Zonemaster::Engine->zone( $normalized_domain ); push @ns_list, { ns => $_->name->string, ip => $_->address->short} for @{$zone->glue}; my @ds_list; - $zone = Zonemaster::Engine->zone($domain); + $zone = Zonemaster::Engine->zone($normalized_domain); my $ds_p = $zone->parent->query_one( $zone->name, 'DS', { dnssec => 1, cd => 1, recurse => 1 } ); if ($ds_p) { my @ds = $ds_p->get_records( 'DS', 'answer' ); @@ -291,10 +293,6 @@ sub start_domain_test { my $result = 0; eval { - $params->{domain} =~ s/^\.// unless ( !$params->{domain} || $params->{domain} eq '.' ); - - die "No domain in parameters\n" unless ( defined $params->{domain} && length($params->{domain}) ); - $params->{profile} //= "default"; $params->{priority} //= 10; $params->{queue} //= 0; diff --git a/lib/Zonemaster/Backend/Validator.pm b/lib/Zonemaster/Backend/Validator.pm index 9c85d8a3f..d23bc377f 100644 --- a/lib/Zonemaster/Backend/Validator.pm +++ b/lib/Zonemaster/Backend/Validator.pm @@ -13,6 +13,7 @@ use Readonly; use Locale::TextDomain qw[Zonemaster-Backend]; use Net::IP::XS; use Zonemaster::Engine::Logger::Entry; +use Zonemaster::Engine::Normalization; use Zonemaster::LDNS; our @EXPORT_OK = qw( @@ -281,35 +282,13 @@ sub check_domain { return N__ 'Domain name required'; } - if ( $domain =~ m/[^[:ascii:]]+/ ) { - if ( Zonemaster::LDNS::has_idn() ) { - eval { $domain = Zonemaster::LDNS::to_idn( $domain ); }; - if ( $@ ) { - return N__ 'The domain name is IDNA invalid'; - } - } - else { - return N__ 'The domain name contains non-ascii characters and IDNA support is not installed'; - } - } - - if ( $domain !~ m{^[a-z0-9_./-]+$}i ) { - return N__ 'The domain name character(s) are not supported'; - } - - if ( $domain =~ m/\.\./i ) { - return N__ 'The domain name contains consecutive dots'; - } + my ( $errors, $_domain ) = normalize_name( $domain ); - my %levels = Zonemaster::Engine::Logger::Entry::levels(); - my @res; - @res = Zonemaster::Engine::Test::Basic->basic00( $domain ); - @res = grep { $_->numeric_level >= $levels{ERROR} } @res; - if ( @res != 0 ) { - return N__ 'The domain name or label is too long'; + if ( @{$errors} != 0 ) { + return @{$errors}[0]->message; } - return undef; + return undef } =head2 check_language_tag($value, %locales) From 8b28668f0cadbf82c8074e9f8ad0b78ae8c13672 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABl=20Berthaud-M=C3=BCller?= Date: Mon, 6 Nov 2023 10:22:35 +0100 Subject: [PATCH 2/8] fix typo in comment --- lib/Zonemaster/Backend/DB.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Zonemaster/Backend/DB.pm b/lib/Zonemaster/Backend/DB.pm index 8ef7c819e..3aa5e9fa1 100644 --- a/lib/Zonemaster/Backend/DB.pm +++ b/lib/Zonemaster/Backend/DB.pm @@ -875,7 +875,7 @@ sub process_dead_test { sub _normalize_domain { my ( $domain ) = @_; - # Discard errors, at this point errors have already checked + # Discard errors, at this point errors have already been checked my ( $_errors, $normalized_domain ) = normalize_name( $domain ); return $normalized_domain; From a8ab03d7554eec270593989208c84a56ae1263e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABl=20Berthaud-M=C3=BCller?= Date: Mon, 6 Nov 2023 10:27:59 +0100 Subject: [PATCH 3/8] use suggestion --- lib/Zonemaster/Backend/Validator.pm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/Zonemaster/Backend/Validator.pm b/lib/Zonemaster/Backend/Validator.pm index d23bc377f..9a8dca524 100644 --- a/lib/Zonemaster/Backend/Validator.pm +++ b/lib/Zonemaster/Backend/Validator.pm @@ -284,8 +284,8 @@ sub check_domain { my ( $errors, $_domain ) = normalize_name( $domain ); - if ( @{$errors} != 0 ) { - return @{$errors}[0]->message; + if ( @{$errors} ) { + return $errors->[0]->message; } return undef From 192a77d9f704f7aa862d22d58ca3cf905049bec2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABl=20Berthaud-M=C3=BCller?= Date: Mon, 6 Nov 2023 10:28:33 +0100 Subject: [PATCH 4/8] fix unit test --- t/db.t | 4 ++-- t/parameters_validation.t | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/t/db.t b/t/db.t index 6d85faed8..b9ea2cd36 100644 --- a/t/db.t +++ b/t/db.t @@ -170,8 +170,8 @@ subtest 'encoding and fingerprint' => sub { }; subtest 'IDN domain' => sub { - my $expected_encoded_params = encode_utf8( '{"domain":"café.example","ds_info":[],"ipv4":true,"ipv6":true,"nameservers":[],"profile":"default"}' ); - my $expected_fingerprint = '8c64f7feaa3f13b77e769720991f2a79'; + my $expected_encoded_params = encode_utf8( '{"domain":"xn--caf-dma.example","ds_info":[],"ipv4":true,"ipv6":true,"nameservers":[],"profile":"default"}' ); + my $expected_fingerprint = '8cb027ff2c175f48aed2623abad0cdd2'; my %params = ( domain => "café.example" ); $params{ipv4} = JSON::PP->true; diff --git a/t/parameters_validation.t b/t/parameters_validation.t index 550165029..f0a5331eb 100644 --- a/t/parameters_validation.t +++ b/t/parameters_validation.t @@ -218,7 +218,7 @@ subtest 'Test custom formats' => sub { }, output => [{ path => '/my_domain', - message => 'The domain name character(s) are not supported' + message => 'Domain name has an ASCII label ("not a domain") with a character not permitted.' }] }, { From 6c85dbddb2f93be59ace48f8a6a3807e26a08e0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABl=20Berthaud-M=C3=BCller?= Date: Tue, 5 Dec 2023 15:24:40 +0100 Subject: [PATCH 5/8] update database migration script --- .../patch_db_zonemaster_backend_ver_11.0.3.pl | 66 +++++++++++++++++-- 1 file changed, 59 insertions(+), 7 deletions(-) diff --git a/share/patch/patch_db_zonemaster_backend_ver_11.0.3.pl b/share/patch/patch_db_zonemaster_backend_ver_11.0.3.pl index d36419525..120808ead 100644 --- a/share/patch/patch_db_zonemaster_backend_ver_11.0.3.pl +++ b/share/patch/patch_db_zonemaster_backend_ver_11.0.3.pl @@ -15,23 +15,25 @@ ); my $db_engine = $config->DB_engine; -print "engine: $db_engine\n"; +print "Configured database engine: $db_engine\n"; if ( $db_engine =~ /^(MySQL|PostgreSQL|SQLite)$/ ) { + print( "Starting database migration\n" ); $patch{ lc $db_engine }(); + print( "\nMigration done\n" ); } else { die "Unknown database engine configured: $db_engine\n"; } -sub _update_data { +sub _update_data_result_entries { my ( $dbh ) = @_; my $json = JSON::PP->new->allow_blessed->convert_blessed->canonical; # update only jobs with results my ( $row_total ) = $dbh->selectrow_array( 'SELECT count(*) FROM test_results WHERE results IS NOT NULL' ); - print "count: $row_total\n"; + print "Will update $row_total rows\n"; my %levels = Zonemaster::Engine::Logger::Entry->levels(); @@ -40,7 +42,7 @@ sub _update_data { my $row_count = 50000; my $row_done = 0; while ( $row_done < $row_total ) { - print "row_done/row_total: $row_done / $row_total\n"; + print "Progress update: $row_done / $row_total\n"; my $row_updated = 0; my $sth1 = $dbh->prepare( 'SELECT hash_id, results FROM test_results WHERE results IS NOT NULL ORDER BY id ASC LIMIT ?,?' ); $sth1->execute( $row_done, $row_count ); @@ -77,7 +79,43 @@ sub _update_data { # increase by min(row_updated, row_count) $row_done += ( $row_updated < $row_count ) ? $row_updated : $row_count; } - print "row_done/row_total: $row_done / $row_total\n"; + print "Progress update: $row_done / $row_total\n"; +} + +sub _update_data_nomalize_domains { + my ( $db ) = @_; + + my ( $row_total ) = $db->dbh->selectrow_array( 'SELECT count(*) FROM test_results' ); + print "Will update $row_total rows\n"; + + + my $sth1 = $db->dbh->prepare( 'SELECT hash_id, params FROM test_results' ); + $sth1->execute; + + my $row_done = 0; + my $progress = 0; + + while ( my $row = $sth1->fetchrow_hashref ) { + my $hash_id = $row->{hash_id}; + my $raw_params = decode_json($row->{params}); + my $domain = $raw_params->{domain}; + + # This has never been cleaned + delete $raw_params->{user_ip}; + + my $params = $db->encode_params( $raw_params ); + my $fingerprint = $db->generate_fingerprint( $raw_params ); + + $domain = Zonemaster::Backend::DB::_normalize_domain( $domain ); + + $db->dbh->do('UPDATE test_results SET domain = ?, params = ?, fingerprint = ? where hash_id = ?', undef, $domain, $params, $fingerprint, $hash_id); + $row_done += 1; + my $new_progress = int(($row_done / $row_total) * 100); + if ( $new_progress != $progress ) { + $progress = $new_progress; + print("$progress%\n"); + } + } } sub patch_db_mysql { @@ -91,7 +129,11 @@ sub patch_db_mysql { try { $db->create_schema(); - _update_data( $dbh ); + print( "\n-> (1/2) Populating new result_entries table\n" ); + _update_data_result_entries( $dbh ); + + print( "\n-> (2/2) Normalizing domain names\n" ); + _update_data_nomalize_domains( $db ); $dbh->commit(); } catch { @@ -112,6 +154,8 @@ sub patch_db_postgresql { try { $db->create_schema(); + print( "\n-> (1/2) Populating new result_entries table\n" ); + $dbh->do(q[ INSERT INTO result_entries ( hash_id, args, module, level, tag, timestamp, testcase @@ -139,6 +183,10 @@ sub patch_db_postgresql { 'UPDATE test_results SET results = NULL WHERE results IS NOT NULL' ); + + print( "\n-> (2/2) Normalizing domain names\n" ); + _update_data_nomalize_domains( $db ); + $dbh->commit(); } catch { print( "Could not upgrade database: " . $_ ); @@ -158,7 +206,11 @@ sub patch_db_sqlite { try { $db->create_schema(); - _update_data( $dbh ); + print( "\n-> (1/2) Populating new result_entries table\n" ); + _update_data_result_entries( $dbh ); + + print( "\n-> (2/2) Normalizing domain names\n" ); + _update_data_nomalize_domains( $db ); $dbh->commit(); } catch { From a6737968509dece33f714d9e68e5124aa38f1095 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABl=20Berthaud-M=C3=BCller?= Date: Tue, 5 Dec 2023 15:39:32 +0100 Subject: [PATCH 6/8] die if errors occur in normaliz_domain method --- lib/Zonemaster/Backend/DB.pm | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/Zonemaster/Backend/DB.pm b/lib/Zonemaster/Backend/DB.pm index 3aa5e9fa1..e66acd65c 100644 --- a/lib/Zonemaster/Backend/DB.pm +++ b/lib/Zonemaster/Backend/DB.pm @@ -875,8 +875,11 @@ sub process_dead_test { sub _normalize_domain { my ( $domain ) = @_; - # Discard errors, at this point errors have already been checked - my ( $_errors, $normalized_domain ) = normalize_name( $domain ); + my ( $errors, $normalized_domain ) = normalize_name( $domain ); + + if ( scalar( @{$errors} ) ) { + die Zonemaster::Backend::Error::Internal->new( reason => "Normalizing domain returned errors.", data => $errors ); + } return $normalized_domain; } From 396bd31c42c6905363d6681bdbffc72a137caa11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABl=20Berthaud-M=C3=BCller?= Date: Fri, 8 Dec 2023 14:12:09 +0100 Subject: [PATCH 7/8] ignore error in migration script --- lib/Zonemaster/Backend/DB.pm | 2 +- .../patch_db_zonemaster_backend_ver_11.0.3.pl | 23 +++++++++++-------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/lib/Zonemaster/Backend/DB.pm b/lib/Zonemaster/Backend/DB.pm index e66acd65c..1a3e2d5fd 100644 --- a/lib/Zonemaster/Backend/DB.pm +++ b/lib/Zonemaster/Backend/DB.pm @@ -878,7 +878,7 @@ sub _normalize_domain { my ( $errors, $normalized_domain ) = normalize_name( $domain ); if ( scalar( @{$errors} ) ) { - die Zonemaster::Backend::Error::Internal->new( reason => "Normalizing domain returned errors.", data => $errors ); + die Zonemaster::Backend::Error::Internal->new( reason => "Normalizing domain returned errors.", data => [ map { $_->string } @{$errors} ] ); } return $normalized_domain; diff --git a/share/patch/patch_db_zonemaster_backend_ver_11.0.3.pl b/share/patch/patch_db_zonemaster_backend_ver_11.0.3.pl index 120808ead..0e001f440 100644 --- a/share/patch/patch_db_zonemaster_backend_ver_11.0.3.pl +++ b/share/patch/patch_db_zonemaster_backend_ver_11.0.3.pl @@ -96,19 +96,24 @@ sub _update_data_nomalize_domains { my $progress = 0; while ( my $row = $sth1->fetchrow_hashref ) { - my $hash_id = $row->{hash_id}; - my $raw_params = decode_json($row->{params}); - my $domain = $raw_params->{domain}; + eval { + my $hash_id = $row->{hash_id}; + my $raw_params = decode_json($row->{params}); + my $domain = $raw_params->{domain}; - # This has never been cleaned - delete $raw_params->{user_ip}; + # This has never been cleaned + delete $raw_params->{user_ip}; - my $params = $db->encode_params( $raw_params ); - my $fingerprint = $db->generate_fingerprint( $raw_params ); + my $params = $db->encode_params( $raw_params ); + my $fingerprint = $db->generate_fingerprint( $raw_params ); - $domain = Zonemaster::Backend::DB::_normalize_domain( $domain ); + $domain = Zonemaster::Backend::DB::_normalize_domain( $domain ); - $db->dbh->do('UPDATE test_results SET domain = ?, params = ?, fingerprint = ? where hash_id = ?', undef, $domain, $params, $fingerprint, $hash_id); + $db->dbh->do('UPDATE test_results SET domain = ?, params = ?, fingerprint = ? where hash_id = ?', undef, $domain, $params, $fingerprint, $hash_id); + }; + if ($@) { + warn "Caught error while updating record, ignoring: $@\n"; + } $row_done += 1; my $new_progress = int(($row_done / $row_total) * 100); if ( $new_progress != $progress ) { From 235970356a297009892bd0b729371c6216fe0e1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABl=20Berthaud-M=C3=BCller?= Date: Mon, 11 Dec 2023 11:46:13 +0100 Subject: [PATCH 8/8] add hash id in warning message --- share/patch/patch_db_zonemaster_backend_ver_11.0.3.pl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/share/patch/patch_db_zonemaster_backend_ver_11.0.3.pl b/share/patch/patch_db_zonemaster_backend_ver_11.0.3.pl index 0e001f440..6ccf28c0c 100644 --- a/share/patch/patch_db_zonemaster_backend_ver_11.0.3.pl +++ b/share/patch/patch_db_zonemaster_backend_ver_11.0.3.pl @@ -96,8 +96,8 @@ sub _update_data_nomalize_domains { my $progress = 0; while ( my $row = $sth1->fetchrow_hashref ) { + my $hash_id = $row->{hash_id}; eval { - my $hash_id = $row->{hash_id}; my $raw_params = decode_json($row->{params}); my $domain = $raw_params->{domain}; @@ -112,7 +112,7 @@ sub _update_data_nomalize_domains { $db->dbh->do('UPDATE test_results SET domain = ?, params = ?, fingerprint = ? where hash_id = ?', undef, $domain, $params, $fingerprint, $hash_id); }; if ($@) { - warn "Caught error while updating record, ignoring: $@\n"; + warn "Caught error while updating record with hash id $hash_id, ignoring: $@\n"; } $row_done += 1; my $new_progress = int(($row_done / $row_total) * 100);