Skip to content

Commit

Permalink
Merge pull request zonemaster#1132 from blacksponge/domain-normalization
Browse files Browse the repository at this point in the history
use name normalization method
  • Loading branch information
hannaeko authored Dec 13, 2023
2 parents ca6590b + 2359703 commit d5b4e57
Show file tree
Hide file tree
Showing 6 changed files with 83 additions and 45 deletions.
10 changes: 7 additions & 3 deletions lib/Zonemaster/Backend/DB.pm
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use POSIX qw( strftime );
use Readonly;
use Try::Tiny;

use Zonemaster::Engine::Normalization;
use Zonemaster::Backend::Errors;
use Zonemaster::Engine::Logger::Entry;

Expand Down Expand Up @@ -874,10 +875,13 @@ sub process_dead_test {
sub _normalize_domain {
my ( $domain ) = @_;

$domain = lc( $domain );
$domain =~ s/\.$// unless $domain eq '.';
my ( $errors, $normalized_domain ) = normalize_name( $domain );

return $domain;
if ( scalar( @{$errors} ) ) {
die Zonemaster::Backend::Error::Internal->new( reason => "Normalizing domain returned errors.", data => [ map { $_->string } @{$errors} ] );
}

return $normalized_domain;
}

sub _project_params {
Expand Down
10 changes: 4 additions & 6 deletions lib/Zonemaster/Backend/RPCAPI.pm
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ use Encode;

# Zonemaster Modules
use Zonemaster::Engine;
use Zonemaster::Engine::Normalization;
use Zonemaster::Engine::Profile;
use Zonemaster::Engine::Recursor;
use Zonemaster::Backend;
Expand Down Expand Up @@ -223,16 +224,17 @@ sub get_data_from_parent_zone {
my $result = eval {
my %result;
my $domain = $params->{domain};
my ( $_errors, $normalized_domain ) = normalize_name( $domain );

my @ns_list;
my @ns_names;

my $zone = Zonemaster::Engine->zone( $domain );
my $zone = Zonemaster::Engine->zone( $normalized_domain );
push @ns_list, { ns => $_->name->string, ip => $_->address->short} for @{$zone->glue};

my @ds_list;

$zone = Zonemaster::Engine->zone($domain);
$zone = Zonemaster::Engine->zone($normalized_domain);
my $ds_p = $zone->parent->query_one( $zone->name, 'DS', { dnssec => 1, cd => 1, recurse => 1 } );
if ($ds_p) {
my @ds = $ds_p->get_records( 'DS', 'answer' );
Expand Down Expand Up @@ -291,10 +293,6 @@ sub start_domain_test {

my $result = 0;
eval {
$params->{domain} =~ s/^\.// unless ( !$params->{domain} || $params->{domain} eq '.' );

die "No domain in parameters\n" unless ( defined $params->{domain} && length($params->{domain}) );

$params->{profile} //= "default";
$params->{priority} //= 10;
$params->{queue} //= 0;
Expand Down
31 changes: 5 additions & 26 deletions lib/Zonemaster/Backend/Validator.pm
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ use Readonly;
use Locale::TextDomain qw[Zonemaster-Backend];
use Net::IP::XS;
use Zonemaster::Engine::Logger::Entry;
use Zonemaster::Engine::Normalization;
use Zonemaster::LDNS;

our @EXPORT_OK = qw(
Expand Down Expand Up @@ -281,35 +282,13 @@ sub check_domain {
return N__ 'Domain name required';
}

if ( $domain =~ m/[^[:ascii:]]+/ ) {
if ( Zonemaster::LDNS::has_idn() ) {
eval { $domain = Zonemaster::LDNS::to_idn( $domain ); };
if ( $@ ) {
return N__ 'The domain name is IDNA invalid';
}
}
else {
return N__ 'The domain name contains non-ascii characters and IDNA support is not installed';
}
}

if ( $domain !~ m{^[a-z0-9_./-]+$}i ) {
return N__ 'The domain name character(s) are not supported';
}

if ( $domain =~ m/\.\./i ) {
return N__ 'The domain name contains consecutive dots';
}
my ( $errors, $_domain ) = normalize_name( $domain );

my %levels = Zonemaster::Engine::Logger::Entry::levels();
my @res;
@res = Zonemaster::Engine::Test::Basic->basic00( $domain );
@res = grep { $_->numeric_level >= $levels{ERROR} } @res;
if ( @res != 0 ) {
return N__ 'The domain name or label is too long';
if ( @{$errors} ) {
return $errors->[0]->message;
}

return undef;
return undef
}

=head2 check_language_tag($value, %locales)
Expand Down
71 changes: 64 additions & 7 deletions share/patch/patch_db_zonemaster_backend_ver_11.0.3.pl
Original file line number Diff line number Diff line change
Expand Up @@ -15,23 +15,25 @@
);

my $db_engine = $config->DB_engine;
print "engine: $db_engine\n";
print "Configured database engine: $db_engine\n";

if ( $db_engine =~ /^(MySQL|PostgreSQL|SQLite)$/ ) {
print( "Starting database migration\n" );
$patch{ lc $db_engine }();
print( "\nMigration done\n" );
}
else {
die "Unknown database engine configured: $db_engine\n";
}

sub _update_data {
sub _update_data_result_entries {
my ( $dbh ) = @_;

my $json = JSON::PP->new->allow_blessed->convert_blessed->canonical;

# update only jobs with results
my ( $row_total ) = $dbh->selectrow_array( 'SELECT count(*) FROM test_results WHERE results IS NOT NULL' );
print "count: $row_total\n";
print "Will update $row_total rows\n";

my %levels = Zonemaster::Engine::Logger::Entry->levels();

Expand All @@ -40,7 +42,7 @@ sub _update_data {
my $row_count = 50000;
my $row_done = 0;
while ( $row_done < $row_total ) {
print "row_done/row_total: $row_done / $row_total\n";
print "Progress update: $row_done / $row_total\n";
my $row_updated = 0;
my $sth1 = $dbh->prepare( 'SELECT hash_id, results FROM test_results WHERE results IS NOT NULL ORDER BY id ASC LIMIT ?,?' );
$sth1->execute( $row_done, $row_count );
Expand Down Expand Up @@ -77,7 +79,48 @@ sub _update_data {
# increase by min(row_updated, row_count)
$row_done += ( $row_updated < $row_count ) ? $row_updated : $row_count;
}
print "row_done/row_total: $row_done / $row_total\n";
print "Progress update: $row_done / $row_total\n";
}

sub _update_data_nomalize_domains {
my ( $db ) = @_;

my ( $row_total ) = $db->dbh->selectrow_array( 'SELECT count(*) FROM test_results' );
print "Will update $row_total rows\n";


my $sth1 = $db->dbh->prepare( 'SELECT hash_id, params FROM test_results' );
$sth1->execute;

my $row_done = 0;
my $progress = 0;

while ( my $row = $sth1->fetchrow_hashref ) {
my $hash_id = $row->{hash_id};
eval {
my $raw_params = decode_json($row->{params});
my $domain = $raw_params->{domain};

# This has never been cleaned
delete $raw_params->{user_ip};

my $params = $db->encode_params( $raw_params );
my $fingerprint = $db->generate_fingerprint( $raw_params );

$domain = Zonemaster::Backend::DB::_normalize_domain( $domain );

$db->dbh->do('UPDATE test_results SET domain = ?, params = ?, fingerprint = ? where hash_id = ?', undef, $domain, $params, $fingerprint, $hash_id);
};
if ($@) {
warn "Caught error while updating record with hash id $hash_id, ignoring: $@\n";
}
$row_done += 1;
my $new_progress = int(($row_done / $row_total) * 100);
if ( $new_progress != $progress ) {
$progress = $new_progress;
print("$progress%\n");
}
}
}

sub patch_db_mysql {
Expand All @@ -91,7 +134,11 @@ sub patch_db_mysql {
try {
$db->create_schema();

_update_data( $dbh );
print( "\n-> (1/2) Populating new result_entries table\n" );
_update_data_result_entries( $dbh );

print( "\n-> (2/2) Normalizing domain names\n" );
_update_data_nomalize_domains( $db );

$dbh->commit();
} catch {
Expand All @@ -112,6 +159,8 @@ sub patch_db_postgresql {
try {
$db->create_schema();

print( "\n-> (1/2) Populating new result_entries table\n" );

$dbh->do(q[
INSERT INTO result_entries (
hash_id, args, module, level, tag, timestamp, testcase
Expand Down Expand Up @@ -139,6 +188,10 @@ sub patch_db_postgresql {
'UPDATE test_results SET results = NULL WHERE results IS NOT NULL'
);


print( "\n-> (2/2) Normalizing domain names\n" );
_update_data_nomalize_domains( $db );

$dbh->commit();
} catch {
print( "Could not upgrade database: " . $_ );
Expand All @@ -158,7 +211,11 @@ sub patch_db_sqlite {
try {
$db->create_schema();

_update_data( $dbh );
print( "\n-> (1/2) Populating new result_entries table\n" );
_update_data_result_entries( $dbh );

print( "\n-> (2/2) Normalizing domain names\n" );
_update_data_nomalize_domains( $db );

$dbh->commit();
} catch {
Expand Down
4 changes: 2 additions & 2 deletions t/db.t
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,8 @@ subtest 'encoding and fingerprint' => sub {
};

subtest 'IDN domain' => sub {
my $expected_encoded_params = encode_utf8( '{"domain":"café.example","ds_info":[],"ipv4":true,"ipv6":true,"nameservers":[],"profile":"default"}' );
my $expected_fingerprint = '8c64f7feaa3f13b77e769720991f2a79';
my $expected_encoded_params = encode_utf8( '{"domain":"xn--caf-dma.example","ds_info":[],"ipv4":true,"ipv6":true,"nameservers":[],"profile":"default"}' );
my $expected_fingerprint = '8cb027ff2c175f48aed2623abad0cdd2';

my %params = ( domain => "café.example" );
$params{ipv4} = JSON::PP->true;
Expand Down
2 changes: 1 addition & 1 deletion t/parameters_validation.t
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ subtest 'Test custom formats' => sub {
},
output => [{
path => '/my_domain',
message => 'The domain name character(s) are not supported'
message => 'Domain name has an ASCII label ("not a domain") with a character not permitted.'
}]
},
{
Expand Down

0 comments on commit d5b4e57

Please sign in to comment.