Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

use name normalization method #1132

Merged
merged 8 commits into from
Dec 13, 2023
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions lib/Zonemaster/Backend/DB.pm
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use POSIX qw( strftime );
use Readonly;
use Try::Tiny;

use Zonemaster::Engine::Normalization;
use Zonemaster::Backend::Errors;
use Zonemaster::Engine::Logger::Entry;

Expand Down Expand Up @@ -874,10 +875,13 @@ sub process_dead_test {
sub _normalize_domain {
my ( $domain ) = @_;

$domain = lc( $domain );
$domain =~ s/\.$// unless $domain eq '.';
my ( $errors, $normalized_domain ) = normalize_name( $domain );

return $domain;
if ( scalar( @{$errors} ) ) {
die Zonemaster::Backend::Error::Internal->new( reason => "Normalizing domain returned errors.", data => $errors );
}

return $normalized_domain;
}

sub _project_params {
Expand Down
10 changes: 4 additions & 6 deletions lib/Zonemaster/Backend/RPCAPI.pm
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ use Encode;

# Zonemaster Modules
use Zonemaster::Engine;
use Zonemaster::Engine::Normalization;
use Zonemaster::Engine::Profile;
use Zonemaster::Engine::Recursor;
use Zonemaster::Backend;
Expand Down Expand Up @@ -223,16 +224,17 @@ sub get_data_from_parent_zone {
my $result = eval {
my %result;
my $domain = $params->{domain};
my ( $_errors, $normalized_domain ) = normalize_name( $domain );

my @ns_list;
my @ns_names;

my $zone = Zonemaster::Engine->zone( $domain );
my $zone = Zonemaster::Engine->zone( $normalized_domain );
push @ns_list, { ns => $_->name->string, ip => $_->address->short} for @{$zone->glue};

my @ds_list;

$zone = Zonemaster::Engine->zone($domain);
$zone = Zonemaster::Engine->zone($normalized_domain);
my $ds_p = $zone->parent->query_one( $zone->name, 'DS', { dnssec => 1, cd => 1, recurse => 1 } );
if ($ds_p) {
my @ds = $ds_p->get_records( 'DS', 'answer' );
Expand Down Expand Up @@ -291,10 +293,6 @@ sub start_domain_test {

my $result = 0;
eval {
$params->{domain} =~ s/^\.// unless ( !$params->{domain} || $params->{domain} eq '.' );

die "No domain in parameters\n" unless ( defined $params->{domain} && length($params->{domain}) );

$params->{profile} //= "default";
$params->{priority} //= 10;
$params->{queue} //= 0;
Expand Down
31 changes: 5 additions & 26 deletions lib/Zonemaster/Backend/Validator.pm
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ use Readonly;
use Locale::TextDomain qw[Zonemaster-Backend];
use Net::IP::XS;
use Zonemaster::Engine::Logger::Entry;
use Zonemaster::Engine::Normalization;
use Zonemaster::LDNS;

our @EXPORT_OK = qw(
Expand Down Expand Up @@ -281,35 +282,13 @@ sub check_domain {
return N__ 'Domain name required';
}

if ( $domain =~ m/[^[:ascii:]]+/ ) {
if ( Zonemaster::LDNS::has_idn() ) {
eval { $domain = Zonemaster::LDNS::to_idn( $domain ); };
if ( $@ ) {
return N__ 'The domain name is IDNA invalid';
}
}
else {
return N__ 'The domain name contains non-ascii characters and IDNA support is not installed';
}
}

if ( $domain !~ m{^[a-z0-9_./-]+$}i ) {
return N__ 'The domain name character(s) are not supported';
}

if ( $domain =~ m/\.\./i ) {
return N__ 'The domain name contains consecutive dots';
}
my ( $errors, $_domain ) = normalize_name( $domain );

my %levels = Zonemaster::Engine::Logger::Entry::levels();
my @res;
@res = Zonemaster::Engine::Test::Basic->basic00( $domain );
@res = grep { $_->numeric_level >= $levels{ERROR} } @res;
if ( @res != 0 ) {
return N__ 'The domain name or label is too long';
if ( @{$errors} ) {
return $errors->[0]->message;
}

return undef;
return undef
}

=head2 check_language_tag($value, %locales)
Expand Down
66 changes: 59 additions & 7 deletions share/patch/patch_db_zonemaster_backend_ver_11.0.3.pl
Original file line number Diff line number Diff line change
Expand Up @@ -15,23 +15,25 @@
);

my $db_engine = $config->DB_engine;
print "engine: $db_engine\n";
print "Configured database engine: $db_engine\n";

if ( $db_engine =~ /^(MySQL|PostgreSQL|SQLite)$/ ) {
print( "Starting database migration\n" );
$patch{ lc $db_engine }();
print( "\nMigration done\n" );
}
else {
die "Unknown database engine configured: $db_engine\n";
}

sub _update_data {
sub _update_data_result_entries {
my ( $dbh ) = @_;

my $json = JSON::PP->new->allow_blessed->convert_blessed->canonical;

# update only jobs with results
my ( $row_total ) = $dbh->selectrow_array( 'SELECT count(*) FROM test_results WHERE results IS NOT NULL' );
print "count: $row_total\n";
print "Will update $row_total rows\n";

my %levels = Zonemaster::Engine::Logger::Entry->levels();

Expand All @@ -40,7 +42,7 @@ sub _update_data {
my $row_count = 50000;
my $row_done = 0;
while ( $row_done < $row_total ) {
print "row_done/row_total: $row_done / $row_total\n";
print "Progress update: $row_done / $row_total\n";
my $row_updated = 0;
my $sth1 = $dbh->prepare( 'SELECT hash_id, results FROM test_results WHERE results IS NOT NULL ORDER BY id ASC LIMIT ?,?' );
$sth1->execute( $row_done, $row_count );
Expand Down Expand Up @@ -77,7 +79,43 @@ sub _update_data {
# increase by min(row_updated, row_count)
$row_done += ( $row_updated < $row_count ) ? $row_updated : $row_count;
}
print "row_done/row_total: $row_done / $row_total\n";
print "Progress update: $row_done / $row_total\n";
}

sub _update_data_nomalize_domains {
my ( $db ) = @_;

my ( $row_total ) = $db->dbh->selectrow_array( 'SELECT count(*) FROM test_results' );
print "Will update $row_total rows\n";


my $sth1 = $db->dbh->prepare( 'SELECT hash_id, params FROM test_results' );
$sth1->execute;

my $row_done = 0;
my $progress = 0;

while ( my $row = $sth1->fetchrow_hashref ) {
my $hash_id = $row->{hash_id};
my $raw_params = decode_json($row->{params});
my $domain = $raw_params->{domain};
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We're extracting domain from params and putting in its own field, but we're still keeping a copy inside params. Is this intentional? Should this be further cleaned up in the future?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The domain is still in the params, whether it could / should be removed is a good question, but outside of this PR.

Here I am taking the domain from the params instead its own field for normalization to avoid encoding issues.


# This has never been cleaned
delete $raw_params->{user_ip};

my $params = $db->encode_params( $raw_params );
my $fingerprint = $db->generate_fingerprint( $raw_params );

$domain = Zonemaster::Backend::DB::_normalize_domain( $domain );

$db->dbh->do('UPDATE test_results SET domain = ?, params = ?, fingerprint = ? where hash_id = ?', undef, $domain, $params, $fingerprint, $hash_id);
$row_done += 1;
my $new_progress = int(($row_done / $row_total) * 100);
if ( $new_progress != $progress ) {
$progress = $new_progress;
print("$progress%\n");
}
}
}

sub patch_db_mysql {
Expand All @@ -91,7 +129,11 @@ sub patch_db_mysql {
try {
$db->create_schema();

_update_data( $dbh );
print( "\n-> (1/2) Populating new result_entries table\n" );
_update_data_result_entries( $dbh );

print( "\n-> (2/2) Normalizing domain names\n" );
_update_data_nomalize_domains( $db );

$dbh->commit();
} catch {
Expand All @@ -112,6 +154,8 @@ sub patch_db_postgresql {
try {
$db->create_schema();

print( "\n-> (1/2) Populating new result_entries table\n" );

$dbh->do(q[
INSERT INTO result_entries (
hash_id, args, module, level, tag, timestamp, testcase
Expand Down Expand Up @@ -139,6 +183,10 @@ sub patch_db_postgresql {
'UPDATE test_results SET results = NULL WHERE results IS NOT NULL'
);


print( "\n-> (2/2) Normalizing domain names\n" );
_update_data_nomalize_domains( $db );

$dbh->commit();
} catch {
print( "Could not upgrade database: " . $_ );
Expand All @@ -158,7 +206,11 @@ sub patch_db_sqlite {
try {
$db->create_schema();

_update_data( $dbh );
print( "\n-> (1/2) Populating new result_entries table\n" );
_update_data_result_entries( $dbh );

print( "\n-> (2/2) Normalizing domain names\n" );
_update_data_nomalize_domains( $db );

$dbh->commit();
} catch {
Expand Down
4 changes: 2 additions & 2 deletions t/db.t
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,8 @@ subtest 'encoding and fingerprint' => sub {
};

subtest 'IDN domain' => sub {
my $expected_encoded_params = encode_utf8( '{"domain":"café.example","ds_info":[],"ipv4":true,"ipv6":true,"nameservers":[],"profile":"default"}' );
my $expected_fingerprint = '8c64f7feaa3f13b77e769720991f2a79';
my $expected_encoded_params = encode_utf8( '{"domain":"xn--caf-dma.example","ds_info":[],"ipv4":true,"ipv6":true,"nameservers":[],"profile":"default"}' );
my $expected_fingerprint = '8cb027ff2c175f48aed2623abad0cdd2';

my %params = ( domain => "café.example" );
$params{ipv4} = JSON::PP->true;
Expand Down
2 changes: 1 addition & 1 deletion t/parameters_validation.t
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ subtest 'Test custom formats' => sub {
},
output => [{
path => '/my_domain',
message => 'The domain name character(s) are not supported'
message => 'Domain name has an ASCII label ("not a domain") with a character not permitted.'
}]
},
{
Expand Down