From e0ef785143f0e255f2db2baecc7130318aa154de Mon Sep 17 00:00:00 2001 From: Tiago Peczenyj Date: Wed, 13 Dec 2023 20:15:17 +0100 Subject: [PATCH] Refactor bitfield & others (#19) * increase performance in 17% on TO_JSON method when it is bitfield by limit data size * small refactor on range section * continue refactor on bitfield, range section and publisher restriction * refactor offsets * add changes * refactor offset / data_size * verify if offset exists on range section Parse method * fix tidy * restrict bitfield data * tidy code --- Changes | 2 + lib/GDPR/IAB/TCFv2.pm | 82 +++++++++------------ lib/GDPR/IAB/TCFv2/BitField.pm | 28 ++----- lib/GDPR/IAB/TCFv2/BitUtils.pm | 16 ++-- lib/GDPR/IAB/TCFv2/PublisherRestrictions.pm | 68 ++++++++++++++--- lib/GDPR/IAB/TCFv2/RangeSection.pm | 46 +++++++----- t/00-load.t | 10 +-- t/01-parse.t | 2 +- 8 files changed, 141 insertions(+), 113 deletions(-) diff --git a/Changes b/Changes index 51bb70b..aa30003 100644 --- a/Changes +++ b/Changes @@ -1,3 +1,5 @@ + - refactor on Publisher Restriction parsing. + - small fixes about data and offset. - performance improvement: when we parse a range-based consent string now the Parse method is 23% faster, TO_JSON is 9% faster and check vendor consent or legitimate interest is between 122% and 137% faster than the previous version - remove GDPR::IAB::TCFv2::RangeConsent package diff --git a/lib/GDPR/IAB/TCFv2.pm b/lib/GDPR/IAB/TCFv2.pm index 67d62d9..6688fe0 100644 --- a/lib/GDPR/IAB/TCFv2.pm +++ b/lib/GDPR/IAB/TCFv2.pm @@ -120,15 +120,6 @@ sub Parse { croak 'invalid vendor list version' if $self->vendor_list_version == 0; - # TODO parse special feature opt in - - #_parse_bitfield() - - # TODO parse purpose section - # TODO parse purpose consent - - # TODO parse purpose legitimate interest - # parse vendor section # parse vendor consent @@ -330,7 +321,7 @@ sub check_publisher_restriction { my ( $self, $purpose_id, $restrict_type, $vendor ) = @_; return $self->{publisher_restrictions} - ->check_publisher_restriction( $purpose_id, $restrict_type, $vendor ); + ->contains( $purpose_id, $restrict_type, $vendor ); } sub _format_date { @@ -495,37 +486,20 @@ sub _parse_vendor_legitimate_interests { sub _parse_publisher_restrictions { my ( $self, $pub_restrict_offset ) = @_; - my ( $num_restrictions, $next_offset ) = - get_uint12( $self->{data}, $pub_restrict_offset ); - - my %restrictions; - - for ( 1 .. $num_restrictions ) { - my ( $purpose_id, $restriction_type, $vendor_restrictions ); - - ( $purpose_id, $next_offset ) = - get_uint6( $self->{data}, $next_offset ); - - ( $restriction_type, $next_offset ) = - get_uint2( $self->{data}, $next_offset ); - - ( $vendor_restrictions, $next_offset ) = $self->_parse_range_section( - ASSUMED_MAX_VENDOR_ID, - $next_offset - ); + my $data = + substr( $self->{data}, $pub_restrict_offset, ASSUMED_MAX_VENDOR_ID ); - $restrictions{$purpose_id} ||= {}; - - $restrictions{$purpose_id}->{$restriction_type} = $vendor_restrictions; - } - - my $publisher_restrictions = GDPR::IAB::TCFv2::PublisherRestrictions->new( - restrictions => \%restrictions, - ); + my ( $publisher_restrictions, $relative_next_offset ) = + GDPR::IAB::TCFv2::PublisherRestrictions->Parse( + data => $data, + data_size => length( $self->{data} ), + max_id => ASSUMED_MAX_VENDOR_ID, + options => $self->{options}, + ); $self->{publisher_restrictions} = $publisher_restrictions; - return $next_offset; + return $pub_restrict_offset + $relative_next_offset; } sub _get_core_tc_string { @@ -571,30 +545,40 @@ sub _is_vendor_consent_range_encoding { } sub _parse_range_section { - my ( $self, $max_id, $offset ) = @_; + my ( $self, $max_id, $range_section_start_offset ) = @_; + + my $data = substr( $self->{data}, $range_section_start_offset, $max_id ); my ( $range_section, $next_offset ) = GDPR::IAB::TCFv2::RangeSection->Parse( - data => $self->{data}, - offset => $offset, - max_id => $max_id, - options => $self->{options}, + data => $data, + data_size => length( $self->{data} ), + offset => 0, + max_id => $max_id, + options => $self->{options}, ); - return ( $range_section, $next_offset ); + return + wantarray + ? ( $range_section, $range_section_start_offset + $next_offset ) + : $range_section; } sub _parse_bitfield { - my ( $self, $max_id, $offset ) = @_; + my ( $self, $max_id, $bitfield_start_offset ) = @_; + + my $data = substr( $self->{data}, $bitfield_start_offset, $max_id ); my ( $bitfield, $next_offset ) = GDPR::IAB::TCFv2::BitField->Parse( - data => $self->{data}, - offset => $offset, - max_id => $max_id, - options => $self->{options}, + data => $data, + data_size => length( $self->{data} ), + max_id => $max_id, + options => $self->{options}, ); - return ( $bitfield, $next_offset ); + return wantarray + ? ( $bitfield, $bitfield_start_offset + $next_offset ) + : $bitfield; } sub looksLikeIsConsentVersion2 { diff --git a/lib/GDPR/IAB/TCFv2/BitField.pm b/lib/GDPR/IAB/TCFv2/BitField.pm index 25912c4..48a2f00 100644 --- a/lib/GDPR/IAB/TCFv2/BitField.pm +++ b/lib/GDPR/IAB/TCFv2/BitField.pm @@ -10,32 +10,28 @@ use Carp qw; sub Parse { my ( $klass, %args ) = @_; - croak "missing 'data'" unless defined $args{data}; - croak "missing 'offset'" unless defined $args{offset}; + croak "missing 'data'" unless defined $args{data}; croak "missing 'max_id'" unless defined $args{max_id}; croak "missing 'options'" unless defined $args{options}; croak "missing 'options.json'" unless defined $args{options}->{json}; - my $data = $args{data}; - my $offset = $args{offset}; - my $max_id = $args{max_id}; - my $options = $args{options}; - - my $data_size = length($data); + my $data = $args{data}; + my $data_size = $args{data_size}; + my $offset = 0; + my $max_id = $args{max_id}; + my $options = $args{options}; # add 7 to force rounding to next integer value - my $bytes_required = ( $max_id + $offset + 7 ) / 8; + my $bytes_required = ( $max_id + 7 ) / 8; croak "a BitField for $max_id requires a consent string of $bytes_required bytes. This consent string had $data_size" if $data_size < $bytes_required; my $self = { - - # TODO consider store data as arrayref of bits - data => substr( $data, $offset ), + data => substr( $data, $offset, $max_id ), max_id => $max_id, options => $options, }; @@ -56,14 +52,6 @@ sub contains { return is_set( $self->{data}, $id - 1 ); } -sub all { - my $self = shift; - - my @data = split //, $self->{data}; - - return [ grep { $data[ $_ - 1 ] } 1 .. $self->{max_id} ]; -} - sub TO_JSON { my $self = shift; diff --git a/lib/GDPR/IAB/TCFv2/BitUtils.pm b/lib/GDPR/IAB/TCFv2/BitUtils.pm index a010ad5..7231b7e 100644 --- a/lib/GDPR/IAB/TCFv2/BitUtils.pm +++ b/lib/GDPR/IAB/TCFv2/BitUtils.pm @@ -32,8 +32,11 @@ our @EXPORT_OK = qw length($data); + my $data_size = length($data); + + croak + "index out of bounds on offset $offset: can't read 1, only has: $data_size" + if $offset + 1 > $data_size; my $r = substr( $data, $offset, 1 ) == 1; @@ -139,8 +142,6 @@ sub get_uint36 { sub _get_bits_with_padding { my ( $data, $bits, $offset, $nbits ) = @_; - # TODO check if offset is in range of $data ? - my ( $data_with_padding, $next_offset ) = _add_padding( $data, $bits, $offset, $nbits ); @@ -152,8 +153,11 @@ sub _get_bits_with_padding { sub _add_padding { my ( $data, $bits, $offset, $nbits ) = @_; - croak "index out of bounds on offset $offset" - if $offset + $nbits > length($data); + my $data_size = length($data); + + croak + "index out of bounds on offset $offset: can't read $nbits, only has: $data_size" + if $offset + $nbits > $data_size; my $padding = "0" x ( $bits - $nbits ); diff --git a/lib/GDPR/IAB/TCFv2/PublisherRestrictions.pm b/lib/GDPR/IAB/TCFv2/PublisherRestrictions.pm index be3f6cb..91697fc 100644 --- a/lib/GDPR/IAB/TCFv2/PublisherRestrictions.pm +++ b/lib/GDPR/IAB/TCFv2/PublisherRestrictions.pm @@ -4,22 +4,67 @@ use warnings; use Carp qw; -sub new { +use GDPR::IAB::TCFv2::BitUtils qw; + +sub Parse { my ( $klass, %args ) = @_; - my $restrictions = $args{restrictions} - or croak "missing field 'restrictions'"; + croak "missing 'data'" unless defined $args{data}; + croak "missing 'data_size'" unless defined $args{data_size}; + croak "missing 'max_id'" + unless defined $args{max_id}; + + croak "missing 'options'" unless defined $args{options}; + croak "missing 'options.json'" unless defined $args{options}->{json}; + + my $data = $args{data}; + my $data_size = $args{data_size}; + my $offset = 0; + my $max_id = $args{max_id}; + my $options = $args{options}; + + my ( $num_restrictions, $next_offset ) = get_uint12( $data, $offset ); + + my %restrictions; + + for ( 1 .. $num_restrictions ) { + my ( $purpose_id, $restriction_type, $vendor_restrictions ); + + ( $purpose_id, $next_offset ) = get_uint6( $data, $next_offset ); + + ( $restriction_type, $next_offset ) = get_uint2( $data, $next_offset ); + + ( $vendor_restrictions, $next_offset ) = + GDPR::IAB::TCFv2::RangeSection->Parse( + data => $data, + data_size => $data_size, + offset => $next_offset, + max_id => $max_id, + options => $options, + ); + + $restrictions{$purpose_id} ||= {}; + + $restrictions{$purpose_id}->{$restriction_type} = $vendor_restrictions; + } my $self = { - restrictions => $restrictions, + restrictions => \%restrictions, }; bless $self, $klass; - return $self; + return wantarray ? ( $self, $next_offset ) : $self; } -sub check_publisher_restriction { +sub contains { my ( $self, $purpose_id, $restrict_type, $vendor ) = @_; return 0 @@ -62,12 +107,11 @@ GDPR::IAB::TCFv2::PublisherRestrictions - Transparency & Consent String version =head1 SYNOPSIS - my $range = GDPR::IAB::TCFv2::PublisherRestrictions->new( - restrictions => { - purpose id => { - restriction type => instance of GDPR::IAB::TCFv2::RangeSection - }, - }, + my ($publisher_restrictions, $next_offset) = GDPR::IAB::TCFv2::PublisherRestrictions->Parse( + data => $self->{data}, + offset => $pub_restrict_offset, + max_id =>ASSUMED_MAX_VENDOR_ID, + options => $self->{options}, ); die "there is publisher restriction on purpose id 1, type 0 on vendor 284" diff --git a/lib/GDPR/IAB/TCFv2/RangeSection.pm b/lib/GDPR/IAB/TCFv2/RangeSection.pm index 2260d1c..fe5611c 100644 --- a/lib/GDPR/IAB/TCFv2/RangeSection.pm +++ b/lib/GDPR/IAB/TCFv2/RangeSection.pm @@ -10,24 +10,24 @@ use Carp qw; sub Parse { my ( $klass, %args ) = @_; - croak "missing 'data'" unless defined $args{data}; - croak "missing 'offset'" unless defined $args{offset}; + croak "missing 'data'" unless defined $args{data}; + croak "missing 'data_size'" unless defined $args{data_size}; + croak "missing 'offset'" unless defined $args{offset}; croak "missing 'max_id'" unless defined $args{max_id}; croak "missing 'options'" unless defined $args{options}; croak "missing 'options.json'" unless defined $args{options}->{json}; - my $data = $args{data}; - my $offset = $args{offset}; - my $max_id = $args{max_id}; - my $options = $args{options}; + my $data = $args{data}; + my $data_size = $args{data_size}; + my $offset = $args{offset}; + my $max_id = $args{max_id}; + my $options = $args{options}; - my $data_size = length($data); - - croak + Carp::confess "a BitField for vendor consent strings using RangeSections require at least 31 bytes. Got $data_size" - if $data_size < 32; + if $data_size < 31; my ( $num_entries, $next_offset ) = get_uint12( $data, $offset ); @@ -36,7 +36,9 @@ sub Parse { foreach my $i ( 1 .. $num_entries ) { my $range; ( $range, $next_offset ) = _parse_range( - $data, $next_offset, + $data, + $data_size, + $next_offset, $max_id, $options, ); @@ -56,16 +58,14 @@ sub Parse { } sub _parse_range { - my ( $data, $initial_bit, $max_id, $options ) = @_; - - my $data_size = length($data); + my ( $data, $data_size, $offset, $max_id, $options ) = @_; croak - "bit $initial_bit was suppose to start a new range entry, but the consent string was only $data_size bytes long" - if $data_size <= $initial_bit / 8; + "bit $offset was suppose to start a new range entry, but the consent string was only $data_size bytes long" + if $data_size <= $offset / 8; # If the first bit is set, it's a Range of IDs - my ( $is_range, $next_offset ) = is_set $data, $initial_bit; + my ( $is_range, $next_offset ) = is_set $data, $offset; if ($is_range) { my ( $start, $end ); @@ -73,9 +73,15 @@ sub _parse_range { ( $end, $next_offset ) = get_uint16( $data, $next_offset ); croak - "bit $initial_bit range entry exclusion ends at $end, but the max vendor ID is $max_id" + "bit $offset range entry exclusion starts at $start, but the min vendor ID is 1" + if 1 > $start; + + croak + "bit $offset range entry exclusion ends at $end, but the max vendor ID is $max_id" if $end > $max_id; + croak "start $start can't be bigger than end $end" if $start > $end; + return [ $start, $end ], $next_offset; } @@ -85,8 +91,8 @@ sub _parse_range { ( $vendor_id, $next_offset ) = get_uint16( $data, $next_offset ); croak - "bit $initial_bit range entry excludes vendor $vendor_id, but only vendors [1, $max_id] are valid" - if $vendor_id > $max_id; + "bit $offset range entry exclusion vendor $vendor_id, but only vendors [1, $max_id] are valid" + if 1 > $vendor_id || $vendor_id > $max_id; return [ $vendor_id, $vendor_id ], $next_offset; } diff --git a/t/00-load.t b/t/00-load.t index 22d79a1..54e7870 100644 --- a/t/00-load.t +++ b/t/00-load.t @@ -28,13 +28,13 @@ subtest "check interfaces" => sub { isa_ok 'GDPR::IAB::TCFv2::Constants::SpecialFeature', 'Exporter'; isa_ok 'GDPR::IAB::TCFv2::Constants::RestrictionType', 'Exporter'; - my @methods = qw; + my @role_methods = qw; - can_ok 'GDPR::IAB::TCFv2::BitField', @methods; - can_ok 'GDPR::IAB::TCFv2::RangeSection', @methods; + can_ok 'GDPR::IAB::TCFv2::BitField', @role_methods; + can_ok 'GDPR::IAB::TCFv2::RangeSection', @role_methods; + can_ok 'GDPR::IAB::TCFv2::PublisherRestrictions', @role_methods; - can_ok 'GDPR::IAB::TCFv2::PublisherRestrictions', - qw; + can_ok 'GDPR::IAB::TCFv2::RangeSection', qw; done_testing; }; diff --git a/t/01-parse.t b/t/01-parse.t index 99146d9..96285d0 100644 --- a/t/01-parse.t +++ b/t/01-parse.t @@ -427,7 +427,7 @@ subtest "invalid tcf consent string candidates" => sub { GDPR::IAB::TCFv2->Parse( 'COvcSpYOvcSpYC9AAAENAPCAAAAAAAAAAAAAAFQBgAAgABAACAAEAAQAAgAA') } - qr/index out of bounds on offset 360/, + qr/index out of bounds on offset 0: can't read 12, only has: 10/, 'this test uses a crafted consent uses range section, declares 10 vendors, 6 exceptions and legitimate interest without require';