From a322e05a11c9521fc419b8afe303d2ad6bbe9f3d Mon Sep 17 00:00:00 2001 From: Tiago Peczenyj Date: Mon, 4 Dec 2023 14:27:44 +0100 Subject: [PATCH] add full support to vendor consent and vendor legitimate interest, ias bitfield or range sections. fix issue #1 --- lib/GDPR/IAB/TCFv2.pm | 138 ++++++++++------------------- lib/GDPR/IAB/TCFv2/BitField.pm | 11 +-- lib/GDPR/IAB/TCFv2/BitUtils.pm | 112 +++++++++++++++++++++++ lib/GDPR/IAB/TCFv2/RangeConsent.pm | 33 +++++++ lib/GDPR/IAB/TCFv2/RangeSection.pm | 103 +++++++++++++++++++++ t/00-load.t | 17 +++- t/01-parse.t | 110 ++++++++++++++++++++++- 7 files changed, 422 insertions(+), 102 deletions(-) create mode 100644 lib/GDPR/IAB/TCFv2/BitUtils.pm create mode 100644 lib/GDPR/IAB/TCFv2/RangeConsent.pm create mode 100644 lib/GDPR/IAB/TCFv2/RangeSection.pm diff --git a/lib/GDPR/IAB/TCFv2.pm b/lib/GDPR/IAB/TCFv2.pm index 7c05a02..fcc0f5e 100644 --- a/lib/GDPR/IAB/TCFv2.pm +++ b/lib/GDPR/IAB/TCFv2.pm @@ -8,7 +8,9 @@ use version; our $VERSION = version->declare('v0.0.2'); use feature 'state'; +use GDPR::IAB::TCFv2::BitUtils qw(:all); use GDPR::IAB::TCFv2::BitField; +use GDPR::IAB::TCFv2::RangeSection; use MIME::Base64 qw; use Carp qw; @@ -57,25 +59,32 @@ sub Parse { $self->{vendor_consents} = $vendor_consents; my $legitimate_interest_max_vendor = - _get_uint16( $self->{data}, $legitimate_interest_start, 16 ); + get_uint16( $self->{data}, $legitimate_interest_start ); - croak "invalid consent data: no legitimate interest start position" + croak + "invalid consent data: no legitimate interest start position (got $legitimate_interest_start +16 but @{[ length( $self->{data} ) ]})" if $legitimate_interest_start + 16 > length( $self->{data} ); + my $is_vendor_legitimate_interest_range = is_set( $data, $legitimate_interest_start + 16 ); + $self->{legitimate_interest_start} = $legitimate_interest_start + 17; my $vendor_legitimate_interests; my $pub_restrict_start; - if ( $self->_is_vendor_legitimate_interest_range_encoding ) { + if ( $is_vendor_legitimate_interest_range ) { ( $vendor_legitimate_interests, $pub_restrict_start ) = - $self->_parseRangeSection( $legitimate_interest_max_vendor, - $self->{legitimate_interest_start} ); + $self->_parseRangeSection( + $legitimate_interest_max_vendor, + $self->{legitimate_interest_start} + ); } else { ( $vendor_legitimate_interests, $pub_restrict_start ) = - $self->_parseBitField( $legitimate_interest_max_vendor, - $self->{legitimate_interest_start} ); + $self->_parseBitField( + $legitimate_interest_max_vendor, + $self->{legitimate_interest_start} + ); } $self->{vendor_legitimate_interests} = $vendor_legitimate_interests; @@ -103,13 +112,13 @@ sub _decode_base64url { sub version { my $self = shift; - return _get_uint8( $self->{data}, 0, 6 ); + return get_uint6( $self->{data}, 0 ); } sub created { my $self = shift; - my $deciseconds = _get_uint64( $self->{data}, 6, 36 ); + my $deciseconds = get_uint36( $self->{data}, 6 ); return $deciseconds / 10; } @@ -117,7 +126,7 @@ sub created { sub last_updated { my $self = shift; - my $deciseconds = _get_uint64( $self->{data}, 42, 36 ); + my $deciseconds = get_uint36( $self->{data}, 42 ); return $deciseconds / 10; } @@ -125,49 +134,49 @@ sub last_updated { sub cmp_id { my $self = shift; - return _get_uint16( $self->{data}, 78, 12 ); + return get_uint12( $self->{data}, 78 ); } sub cmp_version { my $self = shift; - return _get_uint16( $self->{data}, 90, 12 ); + return get_uint12( $self->{data}, 90 ); } sub consent_screen { my $self = shift; - return _get_uint8( $self->{data}, 102, 6 ); + return get_uint6( $self->{data}, 102 ); } sub consent_language { my $self = shift; - return _get_ascii_sequence( $self->{data}, 2, 108, 6 ); + return get_char6_sequence( $self->{data}, 108, 2 ); } sub vendor_list_version { my $self = shift; - return _get_uint16( $self->{data}, 120, 12 ); + return get_uint12( $self->{data}, 120 ); } sub policy_version { my $self = shift; - return _get_uint8( $self->{data}, 132, 6 ); + return get_uint6( $self->{data}, 132 ); } sub is_service_specific { my $self = shift; - return _is_set( $self->{data}, 138 ); + return is_set( $self->{data}, 138 ); } sub use_non_standard_stacks { my $self = shift; - return _is_set( $self->{data}, 139 ); + return is_set( $self->{data}, 139 ); } sub is_special_feature_opt_in { @@ -176,7 +185,7 @@ sub is_special_feature_opt_in { croak "invalid special feature id $id: must be between 1 and 12" if $id < 1 || $id > 12; - return _is_set( $self->{data}, 140 + $id - 1 ); + return is_set( $self->{data}, 140 + $id - 1 ); } sub is_purpose_consent_allowed { @@ -185,7 +194,7 @@ sub is_purpose_consent_allowed { croak "invalid purpose id $id: must be between 1 and 24" if $id < 1 || $id > 24; - return _is_set( $self->{data}, 152 + $id - 1 ); + return is_set( $self->{data}, 152 + $id - 1 ); } sub is_purpose_legitimate_interest_allowed { @@ -194,25 +203,25 @@ sub is_purpose_legitimate_interest_allowed { croak "invalid purpose id $id: must be between 1 and 24" if $id < 1 || $id > 24; - return _is_set( $self->{data}, 176 + $id - 1 ); + return is_set( $self->{data}, 176 + $id - 1 ); } sub purpose_one_treatment { my $self = shift; - return _is_set( $self->{data}, 200 ); + return is_set( $self->{data}, 200 ); } sub publisher_country_code { my $self = shift; - return _get_ascii_sequence( $self->{data}, 2, 201, 6 ); + return get_char6_sequence( $self->{data}, 201, 2 ); } sub max_vendor_id { my $self = shift; - return _get_uint16( $self->{data}, 213, 16 ); + return get_uint16( $self->{data}, 213 ); } sub vendor_consent { @@ -230,19 +239,25 @@ sub vendor_legitimate_interest { sub _is_vendor_consent_range_encoding { my $self = shift; - return _is_set( $self->{data}, 229 ); + return is_set( $self->{data}, 229 ); } -sub _is_vendor_legitimate_interest_range_encoding { - my $self = shift; +sub _parseRangeSection { + my ( $self, $vendor_bits_required, $start_bit ) = @_; - return _is_set( $self->{data}, $self->{legitimate_interest_start} + 16 ); -} + my $data_size = length( $self->{data} ); -sub _parseRangeSection { - my ( $self, $vendor_bits_required, $startbit ) = @_; + croak + "a BitField for vendor consent strings using RangeSections require at least 31 bytes. Got $data_size" + if $data_size < 32; - return ( undef, 0 ); # do this later... + my $range_section = GDPR::IAB::TCFv2::RangeSection->new( + data => $self->{data}, + start_bit => $start_bit, + vendor_bits_required => $vendor_bits_required, + ); + + return ( $range_section, $range_section->current_offset ); } sub _parseBitField { @@ -266,65 +281,6 @@ sub _parseBitField { return ( $bitfield, $start_bit + $vendor_bits_required ); } -sub _is_set { - my ( $data, $offset ) = @_; - - return substr( $data, $offset, 1 ) == 1; -} - -sub _get_uint8 { - my ( $data, $offset, $nbits ) = @_; - - return unpack( - "C", - _get_bits_with_padding( $data, 8, $offset, $nbits ) - ); -} - -sub _get_ascii { - my ( $data, $offset, $nbits ) = @_; - - state $ascii_offset = ord("A"); - - return chr( $ascii_offset + _get_uint8( $data, $offset, $nbits ) ); -} - -sub _get_ascii_sequence { - my ( $data, $n, $offset, $nbits ) = @_; - - state $ascii_offset = ord("A"); - - return join "", - map { _get_ascii( $data, $offset + ( $_ * $nbits ), $nbits ) } - ( 0 .. $n - 1 ); -} - -sub _get_uint16 { - my ( $data, $offset, $nbits ) = @_; - - return unpack( - "S>", - _get_bits_with_padding( $data, 16, $offset, $nbits ) - ); -} - -sub _get_uint64 { - my ( $data, $offset, $nbits ) = @_; - - return unpack( - "Q>", - _get_bits_with_padding( $data, 64, $offset, $nbits ) - ); -} - -sub _get_bits_with_padding { - my ( $data, $bits, $offset, $nbits ) = @_; - - my $padding = "0" x ( $bits - $nbits ); - - return pack( "B${bits}", $padding . substr( $data, $offset, $nbits ) ); -} - sub looksLikeIsConsentVersion2 { my ($gdpr_consent_string) = @_; diff --git a/lib/GDPR/IAB/TCFv2/BitField.pm b/lib/GDPR/IAB/TCFv2/BitField.pm index 1246b25..eac19ca 100644 --- a/lib/GDPR/IAB/TCFv2/BitField.pm +++ b/lib/GDPR/IAB/TCFv2/BitField.pm @@ -4,7 +4,8 @@ use warnings; use integer; use bytes; -use Carp qw; +use GDPR::IAB::TCFv2::BitUtils qw; +use Carp qw; sub new { my ( $klass, %args ) = @_; @@ -38,13 +39,7 @@ sub vendor_consent { return if $id > $self->{vendor_bits_required}; - return _is_set( $self->{data}, $id - 1 ); -} - -sub _is_set { - my ( $data, $offset ) = @_; - - return substr( $data, $offset, 1 ) == 1; + return is_set( $self->{data}, $id - 1 ); } 1; diff --git a/lib/GDPR/IAB/TCFv2/BitUtils.pm b/lib/GDPR/IAB/TCFv2/BitUtils.pm new file mode 100644 index 0000000..f9812a3 --- /dev/null +++ b/lib/GDPR/IAB/TCFv2/BitUtils.pm @@ -0,0 +1,112 @@ +package GDPR::IAB::TCFv2::BitUtils; +use strict; +use warnings; +use integer; +use bytes; + +use feature 'state'; + +require Exporter; +use base qw; + +our @EXPORT_OK = qw; + +our %EXPORT_TAGS = ( + all => [ + qw + ] +); + +sub is_set { + my ( $data, $offset ) = @_; + + # TODO check if offset is in range of $data + + return substr( $data, $offset, 1 ) == 1; +} + +sub get_uint6 { + my ( $data, $offset ) = @_; + + return unpack( + "C", + _get_bits_with_padding( $data, 8, $offset, 6 ) + ); +} + +sub get_uint8 { + my ( $data, $offset ) = @_; + + return unpack( + "C", + _get_bits_with_padding( $data, 8, $offset, 8 ) + ); +} + +sub get_char6 { + my ( $data, $offset ) = @_; + + state $char_offset = ord("A"); + + return chr( $char_offset + get_uint6( $data, $offset ) ); +} + +sub get_char6_sequence { + my ( $data, $offset, $n ) = @_; + + return join "", + map { get_char6( $data, $offset + ( $_ * 6 ) ) } ( 0 .. $n - 1 ); +} + +sub get_uint12 { + my ( $data, $offset ) = @_; + + return unpack( + "S>", + _get_bits_with_padding( $data, 16, $offset, 12 ) + ); +} + +sub get_uint16 { + my ( $data, $offset ) = @_; + + return unpack( + "S>", + _get_bits_with_padding( $data, 16, $offset, 16 ) + ); +} + +sub get_uint36 { + my ( $data, $offset ) = @_; + + return unpack( + "Q>", + _get_bits_with_padding( $data, 64, $offset, 36 ) + ); +} + +sub _get_bits_with_padding { + my ( $data, $bits, $offset, $nbits ) = @_; + + # TODO check if offset is in range of $data ? + + my $padding = "0" x ( $bits - $nbits ); + + return pack( "B${bits}", $padding . substr( $data, $offset, $nbits ) ); +} + +1; diff --git a/lib/GDPR/IAB/TCFv2/RangeConsent.pm b/lib/GDPR/IAB/TCFv2/RangeConsent.pm new file mode 100644 index 0000000..33285c0 --- /dev/null +++ b/lib/GDPR/IAB/TCFv2/RangeConsent.pm @@ -0,0 +1,33 @@ +package GDPR::IAB::TCFv2::RangeConsent; +use strict; +use warnings; +use integer; +use bytes; + +use Carp qw; + +sub new { + my ( $klass, %args ) = @_; + + my $start = $args{start} or croak "missing field 'start'"; + my $end = $args{end} or croak "missing field 'end'"; + + croak "ops start should not be bigger than end" if $start > $end; + + my $self = { + start => $start, + end => $end, + }; + + bless $self, $klass; + + return $self; +} + +sub contains { + my ( $self, $id ) = @_; + + return $self->{start} <= $id && $id <= $self->{end}; +} + +1; diff --git a/lib/GDPR/IAB/TCFv2/RangeSection.pm b/lib/GDPR/IAB/TCFv2/RangeSection.pm new file mode 100644 index 0000000..564da49 --- /dev/null +++ b/lib/GDPR/IAB/TCFv2/RangeSection.pm @@ -0,0 +1,103 @@ +package GDPR::IAB::TCFv2::RangeSection; +use strict; +use warnings; +use integer; +use bytes; + +use GDPR::IAB::TCFv2::BitUtils qw; +use GDPR::IAB::TCFv2::RangeConsent; +use Carp qw; +use List::MoreUtils qw; + +sub new { + my ( $klass, %args ) = @_; + + my $data = $args{data} or croak "missing 'data'"; + my $start_bit = $args{start_bit} or croak "missing 'start_bit'"; + my $vendor_bits_required = $args{vendor_bits_required} + or croak "missing 'vendor_bits_required'"; + + # TODO add parse range consent + my $num_entries = get_uint12( $data, $start_bit ); + + my $current_offset = $start_bit + 12; + + my @consents; + + foreach my $i ( 1 .. $num_entries ) { + my ( $consent, $bits_consumed ) = + _parse_range_consent( $data, $current_offset, + $vendor_bits_required ); + + push @consents, $consent; + + $current_offset += $bits_consumed; + } + + my $self = { + consents => \@consents, + vendor_bits_required => $vendor_bits_required, + _current_offset => $current_offset, + }; + + bless $self, $klass; + + return $self; +} + +sub _parse_range_consent { + my ( $data, $initial_bit, $max_vendor_id ) = @_; + + my $data_size = length($data); + + croak + "bit $initial_bit was suppose to start a new range entry, but the consent string was only $data_size bytes long" + if $data_size <= $initial_bit / 8; + + # If the first bit is set, it's a Range of IDs + if ( is_set $data, $initial_bit ) { + my $start = get_uint16( $data, $initial_bit + 1 ); + my $end = get_uint16( $data, $initial_bit + 17 ); + + croak + "bit $initial_bit range entry exclusion ends at $end, but the max vendor ID is $max_vendor_id" + if $end > $max_vendor_id; + + return GDPR::IAB::TCFv2::RangeConsent->new( start => $start, + end => $end ), 33; + } + + my $vendor_id = get_uint16( $data, $initial_bit + 1 ); + + croak + "bit $initial_bit range entry excludes vendor $vendor_id, but only vendors [1, $max_vendor_id] are valid" + if $vendor_id > $max_vendor_id; + + return GDPR::IAB::TCFv2::RangeConsent->new( start => $vendor_id, + end => $vendor_id ), 17; +} + +sub current_offset { + my $self = shift; + + return $self->{_current_offset}; +} + +sub max_vendor_id { + my $self = shift; + + return $self->{vendor_bits_required}; +} + +sub vendor_consent { + my ( $self, $id ) = @_; + + croak "invalid vendor id $id: must be positive integer bigger than 0" + if $id < 1; + + return if $id > $self->{vendor_bits_required}; + + return any { $_->contains($id) } @{ $self->{consents} }; +} + +1; diff --git a/t/00-load.t b/t/00-load.t index 3a7337a..969658e 100644 --- a/t/00-load.t +++ b/t/00-load.t @@ -1,9 +1,24 @@ -use Test::More tests => 4; +use Test::More tests => 14; BEGIN { + use_ok('GDPR::IAB::TCFv2::BitUtils'); use_ok('GDPR::IAB::TCFv2::BitField'); + use_ok('GDPR::IAB::TCFv2::RangeSection'); + use_ok('GDPR::IAB::TCFv2::RangeConsent'); use_ok('GDPR::IAB::TCFv2'); } +require_ok 'GDPR::IAB::TCFv2::BitUtils'; require_ok 'GDPR::IAB::TCFv2::BitField'; +require_ok 'GDPR::IAB::TCFv2::RangeSection'; +require_ok 'GDPR::IAB::TCFv2::RangeConsent'; require_ok 'GDPR::IAB::TCFv2'; + +isa_ok 'GDPR::IAB::TCFv2::BitUtils', 'Exporter'; + +my @methods = qw; + +can_ok 'GDPR::IAB::TCFv2::BitField', @methods; +can_ok 'GDPR::IAB::TCFv2::RangeSection', @methods; + +can_ok 'GDPR::IAB::TCFv2::RangeConsent', 'new', 'contains'; diff --git a/t/01-parse.t b/t/01-parse.t index ba1b4fb..e020ea3 100644 --- a/t/01-parse.t +++ b/t/01-parse.t @@ -1,9 +1,9 @@ -use Test::More tests => 2; +use Test::More tests => 3; use Test::Exception; use GDPR::IAB::TCFv2; -subtest "valid tcf v2 consent string" => sub { +subtest "valid tcf v2 consent string using bitfield" => sub { plan tests => 21; my $consent; @@ -124,6 +124,112 @@ subtest "valid tcf v2 consent string" => sub { }; }; +subtest "valid tcf v2 consent string using range" => sub { + plan tests => 21; + + my $consent; + + lives_ok { + $consent = GDPR::IAB::TCFv2->Parse( + 'COyfVVoOyfVVoADACHENAwCAAAAAAAAAAAAAE5QBgALgAqgD8AQACSwEygJyAnSAMABgAFkAgQCDASeAmYBOgAA' + ); + } + 'should not throw exception'; + + isa_ok $consent, 'GDPR::IAB::TCFv2', 'gdpr iab tcf v2 consent'; + + is $consent->version, 2, 'should return version 2'; + + is $consent->created, 1587946020, + 'should return the creation epoch 27/04/2020'; + + is $consent->last_updated, 1587946020, + 'should return the last update epoch 27/04/2020'; + + is $consent->cmp_id, 3, 'should return the cmp id 3'; + + is $consent->cmp_version, 2, 'should return the cmp version 2'; + + is $consent->consent_screen, 7, 'should return the consent screen 7'; + + is $consent->consent_language, "EN", + 'should return the consent language "EN"'; + + is $consent->vendor_list_version, 48, + 'should return the vendor list version 23'; + + is $consent->policy_version, 2, + 'should return the policy version 2'; + + ok !$consent->is_service_specific, + 'should return true for service specific'; + + ok !$consent->use_non_standard_stacks, + 'should return false for use non standard stacks'; + + ok !$consent->purpose_one_treatment, + 'should return false for use purpose one treatment'; + + is $consent->publisher_country_code, "AA", + 'should return the publisher country code "AA"'; + + is $consent->max_vendor_id, 626, "max vendor id is 626"; + + subtest "check purpose consent ids" => sub { + plan tests => 24; + + foreach my $id ( 1 .. 24 ) { + ok !$consent->is_purpose_consent_allowed($id), + "checking purpose id $id for consent"; + } + }; + + subtest "check purpose legitimate interest ids" => sub { + plan tests => 24; + + foreach my $id ( 1 .. 24 ) { + ok !$consent->is_purpose_legitimate_interest_allowed($id), + "checking purpose id $id for legitimate interest"; + } + }; + + subtest "check special feature opt in" => sub { + plan tests => 12; + + foreach my $id ( 1 .. 12 ) { + ok !$consent->is_special_feature_opt_in($id), + "checking special feature id $id opt in"; + } + }; + + subtest "check vendor consent ids" => sub { + plan tests => 626; + + my %allowed_vendors = + map { $_ => 1 } ( 23, 42, 126, 127, 128, 587, 613, 626 ); + + foreach my $id ( 1 .. 626 ) { + is !!$consent->vendor_consent($id), + !!$allowed_vendors{$id}, + "checking vendor id $id for consent"; + } + }; + + subtest "check vendor legitimate interest ids" => sub { + plan tests => 628; + + my %allowed_vendors = + map { $_ => 1 } ( 24, 44, 129, 130, 131, 591, 614, 628 ); + + foreach my $id ( 1 .. 628 ) { + is !!$consent->vendor_legitimate_interest($id), + !!$allowed_vendors{$id}, + "checking vendor id $id for legitimate interest"; + } + }; + +}; + subtest "invalid tcf consent string candidates" => sub { plan tests => 5;