From f6ab2c13b8dde197c1c56d3330a2737873c95f15 Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Sat, 14 Dec 2024 17:32:07 +1100 Subject: [PATCH 1/9] Add PDL::IO::Dumper tests for re-used ndarrays in a structure Currently a structure like this returns undef for duplicated entries, as well as raising "my variable masking" warnings when evaled back. my $p = zeroes (5); my $arr = [$p, $p]; --- t/dumper.t | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/t/dumper.t b/t/dumper.t index fbb25603f..7dff0a116 100644 --- a/t/dumper.t +++ b/t/dumper.t @@ -54,4 +54,37 @@ is ref($x), 'ARRAY' or diag explain $s; ok eval { $x->[0]->hdrcpy() == 1 && $x->[1]->hdrcpy() == 0 }, 'Check hdrcpy()\'s persist'; ok eval { ($x->[0]->gethdr()->{ok}==1) && ($x->[1]->gethdr()->{ok}==2) }, 'Check gethdr() values persist'; +# GH508 +{ + my $x = xvals(5); + my $y1 = $x; + my $y2 = 2*$x; + my $y3 = $x*$x; + + my %plots = ( + 'x1'=>$x, 'y1'=>$y1, + 'x2'=>$x, 'y2'=>$y2, + 'x3'=>$x, 'y3'=>$y3, + ); + + my $as_string = sdump \%plots; + + my $restored = eval $as_string; + + #diag $as_string; + + my @nulls = grep {!defined $restored->{$_}} sort keys %$restored; + is_deeply \@nulls, [], 'none of the restored items are undef'; + + # test a dump with uuencoded content + my $u = xvals(25,25); + my @ndarrays = ($u, $u); + $as_string = sdump \@ndarrays; + # diag $as_string; + $restored = eval $as_string; + @nulls = grep {!defined $_} @$restored; + is_deeply \@nulls, [], 'none of the restored uuencoded items are undef'; + +} + done_testing; From 771bde8ef80c4458cfcb01ec37a80ee246177465 Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Sat, 14 Dec 2024 17:34:21 +1100 Subject: [PATCH 2/9] Handle repeated ndarrays when dumped to strings This also avoids "my variable masking" warnings when eval'ing smaller ndarrays back to a structure. Warnings for larger uuencoded strings are not yet handled. --- lib/PDL/IO/Dumper.pm | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/lib/PDL/IO/Dumper.pm b/lib/PDL/IO/Dumper.pm index 97f25c830..32b819895 100644 --- a/lib/PDL/IO/Dumper.pm +++ b/lib/PDL/IO/Dumper.pm @@ -95,6 +95,7 @@ convenience routine exists to use it. sub PDL::IO::Dumper::sdump { # Make an initial dump... + local $Data::Dumper::Purity = 1; my($s) = Data::Dumper->Dump([@_]); my(%pdls); # Find the bless(...,'PDL') lines @@ -114,7 +115,7 @@ sub PDL::IO::Dumper::sdump { # find_PDLs call (which modifies $s using the s/// operator). my($s2) = "{my(\$VAR1);\n".&PDL::IO::Dumper::find_PDLs(\$s,@_)."\n\n"; - return $s2.$s."\n}"; + return $s2.$s."\n\$VAR1}"; # } @@ -529,6 +530,12 @@ sub PDL::IO::Dumper::find_PDLs { } } + + # deduplicate + my %seen; + $out = join "\n", grep {!$seen{$_}++} split ("\n", $out); + $out .= "\n"; + return $out; } From 930c19c5f980c38042ccfb9c70c9e75532302174 Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Sat, 14 Dec 2024 17:46:23 +1100 Subject: [PATCH 3/9] PDL::IO::Dumper::find_PDLs: disable deduplication It interferes with uuencoded strings. Temporary change. --- lib/PDL/IO/Dumper.pm | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/PDL/IO/Dumper.pm b/lib/PDL/IO/Dumper.pm index 32b819895..75c5bda21 100644 --- a/lib/PDL/IO/Dumper.pm +++ b/lib/PDL/IO/Dumper.pm @@ -532,9 +532,9 @@ sub PDL::IO::Dumper::find_PDLs { } # deduplicate - my %seen; - $out = join "\n", grep {!$seen{$_}++} split ("\n", $out); - $out .= "\n"; + # my %seen; + # $out = join "\n", grep {!$seen{$_}++} split ("\n", $out); + # $out .= "\n"; return $out; } From 261a60f2eb38b3fe7aeb48bd68718d18bf7a066b Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Sat, 14 Dec 2024 17:50:53 +1100 Subject: [PATCH 4/9] modernise PDL::IO::Dumper::find_PDLs * Use named lexical as the loop variable * Unpack all args at start * Some whitespace changes --- lib/PDL/IO/Dumper.pm | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/lib/PDL/IO/Dumper.pm b/lib/PDL/IO/Dumper.pm index 75c5bda21..305238225 100644 --- a/lib/PDL/IO/Dumper.pm +++ b/lib/PDL/IO/Dumper.pm @@ -489,25 +489,27 @@ string. You shouldn't call this unless you know what you're doing. =cut sub PDL::IO::Dumper::find_PDLs { - local($_); - my($out)=""; - my($sp) = shift; + my($sp, @items) = @_; + + my $out = ""; - findpdl:foreach $_(@_) { - next findpdl unless ref($_); + findpdl: + foreach my $item (@items) { + next findpdl unless ref($item); - if(UNIVERSAL::isa($_,'ARRAY')) { + if(UNIVERSAL::isa($item,'ARRAY')) { my($x); - foreach $x(@{$_}) { - $out .= find_PDLs($sp,$x); + foreach $x(@{$item}) { + $out .= find_PDLs($sp,$x); } } - elsif(UNIVERSAL::isa($_,'HASH')) { + elsif(UNIVERSAL::isa($item,'HASH')) { my($x); - foreach $x(values %{$_}) { - $out .= find_PDLs($sp,$x) - } - } elsif(UNIVERSAL::isa($_,'PDL')) { + foreach $x(values %{$item}) { + $out .= find_PDLs($sp,$x) + } + } + elsif(UNIVERSAL::isa($item,'PDL')) { # In addition to straight PDLs, # this gets subclasses of PDL, but NOT magic-hash subclasses of @@ -516,17 +518,17 @@ sub PDL::IO::Dumper::find_PDLs { # just a straight PDL (and not a hash with PDL field), you end up here. # - my($pdlid) = sprintf('PDL_%u',$$_); - my(@strings) = &PDL::IO::Dumper::dump_PDL($_,$pdlid); + my($pdlid) = sprintf('PDL_%u',$$item); + my(@strings) = &PDL::IO::Dumper::dump_PDL($item,$pdlid); $out .= $strings[0]; $$sp =~ s/\$$pdlid/$strings[1]/g if(defined($strings[1])); } - elsif(UNIVERSAL::isa($_,'SCALAR')) { + elsif(UNIVERSAL::isa($item,'SCALAR')) { # This gets other kinds of refs -- PDLs have already been gotten. # Naked PDLs are themselves SCALARs, so the SCALAR case has to come # last to let the PDL case run. - $out .= find_PDLs( $sp, ${$_} ); + $out .= find_PDLs( $sp, ${$item} ); } } From 39b68e71571fc6b1529c18346c8b0aa17222f5bc Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Sat, 14 Dec 2024 17:53:25 +1100 Subject: [PATCH 5/9] t/dumper.t: update test to trigger issue --- t/dumper.t | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/t/dumper.t b/t/dumper.t index 7dff0a116..abfd7492f 100644 --- a/t/dumper.t +++ b/t/dumper.t @@ -56,7 +56,8 @@ ok eval { ($x->[0]->gethdr()->{ok}==1) && ($x->[1]->gethdr()->{ok}==2) }, 'Check # GH508 { - my $x = xvals(5); + # need 10 vals to trigger GH508 + my $x = xvals(10); my $y1 = $x; my $y2 = 2*$x; my $y3 = $x*$x; @@ -71,7 +72,7 @@ ok eval { ($x->[0]->gethdr()->{ok}==1) && ($x->[1]->gethdr()->{ok}==2) }, 'Check my $restored = eval $as_string; - #diag $as_string; + diag $as_string; my @nulls = grep {!defined $restored->{$_}} sort keys %$restored; is_deeply \@nulls, [], 'none of the restored items are undef'; From b4d79cb0117497a3dfa12b84f7951b3ef2c9efbf Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Sat, 14 Dec 2024 20:09:44 +1100 Subject: [PATCH 6/9] refactor PDL::IO::Dumper::find_PDLs Shift most of the work to an internal sub which works with an array instead of strings. This avoids problems with deduplication when structures span more than one line. --- lib/PDL/IO/Dumper.pm | 57 ++++++++++++++++++++++++++++++-------------- t/dumper.t | 2 +- 2 files changed, 40 insertions(+), 19 deletions(-) diff --git a/lib/PDL/IO/Dumper.pm b/lib/PDL/IO/Dumper.pm index 305238225..e46bbb687 100644 --- a/lib/PDL/IO/Dumper.pm +++ b/lib/PDL/IO/Dumper.pm @@ -491,7 +491,30 @@ string. You shouldn't call this unless you know what you're doing. sub PDL::IO::Dumper::find_PDLs { my($sp, @items) = @_; - my $out = ""; + my $out_aref = _find_PDLs_inner($sp, @items); + + # deduplicate + my (@uniq, %seen); + LINE: + foreach my $line (@$out_aref) { + if ($line =~ /^my\(\$PDL_(\d+)\)/) { + my $id = $1; + next LINE if $seen{$id}; + $seen{$id}++; + } + push @uniq, $line; + } + + my $out = join "\n", @uniq; + $out .= "\n"; + + return $out; +} + +sub _find_PDLs_inner { + my($sp, @items) = @_; + + my @out; findpdl: foreach my $item (@items) { @@ -500,18 +523,20 @@ sub PDL::IO::Dumper::find_PDLs { if(UNIVERSAL::isa($item,'ARRAY')) { my($x); foreach $x(@{$item}) { - $out .= find_PDLs($sp,$x); + my $res = _find_PDLs_inner($sp,$x); + push @out, @$res; } - } + } elsif(UNIVERSAL::isa($item,'HASH')) { my($x); - foreach $x(values %{$item}) { - $out .= find_PDLs($sp,$x) + foreach $x (values %{$item}) { + my $res = _find_PDLs_inner($sp,$x); + push @out, @$res; } } elsif(UNIVERSAL::isa($item,'PDL')) { - # In addition to straight PDLs, + # In addition to straight PDLs, # this gets subclasses of PDL, but NOT magic-hash subclasses of # PDL (because they'd be gotten by the previous clause). # So if you subclass PDL but your actual data structure is still @@ -520,25 +545,21 @@ sub PDL::IO::Dumper::find_PDLs { my($pdlid) = sprintf('PDL_%u',$$item); my(@strings) = &PDL::IO::Dumper::dump_PDL($item,$pdlid); - - $out .= $strings[0]; - $$sp =~ s/\$$pdlid/$strings[1]/g if(defined($strings[1])); + + push @out, $strings[0]; + $$sp =~ s/\$$pdlid/$strings[1]/g if(defined($strings[1])); } elsif(UNIVERSAL::isa($item,'SCALAR')) { # This gets other kinds of refs -- PDLs have already been gotten. - # Naked PDLs are themselves SCALARs, so the SCALAR case has to come + # Naked PDLs are themselves SCALARs, so the SCALAR case has to come # last to let the PDL case run. - $out .= find_PDLs( $sp, ${$item} ); + my $res = _find_PDLs_inner( $sp, ${$item} ); + push @out, @$res; } - - } - # deduplicate - # my %seen; - # $out = join "\n", grep {!$seen{$_}++} split ("\n", $out); - # $out .= "\n"; + } - return $out; + return \@out; } =head1 AUTHOR diff --git a/t/dumper.t b/t/dumper.t index abfd7492f..c30088d01 100644 --- a/t/dumper.t +++ b/t/dumper.t @@ -72,7 +72,7 @@ ok eval { ($x->[0]->gethdr()->{ok}==1) && ($x->[1]->gethdr()->{ok}==2) }, 'Check my $restored = eval $as_string; - diag $as_string; + # diag $as_string; my @nulls = grep {!defined $restored->{$_}} sort keys %$restored; is_deeply \@nulls, [], 'none of the restored items are undef'; From ad5c636e752c864e01fa3b97dc299e4d1cda5812 Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Sat, 14 Dec 2024 20:17:48 +1100 Subject: [PATCH 7/9] PDL::IO::Dumper::_find_PDLs_inner: use hash of args This allows us to pass the seen hash as an arg and dedup as we go. Should save memory for large structures. --- lib/PDL/IO/Dumper.pm | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/lib/PDL/IO/Dumper.pm b/lib/PDL/IO/Dumper.pm index e46bbb687..e69abd529 100644 --- a/lib/PDL/IO/Dumper.pm +++ b/lib/PDL/IO/Dumper.pm @@ -491,10 +491,11 @@ string. You shouldn't call this unless you know what you're doing. sub PDL::IO::Dumper::find_PDLs { my($sp, @items) = @_; - my $out_aref = _find_PDLs_inner($sp, @items); + my %seen; + my $out_aref = _find_PDLs_inner(dumped_string => $sp, items => \@items, seen => \%seen); # deduplicate - my (@uniq, %seen); + my @uniq; LINE: foreach my $line (@$out_aref) { if ($line =~ /^my\(\$PDL_(\d+)\)/) { @@ -512,7 +513,11 @@ sub PDL::IO::Dumper::find_PDLs { } sub _find_PDLs_inner { - my($sp, @items) = @_; + my %args = @_; + my $sp = $args{dumped_string}; + # internal sub so legitimate uses will pass an array + my @items = @{$args{items}}; + my $seen = $args{seen}; my @out; @@ -523,14 +528,14 @@ sub _find_PDLs_inner { if(UNIVERSAL::isa($item,'ARRAY')) { my($x); foreach $x(@{$item}) { - my $res = _find_PDLs_inner($sp,$x); + my $res = _find_PDLs_inner(%args, items => [$x]); push @out, @$res; } } elsif(UNIVERSAL::isa($item,'HASH')) { my($x); foreach $x (values %{$item}) { - my $res = _find_PDLs_inner($sp,$x); + my $res = _find_PDLs_inner(%args, items => [$x]); push @out, @$res; } } @@ -553,7 +558,7 @@ sub _find_PDLs_inner { # This gets other kinds of refs -- PDLs have already been gotten. # Naked PDLs are themselves SCALARs, so the SCALAR case has to come # last to let the PDL case run. - my $res = _find_PDLs_inner( $sp, ${$item} ); + my $res = _find_PDLs_inner( %args, items => [${$item}] ); push @out, @$res; } From c5f60fc951d3f7a41e28ceae1db980820a658644 Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Sat, 14 Dec 2024 20:30:50 +1100 Subject: [PATCH 8/9] PDL::IO::Dumper::_find_PDLs_inner: no repeat serialisation Don't serialise an ndarray that has already been processed. This avoids the need to deduplicate later, although that process is retained just in case it is useful. --- lib/PDL/IO/Dumper.pm | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/lib/PDL/IO/Dumper.pm b/lib/PDL/IO/Dumper.pm index e69abd529..e7731efd1 100644 --- a/lib/PDL/IO/Dumper.pm +++ b/lib/PDL/IO/Dumper.pm @@ -491,14 +491,15 @@ string. You shouldn't call this unless you know what you're doing. sub PDL::IO::Dumper::find_PDLs { my($sp, @items) = @_; - my %seen; - my $out_aref = _find_PDLs_inner(dumped_string => $sp, items => \@items, seen => \%seen); - # deduplicate + my $out_aref = _find_PDLs_inner(dumped_string => $sp, items => \@items); + + # deduplicate - should not be needed now but retained just in case. my @uniq; + my %seen; LINE: foreach my $line (@$out_aref) { - if ($line =~ /^my\(\$PDL_(\d+)\)/) { + if ($line =~ /^my\(\$(PDL_\d+)\)/) { my $id = $1; next LINE if $seen{$id}; $seen{$id}++; @@ -517,7 +518,7 @@ sub _find_PDLs_inner { my $sp = $args{dumped_string}; # internal sub so legitimate uses will pass an array my @items = @{$args{items}}; - my $seen = $args{seen}; + my $seen = $args{seen} //= {}; my @out; @@ -549,10 +550,13 @@ sub _find_PDLs_inner { # my($pdlid) = sprintf('PDL_%u',$$item); - my(@strings) = &PDL::IO::Dumper::dump_PDL($item,$pdlid); + if (!$seen->{$pdlid}) { + my (@strings) = &PDL::IO::Dumper::dump_PDL($item, $pdlid); - push @out, $strings[0]; - $$sp =~ s/\$$pdlid/$strings[1]/g if(defined($strings[1])); + push @out, $strings[0]; + $$sp =~ s/\$$pdlid/$strings[1]/g if (defined($strings[1])); + $seen->{$pdlid}++; + } } elsif(UNIVERSAL::isa($item,'SCALAR')) { # This gets other kinds of refs -- PDLs have already been gotten. From 5d3e3fe8d70497cbe4f9e82654bf6c863816f92d Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Sun, 15 Dec 2024 08:22:36 +1100 Subject: [PATCH 9/9] dumper.t: avoid hard coded thresholds The threshold for using uuencode may change in future given 25x25 is not that large these days. Do the same for the small threshold used to determine if the struct should be inlined. --- t/dumper.t | 39 ++++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/t/dumper.t b/t/dumper.t index c30088d01..43af432fe 100644 --- a/t/dumper.t +++ b/t/dumper.t @@ -14,7 +14,16 @@ use PDL::LiteF; my ( $s, $x ); -eval { $s = sdump({a=>3,b=>pdl(4),c=>xvals(3,3),d=>xvals(4,4)}) }; +# Need a value greater than the uuencode dump threshold. +# Currently 25 but may change in future. +my $big_size = int (5 + sqrt $PDL::IO::Dumper::med_thresh); +my @big_dims = ($big_size, $big_size); + +# Small thresh is currently 8 +my $med_size = int (2 + sqrt $PDL::IO::Dumper::small_thresh); +my @med_dims = ($med_size, $med_size); + +eval { $s = sdump({a=>3,b=>pdl(4),c=>xvals(3,3),d=>xvals(@med_dims)}) }; is $@, '', 'Call sdump()' or diag("Call sdump() output string:\n$s\n"); $x = eval $s; @@ -24,27 +33,35 @@ ok(($x->{a}==3), 'SCALAR value restored ok'); ok(((ref $x->{b} eq 'PDL') && ($x->{b}==4)), '0-d PDL restored ok'); ok(((ref $x->{c} eq 'PDL') && ($x->{c}->nelem == 9) && (sum(abs(($x->{c} - xvals(3,3))))<0.0000001)), '3x3 PDL restored ok'); -ok(((ref $x->{d} eq 'PDL') && ($x->{d}->nelem == 16) - && (sum(abs(($x->{d} - xvals(4,4))))<0.0000001)), '4x4 PDL restored ok'); +ok(((ref $x->{d} eq 'PDL') && ($x->{d}->nelem == $med_size ** 2) + && (sum(abs(($x->{d} - xvals(@med_dims))))<0.0000001)), '"medium" sized PDL restored ok'); ########## Dump a uuencoded expr and try to get it back... # e: uuencoded expr -eval { $s = sdump({e=>xvals(25,25)}) }; -is $@, '', 'sdump() of 25x25 PDL to test uuencode dumps'; +eval { $s = sdump({e=>xvals(@big_dims)}) }; +is $@, '', 'sdump() of "big" PDL to test uuencode dumps'; #diag $s,"\n"; $x = eval $s; -is $@, '', 'Can eval dumped 25x25 PDL' or diag 'string: ', $s; +is $@, '', 'Can eval dumped "big" PDL' or diag 'string: ', $s; -ok((ref $x eq 'HASH'), 'HASH structure for uuencoded 25x25 PDL restored'); +ok((ref $x eq 'HASH'), 'HASH structure for uuencoded "big" PDL restored'); isa_ok $x->{e}, 'PDL'; -is $x->{e}->nelem, 625; -is_pdl $x->{e}, xvals(25,25), 'Verify 25x25 PDL restored data'; +is $x->{e}->nelem, $big_size ** 2; +is_pdl $x->{e}, xvals(@big_dims), 'Verify "big" PDL restored data'; ########## Check header dumping... my $y; -eval { $x = xvals(2,2); $x->sethdr({ok=>1}); $x->hdrcpy(1); $y = xvals(25,25); $y->sethdr({ok=>2}); $y->hdrcpy(0); $s = sdump([$x,$y,yvals(25,25)]); }; +eval { + $x = xvals(2,2); + $x->sethdr({ok=>1}); + $x->hdrcpy(1); + $y = xvals(@big_dims); + $y->sethdr({ok=>2}); + $y->hdrcpy(0); + $s = sdump([$x,$y,yvals(@big_dims)]); +}; is $@, '', 'Check header dumping'; $x = eval $s; @@ -78,7 +95,7 @@ ok eval { ($x->[0]->gethdr()->{ok}==1) && ($x->[1]->gethdr()->{ok}==2) }, 'Check is_deeply \@nulls, [], 'none of the restored items are undef'; # test a dump with uuencoded content - my $u = xvals(25,25); + my $u = xvals(@big_dims); my @ndarrays = ($u, $u); $as_string = sdump \@ndarrays; # diag $as_string;