Skip to content

Commit

Permalink
Merge pull request #509 from shawnlaffan/io_dumper_dups
Browse files Browse the repository at this point in the history
PDL::IO::Dumper: better handling of multiply referenced ndarrays
  • Loading branch information
mohawk2 authored Dec 15, 2024
2 parents 4ba5d7c + 5d3e3fe commit 77e1fd4
Show file tree
Hide file tree
Showing 2 changed files with 126 additions and 36 deletions.
91 changes: 65 additions & 26 deletions lib/PDL/IO/Dumper.pm
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ convenience routine exists to use it.

sub PDL::IO::Dumper::sdump {
# Make an initial dump...
local $Data::Dumper::Purity = 1;
my($s) = Data::Dumper->Dump([@_]);
my(%pdls);
# Find the bless(...,'PDL') lines
Expand All @@ -114,7 +115,7 @@ sub PDL::IO::Dumper::sdump {
# find_PDLs call (which modifies $s using the s/// operator).

my($s2) = "{my(\$VAR1);\n".&PDL::IO::Dumper::find_PDLs(\$s,@_)."\n\n";
return $s2.$s."\n}";
return $s2.$s."\n\$VAR1}";

#
}
Expand Down Expand Up @@ -488,48 +489,86 @@ string. You shouldn't call this unless you know what you're doing.
=cut

sub PDL::IO::Dumper::find_PDLs {
local($_);
my($out)="";
my($sp) = shift;
my($sp, @items) = @_;


my $out_aref = _find_PDLs_inner(dumped_string => $sp, items => \@items);

# deduplicate - should not be needed now but retained just in case.
my @uniq;
my %seen;
LINE:
foreach my $line (@$out_aref) {
if ($line =~ /^my\(\$(PDL_\d+)\)/) {
my $id = $1;
next LINE if $seen{$id};
$seen{$id}++;
}
push @uniq, $line;
}

my $out = join "\n", @uniq;
$out .= "\n";

return $out;
}

sub _find_PDLs_inner {
my %args = @_;
my $sp = $args{dumped_string};
# internal sub so legitimate uses will pass an array
my @items = @{$args{items}};
my $seen = $args{seen} //= {};

findpdl:foreach $_(@_) {
next findpdl unless ref($_);
my @out;

if(UNIVERSAL::isa($_,'ARRAY')) {
findpdl:
foreach my $item (@items) {
next findpdl unless ref($item);

if(UNIVERSAL::isa($item,'ARRAY')) {
my($x);
foreach $x(@{$_}) {
$out .= find_PDLs($sp,$x);
foreach $x(@{$item}) {
my $res = _find_PDLs_inner(%args, items => [$x]);
push @out, @$res;
}
}
elsif(UNIVERSAL::isa($_,'HASH')) {
}
elsif(UNIVERSAL::isa($item,'HASH')) {
my($x);
foreach $x(values %{$_}) {
$out .= find_PDLs($sp,$x)
}
} elsif(UNIVERSAL::isa($_,'PDL')) {
foreach $x (values %{$item}) {
my $res = _find_PDLs_inner(%args, items => [$x]);
push @out, @$res;
}
}
elsif(UNIVERSAL::isa($item,'PDL')) {

# In addition to straight PDLs,
# In addition to straight PDLs,
# this gets subclasses of PDL, but NOT magic-hash subclasses of
# PDL (because they'd be gotten by the previous clause).
# So if you subclass PDL but your actual data structure is still
# just a straight PDL (and not a hash with PDL field), you end up here.
#

my($pdlid) = sprintf('PDL_%u',$$_);
my(@strings) = &PDL::IO::Dumper::dump_PDL($_,$pdlid);

$out .= $strings[0];
$$sp =~ s/\$$pdlid/$strings[1]/g if(defined($strings[1]));
my($pdlid) = sprintf('PDL_%u',$$item);
if (!$seen->{$pdlid}) {
my (@strings) = &PDL::IO::Dumper::dump_PDL($item, $pdlid);

push @out, $strings[0];
$$sp =~ s/\$$pdlid/$strings[1]/g if (defined($strings[1]));
$seen->{$pdlid}++;
}
}
elsif(UNIVERSAL::isa($_,'SCALAR')) {
elsif(UNIVERSAL::isa($item,'SCALAR')) {
# This gets other kinds of refs -- PDLs have already been gotten.
# Naked PDLs are themselves SCALARs, so the SCALAR case has to come
# Naked PDLs are themselves SCALARs, so the SCALAR case has to come
# last to let the PDL case run.
$out .= find_PDLs( $sp, ${$_} );
my $res = _find_PDLs_inner( %args, items => [${$item}] );
push @out, @$res;
}

}
return $out;

return \@out;
}

=head1 AUTHOR
Expand Down
71 changes: 61 additions & 10 deletions t/dumper.t
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,16 @@ use PDL::LiteF;

my ( $s, $x );

eval { $s = sdump({a=>3,b=>pdl(4),c=>xvals(3,3),d=>xvals(4,4)}) };
# Need a value greater than the uuencode dump threshold.
# Currently 25 but may change in future.
my $big_size = int (5 + sqrt $PDL::IO::Dumper::med_thresh);
my @big_dims = ($big_size, $big_size);

# Small thresh is currently 8
my $med_size = int (2 + sqrt $PDL::IO::Dumper::small_thresh);
my @med_dims = ($med_size, $med_size);

eval { $s = sdump({a=>3,b=>pdl(4),c=>xvals(3,3),d=>xvals(@med_dims)}) };
is $@, '', 'Call sdump()'
or diag("Call sdump() output string:\n$s\n");
$x = eval $s;
Expand All @@ -24,27 +33,35 @@ ok(($x->{a}==3), 'SCALAR value restored ok');
ok(((ref $x->{b} eq 'PDL') && ($x->{b}==4)), '0-d PDL restored ok');
ok(((ref $x->{c} eq 'PDL') && ($x->{c}->nelem == 9)
&& (sum(abs(($x->{c} - xvals(3,3))))<0.0000001)), '3x3 PDL restored ok');
ok(((ref $x->{d} eq 'PDL') && ($x->{d}->nelem == 16)
&& (sum(abs(($x->{d} - xvals(4,4))))<0.0000001)), '4x4 PDL restored ok');
ok(((ref $x->{d} eq 'PDL') && ($x->{d}->nelem == $med_size ** 2)
&& (sum(abs(($x->{d} - xvals(@med_dims))))<0.0000001)), '"medium" sized PDL restored ok');

########## Dump a uuencoded expr and try to get it back...
# e: uuencoded expr
eval { $s = sdump({e=>xvals(25,25)}) };
is $@, '', 'sdump() of 25x25 PDL to test uuencode dumps';
eval { $s = sdump({e=>xvals(@big_dims)}) };
is $@, '', 'sdump() of "big" PDL to test uuencode dumps';

#diag $s,"\n";

$x = eval $s;
is $@, '', 'Can eval dumped 25x25 PDL' or diag 'string: ', $s;
is $@, '', 'Can eval dumped "big" PDL' or diag 'string: ', $s;

ok((ref $x eq 'HASH'), 'HASH structure for uuencoded 25x25 PDL restored');
ok((ref $x eq 'HASH'), 'HASH structure for uuencoded "big" PDL restored');
isa_ok $x->{e}, 'PDL';
is $x->{e}->nelem, 625;
is_pdl $x->{e}, xvals(25,25), 'Verify 25x25 PDL restored data';
is $x->{e}->nelem, $big_size ** 2;
is_pdl $x->{e}, xvals(@big_dims), 'Verify "big" PDL restored data';

########## Check header dumping...
my $y;
eval { $x = xvals(2,2); $x->sethdr({ok=>1}); $x->hdrcpy(1); $y = xvals(25,25); $y->sethdr({ok=>2}); $y->hdrcpy(0); $s = sdump([$x,$y,yvals(25,25)]); };
eval {
$x = xvals(2,2);
$x->sethdr({ok=>1});
$x->hdrcpy(1);
$y = xvals(@big_dims);
$y->sethdr({ok=>2});
$y->hdrcpy(0);
$s = sdump([$x,$y,yvals(@big_dims)]);
};
is $@, '', 'Check header dumping';

$x = eval $s;
Expand All @@ -54,4 +71,38 @@ is ref($x), 'ARRAY' or diag explain $s;
ok eval { $x->[0]->hdrcpy() == 1 && $x->[1]->hdrcpy() == 0 }, 'Check hdrcpy()\'s persist';
ok eval { ($x->[0]->gethdr()->{ok}==1) && ($x->[1]->gethdr()->{ok}==2) }, 'Check gethdr() values persist';

# GH508
{
# need 10 vals to trigger GH508
my $x = xvals(10);
my $y1 = $x;
my $y2 = 2*$x;
my $y3 = $x*$x;

my %plots = (
'x1'=>$x, 'y1'=>$y1,
'x2'=>$x, 'y2'=>$y2,
'x3'=>$x, 'y3'=>$y3,
);

my $as_string = sdump \%plots;

my $restored = eval $as_string;

# diag $as_string;

my @nulls = grep {!defined $restored->{$_}} sort keys %$restored;
is_deeply \@nulls, [], 'none of the restored items are undef';

# test a dump with uuencoded content
my $u = xvals(@big_dims);
my @ndarrays = ($u, $u);
$as_string = sdump \@ndarrays;
# diag $as_string;
$restored = eval $as_string;
@nulls = grep {!defined $_} @$restored;
is_deeply \@nulls, [], 'none of the restored uuencoded items are undef';

}

done_testing;

0 comments on commit 77e1fd4

Please sign in to comment.