Skip to content

Commit

Permalink
speed up x{y when singleton leading axes
Browse files Browse the repository at this point in the history
  • Loading branch information
HenryHRich committed Aug 7, 2023
1 parent 359def3 commit 90a1ce0
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 6 deletions.
15 changes: 10 additions & 5 deletions jsrc/vfrom.c
Original file line number Diff line number Diff line change
Expand Up @@ -185,9 +185,12 @@ static A jtaxisfrom(J jt,A w,struct faxis *axes,I rflags){F2PREFIP; I i;
I zn; // number of atoms in result
I celllen=axes[r].lencell; // length of cell of last axis, in atoms
A z; // result
I nunitsels=r; // number of leading axes containing exactly one selector. When the axis below this prefix rolls over, we can stop, knowing that there are no higher selectors
if(likely(AN(w)!=0)){ // normal case, w has atoms
for(i=r-1;i>=0;--i){ // for axes before the last
DPMULDE(framesize,axes[i].nsel^REPSGN(axes[i].nsel),framesize); // count # cells in frame and selectors
for(i=r-1;i>=0;--i){ // for axes BEFORE the last
I absnsel=axes[i].nsel^REPSGN(axes[i].nsel); // adjust for complementary indexing
DPMULDE(framesize,absnsel,framesize); // count # cells in frame and selectors
nunitsels=absnsel!=1?i:nunitsels; // if this cell-count is not 1, reset the count of # 1s.
// note: if some selector is empty and the others overflow, this will give limit error. Sue me.
axes[i].lencell<<=k; // convert cellsize to bytes
base+=axes[i].lencell*axes[i].sel0; // for axes before last, add offset of first index
Expand Down Expand Up @@ -277,7 +280,8 @@ static A jtaxisfrom(J jt,A w,struct faxis *axes,I rflags){F2PREFIP; I i;
R z;
}

I noframe=REPSGN(framesize-2); axes+=r&noframe; r-=r&noframe; // F..F if all previous axes are in base; only one selection pass needed (gives early exit)
// obsolete I noframe=REPSGN(framesize-2); axes+=r&noframe; r-=r&noframe; // F..F if all previous axes are in base; only one selection pass needed (gives early exit)
axes+=nunitsels; r-=nunitsels; // Now that initial selectors are rolled into base, we can delete the consecutive leading single-selector axes. When they start to roll over we can quit
celllen<<=k; // convert last-axis len to bytes

// decide what copy routines to use for last axis.
Expand Down Expand Up @@ -356,7 +360,7 @@ else{ // normal last axis
// normal non-complementary index. Step to the next row
SETNDX(nextx,axes[rodo].sels[axes[rodo].currselx],axes[rodo].lenaxis); // fetch next index
}else{
// complementary index. Start after currselv and find the next 1-bit
// complementary index. Start after currselv and find the next 1-bit. scaf: should rewrite to use the complementary indexes, not a mask
nextx=axes[rodo].currselv+1; // bit# to start look
while(1){ // it's gotta be there
UI nextbits=(UI)axes[rodo].sels[nextx>>LGBW]>>(nextx&(BW-1)); // the rest of this word
Expand All @@ -369,7 +373,7 @@ else{ // normal last axis
break; // when wheel doesn't roll over, stop processing wheels
}
// here the current axis is rolling over.
if(rodo==0)goto endaxes; // when first axis rolls over, we are finished
if(rodo<=0)goto endaxes; // when we roll over the biggest wheel, quit
axes[rodo].currselx=0; // back to start
base+=(axes[rodo].sel0-axes[rodo].currselv)*axes[rodo].lencell; // move base by amount of index movement
axes[rodo].currselv=axes[rodo].sel0; // set starting index for wheel
Expand Down Expand Up @@ -922,6 +926,7 @@ static F2(jtafrom){F2PREFIP; PROLOG(0073);
// obsolete if(!(AT(c)&BOX)){ASSERT(AR(c)<=1,EVRANK) R jtfrombu(jtinplace,c,w,wf);} // if single-boxed, handle as <"1@[ { ].
if(!(AT(c)&BOX)){R jtfrombu(jtinplace,c,w,wf);} // if single-boxed, handle as <"1@[ { ].
// Double-boxed. Set up axis structs
// We DO NOT treat leading scalar indexes as a special case here. Building & using the axis block is pretty cheap. We catch them when we fill.
ASSERT(1>=AR(c),EVRANK); // boxes may not have rank > 1
ASSERT(AN(c)<=wcr,EVLENGTH); // number of axes must not exceed #axes in major cell
I *ws=AS(w); // #axes-1. We need a leading axis in full if there are multiple cells of w
Expand Down
2 changes: 1 addition & 1 deletion test/g520.ijs
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,7 @@ NB. parms is #cols(flagged),maxAx,Col0Threshold,expandQk (testcase option),Min
cutoffinfo =. (2 ,~ #rvt) $ 0.
assert. (0 1 4 832 1.341640786499873838; _2 ]\ _256 5 _512 5 _64 2 0 1 ) ((0 1.341640786499873838,:0 1.788854381999831) run128_9) Ax;Am;Av;M;rvt;bndrowmask;'';cutoffinfo;1 0 2 3;(_0.3 (4}) parms);Frow


NB. scaf need a way to test many threads in gradient mode

NB. end of tests, add a thread
0 T. ''
Expand Down

0 comments on commit 90a1ce0

Please sign in to comment.