diff --git a/go.mod b/go.mod index f51b31e..dd619b3 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module github.com/kelindar/tile go 1.23 require ( + github.com/kelindar/intmap v1.4.1 github.com/kelindar/iostream v1.4.0 github.com/stretchr/testify v1.9.0 ) diff --git a/go.sum b/go.sum index 9a32195..a913624 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,7 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/kelindar/intmap v1.4.1 h1:3jTPTrfNx4pxBPURR1+6f4YhbZS57CzsU0S9NEV51ZI= +github.com/kelindar/intmap v1.4.1/go.mod h1:NkypxhfaklmDTJqwano3Q1BWk6je77qgQwszDwu8Kc8= github.com/kelindar/iostream v1.4.0 h1:ELKlinnM/K3GbRp9pYhWuZOyBxMMlYAfsOP+gauvZaY= github.com/kelindar/iostream v1.4.0/go.mod h1:MkjMuVb6zGdPQVdwLnFRO0xOTOdDvBWTztFmjRDQkXk= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= diff --git a/path.go b/path.go index baf61ba..31b8f31 100644 --- a/path.go +++ b/path.go @@ -5,7 +5,10 @@ package tile import ( "math" + "math/bits" "sync" + + "github.com/kelindar/intmap" ) type costFn = func(Value) uint16 @@ -25,19 +28,20 @@ func (m *Grid[T]) Around(from Point, distance uint32, costOf costFn, fn func(Poi fn(from, start) - // Acquire a frontier heap for search - frontier := acquireHeap() - frontier.Push(from.Integer(), 0) - defer releaseHeap(frontier) - // For pre-allocating, we use πr2 since BFS will result in a approximation // of a circle, in the worst case. 
maxArea := int(math.Ceil(math.Pi * float64(distance*distance))) - reached := make(map[uint32]struct{}, maxArea) - reached[from.Integer()] = struct{}{} + // Acquire a frontier heap for search + state := acquire(maxArea) + frontier := state.frontier + reached := state.edges + defer release(state) + + frontier.Push(from.Integer(), 0) + reached.Store(from.Integer(), 0) for !frontier.IsEmpty() { - pCurr, _ := frontier.Pop() + pCurr := frontier.Pop() current := unpackPoint(pCurr) // Get all of the neighbors @@ -52,9 +56,9 @@ func (m *Grid[T]) Around(from Point, distance uint32, costOf costFn, fn func(Poi // Add to the search queue pNext := next.Integer() - if _, ok := reached[pNext]; !ok { + if _, ok := reached.Load(pNext); !ok { frontier.Push(pNext, 1) - reached[pNext] = struct{}{} + reached.Store(pNext, 1) fn(next, nextTile) } }) @@ -63,177 +67,190 @@ func (m *Grid[T]) Around(from Point, distance uint32, costOf costFn, fn func(Poi // Path calculates a short path and the distance between the two locations func (m *Grid[T]) Path(from, to Point, costOf costFn) ([]Point, int, bool) { - - // Acquire a frontier heap for search - frontier := acquireHeap() - frontier.Push(from.Integer(), 0) - defer releaseHeap(frontier) + distance := float64(from.DistanceTo(to)) + maxArea := int(math.Ceil(math.Pi * float64(distance*distance))) // For pre-allocating, we use πr2 since BFS will result in a approximation // of a circle, in the worst case. 
- distance := float64(from.DistanceTo(to)) - maxArea := int(math.Ceil(math.Pi * float64(distance*distance))) - edges := make(map[uint32]edge, maxArea) - edges[from.Integer()] = edge{ - Point: from, - Cost: 0, - } + state := acquire(maxArea) + edges := state.edges + frontier := state.frontier + defer release(state) + + frontier.Push(from.Integer(), 0) + edges.Store(from.Integer(), encode(0, Direction(0))) // Starting point has no direction for !frontier.IsEmpty() { - pCurr, _ := frontier.Pop() + pCurr := frontier.Pop() current := unpackPoint(pCurr) - // We have a path to the goal + // Decode the cost to reach the current point + currentEncoded, _ := edges.Load(pCurr) + currentCost, _ := decode(currentEncoded) + + // Check if we've reached the destination if current.Equal(to) { - dist := int(edges[current.Integer()].Cost) - path := make([]Point, 0, dist) - curr, _ := edges[current.Integer()] - for !curr.Point.Equal(from) { - path = append(path, curr.Point) - curr = edges[curr.Point.Integer()] + + // Reconstruct the path + path := make([]Point, 0, 64) + path = append(path, current) + for !current.Equal(from) { + currentEncoded, _ := edges.Load(current.Integer()) + _, dir := decode(currentEncoded) + current = current.Move(oppositeDirection(dir)) + path = append(path, current) + } + + // Reverse the path to get from source to destination + for i, j := 0, len(path)-1; i < j; i, j = i+1, j-1 { + path[i], path[j] = path[j], path[i] } - return path, dist, true + return path, int(currentCost), true } - // Get all of the neighbors + // Explore neighbors m.Neighbors(current.X, current.Y, func(next Point, nextTile Tile[T]) { cNext := costOf(nextTile.Value()) if cNext == 0 { - return // Blocked tile, ignore completely + return // Blocked tile } + nextCost := currentCost + uint32(cNext) pNext := next.Integer() - newCost := edges[pCurr].Cost + uint32(cNext) // cost(current, next) - if e, ok := edges[pNext]; !ok || newCost < e.Cost { - priority := newCost + next.DistanceTo(to) // 
heuristic - frontier.Push(next.Integer(), priority) + existingEncoded, visited := edges.Load(pNext) + existingCost, _ := decode(existingEncoded) - edges[pNext] = edge{ - Point: current, - Cost: newCost, - } - } + // If we haven't visited this node or we found a better path + if !visited || nextCost < existingCost { + angle := angleOf(current, next) + priority := nextCost + next.DistanceTo(to) + // Store the edge and push to the frontier + edges.Store(pNext, encode(nextCost, angle)) + frontier.Push(pNext, priority) + } }) } return nil, 0, false } -// ----------------------------------------------------------------------------- - -var heapPool = sync.Pool{ - New: func() interface{} { return new(heap32) }, -} - -// Acquires a new instance of a heap -func acquireHeap() *heap32 { - h := heapPool.Get().(*heap32) - h.Reset() - return h +// encode packs the cost and direction into a uint32 +func encode(cost uint32, dir Direction) uint32 { + return (cost << 4) | uint32(dir&0xF) } -// Releases a heap instance back to the pool -func releaseHeap(h *heap32) { - heapPool.Put(h) +// decode unpacks the cost and direction from a uint32 +func decode(value uint32) (cost uint32, dir Direction) { + cost = value >> 4 + dir = Direction(value & 0xF) + return } // ----------------------------------------------------------------------------- -// heapNode represents a ranked node for the heap. -type heapNode struct { - Value uint32 // The value of the ranked node. - Rank uint32 // The rank associated with the ranked node. 
+type pathfinder struct { + edges *intmap.Map + frontier *frontier } -type heap32 []heapNode - -func newHeap32(capacity int) heap32 { - return make(heap32, 0, capacity) +var pathfinders = sync.Pool{ + New: func() any { + return &pathfinder{ + edges: intmap.New(32, .95), + frontier: newFrontier(), + } + }, } -// Reset clears the heap for reuse -func (h *heap32) Reset() { - *h = (*h)[:0] -} +// Acquires a new instance of a pathfinding state +func acquire(capacity int) *pathfinder { + v := pathfinders.Get().(*pathfinder) + if v.edges.Capacity() < capacity { + v.edges = intmap.New(capacity, .95) + } -// Push pushes the element x onto the heap. -// The complexity is O(log n) where n = h.Len(). -func (h *heap32) Push(v, rank uint32) { - *h = append(*h, heapNode{ - Value: v, - Rank: rank, - }) - h.up(h.Len() - 1) + return v } -// Pop removes and returns the minimum element (according to Less) from the heap. -// The complexity is O(log n) where n = h.Len(). -// Pop is equivalent to Remove(h, 0). -func (h *heap32) Pop() (uint32, bool) { - n := h.Len() - 1 - if n < 0 { - return 0, false - } - - h.Swap(0, n) - h.down(0, n) - return h.pop(), true +// release releases a pathfinding state back to the pool +func release(v *pathfinder) { + v.edges.Clear() + v.frontier.Reset() + pathfinders.Put(v) } -func (h *heap32) pop() uint32 { - old := *h - n := len(old) - no := old[n-1] - *h = old[0 : n-1] - return no.Value +// ----------------------------------------------------------------------------- + +// frontier is a priority queue implementation that uses buckets to store +// elements. 
// frontierBuckets is the number of distinct priority levels the frontier can
// tell apart. It must not exceed 64 because bucket occupancy is tracked in a
// single uint64 bitmask.
const frontierBuckets = 64

// frontier is a bucket-based priority queue. Values are kept in one of
// frontierBuckets slices indexed by priority, and bit i of mask is set exactly
// when bucket i holds at least one value. The lowest non-empty bucket is found
// with a single trailing-zero count instead of maintaining a binary heap.
//
// Within a bucket, values are returned in LIFO order.
//
// Original implementation by Iskander Sharipov (https://github.com/quasilyte/pathing).
type frontier struct {
	buckets [frontierBuckets][]uint32
	mask    uint64
}

// newFrontier creates an empty frontier with a small amount of storage
// pre-allocated for every bucket.
func newFrontier() *frontier {
	q := new(frontier)
	for i := range q.buckets {
		q.buckets[i] = make([]uint32, 0, 16)
	}
	return q
}

// Reset empties the queue while keeping the storage of every bucket so it can
// be reused by the next search.
func (q *frontier) Reset() {
	// Only buckets whose mask bit is set can contain values, so walk the set
	// bits (clearing the lowest one each iteration) rather than all buckets.
	for m := q.mask; m != 0; m &= m - 1 {
		i := bits.TrailingZeros64(m)
		q.buckets[i] = q.buckets[i][:0]
	}
	q.mask = 0
}

// IsEmpty reports whether the queue holds no values.
func (q *frontier) IsEmpty() bool {
	return q.mask == 0
}

// Push adds value to the queue with the given priority; lower priorities are
// popped first. Priorities beyond the last bucket are clamped into it, which
// means such values are still popped after every in-range priority but are not
// ordered among themselves.
func (q *frontier) Push(value, priority uint32) {
	// Clamp rather than mask: masking with 0b111111 would wrap priority 64 to
	// bucket 0 and make the least urgent values pop first.
	i := priority
	if i > frontierBuckets-1 {
		i = frontierBuckets - 1
	}

	q.buckets[i] = append(q.buckets[i], value)
	q.mask |= 1 << i
}

// Pop removes and returns a value from the lowest-priority non-empty bucket.
// It returns 0 when the queue is empty; use IsEmpty to tell that apart from a
// stored zero value.
func (q *frontier) Pop() uint32 {
	// TrailingZeros64 yields 64 for an empty mask, which falls outside the
	// bucket range and doubles as the emptiness check.
	i := uint(bits.TrailingZeros64(q.mask))
	if i >= uint(len(q.buckets)) {
		return 0
	}

	bucket := q.buckets[i]
	last := len(bucket) - 1
	value := bucket[last]
	q.buckets[i] = bucket[:last]

	// Keep the mask in sync once the bucket has been drained.
	if last == 0 {
		q.mask &^= 1 << i
	}
	return value
}
.........`, plotPath(m, path)) + + fmt.Println(plotPath(m, path)) assert.Equal(t, 12, dist) assert.True(t, found) } @@ -35,12 +38,12 @@ func TestPathTiny(t *testing.T) { m := NewGrid(6, 6) path, dist, found := m.Path(At(0, 0), At(5, 5), costOf) assert.Equal(t, ` - x - x - x - x - x - xxxx `, plotPath(m, path)) +x +x +x +x +x +xxxxxx`, plotPath(m, path)) assert.Equal(t, 10, dist) assert.True(t, found) } @@ -51,12 +54,15 @@ func TestDraw(t *testing.T) { assert.NotNil(t, out) } -// BenchmarkPath/9x9-8 210472 5316 ns/op 16468 B/op 3 allocs/op -// BenchmarkPath/300x300-8 463 2546373 ns/op 7801135 B/op 4 allocs/op -// BenchmarkPath/381x381-8 373 2732657 ns/op 62394362 B/op 4 allocs/op -// BenchmarkPath/384x384-8 153 7791925 ns/op 62396304 B/op 5 allocs/op -// BenchmarkPath/6144x6144-8 158 7468206 ns/op 62395377 B/op 3 allocs/op -// BenchmarkPath/6147x6147-8 160 7468716 ns/op 62395359 B/op 3 allocs/op +/* +BenchmarkPath/9x9-24 2704395 440.4 ns/op 256 B/op 1 allocs/op +BenchmarkPath/300x300-24 1134 1033808 ns/op 3845 B/op 4 allocs/op +BenchmarkPath/381x381-24 2782 377676 ns/op 7298 B/op 5 allocs/op +BenchmarkPath/384x384-24 2716 382663 ns/op 7298 B/op 5 allocs/op +BenchmarkPath/3069x3069-24 847 1368243 ns/op 100140 B/op 7 allocs/op +BenchmarkPath/3072x3072-24 849 1368387 ns/op 99954 B/op 7 allocs/op +BenchmarkPath/6144x6144-24 3050 387195 ns/op 12802 B/op 5 allocs/op +*/ func BenchmarkPath(b *testing.B) { b.Run("9x9", func(b *testing.B) { m := mapFrom("9x9.png") @@ -122,9 +128,12 @@ func BenchmarkPath(b *testing.B) { }) } -// BenchmarkAround/3r-8 352876 3355 ns/op 385 B/op 1 allocs/op -// BenchmarkAround/5r-8 162103 7551 ns/op 931 B/op 2 allocs/op -// BenchmarkAround/10r-8 62491 19235 ns/op 3489 B/op 2 allocs/op +/* +cpu: 13th Gen Intel(R) Core(TM) i7-13700K +BenchmarkAround/3r-24 2080566 562.7 ns/op 0 B/op 0 allocs/op +BenchmarkAround/5r-24 885582 1358 ns/op 0 B/op 0 allocs/op +BenchmarkAround/10r-24 300672 3953 ns/op 0 B/op 0 allocs/op +*/ func BenchmarkAround(b 
*testing.B) { m := mapFrom("300x300.png") b.Run("3r", func(b *testing.B) { @@ -175,10 +184,13 @@ func TestAroundMiss(t *testing.T) { }) } -// BenchmarkHeap-8 94454 12303 ns/op 3968 B/op 5 allocs/op +/* +cpu: 13th Gen Intel(R) Core(TM) i7-13700K +BenchmarkHeap-24 240228 5076 ns/op 6016 B/op 68 allocs/op +*/ func BenchmarkHeap(b *testing.B) { for i := 0; i < b.N; i++ { - h := newHeap32(16) + h := newFrontier() for j := 0; j < 128; j++ { h.Push(rand(j), 1) } @@ -189,28 +201,6 @@ func BenchmarkHeap(b *testing.B) { } } -func TestHeap(t *testing.T) { - h := newHeap32(16) - h.Push(1, 0) - h.Pop() -} - -func TestNewHeap(t *testing.T) { - h := newHeap32(16) - for j := 0; j < 8; j++ { - h.Push(rand(j), uint32(j)) - } - - val, _ := h.Pop() - for j := 1; j < 128; j++ { - newval, ok := h.Pop() - if ok { - assert.True(t, val < newval) - val = newval - } - } -} - // very fast semi-random function func rand(i int) uint32 { i = i + 10000 diff --git a/point.go b/point.go index d8a900b..eb34f1e 100644 --- a/point.go +++ b/point.go @@ -269,3 +269,35 @@ func (v Direction) String() string { func (v Direction) Vector(scale int16) Point { return Point{}.MoveBy(v, scale) } + +// angleOf returns the direction from one point to another +func angleOf(from, to Point) Direction { + dx := to.X - from.X + dy := to.Y - from.Y + + switch { + case dx == 0 && dy == -1: + return North + case dx == 1 && dy == -1: + return NorthEast + case dx == 1 && dy == 0: + return East + case dx == 1 && dy == 1: + return SouthEast + case dx == 0 && dy == 1: + return South + case dx == -1 && dy == 1: + return SouthWest + case dx == -1 && dy == 0: + return West + case dx == -1 && dy == -1: + return NorthWest + default: + return Direction(0) // Invalid direction + } +} + +// oppositeDirection returns the opposite of the given direction +func oppositeDirection(dir Direction) Direction { + return Direction((dir + 4) % 8) +}