Skip to content

Commit

Permalink
precommit
Browse files Browse the repository at this point in the history
  • Loading branch information
renxida committed Dec 10, 2024
1 parent 386c368 commit 176f0aa
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions shortfin/python/shortfin_apps/llm/components/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,13 +390,17 @@ async def run(self):
if self.phase == InferencePhase.DECODE:
start_positions_host = start_positions.for_transfer()
with start_positions_host.map(discard=True) as m:
m.fill(1) # Pad unused requests. Must pad with nonzero value because division by 0 floods clobber page (page 0) in cache with NaN values.
m.fill(
1
) # Pad unused requests. The pad value must be nonzero: padding with 0 causes a division by zero that floods the clobber page (page 0) of the cache with NaN values.
m.items = [req.start_position for req in self.exec_requests]
start_positions_host.copy_to(start_positions)

seq_lens_host = seq_lens.for_transfer()
with seq_lens_host.map(discard=True) as m:
m.fill(1) # Pad unused requests. Must pad with nonzero value because division by 0 floods clobber page (page 0) in cache with NaN values.
m.fill(
1
) # Pad unused requests. The pad value must be nonzero: padding with 0 causes a division by zero that floods the clobber page (page 0) of the cache with NaN values.
m.items = [
req.start_position + len(req.input_token_ids)
for req in self.exec_requests
Expand Down

0 comments on commit 176f0aa

Please sign in to comment.