-
Notifications
You must be signed in to change notification settings - Fork 5.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[serve] recursive cancellation #47873
Changes from all commits
679a91d
dcd60ff
6cef130
fe7bd70
ce4042b
ebfaba3
8ac7d12
e59e081
ee9537f
6d20408
7b5e6b5
f95c889
24a89bf
4487c78
ed16fa7
3d7bcf1
708c7de
3590a75
c36c082
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,7 +14,7 @@ | |
async_wait_for_condition, | ||
wait_for_condition, | ||
) | ||
from ray.serve._private.test_utils import send_signal_on_cancellation | ||
from ray.serve._private.test_utils import send_signal_on_cancellation, tlog | ||
|
||
|
||
@pytest.mark.parametrize("use_fastapi", [False, True]) | ||
|
@@ -365,5 +365,104 @@ async def get_out_of_band_response(self): | |
assert h.get_out_of_band_response.remote().result() == "ok" | ||
|
||
|
||
def test_recursive_cancellation_during_execution(serve_instance): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. in order for these tests to be meaningful, don't we need to turn off the core cancellation support? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So at first I took the approach of implementing full recursive cancellation at the Serve layer, but that leads to more of a performance drop. I think it's unnecessary to cover the part that core already covers (recursive task cancellation). We just need to take care of canceling the asyncio request assignment task before a request has been assigned to a replica. So in this test file |
||
inner_signal_actor = SignalActor.remote() | ||
outer_signal_actor = SignalActor.remote() | ||
|
||
@serve.deployment | ||
async def inner(): | ||
await send_signal_on_cancellation(inner_signal_actor) | ||
|
||
@serve.deployment | ||
class Ingress: | ||
def __init__(self, handle): | ||
self._handle = handle | ||
|
||
async def __call__(self): | ||
_ = self._handle.remote() | ||
await send_signal_on_cancellation(outer_signal_actor) | ||
|
||
h = serve.run(Ingress.bind(inner.bind())) | ||
|
||
resp = h.remote() | ||
with pytest.raises(TimeoutError): | ||
resp.result(timeout_s=0.5) | ||
|
||
resp.cancel() | ||
ray.get(inner_signal_actor.wait.remote(), timeout=10) | ||
ray.get(outer_signal_actor.wait.remote(), timeout=10) | ||
|
||
|
||
def test_recursive_cancellation_during_assignment(serve_instance): | ||
signal = SignalActor.remote() | ||
|
||
@serve.deployment(max_ongoing_requests=1) | ||
class Counter: | ||
def __init__(self): | ||
self._count = 0 | ||
|
||
async def __call__(self): | ||
self._count += 1 | ||
await signal.wait.remote() | ||
|
||
def get_count(self): | ||
return self._count | ||
|
||
@serve.deployment | ||
class Ingress: | ||
def __init__(self, handle): | ||
self._handle = handle | ||
|
||
async def __call__(self): | ||
self._handle.remote() | ||
await signal.wait.remote() | ||
return "hi" | ||
|
||
async def get_count(self): | ||
return await self._handle.get_count.remote() | ||
|
||
async def check_requests_pending_assignment_cache(self): | ||
requests_pending_assignment = ray.serve.context._requests_pending_assignment | ||
return {k: list(v.keys()) for k, v in requests_pending_assignment.items()} | ||
|
||
h = serve.run(Ingress.bind(Counter.bind())) | ||
|
||
# Send two requests to Ingress. The second should be queued and | ||
# pending assignment at Ingress because max ongoing requests for | ||
# Counter is only 1. | ||
tlog("Sending two requests to Ingress.") | ||
resp1 = h.remote() | ||
with pytest.raises(TimeoutError): | ||
resp1.result(timeout_s=0.5) | ||
resp2 = h.remote() | ||
with pytest.raises(TimeoutError): | ||
resp2.result(timeout_s=0.5) | ||
|
||
# Cancel second request, which should be pending assignment. | ||
tlog("Canceling second request.") | ||
resp2.cancel() | ||
|
||
# Release signal so that the first request can complete, and any new | ||
# requests to Counter can be let through | ||
tlog("Releasing signal.") | ||
ray.get(signal.send.remote()) | ||
assert resp1.result() == "hi" | ||
|
||
# The second request, even though it was pending assignment to a | ||
# Counter replica, should have been properly canceled. Confirm this | ||
# by making sure that no more calls to __call__ were made | ||
for _ in range(10): | ||
assert h.get_count.remote().result() == 1 | ||
|
||
tlog("Confirmed second request was properly canceled.") | ||
|
||
# Check that cache was cleared so there are no memory leaks | ||
requests_pending_assignment = ( | ||
h.check_requests_pending_assignment_cache.remote().result() | ||
) | ||
for k, v in requests_pending_assignment.items(): | ||
assert len(v) == 0, f"Request {k} has in flight requests in cache: {v}" | ||
|
||
|
||
if __name__ == "__main__": | ||
sys.exit(pytest.main(["-v", "-s", __file__])) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Non-blocker: There is this generate_request_id() maybe we can rename and reuse here?