Skip to content

Commit

Permalink
Cherry-pick #50860 (#50867)
Browse files Browse the repository at this point in the history
Fix two Serve release tests by passing the cloud name explicitly to the Anyscale service status and terminate calls.

Signed-off-by: Lonnie Liu <[email protected]>
  • Loading branch information
aslonnie authored Feb 24, 2025
1 parent ecd0709 commit 84f2764
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 7 deletions.
11 changes: 8 additions & 3 deletions release/serve_tests/workloads/anyscale_service_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,10 @@
logging.basicConfig(level=logging.INFO)


def check_service_state(service_name: str, expected_state: ServiceState):
state = service.status(name=service_name).state
def check_service_state(
service_name: str, expected_state: ServiceState, cloud: Optional[str] = None
):
state = service.status(name=service_name, cloud=cloud).state
logger.info(
f"Waiting for service {service_name} to be {expected_state}, currently {state}"
)
Expand All @@ -34,6 +36,7 @@ def start_service(
image_uri: Optional[str] = None,
working_dir: Optional[str] = None,
add_unique_suffix: bool = True,
cloud: Optional[str] = None,
):
"""Starts an Anyscale Service with the specified configs.
Expand Down Expand Up @@ -76,18 +79,20 @@ def start_service(
expected_state="RUNNING",
retry_interval_ms=10000, # 10s
timeout=600,
cloud=cloud,
)

yield service_name

finally:
logger.info(f"Terminating service {service_name}.")
service.terminate(name=service_name)
service.terminate(name=service_name, cloud=cloud)
wait_for_condition(
check_service_state,
service_name=service_name,
expected_state="TERMINATED",
retry_interval_ms=10000, # 10s
timeout=600,
cloud=cloud,
)
logger.info(f"Service '{service_name}' terminated successfully.")
6 changes: 4 additions & 2 deletions release/serve_tests/workloads/autoscaling_load_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@


URI = "https://serve-resnet-benchmark-data.s3.us-west-1.amazonaws.com/000000000019.jpeg"
CLOUD = "serve_release_tests_cloud"


@click.command()
Expand All @@ -47,7 +48,7 @@ def main(output_path: Optional[str], image_uri: Optional[str]):
],
}
compute_config = ComputeConfig(
cloud="serve_release_tests_cloud",
cloud=CLOUD,
head_node=HeadNodeConfig(instance_type="m5.8xlarge"),
worker_nodes=[
WorkerNodeGroupConfig(
Expand All @@ -67,9 +68,10 @@ def main(output_path: Optional[str], image_uri: Optional[str]):
compute_config=compute_config,
applications=[resnet_application],
working_dir="workloads",
cloud=CLOUD,
) as service_name:
ray.init(address="auto")
status = service.status(name=service_name)
status = service.status(name=service_name, cloud=CLOUD)

# Start the locust workload
num_locust_workers = int(ray.available_resources()["CPU"]) - 1
Expand Down
6 changes: 4 additions & 2 deletions release/serve_tests/workloads/replica_scalability.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

DEFAULT_FULL_TEST_NUM_REPLICA = 1000
DEFAULT_FULL_TEST_TRIAL_LENGTH_S = 60
CLOUD = "serve_release_tests_cloud"


@click.command()
Expand Down Expand Up @@ -52,7 +53,7 @@ def main(
],
}
compute_config = ComputeConfig(
cloud="serve_release_tests_cloud",
cloud=CLOUD,
head_node=HeadNodeConfig(instance_type="m5.8xlarge"),
worker_nodes=[
WorkerNodeGroupConfig(
Expand Down Expand Up @@ -92,9 +93,10 @@ def main(
compute_config=compute_config,
applications=[noop_1k_application],
working_dir="workloads",
cloud=CLOUD,
) as service_name:
ray.init("auto")
status = service.status(name=service_name)
status = service.status(name=service_name, cloud=CLOUD)

# Start the locust workload
num_locust_workers = int(ray.available_resources()["CPU"]) - 1
Expand Down

0 comments on commit 84f2764

Please sign in to comment.