diff --git a/alf/trainers/policy_trainer.py b/alf/trainers/policy_trainer.py
index 72fd6d1df..accf2c633 100644
--- a/alf/trainers/policy_trainer.py
+++ b/alf/trainers/policy_trainer.py
@@ -1120,6 +1120,8 @@ def play(root_dir,
     else:
         selective_criteria_func = None
 
+    # Sync the progress for all environments in case parallel_play > 1
+    env.sync_progress()
     while episodes < num_episodes:
         # For parallel play, we cannot naively pick the first finished `num_episodes`
         # episodes to estimate the average return (or other statistics) as it can be