diff --git a/autogen/agentchat/contrib/retrieve_user_proxy_agent.py b/autogen/agentchat/contrib/retrieve_user_proxy_agent.py
index 4842bd4e9f53..90757af6fc3e 100644
--- a/autogen/agentchat/contrib/retrieve_user_proxy_agent.py
+++ b/autogen/agentchat/contrib/retrieve_user_proxy_agent.py
@@ -519,7 +519,7 @@ def _generate_retrieve_user_reply(
                             self.problem, self.n_results * (2 * _tmp_retrieve_count + 1), self._search_string
                         )
                         doc_contents = self._get_context(self._results)
-                        if doc_contents:
+                        if doc_contents or self.n_results * (2 * _tmp_retrieve_count + 1) >= len(self._results[0]):
                             break
             elif update_context_case2:
                 # Use the current intermediate info as the query text to retrieve docs, and each time we append the top similar
@@ -531,7 +531,7 @@ def _generate_retrieve_user_reply(
                     )
                     self._get_context(self._results)
                     doc_contents = "\n".join(self._doc_contents)  # + "\n" + "\n".join(self._intermediate_answers)
-                    if doc_contents:
+                    if doc_contents or self.n_results * (2 * _tmp_retrieve_count + 1) >= len(self._results[0]):
                         break
 
             self.clear_history()
diff --git a/notebook/agentchat_RetrieveChat.ipynb b/notebook/agentchat_RetrieveChat.ipynb
index adb13ac47bd5..6fefcd3ba44c 100644
--- a/notebook/agentchat_RetrieveChat.ipynb
+++ b/notebook/agentchat_RetrieveChat.ipynb
@@ -48,14 +48,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "models to use:  ['gpt-3.5-turbo-0125']\n"
+      "models to use:  ['gpt-35-turbo', 'gpt4-1106-preview', 'gpt-4o']\n"
      ]
     }
    ],
@@ -73,9 +73,7 @@
     "# a vector database instance\n",
     "from autogen.retrieve_utils import TEXT_FORMATS\n",
     "\n",
-    "config_list = [\n",
-    "    {\"model\": \"gpt-3.5-turbo-0125\", \"api_key\": \"<YOUR_API_KEY>\", \"api_type\": \"openai\"},\n",
-    "]\n",
+    "config_list = autogen.config_list_from_json(\"OAI_CONFIG_LIST\")\n",
     "\n",
     "assert len(config_list) > 0\n",
     "print(\"models to use: \", [config_list[i][\"model\"] for i in range(len(config_list))])"
@@ -107,7 +105,7 @@
      "output_type": "stream",
      "text": [
       "Accepted file formats for `docs_path`:\n",
-      "['odt', 'xml', 'pdf', 'docx', 'html', 'md', 'htm', 'csv', 'rst', 'org', 'ppt', 'doc', 'log', 'json', 'epub', 'jsonl', 'pptx', 'yml', 'xlsx', 'tsv', 'txt', 'yaml', 'msg', 'rtf']\n"
+      "['txt', 'json', 'csv', 'tsv', 'md', 'html', 'htm', 'rtf', 'rst', 'jsonl', 'log', 'xml', 'yaml', 'yml', 'pdf']\n"
      ]
     }
    ],
@@ -120,7 +118,16 @@
    "cell_type": "code",
    "execution_count": 3,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/workspace/anaconda3/envs/autogen312/lib/python3.12/site-packages/sentence_transformers/cross_encoder/CrossEncoder.py:11: TqdmExperimentalWarning: Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)\n",
+      "  from tqdm.autonotebook import tqdm, trange\n"
+     ]
+    }
+   ],
    "source": [
     "# 1. create an RetrieveAssistantAgent instance named \"assistant\"\n",
     "assistant = RetrieveAssistantAgent(\n",
@@ -160,6 +167,7 @@
     "        # \"client\": chromadb.PersistentClient(path=\"/tmp/chromadb\"),  # deprecated, use \"vector_db\" instead\n",
     "        \"vector_db\": \"chroma\",  # to use the deprecated `client` parameter, set to None and uncomment the line above\n",
     "        \"overwrite\": False,  # set to True if you want to overwrite an existing collection\n",
+    "        \"get_or_create\": True,  # set to False if you don't want to reuse an existing collection\n",
     "    },\n",
     "    code_execution_config=False,  # set to False if you don't want to execute the code\n",
     ")"
@@ -188,7 +196,8 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "2024-04-07 17:30:56,955 - autogen.agentchat.contrib.retrieve_user_proxy_agent - INFO - \u001b[32mUse the existing collection `autogen-docs`.\u001b[0m\n"
+      "2024-08-02 06:30:11,303 - autogen.agentchat.contrib.retrieve_user_proxy_agent - INFO - \u001b[32mUse the existing collection `autogen-docs`.\u001b[0m\n",
+      "2024-08-02 06:30:11,485 - autogen.agentchat.contrib.retrieve_user_proxy_agent - INFO - Found 2 chunks.\u001b[0m\n"
      ]
     },
     {
@@ -202,7 +211,6 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "2024-04-07 17:30:59,609 - autogen.agentchat.contrib.retrieve_user_proxy_agent - INFO - Found 2 chunks.\u001b[0m\n",
       "Number of requested results 20 is greater than number of elements in index 2, updating n_results = 2\n"
      ]
     },
@@ -361,65 +369,53 @@
       "--------------------------------------------------------------------------------\n",
       "\u001b[33massistant\u001b[0m (to ragproxyagent):\n",
       "\n",
-      "To perform a classification task using FLAML and use Spark to do parallel training for 30 seconds and force cancel jobs if the time limit is reached, you can follow these steps:\n",
-      "\n",
-      "1. First, convert your data into Spark dataframe format using `to_pandas_on_spark` function from `flaml.automl.spark.utils` module.\n",
-      "2. Then, format your data for use SparkML models by using `VectorAssembler`.\n",
-      "3. Define your AutoML settings, including the `metric`, `time_budget`, and `task`.\n",
-      "4. Use `AutoML` from `flaml` to run AutoML with SparkML models by setting `use_spark` to `true`, and `estimator_list` to a list of spark-based estimators, like `[\"lgbm_spark\"]`.\n",
-      "5. Set `n_concurrent_trials` to the desired number of parallel jobs and `force_cancel` to `True` to cancel the jobs if the time limit is reached.\n",
-      "\n",
-      "Here's an example code snippet for performing classification using FLAML and Spark:\n",
-      "\n",
       "```python\n",
-      "import pandas as pd\n",
+      "import flaml\n",
       "from flaml.automl.spark.utils import to_pandas_on_spark\n",
       "from pyspark.ml.feature import VectorAssembler\n",
-      "import flaml\n",
+      "import pandas as pd\n",
       "\n",
-      "# Creating a dictionary\n",
+      "# Example Data (Please provide real data in practice)\n",
       "data = {\n",
-      "    \"sepal_length\": [5.1, 4.9, 4.7, 4.6, 5.0],\n",
-      "    \"sepal_width\": [3.5, 3.0, 3.2, 3.1, 3.6],\n",
-      "    \"petal_length\": [1.4, 1.4, 1.3, 1.5, 1.4],\n",
-      "    \"petal_width\": [0.2, 0.2, 0.2, 0.2, 0.2],\n",
-      "    \"species\": [\"setosa\", \"setosa\", \"setosa\", \"setosa\", \"setosa\"]\n",
+      "    \"feature1\": [0, 1, 2, 3, 4],\n",
+      "    \"feature2\": [1, 2, 3, 4, 5],\n",
+      "    # ... add all features you need for your classification\n",
+      "    \"label\": ['a', 'b', 'a', 'a', 'b'],  # assuming binary classification with labels 'a' and 'b'\n",
       "}\n",
       "\n",
-      "# Creating a pandas DataFrame\n",
-      "dataframe = pd.DataFrame(data)\n",
-      "label = \"species\"\n",
+      "# Convert to Pandas DataFrame\n",
+      "pdf = pd.DataFrame(data)\n",
       "\n",
-      "# Convert to pandas-on-spark dataframe\n",
-      "psdf = to_pandas_on_spark(dataframe)\n",
+      "# Generate pandas-on-spark dataframe\n",
+      "psdf = to_pandas_on_spark(pdf)\n",
       "\n",
-      "# Format data for SparkML models\n",
-      "columns = psdf.columns\n",
-      "feature_cols = [col for col in columns if col != label]\n",
+      "# Organize data into feature vectors and labels\n",
+      "label_col = \"label\"\n",
+      "feature_cols = [col for col in psdf.columns if col != label_col]\n",
       "featurizer = VectorAssembler(inputCols=feature_cols, outputCol=\"features\")\n",
-      "psdf = featurizer.transform(psdf.to_spark(index_col=\"index\"))[\"index\", \"features\"]\n",
       "\n",
-      "# Define AutoML settings\n",
-      "settings = {\n",
+      "# Apply the transformation\n",
+      "psdf = featurizer.transform(psdf.to_spark(index_col=\"index\"))[\"index\", \"features\", label_col]\n",
+      "\n",
+      "# Prepare AutoML settings\n",
+      "automl_settings = {\n",
       "    \"time_budget\": 30,\n",
-      "    \"metric\": \"accuracy\",\n",
+      "    \"metric\": \"accuracy\",  # Change this to a classification metric you prefer\n",
       "    \"task\": \"classification\",\n",
+      "    \"n_concurrent_trials\": 2,  # Or other number that fits your Spark cluster configuration\n",
+      "    \"use_spark\": True,\n",
+      "    \"force_cancel\": True,  # Enable force cancel to obey the time constraint\n",
+      "    \"estimator_list\": [\"lgbm_spark\"],  # Specify SparkML estimators you want to try\n",
       "}\n",
       "\n",
-      "# Use AutoML with SparkML models and parallel jobs\n",
+      "# Create an AutoML instance\n",
       "automl = flaml.AutoML()\n",
-      "automl.fit(\n",
-      "    dataframe=psdf,\n",
-      "    label=label,\n",
-      "    estimator_list=[\"lgbm_spark\"],\n",
-      "    use_spark=True,\n",
-      "    n_concurrent_trials=2,\n",
-      "    force_cancel=True,\n",
-      "    **settings,\n",
-      ")\n",
-      "```\n",
       "\n",
-      "Note that the above code assumes the data is small enough to train within 30 seconds. If you have a larger dataset, you may need to increase the `time_budget` and adjust the number of parallel jobs accordingly.\n",
+      "# Run the AutoML search\n",
+      "automl.fit(dataframe=psdf, label=label_col, **automl_settings)\n",
+      "``` \n",
+      "\n",
+      "Remember to replace the example data with your real dataset and choose an appropriate metric for your classification task. You'll also need a configured and running Spark environment to utilize the \"use_spark\" feature.\n",
       "\n",
       "--------------------------------------------------------------------------------\n",
       "\u001b[33mragproxyagent\u001b[0m (to assistant):\n",
@@ -439,25 +435,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "Number of requested results 60 is greater than number of elements in index 2, updating n_results = 2\n",
-      "Number of requested results 100 is greater than number of elements in index 2, updating n_results = 2\n",
-      "Number of requested results 140 is greater than number of elements in index 2, updating n_results = 2\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "VectorDB returns doc_ids:  [['bdfbc921']]\n",
-      "VectorDB returns doc_ids:  [['bdfbc921']]\n",
-      "VectorDB returns doc_ids:  [['bdfbc921']]\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Number of requested results 180 is greater than number of elements in index 2, updating n_results = 2\n"
+      "Number of requested results 60 is greater than number of elements in index 2, updating n_results = 2\n"
      ]
     },
     {
@@ -470,18 +448,13 @@
       "\n",
       "TERMINATE\n",
       "\n",
+      "--------------------------------------------------------------------------------\n",
+      "\u001b[33mragproxyagent\u001b[0m (to assistant):\n",
+      "\n",
+      "TERMINATE\n",
+      "\n",
       "--------------------------------------------------------------------------------\n"
      ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "ChatResult(chat_id=None, chat_history=[{'content': 'TERMINATE', 'role': 'assistant'}], summary='', cost=({'total_cost': 0.007691, 'gpt-35-turbo': {'cost': 0.007691, 'prompt_tokens': 4242, 'completion_tokens': 664, 'total_tokens': 4906}}, {'total_cost': 0}), human_input=[])"
-      ]
-     },
-     "execution_count": 4,
-     "metadata": {},
-     "output_type": "execute_result"
     }
    ],
    "source": [
@@ -2836,7 +2809,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.9"
+   "version": "3.12.4"
   },
   "skip_test": "Requires interactive usage"
  },