diff --git a/notebook/agentchat_groupchat_stateflow.ipynb b/notebook/agentchat_groupchat_stateflow.ipynb
index 461687b9070d..6205e1147ee4 100644
--- a/notebook/agentchat_groupchat_stateflow.ipynb
+++ b/notebook/agentchat_groupchat_stateflow.ipynb
@@ -87,19 +87,20 @@
    "cell_type": "code",
    "execution_count": 2,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/home/ykw5399/maintain/autogen/autogen/agentchat/user_proxy_agent.py:83: UserWarning: Using None to signal a default code_execution_config is deprecated. Use {} to use default or False to disable code execution.\n",
-      "  super().__init__(\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
+    "import tempfile\n",
+    "\n",
+    "from autogen.coding import LocalCommandLineCodeExecutor\n",
+    "\n",
+    "temp_dir = tempfile.TemporaryDirectory()\n",
+    "executor = LocalCommandLineCodeExecutor(\n",
+    "    timeout=10,  # Timeout for each code execution in seconds.\n",
+    "    work_dir=temp_dir.name,  # Use the temporary directory to store the code files.\n",
+    ")\n",
+    "\n",
     "gpt4_config = {\n",
-    "    \"cache_seed\": 42,  # change the cache_seed for different trials\n",
+    "    \"cache_seed\": False,  # set to an integer (e.g. 42) and change it for different trials\n",
     "    \"temperature\": 0,\n",
     "    \"config_list\": config_list,\n",
     "    \"timeout\": 120,\n",
@@ -107,8 +108,11 @@
     "\n",
     "initializer = autogen.UserProxyAgent(\n",
     "    name=\"Init\",\n",
+    "    code_execution_config=False,\n",
     ")\n",
     "\n",
+    "\n",
+    "\n",
     "coder = autogen.AssistantAgent(\n",
     "    name=\"Retrieve_Action_1\",\n",
     "    llm_config=gpt4_config,\n",
@@ -122,11 +126,7 @@
     "    name=\"Retrieve_Action_2\",\n",
     "    system_message=\"Executor. Execute the code written by the Coder and report the result.\",\n",
     "    human_input_mode=\"NEVER\",\n",
-    "    code_execution_config={\n",
-    "        \"last_n_messages\": 3,\n",
-    "        \"work_dir\": \"paper\",\n",
-    "        \"use_docker\": False,\n",
-    "    },  # Please set use_docker=True if docker is available to run the generated code. Using docker is safer than running the generated code directly.\n",
+    "    code_execution_config={\"executor\": executor},\n",
     ")\n",
     "scientist = autogen.AssistantAgent(\n",
     "    name=\"Research_Action_1\",\n",
@@ -189,19 +189,20 @@
       "import feedparser\n",
       "\n",
       "# Define the base URL for the arXiv API\n",
-      "ARXIV_API_URL = 'http://export.arxiv.org/api/query?'\n",
+      "ARXIV_API_URL = \"http://export.arxiv.org/api/query?\"\n",
       "\n",
       "# Define the search parameters\n",
-      "search_query = 'all:\"LLM applications\"'\n",
-      "start_date = (datetime.now() - timedelta(days=7)).strftime('%Y%m%d%H%M%S')\n",
-      "end_date = datetime.now().strftime('%Y%m%d%H%M%S')\n",
+      "search_query = \"all:\\\"LLM applications\\\"\"\n",
       "start = 0\n",
       "max_results = 10\n",
-      "sort_by = 'submittedDate'\n",
-      "sort_order = 'descending'\n",
+      "sort_by = \"submittedDate\"\n",
+      "sort_order = \"descending\"\n",
+      "\n",
+      "# Calculate the date one week ago from today\n",
+      "one_week_ago = (datetime.now() - timedelta(days=7)).strftime('%Y-%m-%dT%H:%M:%SZ')\n",
       "\n",
       "# Construct the query\n",
-      "query = f'search_query={search_query}&sortBy={sort_by}&sortOrder={sort_order}&start={start}&max_results={max_results}'\n",
+      "query = f\"search_query={search_query}&start={start}&max_results={max_results}&sortBy={sort_by}&sortOrder={sort_order}&submittedDateRange={one_week_ago}-\"\n",
       "\n",
       "# Send the request to the arXiv API\n",
       "response = requests.get(ARXIV_API_URL + query)\n",
@@ -215,299 +216,298 @@
       "    print(\"Authors:\", ', '.join(author.name for author in entry.authors))\n",
       "    print(\"Abstract:\", entry.summary)\n",
       "    print(\"Link:\", entry.link)\n",
-      "    print(\"\\n\")\n",
-      "\n",
-      "# Check if we have at least 5 papers, if not, adjust the search or notify\n",
-      "if len(feed.entries) < 5:\n",
-      "    print(\"Less than 5 papers found. Consider adjusting the search parameters or timeframe.\")\n",
+      "    print(\"\\n---\\n\")\n",
       "```\n",
       "\n",
-      "This script will print the title, authors, abstract, and link for each paper related to \"LLM applications\" from the last week, up to a maximum of 10 papers. If fewer than 5 papers are found, it will notify the user to consider adjusting the search parameters or timeframe.\n",
+      "This script will print the title, authors, abstract, and link for each paper related to \"LLM applications\" that was submitted in the last week, up to a maximum of 10 papers. If you want to ensure that the papers are from different domains, you might need to manually check the categories of the papers or refine the search query to target specific domains.\n",
       "\n",
       "--------------------------------------------------------------------------------\n",
       "\u001b[31m\n",
-      ">>>>>>>> EXECUTING CODE BLOCK 0 (inferred language is python)...\u001b[0m\n",
+      ">>>>>>>> EXECUTING CODE BLOCK (inferred language is python)...\u001b[0m\n",
       "\u001b[33mRetrieve_Action_2\u001b[0m (to chat_manager):\n",
       "\n",
       "exitcode: 0 (execution succeeded)\n",
-      "Code output: \n",
-      "Title: PRSA: Prompt Reverse Stealing Attacks against Large Language Models\n",
-      "Authors: Yong Yang, Xuhong Zhang, Yi Jiang, Xi Chen, Haoyu Wang, Shouling Ji, Zonghui Wang\n",
-      "Abstract: Prompt, recognized as crucial intellectual property, enables large language\n",
-      "models (LLMs) to perform specific tasks without the need of fine-tuning,\n",
-      "underscoring their escalating importance. With the rise of prompt-based\n",
-      "services, such as prompt marketplaces and LLM applications, providers often\n",
-      "display prompts' capabilities through input-output examples to attract users.\n",
-      "However, this paradigm raises a pivotal security concern: does the exposure of\n",
-      "input-output pairs pose the risk of potential prompt leakage, infringing on the\n",
-      "intellectual property rights of the developers? To our knowledge, this problem\n",
-      "still has not been comprehensively explored yet. To remedy this gap, in this\n",
-      "paper, we perform the first in depth exploration and propose a novel attack\n",
-      "framework for reverse-stealing prompts against commercial LLMs, namely PRSA.\n",
-      "The main idea of PRSA is that by analyzing the critical features of the\n",
-      "input-output pairs, we mimic and gradually infer (steal) the target prompts. In\n",
-      "detail, PRSA mainly consists of two key phases: prompt mutation and prompt\n",
-      "pruning. In the mutation phase, we propose a prompt attention algorithm based\n",
-      "on differential feedback to capture these critical features for effectively\n",
-      "inferring the target prompts. In the prompt pruning phase, we identify and mask\n",
-      "the words dependent on specific inputs, enabling the prompts to accommodate\n",
-      "diverse inputs for generalization. Through extensive evaluation, we verify that\n",
-      "PRSA poses a severe threat in real world scenarios. We have reported these\n",
-      "findings to prompt service providers and actively collaborate with them to take\n",
-      "protective measures for prompt copyright.\n",
-      "Link: http://arxiv.org/abs/2402.19200v1\n",
-      "\n",
-      "\n",
-      "Title: Political Compass or Spinning Arrow? Towards More Meaningful Evaluations\n",
-      "  for Values and Opinions in Large Language Models\n",
-      "Authors: Paul Röttger, Valentin Hofmann, Valentina Pyatkin, Musashi Hinck, Hannah Rose Kirk, Hinrich Schütze, Dirk Hovy\n",
-      "Abstract: Much recent work seeks to evaluate values and opinions in large language\n",
-      "models (LLMs) using multiple-choice surveys and questionnaires. Most of this\n",
-      "work is motivated by concerns around real-world LLM applications. For example,\n",
-      "politically-biased LLMs may subtly influence society when they are used by\n",
-      "millions of people. Such real-world concerns, however, stand in stark contrast\n",
-      "to the artificiality of current evaluations: real users do not typically ask\n",
-      "LLMs survey questions. Motivated by this discrepancy, we challenge the\n",
-      "prevailing constrained evaluation paradigm for values and opinions in LLMs and\n",
-      "explore more realistic unconstrained evaluations. As a case study, we focus on\n",
-      "the popular Political Compass Test (PCT). In a systematic review, we find that\n",
-      "most prior work using the PCT forces models to comply with the PCT's\n",
-      "multiple-choice format. We show that models give substantively different\n",
-      "answers when not forced; that answers change depending on how models are\n",
-      "forced; and that answers lack paraphrase robustness. Then, we demonstrate that\n",
-      "models give different answers yet again in a more realistic open-ended answer\n",
-      "setting. We distill these findings into recommendations and open challenges in\n",
-      "evaluating values and opinions in LLMs.\n",
-      "Link: http://arxiv.org/abs/2402.16786v1\n",
-      "\n",
-      "\n",
-      "Title: Large Language Models as Urban Residents: An LLM Agent Framework for\n",
-      "  Personal Mobility Generation\n",
-      "Authors: Jiawei Wang, Renhe Jiang, Chuang Yang, Zengqing Wu, Makoto Onizuka, Ryosuke Shibasaki, Chuan Xiao\n",
-      "Abstract: This paper introduces a novel approach using Large Language Models (LLMs)\n",
-      "integrated into an agent framework for flexible and efficient personal mobility\n",
-      "generation. LLMs overcome the limitations of previous models by efficiently\n",
-      "processing semantic data and offering versatility in modeling various tasks.\n",
-      "Our approach addresses the critical need to align LLMs with real-world urban\n",
-      "mobility data, focusing on three research questions: aligning LLMs with rich\n",
-      "activity data, developing reliable activity generation strategies, and\n",
-      "exploring LLM applications in urban mobility. The key technical contribution is\n",
-      "a novel LLM agent framework that accounts for individual activity patterns and\n",
-      "motivations, including a self-consistency approach to align LLMs with\n",
-      "real-world activity data and a retrieval-augmented strategy for interpretable\n",
-      "activity generation. In experimental studies, comprehensive validation is\n",
-      "performed using real-world data. This research marks the pioneering work of\n",
-      "designing an LLM agent framework for activity generation based on real-world\n",
-      "human activity data, offering a promising tool for urban mobility analysis.\n",
-      "Link: http://arxiv.org/abs/2402.14744v1\n",
-      "\n",
-      "\n",
-      "Title: An Evaluation of Large Language Models in Bioinformatics Research\n",
-      "Authors: Hengchuang Yin, Zhonghui Gu, Fanhao Wang, Yiparemu Abuduhaibaier, Yanqiao Zhu, Xinming Tu, Xian-Sheng Hua, Xiao Luo, Yizhou Sun\n",
-      "Abstract: Large language models (LLMs) such as ChatGPT have gained considerable\n",
-      "interest across diverse research communities. Their notable ability for text\n",
-      "completion and generation has inaugurated a novel paradigm for\n",
-      "language-interfaced problem solving. However, the potential and efficacy of\n",
-      "these models in bioinformatics remain incompletely explored. In this work, we\n",
-      "study the performance LLMs on a wide spectrum of crucial bioinformatics tasks.\n",
-      "These tasks include the identification of potential coding regions, extraction\n",
-      "of named entities for genes and proteins, detection of antimicrobial and\n",
-      "anti-cancer peptides, molecular optimization, and resolution of educational\n",
-      "bioinformatics problems. Our findings indicate that, given appropriate prompts,\n",
-      "LLMs like GPT variants can successfully handle most of these tasks. In\n",
-      "addition, we provide a thorough analysis of their limitations in the context of\n",
-      "complicated bioinformatics tasks. In conclusion, we believe that this work can\n",
-      "provide new perspectives and motivate future research in the field of LLMs\n",
-      "applications, AI for Science and bioinformatics.\n",
-      "Link: http://arxiv.org/abs/2402.13714v1\n",
-      "\n",
-      "\n",
-      "Title: Privacy-Preserving Instructions for Aligning Large Language Models\n",
-      "Authors: Da Yu, Peter Kairouz, Sewoong Oh, Zheng Xu\n",
-      "Abstract: Service providers of large language model (LLM) applications collect user\n",
-      "instructions in the wild and use them in further aligning LLMs with users'\n",
-      "intentions. These instructions, which potentially contain sensitive\n",
-      "information, are annotated by human workers in the process. This poses a new\n",
-      "privacy risk not addressed by the typical private optimization. To this end, we\n",
-      "propose using synthetic instructions to replace real instructions in data\n",
-      "annotation and model fine-tuning. Formal differential privacy is guaranteed by\n",
-      "generating those synthetic instructions using privately fine-tuned generators.\n",
-      "Crucial in achieving the desired utility is our novel filtering algorithm that\n",
-      "matches the distribution of the synthetic instructions to that of the real\n",
-      "ones. In both supervised fine-tuning and reinforcement learning from human\n",
-      "feedback, our extensive experiments demonstrate the high utility of the final\n",
-      "set of synthetic instructions by showing comparable results to real\n",
-      "instructions. In supervised fine-tuning, models trained with private synthetic\n",
-      "instructions outperform leading open-source models such as Vicuna.\n",
-      "Link: http://arxiv.org/abs/2402.13659v1\n",
-      "\n",
-      "\n",
-      "Title: Ain't Misbehavin' -- Using LLMs to Generate Expressive Robot Behavior in\n",
-      "  Conversations with the Tabletop Robot Haru\n",
-      "Authors: Zining Wang, Paul Reisert, Eric Nichols, Randy Gomez\n",
-      "Abstract: Social robots aim to establish long-term bonds with humans through engaging\n",
-      "conversation. However, traditional conversational approaches, reliant on\n",
-      "scripted interactions, often fall short in maintaining engaging conversations.\n",
-      "This paper addresses this limitation by integrating large language models\n",
-      "(LLMs) into social robots to achieve more dynamic and expressive conversations.\n",
-      "We introduce a fully-automated conversation system that leverages LLMs to\n",
-      "generate robot responses with expressive behaviors, congruent with the robot's\n",
-      "personality. We incorporate robot behavior with two modalities: 1) a\n",
-      "text-to-speech (TTS) engine capable of various delivery styles, and 2) a\n",
-      "library of physical actions for the robot. We develop a custom,\n",
-      "state-of-the-art emotion recognition model to dynamically select the robot's\n",
-      "tone of voice and utilize emojis from LLM output as cues for generating robot\n",
-      "actions. A demo of our system is available here. To illuminate design and\n",
-      "implementation issues, we conduct a pilot study where volunteers chat with a\n",
-      "social robot using our proposed system, and we analyze their feedback,\n",
-      "conducting a rigorous error analysis of chat transcripts. Feedback was\n",
-      "overwhelmingly positive, with participants commenting on the robot's empathy,\n",
-      "helpfulness, naturalness, and entertainment. Most negative feedback was due to\n",
-      "automatic speech recognition (ASR) errors which had limited impact on\n",
-      "conversations. However, we observed a small class of errors, such as the LLM\n",
-      "repeating itself or hallucinating fictitious information and human responses,\n",
-      "that have the potential to derail conversations, raising important issues for\n",
-      "LLM application.\n",
-      "Link: http://arxiv.org/abs/2402.11571v1\n",
-      "\n",
-      "\n",
-      "Title: Fine-tuning Large Language Model (LLM) Artificial Intelligence Chatbots\n",
-      "  in Ophthalmology and LLM-based evaluation using GPT-4\n",
-      "Authors: Ting Fang Tan, Kabilan Elangovan, Liyuan Jin, Yao Jie, Li Yong, Joshua Lim, Stanley Poh, Wei Yan Ng, Daniel Lim, Yuhe Ke, Nan Liu, Daniel Shu Wei Ting\n",
-      "Abstract: Purpose: To assess the alignment of GPT-4-based evaluation to human clinician\n",
-      "experts, for the evaluation of responses to ophthalmology-related patient\n",
-      "queries generated by fine-tuned LLM chatbots. Methods: 400 ophthalmology\n",
-      "questions and paired answers were created by ophthalmologists to represent\n",
-      "commonly asked patient questions, divided into fine-tuning (368; 92%), and\n",
-      "testing (40; 8%). We find-tuned 5 different LLMs, including LLAMA2-7b,\n",
-      "LLAMA2-7b-Chat, LLAMA2-13b, and LLAMA2-13b-Chat. For the testing dataset,\n",
-      "additional 8 glaucoma QnA pairs were included. 200 responses to the testing\n",
-      "dataset were generated by 5 fine-tuned LLMs for evaluation. A customized\n",
-      "clinical evaluation rubric was used to guide GPT-4 evaluation, grounded on\n",
-      "clinical accuracy, relevance, patient safety, and ease of understanding. GPT-4\n",
-      "evaluation was then compared against ranking by 5 clinicians for clinical\n",
-      "alignment. Results: Among all fine-tuned LLMs, GPT-3.5 scored the highest\n",
-      "(87.1%), followed by LLAMA2-13b (80.9%), LLAMA2-13b-chat (75.5%),\n",
-      "LLAMA2-7b-Chat (70%) and LLAMA2-7b (68.8%) based on the GPT-4 evaluation. GPT-4\n",
-      "evaluation demonstrated significant agreement with human clinician rankings,\n",
-      "with Spearman and Kendall Tau correlation coefficients of 0.90 and 0.80\n",
-      "respectively; while correlation based on Cohen Kappa was more modest at 0.50.\n",
-      "Notably, qualitative analysis and the glaucoma sub-analysis revealed clinical\n",
-      "inaccuracies in the LLM-generated responses, which were appropriately\n",
-      "identified by the GPT-4 evaluation. Conclusion: The notable clinical alignment\n",
-      "of GPT-4 evaluation highlighted its potential to streamline the clinical\n",
-      "evaluation of LLM chatbot responses to healthcare-related queries. By\n",
-      "complementing the existing clinician-dependent manual grading, this efficient\n",
-      "and automated evaluation could assist the validation of future developments in\n",
-      "LLM applications for healthcare.\n",
-      "Link: http://arxiv.org/abs/2402.10083v1\n",
-      "\n",
-      "\n",
-      "Title: Unmemorization in Large Language Models via Self-Distillation and\n",
-      "  Deliberate Imagination\n",
-      "Authors: Yijiang River Dong, Hongzhou Lin, Mikhail Belkin, Ramon Huerta, Ivan Vulić\n",
-      "Abstract: While displaying impressive generation capabilities across many tasks, Large\n",
-      "Language Models (LLMs) still struggle with crucial issues of privacy violation\n",
-      "and unwanted exposure of sensitive data. This raises an essential question: how\n",
-      "should we prevent such undesired behavior of LLMs while maintaining their\n",
-      "strong generation and natural language understanding (NLU) capabilities? In\n",
-      "this work, we introduce a novel approach termed deliberate imagination in the\n",
-      "context of LLM unlearning. Instead of trying to forget memorized data, we\n",
-      "employ a self-distillation framework, guiding LLMs to deliberately imagine\n",
-      "alternative scenarios. As demonstrated in a wide range of experiments, the\n",
-      "proposed method not only effectively unlearns targeted text but also preserves\n",
-      "the LLMs' capabilities in open-ended generation tasks as well as in NLU tasks.\n",
-      "Our results demonstrate the usefulness of this approach across different models\n",
-      "and sizes, and also with parameter-efficient fine-tuning, offering a novel\n",
-      "pathway to addressing the challenges with private and sensitive data in LLM\n",
-      "applications.\n",
-      "Link: http://arxiv.org/abs/2402.10052v1\n",
-      "\n",
-      "\n",
-      "Title: Anchor-based Large Language Models\n",
-      "Authors: Jianhui Pang, Fanghua Ye, Derek F. Wong, Longyue Wang\n",
-      "Abstract: Large language models (LLMs) predominantly employ decoder-only transformer\n",
-      "architectures, necessitating the retention of keys/values information for\n",
-      "historical tokens to provide contextual information and avoid redundant\n",
-      "computation. However, the substantial size and parameter volume of these LLMs\n",
-      "require massive GPU memory. This memory demand increases with the length of the\n",
-      "input text, leading to an urgent need for more efficient methods of information\n",
-      "storage and processing. This study introduces Anchor-based LLMs (AnLLMs), which\n",
-      "utilize an innovative anchor-based self-attention network (AnSAN) and also an\n",
-      "anchor-based inference strategy. This approach enables LLMs to compress\n",
-      "sequence information into an anchor token, reducing the keys/values cache and\n",
-      "enhancing inference efficiency. Experiments on question-answering benchmarks\n",
-      "reveal that AnLLMs maintain similar accuracy levels while achieving up to 99%\n",
-      "keys/values cache reduction and up to 3.5 times faster inference. Despite a\n",
-      "minor compromise in accuracy, the substantial enhancements of AnLLMs employing\n",
-      "the AnSAN technique in resource utilization and computational efficiency\n",
-      "underscore their potential for practical LLM applications.\n",
-      "Link: http://arxiv.org/abs/2402.07616v2\n",
-      "\n",
-      "\n",
-      "Title: T-RAG: Lessons from the LLM Trenches\n",
-      "Authors: Masoomali Fatehkia, Ji Kim Lucas, Sanjay Chawla\n",
-      "Abstract: Large Language Models (LLM) have shown remarkable language capabilities\n",
-      "fueling attempts to integrate them into applications across a wide range of\n",
-      "domains. An important application area is question answering over private\n",
-      "enterprise documents where the main considerations are data security, which\n",
-      "necessitates applications that can be deployed on-prem, limited computational\n",
-      "resources and the need for a robust application that correctly responds to\n",
-      "queries. Retrieval-Augmented Generation (RAG) has emerged as the most prominent\n",
-      "framework for building LLM-based applications. While building a RAG is\n",
-      "relatively straightforward, making it robust and a reliable application\n",
-      "requires extensive customization and relatively deep knowledge of the\n",
-      "application domain. We share our experiences building and deploying an LLM\n",
-      "application for question answering over private organizational documents. Our\n",
-      "application combines the use of RAG with a finetuned open-source LLM.\n",
-      "Additionally, our system, which we call Tree-RAG (T-RAG), uses a tree structure\n",
-      "to represent entity hierarchies within the organization. This is used to\n",
-      "generate a textual description to augment the context when responding to user\n",
-      "queries pertaining to entities within the organization's hierarchy. Our\n",
-      "evaluations show that this combination performs better than a simple RAG or\n",
-      "finetuning implementation. Finally, we share some lessons learned based on our\n",
-      "experiences building an LLM application for real-world use.\n",
-      "Link: http://arxiv.org/abs/2402.07483v1\n",
-      "\n",
+      "Code output: Title: Adapting LLMs for Efficient Context Processing through Soft Prompt\n",
+      "  Compression\n",
+      "Authors: Cangqing Wang, Yutian Yang, Ruisi Li, Dan Sun, Ruicong Cai, Yuzhu Zhang, Chengqian Fu, Lillian Floyd\n",
+      "Abstract: The rapid advancement of Large Language Models (LLMs) has inaugurated a\n",
+      "transformative epoch in natural language processing, fostering unprecedented\n",
+      "proficiency in text generation, comprehension, and contextual scrutiny.\n",
+      "Nevertheless, effectively handling extensive contexts, crucial for myriad\n",
+      "applications, poses a formidable obstacle owing to the intrinsic constraints of\n",
+      "the models' context window sizes and the computational burdens entailed by\n",
+      "their operations. This investigation presents an innovative framework that\n",
+      "strategically tailors LLMs for streamlined context processing by harnessing the\n",
+      "synergies among natural language summarization, soft prompt compression, and\n",
+      "augmented utility preservation mechanisms. Our methodology, dubbed\n",
+      "SoftPromptComp, amalgamates natural language prompts extracted from\n",
+      "summarization methodologies with dynamically generated soft prompts to forge a\n",
+      "concise yet semantically robust depiction of protracted contexts. This\n",
+      "depiction undergoes further refinement via a weighting mechanism optimizing\n",
+      "information retention and utility for subsequent tasks. We substantiate that\n",
+      "our framework markedly diminishes computational overhead and enhances LLMs'\n",
+      "efficacy across various benchmarks, while upholding or even augmenting the\n",
+      "caliber of the produced content. By amalgamating soft prompt compression with\n",
+      "sophisticated summarization, SoftPromptComp confronts the dual challenges of\n",
+      "managing lengthy contexts and ensuring model scalability. Our findings point\n",
+      "towards a propitious trajectory for augmenting LLMs' applicability and\n",
+      "efficiency, rendering them more versatile and pragmatic for real-world\n",
+      "applications. This research enriches the ongoing discourse on optimizing\n",
+      "language models, providing insights into the potency of soft prompts and\n",
+      "summarization techniques as pivotal instruments for the forthcoming generation\n",
+      "of NLP solutions.\n",
+      "Link: http://arxiv.org/abs/2404.04997v1\n",
+      "\n",
+      "---\n",
+      "\n",
+      "Title: Explainable Traffic Flow Prediction with Large Language Models\n",
+      "Authors: Xusen Guo, Qiming Zhang, Mingxing Peng, Meixin Zhu, Hao, Yang\n",
+      "Abstract: Traffic flow prediction is crucial for urban planning, transportation\n",
+      "management, and infrastructure development. However, achieving both accuracy\n",
+      "and interpretability in prediction models remains challenging due to the\n",
+      "complexity of traffic data and the inherent opacity of deep learning\n",
+      "methodologies. In this paper, we propose a novel approach, Traffic Flow\n",
+      "Prediction LLM (TF-LLM), which leverages large language models (LLMs) to\n",
+      "generate interpretable traffic flow predictions. By transferring multi-modal\n",
+      "traffic data into natural language descriptions, TF-LLM captures complex\n",
+      "spatial-temporal patterns and external factors such as weather conditions,\n",
+      "Points of Interest (PoIs), date, and holidays. We fine-tune the LLM framework\n",
+      "using language-based instructions to align with spatial-temporal traffic flow\n",
+      "data. Our comprehensive multi-modal traffic flow dataset (CATraffic) in\n",
+      "California enables the evaluation of TF-LLM against state-of-the-art deep\n",
+      "learning baselines. Results demonstrate TF-LLM's competitive accuracy while\n",
+      "providing intuitive and interpretable predictions. We discuss the\n",
+      "spatial-temporal and input dependencies for explainable future flow\n",
+      "forecasting, showcasing TF-LLM's potential for diverse city prediction tasks.\n",
+      "This paper contributes to advancing explainable traffic prediction models and\n",
+      "lays a foundation for future exploration of LLM applications in transportation.\n",
+      "Link: http://arxiv.org/abs/2404.02937v2\n",
+      "\n",
+      "---\n",
+      "\n",
+      "Title: Designing Child-Centric AI Learning Environments: Insights from\n",
+      "  LLM-Enhanced Creative Project-Based Learning\n",
+      "Authors: Siyu Zha, Yuehan Qiao, Qingyu Hu, Zhongsheng Li, Jiangtao Gong, Yingqing Xu\n",
+      "Abstract: Project-based learning (PBL) is an instructional method that is very helpful\n",
+      "in nurturing students' creativity, but it requires significant time and energy\n",
+      "from both students and teachers. Large language models (LLMs) have been proven\n",
+      "to assist in creative tasks, yet much controversy exists regarding their role\n",
+      "in fostering creativity. This paper explores the potential of LLMs in PBL\n",
+      "settings, with a special focus on fostering creativity. We began with an\n",
+      "exploratory study involving 12 middle school students and identified five\n",
+      "design considerations for LLM applications in PBL. Building on this, we\n",
+      "developed an LLM-empowered, 48-hour PBL program and conducted an instructional\n",
+      "experiment with 31 middle school students. Our results indicated that LLMs can\n",
+      "enhance every stage of PBL. Additionally, we also discovered ambivalent\n",
+      "perspectives among students and mentors toward LLM usage. Furthermore, we\n",
+      "explored the challenge and design implications of integrating LLMs into PBL and\n",
+      "reflected on the program. By bridging AI advancements into educational\n",
+      "practice, our work aims to inspire further discourse and investigation into\n",
+      "harnessing AI's potential in child-centric educational settings.\n",
+      "Link: http://arxiv.org/abs/2403.16159v2\n",
+      "\n",
+      "---\n",
+      "\n",
+      "Title: The opportunities and risks of large language models in mental health\n",
+      "Authors: Hannah R. Lawrence, Renee A. Schneider, Susan B. Rubin, Maja J. Mataric, Daniel J. McDuff, Megan Jones Bell\n",
+      "Abstract: Global rates of mental health concerns are rising and there is increasing\n",
+      "realization that existing models of mental healthcare will not adequately\n",
+      "expand to meet the demand. With the emergence of large language models (LLMs)\n",
+      "has come great optimism regarding their promise to create novel, large-scale\n",
+      "solutions to support mental health. Despite their nascence, LLMs have already\n",
+      "been applied to mental health-related tasks. In this review, we summarize the\n",
+      "extant literature on efforts to use LLMs to provide mental health education,\n",
+      "assessment, and intervention and highlight key opportunities for positive\n",
+      "impact in each area. We then highlight risks associated with LLMs application\n",
+      "to mental health and encourage adoption of strategies to mitigate these risks.\n",
+      "The urgent need for mental health support must be balanced with responsible\n",
+      "development, testing, and deployment of mental health LLMs. Especially critical\n",
+      "is ensuring that mental health LLMs are fine-tuned for mental health, enhance\n",
+      "mental health equity, adhere to ethical standards, and that people, including\n",
+      "those with lived experience with mental health concerns, are involved in all\n",
+      "stages from development through deployment. Prioritizing these efforts will\n",
+      "minimize potential harms to mental health and maximize the likelihood that LLMs\n",
+      "will positively impact mental health globally.\n",
+      "Link: http://arxiv.org/abs/2403.14814v2\n",
+      "\n",
+      "---\n",
+      "\n",
+      "Title: Large Language Models for Blockchain Security: A Systematic Literature\n",
+      "  Review\n",
+      "Authors: Zheyuan He, Zihao Li, Sen Yang\n",
+      "Abstract: Large Language Models (LLMs) have emerged as powerful tools in various\n",
+      "domains involving blockchain security (BS). Several recent studies are\n",
+      "exploring LLMs applied to BS. However, there remains a gap in our understanding\n",
+      "regarding the full scope of applications, impacts, and potential constraints of\n",
+      "LLMs on blockchain security. To fill this gap, we conduct a literature review\n",
+      "on LLM4BS.\n",
+      "  As the first review of LLM's application on blockchain security, our study\n",
+      "aims to comprehensively analyze existing research and elucidate how LLMs\n",
+      "contribute to enhancing the security of blockchain systems. Through a thorough\n",
+      "examination of scholarly works, we delve into the integration of LLMs into\n",
+      "various aspects of blockchain security. We explore the mechanisms through which\n",
+      "LLMs can bolster blockchain security, including their applications in smart\n",
+      "contract auditing, identity verification, anomaly detection, vulnerable repair,\n",
+      "and so on. Furthermore, we critically assess the challenges and limitations\n",
+      "associated with leveraging LLMs for blockchain security, considering factors\n",
+      "such as scalability, privacy concerns, and adversarial attacks. Our review\n",
+      "sheds light on the opportunities and potential risks inherent in this\n",
+      "convergence, providing valuable insights for researchers, practitioners, and\n",
+      "policymakers alike.\n",
+      "Link: http://arxiv.org/abs/2403.14280v2\n",
+      "\n",
+      "---\n",
+      "\n",
+      "Title: Do Large Language Model Understand Multi-Intent Spoken Language ?\n",
+      "Authors: Shangjian Yin, Peijie Huang, Yuhong Xu, Haojing Huang, Jiatian Chen\n",
+      "Abstract: This study marks a significant advancement by harnessing Large Language\n",
+      "Models (LLMs) for multi-intent spoken language understanding (SLU), proposing a\n",
+      "unique methodology that capitalizes on the generative power of LLMs within an\n",
+      "SLU context. Our innovative technique reconfigures entity slots specifically\n",
+      "for LLM application in multi-intent SLU environments and introduces the concept\n",
+      "of Sub-Intent Instruction (SII), enhancing the dissection and interpretation of\n",
+      "intricate, multi-intent communication within varied domains. The resultant\n",
+      "datasets, dubbed LM-MixATIS and LM-MixSNIPS, are crafted from pre-existing\n",
+      "benchmarks. Our research illustrates that LLMs can match and potentially excel\n",
+      "beyond the capabilities of current state-of-the-art multi-intent SLU models. It\n",
+      "further explores LLM efficacy across various intent configurations and dataset\n",
+      "proportions. Moreover, we introduce two pioneering metrics, Entity Slot\n",
+      "Accuracy (ESA) and Combined Semantic Accuracy (CSA), to provide an in-depth\n",
+      "analysis of LLM proficiency in this complex field.\n",
+      "Link: http://arxiv.org/abs/2403.04481v2\n",
+      "\n",
+      "---\n",
+      "\n",
+      "Title: Breaking the Language Barrier: Can Direct Inference Outperform\n",
+      "  Pre-Translation in Multilingual LLM Applications?\n",
+      "Authors: Yotam Intrator, Matan Halfon, Roman Goldenberg, Reut Tsarfaty, Matan Eyal, Ehud Rivlin, Yossi Matias, Natalia Aizenberg\n",
+      "Abstract: Large language models hold significant promise in multilingual applications.\n",
+      "However, inherent biases stemming from predominantly English-centric\n",
+      "pre-training have led to the widespread practice of pre-translation, i.e.,\n",
+      "translating non-English inputs to English before inference, leading to\n",
+      "complexity and information loss. This study re-evaluates the need for\n",
+      "pre-translation in the context of PaLM2 models (Anil et al., 2023), which have\n",
+      "been established as highly performant in multilingual tasks. We offer a\n",
+      "comprehensive investigation across 108 languages and 6 diverse benchmarks,\n",
+      "including open-end generative tasks, which were excluded from previous similar\n",
+      "studies. Our findings challenge the pre-translation paradigm established in\n",
+      "prior research, highlighting the advantages of direct inference in PaLM2.\n",
+      "Specifically, PaLM2-L consistently outperforms pre-translation in 94 out of 108\n",
+      "languages. These findings pave the way for more efficient and effective\n",
+      "multilingual applications, alleviating the limitations associated with\n",
+      "pre-translation and unlocking linguistic authenticity.\n",
+      "Link: http://arxiv.org/abs/2403.04792v1\n",
+      "\n",
+      "---\n",
+      "\n",
+      "Title: SciAssess: Benchmarking LLM Proficiency in Scientific Literature\n",
+      "  Analysis\n",
+      "Authors: Hengxing Cai, Xiaochen Cai, Junhan Chang, Sihang Li, Lin Yao, Changxin Wang, Zhifeng Gao, Hongshuai Wang, Yongge Li, Mujie Lin, Shuwen Yang, Jiankun Wang, Yuqi Yin, Yaqi Li, Linfeng Zhang, Guolin Ke\n",
+      "Abstract: Recent breakthroughs in Large Language Models (LLMs) have revolutionized\n",
+      "natural language understanding and generation, igniting a surge of interest in\n",
+      "leveraging these technologies in the field of scientific literature analysis.\n",
+      "Existing benchmarks, however, inadequately evaluate the proficiency of LLMs in\n",
+      "scientific literature analysis, especially in scenarios involving complex\n",
+      "comprehension and multimodal data. In response, we introduced SciAssess, a\n",
+      "benchmark tailored for the in-depth analysis of scientific literature, crafted\n",
+      "to provide a thorough assessment of LLMs' efficacy. SciAssess focuses on\n",
+      "evaluating LLMs' abilities in memorization, comprehension, and analysis within\n",
+      "the context of scientific literature analysis. It includes representative tasks\n",
+      "from diverse scientific fields, such as general chemistry, organic materials,\n",
+      "and alloy materials. And rigorous quality control measures ensure its\n",
+      "reliability in terms of correctness, anonymization, and copyright compliance.\n",
+      "SciAssess evaluates leading LLMs, including GPT-4, GPT-3.5, and Gemini,\n",
+      "identifying their strengths and aspects for improvement and supporting the\n",
+      "ongoing development of LLM applications in scientific literature analysis.\n",
+      "SciAssess and its resources are made available at https://sci-assess.github.io,\n",
+      "offering a valuable tool for advancing LLM capabilities in scientific\n",
+      "literature analysis.\n",
+      "Link: http://arxiv.org/abs/2403.01976v2\n",
+      "\n",
+      "---\n",
+      "\n",
+      "Title: Differentially Private Synthetic Data via Foundation Model APIs 2: Text\n",
+      "Authors: Chulin Xie, Zinan Lin, Arturs Backurs, Sivakanth Gopi, Da Yu, Huseyin A Inan, Harsha Nori, Haotian Jiang, Huishuai Zhang, Yin Tat Lee, Bo Li, Sergey Yekhanin\n",
+      "Abstract: Text data has become extremely valuable due to the emergence of machine\n",
+      "learning algorithms that learn from it. A lot of high-quality text data\n",
+      "generated in the real world is private and therefore cannot be shared or used\n",
+      "freely due to privacy concerns. Generating synthetic replicas of private text\n",
+      "data with a formal privacy guarantee, i.e., differential privacy (DP), offers a\n",
+      "promising and scalable solution. However, existing methods necessitate DP\n",
+      "finetuning of large language models (LLMs) on private data to generate DP\n",
+      "synthetic data. This approach is not viable for proprietary LLMs (e.g.,\n",
+      "GPT-3.5) and also demands considerable computational resources for open-source\n",
+      "LLMs. Lin et al. (2024) recently introduced the Private Evolution (PE)\n",
+      "algorithm to generate DP synthetic images with only API access to diffusion\n",
+      "models. In this work, we propose an augmented PE algorithm, named Aug-PE, that\n",
+      "applies to the complex setting of text. We use API access to an LLM and\n",
+      "generate DP synthetic text without any model training. We conduct comprehensive\n",
+      "experiments on three benchmark datasets. Our results demonstrate that Aug-PE\n",
+      "produces DP synthetic text that yields competitive utility with the SOTA DP\n",
+      "finetuning baselines. This underscores the feasibility of relying solely on API\n",
+      "access of LLMs to produce high-quality DP synthetic texts, thereby facilitating\n",
+      "more accessible routes to privacy-preserving LLM applications. Our code and\n",
+      "data are available at https://github.com/AI-secure/aug-pe.\n",
+      "Link: http://arxiv.org/abs/2403.01749v1\n",
+      "\n",
+      "---\n",
+      "\n",
+      "Title: SERVAL: Synergy Learning between Vertical Models and LLMs towards\n",
+      "  Oracle-Level Zero-shot Medical Prediction\n",
+      "Authors: Jiahuan Yan, Jintai Chen, Chaowen Hu, Bo Zheng, Yaojun Hu, Jimeng Sun, Jian Wu\n",
+      "Abstract: Recent development of large language models (LLMs) has exhibited impressive\n",
+      "zero-shot proficiency on generic and common sense questions. However, LLMs'\n",
+      "application on domain-specific vertical questions still lags behind, primarily\n",
+      "due to the humiliation problems and deficiencies in vertical knowledge.\n",
+      "Furthermore, the vertical data annotation process often requires\n",
+      "labor-intensive expert involvement, thereby presenting an additional challenge\n",
+      "in enhancing the model's vertical capabilities. In this paper, we propose\n",
+      "SERVAL, a synergy learning pipeline designed for unsupervised development of\n",
+      "vertical capabilities in both LLMs and small models by mutual enhancement.\n",
+      "Specifically, SERVAL utilizes the LLM's zero-shot outputs as annotations,\n",
+      "leveraging its confidence to teach a robust vertical model from scratch.\n",
+      "Reversely, the trained vertical model guides the LLM fine-tuning to enhance its\n",
+      "zero-shot capability, progressively improving both models through an iterative\n",
+      "process. In medical domain, known for complex vertical knowledge and costly\n",
+      "annotations, comprehensive experiments show that, without access to any gold\n",
+      "labels, SERVAL with the synergy learning of OpenAI GPT-3.5 and a simple model\n",
+      "attains fully-supervised competitive performance across ten widely used medical\n",
+      "datasets. These datasets represent vertically specialized medical diagnostic\n",
+      "scenarios (e.g., diabetes, heart diseases, COVID-19), highlighting the\n",
+      "potential of SERVAL in refining the vertical capabilities of LLMs and training\n",
+      "vertical models from scratch, all achieved without the need for annotations.\n",
+      "Link: http://arxiv.org/abs/2403.01570v2\n",
+      "\n",
+      "---\n",
       "\n",
       "\n",
       "\n",
       "--------------------------------------------------------------------------------\n",
       "\u001b[33mResearch_Action_1\u001b[0m (to chat_manager):\n",
       "\n",
-      "Based on the retrieved abstracts, here is a markdown table categorizing the papers by domain, along with their titles, authors, summaries, and links:\n",
+      "Based on the provided code output, here is a markdown table categorizing the papers by domain, along with their titles, authors, summaries, and links:\n",
       "\n",
       "| Domain | Title | Authors | Summary | Link |\n",
       "|--------|-------|---------|---------|------|\n",
-      "| Security | PRSA: Prompt Reverse Stealing Attacks against Large Language Models | Yong Yang, Xuhong Zhang, Yi Jiang, Xi Chen, Haoyu Wang, Shouling Ji, Zonghui Wang | The paper explores the security risks associated with exposing input-output pairs of prompts used in LLMs and proposes a novel attack framework, PRSA, to reverse-steal prompts, posing a threat to intellectual property rights. | [Link](http://arxiv.org/abs/2402.19200v1) |\n",
-      "| Ethics & Evaluation | Political Compass or Spinning Arrow? Towards More Meaningful Evaluations for Values and Opinions in Large Language Models | Paul Röttger, Valentin Hofmann, Valentina Pyatkin, Musashi Hinck, Hannah Rose Kirk, Hinrich Schütze, Dirk Hovy | This work challenges the constrained evaluation paradigm for values and opinions in LLMs and explores more realistic unconstrained evaluations, focusing on the Political Compass Test (PCT). | [Link](http://arxiv.org/abs/2402.16786v1) |\n",
-      "| Urban Mobility | Large Language Models as Urban Residents: An LLM Agent Framework for Personal Mobility Generation | Jiawei Wang, Renhe Jiang, Chuang Yang, Zengqing Wu, Makoto Onizuka, Ryosuke Shibasaki, Chuan Xiao | Introduces an LLM agent framework for personal mobility generation, aligning LLMs with real-world urban mobility data, and offering a tool for urban mobility analysis. | [Link](http://arxiv.org/abs/2402.14744v1) |\n",
-      "| Bioinformatics | An Evaluation of Large Language Models in Bioinformatics Research | Hengchuang Yin, Zhonghui Gu, Fanhao Wang, Yiparemu Abuduhaibaier, Yanqiao Zhu, Xinming Tu, Xian-Sheng Hua, Xiao Luo, Yizhou Sun | Evaluates the performance of LLMs on bioinformatics tasks, highlighting their potential and limitations, and motivating future research in LLM applications in bioinformatics. | [Link](http://arxiv.org/abs/2402.13714v1) |\n",
-      "| Privacy | Privacy-Preserving Instructions for Aligning Large Language Models | Da Yu, Peter Kairouz, Sewoong Oh, Zheng Xu | Proposes using synthetic instructions generated by privately fine-tuned generators to replace real instructions in data annotation and model fine-tuning, ensuring privacy while maintaining utility. | [Link](http://arxiv.org/abs/2402.13659v1) |\n",
-      "| Social Robotics | Ain't Misbehavin' -- Using LLMs to Generate Expressive Robot Behavior in Conversations with the Tabletop Robot Haru | Zining Wang, Paul Reisert, Eric Nichols, Randy Gomez | Integrates LLMs into social robots to generate dynamic and expressive conversations, using a text-to-speech engine and a library of physical actions for the robot. | [Link](http://arxiv.org/abs/2402.11571v1) |\n",
-      "| Ophthalmology | Fine-tuning Large Language Model (LLM) Artificial Intelligence Chatbots in Ophthalmology and LLM-based evaluation using GPT-4 | Ting Fang Tan, Kabilan Elangovan, Liyuan Jin, Yao Jie, Li Yong, Joshua Lim, Stanley Poh, Wei Yan Ng, Daniel Lim, Yuhe Ke, Nan Liu, Daniel Shu Wei Ting | Assesses the alignment of GPT-4-based evaluation to human clinician experts for evaluating responses to ophthalmology-related patient queries generated by fine-tuned LLM chatbots. | [Link](http://arxiv.org/abs/2402.10083v1) |\n",
-      "| Privacy & Data Security | Unmemorization in Large Language Models via Self-Distillation and Deliberate Imagination | Yijiang River Dong, Hongzhou Lin, Mikhail Belkin, Ramon Huerta, Ivan Vulić | Introduces a novel approach for LLM unlearning by guiding LLMs to imagine alternative scenarios, effectively unlearning targeted text while preserving generation and NLU capabilities. | [Link](http://arxiv.org/abs/2402.10052v1) |\n",
-      "| Computational Efficiency | Anchor-based Large Language Models | Jianhui Pang, Fanghua Ye, Derek F. Wong, Longyue Wang | Proposes Anchor-based LLMs (AnLLMs) with an innovative anchor-based self-attention network (AnSAN) to reduce memory demand and enhance inference efficiency. | [Link](http://arxiv.org/abs/2402.07616v2) |\n",
-      "| Enterprise Applications | T-RAG: Lessons from the LLM Trenches | Masoomali Fatehkia, Ji Kim Lucas, Sanjay Chawla | Shares experiences building and deploying an LLM application for question answering over private organizational documents, combining RAG with a finetuned LLM and a tree structure for entity hierarchies. | [Link](http://arxiv.org/abs/2402.07483v1) |\n",
-      "\n",
-      "These papers cover a range of domains including security, ethics, urban mobility, bioinformatics, privacy, social robotics, ophthalmology, data security, computational efficiency, and enterprise applications, showcasing the diverse applications of large language models.\n",
+      "| Natural Language Processing | Adapting LLMs for Efficient Context Processing through Soft Prompt Compression | Cangqing Wang, et al. | The paper presents a framework for efficient context processing in LLMs using natural language summarization and soft prompt compression. | [Link](http://arxiv.org/abs/2404.04997v1) |\n",
+      "| Transportation | Explainable Traffic Flow Prediction with Large Language Models | Xusen Guo, et al. | This paper introduces a novel approach for interpretable traffic flow predictions using LLMs, which captures complex spatial-temporal patterns. | [Link](http://arxiv.org/abs/2404.02937v2) |\n",
+      "| Education | Designing Child-Centric AI Learning Environments: Insights from LLM-Enhanced Creative Project-Based Learning | Siyu Zha, et al. | The study explores the potential of LLMs in enhancing project-based learning (PBL) and fostering creativity in educational settings. | [Link](http://arxiv.org/abs/2403.16159v2) |\n",
+      "| Mental Health | The opportunities and risks of large language models in mental health | Hannah R. Lawrence, et al. | This review summarizes the literature on LLMs in mental health education, assessment, and intervention, highlighting opportunities and risks. | [Link](http://arxiv.org/abs/2403.14814v2) |\n",
+      "| Blockchain Security | Large Language Models for Blockchain Security: A Systematic Literature Review | Zheyuan He, et al. | The paper reviews the application of LLMs in blockchain security, discussing their impact and potential limitations. | [Link](http://arxiv.org/abs/2403.14280v2) |\n",
+      "| Spoken Language Understanding | Do Large Language Model Understand Multi-Intent Spoken Language? | Shangjian Yin, et al. | The study investigates LLMs' capabilities in multi-intent spoken language understanding and proposes new methodologies and metrics. | [Link](http://arxiv.org/abs/2403.04481v2) |\n",
+      "| Multilingualism | Breaking the Language Barrier: Can Direct Inference Outperform Pre-Translation in Multilingual LLM Applications? | Yotam Intrator, et al. | The paper challenges the pre-translation paradigm in multilingual LLM applications, showing the advantages of direct inference. | [Link](http://arxiv.org/abs/2403.04792v1) |\n",
+      "| Scientific Literature | SciAssess: Benchmarking LLM Proficiency in Scientific Literature Analysis | Hengxing Cai, et al. | Introduces SciAssess, a benchmark for evaluating LLMs' abilities in scientific literature analysis across various scientific fields. | [Link](http://arxiv.org/abs/2403.01976v2) |\n",
+      "| Privacy & Security | Differentially Private Synthetic Data via Foundation Model APIs 2: Text | Chulin Xie, et al. | The paper proposes a method to generate differentially private synthetic text data using API access to LLMs without model training. | [Link](http://arxiv.org/abs/2403.01749v1) |\n",
+      "| Medical Diagnostics | SERVAL: Synergy Learning between Vertical Models and LLMs towards Oracle-Level Zero-shot Medical Prediction | Jiahuan Yan, et al. | SERVAL is a synergy learning pipeline that enhances the vertical capabilities of LLMs and trains vertical models without annotations in the medical domain. | [Link](http://arxiv.org/abs/2403.01570v2) |\n",
+      "\n",
+      "Please note that the domains have been inferred from the summaries and titles of the papers and may not perfectly reflect the authors' intended categorization.\n",
       "\n",
       "--------------------------------------------------------------------------------\n"
      ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "ChatResult(chat_id=None, chat_history=[{'content': 'Topic: LLM applications papers from last week. Requirement: 5 - 10 papers from different domains.', 'role': 'assistant'}, {'content': 'To retrieve related papers from the arXiv API, we can use Python with the `requests` library to send a query to the API and parse the response. Below is a Python script that searches for papers related to \"LLM applications\" (Large Language Models applications) from the last week, across different domains, and prints out the required information for 5 to 10 papers.\\n\\n```python\\nimport requests\\nfrom datetime import datetime, timedelta\\nimport feedparser\\n\\n# Define the base URL for the arXiv API\\nARXIV_API_URL = \\'http://export.arxiv.org/api/query?\\'\\n\\n# Define the search parameters\\nsearch_query = \\'all:\"LLM applications\"\\'\\nstart_date = (datetime.now() - timedelta(days=7)).strftime(\\'%Y%m%d%H%M%S\\')\\nend_date = datetime.now().strftime(\\'%Y%m%d%H%M%S\\')\\nstart = 0\\nmax_results = 10\\nsort_by = \\'submittedDate\\'\\nsort_order = \\'descending\\'\\n\\n# Construct the query\\nquery = f\\'search_query={search_query}&sortBy={sort_by}&sortOrder={sort_order}&start={start}&max_results={max_results}\\'\\n\\n# Send the request to the arXiv API\\nresponse = requests.get(ARXIV_API_URL + query)\\n\\n# Parse the response using feedparser\\nfeed = feedparser.parse(response.content)\\n\\n# Print the title, authors, abstract, and link of each paper\\nfor entry in feed.entries:\\n    print(\"Title:\", entry.title)\\n    print(\"Authors:\", \\', \\'.join(author.name for author in entry.authors))\\n    print(\"Abstract:\", entry.summary)\\n    print(\"Link:\", entry.link)\\n    print(\"\\\\n\")\\n\\n# Check if we have at least 5 papers, if not, adjust the search or notify\\nif len(feed.entries) < 5:\\n    print(\"Less than 5 papers found. 
Consider adjusting the search parameters or timeframe.\")\\n```\\n\\nThis script will print the title, authors, abstract, and link for each paper related to \"LLM applications\" from the last week, up to a maximum of 10 papers. If fewer than 5 papers are found, it will notify the user to consider adjusting the search parameters or timeframe.', 'name': 'Retrieve_Action_1', 'role': 'user'}, {'content': \"exitcode: 0 (execution succeeded)\\nCode output: \\nTitle: PRSA: Prompt Reverse Stealing Attacks against Large Language Models\\nAuthors: Yong Yang, Xuhong Zhang, Yi Jiang, Xi Chen, Haoyu Wang, Shouling Ji, Zonghui Wang\\nAbstract: Prompt, recognized as crucial intellectual property, enables large language\\nmodels (LLMs) to perform specific tasks without the need of fine-tuning,\\nunderscoring their escalating importance. With the rise of prompt-based\\nservices, such as prompt marketplaces and LLM applications, providers often\\ndisplay prompts' capabilities through input-output examples to attract users.\\nHowever, this paradigm raises a pivotal security concern: does the exposure of\\ninput-output pairs pose the risk of potential prompt leakage, infringing on the\\nintellectual property rights of the developers? To our knowledge, this problem\\nstill has not been comprehensively explored yet. To remedy this gap, in this\\npaper, we perform the first in depth exploration and propose a novel attack\\nframework for reverse-stealing prompts against commercial LLMs, namely PRSA.\\nThe main idea of PRSA is that by analyzing the critical features of the\\ninput-output pairs, we mimic and gradually infer (steal) the target prompts. In\\ndetail, PRSA mainly consists of two key phases: prompt mutation and prompt\\npruning. In the mutation phase, we propose a prompt attention algorithm based\\non differential feedback to capture these critical features for effectively\\ninferring the target prompts. 
In the prompt pruning phase, we identify and mask\\nthe words dependent on specific inputs, enabling the prompts to accommodate\\ndiverse inputs for generalization. Through extensive evaluation, we verify that\\nPRSA poses a severe threat in real world scenarios. We have reported these\\nfindings to prompt service providers and actively collaborate with them to take\\nprotective measures for prompt copyright.\\nLink: http://arxiv.org/abs/2402.19200v1\\n\\n\\nTitle: Political Compass or Spinning Arrow? Towards More Meaningful Evaluations\\n  for Values and Opinions in Large Language Models\\nAuthors: Paul Röttger, Valentin Hofmann, Valentina Pyatkin, Musashi Hinck, Hannah Rose Kirk, Hinrich Schütze, Dirk Hovy\\nAbstract: Much recent work seeks to evaluate values and opinions in large language\\nmodels (LLMs) using multiple-choice surveys and questionnaires. Most of this\\nwork is motivated by concerns around real-world LLM applications. For example,\\npolitically-biased LLMs may subtly influence society when they are used by\\nmillions of people. Such real-world concerns, however, stand in stark contrast\\nto the artificiality of current evaluations: real users do not typically ask\\nLLMs survey questions. Motivated by this discrepancy, we challenge the\\nprevailing constrained evaluation paradigm for values and opinions in LLMs and\\nexplore more realistic unconstrained evaluations. As a case study, we focus on\\nthe popular Political Compass Test (PCT). In a systematic review, we find that\\nmost prior work using the PCT forces models to comply with the PCT's\\nmultiple-choice format. We show that models give substantively different\\nanswers when not forced; that answers change depending on how models are\\nforced; and that answers lack paraphrase robustness. Then, we demonstrate that\\nmodels give different answers yet again in a more realistic open-ended answer\\nsetting. 
We distill these findings into recommendations and open challenges in\\nevaluating values and opinions in LLMs.\\nLink: http://arxiv.org/abs/2402.16786v1\\n\\n\\nTitle: Large Language Models as Urban Residents: An LLM Agent Framework for\\n  Personal Mobility Generation\\nAuthors: Jiawei Wang, Renhe Jiang, Chuang Yang, Zengqing Wu, Makoto Onizuka, Ryosuke Shibasaki, Chuan Xiao\\nAbstract: This paper introduces a novel approach using Large Language Models (LLMs)\\nintegrated into an agent framework for flexible and efficient personal mobility\\ngeneration. LLMs overcome the limitations of previous models by efficiently\\nprocessing semantic data and offering versatility in modeling various tasks.\\nOur approach addresses the critical need to align LLMs with real-world urban\\nmobility data, focusing on three research questions: aligning LLMs with rich\\nactivity data, developing reliable activity generation strategies, and\\nexploring LLM applications in urban mobility. The key technical contribution is\\na novel LLM agent framework that accounts for individual activity patterns and\\nmotivations, including a self-consistency approach to align LLMs with\\nreal-world activity data and a retrieval-augmented strategy for interpretable\\nactivity generation. In experimental studies, comprehensive validation is\\nperformed using real-world data. This research marks the pioneering work of\\ndesigning an LLM agent framework for activity generation based on real-world\\nhuman activity data, offering a promising tool for urban mobility analysis.\\nLink: http://arxiv.org/abs/2402.14744v1\\n\\n\\nTitle: An Evaluation of Large Language Models in Bioinformatics Research\\nAuthors: Hengchuang Yin, Zhonghui Gu, Fanhao Wang, Yiparemu Abuduhaibaier, Yanqiao Zhu, Xinming Tu, Xian-Sheng Hua, Xiao Luo, Yizhou Sun\\nAbstract: Large language models (LLMs) such as ChatGPT have gained considerable\\ninterest across diverse research communities. 
Their notable ability for text\\ncompletion and generation has inaugurated a novel paradigm for\\nlanguage-interfaced problem solving. However, the potential and efficacy of\\nthese models in bioinformatics remain incompletely explored. In this work, we\\nstudy the performance LLMs on a wide spectrum of crucial bioinformatics tasks.\\nThese tasks include the identification of potential coding regions, extraction\\nof named entities for genes and proteins, detection of antimicrobial and\\nanti-cancer peptides, molecular optimization, and resolution of educational\\nbioinformatics problems. Our findings indicate that, given appropriate prompts,\\nLLMs like GPT variants can successfully handle most of these tasks. In\\naddition, we provide a thorough analysis of their limitations in the context of\\ncomplicated bioinformatics tasks. In conclusion, we believe that this work can\\nprovide new perspectives and motivate future research in the field of LLMs\\napplications, AI for Science and bioinformatics.\\nLink: http://arxiv.org/abs/2402.13714v1\\n\\n\\nTitle: Privacy-Preserving Instructions for Aligning Large Language Models\\nAuthors: Da Yu, Peter Kairouz, Sewoong Oh, Zheng Xu\\nAbstract: Service providers of large language model (LLM) applications collect user\\ninstructions in the wild and use them in further aligning LLMs with users'\\nintentions. These instructions, which potentially contain sensitive\\ninformation, are annotated by human workers in the process. This poses a new\\nprivacy risk not addressed by the typical private optimization. To this end, we\\npropose using synthetic instructions to replace real instructions in data\\nannotation and model fine-tuning. Formal differential privacy is guaranteed by\\ngenerating those synthetic instructions using privately fine-tuned generators.\\nCrucial in achieving the desired utility is our novel filtering algorithm that\\nmatches the distribution of the synthetic instructions to that of the real\\nones. 
In both supervised fine-tuning and reinforcement learning from human\\nfeedback, our extensive experiments demonstrate the high utility of the final\\nset of synthetic instructions by showing comparable results to real\\ninstructions. In supervised fine-tuning, models trained with private synthetic\\ninstructions outperform leading open-source models such as Vicuna.\\nLink: http://arxiv.org/abs/2402.13659v1\\n\\n\\nTitle: Ain't Misbehavin' -- Using LLMs to Generate Expressive Robot Behavior in\\n  Conversations with the Tabletop Robot Haru\\nAuthors: Zining Wang, Paul Reisert, Eric Nichols, Randy Gomez\\nAbstract: Social robots aim to establish long-term bonds with humans through engaging\\nconversation. However, traditional conversational approaches, reliant on\\nscripted interactions, often fall short in maintaining engaging conversations.\\nThis paper addresses this limitation by integrating large language models\\n(LLMs) into social robots to achieve more dynamic and expressive conversations.\\nWe introduce a fully-automated conversation system that leverages LLMs to\\ngenerate robot responses with expressive behaviors, congruent with the robot's\\npersonality. We incorporate robot behavior with two modalities: 1) a\\ntext-to-speech (TTS) engine capable of various delivery styles, and 2) a\\nlibrary of physical actions for the robot. We develop a custom,\\nstate-of-the-art emotion recognition model to dynamically select the robot's\\ntone of voice and utilize emojis from LLM output as cues for generating robot\\nactions. A demo of our system is available here. To illuminate design and\\nimplementation issues, we conduct a pilot study where volunteers chat with a\\nsocial robot using our proposed system, and we analyze their feedback,\\nconducting a rigorous error analysis of chat transcripts. Feedback was\\noverwhelmingly positive, with participants commenting on the robot's empathy,\\nhelpfulness, naturalness, and entertainment. 
Most negative feedback was due to\\nautomatic speech recognition (ASR) errors which had limited impact on\\nconversations. However, we observed a small class of errors, such as the LLM\\nrepeating itself or hallucinating fictitious information and human responses,\\nthat have the potential to derail conversations, raising important issues for\\nLLM application.\\nLink: http://arxiv.org/abs/2402.11571v1\\n\\n\\nTitle: Fine-tuning Large Language Model (LLM) Artificial Intelligence Chatbots\\n  in Ophthalmology and LLM-based evaluation using GPT-4\\nAuthors: Ting Fang Tan, Kabilan Elangovan, Liyuan Jin, Yao Jie, Li Yong, Joshua Lim, Stanley Poh, Wei Yan Ng, Daniel Lim, Yuhe Ke, Nan Liu, Daniel Shu Wei Ting\\nAbstract: Purpose: To assess the alignment of GPT-4-based evaluation to human clinician\\nexperts, for the evaluation of responses to ophthalmology-related patient\\nqueries generated by fine-tuned LLM chatbots. Methods: 400 ophthalmology\\nquestions and paired answers were created by ophthalmologists to represent\\ncommonly asked patient questions, divided into fine-tuning (368; 92%), and\\ntesting (40; 8%). We find-tuned 5 different LLMs, including LLAMA2-7b,\\nLLAMA2-7b-Chat, LLAMA2-13b, and LLAMA2-13b-Chat. For the testing dataset,\\nadditional 8 glaucoma QnA pairs were included. 200 responses to the testing\\ndataset were generated by 5 fine-tuned LLMs for evaluation. A customized\\nclinical evaluation rubric was used to guide GPT-4 evaluation, grounded on\\nclinical accuracy, relevance, patient safety, and ease of understanding. GPT-4\\nevaluation was then compared against ranking by 5 clinicians for clinical\\nalignment. Results: Among all fine-tuned LLMs, GPT-3.5 scored the highest\\n(87.1%), followed by LLAMA2-13b (80.9%), LLAMA2-13b-chat (75.5%),\\nLLAMA2-7b-Chat (70%) and LLAMA2-7b (68.8%) based on the GPT-4 evaluation. 
GPT-4\\nevaluation demonstrated significant agreement with human clinician rankings,\\nwith Spearman and Kendall Tau correlation coefficients of 0.90 and 0.80\\nrespectively; while correlation based on Cohen Kappa was more modest at 0.50.\\nNotably, qualitative analysis and the glaucoma sub-analysis revealed clinical\\ninaccuracies in the LLM-generated responses, which were appropriately\\nidentified by the GPT-4 evaluation. Conclusion: The notable clinical alignment\\nof GPT-4 evaluation highlighted its potential to streamline the clinical\\nevaluation of LLM chatbot responses to healthcare-related queries. By\\ncomplementing the existing clinician-dependent manual grading, this efficient\\nand automated evaluation could assist the validation of future developments in\\nLLM applications for healthcare.\\nLink: http://arxiv.org/abs/2402.10083v1\\n\\n\\nTitle: Unmemorization in Large Language Models via Self-Distillation and\\n  Deliberate Imagination\\nAuthors: Yijiang River Dong, Hongzhou Lin, Mikhail Belkin, Ramon Huerta, Ivan Vulić\\nAbstract: While displaying impressive generation capabilities across many tasks, Large\\nLanguage Models (LLMs) still struggle with crucial issues of privacy violation\\nand unwanted exposure of sensitive data. This raises an essential question: how\\nshould we prevent such undesired behavior of LLMs while maintaining their\\nstrong generation and natural language understanding (NLU) capabilities? In\\nthis work, we introduce a novel approach termed deliberate imagination in the\\ncontext of LLM unlearning. Instead of trying to forget memorized data, we\\nemploy a self-distillation framework, guiding LLMs to deliberately imagine\\nalternative scenarios. 
As demonstrated in a wide range of experiments, the\\nproposed method not only effectively unlearns targeted text but also preserves\\nthe LLMs' capabilities in open-ended generation tasks as well as in NLU tasks.\\nOur results demonstrate the usefulness of this approach across different models\\nand sizes, and also with parameter-efficient fine-tuning, offering a novel\\npathway to addressing the challenges with private and sensitive data in LLM\\napplications.\\nLink: http://arxiv.org/abs/2402.10052v1\\n\\n\\nTitle: Anchor-based Large Language Models\\nAuthors: Jianhui Pang, Fanghua Ye, Derek F. Wong, Longyue Wang\\nAbstract: Large language models (LLMs) predominantly employ decoder-only transformer\\narchitectures, necessitating the retention of keys/values information for\\nhistorical tokens to provide contextual information and avoid redundant\\ncomputation. However, the substantial size and parameter volume of these LLMs\\nrequire massive GPU memory. This memory demand increases with the length of the\\ninput text, leading to an urgent need for more efficient methods of information\\nstorage and processing. This study introduces Anchor-based LLMs (AnLLMs), which\\nutilize an innovative anchor-based self-attention network (AnSAN) and also an\\nanchor-based inference strategy. This approach enables LLMs to compress\\nsequence information into an anchor token, reducing the keys/values cache and\\nenhancing inference efficiency. Experiments on question-answering benchmarks\\nreveal that AnLLMs maintain similar accuracy levels while achieving up to 99%\\nkeys/values cache reduction and up to 3.5 times faster inference. 
Despite a\\nminor compromise in accuracy, the substantial enhancements of AnLLMs employing\\nthe AnSAN technique in resource utilization and computational efficiency\\nunderscore their potential for practical LLM applications.\\nLink: http://arxiv.org/abs/2402.07616v2\\n\\n\\nTitle: T-RAG: Lessons from the LLM Trenches\\nAuthors: Masoomali Fatehkia, Ji Kim Lucas, Sanjay Chawla\\nAbstract: Large Language Models (LLM) have shown remarkable language capabilities\\nfueling attempts to integrate them into applications across a wide range of\\ndomains. An important application area is question answering over private\\nenterprise documents where the main considerations are data security, which\\nnecessitates applications that can be deployed on-prem, limited computational\\nresources and the need for a robust application that correctly responds to\\nqueries. Retrieval-Augmented Generation (RAG) has emerged as the most prominent\\nframework for building LLM-based applications. While building a RAG is\\nrelatively straightforward, making it robust and a reliable application\\nrequires extensive customization and relatively deep knowledge of the\\napplication domain. We share our experiences building and deploying an LLM\\napplication for question answering over private organizational documents. Our\\napplication combines the use of RAG with a finetuned open-source LLM.\\nAdditionally, our system, which we call Tree-RAG (T-RAG), uses a tree structure\\nto represent entity hierarchies within the organization. This is used to\\ngenerate a textual description to augment the context when responding to user\\nqueries pertaining to entities within the organization's hierarchy. Our\\nevaluations show that this combination performs better than a simple RAG or\\nfinetuning implementation. 
Finally, we share some lessons learned based on our\\nexperiences building an LLM application for real-world use.\\nLink: http://arxiv.org/abs/2402.07483v1\\n\\n\\n\", 'name': 'Retrieve_Action_2', 'role': 'user'}, {'content': \"Based on the retrieved abstracts, here is a markdown table categorizing the papers by domain, along with their titles, authors, summaries, and links:\\n\\n| Domain | Title | Authors | Summary | Link |\\n|--------|-------|---------|---------|------|\\n| Security | PRSA: Prompt Reverse Stealing Attacks against Large Language Models | Yong Yang, Xuhong Zhang, Yi Jiang, Xi Chen, Haoyu Wang, Shouling Ji, Zonghui Wang | The paper explores the security risks associated with exposing input-output pairs of prompts used in LLMs and proposes a novel attack framework, PRSA, to reverse-steal prompts, posing a threat to intellectual property rights. | [Link](http://arxiv.org/abs/2402.19200v1) |\\n| Ethics & Evaluation | Political Compass or Spinning Arrow? Towards More Meaningful Evaluations for Values and Opinions in Large Language Models | Paul Röttger, Valentin Hofmann, Valentina Pyatkin, Musashi Hinck, Hannah Rose Kirk, Hinrich Schütze, Dirk Hovy | This work challenges the constrained evaluation paradigm for values and opinions in LLMs and explores more realistic unconstrained evaluations, focusing on the Political Compass Test (PCT). | [Link](http://arxiv.org/abs/2402.16786v1) |\\n| Urban Mobility | Large Language Models as Urban Residents: An LLM Agent Framework for Personal Mobility Generation | Jiawei Wang, Renhe Jiang, Chuang Yang, Zengqing Wu, Makoto Onizuka, Ryosuke Shibasaki, Chuan Xiao | Introduces an LLM agent framework for personal mobility generation, aligning LLMs with real-world urban mobility data, and offering a tool for urban mobility analysis. 
| [Link](http://arxiv.org/abs/2402.14744v1) |\\n| Bioinformatics | An Evaluation of Large Language Models in Bioinformatics Research | Hengchuang Yin, Zhonghui Gu, Fanhao Wang, Yiparemu Abuduhaibaier, Yanqiao Zhu, Xinming Tu, Xian-Sheng Hua, Xiao Luo, Yizhou Sun | Evaluates the performance of LLMs on bioinformatics tasks, highlighting their potential and limitations, and motivating future research in LLM applications in bioinformatics. | [Link](http://arxiv.org/abs/2402.13714v1) |\\n| Privacy | Privacy-Preserving Instructions for Aligning Large Language Models | Da Yu, Peter Kairouz, Sewoong Oh, Zheng Xu | Proposes using synthetic instructions generated by privately fine-tuned generators to replace real instructions in data annotation and model fine-tuning, ensuring privacy while maintaining utility. | [Link](http://arxiv.org/abs/2402.13659v1) |\\n| Social Robotics | Ain't Misbehavin' -- Using LLMs to Generate Expressive Robot Behavior in Conversations with the Tabletop Robot Haru | Zining Wang, Paul Reisert, Eric Nichols, Randy Gomez | Integrates LLMs into social robots to generate dynamic and expressive conversations, using a text-to-speech engine and a library of physical actions for the robot. | [Link](http://arxiv.org/abs/2402.11571v1) |\\n| Ophthalmology | Fine-tuning Large Language Model (LLM) Artificial Intelligence Chatbots in Ophthalmology and LLM-based evaluation using GPT-4 | Ting Fang Tan, Kabilan Elangovan, Liyuan Jin, Yao Jie, Li Yong, Joshua Lim, Stanley Poh, Wei Yan Ng, Daniel Lim, Yuhe Ke, Nan Liu, Daniel Shu Wei Ting | Assesses the alignment of GPT-4-based evaluation to human clinician experts for evaluating responses to ophthalmology-related patient queries generated by fine-tuned LLM chatbots. 
| [Link](http://arxiv.org/abs/2402.10083v1) |\\n| Privacy & Data Security | Unmemorization in Large Language Models via Self-Distillation and Deliberate Imagination | Yijiang River Dong, Hongzhou Lin, Mikhail Belkin, Ramon Huerta, Ivan Vulić | Introduces a novel approach for LLM unlearning by guiding LLMs to imagine alternative scenarios, effectively unlearning targeted text while preserving generation and NLU capabilities. | [Link](http://arxiv.org/abs/2402.10052v1) |\\n| Computational Efficiency | Anchor-based Large Language Models | Jianhui Pang, Fanghua Ye, Derek F. Wong, Longyue Wang | Proposes Anchor-based LLMs (AnLLMs) with an innovative anchor-based self-attention network (AnSAN) to reduce memory demand and enhance inference efficiency. | [Link](http://arxiv.org/abs/2402.07616v2) |\\n| Enterprise Applications | T-RAG: Lessons from the LLM Trenches | Masoomali Fatehkia, Ji Kim Lucas, Sanjay Chawla | Shares experiences building and deploying an LLM application for question answering over private organizational documents, combining RAG with a finetuned LLM and a tree structure for entity hierarchies. 
| [Link](http://arxiv.org/abs/2402.07483v1) |\\n\\nThese papers cover a range of domains including security, ethics, urban mobility, bioinformatics, privacy, social robotics, ophthalmology, data security, computational efficiency, and enterprise applications, showcasing the diverse applications of large language models.\", 'name': 'Research_Action_1', 'role': 'user'}], summary=\"Based on the retrieved abstracts, here is a markdown table categorizing the papers by domain, along with their titles, authors, summaries, and links:\\n\\n| Domain | Title | Authors | Summary | Link |\\n|--------|-------|---------|---------|------|\\n| Security | PRSA: Prompt Reverse Stealing Attacks against Large Language Models | Yong Yang, Xuhong Zhang, Yi Jiang, Xi Chen, Haoyu Wang, Shouling Ji, Zonghui Wang | The paper explores the security risks associated with exposing input-output pairs of prompts used in LLMs and proposes a novel attack framework, PRSA, to reverse-steal prompts, posing a threat to intellectual property rights. | [Link](http://arxiv.org/abs/2402.19200v1) |\\n| Ethics & Evaluation | Political Compass or Spinning Arrow? Towards More Meaningful Evaluations for Values and Opinions in Large Language Models | Paul Röttger, Valentin Hofmann, Valentina Pyatkin, Musashi Hinck, Hannah Rose Kirk, Hinrich Schütze, Dirk Hovy | This work challenges the constrained evaluation paradigm for values and opinions in LLMs and explores more realistic unconstrained evaluations, focusing on the Political Compass Test (PCT). | [Link](http://arxiv.org/abs/2402.16786v1) |\\n| Urban Mobility | Large Language Models as Urban Residents: An LLM Agent Framework for Personal Mobility Generation | Jiawei Wang, Renhe Jiang, Chuang Yang, Zengqing Wu, Makoto Onizuka, Ryosuke Shibasaki, Chuan Xiao | Introduces an LLM agent framework for personal mobility generation, aligning LLMs with real-world urban mobility data, and offering a tool for urban mobility analysis. 
| [Link](http://arxiv.org/abs/2402.14744v1) |\\n| Bioinformatics | An Evaluation of Large Language Models in Bioinformatics Research | Hengchuang Yin, Zhonghui Gu, Fanhao Wang, Yiparemu Abuduhaibaier, Yanqiao Zhu, Xinming Tu, Xian-Sheng Hua, Xiao Luo, Yizhou Sun | Evaluates the performance of LLMs on bioinformatics tasks, highlighting their potential and limitations, and motivating future research in LLM applications in bioinformatics. | [Link](http://arxiv.org/abs/2402.13714v1) |\\n| Privacy | Privacy-Preserving Instructions for Aligning Large Language Models | Da Yu, Peter Kairouz, Sewoong Oh, Zheng Xu | Proposes using synthetic instructions generated by privately fine-tuned generators to replace real instructions in data annotation and model fine-tuning, ensuring privacy while maintaining utility. | [Link](http://arxiv.org/abs/2402.13659v1) |\\n| Social Robotics | Ain't Misbehavin' -- Using LLMs to Generate Expressive Robot Behavior in Conversations with the Tabletop Robot Haru | Zining Wang, Paul Reisert, Eric Nichols, Randy Gomez | Integrates LLMs into social robots to generate dynamic and expressive conversations, using a text-to-speech engine and a library of physical actions for the robot. | [Link](http://arxiv.org/abs/2402.11571v1) |\\n| Ophthalmology | Fine-tuning Large Language Model (LLM) Artificial Intelligence Chatbots in Ophthalmology and LLM-based evaluation using GPT-4 | Ting Fang Tan, Kabilan Elangovan, Liyuan Jin, Yao Jie, Li Yong, Joshua Lim, Stanley Poh, Wei Yan Ng, Daniel Lim, Yuhe Ke, Nan Liu, Daniel Shu Wei Ting | Assesses the alignment of GPT-4-based evaluation to human clinician experts for evaluating responses to ophthalmology-related patient queries generated by fine-tuned LLM chatbots. 
| [Link](http://arxiv.org/abs/2402.10083v1) |\\n| Privacy & Data Security | Unmemorization in Large Language Models via Self-Distillation and Deliberate Imagination | Yijiang River Dong, Hongzhou Lin, Mikhail Belkin, Ramon Huerta, Ivan Vulić | Introduces a novel approach for LLM unlearning by guiding LLMs to imagine alternative scenarios, effectively unlearning targeted text while preserving generation and NLU capabilities. | [Link](http://arxiv.org/abs/2402.10052v1) |\\n| Computational Efficiency | Anchor-based Large Language Models | Jianhui Pang, Fanghua Ye, Derek F. Wong, Longyue Wang | Proposes Anchor-based LLMs (AnLLMs) with an innovative anchor-based self-attention network (AnSAN) to reduce memory demand and enhance inference efficiency. | [Link](http://arxiv.org/abs/2402.07616v2) |\\n| Enterprise Applications | T-RAG: Lessons from the LLM Trenches | Masoomali Fatehkia, Ji Kim Lucas, Sanjay Chawla | Shares experiences building and deploying an LLM application for question answering over private organizational documents, combining RAG with a finetuned LLM and a tree structure for entity hierarchies. | [Link](http://arxiv.org/abs/2402.07483v1) |\\n\\nThese papers cover a range of domains including security, ethics, urban mobility, bioinformatics, privacy, social robotics, ophthalmology, data security, computational efficiency, and enterprise applications, showcasing the diverse applications of large language models.\", cost=({'total_cost': 0}, {'total_cost': 0}), human_input=[])"
-      ]
-     },
-     "execution_count": 3,
-     "metadata": {},
-     "output_type": "execute_result"
     }
    ],
    "source": [
-    "initializer.initiate_chat(\n",
+    "chat_result = initializer.initiate_chat(\n",
     "    manager, message=\"Topic: LLM applications papers from last week. Requirement: 5 - 10 papers from different domains.\"\n",
     ")"
    ]