137 arbitrage agent (#511)

* Added arbitrage script * First version of arbitrage agent * Added test for Arbitrage agent * Updated poetry.lock * Fixed mypy * Last fixes before review * Removed unnecessary tokens from prompt * Now handling correlation output as a bool instead of a float between [-1,1] * Making sure agent only runs for Omen * Clean up * Addressed PR comments * Fixing tests * Adding pinecone update when running Arbitrage agent * Betting in more markets * Update prediction_market_agent/agents/arbitrage_agent/data_models.py Co-authored-by: Peter Jung <[email protected]> * Update tests/agents/arbitrage_agent/test_arbitrage_agent.py Co-authored-by: Peter Jung <[email protected]> * Implemented Pinecone upgrades * Removed lower when fetching markets * Pinecone adjustments - now uses only open markets for correlation * Handling case corr < 0 * Added tests to data models * Final touches * Merged lock * Updated lock * Filtering only open markets on Arbitrage agent * Update prediction_market_agent/db/pinecone_handler.py Co-authored-by: Peter Jung <[email protected]> * Reverted some changes in the pinecone_handler.py * Fixed ci * Filtering omen markets earlier * Changes required due to new PMAT version * Fixing merge conflicts --------- Co-authored-by: Peter Jung <[email protected]>
gnosis · Oct 29, 2024 · 4b3d1c0 · 4b3d1c0
1 parent 7122b9c
commit 4b3d1c0
Show file tree

Hide file tree

Showing 17 changed files with 977 additions and 522 deletions.
diff --git a/poetry.lock b/poetry.lock
diff --git a/prediction_market_agent/agents/arbitrage_agent/data_models.py b/prediction_market_agent/agents/arbitrage_agent/data_models.py
@@ -0,0 +1,93 @@
+import typing as t
+
+from prediction_market_agent_tooling.markets.agent_market import AgentMarket
+from prediction_market_agent_tooling.tools.betting_strategies.utils import SimpleBet
+from prediction_market_agent_tooling.tools.utils import check_not_none
+from pydantic import BaseModel
+
+
+class Correlation(BaseModel):
+    near_perfect_correlation: bool | None
+    reasoning: str
+
+
+class ArbitrageBet(BaseModel):
+    main_market_bet: SimpleBet
+    related_market_bet: SimpleBet
+
+
+class CorrelatedMarketPair(BaseModel):
+    main_market: AgentMarket
+    related_market: AgentMarket
+    correlation: Correlation
+
+    def __str__(self) -> str:
+        return f"main_market {self.main_market.question} related_market_question {self.related_market.question} potential_profit_per_unit {self.potential_profit_per_bet_unit()}"
+
+    def potential_profit_per_bet_unit(self) -> float:
+        """
+        Calculate potential profit per bet unit based on high positive market correlation.
+        For positively correlated markets: Bet YES/NO or NO/YES.
+        """
+
+        if self.correlation.near_perfect_correlation is None:
+            return 0
+
+        bet_direction_main, bet_direction_related = self.bet_directions()
+        p_main = (
+            self.main_market.current_p_yes
+            if bet_direction_main
+            else self.main_market.current_p_no
+        )
+        p_related = (
+            self.related_market.current_p_yes
+            if bet_direction_related
+            else self.related_market.current_p_no
+        )
+        denominator = p_main + p_related
+        return (1 / denominator) - 1
+
+    def bet_directions(self) -> t.Tuple[bool, bool]:
+        correlation = check_not_none(self.correlation.near_perfect_correlation)
+        if correlation:
+            # We compare denominators for cases YES/NO and NO/YES bets and take the most profitable (i.e. where denominator is the lowest).
+            # For other cases we employ similar logic.
+            yes_no = self.main_market.current_p_yes + self.related_market.current_p_no
+            no_yes = self.main_market.current_p_no + self.related_market.current_p_yes
+            return (True, False) if yes_no <= no_yes else (False, True)
+
+        else:
+            yes_yes = self.main_market.current_p_yes + self.related_market.current_p_yes
+            no_no = self.main_market.current_p_no + self.related_market.current_p_no
+            return (True, True) if yes_yes <= no_no else (False, False)
+
+    def split_bet_amount_between_yes_and_no(
+        self, total_bet_amount: float
+    ) -> ArbitrageBet:
+        """Splits total bet amount following equations below:
+        A1/p1 = A2/p2 (same profit regardless of outcome resolution)
+        A1 + A2 = total bet amount
+        """
+
+        bet_direction_main, bet_direction_related = self.bet_directions()
+
+        p_main = (
+            self.main_market.current_p_yes
+            if bet_direction_main
+            else self.main_market.current_p_no
+        )
+        p_related = (
+            self.related_market.current_p_yes
+            if bet_direction_related
+            else self.related_market.current_p_no
+        )
+        total_probability = p_main + p_related
+        bet_main = total_bet_amount * p_main / total_probability
+        bet_related = total_bet_amount * p_related / total_probability
+        main_market_bet = SimpleBet(direction=bet_direction_main, size=bet_main)
+        related_market_bet = SimpleBet(
+            direction=bet_direction_related, size=bet_related
+        )
+        return ArbitrageBet(
+            main_market_bet=main_market_bet, related_market_bet=related_market_bet
+        )
diff --git a/prediction_market_agent/agents/arbitrage_agent/deploy.py b/prediction_market_agent/agents/arbitrage_agent/deploy.py
@@ -0,0 +1,190 @@
+import typing as t
+
+from langchain_core.output_parsers import PydanticOutputParser
+from langchain_core.prompts import PromptTemplate
+from langchain_core.runnables import RunnableSerializable
+from langchain_openai import ChatOpenAI
+from prediction_market_agent_tooling.deploy.agent import DeployableTraderAgent
+from prediction_market_agent_tooling.gtypes import Probability
+from prediction_market_agent_tooling.loggers import logger
+from prediction_market_agent_tooling.markets.agent_market import AgentMarket
+from prediction_market_agent_tooling.markets.data_models import (
+    BetAmount,
+    Position,
+    ProbabilisticAnswer,
+    TokenAmount,
+    Trade,
+    TradeType,
+)
+from prediction_market_agent_tooling.markets.markets import MarketType
+from prediction_market_agent_tooling.markets.omen.omen import OmenAgentMarket
+from prediction_market_agent_tooling.markets.omen.omen_subgraph_handler import (
+    OmenSubgraphHandler,
+)
+from prediction_market_agent_tooling.tools.langfuse_ import (
+    get_langfuse_langchain_config,
+    observe,
+)
+from prediction_market_agent_tooling.tools.utils import utcnow
+
+from prediction_market_agent.agents.arbitrage_agent.data_models import (
+    CorrelatedMarketPair,
+    Correlation,
+)
+from prediction_market_agent.agents.arbitrage_agent.prompt import PROMPT_TEMPLATE
+from prediction_market_agent.db.pinecone_handler import PineconeHandler
+from prediction_market_agent.utils import APIKeys
+
+
+class DeployableArbitrageAgent(DeployableTraderAgent):
+    """Agent that places mirror bets on Omen for (quasi) risk-neutral profit."""
+
+    model = "gpt-4o"
+    # trade amount will be divided between correlated markets.
+    total_trade_amount = BetAmount(amount=0.1, currency=OmenAgentMarket.currency)
+    bet_on_n_markets_per_run = 5
+    max_related_markets_per_market = 10
+    n_markets_to_fetch = 50
+
+    def run(self, market_type: MarketType) -> None:
+        if market_type != MarketType.OMEN:
+            raise RuntimeError(
+                "Can arbitrage only on Omen since related markets embeddings available only for Omen markets."
+            )
+        self.subgraph_handler = OmenSubgraphHandler()
+        self.pinecone_handler = PineconeHandler()
+        self.pinecone_handler.insert_all_omen_markets_if_not_exists()
+        self.chain = self._build_chain()
+        super().run(market_type=market_type)
+
+    def answer_binary_market(self, market: AgentMarket) -> ProbabilisticAnswer | None:
+        return ProbabilisticAnswer(p_yes=Probability(0.5), confidence=1.0)
+
+    def _build_chain(self) -> RunnableSerializable[t.Any, t.Any]:
+        llm = ChatOpenAI(
+            temperature=0,
+            model=self.model,
+            api_key=APIKeys().openai_api_key_secretstr_v1,
+        )
+
+        parser = PydanticOutputParser(pydantic_object=Correlation)
+        prompt = PromptTemplate(
+            template=PROMPT_TEMPLATE,
+            input_variables=["main_market_question", "related_market_question"],
+            partial_variables={"format_instructions": parser.get_format_instructions()},
+        )
+
+        chain = prompt | llm | parser
+        return chain
+
+    @observe()
+    def calculate_correlation_between_markets(
+        self, market: AgentMarket, related_market: AgentMarket
+    ) -> Correlation:
+        correlation: Correlation = self.chain.invoke(
+            {
+                "main_market_question": market.question,
+                "related_market_question": related_market.question,
+            }
+        )
+        return correlation
+
+    @observe()
+    def get_correlated_markets(self, market: AgentMarket) -> list[CorrelatedMarketPair]:
+        # We try to find similar, open markets which point to the same outcome.
+        correlated_markets = []
+        # We only wanted to find related markets that are open.
+        # We intentionally query more markets in the hope it yields open markets.
+        # We could store market_status (open, closed) in Pinecone, but we refrain from it
+        # to keep the chain data (or graph) as the source-of-truth, instead of managing the
+        # update process of the vectorDB.
+
+        related = self.pinecone_handler.find_nearest_questions_with_threshold(
+            limit=self.max_related_markets_per_market,
+            text=market.question,
+            filter_on_metadata={
+                "close_time_timestamp": {"gte": utcnow().timestamp() + 3600}
+            },  # closing 1h from now
+        )
+
+        omen_markets = self.subgraph_handler.get_omen_binary_markets(
+            limit=len(related),
+            id_in=[i.market_address for i in related if i.market_address != market.id],
+            resolved=False,
+        )
+
+        # Order omen_markets in the same order as related
+        related_market_addresses = [i.market_address for i in related]
+        omen_markets = sorted(
+            omen_markets, key=lambda m: related_market_addresses.index(m.id)
+        )
+
+        print(f"Fetched {len(omen_markets)} related markets for market {market.id}")
+
+        for related_market in omen_markets:
+            result: Correlation = self.chain.invoke(
+                {
+                    "main_market_question": market,
+                    "related_market_question": related_market,
+                },
+                config=get_langfuse_langchain_config(),
+            )
+            if result.near_perfect_correlation is not None:
+                related_agent_market = OmenAgentMarket.from_data_model(related_market)
+                correlated_markets.append(
+                    CorrelatedMarketPair(
+                        main_market=market,
+                        related_market=related_agent_market,
+                        correlation=result,
+                    )
+                )
+        return correlated_markets
+
+    @observe()
+    def build_trades_for_correlated_markets(
+        self, pair: CorrelatedMarketPair
+    ) -> list[Trade]:
+        # Split between main_market and related_market
+        arbitrage_bet = pair.split_bet_amount_between_yes_and_no(
+            self.total_trade_amount.amount
+        )
+
+        main_trade = Trade(
+            trade_type=TradeType.BUY,
+            outcome=arbitrage_bet.main_market_bet.direction,
+            amount=TokenAmount(
+                amount=arbitrage_bet.main_market_bet.size,
+                currency=pair.main_market.currency,
+            ),
+        )
+
+        # related trade
+        related_trade = Trade(
+            trade_type=TradeType.BUY,
+            outcome=arbitrage_bet.related_market_bet.direction,
+            amount=TokenAmount(
+                amount=arbitrage_bet.related_market_bet.size,
+                currency=pair.related_market.currency,
+            ),
+        )
+
+        trades = [main_trade, related_trade]
+        logger.info(f"Placing arbitrage trades {trades}")
+        return trades
+
+    @observe()
+    def build_trades(
+        self,
+        market: AgentMarket,
+        answer: ProbabilisticAnswer,
+        existing_position: Position | None,
+    ) -> list[Trade]:
+        trades = []
+        correlated_markets = self.get_correlated_markets(market=market)
+        for pair in correlated_markets:
+            # We want to profit at least 0.5% per market (value chosen as initial baseline).
+            if pair.potential_profit_per_bet_unit() > 0.005:
+                trades_for_pair = self.build_trades_for_correlated_markets(pair)
+                trades.extend(trades_for_pair)
+
+        return trades
diff --git a/prediction_market_agent/agents/arbitrage_agent/prompt.py b/prediction_market_agent/agents/arbitrage_agent/prompt.py
@@ -0,0 +1,12 @@
+PROMPT_TEMPLATE = """Given two markets, MARKET 1 and MARKET 2, provide a boolean value that represents the correlation between these two markets' outcomes. Return True if the outcomes are perfectly or nearly perfectly correlated, meaning there is a high probability that both markets resolve to the same outcome. Return False if the outcomes are perfectly or nearly perfectly inversely correlated, and finally return None if the correlation is weak or non-existent.
+Correlation can also be understood as the conditional probability that market 2 resolves to YES, given that market 1 resolved to YES.
+In addition to the correlation value, explain the reasoning behind your decision.
+
+[MARKET 1]
+{main_market_question}
+
+[MARKET 2]
+{related_market_question}
+
+Follow the formatting instructions below for producing an output in the correct format.
+{format_instructions}"""
diff --git a/prediction_market_agent/agents/invalid_agent/deploy.py b/prediction_market_agent/agents/invalid_agent/deploy.py
@@ -22,7 +22,9 @@ class InvalidAgent(DeployableTraderAgent):
     supported_markets = [MarketType.OMEN]
 
     def verify_market(self, market_type: MarketType, market: AgentMarket) -> bool:
-        if self.have_bet_on_market_since(market, since=self.same_market_bet_interval):
+        if self.have_bet_on_market_since(
+            market, since=self.same_market_trade_interval.get(market=market)
+        ):
             return False
 
         # If the market is new, don't bet on it as the potential profit from market invalidity is low.

diff --git a/prediction_market_agent/agents/specialized_agent/deploy.py b/prediction_market_agent/agents/specialized_agent/deploy.py
@@ -28,21 +28,21 @@
 class GetMarketCreatorsStalkerMarkets:
     # Do as many bets as we can for the special markets.
     bet_on_n_markets_per_run = MAX_AVAILABLE_MARKETS
+    n_markets_to_fetch: int = MAX_AVAILABLE_MARKETS
     # These tends to be long-running markets, it's not interesting to bet on them too much.
     same_market_bet_interval = timedelta(days=7)
     supported_markets: t.Sequence[MarketType] = [MarketType.OMEN]
 
     def get_markets(
         self,
         market_type: MarketType,
-        limit: int = MAX_AVAILABLE_MARKETS,
         sort_by: SortBy = SortBy.CLOSING_SOONEST,
         filter_by: FilterBy = FilterBy.OPEN,
     ) -> t.Sequence[OmenAgentMarket]:
         available_markets = [
             OmenAgentMarket.from_data_model(m)
             for m in OmenSubgraphHandler().get_omen_binary_markets_simple(
-                limit=limit,
+                limit=self.n_markets_to_fetch,
                 sort_by=sort_by,
                 filter_by=filter_by,
                 creator_in=SPECIALIZED_FOR_MARKET_CREATORS,

diff --git a/prediction_market_agent/agents/think_thoroughly_agent/benchmark.py b/prediction_market_agent/agents/think_thoroughly_agent/benchmark.py
@@ -20,6 +20,7 @@
     MarketFees,
     SortBy,
 )
+from prediction_market_agent_tooling.markets.market_fees import MarketFees
 from prediction_market_agent_tooling.markets.markets import (
     MarketType,
     get_binary_markets,

diff --git a/prediction_market_agent/agents/think_thoroughly_agent/deploy.py b/prediction_market_agent/agents/think_thoroughly_agent/deploy.py
@@ -32,7 +32,7 @@ def answer_binary_market(self, market: AgentMarket) -> ProbabilisticAnswer | Non
         )
 
     def before_process_markets(self, market_type: MarketType) -> None:
-        self.agent.update_markets()
+        self.agent.pinecone_handler.insert_all_omen_markets_if_not_exists()
         super().before_process_markets(market_type=market_type)
 
 

diff --git a/prediction_market_agent/agents/think_thoroughly_agent/models.py b/prediction_market_agent/agents/think_thoroughly_agent/models.py
@@ -7,11 +7,14 @@
 class PineconeMetadata(BaseModel):
     question_title: str
     market_address: HexAddress
+    close_time_timestamp: int
 
     @staticmethod
     def from_omen_market(market: OmenMarket) -> "PineconeMetadata":
         return PineconeMetadata(
-            question_title=market.question_title, market_address=market.id
+            question_title=market.question_title,
+            market_address=market.id,
+            close_time_timestamp=int(market.close_time.timestamp()),
         )
 
 

diff --git a/prediction_market_agent/agents/think_thoroughly_agent/think_thoroughly_agent.py b/prediction_market_agent/agents/think_thoroughly_agent/think_thoroughly_agent.py
@@ -1,4 +1,3 @@
-import datetime
 import typing as t
 from abc import ABC
 from uuid import UUID, uuid4
@@ -179,14 +178,6 @@ def _build_llm(model: str) -> BaseChatModel:
         )
         return llm
 
-    def update_markets(self) -> None:
-        """We use the agent's run to add embeddings of new markets that don't exist yet in the
-        vector DB."""
-        created_after = utcnow() - datetime.timedelta(days=7)
-        self.pinecone_handler.insert_all_omen_markets_if_not_exists(
-            created_after=created_after
-        )
-
     @observe()
     def get_required_conditions(self, question: str) -> Scenarios:
         researcher = self._get_researcher(self.model)