Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

545 strategy backtests add financial metrics #548

Merged
merged 15 commits into from
Nov 21, 2024
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ape-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ default_ecosystem: gnosis
node:
gnosis:
mainnet:
uri: https://rpc.gnosischain.com
uri: https://rpc.gnosis.gateway.fm #https://rpc.gnosischain.com
gabrielfior marked this conversation as resolved.
Show resolved Hide resolved


networks:
Expand Down
66 changes: 66 additions & 0 deletions examples/monitor/financial_metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import numpy as np
import pandas as pd

from prediction_market_agent_tooling.markets.data_models import (
SharpeOutput,
SimulationDetail,
)


class SharpeRatioCalculator:
    """Compute Sharpe-ratio style metrics from a list of simulated bets.

    The simulation details are turned into a DataFrame once, at construction
    time. From it a daily wallet-balance series is derived (starting balance
    plus cumulative profit), and the daily returns of that series are used to
    compute the mean daily return, the annualized volatility and the
    annualized Sharpe ratio.
    """

    # Number of daily periods used to annualize volatility and Sharpe ratio.
    DAYS_PER_YEAR = 365

    def __init__(
        self,
        details: list[SimulationDetail],
        risk_free_rate: float = 0.0,
        starting_balance: float = 50.0,
    ) -> None:
        """Store the inputs and build the working DataFrame.

        Args:
            details: Simulation records; each one is dumped with
                ``model_dump()`` into one DataFrame row.
            risk_free_rate: Rate subtracted from the mean *daily* return when
                computing the Sharpe ratio.
            starting_balance: Wallet balance before the first bet. It is added
                to the cumulative profit so that percentage returns are
                computed on a balance rather than on raw profit.
        """
        self.details = details
        self.df = pd.DataFrame([d.model_dump() for d in self.details])
        self.risk_free_rate = risk_free_rate
        self.starting_balance = starting_balance

    def __has_df_valid_columns_else_exception(
        self, required_columns: list[str]
    ) -> None:
        """Raise ``ValueError`` unless every required column is in ``self.df``."""
        missing_columns = sorted(set(required_columns) - set(self.df.columns))
        if missing_columns:
            raise ValueError(
                "Dataframe doesn't contain all the required columns."
                f" Missing: {missing_columns}"
            )

    def prepare_wallet_daily_balance_df(
        self, timestamp_col_name: str, profit_col_name: str
    ) -> pd.DataFrame:
        """Build a daily wallet-balance DataFrame with a ``returns`` column.

        Args:
            timestamp_col_name: Column holding the time of each bet.
            profit_col_name: Column holding the profit of each bet.

        Returns:
            A DataFrame indexed by day with two columns: ``profit_cumsum``
            (starting balance plus cumulative profit) and ``returns`` (the
            day-over-day percentage change; the first row is NaN).

        Raises:
            ValueError: If either column is missing from the DataFrame.
        """
        self.__has_df_valid_columns_else_exception(
            [timestamp_col_name, profit_col_name]
        )
        df = self.df.copy()
        df[timestamp_col_name] = pd.to_datetime(df[timestamp_col_name])
        df = df.sort_values(timestamp_col_name, ascending=True)

        # Wallet balance after each bet: initial funding plus running profit.
        df["profit_cumsum"] = df[profit_col_name].cumsum() + self.starting_balance

        # Keep only the final balance for identical timestamps so the index
        # is unique before resampling.
        df = df.drop_duplicates(subset=timestamp_col_name, keep="last")
        df = df.set_index(timestamp_col_name)

        # We generate a new DataFrame with daily wallet balances, derived from
        # the final wallet balance of the previous day. Days with no earlier
        # balance to carry forward come out as NaN and are dropped.
        wallet_balance_daily_df = df[["profit_cumsum"]].resample("D").ffill().dropna()
        wallet_balance_daily_df["returns"] = wallet_balance_daily_df[
            "profit_cumsum"
        ].pct_change()
        return wallet_balance_daily_df

    def calculate_annual_sharpe_ratio(
        self, timestamp_col_name: str = "timestamp", profit_col_name: str = "sim_profit"
    ) -> SharpeOutput:
        """Compute annualized volatility, mean daily return and Sharpe ratio.

        Args:
            timestamp_col_name: Column holding the time of each bet.
            profit_col_name: Column holding the profit to evaluate.

        Returns:
            A ``SharpeOutput`` with ``annualized_volatility``,
            ``mean_daily_return`` and ``annualized_sharpe_ratio``.

        Raises:
            ValueError: If either column is missing from the DataFrame.
        """
        wallet_daily_balance_df = self.prepare_wallet_daily_balance_df(
            timestamp_col_name=timestamp_col_name, profit_col_name=profit_col_name
        )
        daily_returns = wallet_daily_balance_df["returns"]
        annualization_factor = np.sqrt(self.DAYS_PER_YEAR)

        daily_volatility = daily_returns.std()
        mean_daily_return = daily_returns.mean()
        # NOTE: no guard against zero or NaN volatility; the ratio is passed
        # on to SharpeOutput exactly as computed.
        daily_sharpe_ratio = (
            mean_daily_return - self.risk_free_rate
        ) / daily_volatility

        return SharpeOutput(
            annualized_volatility=daily_volatility * annualization_factor,
            mean_daily_return=mean_daily_return,
            annualized_sharpe_ratio=daily_sharpe_ratio * annualization_factor,
        )
200 changes: 123 additions & 77 deletions examples/monitor/match_bets_with_langfuse_traces.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,19 @@
from pathlib import Path
from typing import Any

import dotenv
from eth_typing import HexAddress, HexStr

from examples.monitor.financial_metrics import SharpeRatioCalculator
from examples.monitor.transaction_cache import TransactionBlockCache
from prediction_market_agent_tooling.markets.omen.omen_contracts import (
OmenConditionalTokenContract,
)
from prediction_market_agent_tooling.markets.omen.omen_subgraph_handler import (
OmenSubgraphHandler,
)

dotenv.load_dotenv()
gabrielfior marked this conversation as resolved.
Show resolved Hide resolved
gabrielfior marked this conversation as resolved.
Show resolved Hide resolved
import pandas as pd
from langfuse import Langfuse
from pydantic import BaseModel
Expand All @@ -10,13 +23,13 @@
BettingStrategy,
GuaranteedLossError,
KellyBettingStrategy,
MaxAccuracyBettingStrategy,
MaxAccuracyWithKellyScaledBetsStrategy,
MaxExpectedValueBettingStrategy,
ProbabilisticAnswer,
TradeType,
)
from prediction_market_agent_tooling.markets.data_models import ResolvedBet
from prediction_market_agent_tooling.markets.data_models import (
ResolvedBet,
SimulationDetail,
)
from prediction_market_agent_tooling.markets.omen.omen import OmenAgentMarket
from prediction_market_agent_tooling.tools.httpx_cached_client import HttpxCachedClient
from prediction_market_agent_tooling.tools.langfuse_client_utils import (
Expand Down Expand Up @@ -47,6 +60,8 @@ def get_outcome_for_trace(
strategy: BettingStrategy,
trace: ProcessMarketTrace,
market_outcome: bool,
actual_placed_bet: ResolvedBet,
tx_block_cache: TransactionBlockCache,
) -> SimulatedOutcome | None:
market = trace.market
answer = trace.answer
Expand All @@ -72,18 +87,25 @@ def get_outcome_for_trace(
trades[0].trade_type == TradeType.BUY
), "Can only buy without previous position."
buy_trade = trades[0]

received_outcome_tokens = market.get_buy_token_amount(
bet_amount=market.get_bet_amount(buy_trade.amount.amount),
direction=buy_trade.outcome,
).amount

correct = buy_trade.outcome == market_outcome
profit = (
received_outcome_tokens - buy_trade.amount.amount
if correct
else -buy_trade.amount.amount
)
# If not correct, stop early because profit is known.
if not correct:
profit = -buy_trade.amount.amount
else:
# We use a historical state (by passing in a block_number as arg) to get the correct outcome token balances.
tx_block_number = tx_block_cache.get_block_number(actual_placed_bet.id)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oooh, so if we used the market at the state of the latest block, get_buy_token_amount produced rubbish? 😱

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

get_buy_token_amount is block-dependent, since the price of the token is dictated by yes_outcome_pool_size and no_outcome_pool_size, which differ between blocks.

market_at_block = OmenSubgraphHandler().get_omen_market_by_market_id(
HexAddress(HexStr(market.id)), block_number=tx_block_number
)
omen_agent_market_at_block = OmenAgentMarket.from_data_model(market_at_block)

received_outcome_tokens = omen_agent_market_at_block.get_buy_token_amount(
bet_amount=omen_agent_market_at_block.get_bet_amount(
buy_trade.amount.amount
),
direction=buy_trade.outcome,
).amount
profit = received_outcome_tokens - buy_trade.amount.amount
gabrielfior marked this conversation as resolved.
Show resolved Hide resolved

return SimulatedOutcome(
size=buy_trade.amount.amount,
Expand All @@ -100,14 +122,14 @@ def get_outcome_for_trace(
# Get the private keys for the agents from GCP Secret Manager
agent_gcp_secret_map = {
"DeployablePredictionProphetGPT4TurboFinalAgent": "pma-prophetgpt4turbo-final",
"DeployablePredictionProphetGPT4TurboPreviewAgent": "pma-prophetgpt4",
"DeployablePredictionProphetGPT4oAgent": "pma-prophetgpt3",
"DeployablePredictionProphetGPTo1PreviewAgent": "pma-prophet-o1-preview",
"DeployablePredictionProphetGPTo1MiniAgent": "pma-prophet-o1-mini",
"DeployableOlasEmbeddingOAAgent": "pma-evo-olas-embeddingoa",
"DeployableThinkThoroughlyAgent": "pma-think-thoroughly",
"DeployableThinkThoroughlyProphetResearchAgent": "pma-think-thoroughly-prophet-research",
"DeployableKnownOutcomeAgent": "pma-knownoutcome",
# "DeployablePredictionProphetGPT4TurboPreviewAgent": "pma-prophetgpt4",
# "DeployablePredictionProphetGPT4oAgent": "pma-prophetgpt3",
# "DeployablePredictionProphetGPTo1PreviewAgent": "pma-prophet-o1-preview",
# "DeployablePredictionProphetGPTo1MiniAgent": "pma-prophet-o1-mini",
# "DeployableOlasEmbeddingOAAgent": "pma-evo-olas-embeddingoa",
# "DeployableThinkThoroughlyAgent": "pma-think-thoroughly",
# "DeployableThinkThoroughlyProphetResearchAgent": "pma-think-thoroughly-prophet-research",
# "DeployableKnownOutcomeAgent": "pma-knownoutcome",
}

agent_pkey_map = {
Expand All @@ -116,44 +138,44 @@ def get_outcome_for_trace(

# Define strategies we want to test out
strategies = [
MaxAccuracyBettingStrategy(bet_amount=1),
MaxAccuracyBettingStrategy(bet_amount=2),
MaxAccuracyBettingStrategy(bet_amount=25),
KellyBettingStrategy(max_bet_amount=1),
KellyBettingStrategy(max_bet_amount=2),
# MaxAccuracyBettingStrategy(bet_amount=1),
# MaxAccuracyBettingStrategy(bet_amount=2),
# MaxAccuracyBettingStrategy(bet_amount=25),
# KellyBettingStrategy(max_bet_amount=1),
# KellyBettingStrategy(max_bet_amount=2),
KellyBettingStrategy(max_bet_amount=5),
KellyBettingStrategy(max_bet_amount=25),
MaxAccuracyWithKellyScaledBetsStrategy(max_bet_amount=1),
MaxAccuracyWithKellyScaledBetsStrategy(max_bet_amount=2),
MaxAccuracyWithKellyScaledBetsStrategy(max_bet_amount=25),
MaxExpectedValueBettingStrategy(bet_amount=1),
MaxExpectedValueBettingStrategy(bet_amount=2),
MaxExpectedValueBettingStrategy(bet_amount=5),
MaxExpectedValueBettingStrategy(bet_amount=25),
KellyBettingStrategy(max_bet_amount=2, max_price_impact=0.01),
KellyBettingStrategy(max_bet_amount=2, max_price_impact=0.05),
KellyBettingStrategy(max_bet_amount=2, max_price_impact=0.1),
KellyBettingStrategy(max_bet_amount=2, max_price_impact=0.15),
KellyBettingStrategy(max_bet_amount=2, max_price_impact=0.2),
KellyBettingStrategy(max_bet_amount=2, max_price_impact=0.25),
KellyBettingStrategy(max_bet_amount=2, max_price_impact=0.3),
KellyBettingStrategy(max_bet_amount=2, max_price_impact=0.4),
KellyBettingStrategy(max_bet_amount=2, max_price_impact=0.5),
KellyBettingStrategy(max_bet_amount=2, max_price_impact=0.6),
KellyBettingStrategy(max_bet_amount=2, max_price_impact=0.7),
KellyBettingStrategy(max_bet_amount=5, max_price_impact=0.1),
KellyBettingStrategy(max_bet_amount=5, max_price_impact=0.15),
KellyBettingStrategy(max_bet_amount=5, max_price_impact=0.2),
KellyBettingStrategy(max_bet_amount=5, max_price_impact=0.3),
KellyBettingStrategy(max_bet_amount=5, max_price_impact=0.4),
KellyBettingStrategy(max_bet_amount=5, max_price_impact=0.5),
KellyBettingStrategy(max_bet_amount=5, max_price_impact=0.6),
KellyBettingStrategy(max_bet_amount=5, max_price_impact=0.7),
# KellyBettingStrategy(max_bet_amount=25),
# MaxAccuracyWithKellyScaledBetsStrategy(max_bet_amount=1),
# MaxAccuracyWithKellyScaledBetsStrategy(max_bet_amount=2),
# MaxAccuracyWithKellyScaledBetsStrategy(max_bet_amount=25),
# MaxExpectedValueBettingStrategy(bet_amount=1),
# MaxExpectedValueBettingStrategy(bet_amount=2),
# MaxExpectedValueBettingStrategy(bet_amount=5),
# MaxExpectedValueBettingStrategy(bet_amount=25),
# KellyBettingStrategy(max_bet_amount=2, max_price_impact=0.01),
# KellyBettingStrategy(max_bet_amount=2, max_price_impact=0.05),
# KellyBettingStrategy(max_bet_amount=2, max_price_impact=0.1),
# KellyBettingStrategy(max_bet_amount=2, max_price_impact=0.15),
# KellyBettingStrategy(max_bet_amount=2, max_price_impact=0.2),
# KellyBettingStrategy(max_bet_amount=2, max_price_impact=0.25),
# KellyBettingStrategy(max_bet_amount=2, max_price_impact=0.3),
# KellyBettingStrategy(max_bet_amount=2, max_price_impact=0.4),
# KellyBettingStrategy(max_bet_amount=2, max_price_impact=0.5),
# KellyBettingStrategy(max_bet_amount=2, max_price_impact=0.6),
# KellyBettingStrategy(max_bet_amount=2, max_price_impact=0.7),
# KellyBettingStrategy(max_bet_amount=5, max_price_impact=0.1),
# KellyBettingStrategy(max_bet_amount=5, max_price_impact=0.15),
# KellyBettingStrategy(max_bet_amount=5, max_price_impact=0.2),
# KellyBettingStrategy(max_bet_amount=5, max_price_impact=0.3),
# KellyBettingStrategy(max_bet_amount=5, max_price_impact=0.4),
# KellyBettingStrategy(max_bet_amount=5, max_price_impact=0.5),
# KellyBettingStrategy(max_bet_amount=5, max_price_impact=0.6),
# KellyBettingStrategy(max_bet_amount=5, max_price_impact=0.7),
KellyBettingStrategy(max_bet_amount=25, max_price_impact=0.1),
KellyBettingStrategy(max_bet_amount=25, max_price_impact=0.2),
KellyBettingStrategy(max_bet_amount=25, max_price_impact=0.3),
KellyBettingStrategy(max_bet_amount=25, max_price_impact=0.5),
KellyBettingStrategy(max_bet_amount=25, max_price_impact=0.7),
# KellyBettingStrategy(max_bet_amount=25, max_price_impact=0.2),
# KellyBettingStrategy(max_bet_amount=25, max_price_impact=0.3),
# KellyBettingStrategy(max_bet_amount=25, max_price_impact=0.5),
# KellyBettingStrategy(max_bet_amount=25, max_price_impact=0.7),
]

httpx_client = HttpxCachedClient().get_client()
Expand All @@ -165,6 +187,11 @@ def get_outcome_for_trace(
strat_mse_profits[repr(strategy)] = MSEProfit(p_yes_mse=[], total_profit=[])

print("# Agent Bet vs Simulated Bet Comparison")

tx_block_cache = TransactionBlockCache(
web3=OmenConditionalTokenContract().get_web3()
)

for agent_name, private_key in agent_pkey_map.items():
print(f"\n## {agent_name}\n")
api_keys = APIKeys(BET_FROM_PRIVATE_KEY=private_key)
Expand Down Expand Up @@ -211,51 +238,68 @@ def get_outcome_for_trace(
continue

if len(bets_with_traces) != len(bets):
pct_bets_without_traces = (len(bets) - len(bets_with_traces)) / len(bets)
print(
f"{len(bets) - len(bets_with_traces)} bets do not have a corresponding trace, ignoring them."
f"{len(bets) - len(bets_with_traces)} bets do not have a corresponding trace ({pct_bets_without_traces * 100:.2f}%), ignoring them."
)

simulations: list[dict[str, Any]] = []
details = []
details: list[SimulationDetail] = []

for strategy_idx, strategy in enumerate(strategies):
# "Born" agent with initial funding, simulate as if he was doing bets one by one.
starting_balance = 50.0
agent_balance = starting_balance
simulated_outcomes: list[SimulatedOutcome] = []

# ToDo - Can we add the value of tokens that weren't redeemed yet?
# Like a portfolio tracking.
for bet_with_trace in bets_with_traces:
bet = bet_with_trace.bet
trace = bet_with_trace.trace
simulated_outcome = get_outcome_for_trace(
strategy=strategy, trace=trace, market_outcome=bet.market_outcome
strategy=strategy,
trace=trace,
market_outcome=bet.market_outcome,
actual_placed_bet=bet,
tx_block_cache=tx_block_cache,
)
if simulated_outcome is None:
continue

simulated_outcomes.append(simulated_outcome)
agent_balance += simulated_outcome.profit

details.append(
{
"url": trace.market.url,
"market_p_yes": round(trace.market.current_p_yes, 4),
"agent_p_yes": round(trace.answer.p_yes, 4),
"agent_conf": round(trace.answer.confidence, 4),
"org_bet": round(bet.amount.amount, 4),
"sim_bet": round(simulated_outcome.size, 4),
"org_dir": bet.outcome,
"sim_dir": simulated_outcome.direction,
"org_profit": round(bet.profit.amount, 4),
"sim_profit": round(simulated_outcome.profit, 4),
}
simulation_detail = SimulationDetail(
gabrielfior marked this conversation as resolved.
Show resolved Hide resolved
strategy=repr(strategy),
url=trace.market.url,
market_p_yes=round(trace.market.current_p_yes, 4),
agent_p_yes=round(trace.answer.p_yes, 4),
agent_conf=round(trace.answer.confidence, 4),
org_bet=round(bet.amount.amount, 4),
sim_bet=round(simulated_outcome.size, 4),
org_dir=bet.outcome,
sim_dir=simulated_outcome.direction,
org_profit=round(bet.profit.amount, 4),
sim_profit=round(simulated_outcome.profit, 4),
timestamp=bet_with_trace.trace.timestamp_datetime,
)
details.append(simulation_detail)

details.sort(key=lambda x: x["sim_profit"], reverse=True)
pd.DataFrame.from_records(details).to_csv(
details.sort(key=lambda x: x.sim_profit, reverse=True)
details_df = pd.DataFrame.from_records([d.model_dump() for d in details])
details_df.to_csv(
output_directory / f"{agent_name} - {strategy} - all bets.csv",
index=False,
)

# Financial analysis
calc = SharpeRatioCalculator(details=details)
sharpe_output_simulation = calc.calculate_annual_sharpe_ratio()
sharpe_output_original = calc.calculate_annual_sharpe_ratio(
profit_col_name="org_profit"
)

sum_squared_errors = 0.0
for bet_with_trace in bets_with_traces:
bet = bet_with_trace.bet
Expand Down Expand Up @@ -283,6 +327,7 @@ def get_outcome_for_trace(
# We don't know these for the original run.
"start_balance": None,
"end_balance": None,
**sharpe_output_original.model_dump(),
}
)
else:
Expand All @@ -300,6 +345,7 @@ def get_outcome_for_trace(
"p_yes mse": p_yes_mse,
"start_balance": starting_balance,
"end_balance": agent_balance,
**sharpe_output_simulation.model_dump(),
}
)

Expand Down
Loading
Loading