Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

545 strategy backtests add financial metrics #548

Merged
merged 15 commits into from
Nov 21, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions examples/monitor/data_models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from pydantic import BaseModel
gabrielfior marked this conversation as resolved.
Show resolved Hide resolved

from prediction_market_agent_tooling.tools.datetime_utc import DatetimeUTC


class SimulationDetail(BaseModel):
    """One backtest row: an agent's originally placed bet next to its simulated counterpart.

    Fields prefixed ``org_`` describe the original bet; fields prefixed
    ``sim_`` describe the bet simulated under ``strategy``. The per-field notes
    below reflect how match_bets_with_langfuse_traces.py populates the model.
    """

    # Identifier of the simulated betting strategy (the caller passes repr(strategy)).
    strategy: str
    # URL of the market the bet was placed on.
    url: str
    # Market's p_yes taken from the trace (caller rounds to 4 decimals).
    market_p_yes: float
    # Agent's predicted p_yes taken from the trace answer.
    agent_p_yes: float
    # Agent's confidence taken from the trace answer.
    agent_conf: float
    # Amount of the originally placed bet.
    org_bet: float
    # Size of the simulated bet.
    sim_bet: float
    # Outcome the original bet was placed on.
    org_dir: bool
    # Direction of the simulated bet.
    sim_dir: bool
    # Profit of the original bet.
    org_profit: float
    # Profit of the simulated bet.
    sim_profit: float
    # Timestamp of the trace associated with the bet; used as the time axis
    # for the Sharpe-ratio calculation.
    timestamp: DatetimeUTC
gabrielfior marked this conversation as resolved.
Show resolved Hide resolved


class SharpeOutput(BaseModel):
    """Result bundle produced by SharpeRatioCalculator.calculate_annual_sharpe_ratio."""

    # Standard deviation of daily returns scaled by sqrt(365).
    annualized_volatility: float
    # Arithmetic mean of the daily returns (not annualized).
    mean_daily_return: float
    # (mean daily return - risk-free rate) / daily volatility, scaled by sqrt(365).
    annualized_sharpe_ratio: float
gabrielfior marked this conversation as resolved.
Show resolved Hide resolved
63 changes: 63 additions & 0 deletions examples/monitor/financial_metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import numpy as np
import pandas as pd

from examples.monitor.data_models import SharpeOutput, SimulationDetail


class SharpeRatioCalculator:
    """Compute volatility and Sharpe-ratio metrics for a series of bets.

    The bet details are converted to a DataFrame once, at construction time.
    Each calculation turns a profit column into a wallet balance over time
    (starting balance + cumulative profit), samples that balance once per
    calendar day and works on the day-over-day percentage returns.
    """

    def __init__(
        self,
        details: "list[SimulationDetail]",
        risk_free_rate: float = 0.0,
        starting_balance: float = 50.0,
    ) -> None:
        """Store the inputs and build the underlying DataFrame.

        Args:
            details: Bet records; each must expose ``model_dump()``.
            risk_free_rate: Subtracted directly from the mean *daily* return,
                so it must be expressed as a per-day rate.
            starting_balance: Initial wallet funding added to the cumulative
                profit. Defaults to 50, the value that used to be hard-coded;
                pass the same balance the simulation itself starts from.
        """
        self.details = details
        self.df = pd.DataFrame([d.model_dump() for d in self.details])
        self.risk_free_rate = risk_free_rate
        self.starting_balance = starting_balance

    def __has_df_valid_columns_else_exception(
        self, required_columns: list[str]
    ) -> None:
        """Raise ValueError unless every required column exists in ``self.df``."""
        if not set(required_columns).issubset(self.df.columns):
            raise ValueError("Dataframe doesn't contain all the required columns.")

    def prepare_wallet_daily_balance_df(
        self, timestamp_col_name: str, profit_col_name: str
    ) -> pd.DataFrame:
        """Build a daily wallet-balance series with its percentage returns.

        Args:
            timestamp_col_name: Column holding the time of each bet.
            profit_col_name: Column holding the profit of each bet.

        Returns:
            A DataFrame indexed by day with columns ``profit_cumsum`` (the
            wallet balance) and ``returns`` (day-over-day percentage change;
            NaN on the first row).

        Raises:
            ValueError: If either column is missing (this includes the case
                of an empty ``details`` list, which yields no columns).
        """
        self.__has_df_valid_columns_else_exception(
            [timestamp_col_name, profit_col_name]
        )
        df = self.df.copy()
        df[timestamp_col_name] = pd.to_datetime(df[timestamp_col_name])
        df = df.sort_values(timestamp_col_name, ascending=True)

        # Wallet balance after each bet = initial funding + cumulative profit.
        df["profit_cumsum"] = df[profit_col_name].cumsum() + self.starting_balance

        # For bets sharing a timestamp keep the last row: its cumulative sum
        # already includes every bet at that instant.
        df = df.drop_duplicates(subset=timestamp_col_name, keep="last")
        df = df.set_index(timestamp_col_name)

        # Sample the balance at each day boundary, carrying forward the most
        # recent balance known at that point (i.e. the closing balance of the
        # previous day). The first boundary precedes every bet and is dropped.
        # NOTE(review): bets placed after the last day boundary are never
        # reflected in the daily series - confirm this is intended.
        wallet_balance_daily_df = (
            df[["profit_cumsum"]].resample("D").ffill().dropna()
        )
        wallet_balance_daily_df["returns"] = wallet_balance_daily_df[
            "profit_cumsum"
        ].pct_change()
        return wallet_balance_daily_df

    def calculate_annual_sharpe_ratio(
        self, timestamp_col_name: str = "timestamp", profit_col_name: str = "sim_profit"
    ) -> "SharpeOutput":
        """Compute annualized volatility, mean daily return and annualized Sharpe ratio.

        Args:
            timestamp_col_name: Column holding the time of each bet.
            profit_col_name: Profit column to analyse (e.g. ``sim_profit`` or
                ``org_profit``).

        Returns:
            A ``SharpeOutput`` with the three metrics.

        Raises:
            ValueError: If a required column is missing.

        NOTE: there is no guard for zero or undefined volatility (e.g. fewer
        than two daily returns); the resulting values are then not finite.
        """
        wallet_daily_balance_df = self.prepare_wallet_daily_balance_df(
            timestamp_col_name=timestamp_col_name, profit_col_name=profit_col_name
        )
        daily_returns = wallet_daily_balance_df["returns"]

        # The series is sampled on calendar days, hence 365 periods per year.
        annualization_factor = np.sqrt(365)

        daily_volatility = daily_returns.std()
        mean_daily_return = daily_returns.mean()
        daily_sharpe_ratio = (
            mean_daily_return - self.risk_free_rate
        ) / daily_volatility
        return SharpeOutput(
            annualized_volatility=daily_volatility * annualization_factor,
            mean_daily_return=mean_daily_return,
            annualized_sharpe_ratio=daily_sharpe_ratio * annualization_factor,
        )
205 changes: 129 additions & 76 deletions examples/monitor/match_bets_with_langfuse_traces.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,20 @@
from pathlib import Path
from typing import Any

import dotenv
from eth_typing import HexAddress, HexStr

from examples.monitor.data_models import SimulationDetail
from examples.monitor.financial_metrics import SharpeRatioCalculator
from examples.monitor.transaction_cache import TransactionBlockCache
from prediction_market_agent_tooling.markets.omen.omen_contracts import (
OmenConditionalTokenContract,
)
from prediction_market_agent_tooling.markets.omen.omen_subgraph_handler import (
OmenSubgraphHandler,
)

dotenv.load_dotenv()
gabrielfior marked this conversation as resolved.
Show resolved Hide resolved
import pandas as pd
from langfuse import Langfuse
from pydantic import BaseModel
Expand All @@ -10,9 +24,6 @@
BettingStrategy,
GuaranteedLossError,
KellyBettingStrategy,
MaxAccuracyBettingStrategy,
MaxAccuracyWithKellyScaledBetsStrategy,
MaxExpectedValueBettingStrategy,
ProbabilisticAnswer,
TradeType,
)
Expand Down Expand Up @@ -47,6 +58,8 @@ def get_outcome_for_trace(
strategy: BettingStrategy,
trace: ProcessMarketTrace,
market_outcome: bool,
actual_placed_bet: ResolvedBet,
tx_block_cache: TransactionBlockCache,
) -> SimulatedOutcome | None:
market = trace.market
answer = trace.answer
Expand All @@ -72,18 +85,34 @@ def get_outcome_for_trace(
trades[0].trade_type == TradeType.BUY
), "Can only buy without previous position."
buy_trade = trades[0]
correct = buy_trade.outcome == market_outcome
# If not correct, stop early because profit is known.
if not correct:
profit = -buy_trade.amount.amount
else:
# We use a historical state (by passing in a block_number as arg) to get the correct outcome token balances.
tx_block_number = tx_block_cache.get_block_number(actual_placed_bet.id)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oooh, so if we used Market at state of latest block, get_buy_token_amount produced rubbish? 😱

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

get_buy_token_amount is block-dependent, since the price of the token is dictated by the size of the yes_outcome_pool_size and no_outcome_pool_size

market_at_block = OmenSubgraphHandler().get_omen_market_by_market_id(
HexAddress(HexStr(market.id)), block_number=tx_block_number
)
omen_agent_market_at_block = OmenAgentMarket.from_data_model(market_at_block)

received_outcome_tokens = market.get_buy_token_amount(
bet_amount=market.get_bet_amount(buy_trade.amount.amount),
direction=buy_trade.outcome,
).amount
received_outcome_tokens = omen_agent_market_at_block.get_buy_token_amount(
bet_amount=omen_agent_market_at_block.get_bet_amount(
buy_trade.amount.amount
),
direction=buy_trade.outcome,
).amount
profit = (
received_outcome_tokens - buy_trade.amount.amount
if correct
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a duplicate of the if-else above, right?

Suggested change
if correct

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually this was correct, but readability was horrible - please see updated version

else -buy_trade.amount.amount
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
else -buy_trade.amount.amount

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not needed - see refactoring I mentioned earlier

)

correct = buy_trade.outcome == market_outcome
profit = (
received_outcome_tokens - buy_trade.amount.amount
if correct
else -buy_trade.amount.amount
)
# received_outcome_tokens = market.get_buy_token_amount(
gabrielfior marked this conversation as resolved.
Show resolved Hide resolved
# bet_amount=market.get_bet_amount(buy_trade.amount.amount),
# direction=buy_trade.outcome,
# ).amount

return SimulatedOutcome(
size=buy_trade.amount.amount,
Expand All @@ -100,14 +129,14 @@ def get_outcome_for_trace(
# Get the private keys for the agents from GCP Secret Manager
agent_gcp_secret_map = {
"DeployablePredictionProphetGPT4TurboFinalAgent": "pma-prophetgpt4turbo-final",
"DeployablePredictionProphetGPT4TurboPreviewAgent": "pma-prophetgpt4",
"DeployablePredictionProphetGPT4oAgent": "pma-prophetgpt3",
"DeployablePredictionProphetGPTo1PreviewAgent": "pma-prophet-o1-preview",
"DeployablePredictionProphetGPTo1MiniAgent": "pma-prophet-o1-mini",
"DeployableOlasEmbeddingOAAgent": "pma-evo-olas-embeddingoa",
"DeployableThinkThoroughlyAgent": "pma-think-thoroughly",
"DeployableThinkThoroughlyProphetResearchAgent": "pma-think-thoroughly-prophet-research",
"DeployableKnownOutcomeAgent": "pma-knownoutcome",
# "DeployablePredictionProphetGPT4TurboPreviewAgent": "pma-prophetgpt4",
# "DeployablePredictionProphetGPT4oAgent": "pma-prophetgpt3",
# "DeployablePredictionProphetGPTo1PreviewAgent": "pma-prophet-o1-preview",
# "DeployablePredictionProphetGPTo1MiniAgent": "pma-prophet-o1-mini",
# "DeployableOlasEmbeddingOAAgent": "pma-evo-olas-embeddingoa",
# "DeployableThinkThoroughlyAgent": "pma-think-thoroughly",
# "DeployableThinkThoroughlyProphetResearchAgent": "pma-think-thoroughly-prophet-research",
# "DeployableKnownOutcomeAgent": "pma-knownoutcome",
}

agent_pkey_map = {
Expand All @@ -116,44 +145,44 @@ def get_outcome_for_trace(

# Define strategies we want to test out
strategies = [
MaxAccuracyBettingStrategy(bet_amount=1),
MaxAccuracyBettingStrategy(bet_amount=2),
MaxAccuracyBettingStrategy(bet_amount=25),
KellyBettingStrategy(max_bet_amount=1),
KellyBettingStrategy(max_bet_amount=2),
# MaxAccuracyBettingStrategy(bet_amount=1),
# MaxAccuracyBettingStrategy(bet_amount=2),
# MaxAccuracyBettingStrategy(bet_amount=25),
# KellyBettingStrategy(max_bet_amount=1),
# KellyBettingStrategy(max_bet_amount=2),
KellyBettingStrategy(max_bet_amount=5),
KellyBettingStrategy(max_bet_amount=25),
MaxAccuracyWithKellyScaledBetsStrategy(max_bet_amount=1),
MaxAccuracyWithKellyScaledBetsStrategy(max_bet_amount=2),
MaxAccuracyWithKellyScaledBetsStrategy(max_bet_amount=25),
MaxExpectedValueBettingStrategy(bet_amount=1),
MaxExpectedValueBettingStrategy(bet_amount=2),
MaxExpectedValueBettingStrategy(bet_amount=5),
MaxExpectedValueBettingStrategy(bet_amount=25),
KellyBettingStrategy(max_bet_amount=2, max_price_impact=0.01),
KellyBettingStrategy(max_bet_amount=2, max_price_impact=0.05),
KellyBettingStrategy(max_bet_amount=2, max_price_impact=0.1),
KellyBettingStrategy(max_bet_amount=2, max_price_impact=0.15),
KellyBettingStrategy(max_bet_amount=2, max_price_impact=0.2),
KellyBettingStrategy(max_bet_amount=2, max_price_impact=0.25),
KellyBettingStrategy(max_bet_amount=2, max_price_impact=0.3),
KellyBettingStrategy(max_bet_amount=2, max_price_impact=0.4),
KellyBettingStrategy(max_bet_amount=2, max_price_impact=0.5),
KellyBettingStrategy(max_bet_amount=2, max_price_impact=0.6),
KellyBettingStrategy(max_bet_amount=2, max_price_impact=0.7),
KellyBettingStrategy(max_bet_amount=5, max_price_impact=0.1),
KellyBettingStrategy(max_bet_amount=5, max_price_impact=0.15),
KellyBettingStrategy(max_bet_amount=5, max_price_impact=0.2),
KellyBettingStrategy(max_bet_amount=5, max_price_impact=0.3),
KellyBettingStrategy(max_bet_amount=5, max_price_impact=0.4),
KellyBettingStrategy(max_bet_amount=5, max_price_impact=0.5),
KellyBettingStrategy(max_bet_amount=5, max_price_impact=0.6),
KellyBettingStrategy(max_bet_amount=5, max_price_impact=0.7),
# KellyBettingStrategy(max_bet_amount=25),
# MaxAccuracyWithKellyScaledBetsStrategy(max_bet_amount=1),
# MaxAccuracyWithKellyScaledBetsStrategy(max_bet_amount=2),
# MaxAccuracyWithKellyScaledBetsStrategy(max_bet_amount=25),
# MaxExpectedValueBettingStrategy(bet_amount=1),
# MaxExpectedValueBettingStrategy(bet_amount=2),
# MaxExpectedValueBettingStrategy(bet_amount=5),
# MaxExpectedValueBettingStrategy(bet_amount=25),
# KellyBettingStrategy(max_bet_amount=2, max_price_impact=0.01),
# KellyBettingStrategy(max_bet_amount=2, max_price_impact=0.05),
# KellyBettingStrategy(max_bet_amount=2, max_price_impact=0.1),
# KellyBettingStrategy(max_bet_amount=2, max_price_impact=0.15),
# KellyBettingStrategy(max_bet_amount=2, max_price_impact=0.2),
# KellyBettingStrategy(max_bet_amount=2, max_price_impact=0.25),
# KellyBettingStrategy(max_bet_amount=2, max_price_impact=0.3),
# KellyBettingStrategy(max_bet_amount=2, max_price_impact=0.4),
# KellyBettingStrategy(max_bet_amount=2, max_price_impact=0.5),
# KellyBettingStrategy(max_bet_amount=2, max_price_impact=0.6),
# KellyBettingStrategy(max_bet_amount=2, max_price_impact=0.7),
# KellyBettingStrategy(max_bet_amount=5, max_price_impact=0.1),
# KellyBettingStrategy(max_bet_amount=5, max_price_impact=0.15),
# KellyBettingStrategy(max_bet_amount=5, max_price_impact=0.2),
# KellyBettingStrategy(max_bet_amount=5, max_price_impact=0.3),
# KellyBettingStrategy(max_bet_amount=5, max_price_impact=0.4),
# KellyBettingStrategy(max_bet_amount=5, max_price_impact=0.5),
# KellyBettingStrategy(max_bet_amount=5, max_price_impact=0.6),
# KellyBettingStrategy(max_bet_amount=5, max_price_impact=0.7),
KellyBettingStrategy(max_bet_amount=25, max_price_impact=0.1),
KellyBettingStrategy(max_bet_amount=25, max_price_impact=0.2),
KellyBettingStrategy(max_bet_amount=25, max_price_impact=0.3),
KellyBettingStrategy(max_bet_amount=25, max_price_impact=0.5),
KellyBettingStrategy(max_bet_amount=25, max_price_impact=0.7),
# KellyBettingStrategy(max_bet_amount=25, max_price_impact=0.2),
# KellyBettingStrategy(max_bet_amount=25, max_price_impact=0.3),
# KellyBettingStrategy(max_bet_amount=25, max_price_impact=0.5),
# KellyBettingStrategy(max_bet_amount=25, max_price_impact=0.7),
]

httpx_client = HttpxCachedClient().get_client()
Expand All @@ -165,12 +194,17 @@ def get_outcome_for_trace(
strat_mse_profits[repr(strategy)] = MSEProfit(p_yes_mse=[], total_profit=[])

print("# Agent Bet vs Simulated Bet Comparison")

tx_block_cache = TransactionBlockCache(
web3=OmenConditionalTokenContract().get_web3()
)

for agent_name, private_key in agent_pkey_map.items():
print(f"\n## {agent_name}\n")
api_keys = APIKeys(BET_FROM_PRIVATE_KEY=private_key)

# Pick a time after pool token number is stored in OmenAgentMarket
start_time = utc_datetime(2024, 10, 1)
start_time = utc_datetime(2024, 10, 28)
gabrielfior marked this conversation as resolved.
Show resolved Hide resolved

langfuse = Langfuse(
secret_key=api_keys.langfuse_secret_key.get_secret_value(),
Expand Down Expand Up @@ -211,51 +245,68 @@ def get_outcome_for_trace(
continue

if len(bets_with_traces) != len(bets):
pct_bets_without_traces = (len(bets) - len(bets_with_traces)) / len(bets)
print(
f"{len(bets) - len(bets_with_traces)} bets do not have a corresponding trace, ignoring them."
f"{len(bets) - len(bets_with_traces)} bets do not have a corresponding trace ({pct_bets_without_traces * 100:.2f}%), ignoring them."
)

simulations: list[dict[str, Any]] = []
details = []
details: list[SimulationDetail] = []

for strategy_idx, strategy in enumerate(strategies):
# "Born" agent with initial funding, simulate as if he was doing bets one by one.
starting_balance = 50.0
agent_balance = starting_balance
simulated_outcomes: list[SimulatedOutcome] = []

# ToDo - Can we add the value of tokens that weren't redeemed yet?
# Like a portfolio tracking.
for bet_with_trace in bets_with_traces:
bet = bet_with_trace.bet
trace = bet_with_trace.trace
simulated_outcome = get_outcome_for_trace(
strategy=strategy, trace=trace, market_outcome=bet.market_outcome
strategy=strategy,
trace=trace,
market_outcome=bet.market_outcome,
actual_placed_bet=bet,
tx_block_cache=tx_block_cache,
)
if simulated_outcome is None:
continue

simulated_outcomes.append(simulated_outcome)
agent_balance += simulated_outcome.profit

details.append(
{
"url": trace.market.url,
"market_p_yes": round(trace.market.current_p_yes, 4),
"agent_p_yes": round(trace.answer.p_yes, 4),
"agent_conf": round(trace.answer.confidence, 4),
"org_bet": round(bet.amount.amount, 4),
"sim_bet": round(simulated_outcome.size, 4),
"org_dir": bet.outcome,
"sim_dir": simulated_outcome.direction,
"org_profit": round(bet.profit.amount, 4),
"sim_profit": round(simulated_outcome.profit, 4),
}
simulation_detail = SimulationDetail(
gabrielfior marked this conversation as resolved.
Show resolved Hide resolved
strategy=repr(strategy),
url=trace.market.url,
market_p_yes=round(trace.market.current_p_yes, 4),
agent_p_yes=round(trace.answer.p_yes, 4),
agent_conf=round(trace.answer.confidence, 4),
org_bet=round(bet.amount.amount, 4),
sim_bet=round(simulated_outcome.size, 4),
org_dir=bet.outcome,
sim_dir=simulated_outcome.direction,
org_profit=round(bet.profit.amount, 4),
sim_profit=round(simulated_outcome.profit, 4),
timestamp=bet_with_trace.trace.timestamp_datetime,
)
details.append(simulation_detail)

details.sort(key=lambda x: x["sim_profit"], reverse=True)
pd.DataFrame.from_records(details).to_csv(
details.sort(key=lambda x: x.sim_profit, reverse=True)
details_df = pd.DataFrame.from_records([d.model_dump() for d in details])
details_df.to_csv(
output_directory / f"{agent_name} - {strategy} - all bets.csv",
index=False,
)

# Financial analysis
calc = SharpeRatioCalculator(details=details)
sharpe_output_simulation = calc.calculate_annual_sharpe_ratio()
sharpe_output_original = calc.calculate_annual_sharpe_ratio(
profit_col_name="org_profit"
)

sum_squared_errors = 0.0
for bet_with_trace in bets_with_traces:
bet = bet_with_trace.bet
Expand Down Expand Up @@ -283,6 +334,7 @@ def get_outcome_for_trace(
# We don't know these for the original run.
"start_balance": None,
"end_balance": None,
**sharpe_output_original.model_dump(),
}
)
else:
Expand All @@ -300,6 +352,7 @@ def get_outcome_for_trace(
"p_yes mse": p_yes_mse,
"start_balance": starting_balance,
"end_balance": agent_balance,
**sharpe_output_simulation.model_dump(),
}
)

Expand Down
Loading
Loading