From 15dfa80ff4c0aa0c94bd8d6079baa64332ffefbb Mon Sep 17 00:00:00 2001
From: Azulinho
Date: Wed, 3 May 2023 22:48:28 +0100
Subject: [PATCH 1/8] only check symbol contents if not in self.coins

Move some of the logic that checks whether a coin is one we want to
process. Instead of always checking that the symbol is valid, we only
run those checks when the symbol is not already in self.coins.
---
 lib/bot.py | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/lib/bot.py b/lib/bot.py
index fb6c78a..839e47e 100644
--- a/lib/bot.py
+++ b/lib/bot.py
@@ -1508,6 +1508,18 @@ def process_line(
         # TODO: rework this, generate a binance_data blob to pass to
         # init_or_update_coin()
         if symbol not in self.coins:
+            if not symbol.endswith(self.cfg["PAIRING"]):
+                return
+
+            # discard any BULL/BEAR tokens
+            if any(
+                f"{w}{self.cfg['PAIRING']}" in symbol
+                for w in ["UP", "DOWN", "BULL", "BEAR"]
+            ) or any(
+                f"{self.cfg['PAIRING']}{w}" in symbol
+                for w in ["UP", "DOWN", "BULL", "BEAR"]
+            ):
+                return
             self.coins[symbol] = Coin(
                 symbol,
                 float(date),
@@ -1583,18 +1595,6 @@ def backtesting(self) -> None:
                 # symbol will be False if we fail to process the line fields
                 if not symbol:
                     continue
-                if not symbol.endswith(self.cfg["PAIRING"]):
-                    continue
-
-                # discard any BULL/BEAR tokens
-                if any(
-                    f"{w}{self.cfg['PAIRING']}" in symbol
-                    for w in ["UP", "DOWN", "BULL", "BEAR"]
-                ) or any(
-                    f"{self.cfg['PAIRING']}{w}" in symbol
-                    for w in ["UP", "DOWN", "BULL", "BEAR"]
-                ):
-                    continue
 
                 self.process_line(symbol, date, market_price)

From 6ef0365c13c73d537810417f465d96ae4dfc4bca Mon Sep 17 00:00:00 2001
From: Azulinho
Date: Wed, 3 May 2023 23:00:48 +0100
Subject: [PATCH 2/8] check if coin not in tickers in process_line()

Move the check for the symbol in self.tickers into process_line(). This
might cause an extra call to the cached 'date' calculation, but it is
cleaner, as that check belongs in process_line() and not in
split_logline().
---
 lib/bot.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/lib/bot.py b/lib/bot.py
index 839e47e..8229bb5 100644
--- a/lib/bot.py
+++ b/lib/bot.py
@@ -1468,9 +1468,6 @@ def split_logline(self, line: str) -> Tuple[Any, Any, Any]:
         except ValueError:
             return (False, False, False)
 
-        if symbol not in self.tickers:
-            return (False, False, False)
-
         # datetime is very slow, discard the .microseconds and fetch a
         # cached pre-calculated unix epoch timestamp
         date = c_date_from(line[0:19])
@@ -1508,6 +1505,8 @@ def process_line(
         # TODO: rework this, generate a binance_data blob to pass to
         # init_or_update_coin()
         if symbol not in self.coins:
+            if symbol not in self.tickers:
+                return
             if not symbol.endswith(self.cfg["PAIRING"]):
                 return
 

From d56af0a4fa813480211e468ee95cfee3972f6ae1 Mon Sep 17 00:00:00 2001
From: Azulinho
Date: Thu, 4 May 2023 09:16:24 +0100
Subject: [PATCH 3/8] make sure we write to disk on a self.quit

Instead of returning early, make sure we still write any data we might
have to disk, to avoid errors.
---
 lib/bot.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/bot.py b/lib/bot.py
index 8229bb5..f69f089 100644
--- a/lib/bot.py
+++ b/lib/bot.py
@@ -1569,7 +1569,7 @@ def backtesting(self) -> None:
         with requests.Session() as session:
             for logfile in self.cfg["PRICE_LOGS"]:
                 if self.quit:
-                    return
+                    break
                 for w, v in [
                     ("backtesting:", logfile),
                     ("wallet:", self.wallet),

From 57157dd8dc26057ef5fe31dd3f3521784399a1ed Mon Sep 17 00:00:00 2001
From: Azulinho
Date: Sat, 24 Jun 2023 15:49:17 +0100
Subject: [PATCH 4/8] pass -u to download_klines
---
 run                  |  8 ++++++--
 utils/pull_klines.py | 10 +++++++---
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/run b/run
index 42db31c..e9ccd4e 100755
--- a/run
+++ b/run
@@ -14,7 +14,7 @@ function usage() {
     echo "./run config-endpoint-service BIND=0.0.0.0 CONFIG_FILE=myconfig.yaml"
    echo "./run klines-caching-service BIND=0.0.0.0"
    echo "./run price_log_service BIND=0.0.0.0"
-   echo "./run download_price_logs FROM=20220101 TO=20220131"
+   echo "./run download_price_logs FROM=20220101 TO=20220131 UNIT=1m"
 }

 function free_port () { # looks for a free TCP port
@@ -72,6 +72,10 @@ function download_price_logs() { # downloads kline logs from binance
        exit 1
    fi

+   if [ -z "$UNIT" ]; then
+       export UNIT="1m"
+   fi
+
    docker run --rm \
        ${USE_TTY} \
        ${DOCKER_RUN_AS} \
@@ -81,7 +85,7 @@
        ${RUN_IN_BACKGROUND} \
        ${IMAGE}:${TAG} \
        /cryptobot/.venv/bin/python -u /cryptobot/utils/pull_klines.py \
-           -s ${FROM} -e ${TO}
+           -s ${FROM} -e ${TO} -u ${UNIT}
 }

 function docker_network() { # creates a docker network

diff --git a/utils/pull_klines.py b/utils/pull_klines.py
index f7f9e4c..6f2331d 100644
--- a/utils/pull_klines.py
+++ b/utils/pull_klines.py
@@ -20,7 +20,7 @@ def get_all_tickers():
     return sorted(_tickers)


-def pull_klines(k_symbol, k_start, k_end, limit=720):
+def pull_klines(k_symbol, k_start, k_end, _unit, limit=720):
     """returns klines for a particular day and ticker"""
     k_results = []
     print(f"start: {k_start} end: {k_end}")
@@ -28,7 +28,7 @@
         print(f"fetching chunk {k_start} <-> {k_start + (limit * 60000)}")
         klines = client.get_klines(
             symbol=k_symbol,
-            interval="1m",
+            interval=_unit,
             limit=limit,
             startTime=int(k_start),
             endTime=int(k_start + (limit * 60000)),
@@ -99,6 +99,9 @@
     parser.add_argument(
         "-e", "--end", help="end day to fetch klines for", required=False
     )
+    parser.add_argument(
+        "-u", "--unit", help="Unit to use 1m/5m/1h/1d", default="1m"
+    )

     args = parser.parse_args()
     s = args.start
     if args.end:
         e = args.end
     else:
         e = s

+    unit = args.unit
     start_dt = datetime.strptime(s, "%Y%m%d")
     end_dt = datetime.strptime(e, "%Y%m%d")
@@ -148,7 +152,7 @@
             print(f"getting klines for {ticker} on {day}")

             ticker_klines: list = []
-            for line in pull_klines(ticker, start, end):
+            for line in pull_klines(ticker, start, end, unit):
                 ticker_klines.append(line)

             if not ticker_klines:

From c17e8b324cac7833f50de89d364e6c4a22d78b4f Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 23 May 2023 06:49:46 +0000
Subject: [PATCH 5/8] Bump requests from 2.29.0 to 2.31.0

Bumps [requests](https://github.com/psf/requests) from 2.29.0 to 2.31.0.
- [Release notes](https://github.com/psf/requests/releases)
- [Changelog](https://github.com/psf/requests/blob/main/HISTORY.md)
- [Commits](https://github.com/psf/requests/compare/v2.29.0...v2.31.0)

---
updated-dependencies:
- dependency-name: requests
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot]
---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 970e624..4469dff 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -40,7 +40,7 @@ pytz-deprecation-shim==0.1.0.post0
 PyYAML==6.0
 pyzmq==25.0.2
 regex==2023.3.23
-requests==2.29.0
+requests==2.31.0
 six==1.16.0
 sortedcontainers==2.4.0
 ta==0.10.2

From bb624f1f6137adb0b4da99f4c8aa513046ed7f43 Mon Sep 17 00:00:00 2001
From: Azulinho
Date: Mon, 29 May 2023 20:58:14 +0100
Subject: [PATCH 6/8] add index_v2.json.gz
---
 run                        |  2 ++
 tests/index_v2.json.gz     | Bin 0 -> 310 bytes
 utils/prove-backtesting.py |  7 ++--
 utils/pull_klines.py       | 76 +++++++++++++++++++++++++++----------
 4 files changed, 61 insertions(+), 24 deletions(-)
 create mode 100644 tests/index_v2.json.gz

diff --git a/run b/run
index e9ccd4e..471f97e 100755
--- a/run
+++ b/run
@@ -311,9 +311,11 @@ function github_actions_ci_pr_docker_tests() {
        cat tests/price.log.gz | grep BTCUSDT | grep 2021-12-${ta} |gzip -1 > log/BTCUSDT/202112${ta}.log.gz
    done
    cp tests/index.json.gz log/
+   cp tests/index_v2.json.gz log/

    export PRICE_LOG_PORT=$( cat ${STATE_DIR}/.price_log_service.port)
    curl --output /dev/null http://${DOCKER_IP}:${PRICE_LOG_PORT}/index.json.gz
+   curl --output /dev/null http://${DOCKER_IP}:${PRICE_LOG_PORT}/index_v2.json.gz

    echo BuyMoonSellRecoveryStrategy.yaml
    cp tests/BuyMoonSellRecoveryStrategy.yaml configs/

diff --git a/tests/index_v2.json.gz b/tests/index_v2.json.gz
new file mode 100644
index 0000000000000000000000000000000000000000..d5d7572bb1cc8d9825220cc1efa5f88ac9d5b79a
GIT binary patch
literal 310
zcmb2|=HPf&+>*k;oSB!BS`lAnq?c8kpU3canl*P*f<$Y1eXE6gMPJ$7T^w7lEbzT}
zM`(-s-|vrvTlMBFH8y#%cW`tlMhN`LS`GhTeke5yY$-JnOk+YZ|hx?o1A_7$St$2+s@9(^;nx5CZ15rh(TnO
zHin5~BR0jyaFl<4wl!tz{df1omT!CUxTS2dRNT7f4}X5E3)-#p(&BRAy2lgyj$6%p
zQL*ar%$|v_B7&QjvH9}Ok}@&P$hvT4!Ai&A#mn4$duK_Rnr3BPxw2wqAXq^6^@`fR
kZd*=$XWD;k?}5DsKHlfL^Jm)P>MsnJ@

diff --git a/utils/pull_klines.py b/utils/pull_klines.py
--- a/utils/pull_klines.py
+++ b/utils/pull_klines.py
+def gather_symbols_and_logs(log_dir) -> tuple[set[str], set[str]]:
+    """returns lists of symbols and dates"""
     date_list = set()
     symbols_list = set()
-    index = {}

     # gather all date.log.gz logs and
     # all symbol dirs
-    for item in sorted(os.listdir(log_dir)):
+    for dir_item in sorted(os.listdir(log_dir)):
         if (
-            os.path.isfile(f"{log_dir}/{item}")
-            and item.startswith("20")
-            and ".log." in item
+            os.path.isfile(f"{log_dir}/{dir_item}")
+            and dir_item.startswith("20")
+            and dir_item.endswith(".log.gz")
         ):
-            date = item.split(".")[0]
+            date: str = dir_item.split(".")[0]
             date_list.add(date)

-        if os.path.isdir(f"{log_dir}/{item}"):
-            symbols_list.add(item)
+        if os.path.isdir(f"{log_dir}/{dir_item}"):
+            symbols_list.add(dir_item)
+
+    return (set(symbols_list), set(date_list))
+
+
+def gather_symbols_per_date(
+    log_dir, symbols_list, date_list
+) -> dict[str, list[str]]:
+    """returns map of dates containing symbols available on that date"""
+    dates_idx: dict[str, list[str]] = {}

     # we'll store all symbol logs in each date
     for date in sorted(date_list):
-        index[date] = set()
+        if date not in dates_idx:
+            dates_idx[date] = []

-        # iterate over all the symbols and gather all the
-        # logfiles in in each one of those symbol dirs
         for _symbol in sorted(symbols_list):
-            logs = os.listdir(f"{log_dir}/{_symbol}")
+            logs: list[str] = os.listdir(f"{log_dir}/{_symbol}")
             for _log in sorted(logs):
                 if not os.path.isfile(f"{log_dir}/{_symbol}/{_log}"):
                     continue
-                date = _log.split(".")[0]
-                index[date].add(_symbol)
+                _date: str = _log.split(".")[0]
+                dates_idx[_date].append(_symbol)
+    return dates_idx

-    tmp = index
-    index = {}
-    for date in tmp.keys():  # pylint: disable=C0206,C0201
-        index[date] = list(tmp[date])
+
+def generate_index(log_dir="log") -> None:
+    """generates index.json with dates <- [coins]"""
+
+    print("generating index...")
+    symbols_list, date_list = gather_symbols_and_logs(log_dir)
+
+    dates_index: dict[str, list[str]] = gather_symbols_per_date(
+        log_dir, symbols_list, date_list
+    )
+
+    # generate index_v1
+    print("writing index.json.gz...")
     with gzip.open(
         f"{log_dir}/index.json.gz", "wt", encoding="utf-8"
+    ) as index_json:
+        index_json.write(json.dumps(dates_index, indent=4))
+
+    # generate index_v2
+    print("generating index_v2.json.gz...")
+    index: dict[str, dict] = {"DATES": {}, "COINS": {}}
+    for date in dates_index.keys():  # pylint: disable=C0206,C0201
+        index["DATES"][date] = list(dates_index[date])
+
+    for _symbol in sorted(os.listdir(log_dir)):
+        if os.path.isdir(f"{log_dir}/{_symbol}"):
+            logs: list[str] = os.listdir(f"{log_dir}/{_symbol}")
+            index["COINS"][_symbol] = sorted(logs)
+
+    print("writing index_v2.json.gz...")
+    with gzip.open(
+        f"{log_dir}/index_v2.json.gz", "wt", encoding="utf-8"
     ) as index_json:
         index_json.write(json.dumps(index, indent=4))


 if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
+    parser: argparse.ArgumentParser = argparse.ArgumentParser()
     parser.add_argument("-s", "--start", help="start day to fetch klines for")
     parser.add_argument(
         "-e", "--end", help="end day to fetch klines for", required=False

From 67f26c67e71c4d611b727d4972a8e7ab6d6094ff Mon Sep 17 00:00:00 2001
From: Azulinho
Date: Sat, 10 Jun 2023 14:32:56 +0100
Subject: [PATCH 7/8] skip dupe condition check
---
 run                        | 2 +-
 utils/prove-backtesting.py | 6 ------
 2 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/run b/run
index 471f97e..174f844 100755
--- a/run
+++ b/run
@@ -358,7 +358,7 @@ function github_actions_ci_pr_docker_tests() {
        TAG=pr CONFIG_FILE=prove-backtesting.yaml

    wc -l results/prove-backtesting.prove-backtesting.yaml.txt \
-       | grep '29'
+       | grep '44'

    for ta in 01 02 03 04 05 06 07 08 09
    do

diff --git a/utils/prove-backtesting.py b/utils/prove-backtesting.py
index 66e799a..c993e0e 100644
--- a/utils/prove-backtesting.py
+++ b/utils/prove-backtesting.py
@@ -777,12 +777,6 @@ def run_optimized_config(self, s_investment: float) -> float:
             price_logs = pv.generate_price_log_list(rollforward_dates)
             tickers = pv.gather_best_results_from_backtesting_log("coincfg")

-            # if our backtesting gave us no tickers,
-            # we'll skip this forward testing run
-            if not tickers:
-                log_msg("forwardtesting config contains no tickers, skipping run")
-                continue
-
             log_msg(
                 f"now forwardtesting {rollforward_dates[0]}...{rollforward_dates[-1]}"
             )

From f92aaebf9d5b2e25a749a51663fd4ff8738d4018 Mon Sep 17 00:00:00 2001
From: Azulinho
Date: Sat, 10 Jun 2023 20:18:06 +0100
Subject: [PATCH 8/8] re-work retry calls
---
 lib/helpers.py             | 4 ++--
 utils/prove-backtesting.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/helpers.py b/lib/helpers.py
index cebbb6b..406e137 100644
--- a/lib/helpers.py
+++ b/lib/helpers.py
@@ -11,7 +11,7 @@
 import udatetime
 from binance.client import Client
 from filelock import SoftFileLock
-from tenacity import retry, wait_exponential
+from tenacity import retry, wait_fixed, stop_after_delay


 def mean(values: list[float]) -> float:
@@ -44,7 +44,7 @@ def c_from_timestamp(date: float) -> datetime:
     return datetime.fromtimestamp(date)


-@retry(wait=wait_exponential(multiplier=1, max=3))
+@retry(wait=wait_fixed(2), stop=stop_after_delay(10))
 def cached_binance_client(access_key: str, secret_key: str) -> Client:
     """retry wrapper for binance client first call"""

diff --git a/utils/prove-backtesting.py b/utils/prove-backtesting.py
index c993e0e..3207bb7 100644
--- a/utils/prove-backtesting.py
+++ b/utils/prove-backtesting.py
@@ -16,10 +16,10 @@
 import pandas as pd
 import requests
 import yaml
-from tenacity import retry, wait_exponential
+from tenacity import retry, wait_fixed, stop_after_delay


-@retry(wait=wait_exponential(multiplier=2, min=1, max=30))
+@retry(wait=wait_fixed(2), stop=stop_after_delay(10))
 def get_index_json(query: str) -> requests.Response:
     """retry wrapper for requests calls"""
     response: requests.Response = requests.get(query, timeout=5)
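
---

Notes on the series (reviewer sketches, not part of the patches):

On PATCH 1/8 and 2/8: after both patches, all of the symbol gating runs
inside process_line() and only for symbols not yet tracked in self.coins.
A minimal sketch of the combined checks, using the attribute names from
the diffs above; the helper name wanted_symbol() is illustrative and does
not exist in lib/bot.py:

    def wanted_symbol(self, symbol: str) -> bool:
        """should a not-yet-tracked symbol become a Coin?"""
        # cheapest check first: only symbols we hold tickers for
        if symbol not in self.tickers:
            return False
        # only symbols quoted in our configured pairing, e.g. BTCUSDT
        if not self.cfg["PAIRING"] or not symbol.endswith(self.cfg["PAIRING"]):
            return False
        # discard leveraged UP/DOWN/BULL/BEAR tokens
        words = ["UP", "DOWN", "BULL", "BEAR"]
        if any(f"{w}{self.cfg['PAIRING']}" in symbol for w in words) or any(
            f"{self.cfg['PAIRING']}{w}" in symbol for w in words
        ):
            return False
        return True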
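On PATCH 4/8: the kline interval can now be chosen at download time; UNIT
is optional and the run script defaults it to "1m" when unset. A usage
example, with placeholder dates:

    ./run download_price_logs FROM=20220101 TO=20220131 UNIT=5m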
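On PATCH 6/8: generate_index() now writes two files. index.json.gz keeps
the old date -> [symbols] layout, while index_v2.json.gz nests that map
under a DATES key and adds a COINS key mapping each symbol to its sorted
log filenames. Illustrative shapes only; the dates, symbols, and
filenames below are made up:

    index_v1 = {
        "20211231": ["BTCUSDT", "ETHUSDT"],
    }
    index_v2 = {
        "DATES": {"20211231": ["BTCUSDT", "ETHUSDT"]},
        "COINS": {"BTCUSDT": ["20211231.log.gz"], "ETHUSDT": ["20211231.log.gz"]},
    }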
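On PATCH 8/8: the old wait_exponential(multiplier=1, max=3) policy backed
off exponentially (capped at 3 seconds) but, having no stop condition,
retried forever. The new wait_fixed(2) plus stop_after_delay(10) policy
retries every 2 seconds and gives up roughly 10 seconds after the first
attempt, raising tenacity.RetryError. A self-contained demo of the new
behaviour; flaky() is invented for illustration:

    from tenacity import retry, stop_after_delay, wait_fixed

    @retry(wait=wait_fixed(2), stop=stop_after_delay(10))
    def flaky() -> str:
        # always fails, to show the retry loop ending in RetryError
        raise ConnectionError("endpoint not reachable yet")

    try:
        flaky()
    except Exception as exc:  # tenacity.RetryError after ~10 seconds
        print(f"gave up: {exc!r}")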