From 56bb7d6ac20511dc49de462be9e1de3b1623b242 Mon Sep 17 00:00:00 2001
From: Herklos
Date: Thu, 19 Feb 2026 09:32:50 +0100
Subject: [PATCH] Remove ai modules

Signed-off-by: Herklos
---
 octobot_script/__init__.py        |   1 -
 octobot_script/ai/__init__.py     |  14 ---
 octobot_script/ai/agents.py       |  60 -----------
 octobot_script/ai/environments.py | 172 ------------------------------
 octobot_script/ai/models.py       |  18 ----
 5 files changed, 265 deletions(-)
 delete mode 100644 octobot_script/ai/__init__.py
 delete mode 100644 octobot_script/ai/agents.py
 delete mode 100644 octobot_script/ai/environments.py
 delete mode 100644 octobot_script/ai/models.py

diff --git a/octobot_script/__init__.py b/octobot_script/__init__.py
index ea3d86e..4fd4ad5 100644
--- a/octobot_script/__init__.py
+++ b/octobot_script/__init__.py
@@ -56,4 +56,3 @@ def _use_module_local_tentacles():
 from octobot_script.constants import *
 from octobot_script.api import *
 from octobot_script.model import *
-from octobot_script.ai import *
diff --git a/octobot_script/ai/__init__.py b/octobot_script/ai/__init__.py
deleted file mode 100644
index ff8ec2f..0000000
--- a/octobot_script/ai/__init__.py
+++ /dev/null
@@ -1,14 +0,0 @@
-try:
-    from octobot_script.ai.environments import *
-    from octobot_script.ai.models import *
-    from octobot_script.ai.agents import *
-
-    from gymnasium.envs.registration import register
-
-    register(
-        id='TradingEnv',
-        entry_point='octobot_script.ai.environments:TradingEnv',
-        disable_env_checker = True
-    )
-except ImportError:
-    pass
diff --git a/octobot_script/ai/agents.py b/octobot_script/ai/agents.py
deleted file mode 100644
index 1e76ecd..0000000
--- a/octobot_script/ai/agents.py
+++ /dev/null
@@ -1,60 +0,0 @@
-from collections import deque
-import random
-import numpy as np
-
-from octobot_script.ai.models import mlp
-
-class DQNAgent:
-    def __init__(self, action_size):
-        self.action_size = action_size
-        self.memory = deque(maxlen=2000)
-        self.gamma = 0.95  # discount rate
-        self.epsilon = 1.0  # exploration rate
-        self.epsilon_min = 0.01
-        self.epsilon_decay = 0.995
-        self.model = mlp(action_size)
-
-    def remember(self, state, action, reward, next_state, done):
-        self.memory.append((state, action, reward, next_state, done))
-
-    def act(self, state):
-        if np.random.rand() <= self.epsilon:
-            return random.randrange(self.action_size)
-        act_values = self.model.predict(state)
-        return np.argmax(act_values[0])  # returns action
-
-    def replay(self, batch_size=32, epochs=1, evaluate=False, tensorboard_callback=None):
-        # pylint: disable=unsubscriptable-object
-        """ vectorized implementation; 30x speed up compared with for loop """
-        minibatch = random.sample(self.memory, batch_size)
-
-        states = np.array([tup[0][0] for tup in minibatch])
-        actions = np.array([tup[1] for tup in minibatch])
-        rewards = np.array([tup[2] for tup in minibatch])
-        next_states = np.array([tup[3][0] for tup in minibatch])
-        done = np.array([tup[4] for tup in minibatch])
-
-        # Q(s', a)
-        target = rewards + self.gamma * np.amax(self.model.predict(next_states), axis=1)
-        # end state target is reward itself (no lookahead)
-        target[done] = rewards[done]
-
-        # Q(s, a)
-        target_f = self.model.predict(states)
-        # make the agent approximately map the current state to the future discounted reward
-        target_f[range(batch_size), actions] = target
-
-        self.model.fit(states, target_f, batch_size=batch_size, epochs=epochs, verbose=0, callbacks=[tensorboard_callback])
-
-        if self.epsilon > self.epsilon_min:
-            self.epsilon *= self.epsilon_decay
-
-        if evaluate:
-            return self.model.evaluate(states, target_f, batch_size=32)
-        return 0
-
-    def load(self, name):
-        self.model.load_weights(name)
-
-    def save(self, name):
-        self.model.save_weights(name)
\ No newline at end of file
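Note: the removed replay() is the standard vectorized DQN update, and its Bellman-target computation is worth recording for anyone reconstructing the behavior outside this repository. A minimal self-contained numpy sketch follows; model stands in for any Keras-style object whose predict() returns per-action Q-values, and all names here are illustrative, not OctoBot API:

    import numpy as np

    def q_targets(model, minibatch, gamma=0.95):
        # minibatch: list of (state, action, reward, next_state, done) tuples,
        # with states stored with a leading batch axis as in DQNAgent.remember().
        states = np.array([t[0][0] for t in minibatch])
        actions = np.array([t[1] for t in minibatch])
        rewards = np.array([t[2] for t in minibatch])
        next_states = np.array([t[3][0] for t in minibatch])
        done = np.array([t[4] for t in minibatch])

        # Bellman target: r + gamma * max_a' Q(s', a'); terminal transitions
        # keep the bare reward since there is no successor state to look ahead to.
        target = rewards + gamma * np.amax(model.predict(next_states), axis=1)
        target[done] = rewards[done]

        # Only the Q-value of the action actually taken is moved towards the
        # target; the other action columns keep the network's own predictions.
        target_f = model.predict(states)
        target_f[np.arange(len(minibatch)), actions] = target
        return states, target_f

Indexing with np.arange makes the fancy indexing explicit; the deleted code's target_f[range(batch_size), actions] relies on numpy converting the range object and is equivalent.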
diff --git a/octobot_script/ai/environments.py b/octobot_script/ai/environments.py
deleted file mode 100644
index 2f202be..0000000
--- a/octobot_script/ai/environments.py
+++ /dev/null
@@ -1,172 +0,0 @@
-# pylint: disable=maybe-no-member
-import gymnasium as gym
-from gymnasium import spaces
-import numpy as np
-
-import octobot_script as obs
-import octobot_trading.errors as octobot_trading_errors
-import octobot_trading.api as trading_api
-
-def basic_reward_function(current_portfolio_value, previous_portfolio_value, current_profitability, market_profitability, created_orders):
-    if previous_portfolio_value is None:
-        return 0
-    try:
-        pf_reward = np.log(float(current_portfolio_value) / float(previous_portfolio_value))
-        prof_reward = np.log(float(current_profitability) / float(market_profitability))
-        reward = 0 if np.isnan(pf_reward) else pf_reward + 0 if np.isnan(prof_reward) else prof_reward
-        return reward
-    except ZeroDivisionError:
-        return 0
-
-async def basic_trade_function(ctx, action):
-    try:
-        created_orders = []
-        if action == 0:
-            # TODO cancel orders
-            pass
-        elif action == 1:
-            created_orders.append(await obs.market(
-                ctx,
-                "buy",
-                amount=f"10%"
-            ))
-        elif action == 2:
-            created_orders.append(await obs.market(
-                ctx,
-                "sell",
-                amount=f"{10}%"
-            ))
-        elif action in [3, 4, 5]:
-            created_orders.append(await obs.limit(
-                ctx,
-                "buy",
-                amount=f"{1 if action == 3 else 10 if action == 4 else 30}%",
-                offset=f"-{1 if action == 3 else 2 if action == 4 else 3}%",
-            ))
-        elif action in [6, 7, 8]:
-            created_orders.append(await obs.limit(
-                ctx,
-                "sell",
-                amount=f"{1 if action == 6 else 10 if action == 7 else 30}%",
-                offset=f"{1 if action == 6 else 2 if action == 7 else 3}%",
-            ))
-        else:
-            # Nothing for now
-            pass
-        return created_orders
-    except TypeError:
-        pass
-
-# TODO move somewhere else
-def get_profitabilities(ctx):
-    return trading_api.get_profitability_stats(ctx.exchange_manager)
-
-# TODO move somewhere else
-def get_open_orders(ctx):
-    return []  # TODO
-
-# TODO move somewhere else
-def get_current_portfolio_value(ctx):
-    return trading_api.get_current_portfolio_value(ctx.exchange_manager)
-
-# TODO move somewhere else
-def get_current_portfolio(ctx):
-    return trading_api.portfolio.get_portfolio(ctx.exchange_manager)
-
-def get_flatten_pf(current_portfolio, symbol):
-    return np.array([float(current_portfolio[symbol.base].available),
-                     float(current_portfolio[symbol.base].total),
-                     float(current_portfolio[symbol.quote].available),
-                     float(current_portfolio[symbol.quote].total)], dtype=np.float32)
-
-class TradingEnv(gym.Env):
-    def __init__(self,
-                 action_size=1,
-                 dynamic_feature_functions = [],
-                 reward_function = basic_reward_function,
-                 trade_function = basic_trade_function,
-                 max_episode_duration = 'max',
-                 verbose = 1,
-                 name = "Rl",
-                 traded_symbols=[]
-                 ):
-        self.max_episode_duration = max_episode_duration
-        self.name = name
-        self.verbose = verbose
-        self.is_reset = False
-
-        self.traded_symbols = traded_symbols
-        self.static_features = []  # TODO these are computed once before being used in the environment
-        self.dynamic_feature_functions = dynamic_feature_functions  # are computed at each step of the environment
-        self._nb_features = 79 + len(self.traded_symbols) * 4 + len(self.static_features) + len(self.dynamic_feature_functions)
-
-        self.reward_function = reward_function
-        self.trade_function = trade_function
-        self.max_episode_duration = max_episode_duration
-
-        self.action_space = spaces.Discrete(action_size)
-        self.observation_space = spaces.Box(
-            -np.inf,
-            np.inf,
-            shape = [self._nb_features]
-        )
-
-        self.log_metrics = []
-        self._previous_portfolio_value = None
-
-    async def get_obs(self, ctx):
-        flatten_pf = np.concatenate([get_flatten_pf(get_current_portfolio(ctx), symbol) for symbol in self.traded_symbols])
-        # TODO open orders
-        dynamic_obs = []
-        for dynamic_feature_function in self.dynamic_feature_functions:
-            dynamic_obs.append(await dynamic_feature_function(ctx))
-        return np.concatenate([dynamic_obs[0], flatten_pf])
-
-    async def reset(self, seed = None, options = None):
-        super().reset(seed = seed)
-        self.is_reset = True
-        self._step = 0
-        self._idx = 0
-        if self.max_episode_duration != 'max':
-            self._idx = np.random.randint(
                low = self._idx,
-                high = len(self.df) - self.max_episode_duration - self._idx
-            )
-
-        return await self.get_obs(options['ctx'])
-
-    async def step(self, action):
-        ctx = action['ctx']
-        content = action['content']
-
-        forced_reward = None
-        # take content
-        try:
-            created_orders = await self.trade_function(ctx, content)
-        except octobot_trading_errors.PortfolioNegativeValueError:
-            forced_reward = -1
-
-        self._idx += 1
-        self._step += 1
-
-        done, truncated = False, False
-
-        if not done and forced_reward is None:
-            current_pf_value = get_current_portfolio_value(ctx)
-            profitabilities = get_profitabilities(ctx)
-            current_profitability = profitabilities[1]
-            market_profitability = profitabilities[3]
-            reward = self.reward_function(current_pf_value,
-                                          self._previous_portfolio_value,
-                                          current_profitability,
-                                          market_profitability,
-                                          created_orders)
-            self._previous_portfolio_value = current_pf_value
-        else:
-            reward = forced_reward
-        # TODO save reward
-
-        if done or truncated:
-            # TODO ?
-            None
-        return await self.get_obs(ctx), reward, done, truncated
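Note: one caveat in the removed basic_reward_function is worth recording. Python's conditional expression is right-associative, so the unparenthesized chain parses as 0 if np.isnan(pf_reward) else ((pf_reward + 0) if np.isnan(prof_reward) else prof_reward), meaning the two log-return terms are never actually summed when both are valid. A parenthesized sketch that sums them as the surrounding code seems to intend (standalone, not OctoBot API):

    import numpy as np

    def reward(current_value, previous_value, current_profitability, market_profitability):
        # Log-return of the portfolio value plus the log-ratio of bot vs. market
        # profitability; a NaN component is dropped rather than short-circuiting
        # the whole expression.
        if previous_value is None:
            return 0
        try:
            pf_reward = np.log(float(current_value) / float(previous_value))
            prof_reward = np.log(float(current_profitability) / float(market_profitability))
            return ((0 if np.isnan(pf_reward) else pf_reward)
                    + (0 if np.isnan(prof_reward) else prof_reward))
        except ZeroDivisionError:
            return 0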
diff --git a/octobot_script/ai/models.py b/octobot_script/ai/models.py
deleted file mode 100644
index 7ecdbfd..0000000
--- a/octobot_script/ai/models.py
+++ /dev/null
@@ -1,18 +0,0 @@
-from keras.models import Sequential
-from keras.layers import Dense
-from keras.optimizers import Adam
-
-
-def mlp(n_action, n_hidden_layer=1, n_neuron_per_layer=32,
-        activation='relu', loss='mse'):
-    """ A multi-layer perceptron """
-    print(n_action)
-    model = Sequential()
-
-    model.add(Dense(n_neuron_per_layer, input_dim=1, activation=activation))
-    for _ in range(n_hidden_layer):
-        model.add(Dense(n_neuron_per_layer, activation=activation))
-    model.add(Dense(n_action, activation='relu'))
-    model.compile(loss=loss, optimizer=Adam())
-    print(model.summary())
-    return model
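Note: the deleted mlp() hard-coded input_dim=1, while TradingEnv advertises an observation vector of _nb_features values, so the two modules only fit together for a one-feature observation; the 'relu' output layer also clips negative Q-values, which DQN targets can legitimately take. A Keras sketch that takes the observation width explicitly and uses a linear head (an assumed correction, not the original API):

    from keras.models import Sequential
    from keras.layers import Dense
    from keras.optimizers import Adam

    def mlp(n_features, n_action, n_hidden_layer=1, n_neuron_per_layer=32,
            activation='relu', loss='mse'):
        # Input width matches the environment's observation vector instead of
        # the hard-coded input_dim=1; the output is linear so Q-values may be
        # negative.
        model = Sequential()
        model.add(Dense(n_neuron_per_layer, input_dim=n_features, activation=activation))
        for _ in range(n_hidden_layer):
            model.add(Dense(n_neuron_per_layer, activation=activation))
        model.add(Dense(n_action, activation='linear'))
        model.compile(loss=loss, optimizer=Adam())
        return model

Wiring this variant into the removed DQNAgent would only require passing the environment's _nb_features through to the constructor, e.g. mlp(env.observation_space.shape[0], action_size).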