diff --git a/Kernel.py b/Kernel.py index a34dcb827..513071fdb 100644 --- a/Kernel.py +++ b/Kernel.py @@ -1,566 +1,673 @@ +import os +import queue +import sys + import numpy as np import pandas as pd -import os, queue, sys from message.Message import MessageType - from util.util import log_print class Kernel: - def __init__(self, kernel_name, random_state = None): - # kernel_name is for human readers only. - self.name = kernel_name - self.random_state = random_state - - if not random_state: - raise ValueError("A valid, seeded np.random.RandomState object is required " + - "for the Kernel", self.name) - sys.exit() - - # A single message queue to keep everything organized by increasing - # delivery timestamp. - self.messages = queue.PriorityQueue() - - # currentTime is None until after kernelStarting() event completes - # for all agents. This is a pd.Timestamp that includes the date. - self.currentTime = None - - # Timestamp at which the Kernel was created. Primarily used to - # create a unique log directory for this run. Also used to - # print some elapsed time and messages per second statistics. - self.kernelWallClockStart = pd.Timestamp('now') - - # TODO: This is financial, and so probably should not be here... - self.meanResultByAgentType = {} - self.agentCountByType = {} - - # The Kernel maintains a summary log to which agents can write - # information that should be centralized for very fast access - # by separate statistical summary programs. Detailed event - # logging should go only to the agent's individual log. This - # is for things like "final position value" and such. - self.summaryLog = [] - - log_print ("Kernel initialized: {}", self.name) - - - # This is called to actually start the simulation, once all agent - # configuration is done. 
- def runner(self, agents = [], startTime = None, stopTime = None, - num_simulations = 1, defaultComputationDelay = 1, - defaultLatency = 1, agentLatency = None, latencyNoise = [ 1.0 ], - agentLatencyModel = None, skip_log = False, - seed = None, oracle = None, log_dir = None): - - # agents must be a list of agents for the simulation, - # based on class agent.Agent - self.agents = agents - - # Simulation custom state in a freeform dictionary. Allows config files - # that drive multiple simulations, or require the ability to generate - # special logs after simulation, to obtain needed output without special - # case code in the Kernel. Per-agent state should be handled using the - # provided updateAgentState() method. - self.custom_state = {} - - # The kernel start and stop time (first and last timestamp in - # the simulation, separate from anything like exchange open/close). - self.startTime = startTime - self.stopTime = stopTime - - # The global seed, NOT used for anything agent-related. - self.seed = seed - - # Should the Kernel skip writing agent logs? - self.skip_log = skip_log - - # The data oracle for this simulation, if needed. - self.oracle = oracle - - # If a log directory was not specified, use the initial wallclock. - if log_dir: - self.log_dir = log_dir - else: - self.log_dir = str(int(self.kernelWallClockStart.timestamp())) - - # The kernel maintains a current time for each agent to allow - # simulation of per-agent computation delays. The agent's time - # is pushed forward (see below) each time it awakens, and it - # cannot receive new messages/wakeups until the global time - # reaches the agent's time. (i.e. it cannot act again while - # it is still "in the future") - - # This also nicely enforces agents being unable to act before - # the simulation startTime. 
- self.agentCurrentTimes = [self.startTime] * len(agents) - - # agentComputationDelays is in nanoseconds, starts with a default - # value from config, and can be changed by any agent at any time - # (for itself only). It represents the time penalty applied to - # an agent each time it is awakened (wakeup or recvMsg). The - # penalty applies _after_ the agent acts, before it may act again. - # TODO: this might someday change to pd.Timedelta objects. - self.agentComputationDelays = [defaultComputationDelay] * len(agents) - - # If an agentLatencyModel is defined, it will be used instead of - # the older, non-model-based attributes. - self.agentLatencyModel = agentLatencyModel - - # If an agentLatencyModel is NOT defined, the older parameters: - # agentLatency (or defaultLatency) and latencyNoise should be specified. - # These should be considered deprecated and will be removed in the future. - - # If agentLatency is not defined, define it using the defaultLatency. - # This matrix defines the communication delay between every pair of - # agents. - if agentLatency is None: - self.agentLatency = [[defaultLatency] * len(agents)] * len(agents) - else: - self.agentLatency = agentLatency - - # There is a noise model for latency, intended to be a one-sided - # distribution with the peak at zero. By default there is no noise - # (100% chance to add zero ns extra delay). Format is a list with - # list index = ns extra delay, value = probability of this delay. - self.latencyNoise = latencyNoise - - # The kernel maintains an accumulating additional delay parameter - # for the current agent. This is applied to each message sent - # and upon return from wakeup/receiveMessage, in addition to the - # agent's standard computation delay. However, it never carries - # over to future wakeup/receiveMessage calls. It is useful for - # staggering of sent messages. 
- self.currentAgentAdditionalDelay = 0 - - log_print ("Kernel started: {}", self.name) - log_print ("Simulation started!") - - # Note that num_simulations has not yet been really used or tested - # for anything. Instead we have been running multiple simulations - # with coarse parallelization from a shell script. - for sim in range(num_simulations): - log_print ("Starting sim {}", sim) - - # Event notification for kernel init (agents should not try to - # communicate with other agents, as order is unknown). Agents - # should initialize any internal resources that may be needed - # to communicate with other agents during agent.kernelStarting(). - # Kernel passes self-reference for agents to retain, so they can - # communicate with the kernel in the future (as it does not have - # an agentID). - log_print ("\n--- Agent.kernelInitializing() ---") - for agent in self.agents: - agent.kernelInitializing(self) - - # Event notification for kernel start (agents may set up - # communications or references to other agents, as all agents - # are guaranteed to exist now). Agents should obtain references - # to other agents they require for proper operation (exchanges, - # brokers, subscription services...). Note that we generally - # don't (and shouldn't) permit agents to get direct references - # to other agents (like the exchange) as they could then bypass - # the Kernel, and therefore simulation "physics" to send messages - # directly and instantly or to perform disallowed direct inspection - # of the other agent's state. Agents should instead obtain the - # agent ID of other agents, and communicate with them only via - # the Kernel. Direct references to utility objects that are not - # agents are acceptable (e.g. oracles). - log_print ("\n--- Agent.kernelStarting() ---") - for agent in self.agents: - agent.kernelStarting(self.startTime) - - # Set the kernel to its startTime. 
- self.currentTime = self.startTime - log_print ("\n--- Kernel Clock started ---") - log_print ("Kernel.currentTime is now {}", self.currentTime) - - # Start processing the Event Queue. - log_print ("\n--- Kernel Event Queue begins ---") - log_print ("Kernel will start processing messages. Queue length: {}", len(self.messages.queue)) - - # Track starting wall clock time and total message count for stats at the end. - eventQueueWallClockStart = pd.Timestamp('now') - ttl_messages = 0 - - # Process messages until there aren't any (at which point there never can - # be again, because agents only "wake" in response to messages), or until - # the kernel stop time is reached. - while not self.messages.empty() and self.currentTime and (self.currentTime <= self.stopTime): - # Get the next message in timestamp order (delivery time) and extract it. - self.currentTime, event = self.messages.get() - msg_recipient, msg_type, msg = event - - # Periodically print the simulation time and total messages, even if muted. - if ttl_messages % 100000 == 0: - print ("\n--- Simulation time: {}, messages processed: {}, wallclock elapsed: {} ---\n".format( - self.fmtTime(self.currentTime), ttl_messages, pd.Timestamp('now') - eventQueueWallClockStart)) - - log_print ("\n--- Kernel Event Queue pop ---") - log_print ("Kernel handling {} message for agent {} at time {}", - msg_type, msg_recipient, self.fmtTime(self.currentTime)) - - ttl_messages += 1 - - # In between messages, always reset the currentAgentAdditionalDelay. + def __init__(self, kernel_name, random_state=None): + # kernel_name is for human readers only. + self.name = kernel_name + self.random_state = random_state + + if not random_state: + raise ValueError( + "A valid, seeded np.random.RandomState object is required " + "for the Kernel", + self.name, + ) + sys.exit() + + # A single message queue to keep everything organized by increasing + # delivery timestamp. 
+ self.messages = queue.PriorityQueue() + + # currentTime is None until after kernelStarting() event completes + # for all agents. This is a pd.Timestamp that includes the date. + self.currentTime = None + + # Timestamp at which the Kernel was created. Primarily used to + # create a unique log directory for this run. Also used to + # print some elapsed time and messages per second statistics. + self.kernelWallClockStart = pd.Timestamp("now") + + # TODO: This is financial, and so probably should not be here... + self.meanResultByAgentType = {} + self.agentCountByType = {} + + # The Kernel maintains a summary log to which agents can write + # information that should be centralized for very fast access + # by separate statistical summary programs. Detailed event + # logging should go only to the agent's individual log. This + # is for things like "final position value" and such. + self.summaryLog = [] + + log_print("Kernel initialized: {}", self.name) + + # This is called to actually start the simulation, once all agent + # configuration is done. + def runner( + self, + agents=[], + startTime=None, + stopTime=None, + num_simulations=1, + defaultComputationDelay=1, + defaultLatency=1, + agentLatency=None, + latencyNoise=[1.0], + agentLatencyModel=None, + skip_log=False, + seed=None, + oracle=None, + log_dir=None, + ): + + # agents must be a list of agents for the simulation, + # based on class agent.Agent + self.agents = agents + + # Simulation custom state in a freeform dictionary. Allows config files + # that drive multiple simulations, or require the ability to generate + # special logs after simulation, to obtain needed output without special + # case code in the Kernel. Per-agent state should be handled using the + # provided updateAgentState() method. + self.custom_state = {} + + # The kernel start and stop time (first and last timestamp in + # the simulation, separate from anything like exchange open/close). 
+ self.startTime = startTime + self.stopTime = stopTime + + # The global seed, NOT used for anything agent-related. + self.seed = seed + + # Should the Kernel skip writing agent logs? + self.skip_log = skip_log + + # The data oracle for this simulation, if needed. + self.oracle = oracle + + # If a log directory was not specified, use the initial wallclock. + if log_dir: + self.log_dir = log_dir + else: + self.log_dir = str(int(self.kernelWallClockStart.timestamp())) + + # The kernel maintains a current time for each agent to allow + # simulation of per-agent computation delays. The agent's time + # is pushed forward (see below) each time it awakens, and it + # cannot receive new messages/wakeups until the global time + # reaches the agent's time. (i.e. it cannot act again while + # it is still "in the future") + + # This also nicely enforces agents being unable to act before + # the simulation startTime. + self.agentCurrentTimes = [self.startTime] * len(agents) + + # agentComputationDelays is in nanoseconds, starts with a default + # value from config, and can be changed by any agent at any time + # (for itself only). It represents the time penalty applied to + # an agent each time it is awakened (wakeup or recvMsg). The + # penalty applies _after_ the agent acts, before it may act again. + # TODO: this might someday change to pd.Timedelta objects. + self.agentComputationDelays = [defaultComputationDelay] * len(agents) + + # If an agentLatencyModel is defined, it will be used instead of + # the older, non-model-based attributes. + self.agentLatencyModel = agentLatencyModel + + # If an agentLatencyModel is NOT defined, the older parameters: + # agentLatency (or defaultLatency) and latencyNoise should be specified. + # These should be considered deprecated and will be removed in the future. + + # If agentLatency is not defined, define it using the defaultLatency. + # This matrix defines the communication delay between every pair of + # agents. 
+ if agentLatency is None: + self.agentLatency = [[defaultLatency] * len(agents)] * len(agents) + else: + self.agentLatency = agentLatency + + # There is a noise model for latency, intended to be a one-sided + # distribution with the peak at zero. By default there is no noise + # (100% chance to add zero ns extra delay). Format is a list with + # list index = ns extra delay, value = probability of this delay. + self.latencyNoise = latencyNoise + + # The kernel maintains an accumulating additional delay parameter + # for the current agent. This is applied to each message sent + # and upon return from wakeup/receiveMessage, in addition to the + # agent's standard computation delay. However, it never carries + # over to future wakeup/receiveMessage calls. It is useful for + # staggering of sent messages. self.currentAgentAdditionalDelay = 0 - # Dispatch message to agent. - if msg_type == MessageType.WAKEUP: - - # Who requested this wakeup call? - agent = msg_recipient - - # Test to see if the agent is already in the future. If so, - # delay the wakeup until the agent can act again. - if self.agentCurrentTimes[agent] > self.currentTime: - # Push the wakeup call back into the PQ with a new time. - self.messages.put((self.agentCurrentTimes[agent], - (msg_recipient, msg_type, msg))) - log_print ("Agent in future: wakeup requeued for {}", - self.fmtTime(self.agentCurrentTimes[agent])) - continue - - # Set agent's current time to global current time for start - # of processing. - self.agentCurrentTimes[agent] = self.currentTime - - # Wake the agent. - agents[agent].wakeup(self.currentTime) - - # Delay the agent by its computation delay plus any transient additional delay requested. 
- self.agentCurrentTimes[agent] += pd.Timedelta(self.agentComputationDelays[agent] + - self.currentAgentAdditionalDelay) - - log_print ("After wakeup return, agent {} delayed from {} to {}", - agent, self.fmtTime(self.currentTime), self.fmtTime(self.agentCurrentTimes[agent])) - - elif msg_type == MessageType.MESSAGE: - - # Who is receiving this message? - agent = msg_recipient - - # Test to see if the agent is already in the future. If so, - # delay the message until the agent can act again. - if self.agentCurrentTimes[agent] > self.currentTime: - # Push the message back into the PQ with a new time. - self.messages.put((self.agentCurrentTimes[agent], - (msg_recipient, msg_type, msg))) - log_print ("Agent in future: message requeued for {}", - self.fmtTime(self.agentCurrentTimes[agent])) - continue - - # Set agent's current time to global current time for start - # of processing. - self.agentCurrentTimes[agent] = self.currentTime - - # Deliver the message. - agents[agent].receiveMessage(self.currentTime, msg) - - # Delay the agent by its computation delay plus any transient additional delay requested. - self.agentCurrentTimes[agent] += pd.Timedelta(self.agentComputationDelays[agent] + - self.currentAgentAdditionalDelay) - - log_print ("After receiveMessage return, agent {} delayed from {} to {}", - agent, self.fmtTime(self.currentTime), self.fmtTime(self.agentCurrentTimes[agent])) - + log_print("Kernel started: {}", self.name) + log_print("Simulation started!") + + # Note that num_simulations has not yet been really used or tested + # for anything. Instead we have been running multiple simulations + # with coarse parallelization from a shell script. + for sim in range(num_simulations): + log_print("Starting sim {}", sim) + + # Event notification for kernel init (agents should not try to + # communicate with other agents, as order is unknown). 
Agents + # should initialize any internal resources that may be needed + # to communicate with other agents during agent.kernelStarting(). + # Kernel passes self-reference for agents to retain, so they can + # communicate with the kernel in the future (as it does not have + # an agentID). + log_print("\n--- Agent.kernelInitializing() ---") + for agent in self.agents: + agent.kernelInitializing(self) + + # Event notification for kernel start (agents may set up + # communications or references to other agents, as all agents + # are guaranteed to exist now). Agents should obtain references + # to other agents they require for proper operation (exchanges, + # brokers, subscription services...). Note that we generally + # don't (and shouldn't) permit agents to get direct references + # to other agents (like the exchange) as they could then bypass + # the Kernel, and therefore simulation "physics" to send messages + # directly and instantly or to perform disallowed direct inspection + # of the other agent's state. Agents should instead obtain the + # agent ID of other agents, and communicate with them only via + # the Kernel. Direct references to utility objects that are not + # agents are acceptable (e.g. oracles). + log_print("\n--- Agent.kernelStarting() ---") + for agent in self.agents: + agent.kernelStarting(self.startTime) + + # Set the kernel to its startTime. + self.currentTime = self.startTime + log_print("\n--- Kernel Clock started ---") + log_print("Kernel.currentTime is now {}", self.currentTime) + + # Start processing the Event Queue. + log_print("\n--- Kernel Event Queue begins ---") + log_print( + "Kernel will start processing messages. Queue length: {}", + len(self.messages.queue), + ) + + # Track starting wall clock time and total message count for stats at the end. 
+ eventQueueWallClockStart = pd.Timestamp("now") + ttl_messages = 0 + + # Process messages until there aren't any (at which point there never can + # be again, because agents only "wake" in response to messages), or until + # the kernel stop time is reached. + while not self.messages.empty() and self.currentTime and (self.currentTime <= self.stopTime): + # Get the next message in timestamp order (delivery time) and extract it. + self.currentTime, event = self.messages.get() + msg_recipient, msg_type, msg = event + + # Periodically print the simulation time and total messages, even if muted. + if ttl_messages % 100000 == 0: + print( + "\n--- Simulation time: {}, messages processed: {}, wallclock elapsed: {} ---\n".format( + self.fmtTime(self.currentTime), + ttl_messages, + pd.Timestamp("now") - eventQueueWallClockStart, + ) + ) + + log_print("\n--- Kernel Event Queue pop ---") + log_print( + "Kernel handling {} message for agent {} at time {}", + msg_type, + msg_recipient, + self.fmtTime(self.currentTime), + ) + + ttl_messages += 1 + + # In between messages, always reset the currentAgentAdditionalDelay. + self.currentAgentAdditionalDelay = 0 + + # Dispatch message to agent. + if msg_type == MessageType.WAKEUP: + + # Who requested this wakeup call? + agent = msg_recipient + + # Test to see if the agent is already in the future. If so, + # delay the wakeup until the agent can act again. + if self.agentCurrentTimes[agent] > self.currentTime: + # Push the wakeup call back into the PQ with a new time. + self.messages.put( + ( + self.agentCurrentTimes[agent], + (msg_recipient, msg_type, msg), + ) + ) + log_print( + "Agent in future: wakeup requeued for {}", + self.fmtTime(self.agentCurrentTimes[agent]), + ) + continue + + # Set agent's current time to global current time for start + # of processing. + self.agentCurrentTimes[agent] = self.currentTime + + # Wake the agent. 
+ agents[agent].wakeup(self.currentTime) + + # Delay the agent by its computation delay plus any transient additional delay requested. + self.agentCurrentTimes[agent] += pd.Timedelta( + self.agentComputationDelays[agent] + self.currentAgentAdditionalDelay + ) + + log_print( + "After wakeup return, agent {} delayed from {} to {}", + agent, + self.fmtTime(self.currentTime), + self.fmtTime(self.agentCurrentTimes[agent]), + ) + + elif msg_type == MessageType.MESSAGE: + + # Who is receiving this message? + agent = msg_recipient + + # Test to see if the agent is already in the future. If so, + # delay the message until the agent can act again. + if self.agentCurrentTimes[agent] > self.currentTime: + # Push the message back into the PQ with a new time. + self.messages.put( + ( + self.agentCurrentTimes[agent], + (msg_recipient, msg_type, msg), + ) + ) + log_print( + "Agent in future: message requeued for {}", + self.fmtTime(self.agentCurrentTimes[agent]), + ) + continue + + # Set agent's current time to global current time for start + # of processing. + self.agentCurrentTimes[agent] = self.currentTime + + # Deliver the message. + agents[agent].receiveMessage(self.currentTime, msg) + + # Delay the agent by its computation delay plus any transient additional delay requested. 
+ self.agentCurrentTimes[agent] += pd.Timedelta( + self.agentComputationDelays[agent] + self.currentAgentAdditionalDelay + ) + + log_print( + "After receiveMessage return, agent {} delayed from {} to {}", + agent, + self.fmtTime(self.currentTime), + self.fmtTime(self.agentCurrentTimes[agent]), + ) + + else: + raise ValueError( + "Unknown message type found in queue", + "currentTime:", + self.currentTime, + "messageType:", + self.msg.type, + ) + + if self.messages.empty(): + log_print("\n--- Kernel Event Queue empty ---") + + if self.currentTime and (self.currentTime > self.stopTime): + log_print("\n--- Kernel Stop Time surpassed ---") + + # Record wall clock stop time and elapsed time for stats at the end. + eventQueueWallClockStop = pd.Timestamp("now") + + eventQueueWallClockElapsed = eventQueueWallClockStop - eventQueueWallClockStart + + # Event notification for kernel end (agents may communicate with + # other agents, as all agents are still guaranteed to exist). + # Agents should not destroy resources they may need to respond + # to final communications from other agents. + log_print("\n--- Agent.kernelStopping() ---") + for agent in agents: + agent.kernelStopping() + + # Event notification for kernel termination (agents should not + # attempt communication with other agents, as order of termination + # is unknown). Agents should clean up all used resources as the + # simulation program may not actually terminate if num_simulations > 1. + log_print("\n--- Agent.kernelTerminating() ---") + for agent in agents: + agent.kernelTerminating() + + print( + "Event Queue elapsed: {}, messages: {}, messages per second: {:0.1f}".format( + eventQueueWallClockElapsed, + ttl_messages, + ttl_messages / (eventQueueWallClockElapsed / (np.timedelta64(1, "s"))), + ) + ) + log_print("Ending sim {}", sim) + + # The Kernel adds a handful of custom state results for all simulations, + # which configurations may use, print, log, or discard. 
+ self.custom_state["kernel_event_queue_elapsed_wallclock"] = eventQueueWallClockElapsed + self.custom_state["kernel_slowest_agent_finish_time"] = max(self.agentCurrentTimes) + + # Agents will request the Kernel to serialize their agent logs, usually + # during kernelTerminating, but the Kernel must write out the summary + # log itself. + self.writeSummaryLog() + + # This should perhaps be elsewhere, as it is explicitly financial, but it + # is convenient to have a quick summary of the results for now. + print("Mean ending value by agent type:") + for a in self.meanResultByAgentType: + value = self.meanResultByAgentType[a] + count = self.agentCountByType[a] + print("{}: {:d}".format(a, int(round(value / count)))) + + print("Simulation ending!") + + return self.custom_state + + def sendMessage(self, sender=None, recipient=None, msg=None, delay=0): + # Called by an agent to send a message to another agent. The kernel + # supplies its own currentTime (i.e. "now") to prevent possible + # abuse by agents. The kernel will handle computational delay penalties + # and/or network latency. The message must derive from the message.Message class. + # The optional delay parameter represents an agent's request for ADDITIONAL + # delay (beyond the Kernel's mandatory computation + latency delays) to represent + # parallel pipeline processing delays (that should delay the transmission of messages + # but do not make the agent "busy" and unable to respond to new messages). 
+ + if sender is None: + raise ValueError( + "sendMessage() called without valid sender ID", + "sender:", + sender, + "recipient:", + recipient, + "msg:", + msg, + ) + + if recipient is None: + raise ValueError( + "sendMessage() called without valid recipient ID", + "sender:", + sender, + "recipient:", + recipient, + "msg:", + msg, + ) + + if msg is None: + raise ValueError( + "sendMessage() called with message == None", + "sender:", + sender, + "recipient:", + recipient, + "msg:", + msg, + ) + + # Apply the agent's current computation delay to effectively "send" the message + # at the END of the agent's current computation period when it is done "thinking". + # NOTE: sending multiple messages on a single wake will transmit all at the same + # time, at the end of computation. To avoid this, use Agent.delay() to accumulate + # a temporary delay (current cycle only) that will also stagger messages. + + # The optional pipeline delay parameter DOES push the send time forward, since it + # represents "thinking" time before the message would be sent. We don't use this + # for much yet, but it could be important later. + + # This means message delay (before latency) is the agent's standard computation delay + # PLUS any accumulated delay for this wake cycle PLUS any one-time requested delay + # for this specific message only. + sentTime = self.currentTime + pd.Timedelta( + self.agentComputationDelays[sender] + self.currentAgentAdditionalDelay + delay + ) + + # Apply communication delay per the agentLatencyModel, if defined, or the + # agentLatency matrix [sender][recipient] otherwise. 
+ if self.agentLatencyModel is not None: + latency = self.agentLatencyModel.get_latency(sender_id=sender, recipient_id=recipient) + deliverAt = sentTime + pd.Timedelta(latency) + log_print( + "Kernel applied latency {}, accumulated delay {}, one-time delay {} on sendMessage from: {} to {}, scheduled for {}", + latency, + self.currentAgentAdditionalDelay, + delay, + self.agents[sender].name, + self.agents[recipient].name, + self.fmtTime(deliverAt), + ) else: - raise ValueError("Unknown message type found in queue", - "currentTime:", self.currentTime, - "messageType:", self.msg.type) - - if self.messages.empty(): - log_print ("\n--- Kernel Event Queue empty ---") - - if self.currentTime and (self.currentTime > self.stopTime): - log_print ("\n--- Kernel Stop Time surpassed ---") - - # Record wall clock stop time and elapsed time for stats at the end. - eventQueueWallClockStop = pd.Timestamp('now') - - eventQueueWallClockElapsed = eventQueueWallClockStop - eventQueueWallClockStart - - # Event notification for kernel end (agents may communicate with - # other agents, as all agents are still guaranteed to exist). - # Agents should not destroy resources they may need to respond - # to final communications from other agents. - log_print ("\n--- Agent.kernelStopping() ---") - for agent in agents: - agent.kernelStopping() - - # Event notification for kernel termination (agents should not - # attempt communication with other agents, as order of termination - # is unknown). Agents should clean up all used resources as the - # simulation program may not actually terminate if num_simulations > 1. 
- log_print ("\n--- Agent.kernelTerminating() ---") - for agent in agents: - agent.kernelTerminating() - - print ("Event Queue elapsed: {}, messages: {}, messages per second: {:0.1f}".format( - eventQueueWallClockElapsed, ttl_messages, - ttl_messages / (eventQueueWallClockElapsed / (np.timedelta64(1, 's'))))) - log_print ("Ending sim {}", sim) - - - # The Kernel adds a handful of custom state results for all simulations, - # which configurations may use, print, log, or discard. - self.custom_state['kernel_event_queue_elapsed_wallclock'] = eventQueueWallClockElapsed - self.custom_state['kernel_slowest_agent_finish_time'] = max(self.agentCurrentTimes) - - # Agents will request the Kernel to serialize their agent logs, usually - # during kernelTerminating, but the Kernel must write out the summary - # log itself. - self.writeSummaryLog() - - # This should perhaps be elsewhere, as it is explicitly financial, but it - # is convenient to have a quick summary of the results for now. - print ("Mean ending value by agent type:") - for a in self.meanResultByAgentType: - value = self.meanResultByAgentType[a] - count = self.agentCountByType[a] - print ("{}: {:d}".format(a, int(round(value / count)))) - - print ("Simulation ending!") - - return self.custom_state - - - def sendMessage(self, sender = None, recipient = None, msg = None, delay = 0): - # Called by an agent to send a message to another agent. The kernel - # supplies its own currentTime (i.e. "now") to prevent possible - # abuse by agents. The kernel will handle computational delay penalties - # and/or network latency. The message must derive from the message.Message class. - # The optional delay parameter represents an agent's request for ADDITIONAL - # delay (beyond the Kernel's mandatory computation + latency delays) to represent - # parallel pipeline processing delays (that should delay the transmission of messages - # but do not make the agent "busy" and unable to respond to new messages). 
- - if sender is None: - raise ValueError("sendMessage() called without valid sender ID", - "sender:", sender, "recipient:", recipient, - "msg:", msg) - - if recipient is None: - raise ValueError("sendMessage() called without valid recipient ID", - "sender:", sender, "recipient:", recipient, - "msg:", msg) - - if msg is None: - raise ValueError("sendMessage() called with message == None", - "sender:", sender, "recipient:", recipient, - "msg:", msg) - - # Apply the agent's current computation delay to effectively "send" the message - # at the END of the agent's current computation period when it is done "thinking". - # NOTE: sending multiple messages on a single wake will transmit all at the same - # time, at the end of computation. To avoid this, use Agent.delay() to accumulate - # a temporary delay (current cycle only) that will also stagger messages. - - # The optional pipeline delay parameter DOES push the send time forward, since it - # represents "thinking" time before the message would be sent. We don't use this - # for much yet, but it could be important later. - - # This means message delay (before latency) is the agent's standard computation delay - # PLUS any accumulated delay for this wake cycle PLUS any one-time requested delay - # for this specific message only. - sentTime = self.currentTime + pd.Timedelta(self.agentComputationDelays[sender] + - self.currentAgentAdditionalDelay + delay) - - # Apply communication delay per the agentLatencyModel, if defined, or the - # agentLatency matrix [sender][recipient] otherwise. 
- if self.agentLatencyModel is not None: - latency = self.agentLatencyModel.get_latency(sender_id = sender, recipient_id = recipient) - deliverAt = sentTime + pd.Timedelta(latency) - log_print ("Kernel applied latency {}, accumulated delay {}, one-time delay {} on sendMessage from: {} to {}, scheduled for {}", - latency, self.currentAgentAdditionalDelay, delay, self.agents[sender].name, self.agents[recipient].name, - self.fmtTime(deliverAt)) - else: - latency = self.agentLatency[sender][recipient] - noise = self.random_state.choice(len(self.latencyNoise), 1, self.latencyNoise)[0] - deliverAt = sentTime + pd.Timedelta(latency + noise) - log_print ("Kernel applied latency {}, noise {}, accumulated delay {}, one-time delay {} on sendMessage from: {} to {}, scheduled for {}", - latency, noise, self.currentAgentAdditionalDelay, delay, self.agents[sender].name, self.agents[recipient].name, - self.fmtTime(deliverAt)) - - # Finally drop the message in the queue with priority == delivery time. - self.messages.put((deliverAt, (recipient, MessageType.MESSAGE, msg))) - - log_print ("Sent time: {}, current time {}, computation delay {}", sentTime, self.currentTime, self.agentComputationDelays[sender]) - log_print ("Message queued: {}", msg) - - - - def setWakeup(self, sender = None, requestedTime = None): - # Called by an agent to receive a "wakeup call" from the kernel - # at some requested future time. Defaults to the next possible - # timestamp. Wakeup time cannot be the current time or a past time. - # Sender is required and should be the ID of the agent making the call. - # The agent is responsible for maintaining any required state; the - # kernel will not supply any parameters to the wakeup() call. 
- - if requestedTime is None: - requestedTime = self.currentTime + pd.TimeDelta(1) - - if sender is None: - raise ValueError("setWakeup() called without valid sender ID", - "sender:", sender, "requestedTime:", requestedTime) - - if self.currentTime and (requestedTime < self.currentTime): - raise ValueError("setWakeup() called with requested time not in future", - "currentTime:", self.currentTime, - "requestedTime:", requestedTime) - - log_print ("Kernel adding wakeup for agent {} at time {}", - sender, self.fmtTime(requestedTime)) - - self.messages.put((requestedTime, - (sender, MessageType.WAKEUP, None))) - - - def getAgentComputeDelay(self, sender = None): - # Allows an agent to query its current computation delay. - return self.agentComputationDelays[sender] - - - def setAgentComputeDelay(self, sender = None, requestedDelay = None): - # Called by an agent to update its computation delay. This does - # not initiate a global delay, nor an immediate delay for the - # agent. Rather it sets the new default delay for the calling - # agent. The delay will be applied upon every return from wakeup - # or recvMsg. Note that this delay IS applied to any messages - # sent by the agent during the current wake cycle (simulating the - # messages popping out at the end of its "thinking" time). - - # Also note that we DO permit a computation delay of zero, but this should - # really only be used for special or massively parallel agents. - - # requestedDelay should be in whole nanoseconds. - if not type(requestedDelay) is int: - raise ValueError("Requested computation delay must be whole nanoseconds.", - "requestedDelay:", requestedDelay) - - # requestedDelay must be non-negative. 
- if not requestedDelay >= 0: - raise ValueError("Requested computation delay must be non-negative nanoseconds.", - "requestedDelay:", requestedDelay) - - self.agentComputationDelays[sender] = requestedDelay - - - - def delayAgent(self, sender = None, additionalDelay = None): - # Called by an agent to accumulate temporary delay for the current wake cycle. - # This will apply the total delay (at time of sendMessage) to each message, - # and will modify the agent's next available time slot. These happen on top - # of the agent's compute delay BUT DO NOT ALTER IT. (i.e. effects are transient) - # Mostly useful for staggering outbound messages. - - # additionalDelay should be in whole nanoseconds. - if not type(additionalDelay) is int: - raise ValueError("Additional delay must be whole nanoseconds.", - "additionalDelay:", additionalDelay) - - # additionalDelay must be non-negative. - if not additionalDelay >= 0: - raise ValueError("Additional delay must be non-negative nanoseconds.", - "additionalDelay:", additionalDelay) - - self.currentAgentAdditionalDelay += additionalDelay - - - - def findAgentByType(self, type = None): - # Called to request an arbitrary agent ID that matches the class or base class - # passed as "type". For example, any ExchangeAgent, or any NasdaqExchangeAgent. - # This method is rather expensive, so the results should be cached by the caller! - - for agent in self.agents: - if isinstance(agent, type): - return agent.id - - - def writeLog (self, sender, dfLog, filename=None): - # Called by any agent, usually at the very end of the simulation just before - # kernel shutdown, to write to disk any log dataframe it has been accumulating - # during simulation. The format can be decided by the agent, although changes - # will require a special tool to read and parse the logs. The Kernel places - # the log in a unique directory per run, with one filename per agent, also - # decided by the Kernel using agent type, id, etc. 
-
-    # If there are too many agents, placing all these files in a directory might
-    # be unfortunate. Also if there are too many agents, or if the logs are too
-    # large, memory could become an issue. In this case, we might have to take
-    # a speed hit to write logs incrementally.
-
-    # If filename is not None, it will be used as the filename. Otherwise,
-    # the Kernel will construct a filename based on the name of the Agent
-    # requesting log archival.
-
-    if self.skip_log: return
-
-    path = os.path.join(".", "log", self.log_dir)
-
-    if filename:
-      file = "{}.bz2".format(filename)
-    else:
-      file = "{}.bz2".format(self.agents[sender].name.replace(" ",""))
-
-    if not os.path.exists(path):
-      os.makedirs(path)
-
-    dfLog.to_pickle(os.path.join(path, file), compression='bz2')
-
+        latency = self.agentLatency[sender][recipient]
+        noise = self.random_state.choice(len(self.latencyNoise), 1, p=self.latencyNoise)[0]
+        deliverAt = sentTime + pd.Timedelta(latency + noise)
+        log_print(
+            "Kernel applied latency {}, noise {}, accumulated delay {}, one-time delay {} on sendMessage from: {} to {}, scheduled for {}",
+            latency,
+            noise,
+            self.currentAgentAdditionalDelay,
+            delay,
+            self.agents[sender].name,
+            self.agents[recipient].name,
+            self.fmtTime(deliverAt),
+        )
+
+        # Finally drop the message in the queue with priority == delivery time.
+        self.messages.put((deliverAt, (recipient, MessageType.MESSAGE, msg)))
+
+        log_print(
+            "Sent time: {}, current time {}, computation delay {}",
+            sentTime,
+            self.currentTime,
+            self.agentComputationDelays[sender],
+        )
+        log_print("Message queued: {}", msg)
+
+    def setWakeup(self, sender=None, requestedTime=None):
+        # Called by an agent to receive a "wakeup call" from the kernel
+        # at some requested future time. Defaults to the next possible
+        # timestamp. Wakeup time cannot be the current time or a past time.
+        # Sender is required and should be the ID of the agent making the call.
+        # The agent is responsible for maintaining any required state; the
+        # kernel will not supply any parameters to the wakeup() call.
+
+        if requestedTime is None:
+            requestedTime = self.currentTime + pd.Timedelta(1)
+
+        if sender is None:
+            raise ValueError(
+                "setWakeup() called without valid sender ID",
+                "sender:",
+                sender,
+                "requestedTime:",
+                requestedTime,
+            )
+
+        if self.currentTime and (requestedTime < self.currentTime):
+            raise ValueError(
+                "setWakeup() called with requested time not in future",
+                "currentTime:",
+                self.currentTime,
+                "requestedTime:",
+                requestedTime,
+            )
+
+        log_print(
+            "Kernel adding wakeup for agent {} at time {}",
+            sender,
+            self.fmtTime(requestedTime),
+        )
+
+        self.messages.put((requestedTime, (sender, MessageType.WAKEUP, None)))
+
+    def getAgentComputeDelay(self, sender=None):
+        # Allows an agent to query its current computation delay.
+        return self.agentComputationDelays[sender]
+
+    def setAgentComputeDelay(self, sender=None, requestedDelay=None):
+        # Called by an agent to update its computation delay. This does
+        # not initiate a global delay, nor an immediate delay for the
+        # agent. Rather it sets the new default delay for the calling
+        # agent. The delay will be applied upon every return from wakeup
+        # or recvMsg. Note that this delay IS applied to any messages
+        # sent by the agent during the current wake cycle (simulating the
+        # messages popping out at the end of its "thinking" time).
+
+        # Also note that we DO permit a computation delay of zero, but this should
+        # really only be used for special or massively parallel agents.
+
+        # requestedDelay should be in whole nanoseconds.
+        if not type(requestedDelay) is int:
+            raise ValueError(
+                "Requested computation delay must be whole nanoseconds.",
+                "requestedDelay:",
+                requestedDelay,
+            )
+
+        # requestedDelay must be non-negative.
+ if not requestedDelay >= 0: + raise ValueError( + "Requested computation delay must be non-negative nanoseconds.", + "requestedDelay:", + requestedDelay, + ) + + self.agentComputationDelays[sender] = requestedDelay + + def delayAgent(self, sender=None, additionalDelay=None): + # Called by an agent to accumulate temporary delay for the current wake cycle. + # This will apply the total delay (at time of sendMessage) to each message, + # and will modify the agent's next available time slot. These happen on top + # of the agent's compute delay BUT DO NOT ALTER IT. (i.e. effects are transient) + # Mostly useful for staggering outbound messages. + + # additionalDelay should be in whole nanoseconds. + if not type(additionalDelay) is int: + raise ValueError( + "Additional delay must be whole nanoseconds.", + "additionalDelay:", + additionalDelay, + ) + + # additionalDelay must be non-negative. + if not additionalDelay >= 0: + raise ValueError( + "Additional delay must be non-negative nanoseconds.", + "additionalDelay:", + additionalDelay, + ) + + self.currentAgentAdditionalDelay += additionalDelay + + def findAgentByType(self, type=None): + # Called to request an arbitrary agent ID that matches the class or base class + # passed as "type". For example, any ExchangeAgent, or any NasdaqExchangeAgent. + # This method is rather expensive, so the results should be cached by the caller! + + for agent in self.agents: + if isinstance(agent, type): + return agent.id + + def writeLog(self, sender, dfLog, filename=None): + # Called by any agent, usually at the very end of the simulation just before + # kernel shutdown, to write to disk any log dataframe it has been accumulating + # during simulation. The format can be decided by the agent, although changes + # will require a special tool to read and parse the logs. The Kernel places + # the log in a unique directory per run, with one filename per agent, also + # decided by the Kernel using agent type, id, etc. 
+ + # If there are too many agents, placing all these files in a directory might + # be unfortunate. Also if there are too many agents, or if the logs are too + # large, memory could become an issue. In this case, we might have to take + # a speed hit to write logs incrementally. + + # If filename is not None, it will be used as the filename. Otherwise, + # the Kernel will construct a filename based on the name of the Agent + # requesting log archival. + + if self.skip_log: + return + + path = os.path.join(".", "log", self.log_dir) + + if filename: + file = "{}.bz2".format(filename) + else: + file = "{}.bz2".format(self.agents[sender].name.replace(" ", "")) - def appendSummaryLog (self, sender, eventType, event): - # We don't even include a timestamp, because this log is for one-time-only - # summary reporting, like starting cash, or ending cash. - self.summaryLog.append({ 'AgentID' : sender, - 'AgentStrategy' : self.agents[sender].type, - 'EventType' : eventType, 'Event' : event }) + if not os.path.exists(path): + os.makedirs(path) + dfLog.to_pickle(os.path.join(path, file), compression="bz2") - def writeSummaryLog (self): - path = os.path.join(".", "log", self.log_dir) - file = "summary_log.bz2" + def appendSummaryLog(self, sender, eventType, event): + # We don't even include a timestamp, because this log is for one-time-only + # summary reporting, like starting cash, or ending cash. 
+ self.summaryLog.append( + { + "AgentID": sender, + "AgentStrategy": self.agents[sender].type, + "EventType": eventType, + "Event": event, + } + ) - if not os.path.exists(path): - os.makedirs(path) + def writeSummaryLog(self): + path = os.path.join(".", "log", self.log_dir) + file = "summary_log.bz2" - dfLog = pd.DataFrame(self.summaryLog) + if not os.path.exists(path): + os.makedirs(path) - dfLog.to_pickle(os.path.join(path, file), compression='bz2') + dfLog = pd.DataFrame(self.summaryLog) + dfLog.to_pickle(os.path.join(path, file), compression="bz2") - def updateAgentState (self, agent_id, state): - """ Called by an agent that wishes to replace its custom state in the dictionary + def updateAgentState(self, agent_id, state): + """Called by an agent that wishes to replace its custom state in the dictionary the Kernel will return at the end of simulation. Shared state must be set directly, and agents should coordinate that non-destructively. Note that it is never necessary to use this kernel state dictionary for an agent to remember information about itself, only to report it back to the config file. - """ - - if 'agent_state' not in self.custom_state: self.custom_state['agent_state'] = {} - self.custom_state['agent_state'][agent_id] = state - - - @staticmethod - def fmtTime(simulationTime): - # The Kernel class knows how to pretty-print time. It is assumed simulationTime - # is in nanoseconds since midnight. Note this is a static method which can be - # called either on the class or an instance. - - # Try just returning the pd.Timestamp now. 
- return (simulationTime) - - ns = simulationTime - hr = int(ns / (1000000000 * 60 * 60)) - ns -= (hr * 1000000000 * 60 * 60) - m = int(ns / (1000000000 * 60)) - ns -= (m * 1000000000 * 60) - s = int(ns / 1000000000) - ns = int(ns - (s * 1000000000)) - - return "{:02d}:{:02d}:{:02d}.{:09d}".format(hr, m, s, ns) - + """ + + if "agent_state" not in self.custom_state: + self.custom_state["agent_state"] = {} + self.custom_state["agent_state"][agent_id] = state + + @staticmethod + def fmtTime(simulationTime): + # The Kernel class knows how to pretty-print time. It is assumed simulationTime + # is in nanoseconds since midnight. Note this is a static method which can be + # called either on the class or an instance. + + # Try just returning the pd.Timestamp now. + return simulationTime + + ns = simulationTime + hr = int(ns / (1000000000 * 60 * 60)) + ns -= hr * 1000000000 * 60 * 60 + m = int(ns / (1000000000 * 60)) + ns -= m * 1000000000 * 60 + s = int(ns / 1000000000) + ns = int(ns - (s * 1000000000)) + + return "{:02d}:{:02d}:{:02d}.{:09d}".format(hr, m, s, ns) diff --git a/abides.py b/abides.py index 4548c4df7..8cc26f5b4 100644 --- a/abides.py +++ b/abides.py @@ -2,30 +2,30 @@ import importlib import sys -if __name__ == '__main__': +if __name__ == "__main__": - # Print system banner. - system_name = "ABIDES: Agent-Based Interactive Discrete Event Simulation" + # Print system banner. + system_name = "ABIDES: Agent-Based Interactive Discrete Event Simulation" - print ("=" * len(system_name)) - print (system_name) - print ("=" * len(system_name)) - print () + print("=" * len(system_name)) + print(system_name) + print("=" * len(system_name)) + print() - # Test command line parameters. Only peel off the config file. - # Anything else should be left FOR the config file to consume as agent - # or experiment parameterization. 
- parser = argparse.ArgumentParser(description='Simulation configuration.') - parser.add_argument('-c', '--config', required=True, - help='Name of config file to execute') - parser.add_argument('--config-help', action='store_true', - help='Print argument options for the specific config file.') + # Test command line parameters. Only peel off the config file. + # Anything else should be left FOR the config file to consume as agent + # or experiment parameterization. + parser = argparse.ArgumentParser(description="Simulation configuration.") + parser.add_argument("-c", "--config", required=True, help="Name of config file to execute") + parser.add_argument( + "--config-help", + action="store_true", + help="Print argument options for the specific config file.", + ) - args, config_args = parser.parse_known_args() + args, config_args = parser.parse_known_args() - # First parameter supplied is config file. - config_file = args.config - - config = importlib.import_module('config.{}'.format(config_file), - package=None) + # First parameter supplied is config file. + config_file = args.config + config = importlib.import_module("config.{}".format(config_file), package=None) diff --git a/agent/Agent.py b/agent/Agent.py index a4d561468..e053f6e99 100644 --- a/agent/Agent.py +++ b/agent/Agent.py @@ -1,185 +1,191 @@ -import pandas as pd - from copy import deepcopy -from util.util import log_print - -class Agent: - - def __init__ (self, id, name, type, random_state, log_to_file=True): - - # ID must be a unique number (usually autoincremented). - # Name is for human consumption, should be unique (often type + number). - # Type is for machine aggregation of results, should be same for all - # agents following the same strategy (incl. parameter settings). - # Every agent is given a random state to use for any stochastic needs. - # This is an np.random.RandomState object, already seeded. 
- self.id = id - self.name = name - self.type = type - self.log_to_file = log_to_file - self.random_state = random_state - - if not random_state: - raise ValueError("A valid, seeded np.random.RandomState object is required " + - "for every agent.Agent", self.name) - sys.exit() - - # Kernel is supplied via kernelInitializing method of kernel lifecycle. - self.kernel = None - - # What time does the agent think it is? Should be updated each time - # the agent wakes via wakeup or receiveMessage. (For convenience - # of reference throughout the Agent class hierarchy, NOT THE - # CANONICAL TIME.) - self.currentTime = None - - # Agents may choose to maintain a log. During simulation, - # it should be stored as a list of dictionaries. The expected - # keys by default are: EventTime, EventType, Event. Other - # Columns may be added, but will then require specializing - # parsing and will increase output dataframe size. If there - # is a non-empty log, it will be written to disk as a Dataframe - # at kernel termination. - - # It might, or might not, make sense to formalize these log Events - # as a class, with enumerated EventTypes and so forth. - self.log = [] - self.logEvent("AGENT_TYPE", type) - - - ### Flow of required kernel listening methods: - ### init -> start -> (entire simulation) -> end -> terminate - - def kernelInitializing (self, kernel): - # Called by kernel one time when simulation first begins. - # No other agents are guaranteed to exist at this time. - - # Kernel reference must be retained, as this is the only time the - # agent can "see" it. - - self.kernel = kernel - - log_print ("{} exists!", self.name) - - - def kernelStarting (self, startTime): - # Called by kernel one time _after_ simulationInitializing. - # All other agents are guaranteed to exist at this time. - # startTime is the earliest time for which the agent can - # schedule a wakeup call (or could receive a message). - - # Base Agent schedules a wakeup call for the first available timestamp. 
- # Subclass agents may override this behavior as needed. - - log_print ("Agent {} ({}) requesting kernel wakeup at time {}", - self.id, self.name, self.kernel.fmtTime(startTime)) - - self.setWakeup(startTime) - - def kernelStopping (self): - # Called by kernel one time _before_ simulationTerminating. - # All other agents are guaranteed to exist at this time. - - pass - - - def kernelTerminating (self): - # Called by kernel one time when simulation terminates. - # No other agents are guaranteed to exist at this time. - - # If this agent has been maintaining a log, convert it to a Dataframe - # and request that the Kernel write it to disk before terminating. - if self.log and self.log_to_file: - dfLog = pd.DataFrame(self.log) - dfLog.set_index('EventTime', inplace=True) - self.writeLog(dfLog) - - - ### Methods for internal use by agents (e.g. bookkeeping). - - def logEvent (self, eventType, event = '', appendSummaryLog = False): - # Adds an event to this agent's log. The deepcopy of the Event field, - # often an object, ensures later state changes to the object will not - # retroactively update the logged event. - - # We can make a single copy of the object (in case it is an arbitrary - # class instance) for both potential log targets, because we don't - # alter logs once recorded. - e = deepcopy(event) - self.log.append({ 'EventTime' : self.currentTime, 'EventType' : eventType, - 'Event' : e }) - - if appendSummaryLog: self.kernel.appendSummaryLog(self.id, eventType, e) - - - ### Methods required for communication from other agents. - ### The kernel will _not_ call these methods on its own behalf, - ### only to pass traffic from other agents.. - - def receiveMessage (self, currentTime, msg): - # Called each time a message destined for this agent reaches - # the front of the kernel's priority queue. currentTime is - # the simulation time at which the kernel is delivering this - # message -- the agent should treat this as "now". 
msg is - # an object guaranteed to inherit from the message.Message class. - - self.currentTime = currentTime - - log_print ("At {}, agent {} ({}) received: {}", - self.kernel.fmtTime(currentTime), self.id, self.name, msg) - - - def wakeup (self, currentTime): - # Agents can request a wakeup call at a future simulation time using - # Agent.setWakeup(). This is the method called when the wakeup time - # arrives. - - self.currentTime = currentTime - - log_print ("At {}, agent {} ({}) received wakeup.", - self.kernel.fmtTime(currentTime), self.id, self.name) +import pandas as pd +from util.util import log_print - ### Methods used to request services from the Kernel. These should be used - ### by all agents. Kernel methods should _not_ be called directly! - ### Presently the kernel expects agent IDs only, not agent references. - ### It is possible this could change in the future. Normal agents will - ### not typically wish to request additional delay. - def sendMessage (self, recipientID, msg, delay = 0): - self.kernel.sendMessage(self.id, recipientID, msg, delay = delay) +class Agent: - def setWakeup (self, requestedTime): - self.kernel.setWakeup(self.id, requestedTime) + def __init__(self, id, name, type, random_state, log_to_file=True): + + # ID must be a unique number (usually autoincremented). + # Name is for human consumption, should be unique (often type + number). + # Type is for machine aggregation of results, should be same for all + # agents following the same strategy (incl. parameter settings). + # Every agent is given a random state to use for any stochastic needs. + # This is an np.random.RandomState object, already seeded. 
+ self.id = id + self.name = name + self.type = type + self.log_to_file = log_to_file + self.random_state = random_state + + if not random_state: + raise ValueError( + "A valid, seeded np.random.RandomState object is required " + "for every agent.Agent", + self.name, + ) + sys.exit() + + # Kernel is supplied via kernelInitializing method of kernel lifecycle. + self.kernel = None + + # What time does the agent think it is? Should be updated each time + # the agent wakes via wakeup or receiveMessage. (For convenience + # of reference throughout the Agent class hierarchy, NOT THE + # CANONICAL TIME.) + self.currentTime = None + + # Agents may choose to maintain a log. During simulation, + # it should be stored as a list of dictionaries. The expected + # keys by default are: EventTime, EventType, Event. Other + # Columns may be added, but will then require specializing + # parsing and will increase output dataframe size. If there + # is a non-empty log, it will be written to disk as a Dataframe + # at kernel termination. + + # It might, or might not, make sense to formalize these log Events + # as a class, with enumerated EventTypes and so forth. + self.log = [] + self.logEvent("AGENT_TYPE", type) + + ### Flow of required kernel listening methods: + ### init -> start -> (entire simulation) -> end -> terminate + + def kernelInitializing(self, kernel): + # Called by kernel one time when simulation first begins. + # No other agents are guaranteed to exist at this time. + + # Kernel reference must be retained, as this is the only time the + # agent can "see" it. + + self.kernel = kernel + + log_print("{} exists!", self.name) + + def kernelStarting(self, startTime): + # Called by kernel one time _after_ simulationInitializing. + # All other agents are guaranteed to exist at this time. + # startTime is the earliest time for which the agent can + # schedule a wakeup call (or could receive a message). + + # Base Agent schedules a wakeup call for the first available timestamp. 
+ # Subclass agents may override this behavior as needed. + + log_print( + "Agent {} ({}) requesting kernel wakeup at time {}", + self.id, + self.name, + self.kernel.fmtTime(startTime), + ) + + self.setWakeup(startTime) + + def kernelStopping(self): + # Called by kernel one time _before_ simulationTerminating. + # All other agents are guaranteed to exist at this time. + + pass + + def kernelTerminating(self): + # Called by kernel one time when simulation terminates. + # No other agents are guaranteed to exist at this time. + + # If this agent has been maintaining a log, convert it to a Dataframe + # and request that the Kernel write it to disk before terminating. + if self.log and self.log_to_file: + dfLog = pd.DataFrame(self.log) + dfLog.set_index("EventTime", inplace=True) + self.writeLog(dfLog) + + ### Methods for internal use by agents (e.g. bookkeeping). + + def logEvent(self, eventType, event="", appendSummaryLog=False): + # Adds an event to this agent's log. The deepcopy of the Event field, + # often an object, ensures later state changes to the object will not + # retroactively update the logged event. + + # We can make a single copy of the object (in case it is an arbitrary + # class instance) for both potential log targets, because we don't + # alter logs once recorded. + e = deepcopy(event) + self.log.append({"EventTime": self.currentTime, "EventType": eventType, "Event": e}) + + if appendSummaryLog: + self.kernel.appendSummaryLog(self.id, eventType, e) + + ### Methods required for communication from other agents. + ### The kernel will _not_ call these methods on its own behalf, + ### only to pass traffic from other agents.. + + def receiveMessage(self, currentTime, msg): + # Called each time a message destined for this agent reaches + # the front of the kernel's priority queue. currentTime is + # the simulation time at which the kernel is delivering this + # message -- the agent should treat this as "now". 
msg is + # an object guaranteed to inherit from the message.Message class. + + self.currentTime = currentTime + + log_print( + "At {}, agent {} ({}) received: {}", + self.kernel.fmtTime(currentTime), + self.id, + self.name, + msg, + ) + + def wakeup(self, currentTime): + # Agents can request a wakeup call at a future simulation time using + # Agent.setWakeup(). This is the method called when the wakeup time + # arrives. + + self.currentTime = currentTime - def getComputationDelay (self): - return self.kernel.getAgentComputeDelay(sender = self.id) + log_print( + "At {}, agent {} ({}) received wakeup.", + self.kernel.fmtTime(currentTime), + self.id, + self.name, + ) - def setComputationDelay (self, requestedDelay): - self.kernel.setAgentComputeDelay(sender = self.id, requestedDelay = requestedDelay) + ### Methods used to request services from the Kernel. These should be used + ### by all agents. Kernel methods should _not_ be called directly! - def delay (self, additionalDelay): - self.kernel.delayAgent(sender = self.id, additionalDelay = additionalDelay) + ### Presently the kernel expects agent IDs only, not agent references. + ### It is possible this could change in the future. Normal agents will + ### not typically wish to request additional delay. 
+ def sendMessage(self, recipientID, msg, delay=0): + self.kernel.sendMessage(self.id, recipientID, msg, delay=delay) - def writeLog (self, dfLog, filename=None): - self.kernel.writeLog(self.id, dfLog, filename) + def setWakeup(self, requestedTime): + self.kernel.setWakeup(self.id, requestedTime) - def updateAgentState (self, state): - """ Agents should use this method to replace their custom state in the dictionary + def getComputationDelay(self): + return self.kernel.getAgentComputeDelay(sender=self.id) + + def setComputationDelay(self, requestedDelay): + self.kernel.setAgentComputeDelay(sender=self.id, requestedDelay=requestedDelay) + + def delay(self, additionalDelay): + self.kernel.delayAgent(sender=self.id, additionalDelay=additionalDelay) + + def writeLog(self, dfLog, filename=None): + self.kernel.writeLog(self.id, dfLog, filename) + + def updateAgentState(self, state): + """Agents should use this method to replace their custom state in the dictionary the Kernel will return to the experimental config file at the end of the simulation. This is intended to be write-only, and agents should not use it to store information for their own later use. - """ - - self.kernel.updateAgentState(self.id, state) - + """ - ### Internal methods that should not be modified without a very good reason. + self.kernel.updateAgentState(self.id, state) - def __lt__(self, other): - # Required by Python3 for this object to be placed in a priority queue. + ### Internal methods that should not be modified without a very good reason. - return ("{}".format(self.id) < - "{}".format(other.id)) + def __lt__(self, other): + # Required by Python3 for this object to be placed in a priority queue. 
+ return "{}".format(self.id) < "{}".format(other.id) diff --git a/agent/ExchangeAgent.py b/agent/ExchangeAgent.py index 7878331b9..855d55caa 100644 --- a/agent/ExchangeAgent.py +++ b/agent/ExchangeAgent.py @@ -4,415 +4,543 @@ # the levels of order stream history to maintain per symbol (maintains all orders that led to the last N trades), # whether to log all order activity to the agent log, and a random state object (already seeded) to use # for stochasticity. +import datetime as dt +import warnings + from agent.FinancialAgent import FinancialAgent from message.Message import Message from util.OrderBook import OrderBook from util.util import log_print -import datetime as dt - -import warnings -warnings.simplefilter(action='ignore', category=FutureWarning) -warnings.simplefilter(action='ignore', category=UserWarning) +warnings.simplefilter(action="ignore", category=FutureWarning) +warnings.simplefilter(action="ignore", category=UserWarning) import pandas as pd -pd.set_option('display.max_rows', 500) + +pd.set_option("display.max_rows", 500) from copy import deepcopy class ExchangeAgent(FinancialAgent): - def __init__(self, id, name, type, mkt_open, mkt_close, symbols, book_freq='S', wide_book=False, pipeline_delay = 40000, - computation_delay = 1, stream_history = 0, log_orders = False, random_state = None): - - super().__init__(id, name, type, random_state) - - # Do not request repeated wakeup calls. - self.reschedule = False - - # Store this exchange's open and close times. - self.mkt_open = mkt_open - self.mkt_close = mkt_close - - # Right now, only the exchange agent has a parallel processing pipeline delay. This is an additional - # delay added only to order activity (placing orders, etc) and not simple inquiries (market operating - # hours, etc). - self.pipeline_delay = pipeline_delay - - # Computation delay is applied on every wakeup call or message received. 
- self.computation_delay = computation_delay - - # The exchange maintains an order stream of all orders leading to the last L trades - # to support certain agents from the auction literature (GD, HBL, etc). - self.stream_history = stream_history - - # Log all order activity? - self.log_orders = log_orders - - # Create an order book for each symbol. - self.order_books = {} - - for symbol in symbols: - self.order_books[symbol] = OrderBook(self, symbol) - - # At what frequency will we archive the order books for visualization and analysis? - self.book_freq = book_freq - - # Store orderbook in wide format? ONLY WORKS with book_freq == 0 - self.wide_book = wide_book - - # The subscription dict is a dictionary with the key = agent ID, - # value = dict (key = symbol, value = list [levels (no of levels to recieve updates for), - # frequency (min number of ns between messages), last agent update timestamp] - # e.g. {101 : {'AAPL' : [1, 10, pd.Timestamp(10:00:00)}} - self.subscription_dict = {} - - # The exchange agent overrides this to obtain a reference to an oracle. - # This is needed to establish a "last trade price" at open (i.e. an opening - # price) in case agents query last trade before any simulated trades are made. - # This can probably go away once we code the opening cross auction. - def kernelInitializing (self, kernel): - super().kernelInitializing(kernel) - - self.oracle = self.kernel.oracle - - # Obtain opening prices (in integer cents). These are not noisy right now. - for symbol in self.order_books: - try: - self.order_books[symbol].last_trade = self.oracle.getDailyOpenPrice(symbol, self.mkt_open) - log_print ("Opening price for {} is {}", symbol, self.order_books[symbol].last_trade) - except AttributeError as e: - log_print(str(e)) - - - # The exchange agent overrides this to additionally log the full depth of its - # order books for the entire day. 
- def kernelTerminating (self): - super().kernelTerminating() - - # If the oracle supports writing the fundamental value series for its - # symbols, write them to disk. - if hasattr(self.oracle, 'f_log'): - for symbol in self.oracle.f_log: - dfFund = pd.DataFrame(self.oracle.f_log[symbol]) - if not dfFund.empty: - dfFund.set_index('FundamentalTime', inplace=True) - self.writeLog(dfFund, filename='fundamental_{}'.format(symbol)) - log_print("Fundamental archival complete.") - if self.book_freq is None: return - else: - # Iterate over the order books controlled by this exchange. - for symbol in self.order_books: - start_time = dt.datetime.now() - self.logOrderBookSnapshots(symbol) - end_time = dt.datetime.now() - print("Time taken to log the order book: {}".format(end_time - start_time)) - print("Order book archival complete.") - - def receiveMessage(self, currentTime, msg): - super().receiveMessage(currentTime, msg) - - # Unless the intent of an experiment is to examine computational issues within an Exchange, - # it will typically have either 1 ns delay (near instant but cannot process multiple orders - # in the same atomic time unit) or 0 ns delay (can process any number of orders, always in - # the atomic time unit in which they are received). This is separate from, and additional - # to, any parallel pipeline delay imposed for order book activity. - - # Note that computation delay MUST be updated before any calls to sendMessage. - self.setComputationDelay(self.computation_delay) - - # Is the exchange closed? (This block only affects post-close, not pre-open.) - if currentTime > self.mkt_close: - # Most messages after close will receive a 'MKT_CLOSED' message in response. A few things - # might still be processed, like requests for final trade prices or such. 
- if msg.body['msg'] in ['LIMIT_ORDER', 'MARKET_ORDER', 'CANCEL_ORDER', 'MODIFY_ORDER']: - log_print("{} received {}: {}", self.name, msg.body['msg'], msg.body['order']) - self.sendMessage(msg.body['sender'], Message({"msg": "MKT_CLOSED"})) - - # Don't do any further processing on these messages! - return - elif 'QUERY' in msg.body['msg']: - # Specifically do allow querying after market close, so agents can get the - # final trade of the day as their "daily close" price for a symbol. - pass - else: - log_print("{} received {}, discarded: market is closed.", self.name, msg.body['msg']) - self.sendMessage(msg.body['sender'], Message({"msg": "MKT_CLOSED"})) - - # Don't do any further processing on these messages! - return - - # Log order messages only if that option is configured. Log all other messages. - if msg.body['msg'] in ['LIMIT_ORDER', 'MARKET_ORDER', 'CANCEL_ORDER', 'MODIFY_ORDER']: - if self.log_orders: self.logEvent(msg.body['msg'], msg.body['order'].to_dict()) - else: - self.logEvent(msg.body['msg'], msg.body['sender']) - - # Handle the DATA SUBSCRIPTION request and cancellation messages from the agents. - if msg.body['msg'] in ["MARKET_DATA_SUBSCRIPTION_REQUEST", "MARKET_DATA_SUBSCRIPTION_CANCELLATION"]: - log_print("{} received {} request from agent {}", self.name, msg.body['msg'], msg.body['sender']) - self.updateSubscriptionDict(msg, currentTime) - - # Handle all message types understood by this exchange. - if msg.body['msg'] == "WHEN_MKT_OPEN": - log_print("{} received WHEN_MKT_OPEN request from agent {}", self.name, msg.body['sender']) - - # The exchange is permitted to respond to requests for simple immutable data (like "what are your - # hours?") instantly. This does NOT include anything that queries mutable data, like equity - # quotes or trades. 
- self.setComputationDelay(0) - - self.sendMessage(msg.body['sender'], Message({"msg": "WHEN_MKT_OPEN", "data": self.mkt_open})) - elif msg.body['msg'] == "WHEN_MKT_CLOSE": - log_print("{} received WHEN_MKT_CLOSE request from agent {}", self.name, msg.body['sender']) - - # The exchange is permitted to respond to requests for simple immutable data (like "what are your - # hours?") instantly. This does NOT include anything that queries mutable data, like equity - # quotes or trades. - self.setComputationDelay(0) - - self.sendMessage(msg.body['sender'], Message({"msg": "WHEN_MKT_CLOSE", "data": self.mkt_close})) - elif msg.body['msg'] == "QUERY_LAST_TRADE": - symbol = msg.body['symbol'] - if symbol not in self.order_books: - log_print("Last trade request discarded. Unknown symbol: {}", symbol) - else: - log_print("{} received QUERY_LAST_TRADE ({}) request from agent {}", self.name, symbol, msg.body['sender']) - - # Return the single last executed trade price (currently not volume) for the requested symbol. - # This will return the average share price if multiple executions resulted from a single order. - self.sendMessage(msg.body['sender'], Message({"msg": "QUERY_LAST_TRADE", "symbol": symbol, - "data": self.order_books[symbol].last_trade, - "mkt_closed": True if currentTime > self.mkt_close else False})) - elif msg.body['msg'] == "QUERY_SPREAD": - symbol = msg.body['symbol'] - depth = msg.body['depth'] - if symbol not in self.order_books: - log_print("Bid-ask spread request discarded. Unknown symbol: {}", symbol) - else: - log_print("{} received QUERY_SPREAD ({}:{}) request from agent {}", self.name, symbol, depth, - msg.body['sender']) - - # Return the requested depth on both sides of the order book for the requested symbol. - # Returns price levels and aggregated volume at each level (not individual orders). 
- self.sendMessage(msg.body['sender'], Message({"msg": "QUERY_SPREAD", "symbol": symbol, "depth": depth, - "bids": self.order_books[symbol].getInsideBids(depth), - "asks": self.order_books[symbol].getInsideAsks(depth), - "data": self.order_books[symbol].last_trade, - "mkt_closed": True if currentTime > self.mkt_close else False, - "book": ''})) - - # It is possible to also send the pretty-printed order book to the agent for logging, but forcing pretty-printing - # of a large order book is very slow, so we should only do it with good reason. We don't currently - # have a configurable option for it. - # "book": self.order_books[symbol].prettyPrint(silent=True) })) - elif msg.body['msg'] == "QUERY_ORDER_STREAM": - symbol = msg.body['symbol'] - length = msg.body['length'] - - if symbol not in self.order_books: - log_print("Order stream request discarded. Unknown symbol: {}", symbol) - else: - log_print("{} received QUERY_ORDER_STREAM ({}:{}) request from agent {}", self.name, symbol, length, - msg.body['sender']) - - # We return indices [1:length] inclusive because the agent will want "orders leading up to the last - # L trades", and the items under index 0 are more recent than the last trade. - self.sendMessage(msg.body['sender'], Message({"msg": "QUERY_ORDER_STREAM", "symbol": symbol, "length": length, - "mkt_closed": True if currentTime > self.mkt_close else False, - "orders": self.order_books[symbol].history[1:length + 1] - })) - elif msg.body['msg'] == 'QUERY_TRANSACTED_VOLUME': - symbol = msg.body['symbol'] - lookback_period = msg.body['lookback_period'] - if symbol not in self.order_books: - log_print("Order stream request discarded. 
Unknown symbol: {}", symbol) - else: - log_print("{} received QUERY_TRANSACTED_VOLUME ({}:{}) request from agent {}", self.name, symbol, lookback_period, - msg.body['sender']) - self.sendMessage(msg.body['sender'], Message({"msg": "QUERY_TRANSACTED_VOLUME", "symbol": symbol, - "transacted_volume": self.order_books[symbol].get_transacted_volume(lookback_period), - "mkt_closed": True if currentTime > self.mkt_close else False - })) - elif msg.body['msg'] == "LIMIT_ORDER": - order = msg.body['order'] - log_print("{} received LIMIT_ORDER: {}", self.name, order) - if order.symbol not in self.order_books: - log_print("Limit Order discarded. Unknown symbol: {}", order.symbol) - else: - # Hand the order to the order book for processing. - self.order_books[order.symbol].handleLimitOrder(deepcopy(order)) - self.publishOrderBookData() - elif msg.body['msg'] == "MARKET_ORDER": - order = msg.body['order'] - log_print("{} received MARKET_ORDER: {}", self.name, order) - if order.symbol not in self.order_books: - log_print("Market Order discarded. Unknown symbol: {}", order.symbol) - else: - # Hand the market order to the order book for processing. - self.order_books[order.symbol].handleMarketOrder(deepcopy(order)) - self.publishOrderBookData() - elif msg.body['msg'] == "CANCEL_ORDER": - # Note: this is somewhat open to abuse, as in theory agents could cancel other agents' orders. - # An agent could also become confused if they receive a (partial) execution on an order they - # then successfully cancel, but receive the cancel confirmation first. Things to think about - # for later... - order = msg.body['order'] - log_print("{} received CANCEL_ORDER: {}", self.name, order) - if order.symbol not in self.order_books: - log_print("Cancellation request discarded. Unknown symbol: {}", order.symbol) - else: - # Hand the order to the order book for processing. 
- self.order_books[order.symbol].cancelOrder(deepcopy(order)) - self.publishOrderBookData() - elif msg.body['msg'] == 'MODIFY_ORDER': - # Replace an existing order with a modified order. There could be some timing issues - # here. What if an order is partially executed, but the submitting agent has not - # yet received the norification, and submits a modification to the quantity of the - # (already partially executed) order? I guess it is okay if we just think of this - # as "delete and then add new" and make it the agent's problem if anything weird - # happens. - order = msg.body['order'] - new_order = msg.body['new_order'] - log_print("{} received MODIFY_ORDER: {}, new order: {}".format(self.name, order, new_order)) - if order.symbol not in self.order_books: - log_print("Modification request discarded. Unknown symbol: {}".format(order.symbol)) - else: - self.order_books[order.symbol].modifyOrder(deepcopy(order), deepcopy(new_order)) - self.publishOrderBookData() - - def updateSubscriptionDict(self, msg, currentTime): - # The subscription dict is a dictionary with the key = agent ID, - # value = dict (key = symbol, value = list [levels (no of levels to recieve updates for), - # frequency (min number of ns between messages), last agent update timestamp] - # e.g. 
{101 : {'AAPL' : [1, 10, pd.Timestamp(10:00:00)}} - if msg.body['msg'] == "MARKET_DATA_SUBSCRIPTION_REQUEST": - agent_id, symbol, levels, freq = msg.body['sender'], msg.body['symbol'], msg.body['levels'], msg.body['freq'] - self.subscription_dict[agent_id] = {symbol: [levels, freq, currentTime]} - elif msg.body['msg'] == "MARKET_DATA_SUBSCRIPTION_CANCELLATION": - agent_id, symbol = msg.body['sender'], msg.body['symbol'] - del self.subscription_dict[agent_id][symbol] - - def publishOrderBookData(self): - ''' - The exchange agents sends an order book update to the agents using the subscription API if one of the following - conditions are met: - 1) agent requests ALL order book updates (freq == 0) - 2) order book update timestamp > last time agent was updated AND the orderbook update time stamp is greater than - the last agent update time stamp by a period more than that specified in the freq parameter. - ''' - for agent_id, params in self.subscription_dict.items(): - for symbol, values in params.items(): - levels, freq, last_agent_update = values[0], values[1], values[2] - orderbook_last_update = self.order_books[symbol].last_update_ts - if (freq == 0) or \ - ((orderbook_last_update > last_agent_update) and ((orderbook_last_update - last_agent_update).delta >= freq)): - self.sendMessage(agent_id, Message({"msg": "MARKET_DATA", - "symbol": symbol, - "bids": self.order_books[symbol].getInsideBids(levels), - "asks": self.order_books[symbol].getInsideAsks(levels), - "last_transaction": self.order_books[symbol].last_trade, - "exchange_ts": self.currentTime})) - self.subscription_dict[agent_id][symbol][2] = orderbook_last_update - - def logOrderBookSnapshots(self, symbol): - """ - Log full depth quotes (price, volume) from this order book at some pre-determined frequency. Here we are looking at - the actual log for this order book (i.e. are there snapshots to export, independent of the requested frequency). 
- """ - def get_quote_range_iterator(s): - """ Helper method for order book logging. Takes pandas Series and returns python range() from first to last - element. - """ - forbidden_values = [0, 19999900] # TODO: Put constant value in more sensible place! - quotes = sorted(s) - for val in forbidden_values: - try: quotes.remove(val) - except ValueError: - pass - return quotes - - book = self.order_books[symbol] - - if book.book_log: - - print("Logging order book to file...") - dfLog = book.book_log_to_df() - dfLog.set_index('QuoteTime', inplace=True) - dfLog = dfLog[~dfLog.index.duplicated(keep='last')] - dfLog.sort_index(inplace=True) - - if str(self.book_freq).isdigit() and int(self.book_freq) == 0: # Save all possible information - # Get the full range of quotes at the finest possible resolution. - quotes = get_quote_range_iterator(dfLog.columns.unique()) - - # Restructure the log to have multi-level rows of all possible pairs of time and quote - # with volume as the only column. - if not self.wide_book: - filledIndex = pd.MultiIndex.from_product([dfLog.index, quotes], names=['time', 'quote']) - dfLog = dfLog.stack() - dfLog = dfLog.reindex(filledIndex) - - filename = f'ORDERBOOK_{symbol}_FULL' - - else: # Sample at frequency self.book_freq - # With multiple quotes in a nanosecond, use the last one, then resample to the requested freq. - dfLog = dfLog.resample(self.book_freq).ffill() - dfLog.sort_index(inplace=True) - - # Create a fully populated index at the desired frequency from market open to close. - # Then project the logged data into this complete index. - time_idx = pd.date_range(self.mkt_open, self.mkt_close, freq=self.book_freq, closed='right') - dfLog = dfLog.reindex(time_idx, method='ffill') - dfLog.sort_index(inplace=True) - - if not self.wide_book: - dfLog = dfLog.stack() - dfLog.sort_index(inplace=True) - - # Get the full range of quotes at the finest possible resolution. 
- quotes = get_quote_range_iterator(dfLog.index.get_level_values(1).unique()) - - # Restructure the log to have multi-level rows of all possible pairs of time and quote - # with volume as the only column. - filledIndex = pd.MultiIndex.from_product([time_idx, quotes], names=['time', 'quote']) - dfLog = dfLog.reindex(filledIndex) - - filename = f'ORDERBOOK_{symbol}_FREQ_{self.book_freq}' - - # Final cleanup - if not self.wide_book: - dfLog.rename('Volume') - df = pd.SparseDataFrame(index=dfLog.index) - df['Volume'] = dfLog - else: - df = dfLog - df = df.reindex(sorted(df.columns), axis=1) - - # Archive the order book snapshots directly to a file named with the symbol, rather than - # to the exchange agent log. - self.writeLog(df, filename=filename) - print("Order book logging complete!") - - def sendMessage (self, recipientID, msg): - # The ExchangeAgent automatically applies appropriate parallel processing pipeline delay - # to those message types which require it. - # TODO: probably organize the order types into categories once there are more, so we can - # take action by category (e.g. ORDER-related messages) instead of enumerating all message - # types to be affected. - if msg.body['msg'] in ['ORDER_ACCEPTED', 'ORDER_CANCELLED', 'ORDER_EXECUTED']: - # Messages that require order book modification (not simple queries) incur the additional - # parallel processing delay as configured. - super().sendMessage(recipientID, msg, delay = self.pipeline_delay) - if self.log_orders: self.logEvent(msg.body['msg'], msg.body['order'].to_dict()) - else: - # Other message types incur only the currently-configured computation delay for this agent. - super().sendMessage(recipientID, msg) - - # Simple accessor methods for the market open and close times. 
- def getMarketOpen(self): - return self.__mkt_open - - def getMarketClose(self): - return self.__mkt_close + def __init__( + self, + id, + name, + type, + mkt_open, + mkt_close, + symbols, + book_freq="S", + wide_book=False, + pipeline_delay=40000, + computation_delay=1, + stream_history=0, + log_orders=False, + random_state=None, + ): + + super().__init__(id, name, type, random_state) + + # Do not request repeated wakeup calls. + self.reschedule = False + + # Store this exchange's open and close times. + self.mkt_open = mkt_open + self.mkt_close = mkt_close + + # Right now, only the exchange agent has a parallel processing pipeline delay. This is an additional + # delay added only to order activity (placing orders, etc) and not simple inquiries (market operating + # hours, etc). + self.pipeline_delay = pipeline_delay + + # Computation delay is applied on every wakeup call or message received. + self.computation_delay = computation_delay + + # The exchange maintains an order stream of all orders leading to the last L trades + # to support certain agents from the auction literature (GD, HBL, etc). + self.stream_history = stream_history + + # Log all order activity? + self.log_orders = log_orders + + # Create an order book for each symbol. + self.order_books = {} + + for symbol in symbols: + self.order_books[symbol] = OrderBook(self, symbol) + + # At what frequency will we archive the order books for visualization and analysis? + self.book_freq = book_freq + + # Store orderbook in wide format? ONLY WORKS with book_freq == 0 + self.wide_book = wide_book + + # The subscription dict is a dictionary with the key = agent ID, + # value = dict (key = symbol, value = list [levels (no of levels to recieve updates for), + # frequency (min number of ns between messages), last agent update timestamp] + # e.g. {101 : {'AAPL' : [1, 10, pd.Timestamp(10:00:00)}} + self.subscription_dict = {} + + # The exchange agent overrides this to obtain a reference to an oracle. 
+ # This is needed to establish a "last trade price" at open (i.e. an opening + # price) in case agents query last trade before any simulated trades are made. + # This can probably go away once we code the opening cross auction. + def kernelInitializing(self, kernel): + super().kernelInitializing(kernel) + + self.oracle = self.kernel.oracle + + # Obtain opening prices (in integer cents). These are not noisy right now. + for symbol in self.order_books: + try: + self.order_books[symbol].last_trade = self.oracle.getDailyOpenPrice(symbol, self.mkt_open) + log_print( + "Opening price for {} is {}", + symbol, + self.order_books[symbol].last_trade, + ) + except AttributeError as e: + log_print(str(e)) + + # The exchange agent overrides this to additionally log the full depth of its + # order books for the entire day. + def kernelTerminating(self): + super().kernelTerminating() + + # If the oracle supports writing the fundamental value series for its + # symbols, write them to disk. + if hasattr(self.oracle, "f_log"): + for symbol in self.oracle.f_log: + dfFund = pd.DataFrame(self.oracle.f_log[symbol]) + if not dfFund.empty: + dfFund.set_index("FundamentalTime", inplace=True) + self.writeLog(dfFund, filename="fundamental_{}".format(symbol)) + log_print("Fundamental archival complete.") + if self.book_freq is None: + return + else: + # Iterate over the order books controlled by this exchange. 
+ for symbol in self.order_books: + start_time = dt.datetime.now() + self.logOrderBookSnapshots(symbol) + end_time = dt.datetime.now() + print("Time taken to log the order book: {}".format(end_time - start_time)) + print("Order book archival complete.") + + def receiveMessage(self, currentTime, msg): + super().receiveMessage(currentTime, msg) + + # Unless the intent of an experiment is to examine computational issues within an Exchange, + # it will typically have either 1 ns delay (near instant but cannot process multiple orders + # in the same atomic time unit) or 0 ns delay (can process any number of orders, always in + # the atomic time unit in which they are received). This is separate from, and additional + # to, any parallel pipeline delay imposed for order book activity. + + # Note that computation delay MUST be updated before any calls to sendMessage. + self.setComputationDelay(self.computation_delay) + + # Is the exchange closed? (This block only affects post-close, not pre-open.) + if currentTime > self.mkt_close: + # Most messages after close will receive a 'MKT_CLOSED' message in response. A few things + # might still be processed, like requests for final trade prices or such. + if msg.body["msg"] in [ + "LIMIT_ORDER", + "MARKET_ORDER", + "CANCEL_ORDER", + "MODIFY_ORDER", + ]: + log_print("{} received {}: {}", self.name, msg.body["msg"], msg.body["order"]) + self.sendMessage(msg.body["sender"], Message({"msg": "MKT_CLOSED"})) + + # Don't do any further processing on these messages! + return + elif "QUERY" in msg.body["msg"]: + # Specifically do allow querying after market close, so agents can get the + # final trade of the day as their "daily close" price for a symbol. + pass + else: + log_print( + "{} received {}, discarded: market is closed.", + self.name, + msg.body["msg"], + ) + self.sendMessage(msg.body["sender"], Message({"msg": "MKT_CLOSED"})) + + # Don't do any further processing on these messages! 
+ return + + # Log order messages only if that option is configured. Log all other messages. + if msg.body["msg"] in [ + "LIMIT_ORDER", + "MARKET_ORDER", + "CANCEL_ORDER", + "MODIFY_ORDER", + ]: + if self.log_orders: + self.logEvent(msg.body["msg"], msg.body["order"].to_dict()) + else: + self.logEvent(msg.body["msg"], msg.body["sender"]) + + # Handle the DATA SUBSCRIPTION request and cancellation messages from the agents. + if msg.body["msg"] in [ + "MARKET_DATA_SUBSCRIPTION_REQUEST", + "MARKET_DATA_SUBSCRIPTION_CANCELLATION", + ]: + log_print( + "{} received {} request from agent {}", + self.name, + msg.body["msg"], + msg.body["sender"], + ) + self.updateSubscriptionDict(msg, currentTime) + + # Handle all message types understood by this exchange. + if msg.body["msg"] == "WHEN_MKT_OPEN": + log_print( + "{} received WHEN_MKT_OPEN request from agent {}", + self.name, + msg.body["sender"], + ) + + # The exchange is permitted to respond to requests for simple immutable data (like "what are your + # hours?") instantly. This does NOT include anything that queries mutable data, like equity + # quotes or trades. + self.setComputationDelay(0) + + self.sendMessage( + msg.body["sender"], + Message({"msg": "WHEN_MKT_OPEN", "data": self.mkt_open}), + ) + elif msg.body["msg"] == "WHEN_MKT_CLOSE": + log_print( + "{} received WHEN_MKT_CLOSE request from agent {}", + self.name, + msg.body["sender"], + ) + + # The exchange is permitted to respond to requests for simple immutable data (like "what are your + # hours?") instantly. This does NOT include anything that queries mutable data, like equity + # quotes or trades. + self.setComputationDelay(0) + + self.sendMessage( + msg.body["sender"], + Message({"msg": "WHEN_MKT_CLOSE", "data": self.mkt_close}), + ) + elif msg.body["msg"] == "QUERY_LAST_TRADE": + symbol = msg.body["symbol"] + if symbol not in self.order_books: + log_print("Last trade request discarded. 
Unknown symbol: {}", symbol) + else: + log_print( + "{} received QUERY_LAST_TRADE ({}) request from agent {}", + self.name, + symbol, + msg.body["sender"], + ) + + # Return the single last executed trade price (currently not volume) for the requested symbol. + # This will return the average share price if multiple executions resulted from a single order. + self.sendMessage( + msg.body["sender"], + Message( + { + "msg": "QUERY_LAST_TRADE", + "symbol": symbol, + "data": self.order_books[symbol].last_trade, + "mkt_closed": (True if currentTime > self.mkt_close else False), + } + ), + ) + elif msg.body["msg"] == "QUERY_SPREAD": + symbol = msg.body["symbol"] + depth = msg.body["depth"] + if symbol not in self.order_books: + log_print("Bid-ask spread request discarded. Unknown symbol: {}", symbol) + else: + log_print( + "{} received QUERY_SPREAD ({}:{}) request from agent {}", + self.name, + symbol, + depth, + msg.body["sender"], + ) + + # Return the requested depth on both sides of the order book for the requested symbol. + # Returns price levels and aggregated volume at each level (not individual orders). + self.sendMessage( + msg.body["sender"], + Message( + { + "msg": "QUERY_SPREAD", + "symbol": symbol, + "depth": depth, + "bids": self.order_books[symbol].getInsideBids(depth), + "asks": self.order_books[symbol].getInsideAsks(depth), + "data": self.order_books[symbol].last_trade, + "mkt_closed": (True if currentTime > self.mkt_close else False), + "book": "", + } + ), + ) + + # It is possible to also send the pretty-printed order book to the agent for logging, but forcing pretty-printing + # of a large order book is very slow, so we should only do it with good reason. We don't currently + # have a configurable option for it. 
+ # "book": self.order_books[symbol].prettyPrint(silent=True) })) + elif msg.body["msg"] == "QUERY_ORDER_STREAM": + symbol = msg.body["symbol"] + length = msg.body["length"] + + if symbol not in self.order_books: + log_print("Order stream request discarded. Unknown symbol: {}", symbol) + else: + log_print( + "{} received QUERY_ORDER_STREAM ({}:{}) request from agent {}", + self.name, + symbol, + length, + msg.body["sender"], + ) + + # We return indices [1:length] inclusive because the agent will want "orders leading up to the last + # L trades", and the items under index 0 are more recent than the last trade. + self.sendMessage( + msg.body["sender"], + Message( + { + "msg": "QUERY_ORDER_STREAM", + "symbol": symbol, + "length": length, + "mkt_closed": True if currentTime > self.mkt_close else False, + "orders": self.order_books[symbol].history[1 : length + 1], + } + ), + ) + elif msg.body["msg"] == "QUERY_TRANSACTED_VOLUME": + symbol = msg.body["symbol"] + lookback_period = msg.body["lookback_period"] + if symbol not in self.order_books: + log_print("Order stream request discarded. Unknown symbol: {}", symbol) + else: + log_print( + "{} received QUERY_TRANSACTED_VOLUME ({}:{}) request from agent {}", + self.name, + symbol, + lookback_period, + msg.body["sender"], + ) + self.sendMessage( + msg.body["sender"], + Message( + { + "msg": "QUERY_TRANSACTED_VOLUME", + "symbol": symbol, + "transacted_volume": self.order_books[symbol].get_transacted_volume(lookback_period), + "mkt_closed": True if currentTime > self.mkt_close else False, + } + ), + ) + elif msg.body["msg"] == "LIMIT_ORDER": + order = msg.body["order"] + log_print("{} received LIMIT_ORDER: {}", self.name, order) + if order.symbol not in self.order_books: + log_print("Limit Order discarded. Unknown symbol: {}", order.symbol) + else: + # Hand the order to the order book for processing. 
+ self.order_books[order.symbol].handleLimitOrder(deepcopy(order)) + self.publishOrderBookData() + elif msg.body["msg"] == "MARKET_ORDER": + order = msg.body["order"] + log_print("{} received MARKET_ORDER: {}", self.name, order) + if order.symbol not in self.order_books: + log_print("Market Order discarded. Unknown symbol: {}", order.symbol) + else: + # Hand the market order to the order book for processing. + self.order_books[order.symbol].handleMarketOrder(deepcopy(order)) + self.publishOrderBookData() + elif msg.body["msg"] == "CANCEL_ORDER": + # Note: this is somewhat open to abuse, as in theory agents could cancel other agents' orders. + # An agent could also become confused if they receive a (partial) execution on an order they + # then successfully cancel, but receive the cancel confirmation first. Things to think about + # for later... + order = msg.body["order"] + log_print("{} received CANCEL_ORDER: {}", self.name, order) + if order.symbol not in self.order_books: + log_print("Cancellation request discarded. Unknown symbol: {}", order.symbol) + else: + # Hand the order to the order book for processing. + self.order_books[order.symbol].cancelOrder(deepcopy(order)) + self.publishOrderBookData() + elif msg.body["msg"] == "MODIFY_ORDER": + # Replace an existing order with a modified order. There could be some timing issues + # here. What if an order is partially executed, but the submitting agent has not + # yet received the norification, and submits a modification to the quantity of the + # (already partially executed) order? I guess it is okay if we just think of this + # as "delete and then add new" and make it the agent's problem if anything weird + # happens. + order = msg.body["order"] + new_order = msg.body["new_order"] + log_print("{} received MODIFY_ORDER: {}, new order: {}".format(self.name, order, new_order)) + if order.symbol not in self.order_books: + log_print("Modification request discarded. 
Unknown symbol: {}".format(order.symbol)) + else: + self.order_books[order.symbol].modifyOrder(deepcopy(order), deepcopy(new_order)) + self.publishOrderBookData() + + def updateSubscriptionDict(self, msg, currentTime): + # The subscription dict is a dictionary with the key = agent ID, + # value = dict (key = symbol, value = list [levels (no of levels to recieve updates for), + # frequency (min number of ns between messages), last agent update timestamp] + # e.g. {101 : {'AAPL' : [1, 10, pd.Timestamp(10:00:00)}} + if msg.body["msg"] == "MARKET_DATA_SUBSCRIPTION_REQUEST": + agent_id, symbol, levels, freq = ( + msg.body["sender"], + msg.body["symbol"], + msg.body["levels"], + msg.body["freq"], + ) + self.subscription_dict[agent_id] = {symbol: [levels, freq, currentTime]} + elif msg.body["msg"] == "MARKET_DATA_SUBSCRIPTION_CANCELLATION": + agent_id, symbol = msg.body["sender"], msg.body["symbol"] + del self.subscription_dict[agent_id][symbol] + + def publishOrderBookData(self): + """ + The exchange agents sends an order book update to the agents using the subscription API if one of the following + conditions are met: + 1) agent requests ALL order book updates (freq == 0) + 2) order book update timestamp > last time agent was updated AND the orderbook update time stamp is greater than + the last agent update time stamp by a period more than that specified in the freq parameter. 
+ """ + for agent_id, params in self.subscription_dict.items(): + for symbol, values in params.items(): + levels, freq, last_agent_update = values[0], values[1], values[2] + orderbook_last_update = self.order_books[symbol].last_update_ts + if (freq == 0) or ( + (orderbook_last_update > last_agent_update) + and ((orderbook_last_update - last_agent_update).delta >= freq) + ): + self.sendMessage( + agent_id, + Message( + { + "msg": "MARKET_DATA", + "symbol": symbol, + "bids": self.order_books[symbol].getInsideBids(levels), + "asks": self.order_books[symbol].getInsideAsks(levels), + "last_transaction": self.order_books[symbol].last_trade, + "exchange_ts": self.currentTime, + } + ), + ) + self.subscription_dict[agent_id][symbol][2] = orderbook_last_update + + def logOrderBookSnapshots(self, symbol): + """ + Log full depth quotes (price, volume) from this order book at some pre-determined frequency. Here we are looking at + the actual log for this order book (i.e. are there snapshots to export, independent of the requested frequency). + """ + + def get_quote_range_iterator(s): + """Helper method for order book logging. Takes pandas Series and returns python range() from first to last + element. + """ + forbidden_values = [ + 0, + 19999900, + ] # TODO: Put constant value in more sensible place! + quotes = sorted(s) + for val in forbidden_values: + try: + quotes.remove(val) + except ValueError: + pass + return quotes + + book = self.order_books[symbol] + + if book.book_log: + + print("Logging order book to file...") + dfLog = book.book_log_to_df() + dfLog.set_index("QuoteTime", inplace=True) + dfLog = dfLog[~dfLog.index.duplicated(keep="last")] + dfLog.sort_index(inplace=True) + + if str(self.book_freq).isdigit() and int(self.book_freq) == 0: # Save all possible information + # Get the full range of quotes at the finest possible resolution. 
+ quotes = get_quote_range_iterator(dfLog.columns.unique()) + + # Restructure the log to have multi-level rows of all possible pairs of time and quote + # with volume as the only column. + if not self.wide_book: + filledIndex = pd.MultiIndex.from_product([dfLog.index, quotes], names=["time", "quote"]) + dfLog = dfLog.stack() + dfLog = dfLog.reindex(filledIndex) + + filename = f"ORDERBOOK_{symbol}_FULL" + + else: # Sample at frequency self.book_freq + # With multiple quotes in a nanosecond, use the last one, then resample to the requested freq. + dfLog = dfLog.resample(self.book_freq).ffill() + dfLog.sort_index(inplace=True) + + # Create a fully populated index at the desired frequency from market open to close. + # Then project the logged data into this complete index. + time_idx = pd.date_range(self.mkt_open, self.mkt_close, freq=self.book_freq, closed="right") + dfLog = dfLog.reindex(time_idx, method="ffill") + dfLog.sort_index(inplace=True) + + if not self.wide_book: + dfLog = dfLog.stack() + dfLog.sort_index(inplace=True) + + # Get the full range of quotes at the finest possible resolution. + quotes = get_quote_range_iterator(dfLog.index.get_level_values(1).unique()) + + # Restructure the log to have multi-level rows of all possible pairs of time and quote + # with volume as the only column. + filledIndex = pd.MultiIndex.from_product([time_idx, quotes], names=["time", "quote"]) + dfLog = dfLog.reindex(filledIndex) + + filename = f"ORDERBOOK_{symbol}_FREQ_{self.book_freq}" + + # Final cleanup + if not self.wide_book: + dfLog.rename("Volume") + df = pd.SparseDataFrame(index=dfLog.index) + df["Volume"] = dfLog + else: + df = dfLog + df = df.reindex(sorted(df.columns), axis=1) + + # Archive the order book snapshots directly to a file named with the symbol, rather than + # to the exchange agent log. 
+ self.writeLog(df, filename=filename) + print("Order book logging complete!") + + def sendMessage(self, recipientID, msg): + # The ExchangeAgent automatically applies appropriate parallel processing pipeline delay + # to those message types which require it. + # TODO: probably organize the order types into categories once there are more, so we can + # take action by category (e.g. ORDER-related messages) instead of enumerating all message + # types to be affected. + if msg.body["msg"] in ["ORDER_ACCEPTED", "ORDER_CANCELLED", "ORDER_EXECUTED"]: + # Messages that require order book modification (not simple queries) incur the additional + # parallel processing delay as configured. + super().sendMessage(recipientID, msg, delay=self.pipeline_delay) + if self.log_orders: + self.logEvent(msg.body["msg"], msg.body["order"].to_dict()) + else: + # Other message types incur only the currently-configured computation delay for this agent. + super().sendMessage(recipientID, msg) + + # Simple accessor methods for the market open and close times. + def getMarketOpen(self): + return self.__mkt_open + + def getMarketClose(self): + return self.__mkt_close diff --git a/agent/FinancialAgent.py b/agent/FinancialAgent.py index daca864d1..d9ac069c4 100644 --- a/agent/FinancialAgent.py +++ b/agent/FinancialAgent.py @@ -1,7 +1,9 @@ -from agent.Agent import Agent import sys import traceback +from agent.Agent import Agent + + # The FinancialAgent class contains attributes and methods that should be available # to all agent types (traders, exchanges, etc) in a financial market simulation. # To be honest, it mainly exists because the base Agent class should not have any @@ -10,27 +12,28 @@ # exchanges to make this more useful later on. class FinancialAgent(Agent): - def __init__(self, id, name, type, random_state, log_to_file=True): - # Base class init. 
- super().__init__(id, name, type, random_state, log_to_file) + def __init__(self, id, name, type, random_state, log_to_file=True): + # Base class init. + super().__init__(id, name, type, random_state, log_to_file) - # Used by any subclass to dollarize an int-cents price for printing. - def dollarize (self, cents): - return dollarize(cents) + # Used by any subclass to dollarize an int-cents price for printing. + def dollarize(self, cents): + return dollarize(cents) - pass + pass # Dollarizes int-cents prices for printing. Defined outside the class for # utility access by non-agent classes. + def dollarize(cents): - if type(cents) is list: - return ( [ dollarize(x) for x in cents ] ) - elif type(cents) is int: - return "${:0.2f}".format(cents / 100) - else: - # If cents is already a float, there is an error somewhere. - print ("ERROR: dollarize(cents) called without int or list of ints: {}".format(cents)) - traceback.print_stack() - sys.exit() + if type(cents) is list: + return [dollarize(x) for x in cents] + elif type(cents) is int: + return "${:0.2f}".format(cents / 100) + else: + # If cents is already a float, there is an error somewhere. + print("ERROR: dollarize(cents) called without int or list of ints: {}".format(cents)) + traceback.print_stack() + sys.exit() diff --git a/agent/FundamentalTrackingAgent.py b/agent/FundamentalTrackingAgent.py index dc93ae6fe..e8d1e75a3 100644 --- a/agent/FundamentalTrackingAgent.py +++ b/agent/FundamentalTrackingAgent.py @@ -1,22 +1,27 @@ -from agent.TradingAgent import TradingAgent -from util.util import log_print import numpy as np import pandas as pd +from agent.TradingAgent import TradingAgent +from util.util import log_print + class FundamentalTrackingAgent(TradingAgent): - """ Agent who collects and saves to disk noise-free observations of the fundamental. 
""" + """Agent who collects and saves to disk noise-free observations of the fundamental.""" def __init__(self, id, name, type, log_frequency, symbol, log_orders=False): - """ Constructor for FundamentalTrackingAgent + """Constructor for FundamentalTrackingAgent - :param log_frequency: Frequency to update log (in nanoseconds) - :param symbol: symbol for which fundamental is being logged + :param log_frequency: Frequency to update log (in nanoseconds) + :param symbol: symbol for which fundamental is being logged """ - super().__init__(id, name, type, starting_cash=0, log_orders=log_orders, - random_state=np.random.RandomState(seed=np.random.randint(low=0, - high=2 ** 32, - dtype='uint64'))) + super().__init__( + id, + name, + type, + starting_cash=0, + log_orders=log_orders, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) self.log_freqency = log_frequency self.fundamental_series = [] @@ -28,20 +33,19 @@ def kernelStarting(self, startTime): super().kernelStarting(startTime) self.oracle = self.kernel.oracle - def kernelStopping(self): - """ Stops kernel and saves fundamental series to disk. """ + """Stops kernel and saves fundamental series to disk.""" # Always call parent method to be safe. super().kernelStopping() self.writeFundamental() def measureFundamental(self): - """ Saves the fundamental value at self.currentTime to self.fundamental_series. """ + """Saves the fundamental value at self.currentTime to self.fundamental_series.""" obs_t = self.oracle.observePrice(self.symbol, self.currentTime, sigma_n=0) - self.fundamental_series.append({'FundamentalTime': self.currentTime, 'FundamentalValue': obs_t}) + self.fundamental_series.append({"FundamentalTime": self.currentTime, "FundamentalValue": obs_t}) def wakeup(self, currentTime): - """ Advances agent in time and takes measurement of fundamental. 
""" + """Advances agent in time and takes measurement of fundamental.""" # Parent class handles discovery of exchange times and market_open wakeup call. super().wakeup(currentTime) @@ -53,12 +57,15 @@ def wakeup(self, currentTime): self.setWakeup(currentTime + self.getWakeFrequency()) def writeFundamental(self): - """ Logs fundamental series to file. """ + """Logs fundamental series to file.""" dfFund = pd.DataFrame(self.fundamental_series) - dfFund.set_index('FundamentalTime', inplace=True) - self.writeLog(dfFund, filename='fundamental_{symbol}_freq_{self.log_frequency}_ns'.format(self.symbol)) + dfFund.set_index("FundamentalTime", inplace=True) + self.writeLog( + dfFund, + filename="fundamental_{symbol}_freq_{self.log_frequency}_ns".format(self.symbol), + ) print("Noise-free fundamental archival complete.") def getWakeFrequency(self): - return pd.Timedelta(self.log_freqency, unit='ns') + return pd.Timedelta(self.log_freqency, unit="ns") diff --git a/agent/HeuristicBeliefLearningAgent.py b/agent/HeuristicBeliefLearningAgent.py index 2b4826912..b189f7d88 100644 --- a/agent/HeuristicBeliefLearningAgent.py +++ b/agent/HeuristicBeliefLearningAgent.py @@ -1,27 +1,60 @@ -from agent.ZeroIntelligenceAgent import ZeroIntelligenceAgent -from message.Message import Message -from util.util import log_print - +import sys from math import sqrt + import numpy as np import pandas as pd -import sys + +from agent.ZeroIntelligenceAgent import ZeroIntelligenceAgent +from message.Message import Message +from util.util import log_print np.set_printoptions(threshold=np.inf) class HeuristicBeliefLearningAgent(ZeroIntelligenceAgent): - def __init__(self, id, name, type, symbol='IBM', starting_cash=100000, sigma_n=1000, - r_bar=100000, kappa=0.05, sigma_s=100000, q_max=10, sigma_pv=5000000, R_min=0, - R_max=250, eta=1.0, lambda_a=0.005, L=8, log_orders=False, - random_state=None): + def __init__( + self, + id, + name, + type, + symbol="IBM", + starting_cash=100000, + sigma_n=1000, + 
r_bar=100000, + kappa=0.05, + sigma_s=100000, + q_max=10, + sigma_pv=5000000, + R_min=0, + R_max=250, + eta=1.0, + lambda_a=0.005, + L=8, + log_orders=False, + random_state=None, + ): # Base class init. - super().__init__(id, name, type, symbol=symbol, starting_cash=starting_cash, sigma_n=sigma_n, - r_bar=r_bar, kappa=kappa, sigma_s=sigma_s, q_max=q_max, sigma_pv=sigma_pv, R_min=R_min, - R_max=R_max, eta=eta, lambda_a=lambda_a, log_orders=log_orders, - random_state=random_state) + super().__init__( + id, + name, + type, + symbol=symbol, + starting_cash=starting_cash, + sigma_n=sigma_n, + r_bar=r_bar, + kappa=kappa, + sigma_s=sigma_s, + q_max=q_max, + sigma_pv=sigma_pv, + R_min=R_min, + R_max=R_max, + eta=eta, + lambda_a=lambda_a, + log_orders=log_orders, + random_state=random_state, + ) # Store important parameters particular to the HBL agent. self.L = L # length of order book history to use (number of transactions) @@ -33,11 +66,12 @@ def wakeup(self, currentTime): # Only if the superclass leaves the state as ACTIVE should we proceed with our # trading strategy. - if self.state != 'ACTIVE': return + if self.state != "ACTIVE": + return # To make trade decisions, the HBL agent requires recent order stream information. self.getOrderStream(self.symbol, length=self.L) - self.state = 'AWAITING_STREAM' + self.state = "AWAITING_STREAM" def placeOrder(self): # Called when it is time for the agent to determine a limit price and place an order. @@ -51,7 +85,11 @@ def placeOrder(self): if len(self.stream_history[self.symbol]) < self.L: # Not enough history for HBL. - log_print("Insufficient history for HBL: length {}, L {}", len(self.stream_history[self.symbol]), self.L) + log_print( + "Insufficient history for HBL: length {}, L {}", + len(self.stream_history[self.symbol]), + self.L, + ) super().placeOrder() return @@ -70,9 +108,11 @@ def placeOrder(self): # Find the lowest and highest observed prices in the order history. 
for h in self.stream_history[self.symbol]: for id, order in h.items(): - p = order['limit_price'] - if p < low_p: low_p = p - if p > high_p: high_p = p + p = order["limit_price"] + if p < low_p: + low_p = p + if p > high_p: + high_p = p # Set up the ndarray we will use for our computation. # idx 0-7 are sa, sb, ua, ub, num, denom, Pr, Es @@ -83,20 +123,22 @@ def placeOrder(self): # h follows increasing "transactions into the past", with index zero being orders # after the most recent transaction. for id, order in h.items(): - p = order['limit_price'] - if p < low_p: low_p = p - if p > high_p: high_p = p + p = order["limit_price"] + if p < low_p: + low_p = p + if p > high_p: + high_p = p # For now if there are any transactions, consider the order successful. For single # unit orders, this is sufficient. For multi-unit orders, # we may wish to switch to a proportion of shares executed. - if order['is_buy_order']: - if order['transactions']: + if order["is_buy_order"]: + if order["transactions"]: nd[p - low_p, 1] += 1 else: nd[p - low_p, 3] += 1 else: - if order['transactions']: + if order["transactions"]: nd[p - low_p, 0] += 1 else: nd[p - low_p, 2] += 1 @@ -119,7 +161,7 @@ def placeOrder(self): # nan to zero, which is the right answer for us. # Compute probability estimates for successful transaction at all price levels. - with np.errstate(divide='ignore', invalid='ignore'): + with np.errstate(divide="ignore", invalid="ignore"): nd[:, 6] = np.nan_to_num(np.divide(nd[:, 4], nd[:, 5])) # Compute expected surplus for all price levels. @@ -135,14 +177,22 @@ def placeOrder(self): # If the best expected surplus is positive, go for it. if best_Es > 0: - log_print("Numpy: {} selects limit price {} with expected surplus {} (Pr = {:0.4f})", self.name, best_p, - int(round(best_Es)), best_Pr) + log_print( + "Numpy: {} selects limit price {} with expected surplus {} (Pr = {:0.4f})", + self.name, + best_p, + int(round(best_Es)), + best_Pr, + ) # Place the constructed order. 
self.placeLimitOrder(self.symbol, 100, buy, int(round(best_p))) else: # Do nothing if best limit price has negative expected surplus with below code. - log_print("Numpy: {} elects not to place an order (best expected surplus <= 0)", self.name) + log_print( + "Numpy: {} elects not to place an order (best expected surplus <= 0)", + self.name, + ) # OTHER OPTION 1: Allow negative expected surplus with below code. # log_print ("Numpy: {} placing undesirable order (best expected surplus <= 0)", self.name) @@ -162,13 +212,14 @@ def receiveMessage(self, currentTime, msg): super().receiveMessage(currentTime, msg) # Do our special stuff. - if self.state == 'AWAITING_STREAM': + if self.state == "AWAITING_STREAM": # We were waiting to receive the recent order stream. - if msg.body['msg'] == 'QUERY_ORDER_STREAM': + if msg.body["msg"] == "QUERY_ORDER_STREAM": # This is what we were waiting for. # But if the market is now closed, don't advance. - if self.mkt_closed: return + if self.mkt_closed: + return self.getCurrentSpread(self.symbol) - self.state = 'AWAITING_SPREAD' \ No newline at end of file + self.state = "AWAITING_SPREAD" diff --git a/agent/NoiseAgent.py b/agent/NoiseAgent.py index d07efc886..ba170bee1 100644 --- a/agent/NoiseAgent.py +++ b/agent/NoiseAgent.py @@ -1,21 +1,39 @@ -from agent.TradingAgent import TradingAgent -from util.util import log_print - from math import sqrt + import numpy as np import pandas as pd +from agent.TradingAgent import TradingAgent +from util.util import log_print + class NoiseAgent(TradingAgent): - def __init__(self, id, name, type, symbol='IBM', starting_cash=100000, - log_orders=False, log_to_file=True, random_state=None, wakeup_time = None ): + def __init__( + self, + id, + name, + type, + symbol="IBM", + starting_cash=100000, + log_orders=False, + log_to_file=True, + random_state=None, + wakeup_time=None, + ): # Base class init. 
- super().__init__(id, name, type, starting_cash=starting_cash, log_orders=log_orders, - log_to_file=log_to_file, random_state=random_state) - - self.wakeup_time = wakeup_time, + super().__init__( + id, + name, + type, + starting_cash=starting_cash, + log_orders=log_orders, + log_to_file=log_to_file, + random_state=random_state, + ) + + self.wakeup_time = (wakeup_time,) self.symbol = symbol # symbol to trade @@ -25,7 +43,7 @@ def __init__(self, id, name, type, symbol='IBM', starting_cash=100000, # The agent begins in its "complete" state, not waiting for # any special event or condition. - self.state = 'AWAITING_WAKEUP' + self.state = "AWAITING_WAKEUP" # The agent must track its previous wake time, so it knows how many time # units have passed. @@ -48,13 +66,13 @@ def kernelStopping(self): # Print end of day valuation. H = int(round(self.getHoldings(self.symbol), -2) / 100) - #noise trader surplus is marked to EOD + # noise trader surplus is marked to EOD bid, bid_vol, ask, ask_vol = self.getKnownBidAsk(self.symbol) if bid and ask: - rT = int(bid + ask)/2 + rT = int(bid + ask) / 2 else: - rT = self.last_trade[ self.symbol ] + rT = self.last_trade[self.symbol] # final (real) fundamental value times shares held. surplus = rT * H @@ -62,14 +80,20 @@ def kernelStopping(self): log_print("surplus after holdings: {}", surplus) # Add ending cash value and subtract starting cash value. - surplus += self.holdings['CASH'] - self.starting_cash + surplus += self.holdings["CASH"] - self.starting_cash surplus = float(surplus) / self.starting_cash - self.logEvent('FINAL_VALUATION', surplus, True) + self.logEvent("FINAL_VALUATION", surplus, True) log_print( "{} final report. 
Holdings {}, end cash {}, start cash {}, final fundamental {}, surplus {}", - self.name, H, self.holdings['CASH'], self.starting_cash, rT, surplus) + self.name, + H, + self.holdings["CASH"], + self.starting_cash, + rT, + surplus, + ) print("Final relative surplus", self.name, surplus) @@ -77,7 +101,7 @@ def wakeup(self, currentTime): # Parent class handles discovery of exchange times and market_open wakeup call. super().wakeup(currentTime) - self.state = 'INACTIVE' + self.state = "INACTIVE" if not self.mkt_open or not self.mkt_close: # TradingAgent handles discovery of exchange times. @@ -97,22 +121,22 @@ def wakeup(self, currentTime): # Market is closed and we already got the daily close price. return - if self.wakeup_time[0] >currentTime: + if self.wakeup_time[0] > currentTime: self.setWakeup(self.wakeup_time[0]) if self.mkt_closed and (not self.symbol in self.daily_close_price): self.getCurrentSpread(self.symbol) - self.state = 'AWAITING_SPREAD' + self.state = "AWAITING_SPREAD" return if type(self) == NoiseAgent: self.getCurrentSpread(self.symbol) - self.state = 'AWAITING_SPREAD' + self.state = "AWAITING_SPREAD" else: - self.state = 'ACTIVE' + self.state = "ACTIVE" def placeOrder(self): - #place order in random direction at a mid + # place order in random direction at a mid buy_indicator = np.random.randint(0, 1 + 1) bid, bid_vol, ask, ask_vol = self.getKnownBidAsk(self.symbol) @@ -130,28 +154,30 @@ def receiveMessage(self, currentTime, msg): # If our internal state indicates we were waiting for a particular event, # check if we can transition to a new state. - if self.state == 'AWAITING_SPREAD': + if self.state == "AWAITING_SPREAD": # We were waiting to receive the current spread/book. Since we don't currently # track timestamps on retained information, we rely on actually seeing a # QUERY_SPREAD response message. - if msg.body['msg'] == 'QUERY_SPREAD': + if msg.body["msg"] == "QUERY_SPREAD": # This is what we were waiting for. 
# But if the market is now closed, don't advance to placing orders. - if self.mkt_closed: return + if self.mkt_closed: + return # We now have the information needed to place a limit order with the eta # strategic threshold parameter. self.placeOrder() - self.state = 'AWAITING_WAKEUP' + self.state = "AWAITING_WAKEUP" # Internal state and logic specific to this agent subclass. # Cancel all open orders. # Return value: did we issue any cancellation requests? def cancelOrders(self): - if not self.orders: return False + if not self.orders: + return False for id, order in self.orders.items(): self.cancelOrder(order) @@ -159,4 +185,4 @@ def cancelOrders(self): return True def getWakeFrequency(self): - return pd.Timedelta(self.random_state.randint(low=0, high=100), unit='ns') + return pd.Timedelta(self.random_state.randint(low=0, high=100), unit="ns") diff --git a/agent/OrderBookImbalanceAgent.py b/agent/OrderBookImbalanceAgent.py index 31c1f2d31..202a900d2 100644 --- a/agent/OrderBookImbalanceAgent.py +++ b/agent/OrderBookImbalanceAgent.py @@ -1,9 +1,10 @@ -from agent.TradingAgent import TradingAgent +import matplotlib import pandas as pd + +from agent.TradingAgent import TradingAgent from util.util import log_print -import matplotlib -matplotlib.use('TkAgg') +matplotlib.use("TkAgg") import matplotlib.pyplot as plt @@ -25,8 +26,28 @@ class OrderBookImbalanceAgent(TradingAgent): # For example, entry_threshold=0.1 causes long entry at 0.6 or short entry at 0.4. # Trail Dist: how far behind the peak bid_pct should the trailing stop follow? 
- def __init__(self, id, name, type, symbol=None, levels=10, entry_threshold=0.17, trail_dist=0.085, freq=3600000000000, starting_cash=1000000, log_orders=True, random_state=None): - super().__init__(id, name, type, starting_cash=starting_cash, log_orders=log_orders, random_state=random_state) + def __init__( + self, + id, + name, + type, + symbol=None, + levels=10, + entry_threshold=0.17, + trail_dist=0.085, + freq=3600000000000, + starting_cash=1000000, + log_orders=True, + random_state=None, + ): + super().__init__( + id, + name, + type, + starting_cash=starting_cash, + log_orders=log_orders, + random_state=random_state, + ) self.symbol = symbol self.levels = levels self.entry_threshold = entry_threshold @@ -39,7 +60,6 @@ def __init__(self, id, name, type, symbol=None, levels=10, entry_threshold=0.17, self.trailing_stop = None self.plotme = [] - def kernelStarting(self, startTime): super().kernelStarting(startTime) @@ -50,11 +70,11 @@ def wakeup(self, currentTime): def receiveMessage(self, currentTime, msg): super().receiveMessage(currentTime, msg) - if msg.body['msg'] == 'MARKET_DATA': + if msg.body["msg"] == "MARKET_DATA": self.cancelOrders() self.last_market_data_update = currentTime - bids, asks = msg.body['bids'], msg.body['asks'] + bids, asks = msg.body["bids"], msg.body["asks"] bid_liq = sum(x[1] for x in bids) ask_liq = sum(x[1] for x in asks) @@ -77,58 +97,107 @@ def receiveMessage(self, currentTime, msg): if self.is_short: # Update trailing stop. 
if bid_pct - self.trail_dist > self.trailing_stop: - log_print("Trailing stop updated: new > old ({:2f} > {:2f})", bid_pct - self.trail_dist, self.trailing_stop) + log_print( + "Trailing stop updated: new > old ({:2f} > {:2f})", + bid_pct - self.trail_dist, + self.trailing_stop, + ) self.trailing_stop = bid_pct - self.trail_dist else: - log_print("Trailing stop remains: potential < old ({:2f} < {:2f})", bid_pct - self.trail_dist, self.trailing_stop) + log_print( + "Trailing stop remains: potential < old ({:2f} < {:2f})", + bid_pct - self.trail_dist, + self.trailing_stop, + ) # Check the trailing stop. - if bid_pct < self.trailing_stop: - log_print("OBI agent exiting short position: bid_pct < trailing_stop ({:2f} < {:2f})", bid_pct, self.trailing_stop) + if bid_pct < self.trailing_stop: + log_print( + "OBI agent exiting short position: bid_pct < trailing_stop ({:2f} < {:2f})", + bid_pct, + self.trailing_stop, + ) target = 0 self.is_short = False self.trailing_stop = None else: - log_print("OBI agent holding short position: bid_pct > trailing_stop ({:2f} > {:2f})", bid_pct, self.trailing_stop) + log_print( + "OBI agent holding short position: bid_pct > trailing_stop ({:2f} > {:2f})", + bid_pct, + self.trailing_stop, + ) target = -100 # If we are long, we need to decide if we should hold or exit. elif self.is_long: if bid_pct + self.trail_dist < self.trailing_stop: - log_print("Trailing stop updated: new < old ({:2f} < {:2f})", bid_pct + self.trail_dist, self.trailing_stop) + log_print( + "Trailing stop updated: new < old ({:2f} < {:2f})", + bid_pct + self.trail_dist, + self.trailing_stop, + ) self.trailing_stop = bid_pct + self.trail_dist else: - log_print("Trailing stop remains: potential > old ({:2f} > {:2f})", bid_pct + self.trail_dist, self.trailing_stop) + log_print( + "Trailing stop remains: potential > old ({:2f} > {:2f})", + bid_pct + self.trail_dist, + self.trailing_stop, + ) # Check the trailing stop. 
- if bid_pct > self.trailing_stop: - log_print("OBI agent exiting long position: bid_pct > trailing_stop ({:2f} > {:2f})", bid_pct, self.trailing_stop) + if bid_pct > self.trailing_stop: + log_print( + "OBI agent exiting long position: bid_pct > trailing_stop ({:2f} > {:2f})", + bid_pct, + self.trailing_stop, + ) target = 0 self.is_long = False self.trailing_stop = None else: - log_print("OBI agent holding long position: bid_pct < trailing_stop ({:2f} < {:2f})", bid_pct, self.trailing_stop) + log_print( + "OBI agent holding long position: bid_pct < trailing_stop ({:2f} < {:2f})", + bid_pct, + self.trailing_stop, + ) target = 100 # If we are flat, we need to decide if we should enter (long or short). else: - if bid_pct > (0.5 + self.entry_threshold): - log_print("OBI agent entering long position: bid_pct < entry_threshold ({:2f} < {:2f})", bid_pct, 0.5 - self.entry_threshold) - target = 100 - self.is_long = True - self.trailing_stop = bid_pct + self.trail_dist - log_print("Initial trailing stop: {:2f}", self.trailing_stop) - elif bid_pct < (0.5 - self.entry_threshold): - log_print("OBI agent entering short position: bid_pct > entry_threshold ({:2f} > {:2f})", bid_pct, 0.5 + self.entry_threshold) - target = -100 - self.is_short = True - self.trailing_stop = bid_pct - self.trail_dist - log_print("Initial trailing stop: {:2f}", self.trailing_stop) - else: - log_print("OBI agent staying flat: long_entry < bid_pct < short_entry ({:2f} < {:2f} < {:2f})", 0.5 - self.entry_threshold, bid_pct, 0.5 + self.entry_threshold) - target = 0 - - - self.plotme.append( { 'currentTime' : self.currentTime, 'midpoint' : (asks[0][0] + bids[0][0]) / 2, 'bid_pct' : bid_pct } ) + if bid_pct > (0.5 + self.entry_threshold): + log_print( + "OBI agent entering long position: bid_pct < entry_threshold ({:2f} < {:2f})", + bid_pct, + 0.5 - self.entry_threshold, + ) + target = 100 + self.is_long = True + self.trailing_stop = bid_pct + self.trail_dist + log_print("Initial trailing stop: {:2f}", 
self.trailing_stop) + elif bid_pct < (0.5 - self.entry_threshold): + log_print( + "OBI agent entering short position: bid_pct > entry_threshold ({:2f} > {:2f})", + bid_pct, + 0.5 + self.entry_threshold, + ) + target = -100 + self.is_short = True + self.trailing_stop = bid_pct - self.trail_dist + log_print("Initial trailing stop: {:2f}", self.trailing_stop) + else: + log_print( + "OBI agent staying flat: long_entry < bid_pct < short_entry ({:2f} < {:2f} < {:2f})", + 0.5 - self.entry_threshold, + bid_pct, + 0.5 + self.entry_threshold, + ) + target = 0 + self.plotme.append( + { + "currentTime": self.currentTime, + "midpoint": (asks[0][0] + bids[0][0]) / 2, + "bid_pct": bid_pct, + } + ) # Adjust holdings to target. holdings = self.holdings[self.symbol] if self.symbol in self.holdings else 0 @@ -144,14 +213,12 @@ def receiveMessage(self, currentTime, msg): log_print("Adjusting holdings by {}", delta) self.placeLimitOrder(self.symbol, abs(delta), direction, price) - def getWakeFrequency(self): - return pd.Timedelta('1s') - + return pd.Timedelta("1s") # Computes required limit price to immediately execute a trade for the specified quantity # of shares. - def computeRequiredPrice (self, direction, shares, known_bids, known_asks): + def computeRequiredPrice(self, direction, shares, known_bids, known_asks): book = known_asks if direction else known_bids # Start at the inside and add up the shares. @@ -168,26 +235,23 @@ def computeRequiredPrice (self, direction, shares, known_bids, known_asks): # Not enough shares. Just return worst price (highest ask, lowest bid). return book[-1][0] - # Cancel all open orders. # Return value: did we issue any cancellation requests? def cancelOrders(self): - if not self.orders: return False + if not self.orders: + return False for id, order in self.orders.items(): self.cancelOrder(order) return True - # Lifecycle. def kernelTerminating(self): - # Plotting code is probably not needed here long term, but helps during development. 
- - #df = pd.DataFrame(self.plotme) - #df.set_index('currentTime', inplace=True) - #df.rolling(30).mean().plot(secondary_y=['bid_pct'], figsize=(12,9)) - #plt.show() - super().kernelTerminating() - + # Plotting code is probably not needed here long term, but helps during development. + # df = pd.DataFrame(self.plotme) + # df.set_index('currentTime', inplace=True) + # df.rolling(30).mean().plot(secondary_y=['bid_pct'], figsize=(12,9)) + # plt.show() + super().kernelTerminating() diff --git a/agent/TradingAgent.py b/agent/TradingAgent.py index ed898e7b7..55d690d65 100644 --- a/agent/TradingAgent.py +++ b/agent/TradingAgent.py @@ -1,12 +1,13 @@ -from agent.FinancialAgent import FinancialAgent +import sys +from copy import deepcopy + from agent.ExchangeAgent import ExchangeAgent +from agent.FinancialAgent import FinancialAgent from message.Message import Message from util.order.LimitOrder import LimitOrder from util.order.MarketOrder import MarketOrder from util.util import log_print -from copy import deepcopy -import sys # The TradingAgent class (via FinancialAgent, via Agent) is intended as the # base class for all trading agents (i.e. not things like exchanges) in a @@ -15,627 +16,751 @@ # implementing a strategy without too much bookkeeping. class TradingAgent(FinancialAgent): - def __init__(self, id, name, type, random_state=None, starting_cash=100000, log_orders=False, log_to_file=True): - # Base class init. - super().__init__(id, name, type, random_state, log_to_file) - - # We don't yet know when the exchange opens or closes. - self.mkt_open = None - self.mkt_close = None - - # Log order activity? - self.log_orders = log_orders - - # Log all activity to file? - if log_orders is None: - self.log_orders = False - self.log_to_file = False - - # Store starting_cash in case we want to refer to it for performance stats. - # It should NOT be modified. Use the 'CASH' key in self.holdings. - # 'CASH' is always in cents! 
Note that agents are limited by their starting - # cash, currently without leverage. Taking short positions is permitted, - # but does NOT increase the amount of at-risk capital allowed. - self.starting_cash = starting_cash - - # TradingAgent has constants to support simulated market orders. - self.MKT_BUY = sys.maxsize - self.MKT_SELL = 0 - - # The base TradingAgent will track its holdings and outstanding orders. - # Holdings is a dictionary of symbol -> shares. CASH is a special symbol - # worth one cent per share. Orders is a dictionary of active, open orders - # (not cancelled, not fully executed) keyed by order_id. - self.holdings = { 'CASH' : starting_cash } - self.orders = {} - - # The base TradingAgent also tracks last known prices for every symbol - # for which it has received as QUERY_LAST_TRADE message. Subclass - # agents may use or ignore this as they wish. Note that the subclass - # agent must request pricing when it wants it. This agent does NOT - # automatically generate such requests, though it has a helper function - # that can be used to make it happen. - self.last_trade = {} - - # used in subscription mode to record the timestamp for which the data was current in the ExchangeAgent - self.exchange_ts = {} - - # When a last trade price comes in after market close, the trading agent - # automatically records it as the daily close price for a symbol. - self.daily_close_price = {} - - self.nav_diff = 0 - self.basket_size = 0 - - # The agent remembers the last known bids and asks (with variable depth, - # showing only aggregate volume at each price level) when it receives - # a response to QUERY_SPREAD. - self.known_bids = {} - self.known_asks = {} - - # The agent remembers the order history communicated by the exchange - # when such is requested by an agent (for example, a heuristic belief - # learning agent). 
- self.stream_history = {} - - # The agent records the total transacted volume in the exchange for a given symbol and lookback period - self.transacted_volume = {} - - # Each agent can choose to log the orders executed - self.executed_orders = [] - - # For special logging at the first moment the simulator kernel begins - # running (which is well after agent init), it is useful to keep a simple - # boolean flag. - self.first_wake = True - - # Remember whether we have already passed the exchange close time, as far - # as we know. - self.mkt_closed = False - - # This is probably a transient feature, but for now we permit the exchange - # to return the entire order book sometimes, for development and debugging. - # It is very expensive to pass this around, and breaks "simulation physics", - # but can really help understand why agents are making certain decisions. - # Subclasses should NOT rely on this feature as part of their strategy, - # as it will go away. - self.book = '' - - - # Simulation lifecycle messages. - - def kernelStarting(self, startTime): - # self.kernel is set in Agent.kernelInitializing() - self.logEvent('STARTING_CASH', self.starting_cash, True) - - # Find an exchange with which we can place orders. It is guaranteed - # to exist by now (if there is one). - self.exchangeID = self.kernel.findAgentByType(ExchangeAgent) - - log_print ("Agent {} requested agent of type Agent.ExchangeAgent. Given Agent ID: {}", - self.id, self.exchangeID) - - # Request a wake-up call as in the base Agent. - super().kernelStarting(startTime) - - - def kernelStopping (self): - # Always call parent method to be safe. - super().kernelStopping() - - # Print end of day holdings. - self.logEvent('FINAL_HOLDINGS', self.fmtHoldings(self.holdings)) - self.logEvent('FINAL_CASH_POSITION', self.holdings['CASH'], True) - - # Mark to market. - cash = self.markToMarket(self.holdings) - - self.logEvent('ENDING_CASH', cash, True) - print ("Final holdings for {}: {}. 
Marked to market: {}".format(self.name, self.fmtHoldings(self.holdings), - cash)) - - # Record final results for presentation/debugging. This is an ugly way - # to do this, but it is useful for now. - mytype = self.type - gain = cash - self.starting_cash - - if mytype in self.kernel.meanResultByAgentType: - self.kernel.meanResultByAgentType[mytype] += gain - self.kernel.agentCountByType[mytype] += 1 - else: - self.kernel.meanResultByAgentType[mytype] = gain - self.kernel.agentCountByType[mytype] = 1 - - - # Simulation participation messages. - - def wakeup (self, currentTime): - super().wakeup(currentTime) - - if self.first_wake: - # Log initial holdings. - self.logEvent('HOLDINGS_UPDATED', self.holdings) - self.first_wake = False - - if self.mkt_open is None: - # Ask our exchange when it opens and closes. - self.sendMessage(self.exchangeID, Message({ "msg" : "WHEN_MKT_OPEN", "sender": self.id })) - self.sendMessage(self.exchangeID, Message({ "msg" : "WHEN_MKT_CLOSE", "sender": self.id })) - - # For the sake of subclasses, TradingAgent now returns a boolean - # indicating whether the agent is "ready to trade" -- has it received - # the market open and closed times, and is the market not already closed. - return (self.mkt_open and self.mkt_close) and not self.mkt_closed - - def requestDataSubscription(self, symbol, levels, freq): - self.sendMessage(recipientID = self.exchangeID, - msg = Message({"msg": "MARKET_DATA_SUBSCRIPTION_REQUEST", - "sender": self.id, "symbol": symbol, "levels": levels, "freq": freq})) - - # Used by any Trading Agent subclass to cancel subscription to market data from the Exchange Agent - def cancelDataSubscription(self, symbol): - self.sendMessage(recipientID=self.exchangeID, - msg=Message({"msg": "MARKET_DATA_SUBSCRIPTION_CANCELLATION", - "sender": self.id, "symbol": symbol})) - - - def receiveMessage (self, currentTime, msg): - super().receiveMessage(currentTime, msg) - - # Do we know the market hours? 
- had_mkt_hours = self.mkt_open is not None and self.mkt_close is not None - - # Record market open or close times. - if msg.body['msg'] == "WHEN_MKT_OPEN": - self.mkt_open = msg.body['data'] - - log_print ("Recorded market open: {}", self.kernel.fmtTime(self.mkt_open)) - - elif msg.body['msg'] == "WHEN_MKT_CLOSE": - self.mkt_close = msg.body['data'] - - log_print ("Recorded market close: {}", self.kernel.fmtTime(self.mkt_close)) - - elif msg.body['msg'] == "ORDER_EXECUTED": - # Call the orderExecuted method, which subclasses should extend. This parent - # class could implement default "portfolio tracking" or "returns tracking" - # behavior. - order = msg.body['order'] - - self.orderExecuted(order) - - elif msg.body['msg'] == "ORDER_ACCEPTED": - # Call the orderAccepted method, which subclasses should extend. - order = msg.body['order'] - - self.orderAccepted(order) - - elif msg.body['msg'] == "ORDER_CANCELLED": - # Call the orderCancelled method, which subclasses should extend. - order = msg.body['order'] - - self.orderCancelled(order) - - elif msg.body['msg'] == "MKT_CLOSED": - # We've tried to ask the exchange for something after it closed. Remember this - # so we stop asking for things that can't happen. - - self.marketClosed() - - elif msg.body['msg'] == 'QUERY_LAST_TRADE': - # Call the queryLastTrade method, which subclasses may extend. - # Also note if the market is closed. - if msg.body['mkt_closed']: self.mkt_closed = True - - self.queryLastTrade(msg.body['symbol'], msg.body['data']) - - elif msg.body['msg'] == 'QUERY_SPREAD': - # Call the querySpread method, which subclasses may extend. - # Also note if the market is closed. - if msg.body['mkt_closed']: self.mkt_closed = True - - self.querySpread(msg.body['symbol'], msg.body['data'], msg.body['bids'], msg.body['asks'], msg.body['book']) - - elif msg.body['msg'] == 'QUERY_ORDER_STREAM': - # Call the queryOrderStream method, which subclasses may extend. - # Also note if the market is closed. 
- if msg.body['mkt_closed']: self.mkt_closed = True - - self.queryOrderStream(msg.body['symbol'], msg.body['orders']) + def __init__( + self, + id, + name, + type, + random_state=None, + starting_cash=100000, + log_orders=False, + log_to_file=True, + ): + # Base class init. + super().__init__(id, name, type, random_state, log_to_file) + + # We don't yet know when the exchange opens or closes. + self.mkt_open = None + self.mkt_close = None + + # Log order activity? + self.log_orders = log_orders + + # Log all activity to file? + if log_orders is None: + self.log_orders = False + self.log_to_file = False + + # Store starting_cash in case we want to refer to it for performance stats. + # It should NOT be modified. Use the 'CASH' key in self.holdings. + # 'CASH' is always in cents! Note that agents are limited by their starting + # cash, currently without leverage. Taking short positions is permitted, + # but does NOT increase the amount of at-risk capital allowed. + self.starting_cash = starting_cash + + # TradingAgent has constants to support simulated market orders. + self.MKT_BUY = sys.maxsize + self.MKT_SELL = 0 + + # The base TradingAgent will track its holdings and outstanding orders. + # Holdings is a dictionary of symbol -> shares. CASH is a special symbol + # worth one cent per share. Orders is a dictionary of active, open orders + # (not cancelled, not fully executed) keyed by order_id. + self.holdings = {"CASH": starting_cash} + self.orders = {} + + # The base TradingAgent also tracks last known prices for every symbol + # for which it has received as QUERY_LAST_TRADE message. Subclass + # agents may use or ignore this as they wish. Note that the subclass + # agent must request pricing when it wants it. This agent does NOT + # automatically generate such requests, though it has a helper function + # that can be used to make it happen. 
+ self.last_trade = {} + + # used in subscription mode to record the timestamp for which the data was current in the ExchangeAgent + self.exchange_ts = {} + + # When a last trade price comes in after market close, the trading agent + # automatically records it as the daily close price for a symbol. + self.daily_close_price = {} + + self.nav_diff = 0 + self.basket_size = 0 + + # The agent remembers the last known bids and asks (with variable depth, + # showing only aggregate volume at each price level) when it receives + # a response to QUERY_SPREAD. + self.known_bids = {} + self.known_asks = {} + + # The agent remembers the order history communicated by the exchange + # when such is requested by an agent (for example, a heuristic belief + # learning agent). + self.stream_history = {} + + # The agent records the total transacted volume in the exchange for a given symbol and lookback period + self.transacted_volume = {} + + # Each agent can choose to log the orders executed + self.executed_orders = [] + + # For special logging at the first moment the simulator kernel begins + # running (which is well after agent init), it is useful to keep a simple + # boolean flag. + self.first_wake = True + + # Remember whether we have already passed the exchange close time, as far + # as we know. + self.mkt_closed = False + + # This is probably a transient feature, but for now we permit the exchange + # to return the entire order book sometimes, for development and debugging. + # It is very expensive to pass this around, and breaks "simulation physics", + # but can really help understand why agents are making certain decisions. + # Subclasses should NOT rely on this feature as part of their strategy, + # as it will go away. + self.book = "" + + # Simulation lifecycle messages. + + def kernelStarting(self, startTime): + # self.kernel is set in Agent.kernelInitializing() + self.logEvent("STARTING_CASH", self.starting_cash, True) + + # Find an exchange with which we can place orders. 
It is guaranteed + # to exist by now (if there is one). + self.exchangeID = self.kernel.findAgentByType(ExchangeAgent) + + log_print( + "Agent {} requested agent of type Agent.ExchangeAgent. Given Agent ID: {}", + self.id, + self.exchangeID, + ) + + # Request a wake-up call as in the base Agent. + super().kernelStarting(startTime) + + def kernelStopping(self): + # Always call parent method to be safe. + super().kernelStopping() + + # Print end of day holdings. + self.logEvent("FINAL_HOLDINGS", self.fmtHoldings(self.holdings)) + self.logEvent("FINAL_CASH_POSITION", self.holdings["CASH"], True) + + # Mark to market. + cash = self.markToMarket(self.holdings) + + self.logEvent("ENDING_CASH", cash, True) + print( + "Final holdings for {}: {}. Marked to market: {}".format(self.name, self.fmtHoldings(self.holdings), cash) + ) + + # Record final results for presentation/debugging. This is an ugly way + # to do this, but it is useful for now. + mytype = self.type + gain = cash - self.starting_cash + + if mytype in self.kernel.meanResultByAgentType: + self.kernel.meanResultByAgentType[mytype] += gain + self.kernel.agentCountByType[mytype] += 1 + else: + self.kernel.meanResultByAgentType[mytype] = gain + self.kernel.agentCountByType[mytype] = 1 + + # Simulation participation messages. + + def wakeup(self, currentTime): + super().wakeup(currentTime) + + if self.first_wake: + # Log initial holdings. + self.logEvent("HOLDINGS_UPDATED", self.holdings) + self.first_wake = False + + if self.mkt_open is None: + # Ask our exchange when it opens and closes. + self.sendMessage(self.exchangeID, Message({"msg": "WHEN_MKT_OPEN", "sender": self.id})) + self.sendMessage(self.exchangeID, Message({"msg": "WHEN_MKT_CLOSE", "sender": self.id})) + + # For the sake of subclasses, TradingAgent now returns a boolean + # indicating whether the agent is "ready to trade" -- has it received + # the market open and closed times, and is the market not already closed. 
+ return (self.mkt_open and self.mkt_close) and not self.mkt_closed + + def requestDataSubscription(self, symbol, levels, freq): + self.sendMessage( + recipientID=self.exchangeID, + msg=Message( + { + "msg": "MARKET_DATA_SUBSCRIPTION_REQUEST", + "sender": self.id, + "symbol": symbol, + "levels": levels, + "freq": freq, + } + ), + ) + + # Used by any Trading Agent subclass to cancel subscription to market data from the Exchange Agent + def cancelDataSubscription(self, symbol): + self.sendMessage( + recipientID=self.exchangeID, + msg=Message( + { + "msg": "MARKET_DATA_SUBSCRIPTION_CANCELLATION", + "sender": self.id, + "symbol": symbol, + } + ), + ) + + def receiveMessage(self, currentTime, msg): + super().receiveMessage(currentTime, msg) + + # Do we know the market hours? + had_mkt_hours = self.mkt_open is not None and self.mkt_close is not None + + # Record market open or close times. + if msg.body["msg"] == "WHEN_MKT_OPEN": + self.mkt_open = msg.body["data"] + + log_print("Recorded market open: {}", self.kernel.fmtTime(self.mkt_open)) + + elif msg.body["msg"] == "WHEN_MKT_CLOSE": + self.mkt_close = msg.body["data"] + + log_print("Recorded market close: {}", self.kernel.fmtTime(self.mkt_close)) + + elif msg.body["msg"] == "ORDER_EXECUTED": + # Call the orderExecuted method, which subclasses should extend. This parent + # class could implement default "portfolio tracking" or "returns tracking" + # behavior. + order = msg.body["order"] + + self.orderExecuted(order) + + elif msg.body["msg"] == "ORDER_ACCEPTED": + # Call the orderAccepted method, which subclasses should extend. + order = msg.body["order"] + + self.orderAccepted(order) + + elif msg.body["msg"] == "ORDER_CANCELLED": + # Call the orderCancelled method, which subclasses should extend. + order = msg.body["order"] + + self.orderCancelled(order) + + elif msg.body["msg"] == "MKT_CLOSED": + # We've tried to ask the exchange for something after it closed. 
Remember this + # so we stop asking for things that can't happen. + + self.marketClosed() + + elif msg.body["msg"] == "QUERY_LAST_TRADE": + # Call the queryLastTrade method, which subclasses may extend. + # Also note if the market is closed. + if msg.body["mkt_closed"]: + self.mkt_closed = True + + self.queryLastTrade(msg.body["symbol"], msg.body["data"]) + + elif msg.body["msg"] == "QUERY_SPREAD": + # Call the querySpread method, which subclasses may extend. + # Also note if the market is closed. + if msg.body["mkt_closed"]: + self.mkt_closed = True + + self.querySpread( + msg.body["symbol"], + msg.body["data"], + msg.body["bids"], + msg.body["asks"], + msg.body["book"], + ) + + elif msg.body["msg"] == "QUERY_ORDER_STREAM": + # Call the queryOrderStream method, which subclasses may extend. + # Also note if the market is closed. + if msg.body["mkt_closed"]: + self.mkt_closed = True + + self.queryOrderStream(msg.body["symbol"], msg.body["orders"]) + + elif msg.body["msg"] == "QUERY_TRANSACTED_VOLUME": + if msg.body["mkt_closed"]: + self.mkt_closed = True + self.query_transacted_volume(msg.body["symbol"], msg.body["transacted_volume"]) + + elif msg.body["msg"] == "MARKET_DATA": + self.handleMarketData(msg) + + # Now do we know the market hours? + have_mkt_hours = self.mkt_open is not None and self.mkt_close is not None + + # Once we know the market open and close times, schedule a wakeup call for market open. + # Only do this once, when we first have both items. + if have_mkt_hours and not had_mkt_hours: + # Agents are asked to generate a wake offset from the market open time. We structure + # this as a subclass request so each agent can supply an appropriate offset relative + # to its trading frequency. + ns_offset = self.getWakeFrequency() + + self.setWakeup(self.mkt_open + ns_offset) + + # Used by any Trading Agent subclass to query the last trade price for a symbol. + # This activity is not logged. 
+ def getLastTrade(self, symbol): + self.sendMessage( + self.exchangeID, + Message({"msg": "QUERY_LAST_TRADE", "sender": self.id, "symbol": symbol}), + ) + + # Used by any Trading Agent subclass to query the current spread for a symbol. + # This activity is not logged. + def getCurrentSpread(self, symbol, depth=1): + self.sendMessage( + self.exchangeID, + Message( + { + "msg": "QUERY_SPREAD", + "sender": self.id, + "symbol": symbol, + "depth": depth, + } + ), + ) + + # Used by any Trading Agent subclass to query the recent order stream for a symbol. + def getOrderStream(self, symbol, length=1): + self.sendMessage( + self.exchangeID, + Message( + { + "msg": "QUERY_ORDER_STREAM", + "sender": self.id, + "symbol": symbol, + "length": length, + } + ), + ) + + def get_transacted_volume(self, symbol, lookback_period="10min"): + """Used by any trading agent subclass to query the total transacted volume in a given lookback period""" + self.sendMessage( + self.exchangeID, + Message( + { + "msg": "QUERY_TRANSACTED_VOLUME", + "sender": self.id, + "symbol": symbol, + "lookback_period": lookback_period, + } + ), + ) + + # Used by any Trading Agent subclass to place a limit order. Parameters expect: + # string (valid symbol), int (positive share quantity), bool (True == BUY), int (price in cents). + # The call may optionally specify an order_id (otherwise global autoincrement is used) and + # whether cash or risk limits should be enforced or ignored for the order. + def placeLimitOrder( + self, + symbol, + quantity, + is_buy_order, + limit_price, + order_id=None, + ignore_risk=True, + tag=None, + ): + order = LimitOrder( + self.id, + self.currentTime, + symbol, + quantity, + is_buy_order, + limit_price, + order_id, + tag, + ) + + if quantity > 0: + # Test if this order can be permitted given our at-risk limits. 
+ new_holdings = self.holdings.copy() + + q = order.quantity if order.is_buy_order else -order.quantity + + if order.symbol in new_holdings: + new_holdings[order.symbol] += q + else: + new_holdings[order.symbol] = q + + # If at_risk is lower, always allow. Otherwise, new_at_risk must be below starting cash. + if not ignore_risk: + # Compute before and after at-risk capital. + at_risk = self.markToMarket(self.holdings) - self.holdings["CASH"] + new_at_risk = self.markToMarket(new_holdings) - new_holdings["CASH"] + + if (new_at_risk > at_risk) and (new_at_risk > self.starting_cash): + log_print( + "TradingAgent ignored limit order due to at-risk constraints: {}\n{}", + order, + self.fmtHoldings(self.holdings), + ) + return + + # Copy the intended order for logging, so any changes made to it elsewhere + # don't retroactively alter our "as placed" log of the order. Eventually + # it might be nice to make the whole history of the order into transaction + # objects inside the order (we're halfway there) so there CAN be just a single + # object per order, that never alters its original state, and eliminate all these copies. + self.orders[order.order_id] = deepcopy(order) + self.sendMessage( + self.exchangeID, + Message({"msg": "LIMIT_ORDER", "sender": self.id, "order": order}), + ) + + # Log this activity. + if self.log_orders: + self.logEvent("ORDER_SUBMITTED", order.to_dict()) - elif msg.body['msg'] == 'QUERY_TRANSACTED_VOLUME': - if msg.body['mkt_closed']: self.mkt_closed = True - self.query_transacted_volume(msg.body['symbol'], msg.body['transacted_volume']) + else: + log_print("TradingAgent ignored limit order of quantity zero: {}", order) + + def placeMarketOrder(self, symbol, quantity, is_buy_order, order_id=None, ignore_risk=True, tag=None): + """ + Used by any Trading Agent subclass to place a market order. The market order is created as multiple limit orders + crossing the spread walking the book until all the quantities are matched. 
+ :param symbol (str): name of the stock traded + :param quantity (int): order quantity + :param is_buy_order (bool): True if Buy else False + :param order_id: Order ID for market replay + :param ignore_risk (bool): Determines whether cash or risk limits should be enforced or ignored for the order + :return: + """ + order = MarketOrder(self.id, self.currentTime, symbol, quantity, is_buy_order, order_id) + if quantity > 0: + # compute new holdings + new_holdings = self.holdings.copy() + q = order.quantity if order.is_buy_order else -order.quantity + if order.symbol in new_holdings: + new_holdings[order.symbol] += q + else: + new_holdings[order.symbol] = q + + if not ignore_risk: + # Compute before and after at-risk capital. + at_risk = self.markToMarket(self.holdings) - self.holdings["CASH"] + new_at_risk = self.markToMarket(new_holdings) - new_holdings["CASH"] + + if (new_at_risk > at_risk) and (new_at_risk > self.starting_cash): + log_print( + "TradingAgent ignored market order due to at-risk constraints: {}\n{}", + order, + self.fmtHoldings(self.holdings), + ) + return + self.orders[order.order_id] = deepcopy(order) + self.sendMessage( + self.exchangeID, + Message({"msg": "MARKET_ORDER", "sender": self.id, "order": order}), + ) + if self.log_orders: + self.logEvent("ORDER_SUBMITTED", order.to_dict()) + else: + log_print("TradingAgent ignored market order of quantity zero: {}", order) + + def cancelOrder(self, order): + """Used by any Trading Agent subclass to cancel any order. The order must currently + appear in the agent's open orders list.""" + if isinstance(order, LimitOrder): + self.sendMessage( + self.exchangeID, + Message({"msg": "CANCEL_ORDER", "sender": self.id, "order": order}), + ) + # Log this activity. 
+ if self.log_orders: + self.logEvent("CANCEL_SUBMITTED", order.to_dict()) + else: + log_print("order {} of type, {} cannot be cancelled", order, type(order)) - elif msg.body['msg'] == 'MARKET_DATA': - self.handleMarketData(msg) - - # Now do we know the market hours? - have_mkt_hours = self.mkt_open is not None and self.mkt_close is not None - - # Once we know the market open and close times, schedule a wakeup call for market open. - # Only do this once, when we first have both items. - if have_mkt_hours and not had_mkt_hours: - # Agents are asked to generate a wake offset from the market open time. We structure - # this as a subclass request so each agent can supply an appropriate offset relative - # to its trading frequency. - ns_offset = self.getWakeFrequency() - - self.setWakeup(self.mkt_open + ns_offset) - - - # Used by any Trading Agent subclass to query the last trade price for a symbol. - # This activity is not logged. - def getLastTrade (self, symbol): - self.sendMessage(self.exchangeID, Message({ "msg" : "QUERY_LAST_TRADE", "sender": self.id, - "symbol" : symbol })) - - - # Used by any Trading Agent subclass to query the current spread for a symbol. - # This activity is not logged. - def getCurrentSpread (self, symbol, depth=1): - self.sendMessage(self.exchangeID, Message({ "msg" : "QUERY_SPREAD", "sender": self.id, - "symbol" : symbol, "depth" : depth })) - - - # Used by any Trading Agent subclass to query the recent order stream for a symbol. 
- def getOrderStream (self, symbol, length=1): - self.sendMessage(self.exchangeID, Message({ "msg" : "QUERY_ORDER_STREAM", "sender": self.id, - "symbol" : symbol, "length" : length })) - - def get_transacted_volume(self, symbol, lookback_period='10min'): - """ Used by any trading agent subclass to query the total transacted volume in a given lookback period """ - self.sendMessage(self.exchangeID, Message({ "msg": "QUERY_TRANSACTED_VOLUME", "sender": self.id, - "symbol": symbol, "lookback_period": lookback_period})) - - # Used by any Trading Agent subclass to place a limit order. Parameters expect: - # string (valid symbol), int (positive share quantity), bool (True == BUY), int (price in cents). - # The call may optionally specify an order_id (otherwise global autoincrement is used) and - # whether cash or risk limits should be enforced or ignored for the order. - def placeLimitOrder (self, symbol, quantity, is_buy_order, limit_price, order_id=None, ignore_risk = True, tag = None): - order = LimitOrder(self.id, self.currentTime, symbol, quantity, is_buy_order, limit_price, order_id, tag) - - if quantity > 0: - # Test if this order can be permitted given our at-risk limits. - new_holdings = self.holdings.copy() - - q = order.quantity if order.is_buy_order else -order.quantity - - if order.symbol in new_holdings: new_holdings[order.symbol] += q - else: new_holdings[order.symbol] = q - - # If at_risk is lower, always allow. Otherwise, new_at_risk must be below starting cash. - if not ignore_risk: - # Compute before and after at-risk capital. 
- at_risk = self.markToMarket(self.holdings) - self.holdings['CASH'] - new_at_risk = self.markToMarket(new_holdings) - new_holdings['CASH'] - - if (new_at_risk > at_risk) and (new_at_risk > self.starting_cash): - log_print ("TradingAgent ignored limit order due to at-risk constraints: {}\n{}", order, self.fmtHoldings(self.holdings)) - return - - # Copy the intended order for logging, so any changes made to it elsewhere - # don't retroactively alter our "as placed" log of the order. Eventually - # it might be nice to make the whole history of the order into transaction - # objects inside the order (we're halfway there) so there CAN be just a single - # object per order, that never alters its original state, and eliminate all these copies. - self.orders[order.order_id] = deepcopy(order) - self.sendMessage(self.exchangeID, Message({ "msg" : "LIMIT_ORDER", "sender": self.id, - "order" : order })) - - # Log this activity. - if self.log_orders: self.logEvent('ORDER_SUBMITTED', order.to_dict()) - - else: - log_print ("TradingAgent ignored limit order of quantity zero: {}", order) - - def placeMarketOrder(self, symbol, quantity, is_buy_order, order_id=None, ignore_risk = True, tag=None): - """ - Used by any Trading Agent subclass to place a market order. The market order is created as multiple limit orders - crossing the spread walking the book until all the quantities are matched. 
- :param symbol (str): name of the stock traded - :param quantity (int): order quantity - :param is_buy_order (bool): True if Buy else False - :param order_id: Order ID for market replay - :param ignore_risk (bool): Determines whether cash or risk limits should be enforced or ignored for the order - :return: - """ - order = MarketOrder(self.id, self.currentTime, symbol, quantity, is_buy_order, order_id) - if quantity > 0: - # compute new holdings - new_holdings = self.holdings.copy() - q = order.quantity if order.is_buy_order else -order.quantity - if order.symbol in new_holdings: new_holdings[order.symbol] += q - else: new_holdings[order.symbol] = q - - if not ignore_risk: - # Compute before and after at-risk capital. - at_risk = self.markToMarket(self.holdings) - self.holdings['CASH'] - new_at_risk = self.markToMarket(new_holdings) - new_holdings['CASH'] - - if (new_at_risk > at_risk) and (new_at_risk > self.starting_cash): - log_print("TradingAgent ignored market order due to at-risk constraints: {}\n{}", - order, self.fmtHoldings(self.holdings)) - return - self.orders[order.order_id] = deepcopy(order) - self.sendMessage(self.exchangeID, Message({"msg" : "MARKET_ORDER", "sender": self.id, "order": order})) - if self.log_orders: self.logEvent('ORDER_SUBMITTED', order.to_dict()) - else: - log_print("TradingAgent ignored market order of quantity zero: {}", order) - - def cancelOrder(self, order): - """Used by any Trading Agent subclass to cancel any order. The order must currently - appear in the agent's open orders list.""" - if isinstance(order, LimitOrder): - self.sendMessage(self.exchangeID, Message({"msg": "CANCEL_ORDER", "sender": self.id, - "order": order})) - # Log this activity. - if self.log_orders: self.logEvent('CANCEL_SUBMITTED', order.to_dict()) - else: - log_print("order {} of type, {} cannot be cancelled", order, type(order)) - - def modifyOrder (self, order, newOrder): - """ Used by any Trading Agent subclass to modify any existing limit order. 
The order must currently + def modifyOrder(self, order, newOrder): + """Used by any Trading Agent subclass to modify any existing limit order. The order must currently appear in the agent's open orders list. Some additional tests might be useful here to ensure the old and new orders are the same in some way.""" - self.sendMessage(self.exchangeID, Message({ "msg" : "MODIFY_ORDER", "sender": self.id, - "order" : order, "new_order" : newOrder})) - - # Log this activity. - if self.log_orders: self.logEvent('MODIFY_ORDER', order.to_dict()) - - - # Handles ORDER_EXECUTED messages from an exchange agent. Subclasses may wish to extend, - # but should still call parent method for basic portfolio/returns tracking. - def orderExecuted (self, order): - log_print ("Received notification of execution for: {}", order) - - # Log this activity. - if self.log_orders: self.logEvent('ORDER_EXECUTED', order.to_dict()) - - # At the very least, we must update CASH and holdings at execution time. - qty = order.quantity if order.is_buy_order else -1 * order.quantity - sym = order.symbol - - if sym in self.holdings: - self.holdings[sym] += qty - else: - self.holdings[sym] = qty - - if self.holdings[sym] == 0: del self.holdings[sym] - - # As with everything else, CASH holdings are in CENTS. - self.holdings['CASH'] -= (qty * order.fill_price) - - # If this original order is now fully executed, remove it from the open orders list. - # Otherwise, decrement by the quantity filled just now. It is _possible_ that due - # to timing issues, it might not be in the order list (i.e. we issued a cancellation - # but it was executed first, or something). 
- if order.order_id in self.orders: - o = self.orders[order.order_id] - - if order.quantity >= o.quantity: del self.orders[order.order_id] - else: o.quantity -= order.quantity - - else: - log_print ("Execution received for order not in orders list: {}", order) - - log_print ("After execution, agent open orders: {}", self.orders) - - # After execution, log holdings. - self.logEvent('HOLDINGS_UPDATED', self.holdings) - - - # Handles ORDER_ACCEPTED messages from an exchange agent. Subclasses may wish to extend. - def orderAccepted (self, order): - log_print ("Received notification of acceptance for: {}", order) - - # Log this activity. - if self.log_orders: self.logEvent('ORDER_ACCEPTED', order.to_dict()) - - - # We may later wish to add a status to the open orders so an agent can tell whether - # a given order has been accepted or not (instead of needing to override this method). - - - # Handles ORDER_CANCELLED messages from an exchange agent. Subclasses may wish to extend. - def orderCancelled (self, order): - log_print ("Received notification of cancellation for: {}", order) - - # Log this activity. - if self.log_orders: self.logEvent('ORDER_CANCELLED', order.to_dict()) - - # Remove the cancelled order from the open orders list. We may of course wish to have - # additional logic here later, so agents can easily "look for" cancelled orders. Of - # course they can just override this method. - if order.order_id in self.orders: - del self.orders[order.order_id] - else: - log_print ("Cancellation received for order not in orders list: {}", order) - - - # Handles MKT_CLOSED messages from an exchange agent. Subclasses may wish to extend. - def marketClosed (self): - log_print ("Received notification of market closure.") - - # Log this activity. - self.logEvent('MKT_CLOSED') - - # Remember that this has happened. - self.mkt_closed = True - - - # Handles QUERY_LAST_TRADE messages from an exchange agent. 
- def queryLastTrade (self, symbol, price): - self.last_trade[symbol] = price - - log_print ("Received last trade price of {} for {}.", self.last_trade[symbol], symbol) - - if self.mkt_closed: - # Note this as the final price of the day. - self.daily_close_price[symbol] = self.last_trade[symbol] - - log_print ("Received daily close price of {} for {}.", self.last_trade[symbol], symbol) - - - # Handles QUERY_SPREAD messages from an exchange agent. - def querySpread (self, symbol, price, bids, asks, book): - # The spread message now also includes last price for free. - self.queryLastTrade(symbol, price) - - self.known_bids[symbol] = bids - self.known_asks[symbol] = asks - - if bids: best_bid, best_bid_qty = (bids[0][0], bids[0][1]) - else: best_bid, best_bid_qty = ('No bids', 0) - - if asks: best_ask, best_ask_qty = (asks[0][0], asks[0][1]) - else: best_ask, best_ask_qty = ('No asks', 0) - - log_print ("Received spread of {} @ {} / {} @ {} for {}", best_bid_qty, best_bid, best_ask_qty, best_ask, symbol) - - self.logEvent("BID_DEPTH", bids) - self.logEvent("ASK_DEPTH", asks) - self.logEvent("IMBALANCE", [sum([x[1] for x in bids]), sum([x[1] for x in asks])]) - - self.book = book - - def handleMarketData(self, msg): - ''' - Handles Market Data messages for agents using subscription mechanism - ''' - symbol = msg.body['symbol'] - self.known_asks[symbol] = msg.body['asks'] - self.known_bids[symbol] = msg.body['bids'] - self.last_trade[symbol] = msg.body['last_transaction'] - self.exchange_ts[symbol] = msg.body['exchange_ts'] - - - # Handles QUERY_ORDER_STREAM messages from an exchange agent. - def queryOrderStream (self, symbol, orders): - # It is up to the requesting agent to do something with the data, which is a list of dictionaries keyed - # by order id. The list index is 0 for orders since the most recent trade, 1 for orders that led up to - # the most recent trade, and so on. Agents are not given index 0 (orders more recent than the last - # trade). 
- self.stream_history[self.symbol] = orders + self.sendMessage( + self.exchangeID, + Message( + { + "msg": "MODIFY_ORDER", + "sender": self.id, + "order": order, + "new_order": newOrder, + } + ), + ) + + # Log this activity. + if self.log_orders: + self.logEvent("MODIFY_ORDER", order.to_dict()) + + # Handles ORDER_EXECUTED messages from an exchange agent. Subclasses may wish to extend, + # but should still call parent method for basic portfolio/returns tracking. + def orderExecuted(self, order): + log_print("Received notification of execution for: {}", order) + + # Log this activity. + if self.log_orders: + self.logEvent("ORDER_EXECUTED", order.to_dict()) + + # At the very least, we must update CASH and holdings at execution time. + qty = order.quantity if order.is_buy_order else -1 * order.quantity + sym = order.symbol + + if sym in self.holdings: + self.holdings[sym] += qty + else: + self.holdings[sym] = qty - def query_transacted_volume(self, symbol, transacted_volume): - """ Handles the QUERY_TRANSACTED_VOLUME messages from the exchange agent""" - self.transacted_volume[symbol] = transacted_volume + if self.holdings[sym] == 0: + del self.holdings[sym] - # Utility functions that perform calculations from available knowledge, but implement no - # particular strategy. + # As with everything else, CASH holdings are in CENTS. + self.holdings["CASH"] -= qty * order.fill_price + # If this original order is now fully executed, remove it from the open orders list. + # Otherwise, decrement by the quantity filled just now. It is _possible_ that due + # to timing issues, it might not be in the order list (i.e. we issued a cancellation + # but it was executed first, or something). + if order.order_id in self.orders: + o = self.orders[order.order_id] - # Extract the current known bid and asks. This does NOT request new information. 
- def getKnownBidAsk (self, symbol, best=True): - if best: - bid = self.known_bids[symbol][0][0] if self.known_bids[symbol] else None - ask = self.known_asks[symbol][0][0] if self.known_asks[symbol] else None - bid_vol = self.known_bids[symbol][0][1] if self.known_bids[symbol] else 0 - ask_vol = self.known_asks[symbol][0][1] if self.known_asks[symbol] else 0 - return bid, bid_vol, ask, ask_vol - else: - bids = self.known_bids[symbol] if self.known_bids[symbol] else None - asks = self.known_asks[symbol] if self.known_asks[symbol] else None - return bids, asks + if order.quantity >= o.quantity: + del self.orders[order.order_id] + else: + o.quantity -= order.quantity + else: + log_print("Execution received for order not in orders list: {}", order) - # Extract the current bid and ask liquidity within a certain proportion of the - # inside bid and ask. (i.e. within=0.01 means to report total BID shares - # within 1% of the best bid price, and total ASK shares within 1% of the best - # ask price) - # - # Returns bid_liquidity, ask_liquidity. Note that this is from the order book - # perspective, not the agent perspective. (The agent would be selling into the - # bid liquidity, etc.) - def getKnownLiquidity (self, symbol, within=0.00): - bid_liq = self.getBookLiquidity(self.known_bids[symbol], within) - ask_liq = self.getBookLiquidity(self.known_asks[symbol], within) + log_print("After execution, agent open orders: {}", self.orders) - log_print ("Bid/ask liq: {}, {}", bid_liq, ask_liq) - log_print ("Known bids: {}", self.known_bids[self.symbol]) - log_print ("Known asks: {}", self.known_asks[self.symbol]) + # After execution, log holdings. + self.logEvent("HOLDINGS_UPDATED", self.holdings) - return bid_liq, ask_liq + # Handles ORDER_ACCEPTED messages from an exchange agent. Subclasses may wish to extend. + def orderAccepted(self, order): + log_print("Received notification of acceptance for: {}", order) + # Log this activity. 
+ if self.log_orders: + self.logEvent("ORDER_ACCEPTED", order.to_dict()) - # Helper function for the above. Checks one side of the known order book. - def getBookLiquidity (self, book, within): - liq = 0 - for i, (price, shares) in enumerate(book): - if i == 0: - best = price + # We may later wish to add a status to the open orders so an agent can tell whether + # a given order has been accepted or not (instead of needing to override this method). - # Is this price within "within" proportion of the best price? - if abs(best - price) <= int(round(best * within)): - log_print ("Within {} of {}: {} with {} shares", within, best, price, shares) - liq += shares - - return liq + # Handles ORDER_CANCELLED messages from an exchange agent. Subclasses may wish to extend. + def orderCancelled(self, order): + log_print("Received notification of cancellation for: {}", order) + # Log this activity. + if self.log_orders: + self.logEvent("ORDER_CANCELLED", order.to_dict()) - # Marks holdings to market (including cash). - def markToMarket (self, holdings, use_midpoint=False): - cash = holdings['CASH'] - - cash += self.basket_size * self.nav_diff - - for symbol, shares in holdings.items(): - if symbol == 'CASH': continue - - if use_midpoint: - bid, ask, midpoint = self.getKnownBidAskMidpoint(symbol) - if bid is None or ask is None or midpoint is None: - value = self.last_trade[symbol] * shares + # Remove the cancelled order from the open orders list. We may of course wish to have + # additional logic here later, so agents can easily "look for" cancelled orders. Of + # course they can just override this method. 
+ if order.order_id in self.orders: + del self.orders[order.order_id] else: - value = midpoint * shares - else: - value = self.last_trade[symbol] * shares - - cash += value + log_print("Cancellation received for order not in orders list: {}", order) - self.logEvent('MARK_TO_MARKET', "{} {} @ {} == {}".format(shares, symbol, - self.last_trade[symbol], value)) + # Handles MKT_CLOSED messages from an exchange agent. Subclasses may wish to extend. + def marketClosed(self): + log_print("Received notification of market closure.") - self.logEvent('MARKED_TO_MARKET', cash) + # Log this activity. + self.logEvent("MKT_CLOSED") - return cash + # Remember that this has happened. + self.mkt_closed = True + # Handles QUERY_LAST_TRADE messages from an exchange agent. + def queryLastTrade(self, symbol, price): + self.last_trade[symbol] = price - # Gets holdings. Returns zero for any symbol not held. - def getHoldings (self, symbol): - if symbol in self.holdings: return self.holdings[symbol] - return 0 + log_print("Received last trade price of {} for {}.", self.last_trade[symbol], symbol) + if self.mkt_closed: + # Note this as the final price of the day. + self.daily_close_price[symbol] = self.last_trade[symbol] - # Get the known best bid, ask, and bid/ask midpoint from cached data. No volume. - def getKnownBidAskMidpoint (self, symbol) : - bid = self.known_bids[symbol][0][0] if self.known_bids[symbol] else None - ask = self.known_asks[symbol][0][0] if self.known_asks[symbol] else None + log_print( + "Received daily close price of {} for {}.", + self.last_trade[symbol], + symbol, + ) - midpoint = int(round((bid + ask) / 2)) if bid is not None and ask is not None else None + # Handles QUERY_SPREAD messages from an exchange agent. + def querySpread(self, symbol, price, bids, asks, book): + # The spread message now also includes last price for free. 
+ self.queryLastTrade(symbol, price) - return bid, ask, midpoint - - def get_average_transaction_price(self): - """ Calculates the average price paid (weighted by the order size) """ - return round(sum(executed_order.quantity * executed_order.fill_price for executed_order in self.executed_orders) / \ - sum(executed_order.quantity for executed_order in self.executed_orders), 2) - - # Prints holdings. Standard dictionary->string representation is almost fine, but it is - # less confusing to see the CASH holdings in dollars and cents, instead of just integer - # cents. We could change to a Holdings object that knows to print CASH "special". - def fmtHoldings (self, holdings): - h = '' - for k,v in sorted(holdings.items()): - if k == 'CASH': continue - h += "{}: {}, ".format(k,v) - - # There must always be a CASH entry. - h += "{}: {}".format('CASH', holdings['CASH']) - h = '{ ' + h + ' }' - return h + self.known_bids[symbol] = bids + self.known_asks[symbol] = asks + if bids: + best_bid, best_bid_qty = (bids[0][0], bids[0][1]) + else: + best_bid, best_bid_qty = ("No bids", 0) - pass + if asks: + best_ask, best_ask_qty = (asks[0][0], asks[0][1]) + else: + best_ask, best_ask_qty = ("No asks", 0) + + log_print( + "Received spread of {} @ {} / {} @ {} for {}", + best_bid_qty, + best_bid, + best_ask_qty, + best_ask, + symbol, + ) + + self.logEvent("BID_DEPTH", bids) + self.logEvent("ASK_DEPTH", asks) + self.logEvent("IMBALANCE", [sum([x[1] for x in bids]), sum([x[1] for x in asks])]) + + self.book = book + + def handleMarketData(self, msg): + """ + Handles Market Data messages for agents using subscription mechanism + """ + symbol = msg.body["symbol"] + self.known_asks[symbol] = msg.body["asks"] + self.known_bids[symbol] = msg.body["bids"] + self.last_trade[symbol] = msg.body["last_transaction"] + self.exchange_ts[symbol] = msg.body["exchange_ts"] + + # Handles QUERY_ORDER_STREAM messages from an exchange agent. 
+ def queryOrderStream(self, symbol, orders): + # It is up to the requesting agent to do something with the data, which is a list of dictionaries keyed + # by order id. The list index is 0 for orders since the most recent trade, 1 for orders that led up to + # the most recent trade, and so on. Agents are not given index 0 (orders more recent than the last + # trade). + self.stream_history[self.symbol] = orders + + def query_transacted_volume(self, symbol, transacted_volume): + """Handles the QUERY_TRANSACTED_VOLUME messages from the exchange agent""" + self.transacted_volume[symbol] = transacted_volume + + # Utility functions that perform calculations from available knowledge, but implement no + # particular strategy. + + # Extract the current known bid and asks. This does NOT request new information. + def getKnownBidAsk(self, symbol, best=True): + if best: + bid = self.known_bids[symbol][0][0] if self.known_bids[symbol] else None + ask = self.known_asks[symbol][0][0] if self.known_asks[symbol] else None + bid_vol = self.known_bids[symbol][0][1] if self.known_bids[symbol] else 0 + ask_vol = self.known_asks[symbol][0][1] if self.known_asks[symbol] else 0 + return bid, bid_vol, ask, ask_vol + else: + bids = self.known_bids[symbol] if self.known_bids[symbol] else None + asks = self.known_asks[symbol] if self.known_asks[symbol] else None + return bids, asks + + # Extract the current bid and ask liquidity within a certain proportion of the + # inside bid and ask. (i.e. within=0.01 means to report total BID shares + # within 1% of the best bid price, and total ASK shares within 1% of the best + # ask price) + # + # Returns bid_liquidity, ask_liquidity. Note that this is from the order book + # perspective, not the agent perspective. (The agent would be selling into the + # bid liquidity, etc.) 
+ def getKnownLiquidity(self, symbol, within=0.00): + bid_liq = self.getBookLiquidity(self.known_bids[symbol], within) + ask_liq = self.getBookLiquidity(self.known_asks[symbol], within) + + log_print("Bid/ask liq: {}, {}", bid_liq, ask_liq) + log_print("Known bids: {}", self.known_bids[self.symbol]) + log_print("Known asks: {}", self.known_asks[self.symbol]) + + return bid_liq, ask_liq + + # Helper function for the above. Checks one side of the known order book. + def getBookLiquidity(self, book, within): + liq = 0 + for i, (price, shares) in enumerate(book): + if i == 0: + best = price + + # Is this price within "within" proportion of the best price? + if abs(best - price) <= int(round(best * within)): + log_print("Within {} of {}: {} with {} shares", within, best, price, shares) + liq += shares + + return liq + + # Marks holdings to market (including cash). + def markToMarket(self, holdings, use_midpoint=False): + cash = holdings["CASH"] + + cash += self.basket_size * self.nav_diff + + for symbol, shares in holdings.items(): + if symbol == "CASH": + continue + + if use_midpoint: + bid, ask, midpoint = self.getKnownBidAskMidpoint(symbol) + if bid is None or ask is None or midpoint is None: + value = self.last_trade[symbol] * shares + else: + value = midpoint * shares + else: + value = self.last_trade[symbol] * shares + + cash += value + + self.logEvent( + "MARK_TO_MARKET", + "{} {} @ {} == {}".format(shares, symbol, self.last_trade[symbol], value), + ) + + self.logEvent("MARKED_TO_MARKET", cash) + + return cash + + # Gets holdings. Returns zero for any symbol not held. + def getHoldings(self, symbol): + if symbol in self.holdings: + return self.holdings[symbol] + return 0 + + # Get the known best bid, ask, and bid/ask midpoint from cached data. No volume. 
+ def getKnownBidAskMidpoint(self, symbol): + bid = self.known_bids[symbol][0][0] if self.known_bids[symbol] else None + ask = self.known_asks[symbol][0][0] if self.known_asks[symbol] else None + + midpoint = int(round((bid + ask) / 2)) if bid is not None and ask is not None else None + + return bid, ask, midpoint + + def get_average_transaction_price(self): + """Calculates the average price paid (weighted by the order size)""" + return round( + sum(executed_order.quantity * executed_order.fill_price for executed_order in self.executed_orders) + / sum(executed_order.quantity for executed_order in self.executed_orders), + 2, + ) + + # Prints holdings. Standard dictionary->string representation is almost fine, but it is + # less confusing to see the CASH holdings in dollars and cents, instead of just integer + # cents. We could change to a Holdings object that knows to print CASH "special". + def fmtHoldings(self, holdings): + h = "" + for k, v in sorted(holdings.items()): + if k == "CASH": + continue + h += "{}: {}, ".format(k, v) + + # There must always be a CASH entry. 
+ h += "{}: {}".format("CASH", holdings["CASH"]) + h = "{ " + h + " }" + return h + + pass diff --git a/agent/ValueAgent.py b/agent/ValueAgent.py index 68713c72f..48fe3b43a 100644 --- a/agent/ValueAgent.py +++ b/agent/ValueAgent.py @@ -1,20 +1,41 @@ -from agent.TradingAgent import TradingAgent -from util.util import log_print - from math import sqrt + import numpy as np import pandas as pd +from agent.TradingAgent import TradingAgent +from util.util import log_print + class ValueAgent(TradingAgent): - def __init__(self, id, name, type, symbol='IBM', starting_cash=100000, sigma_n=10000, - r_bar=100000, kappa=0.05, sigma_s=100000, - lambda_a=0.005, log_orders=False, log_to_file=True, random_state=None): + def __init__( + self, + id, + name, + type, + symbol="IBM", + starting_cash=100000, + sigma_n=10000, + r_bar=100000, + kappa=0.05, + sigma_s=100000, + lambda_a=0.005, + log_orders=False, + log_to_file=True, + random_state=None, + ): # Base class init. - super().__init__(id, name, type, starting_cash=starting_cash, - log_orders=log_orders, log_to_file=log_to_file, random_state=random_state) + super().__init__( + id, + name, + type, + starting_cash=starting_cash, + log_orders=log_orders, + log_to_file=log_to_file, + random_state=random_state, + ) # Store important parameters particular to the ZI agent. self.symbol = symbol # symbol to trade @@ -30,7 +51,7 @@ def __init__(self, id, name, type, symbol='IBM', starting_cash=100000, sigma_n=1 # The agent begins in its "complete" state, not waiting for # any special event or condition. - self.state = 'AWAITING_WAKEUP' + self.state = "AWAITING_WAKEUP" # The agent maintains two priors: r_t and sigma_t (value and error estimates). self.r_t = r_bar @@ -40,8 +61,8 @@ def __init__(self, id, name, type, symbol='IBM', starting_cash=100000, sigma_n=1 # units have passed. 
self.prev_wake_time = None - self.percent_aggr = 0.1 #percent of time that the agent will aggress the spread - self.size = np.random.randint(20, 50) #size that the agent will be placing + self.percent_aggr = 0.1 # percent of time that the agent will aggress the spread + self.size = np.random.randint(20, 50) # size that the agent will be placing self.depth_spread = 2 def kernelStarting(self, startTime): @@ -60,7 +81,7 @@ def kernelStopping(self): H = int(round(self.getHoldings(self.symbol), -2) / 100) # May request real fundamental value from oracle as part of final cleanup/stats. - #marked to fundamental + # marked to fundamental rT = self.oracle.observePrice(self.symbol, self.currentTime, sigma_n=0, random_state=self.random_state) # final (real) fundamental value times shares held. @@ -69,22 +90,28 @@ def kernelStopping(self): log_print("surplus after holdings: {}", surplus) # Add ending cash value and subtract starting cash value. - surplus += self.holdings['CASH'] - self.starting_cash - surplus = float( surplus )/self.starting_cash + surplus += self.holdings["CASH"] - self.starting_cash + surplus = float(surplus) / self.starting_cash - self.logEvent('FINAL_VALUATION', surplus, True) + self.logEvent("FINAL_VALUATION", surplus, True) log_print( "{} final report. Holdings {}, end cash {}, start cash {}, final fundamental {}, surplus {}", - self.name, H, self.holdings['CASH'], self.starting_cash, rT, surplus) + self.name, + H, + self.holdings["CASH"], + self.starting_cash, + rT, + surplus, + ) - #print("Final surplus", self.name, surplus) + # print("Final surplus", self.name, surplus) def wakeup(self, currentTime): # Parent class handles discovery of exchange times and market_open wakeup call. super().wakeup(currentTime) - self.state = 'INACTIVE' + self.state = "INACTIVE" if not self.mkt_open or not self.mkt_close: # TradingAgent handles discovery of exchange times. 
@@ -105,20 +132,20 @@ def wakeup(self, currentTime): return delta_time = self.random_state.exponential(scale=1.0 / self.lambda_a) - self.setWakeup(currentTime + pd.Timedelta('{}ns'.format(int(round(delta_time))))) + self.setWakeup(currentTime + pd.Timedelta("{}ns".format(int(round(delta_time))))) if self.mkt_closed and (not self.symbol in self.daily_close_price): self.getCurrentSpread(self.symbol) - self.state = 'AWAITING_SPREAD' + self.state = "AWAITING_SPREAD" return self.cancelOrders() if type(self) == ValueAgent: self.getCurrentSpread(self.symbol) - self.state = 'AWAITING_SPREAD' + self.state = "AWAITING_SPREAD" else: - self.state = 'ACTIVE' + self.state = "ACTIVE" def updateEstimates(self): # Called by a background agent that wishes to obtain a new fundamental observation, @@ -127,22 +154,27 @@ def updateEstimates(self): # The agent obtains a new noisy observation of the current fundamental value # and uses this to update its internal estimates in a Bayesian manner. - obs_t = self.oracle.observePrice(self.symbol, self.currentTime, sigma_n=self.sigma_n, - random_state=self.random_state) + obs_t = self.oracle.observePrice( + self.symbol, + self.currentTime, + sigma_n=self.sigma_n, + random_state=self.random_state, + ) log_print("{} observed {} at {}", self.name, obs_t, self.currentTime) # Update internal estimates of the current fundamental value and our error of same. # If this is our first estimate, treat the previous wake time as "market open". - if self.prev_wake_time is None: self.prev_wake_time = self.mkt_open + if self.prev_wake_time is None: + self.prev_wake_time = self.mkt_open # First, obtain an intermediate estimate of the fundamental value by advancing # time from the previous wake time to the current time, performing mean # reversion at each time step. 
# delta must be integer time steps since last wake - delta = (self.currentTime - self.prev_wake_time) / np.timedelta64(1, 'ns') + delta = (self.currentTime - self.prev_wake_time) / np.timedelta64(1, "ns") # Update r estimate for time advancement. r_tprime = (1 - (1 - self.kappa) ** delta) * self.r_bar @@ -163,7 +195,7 @@ def updateEstimates(self): # Now having a best estimate of the fundamental at time t, we can make our best estimate # of the final fundamental (for time T) as of current time t. Delta is now the number # of time steps remaining until the simulated exchange closes. - delta = max(0, (self.mkt_close - self.currentTime) / np.timedelta64(1, 'ns')) + delta = max(0, (self.mkt_close - self.currentTime) / np.timedelta64(1, "ns")) # IDEA: instead of letting agent "imagine time forward" to the end of the day, # impose a maximum forward delta, like ten minutes or so. This could make @@ -186,31 +218,35 @@ def updateEstimates(self): return r_T def placeOrder(self): - #estimate final value of the fundamental price - #used for surplus calculation + # estimate final value of the fundamental price + # used for surplus calculation r_T = self.updateEstimates() bid, bid_vol, ask, ask_vol = self.getKnownBidAsk(self.symbol) if bid and ask: - mid = int((ask+bid)/2) + mid = int((ask + bid) / 2) spread = abs(ask - bid) if np.random.rand() < self.percent_aggr: adjust_int = 0 else: - adjust_int = np.random.randint( 0, self.depth_spread*spread ) - #adjustment to the limit price, allowed to post inside the spread - #or deeper in the book as a passive order to maximize surplus + adjust_int = np.random.randint(0, self.depth_spread * spread) + # adjustment to the limit price, allowed to post inside the spread + # or deeper in the book as a passive order to maximize surplus if r_T < mid: - #fundamental belief that price will go down, place a sell order + # fundamental belief that price will go down, place a sell order buy = False - p = bid + adjust_int #submit a market order to 
sell, limit order inside the spread or deeper in the book + p = ( + bid + adjust_int + ) # submit a market order to sell, limit order inside the spread or deeper in the book elif r_T >= mid: - #fundamental belief that price will go up, buy order + # fundamental belief that price will go up, buy order buy = True - p = ask - adjust_int #submit a market order to buy, a limit order inside the spread or deeper in the book + p = ( + ask - adjust_int + ) # submit a market order to buy, a limit order inside the spread or deeper in the book else: # initialize randomly buy = np.random.randint(0, 1 + 1) @@ -227,27 +263,29 @@ def receiveMessage(self, currentTime, msg): # If our internal state indicates we were waiting for a particular event, # check if we can transition to a new state. - if self.state == 'AWAITING_SPREAD': + if self.state == "AWAITING_SPREAD": # We were waiting to receive the current spread/book. Since we don't currently # track timestamps on retained information, we rely on actually seeing a # QUERY_SPREAD response message. - if msg.body['msg'] == 'QUERY_SPREAD': + if msg.body["msg"] == "QUERY_SPREAD": # This is what we were waiting for. # But if the market is now closed, don't advance to placing orders. - if self.mkt_closed: return + if self.mkt_closed: + return # We now have the information needed to place a limit order with the eta # strategic threshold parameter. self.placeOrder() - self.state = 'AWAITING_WAKEUP' + self.state = "AWAITING_WAKEUP" # Cancel all open orders. # Return value: did we issue any cancellation requests? 
def cancelOrders(self): - if not self.orders: return False + if not self.orders: + return False for id, order in self.orders.items(): self.cancelOrder(order) @@ -255,4 +293,4 @@ def cancelOrders(self): return True def getWakeFrequency(self): - return pd.Timedelta(self.random_state.randint(low=0, high=100), unit='ns') \ No newline at end of file + return pd.Timedelta(self.random_state.randint(low=0, high=100), unit="ns") diff --git a/agent/ZeroIntelligenceAgent.py b/agent/ZeroIntelligenceAgent.py index 217155670..27716d04d 100644 --- a/agent/ZeroIntelligenceAgent.py +++ b/agent/ZeroIntelligenceAgent.py @@ -1,20 +1,44 @@ -from agent.TradingAgent import TradingAgent -from util.util import log_print - from math import sqrt + import numpy as np import pandas as pd +from agent.TradingAgent import TradingAgent +from util.util import log_print + class ZeroIntelligenceAgent(TradingAgent): - def __init__(self, id, name, type, symbol='IBM', starting_cash=100000, sigma_n=1000, - r_bar=100000, kappa=0.05, sigma_s=100000, q_max=10, - sigma_pv=5000000, R_min=0, R_max=250, eta=1.0, - lambda_a=0.005, log_orders=False, random_state=None): + def __init__( + self, + id, + name, + type, + symbol="IBM", + starting_cash=100000, + sigma_n=1000, + r_bar=100000, + kappa=0.05, + sigma_s=100000, + q_max=10, + sigma_pv=5000000, + R_min=0, + R_max=250, + eta=1.0, + lambda_a=0.005, + log_orders=False, + random_state=None, + ): # Base class init. - super().__init__(id, name, type, starting_cash=starting_cash, log_orders=log_orders, random_state=random_state) + super().__init__( + id, + name, + type, + starting_cash=starting_cash, + log_orders=log_orders, + random_state=random_state, + ) # Store important parameters particular to the ZI agent. self.symbol = symbol # symbol to trade @@ -35,7 +59,7 @@ def __init__(self, id, name, type, symbol='IBM', starting_cash=100000, sigma_n=1 # The agent begins in its "complete" state, not waiting for # any special event or condition. 
- self.state = 'AWAITING_WAKEUP' + self.state = "AWAITING_WAKEUP" # The agent maintains two priors: r_t and sigma_t (value and error estimates). self.r_t = r_bar @@ -46,9 +70,13 @@ def __init__(self, id, name, type, symbol='IBM', starting_cash=100000, sigma_n=1 self.prev_wake_time = None # The agent has a private value for each incremental unit. - self.theta = [int(x) for x in sorted( - np.round(self.random_state.normal(loc=0, scale=sqrt(sigma_pv), size=(q_max * 2))).tolist(), - reverse=True)] + self.theta = [ + int(x) + for x in sorted( + np.round(self.random_state.normal(loc=0, scale=sqrt(sigma_pv), size=(q_max * 2))).tolist(), + reverse=True, + ) + ] def kernelStarting(self, startTime): # self.kernel is set in Agent.kernelInitializing() @@ -65,12 +93,16 @@ def kernelStopping(self): # Print end of day valuation. H = int(round(self.getHoldings(self.symbol), -2) / 100) # May request real fundamental value from oracle as part of final cleanup/stats. - if self.symbol != 'ETF': + if self.symbol != "ETF": rT = self.oracle.observePrice(self.symbol, self.currentTime, sigma_n=0, random_state=self.random_state) else: - portfolio_rT, rT = self.oracle.observePortfolioPrice(self.symbol, self.portfolio, self.currentTime, - sigma_n=0, - random_state=self.random_state) + portfolio_rT, rT = self.oracle.observePortfolioPrice( + self.symbol, + self.portfolio, + self.currentTime, + sigma_n=0, + random_state=self.random_state, + ) # Start with surplus as private valuation of shares held. if H > 0: @@ -88,19 +120,26 @@ def kernelStopping(self): log_print("surplus after holdings: {}", surplus) # Add ending cash value and subtract starting cash value. - surplus += self.holdings['CASH'] - self.starting_cash + surplus += self.holdings["CASH"] - self.starting_cash - self.logEvent('FINAL_VALUATION', surplus, True) + self.logEvent("FINAL_VALUATION", surplus, True) log_print( "{} final report. 
Holdings {}, end cash {}, start cash {}, final fundamental {}, preferences {}, surplus {}", - self.name, H, self.holdings['CASH'], self.starting_cash, rT, self.theta, surplus) + self.name, + H, + self.holdings["CASH"], + self.starting_cash, + rT, + self.theta, + surplus, + ) def wakeup(self, currentTime): # Parent class handles discovery of exchange times and market_open wakeup call. super().wakeup(currentTime) - self.state = 'INACTIVE' + self.state = "INACTIVE" if not self.mkt_open or not self.mkt_close: # TradingAgent handles discovery of exchange times. @@ -129,14 +168,14 @@ def wakeup(self, currentTime): # distribution in alternate Beta formation with Beta = 1 / lambda, where lambda # is the mean arrival rate of the Poisson process. delta_time = self.random_state.exponential(scale=1.0 / self.lambda_a) - self.setWakeup(currentTime + pd.Timedelta('{}ns'.format(int(round(delta_time))))) + self.setWakeup(currentTime + pd.Timedelta("{}ns".format(int(round(delta_time))))) # If the market has closed and we haven't obtained the daily close price yet, # do that before we cease activity for the day. Don't do any other behavior # after market close. if self.mkt_closed and (not self.symbol in self.daily_close_price): self.getCurrentSpread(self.symbol) - self.state = 'AWAITING_SPREAD' + self.state = "AWAITING_SPREAD" return # Issue cancel requests for any open orders. Don't wait for confirmation, as presently @@ -156,9 +195,9 @@ def wakeup(self, currentTime): if type(self) == ZeroIntelligenceAgent: self.getCurrentSpread(self.symbol) - self.state = 'AWAITING_SPREAD' + self.state = "AWAITING_SPREAD" else: - self.state = 'ACTIVE' + self.state = "ACTIVE" def updateEstimates(self): # Called by a background agent that wishes to obtain a new fundamental observation, @@ -167,13 +206,17 @@ def updateEstimates(self): # The agent obtains a new noisy observation of the current fundamental value # and uses this to update its internal estimates in a Bayesian manner. 
- obs_t = self.oracle.observePrice(self.symbol, self.currentTime, sigma_n=self.sigma_n, - random_state=self.random_state) + obs_t = self.oracle.observePrice( + self.symbol, + self.currentTime, + sigma_n=self.sigma_n, + random_state=self.random_state, + ) log_print("{} observed {} at {}", self.name, obs_t, self.currentTime) # Flip a coin to decide if we will buy or sell a unit at this time. - q = int(self.getHoldings(self.symbol) / 100) # q now represents an index to how many 100 lots are held + q = int(self.getHoldings(self.symbol) / 100) # q now represents an index to how many 100 lots are held if q >= self.q_max: buy = False @@ -188,14 +231,15 @@ def updateEstimates(self): # Update internal estimates of the current fundamental value and our error of same. # If this is our first estimate, treat the previous wake time as "market open". - if self.prev_wake_time is None: self.prev_wake_time = self.mkt_open + if self.prev_wake_time is None: + self.prev_wake_time = self.mkt_open # First, obtain an intermediate estimate of the fundamental value by advancing # time from the previous wake time to the current time, performing mean # reversion at each time step. # delta must be integer time steps since last wake - delta = (self.currentTime - self.prev_wake_time) / np.timedelta64(1, 'ns') + delta = (self.currentTime - self.prev_wake_time) / np.timedelta64(1, "ns") # Update r estimate for time advancement. r_tprime = (1 - (1 - self.kappa) ** delta) * self.r_bar @@ -216,7 +260,7 @@ def updateEstimates(self): # Now having a best estimate of the fundamental at time t, we can make our best estimate # of the final fundamental (for time T) as of current time t. Delta is now the number # of time steps remaining until the simulated exchange closes. 
- delta = max(0, (self.mkt_close - self.currentTime) / np.timedelta64(1, 'ns')) + delta = max(0, (self.mkt_close - self.currentTime) / np.timedelta64(1, "ns")) # IDEA: instead of letting agent "imagine time forward" to the end of the day, # impose a maximum forward delta, like ten minutes or so. This could make @@ -237,7 +281,7 @@ def updateEstimates(self): log_print("{} estimates r_T = {} as of {}", self.name, r_T, self.currentTime) # Determine the agent's total valuation. - q += (self.q_max - 1) + q += self.q_max - 1 theta = self.theta[q + 1 if buy else q] v = r_T + theta @@ -264,14 +308,26 @@ def placeOrder(self): if buy and ask_vol > 0: R_ask = v - ask if R_ask >= (self.eta * R): - log_print("{} desired R = {}, but took R = {} at ask = {} due to eta", self.name, R, R_ask, ask) + log_print( + "{} desired R = {}, but took R = {} at ask = {} due to eta", + self.name, + R, + R_ask, + ask, + ) p = ask else: log_print("{} demands R = {}, limit price {}", self.name, R, p) elif (not buy) and bid_vol > 0: R_bid = bid - v if R_bid >= (self.eta * R): - log_print("{} desired R = {}, but took R = {} at bid = {} due to eta", self.name, R, R_bid, bid) + log_print( + "{} desired R = {}, but took R = {} at bid = {} due to eta", + self.name, + R, + R_bid, + bid, + ) p = bid else: log_print("{} demands R = {}, limit price {}", self.name, R, p) @@ -288,28 +344,30 @@ def receiveMessage(self, currentTime, msg): # If our internal state indicates we were waiting for a particular event, # check if we can transition to a new state. - if self.state == 'AWAITING_SPREAD': + if self.state == "AWAITING_SPREAD": # We were waiting to receive the current spread/book. Since we don't currently # track timestamps on retained information, we rely on actually seeing a # QUERY_SPREAD response message. - if msg.body['msg'] == 'QUERY_SPREAD': + if msg.body["msg"] == "QUERY_SPREAD": # This is what we were waiting for. # But if the market is now closed, don't advance to placing orders. 
- if self.mkt_closed: return + if self.mkt_closed: + return # We now have the information needed to place a limit order with the eta # strategic threshold parameter. self.placeOrder() - self.state = 'AWAITING_WAKEUP' + self.state = "AWAITING_WAKEUP" # Internal state and logic specific to this agent subclass. # Cancel all open orders. # Return value: did we issue any cancellation requests? def cancelOrders(self): - if not self.orders: return False + if not self.orders: + return False for id, order in self.orders.items(): self.cancelOrder(order) @@ -317,4 +375,4 @@ def cancelOrders(self): return True def getWakeFrequency(self): - return pd.Timedelta(self.random_state.randint(low=0, high=100), unit='ns') + return pd.Timedelta(self.random_state.randint(low=0, high=100), unit="ns") diff --git a/agent/etf/EtfArbAgent.py b/agent/etf/EtfArbAgent.py index 5017c5488..8110565b5 100644 --- a/agent/etf/EtfArbAgent.py +++ b/agent/etf/EtfArbAgent.py @@ -1,197 +1,213 @@ -from agent.TradingAgent import TradingAgent -from util.util import log_print - import numpy as np import pandas as pd +from agent.TradingAgent import TradingAgent +from util.util import log_print + class EtfArbAgent(TradingAgent): - def __init__(self, id, name, type, portfolio = {}, gamma = 0, starting_cash=100000, lambda_a = 0.005, - log_orders = False, random_state = None): - - # Base class init. - super().__init__(id, name, type, starting_cash=starting_cash, log_orders=log_orders, random_state = random_state) - - # Store important parameters particular to the ETF arbitrage agent. 
- self.inPrime = False # Determines if the agent also participates in the Primary ETF market - self.portfolio = portfolio # ETF portfolio - self.gamma = gamma # Threshold for difference between ETF and index to trade - self.messageCount = len(self.portfolio) + 1 # Tracks the number of messages sent so all limit orders are sent after - # are sent after all mid prices are calculated - #self.q_max = q_max # max unit holdings - self.lambda_a = lambda_a # mean arrival rate of ETF arb agents (eventually change to a subscription) - - # NEED TO TEST IF THERE ARE SYMBOLS IN PORTFOLIO - - # The agent uses this to track whether it has begun its strategy or is still - # handling pre-market tasks. - self.trading = False - - # The agent begins in its "complete" state, not waiting for - # any special event or condition. - self.state = 'AWAITING_WAKEUP' - - def kernelStarting(self, startTime): - # self.kernel is set in Agent.kernelInitializing() - # self.exchangeID is set in TradingAgent.kernelStarting() - - super().kernelStarting(startTime) - - self.oracle = self.kernel.oracle - - - def kernelStopping (self): - # Always call parent method to be safe. - super().kernelStopping() - - # Print end of day valuation. - H = {} - H['ETF'] = self.getHoldings('ETF') - for i,s in enumerate(self.portfolio): - H[s] = self.getHoldings(s) - print(H) - print(self.daily_close_price) - - - def wakeup (self, currentTime): - # Parent class handles discovery of exchange times and market_open wakeup call. - super().wakeup(currentTime) - - self.state = 'INACTIVE' - - if not self.mkt_open or not self.mkt_close: - return - else: - if not self.trading: - self.trading = True - # Time to start trading! - log_print ("{} is ready to start trading now.", self.name) - - - # Steady state wakeup behavior starts here. - # If we've been told the market has closed for the day, we will only request - # final price information, then stop. 
- # If the market has closed and we haven't obtained the daily close price yet, - # do that before we cease activity for the day. Don't do any other behavior - # after market close. - # If the calling agent is a subclass, don't initiate the strategy section of wakeup(), as it - # may want to do something different. - if self.mkt_closed and not self.inPrime: - for i,s in enumerate(self.portfolio): - if s not in self.daily_close_price: - self.getLastTrade(s) - self.state = 'AWAITING_LAST_TRADE' - if 'ETF' not in self.daily_close_price: - self.getLastTrade('ETF') - self.state = 'AWAITING_LAST_TRADE' - return - - # Schedule a wakeup for the next time this agent should arrive at the market - # (following the conclusion of its current activity cycle). - # We do this early in case some of our expected message responses don't arrive. - - # Agents should arrive according to a Poisson process. This is equivalent to - # each agent independently sampling its next arrival time from an exponential - # distribution in alternate Beta formation with Beta = 1 / lambda, where lambda - # is the mean arrival rate of the Poisson process. - elif not self.inPrime: - delta_time = self.random_state.exponential(scale = 1.0 / self.lambda_a) - self.setWakeup(currentTime + pd.Timedelta('{}ns'.format(int(round(delta_time))))) - - # Issue cancel requests for any open orders. Don't wait for confirmation, as presently - # the only reason it could fail is that the order already executed. (But requests won't - # be generated for those, anyway, unless something strange has happened.) - self.cancelOrders() - - - # The ETF arb agent DOES try to maintain a zero position, so there IS need to exit positions - # as some "active trading" agents might. It might exit a position based on its order logic, - # but this will be as a natural consequence of its beliefs... but it submits marketable orders so... 
- for i,s in enumerate(self.portfolio): - self.getCurrentSpread(s) - self.getCurrentSpread('ETF') - self.state = 'AWAITING_SPREAD' - - else: - self.state = 'ACTIVE' - - def getPriceEstimates(self): - index_mids = np.empty(len(self.portfolio)) - index_p = {} - empty_mid = False - for i,s in enumerate(self.portfolio): - bid, bid_vol, ask, ask_vol = self.getKnownBidAsk(s) - if bid != None and ask != None: - index_p[s] = {'bid': bid, 'ask': ask} - mid = 0.5 * (int(bid) + int(ask)) - else: - mid = float() - index_p[s] = {'bid': float(), 'ask': float()} - empty_mid = True - index_mids[i] = mid - bid, bid_vol, ask, ask_vol = self.getKnownBidAsk('ETF') - etf_p = {'bid': bid, 'ask': ask} - if bid != None and ask != None: - etf_mid = 0.5 * (int(bid) + int(ask)) - else: - etf_mid = float() - empty_mid = True - index_mid = np.sum(index_mids) - return etf_mid, index_mid, etf_p, index_p, empty_mid - - def placeOrder(self): - etf_mid, index_mid, etf_p, index_p, empty_mid = self.getPriceEstimates() - if empty_mid: - #print('no move because index or ETF was missing part of NBBO') - pass - elif (index_mid - etf_mid) > self.gamma: - self.placeLimitOrder('ETF', 1, True, etf_p['ask']) - elif (etf_mid - index_mid) > self.gamma: - self.placeLimitOrder('ETF', 1, False, etf_p['bid']) - else: - pass - #print('no move because abs(index - ETF mid) < gamma') - - def receiveMessage(self, currentTime, msg): - # Parent class schedules market open wakeup call once market open/close times are known. - super().receiveMessage(currentTime, msg) - - # We have been awakened by something other than our scheduled wakeup. - # If our internal state indicates we were waiting for a particular event, - # check if we can transition to a new state. - - if self.state == 'AWAITING_SPREAD': - # We were waiting to receive the current spread/book. Since we don't currently - # track timestamps on retained information, we rely on actually seeing a - # QUERY_SPREAD response message. 
- - if msg.body['msg'] == 'QUERY_SPREAD': - # This is what we were waiting for. - self.messageCount -= 1 - - # But if the market is now closed, don't advance to placing orders. - if self.mkt_closed: return - - # We now have the information needed to place a limit order with the eta - # strategic threshold parameter. - if self.messageCount == 0: - self.placeOrder() - self.messageCount = len(self.portfolio) + 1 - self.state = 'AWAITING_WAKEUP' - - - # Internal state and logic specific to this agent subclass. - - # Cancel all open orders. - # Return value: did we issue any cancellation requests? - def cancelOrders (self): - if not self.orders: return False - - for id, order in self.orders.items(): - self.cancelOrder(order) - - return True - - def getWakeFrequency (self): - return pd.Timedelta(self.random_state.randint(low = 0, high = 100), unit='ns') \ No newline at end of file + def __init__( + self, + id, + name, + type, + portfolio={}, + gamma=0, + starting_cash=100000, + lambda_a=0.005, + log_orders=False, + random_state=None, + ): + + # Base class init. + super().__init__( + id, + name, + type, + starting_cash=starting_cash, + log_orders=log_orders, + random_state=random_state, + ) + + # Store important parameters particular to the ETF arbitrage agent. + self.inPrime = False # Determines if the agent also participates in the Primary ETF market + self.portfolio = portfolio # ETF portfolio + self.gamma = gamma # Threshold for difference between ETF and index to trade + self.messageCount = ( + len(self.portfolio) + 1 + ) # Tracks the number of messages sent so all limit orders are sent after + # are sent after all mid prices are calculated + # self.q_max = q_max # max unit holdings + self.lambda_a = lambda_a # mean arrival rate of ETF arb agents (eventually change to a subscription) + + # NEED TO TEST IF THERE ARE SYMBOLS IN PORTFOLIO + + # The agent uses this to track whether it has begun its strategy or is still + # handling pre-market tasks. 
+ self.trading = False + + # The agent begins in its "complete" state, not waiting for + # any special event or condition. + self.state = "AWAITING_WAKEUP" + + def kernelStarting(self, startTime): + # self.kernel is set in Agent.kernelInitializing() + # self.exchangeID is set in TradingAgent.kernelStarting() + + super().kernelStarting(startTime) + + self.oracle = self.kernel.oracle + + def kernelStopping(self): + # Always call parent method to be safe. + super().kernelStopping() + + # Print end of day valuation. + H = {} + H["ETF"] = self.getHoldings("ETF") + for i, s in enumerate(self.portfolio): + H[s] = self.getHoldings(s) + print(H) + print(self.daily_close_price) + + def wakeup(self, currentTime): + # Parent class handles discovery of exchange times and market_open wakeup call. + super().wakeup(currentTime) + + self.state = "INACTIVE" + + if not self.mkt_open or not self.mkt_close: + return + else: + if not self.trading: + self.trading = True + # Time to start trading! + log_print("{} is ready to start trading now.", self.name) + + # Steady state wakeup behavior starts here. + # If we've been told the market has closed for the day, we will only request + # final price information, then stop. + # If the market has closed and we haven't obtained the daily close price yet, + # do that before we cease activity for the day. Don't do any other behavior + # after market close. + # If the calling agent is a subclass, don't initiate the strategy section of wakeup(), as it + # may want to do something different. + if self.mkt_closed and not self.inPrime: + for i, s in enumerate(self.portfolio): + if s not in self.daily_close_price: + self.getLastTrade(s) + self.state = "AWAITING_LAST_TRADE" + if "ETF" not in self.daily_close_price: + self.getLastTrade("ETF") + self.state = "AWAITING_LAST_TRADE" + return + + # Schedule a wakeup for the next time this agent should arrive at the market + # (following the conclusion of its current activity cycle). 
+ # We do this early in case some of our expected message responses don't arrive. + + # Agents should arrive according to a Poisson process. This is equivalent to + # each agent independently sampling its next arrival time from an exponential + # distribution in alternate Beta formation with Beta = 1 / lambda, where lambda + # is the mean arrival rate of the Poisson process. + elif not self.inPrime: + delta_time = self.random_state.exponential(scale=1.0 / self.lambda_a) + self.setWakeup(currentTime + pd.Timedelta("{}ns".format(int(round(delta_time))))) + + # Issue cancel requests for any open orders. Don't wait for confirmation, as presently + # the only reason it could fail is that the order already executed. (But requests won't + # be generated for those, anyway, unless something strange has happened.) + self.cancelOrders() + + # The ETF arb agent DOES try to maintain a zero position, so there IS need to exit positions + # as some "active trading" agents might. It might exit a position based on its order logic, + # but this will be as a natural consequence of its beliefs... but it submits marketable orders so... 
+ for i, s in enumerate(self.portfolio): + self.getCurrentSpread(s) + self.getCurrentSpread("ETF") + self.state = "AWAITING_SPREAD" + + else: + self.state = "ACTIVE" + + def getPriceEstimates(self): + index_mids = np.empty(len(self.portfolio)) + index_p = {} + empty_mid = False + for i, s in enumerate(self.portfolio): + bid, bid_vol, ask, ask_vol = self.getKnownBidAsk(s) + if bid != None and ask != None: + index_p[s] = {"bid": bid, "ask": ask} + mid = 0.5 * (int(bid) + int(ask)) + else: + mid = float() + index_p[s] = {"bid": float(), "ask": float()} + empty_mid = True + index_mids[i] = mid + bid, bid_vol, ask, ask_vol = self.getKnownBidAsk("ETF") + etf_p = {"bid": bid, "ask": ask} + if bid != None and ask != None: + etf_mid = 0.5 * (int(bid) + int(ask)) + else: + etf_mid = float() + empty_mid = True + index_mid = np.sum(index_mids) + return etf_mid, index_mid, etf_p, index_p, empty_mid + + def placeOrder(self): + etf_mid, index_mid, etf_p, index_p, empty_mid = self.getPriceEstimates() + if empty_mid: + # print('no move because index or ETF was missing part of NBBO') + pass + elif (index_mid - etf_mid) > self.gamma: + self.placeLimitOrder("ETF", 1, True, etf_p["ask"]) + elif (etf_mid - index_mid) > self.gamma: + self.placeLimitOrder("ETF", 1, False, etf_p["bid"]) + else: + pass + # print('no move because abs(index - ETF mid) < gamma') + + def receiveMessage(self, currentTime, msg): + # Parent class schedules market open wakeup call once market open/close times are known. + super().receiveMessage(currentTime, msg) + + # We have been awakened by something other than our scheduled wakeup. + # If our internal state indicates we were waiting for a particular event, + # check if we can transition to a new state. + + if self.state == "AWAITING_SPREAD": + # We were waiting to receive the current spread/book. Since we don't currently + # track timestamps on retained information, we rely on actually seeing a + # QUERY_SPREAD response message. 
+ + if msg.body["msg"] == "QUERY_SPREAD": + # This is what we were waiting for. + self.messageCount -= 1 + + # But if the market is now closed, don't advance to placing orders. + if self.mkt_closed: + return + + # We now have the information needed to place a limit order with the eta + # strategic threshold parameter. + if self.messageCount == 0: + self.placeOrder() + self.messageCount = len(self.portfolio) + 1 + self.state = "AWAITING_WAKEUP" + + # Internal state and logic specific to this agent subclass. + + # Cancel all open orders. + # Return value: did we issue any cancellation requests? + def cancelOrders(self): + if not self.orders: + return False + + for id, order in self.orders.items(): + self.cancelOrder(order) + + return True + + def getWakeFrequency(self): + return pd.Timedelta(self.random_state.randint(low=0, high=100), unit="ns") diff --git a/agent/etf/EtfMarketMakerAgent.py b/agent/etf/EtfMarketMakerAgent.py index e6d534ad2..d39adfd9b 100644 --- a/agent/etf/EtfMarketMakerAgent.py +++ b/agent/etf/EtfMarketMakerAgent.py @@ -1,241 +1,268 @@ +import sys + +import pandas as pd + from agent.etf.EtfArbAgent import EtfArbAgent from agent.etf.EtfPrimaryAgent import EtfPrimaryAgent from message.Message import Message -from util.order.etf.BasketOrder import BasketOrder from util.order.BasketOrder import BasketOrder +from util.order.etf.BasketOrder import BasketOrder from util.util import log_print -import pandas as pd -import sys class EtfMarketMakerAgent(EtfArbAgent): - def __init__(self, id, name, type, portfolio = {}, gamma = 0, starting_cash=100000, lambda_a = 0.005, - log_orders = False, random_state = None): - - # Base class init. - super().__init__(id, name, type, portfolio = portfolio, gamma = gamma, starting_cash=starting_cash, - lambda_a = lambda_a, log_orders=log_orders, random_state = random_state) - - # NEED TO TEST IF THERE ARE SYMBOLS IN PORTFOLIO - - # Store important parameters particular to the ETF arbitrage agent. 
- self.inPrime = True # Determines if the agent also participates in the Primary ETF market - - # We don't yet know when the primary opens or closes. - self.prime_open = None - self.prime_close = None - - # Remember whether we have already passed the primary close time, as far - # as we know. - self.prime_closed = False - self.switched_mkt = False - - def kernelStarting(self, startTime): - # self.kernel is set in Agent.kernelInitializing() - # self.exchangeID is set in TradingAgent.kernelStarting() - - self.primeID = self.kernel.findAgentByType(EtfPrimaryAgent) - - log_print ("Agent {} requested agent of type Agent.EtfPrimaryAgent. Given Agent ID: {}", - self.id, self.primeID) - - super().kernelStarting(startTime) - - - def wakeup (self, currentTime): - # Parent class handles discovery of exchange times and market_open wakeup call. - super().wakeup(currentTime) - - # Only if the superclass leaves the state as ACTIVE should we proceed with our - # trading strategy. - if self.state != 'ACTIVE': return - - if not self.prime_open: - # Ask our primary when it opens and closes, exchange is handled in TradingAgent - self.sendMessage(self.primeID, Message({ "msg" : "WHEN_PRIME_OPEN", "sender": self.id })) - self.sendMessage(self.primeID, Message({ "msg" : "WHEN_PRIME_CLOSE", "sender": self.id })) - - - # Steady state wakeup behavior starts here. - if not self.mkt_closed and self.prime_closed: - print('The prime closed before the exchange') - sys.exit() - - elif self.mkt_closed and self.prime_closed: - return - - # If we've been told the market has closed for the day, we will only request - # final price information, then stop. - # If the market has closed and we haven't obtained the daily close price yet, - # do that before we cease activity for the day. Don't do any other behavior - # after market close. 
- elif self.mkt_closed and not self.prime_closed: - if self.switched_mkt and self.currentTime >= self.prime_open: - self.getEtfNav() - self.state = 'AWAITING_NAV' - elif not self.switched_mkt: - for i,s in enumerate(self.portfolio): - if s not in self.daily_close_price: - self.getLastTrade(s) - self.state = 'AWAITING_LAST_TRADE' - if 'ETF' not in self.daily_close_price: - self.getLastTrade('ETF') - self.state = 'AWAITING_LAST_TRADE' - - print('holdings before primary: ' + str(self.holdings)) - - self.setWakeup(self.prime_open) - self.switched_mkt = True - else: - self.setWakeup(self.prime_open) - return - - # Schedule a wakeup for the next time this agent should arrive at the market - # (following the conclusion of its current activity cycle). - # We do this early in case some of our expected message responses don't arrive. - - # Agents should arrive according to a Poisson process. This is equivalent to - # each agent independently sampling its next arrival time from an exponential - # distribution in alternate Beta formation with Beta = 1 / lambda, where lambda - # is the mean arrival rate of the Poisson process. - else: - delta_time = self.random_state.exponential(scale = 1.0 / self.lambda_a) - self.setWakeup(currentTime + pd.Timedelta('{}ns'.format(int(round(delta_time))))) - - # Issue cancel requests for any open orders. Don't wait for confirmation, as presently - # the only reason it could fail is that the order already executed. (But requests won't - # be generated for those, anyway, unless something strange has happened.) - self.cancelOrders() - - - # The ETF arb agent DOES try to maintain a zero position, so there IS need to exit positions - # as some "active trading" agents might. It might exit a position based on its order logic, - # but this will be as a natural consequence of its beliefs... but it submits marketable orders so... 
- - - # If the calling agent is a subclass, don't initiate the strategy section of wakeup(), as it - # may want to do something different. - # FIGURE OUT WHAT TO DO WITH MULTIPLE SPREADS... - for i,s in enumerate(self.portfolio): - self.getCurrentSpread(s) - self.getCurrentSpread('ETF') - self.state = 'AWAITING_SPREAD' - - def placeOrder(self): - etf_mid, index_mid, etf_p, index_p, empty_mid = self.getPriceEstimates() - if empty_mid: - #print('no move because index or ETF was missing part of NBBO') - pass - elif (index_mid - etf_mid) > self.gamma: - #print('buy ETF') - for i,s in enumerate(self.portfolio): - self.placeLimitOrder(s, 1, False, index_p[s]['bid']) - self.placeLimitOrder('ETF', 1, True, etf_p['ask']) - elif (etf_mid - index_mid) > self.gamma: - #print('sell ETF') - for i,s in enumerate(self.portfolio): - self.placeLimitOrder(s, 1, True, index_p[s]['ask']) - self.placeLimitOrder('ETF', 1, False, etf_p['bid']) - else: - pass - #print('no move because abs(index - ETF mid) < gamma') - - def decideBasket(self): - print(self.portfolio) - index_est = 0 - for i,s in enumerate(self.portfolio): - index_est += self.daily_close_price[s] - - H = {} - for i,s in enumerate(self.portfolio): - H[s] = self.getHoldings(s) - etf_h = self.getHoldings('ETF') - - self.nav_diff = self.nav - index_est - if self.nav_diff > 0: - if min(H.values()) > 0 and etf_h < 0: - print("send creation basket") - self.placeBasketOrder(min(H.values()), True) - else: print('wrong side for basket') - elif self.nav_diff < 0: - if etf_h > 0 and max(H.values()) < 0: - print("submit redemption basket") - self.placeBasketOrder(etf_h, False) - else: print('wrong side for basket') - else: - if min(H.values()) > 0 and etf_h < 0: - print("send creation basket") - self.placeBasketOrder(min(H.values()), True) - elif etf_h > 0 and max(H.values()) < 0: - print("submit redemption basket") - self.placeBasketOrder(etf_h, False) - - - def receiveMessage(self, currentTime, msg): - # Parent class schedules market 
open wakeup call once market open/close times are known. - super().receiveMessage(currentTime, msg) - - # We have been awakened by something other than our scheduled wakeup. - # If our internal state indicates we were waiting for a particular event, - # check if we can transition to a new state. - - # Record market open or close times. - if msg.body['msg'] == "WHEN_PRIME_OPEN": - self.prime_open = msg.body['data'] - - log_print ("Recorded primary open: {}", self.kernel.fmtTime(self.prime_open)) - - elif msg.body['msg'] == "WHEN_PRIME_CLOSE": - self.prime_close = msg.body['data'] - - log_print ("Recorded primary close: {}", self.kernel.fmtTime(self.prime_close)) - - if self.state == 'AWAITING_NAV': - if msg.body['msg'] == 'QUERY_NAV': - if msg.body['prime_closed']: self.prime_closed = True - self.queryEtfNav(msg.body['nav']) - - # But if the market is now closed, don't advance to placing orders. - if self.prime_closed: return - - # We now have the information needed to place a C/R basket. - self.decideBasket() - - elif self.state == 'AWAITING_BASKET': - if msg.body['msg'] == 'BASKET_EXECUTED': - order = msg.body['order'] - # We now have the information needed to place a C/R basket. - for i,s in enumerate(self.portfolio): - if order.is_buy_order: self.holdings[s] -= order.quantity - else: self.holdings[s] += order.quantity - if order.is_buy_order: - self.holdings['ETF'] += order.quantity - self.basket_size = order.quantity + def __init__( + self, + id, + name, + type, + portfolio={}, + gamma=0, + starting_cash=100000, + lambda_a=0.005, + log_orders=False, + random_state=None, + ): + + # Base class init. + super().__init__( + id, + name, + type, + portfolio=portfolio, + gamma=gamma, + starting_cash=starting_cash, + lambda_a=lambda_a, + log_orders=log_orders, + random_state=random_state, + ) + + # NEED TO TEST IF THERE ARE SYMBOLS IN PORTFOLIO + + # Store important parameters particular to the ETF arbitrage agent. 
+ self.inPrime = True # Determines if the agent also participates in the Primary ETF market + + # We don't yet know when the primary opens or closes. + self.prime_open = None + self.prime_close = None + + # Remember whether we have already passed the primary close time, as far + # as we know. + self.prime_closed = False + self.switched_mkt = False + + def kernelStarting(self, startTime): + # self.kernel is set in Agent.kernelInitializing() + # self.exchangeID is set in TradingAgent.kernelStarting() + + self.primeID = self.kernel.findAgentByType(EtfPrimaryAgent) + + log_print( + "Agent {} requested agent of type Agent.EtfPrimaryAgent. Given Agent ID: {}", + self.id, + self.primeID, + ) + + super().kernelStarting(startTime) + + def wakeup(self, currentTime): + # Parent class handles discovery of exchange times and market_open wakeup call. + super().wakeup(currentTime) + + # Only if the superclass leaves the state as ACTIVE should we proceed with our + # trading strategy. + if self.state != "ACTIVE": + return + + if not self.prime_open: + # Ask our primary when it opens and closes, exchange is handled in TradingAgent + self.sendMessage(self.primeID, Message({"msg": "WHEN_PRIME_OPEN", "sender": self.id})) + self.sendMessage(self.primeID, Message({"msg": "WHEN_PRIME_CLOSE", "sender": self.id})) + + # Steady state wakeup behavior starts here. + if not self.mkt_closed and self.prime_closed: + print("The prime closed before the exchange") + sys.exit() + + elif self.mkt_closed and self.prime_closed: + return + + # If we've been told the market has closed for the day, we will only request + # final price information, then stop. + # If the market has closed and we haven't obtained the daily close price yet, + # do that before we cease activity for the day. Don't do any other behavior + # after market close. 
+ elif self.mkt_closed and not self.prime_closed: + if self.switched_mkt and self.currentTime >= self.prime_open: + self.getEtfNav() + self.state = "AWAITING_NAV" + elif not self.switched_mkt: + for i, s in enumerate(self.portfolio): + if s not in self.daily_close_price: + self.getLastTrade(s) + self.state = "AWAITING_LAST_TRADE" + if "ETF" not in self.daily_close_price: + self.getLastTrade("ETF") + self.state = "AWAITING_LAST_TRADE" + + print("holdings before primary: " + str(self.holdings)) + + self.setWakeup(self.prime_open) + self.switched_mkt = True + else: + self.setWakeup(self.prime_open) + return + + # Schedule a wakeup for the next time this agent should arrive at the market + # (following the conclusion of its current activity cycle). + # We do this early in case some of our expected message responses don't arrive. + + # Agents should arrive according to a Poisson process. This is equivalent to + # each agent independently sampling its next arrival time from an exponential + # distribution in alternate Beta formation with Beta = 1 / lambda, where lambda + # is the mean arrival rate of the Poisson process. else: - self.holdings['ETF'] -= order.quantity - self.basket_size = -1 * order.quantity - - self.state = 'INACTIVE' - - - # Internal state and logic specific to this agent subclass. - - # Used by any ETF Arb Agent subclass to query the Net Assest Value (NAV) of the ETF. - # This activity is not logged. - def getEtfNav (self): - self.sendMessage(self.primeID, Message({ "msg" : "QUERY_NAV", "sender": self.id })) - - # Used by ETF Arb Agent subclass to place a basket order. - # This activity is not logged. 
- def placeBasketOrder (self, quantity, is_create_order): - order = BasketOrder(self.id, self.currentTime, 'ETF', quantity, is_create_order) - print('BASKET ORDER PLACED: ' + str(order)) - self.sendMessage(self.primeID, Message({ "msg" : "BASKET_ORDER", "sender": self.id, - "order" : order })) - self.state = 'AWAITING_BASKET' - - # Handles QUERY NAV messages from primary - def queryEtfNav(self, nav): - self.nav = nav - log_print ("Received NAV of ETF.") \ No newline at end of file + delta_time = self.random_state.exponential(scale=1.0 / self.lambda_a) + self.setWakeup(currentTime + pd.Timedelta("{}ns".format(int(round(delta_time))))) + + # Issue cancel requests for any open orders. Don't wait for confirmation, as presently + # the only reason it could fail is that the order already executed. (But requests won't + # be generated for those, anyway, unless something strange has happened.) + self.cancelOrders() + + # The ETF arb agent DOES try to maintain a zero position, so there IS need to exit positions + # as some "active trading" agents might. It might exit a position based on its order logic, + # but this will be as a natural consequence of its beliefs... but it submits marketable orders so... + + # If the calling agent is a subclass, don't initiate the strategy section of wakeup(), as it + # may want to do something different. + # FIGURE OUT WHAT TO DO WITH MULTIPLE SPREADS... 
+ for i, s in enumerate(self.portfolio): + self.getCurrentSpread(s) + self.getCurrentSpread("ETF") + self.state = "AWAITING_SPREAD" + + def placeOrder(self): + etf_mid, index_mid, etf_p, index_p, empty_mid = self.getPriceEstimates() + if empty_mid: + # print('no move because index or ETF was missing part of NBBO') + pass + elif (index_mid - etf_mid) > self.gamma: + # print('buy ETF') + for i, s in enumerate(self.portfolio): + self.placeLimitOrder(s, 1, False, index_p[s]["bid"]) + self.placeLimitOrder("ETF", 1, True, etf_p["ask"]) + elif (etf_mid - index_mid) > self.gamma: + # print('sell ETF') + for i, s in enumerate(self.portfolio): + self.placeLimitOrder(s, 1, True, index_p[s]["ask"]) + self.placeLimitOrder("ETF", 1, False, etf_p["bid"]) + else: + pass + # print('no move because abs(index - ETF mid) < gamma') + + def decideBasket(self): + print(self.portfolio) + index_est = 0 + for i, s in enumerate(self.portfolio): + index_est += self.daily_close_price[s] + + H = {} + for i, s in enumerate(self.portfolio): + H[s] = self.getHoldings(s) + etf_h = self.getHoldings("ETF") + + self.nav_diff = self.nav - index_est + if self.nav_diff > 0: + if min(H.values()) > 0 and etf_h < 0: + print("send creation basket") + self.placeBasketOrder(min(H.values()), True) + else: + print("wrong side for basket") + elif self.nav_diff < 0: + if etf_h > 0 and max(H.values()) < 0: + print("submit redemption basket") + self.placeBasketOrder(etf_h, False) + else: + print("wrong side for basket") + else: + if min(H.values()) > 0 and etf_h < 0: + print("send creation basket") + self.placeBasketOrder(min(H.values()), True) + elif etf_h > 0 and max(H.values()) < 0: + print("submit redemption basket") + self.placeBasketOrder(etf_h, False) + + def receiveMessage(self, currentTime, msg): + # Parent class schedules market open wakeup call once market open/close times are known. + super().receiveMessage(currentTime, msg) + + # We have been awakened by something other than our scheduled wakeup. 
+ # If our internal state indicates we were waiting for a particular event, + # check if we can transition to a new state. + + # Record market open or close times. + if msg.body["msg"] == "WHEN_PRIME_OPEN": + self.prime_open = msg.body["data"] + + log_print("Recorded primary open: {}", self.kernel.fmtTime(self.prime_open)) + + elif msg.body["msg"] == "WHEN_PRIME_CLOSE": + self.prime_close = msg.body["data"] + + log_print("Recorded primary close: {}", self.kernel.fmtTime(self.prime_close)) + + if self.state == "AWAITING_NAV": + if msg.body["msg"] == "QUERY_NAV": + if msg.body["prime_closed"]: + self.prime_closed = True + self.queryEtfNav(msg.body["nav"]) + + # But if the market is now closed, don't advance to placing orders. + if self.prime_closed: + return + + # We now have the information needed to place a C/R basket. + self.decideBasket() + + elif self.state == "AWAITING_BASKET": + if msg.body["msg"] == "BASKET_EXECUTED": + order = msg.body["order"] + # We now have the information needed to place a C/R basket. + for i, s in enumerate(self.portfolio): + if order.is_buy_order: + self.holdings[s] -= order.quantity + else: + self.holdings[s] += order.quantity + if order.is_buy_order: + self.holdings["ETF"] += order.quantity + self.basket_size = order.quantity + else: + self.holdings["ETF"] -= order.quantity + self.basket_size = -1 * order.quantity + + self.state = "INACTIVE" + + # Internal state and logic specific to this agent subclass. + + # Used by any ETF Arb Agent subclass to query the Net Assest Value (NAV) of the ETF. + # This activity is not logged. + def getEtfNav(self): + self.sendMessage(self.primeID, Message({"msg": "QUERY_NAV", "sender": self.id})) + + # Used by ETF Arb Agent subclass to place a basket order. + # This activity is not logged. 
+ def placeBasketOrder(self, quantity, is_create_order): + order = BasketOrder(self.id, self.currentTime, "ETF", quantity, is_create_order) + print("BASKET ORDER PLACED: " + str(order)) + self.sendMessage( + self.primeID, + Message({"msg": "BASKET_ORDER", "sender": self.id, "order": order}), + ) + self.state = "AWAITING_BASKET" + + # Handles QUERY NAV messages from primary + def queryEtfNav(self, nav): + self.nav = nav + log_print("Received NAV of ETF.") diff --git a/agent/etf/EtfPrimaryAgent.py b/agent/etf/EtfPrimaryAgent.py index eecb12bfc..1ab085ba7 100644 --- a/agent/etf/EtfPrimaryAgent.py +++ b/agent/etf/EtfPrimaryAgent.py @@ -4,163 +4,210 @@ # the levels of order stream history to maintain per symbol (maintains all orders that led to the last N trades), # whether to log all order activity to the agent log, and a random state object (already seeded) to use # for stochasticity. -from agent.FinancialAgent import FinancialAgent +import pandas as pd + from agent.ExchangeAgent import ExchangeAgent +from agent.FinancialAgent import FinancialAgent from message.Message import Message from util.util import log_print -import pandas as pd -pd.set_option('display.max_rows', 500) +pd.set_option("display.max_rows", 500) class EtfPrimaryAgent(FinancialAgent): - def __init__(self, id, name, type, prime_open, prime_close, symbol, pipeline_delay = 40000, - computation_delay = 1, random_state = None): - - super().__init__(id, name, type, random_state) - - # Do not request repeated wakeup calls. - self.reschedule = False - - # Store this exchange's open and close times. - self.prime_open = prime_open - self.prime_close = prime_close - - self.mkt_close = None - - self.nav = 0 - self.create = 0 - self.redeem = 0 - - self.symbol = symbol - - # Right now, only the exchange agent has a parallel processing pipeline delay. This is an additional - # delay added only to order activity (placing orders, etc) and not simple inquiries (market operating - # hours, etc). 
- self.pipeline_delay = pipeline_delay - - # Computation delay is applied on every wakeup call or message received. - self.computation_delay = computation_delay - - def kernelStarting(self, startTime): - # Find an exchange with which we can place orders. It is guaranteed - # to exist by now (if there is one). - self.exchangeID = self.kernel.findAgentByType(ExchangeAgent) - - log_print ("Agent {} requested agent of type Agent.ExchangeAgent. Given Agent ID: {}", - self.id, self.exchangeID) - - # Request a wake-up call as in the base Agent. - super().kernelStarting(startTime) - - - def kernelStopping (self): - # Always call parent method to be safe. - super().kernelStopping() - - print ("Final C/R baskets for {}: {} creation baskets. {} redemption baskets".format(self.name, - self.create, self.redeem)) - - - # Simulation participation messages. - - def wakeup (self, currentTime): - super().wakeup(currentTime) - - if self.mkt_close is None: - # Ask our exchange when it opens and closes. - self.sendMessage(self.exchangeID, Message({ "msg" : "WHEN_MKT_CLOSE", "sender": self.id })) - - else: - # Get close price of ETF/nav - self.getLastTrade(self.symbol) - - def receiveMessage (self, currentTime, msg): - super().receiveMessage(currentTime, msg) - - # Unless the intent of an experiment is to examine computational issues within an Exchange, - # it will typically have either 1 ns delay (near instant but cannot process multiple orders - # in the same atomic time unit) or 0 ns delay (can process any number of orders, always in - # the atomic time unit in which they are received). This is separate from, and additional - # to, any parallel pipeline delay imposed for order book activity. - - # Note that computation delay MUST be updated before any calls to sendMessage. - self.setComputationDelay(self.computation_delay) - - # Is the exchange closed? (This block only affects post-close, not pre-open.) 
- if currentTime > self.prime_close: - # Most messages after close will receive a 'PRIME_CLOSED' message in response. - log_print ("{} received {}, discarded: prime is closed.", self.name, msg.body['msg']) - self.sendMessage(msg.body['sender'], Message({ "msg": "PRIME_CLOSED" })) - # Don't do any further processing on these messages! - return - - - if msg.body['msg'] == "WHEN_MKT_CLOSE": - self.mkt_close = msg.body['data'] - log_print ("Recorded market close: {}", self.kernel.fmtTime(self.mkt_close)) - self.setWakeup(self.mkt_close) - return - - elif msg.body['msg'] == 'QUERY_LAST_TRADE': - # Call the queryLastTrade method. - self.queryLastTrade(msg.body['symbol'], msg.body['data']) - return - - self.logEvent(msg.body['msg'], msg.body['sender']) - - # Handle all message types understood by this exchange. - if msg.body['msg'] == "WHEN_PRIME_OPEN": - log_print ("{} received WHEN_PRIME_OPEN request from agent {}", self.name, msg.body['sender']) - - # The exchange is permitted to respond to requests for simple immutable data (like "what are your - # hours?") instantly. This does NOT include anything that queries mutable data, like equity - # quotes or trades. - self.setComputationDelay(0) - - self.sendMessage(msg.body['sender'], Message({ "msg": "WHEN_PRIME_OPEN", "data": self.prime_open })) - - elif msg.body['msg'] == "WHEN_PRIME_CLOSE": - log_print ("{} received WHEN_PRIME_CLOSE request from agent {}", self.name, msg.body['sender']) - - # The exchange is permitted to respond to requests for simple immutable data (like "what are your - # hours?") instantly. This does NOT include anything that queries mutable data, like equity - # quotes or trades. - self.setComputationDelay(0) - - self.sendMessage(msg.body['sender'], Message({ "msg": "WHEN_PRIME_CLOSE", "data": self.prime_close })) - - elif msg.body['msg'] == "QUERY_NAV": - log_print ("{} received QUERY_NAV ({}) request from agent {}", self.name, msg.body['sender']) - - # Return the NAV for the requested symbol. 
- self.sendMessage(msg.body['sender'], Message({ "msg": "QUERY_NAV", - "nav": self.nav, "prime_closed": True if currentTime > self.prime_close else False })) - - elif msg.body['msg'] == "BASKET_ORDER": - order = msg.body['order'] - log_print ("{} received BASKET_ORDER: {}", self.name, order) - if order.is_buy_order: self.create += 1 - else: self.redeem += 1 - order.fill_price = self.nav - self.sendMessage(msg.body['sender'], Message({ "msg": "BASKET_EXECUTED", "order": order})) - - - # Handles QUERY_LAST_TRADE messages from an exchange agent. - def queryLastTrade (self, symbol, price): - self.nav = price - log_print ("Received daily close price or nav of {} for {}.", price, symbol) - - # Used by any Trading Agent subclass to query the last trade price for a symbol. - # This activity is not logged. - def getLastTrade (self, symbol): - self.sendMessage(self.exchangeID, Message({ "msg" : "QUERY_LAST_TRADE", "sender": self.id, - "symbol" : symbol })) - - # Simple accessor methods for the market open and close times. - def getPrimeOpen(self): - return self.__prime_open - - def getPrimeClose(self): - return self.__prime_close \ No newline at end of file + def __init__( + self, + id, + name, + type, + prime_open, + prime_close, + symbol, + pipeline_delay=40000, + computation_delay=1, + random_state=None, + ): + + super().__init__(id, name, type, random_state) + + # Do not request repeated wakeup calls. + self.reschedule = False + + # Store this exchange's open and close times. + self.prime_open = prime_open + self.prime_close = prime_close + + self.mkt_close = None + + self.nav = 0 + self.create = 0 + self.redeem = 0 + + self.symbol = symbol + + # Right now, only the exchange agent has a parallel processing pipeline delay. This is an additional + # delay added only to order activity (placing orders, etc) and not simple inquiries (market operating + # hours, etc). 
+ self.pipeline_delay = pipeline_delay + + # Computation delay is applied on every wakeup call or message received. + self.computation_delay = computation_delay + + def kernelStarting(self, startTime): + # Find an exchange with which we can place orders. It is guaranteed + # to exist by now (if there is one). + self.exchangeID = self.kernel.findAgentByType(ExchangeAgent) + + log_print( + "Agent {} requested agent of type Agent.ExchangeAgent. Given Agent ID: {}", + self.id, + self.exchangeID, + ) + + # Request a wake-up call as in the base Agent. + super().kernelStarting(startTime) + + def kernelStopping(self): + # Always call parent method to be safe. + super().kernelStopping() + + print( + "Final C/R baskets for {}: {} creation baskets. {} redemption baskets".format( + self.name, self.create, self.redeem + ) + ) + + # Simulation participation messages. + + def wakeup(self, currentTime): + super().wakeup(currentTime) + + if self.mkt_close is None: + # Ask our exchange when it opens and closes. + self.sendMessage(self.exchangeID, Message({"msg": "WHEN_MKT_CLOSE", "sender": self.id})) + + else: + # Get close price of ETF/nav + self.getLastTrade(self.symbol) + + def receiveMessage(self, currentTime, msg): + super().receiveMessage(currentTime, msg) + + # Unless the intent of an experiment is to examine computational issues within an Exchange, + # it will typically have either 1 ns delay (near instant but cannot process multiple orders + # in the same atomic time unit) or 0 ns delay (can process any number of orders, always in + # the atomic time unit in which they are received). This is separate from, and additional + # to, any parallel pipeline delay imposed for order book activity. + + # Note that computation delay MUST be updated before any calls to sendMessage. + self.setComputationDelay(self.computation_delay) + + # Is the exchange closed? (This block only affects post-close, not pre-open.) 
+ if currentTime > self.prime_close: + # Most messages after close will receive a 'PRIME_CLOSED' message in response. + log_print( + "{} received {}, discarded: prime is closed.", + self.name, + msg.body["msg"], + ) + self.sendMessage(msg.body["sender"], Message({"msg": "PRIME_CLOSED"})) + # Don't do any further processing on these messages! + return + + if msg.body["msg"] == "WHEN_MKT_CLOSE": + self.mkt_close = msg.body["data"] + log_print("Recorded market close: {}", self.kernel.fmtTime(self.mkt_close)) + self.setWakeup(self.mkt_close) + return + + elif msg.body["msg"] == "QUERY_LAST_TRADE": + # Call the queryLastTrade method. + self.queryLastTrade(msg.body["symbol"], msg.body["data"]) + return + + self.logEvent(msg.body["msg"], msg.body["sender"]) + + # Handle all message types understood by this exchange. + if msg.body["msg"] == "WHEN_PRIME_OPEN": + log_print( + "{} received WHEN_PRIME_OPEN request from agent {}", + self.name, + msg.body["sender"], + ) + + # The exchange is permitted to respond to requests for simple immutable data (like "what are your + # hours?") instantly. This does NOT include anything that queries mutable data, like equity + # quotes or trades. + self.setComputationDelay(0) + + self.sendMessage( + msg.body["sender"], + Message({"msg": "WHEN_PRIME_OPEN", "data": self.prime_open}), + ) + + elif msg.body["msg"] == "WHEN_PRIME_CLOSE": + log_print( + "{} received WHEN_PRIME_CLOSE request from agent {}", + self.name, + msg.body["sender"], + ) + + # The exchange is permitted to respond to requests for simple immutable data (like "what are your + # hours?") instantly. This does NOT include anything that queries mutable data, like equity + # quotes or trades. 
+ self.setComputationDelay(0) + + self.sendMessage( + msg.body["sender"], + Message({"msg": "WHEN_PRIME_CLOSE", "data": self.prime_close}), + ) + + elif msg.body["msg"] == "QUERY_NAV": + log_print( + "{} received QUERY_NAV ({}) request from agent {}", + self.name, + msg.body["sender"], + ) + + # Return the NAV for the requested symbol. + self.sendMessage( + msg.body["sender"], + Message( + { + "msg": "QUERY_NAV", + "nav": self.nav, + "prime_closed": (True if currentTime > self.prime_close else False), + } + ), + ) + + elif msg.body["msg"] == "BASKET_ORDER": + order = msg.body["order"] + log_print("{} received BASKET_ORDER: {}", self.name, order) + if order.is_buy_order: + self.create += 1 + else: + self.redeem += 1 + order.fill_price = self.nav + self.sendMessage(msg.body["sender"], Message({"msg": "BASKET_EXECUTED", "order": order})) + + # Handles QUERY_LAST_TRADE messages from an exchange agent. + def queryLastTrade(self, symbol, price): + self.nav = price + log_print("Received daily close price or nav of {} for {}.", price, symbol) + + # Used by any Trading Agent subclass to query the last trade price for a symbol. + # This activity is not logged. + def getLastTrade(self, symbol): + self.sendMessage( + self.exchangeID, + Message({"msg": "QUERY_LAST_TRADE", "sender": self.id, "symbol": symbol}), + ) + + # Simple accessor methods for the market open and close times. 
+ def getPrimeOpen(self): + return self.__prime_open + + def getPrimeClose(self): + return self.__prime_close diff --git a/agent/examples/ExampleExperimentalAgent.py b/agent/examples/ExampleExperimentalAgent.py index 750ee5054..73131291c 100644 --- a/agent/examples/ExampleExperimentalAgent.py +++ b/agent/examples/ExampleExperimentalAgent.py @@ -1,13 +1,26 @@ -from agent.examples.SubscriptionAgent import SubscriptionAgent -import pandas as pd from copy import deepcopy +import pandas as pd + +from agent.examples.SubscriptionAgent import SubscriptionAgent + class ExampleExperimentalAgentTemplate(SubscriptionAgent): - """ Minimal working template for an experimental trading agent - """ - def __init__(self, id, name, type, symbol, starting_cash, levels, subscription_freq, log_orders=False, random_state=None): - """ Constructor for ExampleExperimentalAgentTemplate. + """Minimal working template for an experimental trading agent""" + + def __init__( + self, + id, + name, + type, + symbol, + starting_cash, + levels, + subscription_freq, + log_orders=False, + random_state=None, + ): + """Constructor for ExampleExperimentalAgentTemplate. :param id: Agent's ID as set in config :param name: Agent's human-readable name as set in config @@ -19,21 +32,31 @@ def __init__(self, id, name, type, symbol, starting_cash, levels, subscription_f :param log_orders: bool to decide if agent's individual actions logged to file. 
:param random_state: numpy RandomState object from which agent derives randomness """ - super().__init__(id, name, type, symbol, starting_cash, levels, subscription_freq, log_orders=log_orders, random_state=random_state) + super().__init__( + id, + name, + type, + symbol, + starting_cash, + levels, + subscription_freq, + log_orders=log_orders, + random_state=random_state, + ) self.current_bids = None # subscription to market data populates this list self.current_asks = None # subscription to market data populates this list def wakeup(self, currentTime): - """ Action to be taken by agent at each wakeup. + """Action to be taken by agent at each wakeup. - :param currentTime: pd.Timestamp for current simulation time + :param currentTime: pd.Timestamp for current simulation time """ super().wakeup(currentTime) self.setWakeup(currentTime + self.getWakeFrequency()) def receiveMessage(self, currentTime, msg): - """ Action taken when agent receives a message from the exchange + """Action taken when agent receives a message from the exchange :param currentTime: pd.Timestamp for current simulation time :param msg: message from exchange @@ -42,29 +65,28 @@ def receiveMessage(self, currentTime, msg): super().receiveMessage(currentTime, msg) # receives subscription market data def getWakeFrequency(self): - """ Set next wakeup time for agent. """ + """Set next wakeup time for agent.""" return pd.Timedelta("1min") def placeLimitOrder(self, quantity, is_buy_order, limit_price): - """ Place a limit order at the exchange. - :param quantity (int): order quantity - :param is_buy_order (bool): True if Buy else False - :param limit_price: price level at which to place a limit order - :return: + """Place a limit order at the exchange. 
+ :param quantity (int): order quantity + :param is_buy_order (bool): True if Buy else False + :param limit_price: price level at which to place a limit order + :return: """ super().placeLimitOrder(self.symbol, quantity, is_buy_order, limit_price) def placeMarketOrder(self, quantity, is_buy_order): - """ Place a market order at the exchange. - :param quantity (int): order quantity - :param is_buy_order (bool): True if Buy else False - :return: + """Place a market order at the exchange. + :param quantity (int): order quantity + :param is_buy_order (bool): True if Buy else False + :return: """ super().placeMarketOrder(self.symbol, quantity, is_buy_order) def cancelAllOrders(self): - """ Cancels all resting limit orders placed by the experimental agent. - """ + """Cancels all resting limit orders placed by the experimental agent.""" for _, order in self.orders.items(): self.cancelOrder(order) @@ -85,10 +107,10 @@ def __init__(self, *args, wake_freq, order_size, short_window, long_window, **kw self.order_size = order_size self.short_window = short_window self.long_window = long_window - self.mid_price_history = pd.DataFrame(columns=['mid_price'], index=pd.to_datetime([])) + self.mid_price_history = pd.DataFrame(columns=["mid_price"], index=pd.to_datetime([])) def getCurrentMidPrice(self): - """ Retrieve mid price from most recent subscription data. + """Retrieve mid price from most recent subscription data. :return: """ @@ -101,7 +123,7 @@ def getCurrentMidPrice(self): return None def receiveMessage(self, currentTime, msg): - """ Action taken when agent receives a message from the exchange -- action here is for agent to update internal + """Action taken when agent receives a message from the exchange -- action here is for agent to update internal log of most recently observed mid-price. 
:param currentTime: pd.Timestamp for current simulation time @@ -110,24 +132,25 @@ def receiveMessage(self, currentTime, msg): """ super().receiveMessage(currentTime, msg) # receives subscription market data self.mid_price_history = self.mid_price_history.append( - pd.Series({'mid_price': self.getCurrentMidPrice()}, name=currentTime)) + pd.Series({"mid_price": self.getCurrentMidPrice()}, name=currentTime) + ) self.mid_price_history.dropna(inplace=True) def computeMidPriceMovingAverages(self): - """ Returns the short-window and long-window moving averages of mid price. + """Returns the short-window and long-window moving averages of mid price. :return: """ try: - short_moving_avg = self.mid_price_history.rolling(self.short_window).mean().iloc[-1]['mid_price'] - long_moving_avg = self.mid_price_history.rolling(self.long_window).mean().iloc[-1]['mid_price'] + short_moving_avg = self.mid_price_history.rolling(self.short_window).mean().iloc[-1]["mid_price"] + long_moving_avg = self.mid_price_history.rolling(self.long_window).mean().iloc[-1]["mid_price"] return short_moving_avg, long_moving_avg except IndexError: return None, None def wakeup(self, currentTime): - """ Action to be taken by agent at each wakeup. + """Action to be taken by agent at each wakeup. - :param currentTime: pd.Timestamp for current simulation time + :param currentTime: pd.Timestamp for current simulation time """ super().wakeup(currentTime) short_moving_avg, long_moving_avg = self.computeMidPriceMovingAverages() @@ -138,9 +161,5 @@ def wakeup(self, currentTime): self.placeMarketOrder(self.order_size, 1) def getWakeFrequency(self): - """ Set next wakeup time for agent. 
""" + """Set next wakeup time for agent.""" return pd.Timedelta(self.wake_freq) - - - - diff --git a/agent/examples/ImpactAgent.py b/agent/examples/ImpactAgent.py index 5f88b20a1..2750b2bd0 100644 --- a/agent/examples/ImpactAgent.py +++ b/agent/examples/ImpactAgent.py @@ -1,160 +1,171 @@ -from agent.TradingAgent import TradingAgent - import pandas as pd +from agent.TradingAgent import TradingAgent -class ImpactAgent(TradingAgent): - - def __init__(self, id, name, type, symbol = None, starting_cash = None, greed = None, within = 0.01, - impact = True, impact_time = None, random_state = None): - # Base class init. - super().__init__(id, name, type, starting_cash = starting_cash, random_state = random_state) - - self.symbol = symbol # symbol to trade - self.trading = False # ready to trade - self.traded = False # has made its one trade - - # The amount of available "nearby" liquidity to consume when placing its order. - self.greed = greed # trade this proportion of liquidity - self.within = within # within this range of the inside price - - # When should we make the impact trade? - self.impact_time = impact_time - - # The agent begins in its "complete" state, not waiting for - # any special event or condition. - self.state = 'AWAITING_WAKEUP' - - # Controls whether the impact trade is actually placed. - self.impact = impact - - - def wakeup (self, currentTime): - # Parent class handles discovery of exchange times and market_open wakeup call. - super().wakeup(currentTime) - - if not self.mkt_open or not self.mkt_close: - # TradingAgent handles discovery of exchange times. - return - else: - if not self.trading: - self.trading = True - - # Time to start trading! - print ("{} is ready to start trading now.".format(self.name)) - - - # Steady state wakeup behavior starts here. - - # First, see if we have received a MKT_CLOSED message for the day. If so, - # there's nothing to do except clean-up. 
- if self.mkt_closed and (self.symbol in self.daily_close_price): - # Market is closed and we already got the daily close price. - return - - - ### Impact agent operates at a specific time. - if currentTime < self.impact_time: - print ("Impact agent waiting for impact_time {}".format(self.impact_time)) - self.setWakeup(self.impact_time) - return - - - ### The impact agent only trades once, but we will monitor prices for - ### the sake of performance. - self.setWakeup(currentTime + pd.Timedelta('30m')) - - - # If the market is closed and we haven't obtained the daily close price yet, - # do that before we cease activity for the day. Don't do any other behavior - # after market close. - # - # Also, if we already made our one trade, do nothing except monitor prices. - if self.traded or (self.mkt_closed and (not self.symbol in self.daily_close_price)): - self.getLastTrade() - self.state = 'AWAITING_LAST_TRADE' - return - - - # The impact agent will place one order based on the current spread. - self.getCurrentSpread() - self.state = 'AWAITING_SPREAD' - - - def receiveMessage (self, currentTime, msg): - # Parent class schedules market open wakeup call once market open/close times are known. - super().receiveMessage(currentTime, msg) - - # We have been awakened by something other than our scheduled wakeup. - # If our internal state indicates we were waiting for a particular event, - # check if we can transition to a new state. - - if self.state == 'AWAITING_SPREAD': - # We were waiting for current spread information to make our trade. - # If the message we just received is QUERY_SPREAD, that means we just got it. - if msg.body['msg'] == 'QUERY_SPREAD': - # Place our one trade. - bid, bid_vol, ask, ask_vol = self.getKnownBidAsk(self.symbol) - bid_liq, ask_liq = self.getKnownLiquidity(self.symbol, within=self.within) - - # Buy order. - direction, shares, price = True, int(round(ask_liq * self.greed)), ask - - # Sell order. This should be a parameter, but isn't yet. 
- #direction, shares, price = False, int(round(bid_liq * self.greed)), bid - # Compute the limit price we must offer to ensure our order executes immediately. - # This is essentially a workaround for the lack of true market orders in our - # current simulation. - price = self.computeRequiredPrice(direction, shares) +class ImpactAgent(TradingAgent): - # Actually place the order only if self.impact is true. - if self.impact: - print ("Impact agent firing: {} {} @ {}".format('BUY' if direction else 'SELL', shares, self.dollarize(price))) - self.placeLimitOrder (self.symbol, shares, direction, price) + def __init__( + self, + id, + name, + type, + symbol=None, + starting_cash=None, + greed=None, + within=0.01, + impact=True, + impact_time=None, + random_state=None, + ): + # Base class init. + super().__init__(id, name, type, starting_cash=starting_cash, random_state=random_state) + + self.symbol = symbol # symbol to trade + self.trading = False # ready to trade + self.traded = False # has made its one trade + + # The amount of available "nearby" liquidity to consume when placing its order. + self.greed = greed # trade this proportion of liquidity + self.within = within # within this range of the inside price + + # When should we make the impact trade? + self.impact_time = impact_time + + # The agent begins in its "complete" state, not waiting for + # any special event or condition. + self.state = "AWAITING_WAKEUP" + + # Controls whether the impact trade is actually placed. + self.impact = impact + + def wakeup(self, currentTime): + # Parent class handles discovery of exchange times and market_open wakeup call. + super().wakeup(currentTime) + + if not self.mkt_open or not self.mkt_close: + # TradingAgent handles discovery of exchange times. 
+ return else: - print ("Impact agent would fire: {} {} @ {} (but self.impact = False)".format('BUY' if direction else 'SELL', shares, self.dollarize(price))) - - self.traded = True - self.state = 'AWAITING_WAKEUP' - - - # Internal state and logic specific to this agent. - - def placeLimitOrder (self, symbol, quantity, is_buy_order, limit_price): - super().placeLimitOrder(symbol, quantity, is_buy_order, limit_price, ignore_risk = True) - - # Computes required limit price to immediately execute a trade for the specified quantity - # of shares. - def computeRequiredPrice (self, direction, shares): - book = self.known_asks[self.symbol] if direction else self.known_bids[self.symbol] - - # Start at the inside and add up the shares. - t = 0 - - for i in range(len(book)): - p,v = book[i] - t += v - - # If we have accumulated enough shares, return this price. - if t >= shares: return p - - # Not enough shares. Just return worst price (highest ask, lowest bid). - return book[-1][0] - - - # Request the last trade price for our symbol. - def getLastTrade (self): - super().getLastTrade(self.symbol) - - - # Request the spread for our symbol. - def getCurrentSpread (self): - # Impact agent gets depth 10000 on each side (probably everything). - super().getCurrentSpread(self.symbol, 10000) - - - def getWakeFrequency (self): - return (pd.Timedelta('1ns')) - - + if not self.trading: + self.trading = True + + # Time to start trading! + print("{} is ready to start trading now.".format(self.name)) + + # Steady state wakeup behavior starts here. + + # First, see if we have received a MKT_CLOSED message for the day. If so, + # there's nothing to do except clean-up. + if self.mkt_closed and (self.symbol in self.daily_close_price): + # Market is closed and we already got the daily close price. + return + + ### Impact agent operates at a specific time. 
+ if currentTime < self.impact_time: + print("Impact agent waiting for impact_time {}".format(self.impact_time)) + self.setWakeup(self.impact_time) + return + + ### The impact agent only trades once, but we will monitor prices for + ### the sake of performance. + self.setWakeup(currentTime + pd.Timedelta("30m")) + + # If the market is closed and we haven't obtained the daily close price yet, + # do that before we cease activity for the day. Don't do any other behavior + # after market close. + # + # Also, if we already made our one trade, do nothing except monitor prices. + if self.traded or (self.mkt_closed and (not self.symbol in self.daily_close_price)): + self.getLastTrade() + self.state = "AWAITING_LAST_TRADE" + return + + # The impact agent will place one order based on the current spread. + self.getCurrentSpread() + self.state = "AWAITING_SPREAD" + + def receiveMessage(self, currentTime, msg): + # Parent class schedules market open wakeup call once market open/close times are known. + super().receiveMessage(currentTime, msg) + + # We have been awakened by something other than our scheduled wakeup. + # If our internal state indicates we were waiting for a particular event, + # check if we can transition to a new state. + + if self.state == "AWAITING_SPREAD": + # We were waiting for current spread information to make our trade. + # If the message we just received is QUERY_SPREAD, that means we just got it. + if msg.body["msg"] == "QUERY_SPREAD": + # Place our one trade. + bid, bid_vol, ask, ask_vol = self.getKnownBidAsk(self.symbol) + bid_liq, ask_liq = self.getKnownLiquidity(self.symbol, within=self.within) + + # Buy order. + direction, shares, price = True, int(round(ask_liq * self.greed)), ask + + # Sell order. This should be a parameter, but isn't yet. + # direction, shares, price = False, int(round(bid_liq * self.greed)), bid + + # Compute the limit price we must offer to ensure our order executes immediately. 
+ # This is essentially a workaround for the lack of true market orders in our + # current simulation. + price = self.computeRequiredPrice(direction, shares) + + # Actually place the order only if self.impact is true. + if self.impact: + print( + "Impact agent firing: {} {} @ {}".format( + "BUY" if direction else "SELL", + shares, + self.dollarize(price), + ) + ) + self.placeLimitOrder(self.symbol, shares, direction, price) + else: + print( + "Impact agent would fire: {} {} @ {} (but self.impact = False)".format( + "BUY" if direction else "SELL", + shares, + self.dollarize(price), + ) + ) + + self.traded = True + self.state = "AWAITING_WAKEUP" + + # Internal state and logic specific to this agent. + + def placeLimitOrder(self, symbol, quantity, is_buy_order, limit_price): + super().placeLimitOrder(symbol, quantity, is_buy_order, limit_price, ignore_risk=True) + + # Computes required limit price to immediately execute a trade for the specified quantity + # of shares. + def computeRequiredPrice(self, direction, shares): + book = self.known_asks[self.symbol] if direction else self.known_bids[self.symbol] + + # Start at the inside and add up the shares. + t = 0 + + for i in range(len(book)): + p, v = book[i] + t += v + + # If we have accumulated enough shares, return this price. + if t >= shares: + return p + + # Not enough shares. Just return worst price (highest ask, lowest bid). + return book[-1][0] + + # Request the last trade price for our symbol. + def getLastTrade(self): + super().getLastTrade(self.symbol) + + # Request the spread for our symbol. + def getCurrentSpread(self): + # Impact agent gets depth 10000 on each side (probably everything). 
+ super().getCurrentSpread(self.symbol, 10000) + + def getWakeFrequency(self): + return pd.Timedelta("1ns") diff --git a/agent/examples/MarketReplayAgent.py b/agent/examples/MarketReplayAgent.py index e2aceef02..e96ebee59 100644 --- a/agent/examples/MarketReplayAgent.py +++ b/agent/examples/MarketReplayAgent.py @@ -1,29 +1,55 @@ -import pickle import os.path +import pickle from datetime import datetime + import pandas as pd -#from joblib import Memory from agent.TradingAgent import TradingAgent from util.order.LimitOrder import LimitOrder from util.util import log_print +# from joblib import Memory + class MarketReplayAgent(TradingAgent): - def __init__(self, id, name, type, symbol, date, start_time, end_time, - orders_file_path, processed_orders_folder_path, - starting_cash, log_orders=False, random_state=None): - super().__init__(id, name, type, starting_cash=starting_cash, log_orders=log_orders, random_state=random_state) + def __init__( + self, + id, + name, + type, + symbol, + date, + start_time, + end_time, + orders_file_path, + processed_orders_folder_path, + starting_cash, + log_orders=False, + random_state=None, + ): + super().__init__( + id, + name, + type, + starting_cash=starting_cash, + log_orders=log_orders, + random_state=random_state, + ) self.symbol = symbol self.date = date self.log_orders = log_orders self.executed_trades = dict() - self.state = 'AWAITING_WAKEUP' - - self.historical_orders = L3OrdersProcessor(self.symbol, - self.date, start_time, end_time, - orders_file_path, processed_orders_folder_path) + self.state = "AWAITING_WAKEUP" + + self.historical_orders = L3OrdersProcessor( + self.symbol, + self.date, + start_time, + end_time, + orders_file_path, + processed_orders_folder_path, + ) self.wakeup_times = self.historical_orders.wakeup_times def wakeup(self, currentTime): @@ -39,25 +65,39 @@ def wakeup(self, currentTime): def receiveMessage(self, currentTime, msg): super().receiveMessage(currentTime, msg) - if msg.body['msg'] == 
'ORDER_EXECUTED': - order = msg.body['order'] + if msg.body["msg"] == "ORDER_EXECUTED": + order = msg.body["order"] self.executed_trades[currentTime] = [order.fill_price, order.quantity] self.last_trade[self.symbol] = order.fill_price def placeOrder(self, currentTime, order): if len(order) == 1: order = order[0] - order_id = order['ORDER_ID'] + order_id = order["ORDER_ID"] existing_order = self.orders.get(order_id) - if not existing_order and order['SIZE'] > 0: - self.placeLimitOrder(self.symbol, order['SIZE'], order['BUY_SELL_FLAG'] == 'BUY', order['PRICE'], - order_id=order_id) - elif existing_order and order['SIZE'] == 0: + if not existing_order and order["SIZE"] > 0: + self.placeLimitOrder( + self.symbol, + order["SIZE"], + order["BUY_SELL_FLAG"] == "BUY", + order["PRICE"], + order_id=order_id, + ) + elif existing_order and order["SIZE"] == 0: self.cancelOrder(existing_order) elif existing_order: - self.modifyOrder(existing_order, LimitOrder(self.id, currentTime, self.symbol, order['SIZE'], - order['BUY_SELL_FLAG'] == 'BUY', order['PRICE'], - order_id=order_id)) + self.modifyOrder( + existing_order, + LimitOrder( + self.id, + currentTime, + self.symbol, + order["SIZE"], + order["BUY_SELL_FLAG"] == "BUY", + order["PRICE"], + order_id=order_id, + ), + ) else: for ind_order in order: self.placeOrder(currentTime, order=[ind_order]) @@ -67,15 +107,23 @@ def getWakeFrequency(self): return self.historical_orders.first_wakeup - self.mkt_open -#mem = Memory(cachedir='./cache', verbose=0) +# mem = Memory(cachedir='./cache', verbose=0) class L3OrdersProcessor: - COLUMNS = ['TIMESTAMP', 'ORDER_ID', 'PRICE', 'SIZE', 'BUY_SELL_FLAG'] - DIRECTION = {0: 'BUY', 1: 'SELL'} + COLUMNS = ["TIMESTAMP", "ORDER_ID", "PRICE", "SIZE", "BUY_SELL_FLAG"] + DIRECTION = {0: "BUY", 1: "SELL"} # Class for reading historical exchange orders stream - def __init__(self, symbol, date, start_time, end_time, orders_file_path, processed_orders_folder_path): + def __init__( + self, + symbol, + date, + 
start_time, + end_time, + orders_file_path, + processed_orders_folder_path, + ): self.symbol = symbol self.date = date self.start_time = start_time @@ -90,36 +138,36 @@ def __init__(self, symbol, date, start_time, end_time, orders_file_path, process def processOrders(self): def convertDate(date_str): try: - return datetime.strptime(date_str, '%Y%m%d%H%M%S.%f') + return datetime.strptime(date_str, "%Y%m%d%H%M%S.%f") except ValueError: return convertDate(date_str[:-1]) - #@mem.cache + # @mem.cache def read_processed_orders_file(processed_orders_file): - with open(processed_orders_file, 'rb') as handle: + with open(processed_orders_file, "rb") as handle: return pickle.load(handle) - processed_orders_file = f'{self.processed_orders_folder_path}marketreplay_{self.symbol}_{self.date.date()}.pkl' + processed_orders_file = f"{self.processed_orders_folder_path}marketreplay_{self.symbol}_{self.date.date()}.pkl" if os.path.isfile(processed_orders_file): - print(f'Processed file exists for {self.symbol} and {self.date.date()}: {processed_orders_file}') + print(f"Processed file exists for {self.symbol} and {self.date.date()}: {processed_orders_file}") return read_processed_orders_file(processed_orders_file) else: - print(f'Processed file does not exist for {self.symbol} and {self.date.date()}, processing...') + print(f"Processed file does not exist for {self.symbol} and {self.date.date()}, processing...") orders_df = pd.read_csv(self.orders_file_path).iloc[1:] - all_columns = orders_df.columns[0].split('|') - orders_df = orders_df[orders_df.columns[0]].str.split('|', 16, expand=True) + all_columns = orders_df.columns[0].split("|") + orders_df = orders_df[orders_df.columns[0]].str.split("|", 16, expand=True) orders_df.columns = all_columns orders_df = orders_df[L3OrdersProcessor.COLUMNS] - orders_df['BUY_SELL_FLAG'] = orders_df['BUY_SELL_FLAG'].astype(int).replace(L3OrdersProcessor.DIRECTION) - orders_df['TIMESTAMP'] = orders_df['TIMESTAMP'].astype(str).apply(convertDate) - 
orders_df['SIZE'] = orders_df['SIZE'].astype(int) - orders_df['PRICE'] = orders_df['PRICE'].astype(float) * 100 - orders_df['PRICE'] = orders_df['PRICE'].astype(int) + orders_df["BUY_SELL_FLAG"] = orders_df["BUY_SELL_FLAG"].astype(int).replace(L3OrdersProcessor.DIRECTION) + orders_df["TIMESTAMP"] = orders_df["TIMESTAMP"].astype(str).apply(convertDate) + orders_df["SIZE"] = orders_df["SIZE"].astype(int) + orders_df["PRICE"] = orders_df["PRICE"].astype(float) * 100 + orders_df["PRICE"] = orders_df["PRICE"].astype(int) orders_df = orders_df.loc[(orders_df.TIMESTAMP >= self.start_time) & (orders_df.TIMESTAMP < self.end_time)] - orders_df.set_index('TIMESTAMP', inplace=True) + orders_df.set_index("TIMESTAMP", inplace=True) log_print(f"Number of Orders: {len(orders_df)}") - orders_dict = {k: g.to_dict(orient='records') for k, g in orders_df.groupby(level=0)} - with open(processed_orders_file, 'wb') as handle: + orders_dict = {k: g.to_dict(orient="records") for k, g in orders_df.groupby(level=0)} + with open(processed_orders_file, "wb") as handle: pickle.dump(orders_dict, handle, protocol=pickle.HIGHEST_PROTOCOL) - print(f'processed file created as {processed_orders_file}') + print(f"processed file created as {processed_orders_file}") return orders_dict diff --git a/agent/examples/MomentumAgent.py b/agent/examples/MomentumAgent.py index ba979199a..fd0c4dd43 100644 --- a/agent/examples/MomentumAgent.py +++ b/agent/examples/MomentumAgent.py @@ -1,6 +1,7 @@ -from agent.TradingAgent import TradingAgent -import pandas as pd import numpy as np +import pandas as pd + +from agent.TradingAgent import TradingAgent class MomentumAgent(TradingAgent): @@ -10,11 +11,29 @@ class MomentumAgent(TradingAgent): sell limit order if the 20 mid-price average < 50 mid-price average """ - def __init__(self, id, name, type, symbol, starting_cash, - min_size, max_size, wake_up_freq='60s', - subscribe=False, log_orders=False, random_state=None): + def __init__( + self, + id, + name, + type, + 
symbol, + starting_cash, + min_size, + max_size, + wake_up_freq="60s", + subscribe=False, + log_orders=False, + random_state=None, + ): - super().__init__(id, name, type, starting_cash=starting_cash, log_orders=log_orders, random_state=random_state) + super().__init__( + id, + name, + type, + starting_cash=starting_cash, + log_orders=log_orders, + random_state=random_state, + ) self.symbol = symbol self.min_size = min_size # Minimum order size self.max_size = max_size # Maximum order size @@ -30,40 +49,53 @@ def kernelStarting(self, startTime): super().kernelStarting(startTime) def wakeup(self, currentTime): - """ Agent wakeup is determined by self.wake_up_freq """ + """Agent wakeup is determined by self.wake_up_freq""" can_trade = super().wakeup(currentTime) if self.subscribe and not self.subscription_requested: super().requestDataSubscription(self.symbol, levels=1, freq=10e9) self.subscription_requested = True - self.state = 'AWAITING_MARKET_DATA' + self.state = "AWAITING_MARKET_DATA" elif can_trade and not self.subscribe: self.getCurrentSpread(self.symbol) - self.state = 'AWAITING_SPREAD' + self.state = "AWAITING_SPREAD" def receiveMessage(self, currentTime, msg): - """ Momentum agent actions are determined after obtaining the best bid and ask in the LOB """ + """Momentum agent actions are determined after obtaining the best bid and ask in the LOB""" super().receiveMessage(currentTime, msg) - if not self.subscribe and self.state == 'AWAITING_SPREAD' and msg.body['msg'] == 'QUERY_SPREAD': + if not self.subscribe and self.state == "AWAITING_SPREAD" and msg.body["msg"] == "QUERY_SPREAD": bid, _, ask, _ = self.getKnownBidAsk(self.symbol) self.placeOrders(bid, ask) self.setWakeup(currentTime + self.getWakeFrequency()) - self.state = 'AWAITING_WAKEUP' - elif self.subscribe and self.state == 'AWAITING_MARKET_DATA' and msg.body['msg'] == 'MARKET_DATA': + self.state = "AWAITING_WAKEUP" + elif self.subscribe and self.state == "AWAITING_MARKET_DATA" and msg.body["msg"] == 
"MARKET_DATA": bids, asks = self.known_bids[self.symbol], self.known_asks[self.symbol] - if bids and asks: self.placeOrders(bids[0][0], asks[0][0]) - self.state = 'AWAITING_MARKET_DATA' + if bids and asks: + self.placeOrders(bids[0][0], asks[0][0]) + self.state = "AWAITING_MARKET_DATA" def placeOrders(self, bid, ask): - """ Momentum Agent actions logic """ + """Momentum Agent actions logic""" if bid and ask: self.mid_list.append((bid + ask) / 2) - if len(self.mid_list) > 20: self.avg_20_list.append(MomentumAgent.ma(self.mid_list, n=20)[-1].round(2)) - if len(self.mid_list) > 50: self.avg_50_list.append(MomentumAgent.ma(self.mid_list, n=50)[-1].round(2)) + if len(self.mid_list) > 20: + self.avg_20_list.append(MomentumAgent.ma(self.mid_list, n=20)[-1].round(2)) + if len(self.mid_list) > 50: + self.avg_50_list.append(MomentumAgent.ma(self.mid_list, n=50)[-1].round(2)) if len(self.avg_20_list) > 0 and len(self.avg_50_list) > 0: if self.avg_20_list[-1] >= self.avg_50_list[-1]: - self.placeLimitOrder(self.symbol, quantity=self.size, is_buy_order=True, limit_price=ask) + self.placeLimitOrder( + self.symbol, + quantity=self.size, + is_buy_order=True, + limit_price=ask, + ) else: - self.placeLimitOrder(self.symbol, quantity=self.size, is_buy_order=False, limit_price=bid) + self.placeLimitOrder( + self.symbol, + quantity=self.size, + is_buy_order=False, + limit_price=bid, + ) def getWakeFrequency(self): return pd.Timedelta(self.wake_up_freq) @@ -72,4 +104,4 @@ def getWakeFrequency(self): def ma(a, n=20): ret = np.cumsum(a, dtype=float) ret[n:] = ret[n:] - ret[:-n] - return ret[n - 1:] / n \ No newline at end of file + return ret[n - 1 :] / n diff --git a/agent/examples/QLearningAgent.py b/agent/examples/QLearningAgent.py index 17ac226ba..94b496e92 100644 --- a/agent/examples/QLearningAgent.py +++ b/agent/examples/QLearningAgent.py @@ -1,213 +1,223 @@ -from agent.TradingAgent import TradingAgent -from message.Message import Message -from util.util import log_print +import sys 
import numpy as np import pandas as pd -import sys - -class QLearningAgent(TradingAgent): - - def __init__(self, id, name, type, symbol='IBM', starting_cash=100000, - qtable = None, log_orders = False, random_state = None): - - # Base class init. - super().__init__(id, name, type, starting_cash=starting_cash, log_orders=log_orders, random_state = random_state) - - # Store important parameters particular to the QLearning agent. - self.symbol = symbol - self.qtable = qtable - - # The agent uses this to track whether it has begun its strategy or is still - # handling pre-market tasks. - self.trading = False - - # The agent begins in its "complete" state, not waiting for - # any special event or condition. - self.state = 'AWAITING_WAKEUP' - - # The agent tracks an experience history to sample and learn from over time. - # Tuples of (s,a,s',r). This is not currently used (nor part of the saved state). - self.experience = [] - - # Remember prior state, action, and portfolio value (marked to market). - self.s = None - self.a = None - self.v = None - - - - # During kernelStopping, give the Kernel our saved state for potential - # subsequent simulations during the same experiment. - def kernelStopping (self): - super().kernelStopping() - self.updateAgentState(self.qtable) - - - def wakeup (self, currentTime): - # Parent class handles discovery of exchange times and market_open wakeup call. - super().wakeup(currentTime) - - self.state = 'INACTIVE' - - if not self.mkt_open or not self.mkt_close: - # TradingAgent handles discovery of exchange times. - return - else: - if not self.trading: - self.trading = True - - # Time to start trading! - log_print ("{} is ready to start trading now.", self.name) - - - # Steady state wakeup behavior starts here. - - # If we've been told the market has closed for the day, we will only request - # final price information, then stop. 
- if self.mkt_closed and (self.symbol in self.daily_close_price): - # Market is closed and we already got the daily close price. - return - - - # Schedule a wakeup for the next time this agent should arrive at the market - # (following the conclusion of its current activity cycle). - # We do this early in case some of our expected message responses don't arrive. - - # The QLearning agent is not a background agent, so it should select strategically - # appropriate times. (Maybe we should LEARN the best times or frequencies at which - # to trade, someday.) Presently it just trades once a minute. - self.setWakeup(currentTime + pd.Timedelta('1min')) - - # If the market has closed and we haven't obtained the daily close price yet, - # do that before we cease activity for the day. Don't do any other behavior - # after market close. - if self.mkt_closed and (not self.symbol in self.daily_close_price): - self.getCurrentSpread(self.symbol) - self.state = 'AWAITING_SPREAD' - return - - # Cancel unfilled orders (but don't exit positions). - self.cancelOrders() - - # Get the order book or whatever else we need for the state. - self.getCurrentSpread(self.symbol, depth=1000) - self.state = 'AWAITING_SPREAD' - - - - def placeOrder (self): - # Called when it is time for the agent to determine a limit price and place an order. - # Compute the order imbalance feature. - bid_vol = sum([ v[1] for v in self.known_bids[self.symbol] ]) - ask_vol = sum([ v[1] for v in self.known_asks[self.symbol] ]) - imba = bid_vol - ask_vol - - # A unit of stock is now 100 shares instead of one. - imba = int(imba / 100) - - # Get our current holdings in the stock of interest. - h = self.getHoldings(self.symbol) - - # The new state will be called s_prime. This agent simply uses current - # holdings (limit: one share long or short) and offer volume imbalance. - # State: 1000s digit is 0 (short), 1 (neutral), 2 (long). Remaining digits - # are 000 (-100 imba) to 200 (+100 imba). 
- s_prime = ((h + 1) * 1000) + (imba + 100) - - log_print ("h: {}, imba: {}, s_prime: {}", h, imba, s_prime) - - # Compute our reward from last time. We estimate the change in the value - # of our portfolio by marking it to market and comparing against the last - # time we were contemplating an action. - v = self.markToMarket(self.holdings, use_midpoint=True) - r = v - self.v if self.v is not None else 0 - - # Store our experience tuple. - self.experience.append((self.s, self.a, s_prime, r)) - - # Update our q table. - old_q = self.qtable.q[self.s, self.a] - old_weighted = (1 - self.qtable.alpha) * old_q - - a_prime = np.argmax(self.qtable.q[s_prime,:]) - new_q = r + (self.qtable.gamma * self.qtable.q[s_prime, a_prime]) - new_weighted = self.qtable.alpha * new_q - - self.qtable.q[self.s, self.a] = old_weighted + new_weighted - - - # Decay alpha. - self.qtable.alpha *= self.qtable.alpha_decay - self.qtable.alpha = max(self.qtable.alpha, self.qtable.alpha_min) - - - # Compute our next action. 0 = sell one, 1 == do nothing, 2 == buy one. - if self.random_state.rand() < self.qtable.epsilon: - # Random action, and decay epsilon. - a = self.random_state.randint(0,3) - self.qtable.epsilon *= self.qtable.epsilon_decay - self.qtable.epsilon = max(self.qtable.epsilon, self.qtable.epsilon_min) - else: - # Expected best action. - a = a_prime - - # Respect holding limit. - if a == 0 and h == -1: a = 1 - elif a == 2 and h == 1: a = 1 - - - # Remember s, a, and v for next time. - self.s = s_prime - self.a = a - self.v = v - - - # Place the order. We probably want this to be a market order, once supported, - # or use a "compute required price for guaranteed execution" function like the - # impact agent, but that requires fetching quite a bit of book depth. 
- if a == 0: self.placeLimitOrder(self.symbol, 1, False, 50000) - elif a == 2: self.placeLimitOrder(self.symbol, 1, True, 200000) - - - def receiveMessage (self, currentTime, msg): - # Parent class schedules market open wakeup call once market open/close times are known. - super().receiveMessage(currentTime, msg) - - # We have been awakened by something other than our scheduled wakeup. - # If our internal state indicates we were waiting for a particular event, - # check if we can transition to a new state. - - if self.state == 'AWAITING_SPREAD': - # We were waiting to receive the current spread/book. Since we don't currently - # track timestamps on retained information, we rely on actually seeing a - # QUERY_SPREAD response message. - - if msg.body['msg'] == 'QUERY_SPREAD': - # This is what we were waiting for. - - # But if the market is now closed, don't advance to placing orders. - if self.mkt_closed: return - - # We now have the information needed to place a limit order with the eta - # strategic threshold parameter. - self.placeOrder() - self.state = 'AWAITING_WAKEUP' - - - # Internal state and logic specific to this agent subclass. - - # Cancel all open orders. - # Return value: did we issue any cancellation requests? - def cancelOrders (self): - if not self.orders: return False - - for id, order in self.orders.items(): - self.cancelOrder(order) +from agent.TradingAgent import TradingAgent +from message.Message import Message +from util.util import log_print - return True - def getWakeFrequency (self): - return pd.Timedelta(self.random_state.randint(low = 0, high = 100), unit='ns') +class QLearningAgent(TradingAgent): + def __init__( + self, + id, + name, + type, + symbol="IBM", + starting_cash=100000, + qtable=None, + log_orders=False, + random_state=None, + ): + + # Base class init. 
+ super().__init__( + id, + name, + type, + starting_cash=starting_cash, + log_orders=log_orders, + random_state=random_state, + ) + + # Store important parameters particular to the QLearning agent. + self.symbol = symbol + self.qtable = qtable + + # The agent uses this to track whether it has begun its strategy or is still + # handling pre-market tasks. + self.trading = False + + # The agent begins in its "complete" state, not waiting for + # any special event or condition. + self.state = "AWAITING_WAKEUP" + + # The agent tracks an experience history to sample and learn from over time. + # Tuples of (s,a,s',r). This is not currently used (nor part of the saved state). + self.experience = [] + + # Remember prior state, action, and portfolio value (marked to market). + self.s = None + self.a = None + self.v = None + + # During kernelStopping, give the Kernel our saved state for potential + # subsequent simulations during the same experiment. + def kernelStopping(self): + super().kernelStopping() + self.updateAgentState(self.qtable) + + def wakeup(self, currentTime): + # Parent class handles discovery of exchange times and market_open wakeup call. + super().wakeup(currentTime) + + self.state = "INACTIVE" + + if not self.mkt_open or not self.mkt_close: + # TradingAgent handles discovery of exchange times. + return + else: + if not self.trading: + self.trading = True + + # Time to start trading! + log_print("{} is ready to start trading now.", self.name) + + # Steady state wakeup behavior starts here. + + # If we've been told the market has closed for the day, we will only request + # final price information, then stop. + if self.mkt_closed and (self.symbol in self.daily_close_price): + # Market is closed and we already got the daily close price. + return + + # Schedule a wakeup for the next time this agent should arrive at the market + # (following the conclusion of its current activity cycle). 
+ # We do this early in case some of our expected message responses don't arrive. + + # The QLearning agent is not a background agent, so it should select strategically + # appropriate times. (Maybe we should LEARN the best times or frequencies at which + # to trade, someday.) Presently it just trades once a minute. + self.setWakeup(currentTime + pd.Timedelta("1min")) + + # If the market has closed and we haven't obtained the daily close price yet, + # do that before we cease activity for the day. Don't do any other behavior + # after market close. + if self.mkt_closed and (not self.symbol in self.daily_close_price): + self.getCurrentSpread(self.symbol) + self.state = "AWAITING_SPREAD" + return + + # Cancel unfilled orders (but don't exit positions). + self.cancelOrders() + + # Get the order book or whatever else we need for the state. + self.getCurrentSpread(self.symbol, depth=1000) + self.state = "AWAITING_SPREAD" + + def placeOrder(self): + # Called when it is time for the agent to determine a limit price and place an order. + + # Compute the order imbalance feature. + bid_vol = sum([v[1] for v in self.known_bids[self.symbol]]) + ask_vol = sum([v[1] for v in self.known_asks[self.symbol]]) + imba = bid_vol - ask_vol + + # A unit of stock is now 100 shares instead of one. + imba = int(imba / 100) + + # Get our current holdings in the stock of interest. + h = self.getHoldings(self.symbol) + + # The new state will be called s_prime. This agent simply uses current + # holdings (limit: one share long or short) and offer volume imbalance. + # State: 1000s digit is 0 (short), 1 (neutral), 2 (long). Remaining digits + # are 000 (-100 imba) to 200 (+100 imba). + s_prime = ((h + 1) * 1000) + (imba + 100) + + log_print("h: {}, imba: {}, s_prime: {}", h, imba, s_prime) + + # Compute our reward from last time. We estimate the change in the value + # of our portfolio by marking it to market and comparing against the last + # time we were contemplating an action. 
+ v = self.markToMarket(self.holdings, use_midpoint=True) + r = v - self.v if self.v is not None else 0 + + # Store our experience tuple. + self.experience.append((self.s, self.a, s_prime, r)) + + # Update our q table. + old_q = self.qtable.q[self.s, self.a] + old_weighted = (1 - self.qtable.alpha) * old_q + + a_prime = np.argmax(self.qtable.q[s_prime, :]) + new_q = r + (self.qtable.gamma * self.qtable.q[s_prime, a_prime]) + new_weighted = self.qtable.alpha * new_q + + self.qtable.q[self.s, self.a] = old_weighted + new_weighted + + # Decay alpha. + self.qtable.alpha *= self.qtable.alpha_decay + self.qtable.alpha = max(self.qtable.alpha, self.qtable.alpha_min) + + # Compute our next action. 0 = sell one, 1 == do nothing, 2 == buy one. + if self.random_state.rand() < self.qtable.epsilon: + # Random action, and decay epsilon. + a = self.random_state.randint(0, 3) + self.qtable.epsilon *= self.qtable.epsilon_decay + self.qtable.epsilon = max(self.qtable.epsilon, self.qtable.epsilon_min) + else: + # Expected best action. + a = a_prime + + # Respect holding limit. + if a == 0 and h == -1: + a = 1 + elif a == 2 and h == 1: + a = 1 + + # Remember s, a, and v for next time. + self.s = s_prime + self.a = a + self.v = v + + # Place the order. We probably want this to be a market order, once supported, + # or use a "compute required price for guaranteed execution" function like the + # impact agent, but that requires fetching quite a bit of book depth. + if a == 0: + self.placeLimitOrder(self.symbol, 1, False, 50000) + elif a == 2: + self.placeLimitOrder(self.symbol, 1, True, 200000) + + def receiveMessage(self, currentTime, msg): + # Parent class schedules market open wakeup call once market open/close times are known. + super().receiveMessage(currentTime, msg) + + # We have been awakened by something other than our scheduled wakeup. + # If our internal state indicates we were waiting for a particular event, + # check if we can transition to a new state. 
+ + if self.state == "AWAITING_SPREAD": + # We were waiting to receive the current spread/book. Since we don't currently + # track timestamps on retained information, we rely on actually seeing a + # QUERY_SPREAD response message. + + if msg.body["msg"] == "QUERY_SPREAD": + # This is what we were waiting for. + + # But if the market is now closed, don't advance to placing orders. + if self.mkt_closed: + return + + # We now have the information needed to place a limit order with the eta + # strategic threshold parameter. + self.placeOrder() + self.state = "AWAITING_WAKEUP" + + # Internal state and logic specific to this agent subclass. + + # Cancel all open orders. + # Return value: did we issue any cancellation requests? + def cancelOrders(self): + if not self.orders: + return False + + for id, order in self.orders.items(): + self.cancelOrder(order) + + return True + + def getWakeFrequency(self): + return pd.Timedelta(self.random_state.randint(low=0, high=100), unit="ns") diff --git a/agent/examples/ShockAgent.py b/agent/examples/ShockAgent.py index 22507b540..54c5886aa 100644 --- a/agent/examples/ShockAgent.py +++ b/agent/examples/ShockAgent.py @@ -1,171 +1,185 @@ -from agent.TradingAgent import TradingAgent - import pandas as pd +from agent.TradingAgent import TradingAgent + # Extends Impact agent to fire large order evenly over a predetermined time period # Need to add: (1) duration, (2) number of wakeups, (3) desired execution size class ImpactAgent(TradingAgent): - def __init__(self, id, name, type, symbol = None, starting_cash = None, within = 0.01, - impact = True, impact_time = None, impact_duration = 0, impact_trades = 0, - impact_vol = None, random_state = None): - # Base class init. 
- super().__init__(id, name, type, starting_cash = starting_cash, random_state = random_state) - - self.symbol = symbol # symbol to trade - self.trading = False # ready to trade - self.traded = False # has made its t trade - - # The amount of available "nearby" liquidity to consume when placing its order. - self.within = within # within this range of the inside price - - self.impact_time = impact_time # When should we make the impact trade? - self.impact_duration = impact_duration # How long the agent should wait to submit the next trade - self.impact_trades = impact_trades # The number of trades to execute across - self.impact_vol = impact_vol # The total volume to execute across all trades - - # The agent begins in its "complete" state, not waiting for - # any special event or condition. - self.state = 'AWAITING_WAKEUP' - - # Controls whether the impact trade is actually placed. - self.impact = impact - - - def wakeup (self, currentTime): - # Parent class handles discovery of exchange times and market_open wakeup call. - super().wakeup(currentTime) - - if not self.mkt_open or not self.mkt_close: - # TradingAgent handles discovery of exchange times. - return - else: - if not self.trading: - self.trading = True - - # Time to start trading! - print ("{} is ready to start trading now.".format(self.name)) - - - # Steady state wakeup behavior starts here. - - # First, see if we have received a MKT_CLOSED message for the day. If so, - # there's nothing to do except clean-up. - if self.mkt_closed and (self.symbol in self.daily_close_price): - # Market is closed and we already got the daily close price. - return - - - ### Impact agent operates at a specific time. - if currentTime < self.impact_time: - print ("Impact agent waiting for impact_time {}".format(self.impact_time)) - self.setWakeup(self.impact_time) - return - - - ### The impact agent only trades once, but we will monitor prices for - ### the sake of performance. 
- self.setWakeup(currentTime + pd.Timedelta('30m')) - - - # If the market is closed and we haven't obtained the daily close price yet, - # do that before we cease activity for the day. Don't do any other behavior - # after market close. - # - # Also, if we already made our one trade, do nothing except monitor prices. - #if self.traded >= self.impact_trades or (self.mkt_closed and (not self.symbol in self.daily_close_price)): - if self.traded or (self.mkt_closed and (not self.symbol in self.daily_close_price)): - self.getLastTrade() - self.state = 'AWAITING_LAST_TRADE' - return - - #if self.traded < self.impact_trades: - #self.setWakeup(currentTime + impact_duration) - - # The impact agent will place one order based on the current spread. - self.getCurrentSpread() - self.state = 'AWAITING_SPREAD' - - - def receiveMessage (self, currentTime, msg): - # Parent class schedules market open wakeup call once market open/close times are known. - super().receiveMessage(currentTime, msg) - - # We have been awakened by something other than our scheduled wakeup. - # If our internal state indicates we were waiting for a particular event, - # check if we can transition to a new state. - - if self.state == 'AWAITING_SPREAD': - # We were waiting for current spread information to make our trade. - # If the message we just received is QUERY_SPREAD, that means we just got it. - if msg.body['msg'] == 'QUERY_SPREAD': - # Place our one trade. - bid, bid_vol, ask, ask_vol = self.getKnownBidAsk(self.symbol) - #bid_liq, ask_liq = self.getKnownLiquidity(self.symbol, within=self.within) - print('within: ' + str(self.within)) - bid_liq, ask_liq = self.getKnownLiquidity(self.symbol, within=0.75) - - # Buy order. - #direction, shares, price = True, int(round(ask_liq * self.greed)), ask - - # Sell order. This should be a parameter, but isn't yet. 
- #direction, shares = False, int(round(bid_liq * self.greed)) - direction, shares = False, int(round(bid_liq * 0.5)) - - # Compute the limit price we must offer to ensure our order executes immediately. - # This is essentially a workaround for the lack of true market orders in our - # current simulation. - price = self.computeRequiredPrice(direction, shares) - - # Actually place the order only if self.impact is true. - if self.impact: - print ("Impact agent firing: {} {} @ {} @ {}".format('BUY' if direction else 'SELL', shares, self.dollarize(price), currentTime)) - self.placeLimitOrder (self.symbol, shares, direction, price) + def __init__( + self, + id, + name, + type, + symbol=None, + starting_cash=None, + within=0.01, + impact=True, + impact_time=None, + impact_duration=0, + impact_trades=0, + impact_vol=None, + random_state=None, + ): + # Base class init. + super().__init__(id, name, type, starting_cash=starting_cash, random_state=random_state) + + self.symbol = symbol # symbol to trade + self.trading = False # ready to trade + self.traded = False # has made its t trade + + # The amount of available "nearby" liquidity to consume when placing its order. + self.within = within # within this range of the inside price + + self.impact_time = impact_time # When should we make the impact trade? + self.impact_duration = impact_duration # How long the agent should wait to submit the next trade + self.impact_trades = impact_trades # The number of trades to execute across + self.impact_vol = impact_vol # The total volume to execute across all trades + + # The agent begins in its "complete" state, not waiting for + # any special event or condition. + self.state = "AWAITING_WAKEUP" + + # Controls whether the impact trade is actually placed. + self.impact = impact + + def wakeup(self, currentTime): + # Parent class handles discovery of exchange times and market_open wakeup call. 
+ super().wakeup(currentTime) + + if not self.mkt_open or not self.mkt_close: + # TradingAgent handles discovery of exchange times. + return else: - print ("Impact agent would fire: {} {} @ {} (but self.impact = False)".format('BUY' if direction else 'SELL', shares, self.dollarize(price))) - - self.traded = True - self.state = 'AWAITING_WAKEUP' - - - # Internal state and logic specific to this agent. - - def placeLimitOrder (self, symbol, quantity, is_buy_order, limit_price): - super().placeLimitOrder(symbol, quantity, is_buy_order, limit_price, ignore_risk = True) - - # Computes required limit price to immediately execute a trade for the specified quantity - # of shares. - def computeRequiredPrice (self, direction, shares): - book = self.known_asks[self.symbol] if direction else self.known_bids[self.symbol] - - # Start at the inside and add up the shares. - t = 0 - - for i in range(len(book)): - p,v = book[i] - t += v - - # If we have accumulated enough shares, return this price. - # Need to also return if greater than the number of desired shares - if t >= shares: return p - - # Not enough shares. Just return worst price (highest ask, lowest bid). - return book[-1][0] - - - # Request the last trade price for our symbol. - def getLastTrade (self): - super().getLastTrade(self.symbol) - - - # Request the spread for our symbol. - def getCurrentSpread (self): - # Impact agent gets depth 10000 on each side (probably everything). - super().getCurrentSpread(self.symbol, 10000) - - - def getWakeFrequency (self): - return (pd.Timedelta('1ns')) - - + if not self.trading: + self.trading = True + + # Time to start trading! + print("{} is ready to start trading now.".format(self.name)) + + # Steady state wakeup behavior starts here. + + # First, see if we have received a MKT_CLOSED message for the day. If so, + # there's nothing to do except clean-up. 
+ if self.mkt_closed and (self.symbol in self.daily_close_price): + # Market is closed and we already got the daily close price. + return + + ### Impact agent operates at a specific time. + if currentTime < self.impact_time: + print("Impact agent waiting for impact_time {}".format(self.impact_time)) + self.setWakeup(self.impact_time) + return + + ### The impact agent only trades once, but we will monitor prices for + ### the sake of performance. + self.setWakeup(currentTime + pd.Timedelta("30m")) + + # If the market is closed and we haven't obtained the daily close price yet, + # do that before we cease activity for the day. Don't do any other behavior + # after market close. + # + # Also, if we already made our one trade, do nothing except monitor prices. + # if self.traded >= self.impact_trades or (self.mkt_closed and (not self.symbol in self.daily_close_price)): + if self.traded or (self.mkt_closed and (not self.symbol in self.daily_close_price)): + self.getLastTrade() + self.state = "AWAITING_LAST_TRADE" + return + + # if self.traded < self.impact_trades: + # self.setWakeup(currentTime + impact_duration) + + # The impact agent will place one order based on the current spread. + self.getCurrentSpread() + self.state = "AWAITING_SPREAD" + + def receiveMessage(self, currentTime, msg): + # Parent class schedules market open wakeup call once market open/close times are known. + super().receiveMessage(currentTime, msg) + + # We have been awakened by something other than our scheduled wakeup. + # If our internal state indicates we were waiting for a particular event, + # check if we can transition to a new state. + + if self.state == "AWAITING_SPREAD": + # We were waiting for current spread information to make our trade. + # If the message we just received is QUERY_SPREAD, that means we just got it. + if msg.body["msg"] == "QUERY_SPREAD": + # Place our one trade. 
+ bid, bid_vol, ask, ask_vol = self.getKnownBidAsk(self.symbol) + # bid_liq, ask_liq = self.getKnownLiquidity(self.symbol, within=self.within) + print("within: " + str(self.within)) + bid_liq, ask_liq = self.getKnownLiquidity(self.symbol, within=0.75) + + # Buy order. + # direction, shares, price = True, int(round(ask_liq * self.greed)), ask + + # Sell order. This should be a parameter, but isn't yet. + # direction, shares = False, int(round(bid_liq * self.greed)) + direction, shares = False, int(round(bid_liq * 0.5)) + + # Compute the limit price we must offer to ensure our order executes immediately. + # This is essentially a workaround for the lack of true market orders in our + # current simulation. + price = self.computeRequiredPrice(direction, shares) + + # Actually place the order only if self.impact is true. + if self.impact: + print( + "Impact agent firing: {} {} @ {} @ {}".format( + "BUY" if direction else "SELL", + shares, + self.dollarize(price), + currentTime, + ) + ) + self.placeLimitOrder(self.symbol, shares, direction, price) + else: + print( + "Impact agent would fire: {} {} @ {} (but self.impact = False)".format( + "BUY" if direction else "SELL", + shares, + self.dollarize(price), + ) + ) + + self.traded = True + self.state = "AWAITING_WAKEUP" + + # Internal state and logic specific to this agent. + + def placeLimitOrder(self, symbol, quantity, is_buy_order, limit_price): + super().placeLimitOrder(symbol, quantity, is_buy_order, limit_price, ignore_risk=True) + + # Computes required limit price to immediately execute a trade for the specified quantity + # of shares. + def computeRequiredPrice(self, direction, shares): + book = self.known_asks[self.symbol] if direction else self.known_bids[self.symbol] + + # Start at the inside and add up the shares. + t = 0 + + for i in range(len(book)): + p, v = book[i] + t += v + + # If we have accumulated enough shares, return this price. 
+ # Need to also return if greater than the number of desired shares + if t >= shares: + return p + + # Not enough shares. Just return worst price (highest ask, lowest bid). + return book[-1][0] + + # Request the last trade price for our symbol. + def getLastTrade(self): + super().getLastTrade(self.symbol) + + # Request the spread for our symbol. + def getCurrentSpread(self): + # Impact agent gets depth 10000 on each side (probably everything). + super().getCurrentSpread(self.symbol, 10000) + + def getWakeFrequency(self): + return pd.Timedelta("1ns") diff --git a/agent/examples/SubscriptionAgent.py b/agent/examples/SubscriptionAgent.py index b3b11b2fb..af893d5b2 100644 --- a/agent/examples/SubscriptionAgent.py +++ b/agent/examples/SubscriptionAgent.py @@ -1,24 +1,42 @@ +import pandas as pd + from agent.TradingAgent import TradingAgent from util.util import log_print -import pandas as pd - class SubscriptionAgent(TradingAgent): """ Simple agent to demonstrate subscription to order book market data. """ - def __init__(self, id, name, type, symbol, starting_cash, levels, freq, log_orders=False, random_state=None): - super().__init__(id, name, type, starting_cash=starting_cash, log_orders=log_orders, random_state=random_state) + def __init__( + self, + id, + name, + type, + symbol, + starting_cash, + levels, + freq, + log_orders=False, + random_state=None, + ): + super().__init__( + id, + name, + type, + starting_cash=starting_cash, + log_orders=log_orders, + random_state=random_state, + ) self.symbol = symbol # symbol traded self.levels = levels # number of price levels to subscribe to/recieve updates for self.freq = freq # minimum number of nanoseconds between market data messages self.subscribe = True # Flag to determine whether to subscribe to data or use polling mechanism self.subscription_requested = False self.last_update_ts = None # timestamp of the last agent update. 
- # This is NOT required but only used to demonstrate how subscription works - self.state = 'AWAITING_MARKET_DATA' + # This is NOT required but only used to demonstrate how subscription works + self.state = "AWAITING_MARKET_DATA" self.current_bids = None self.current_asks = None @@ -34,10 +52,13 @@ def wakeup(self, currentTime): def receiveMessage(self, currentTime, msg): super().receiveMessage(currentTime, msg) - if self.subscribe and self.state == 'AWAITING_MARKET_DATA' and msg.body['msg'] == 'MARKET_DATA': - bids, asks = msg.body['bids'], msg.body['asks'] + if self.subscribe and self.state == "AWAITING_MARKET_DATA" and msg.body["msg"] == "MARKET_DATA": + bids, asks = msg.body["bids"], msg.body["asks"] log_print("--------------------") - log_print("seconds elapsed since last update: {}", (currentTime - self.last_update_ts).delta / 1e9) + log_print( + "seconds elapsed since last update: {}", + (currentTime - self.last_update_ts).delta / 1e9, + ) log_print("number of bid levels: {}", len(bids)) log_print("number of ask levels: {}", len(asks)) log_print("bids: {}, asks: {}", bids, asks) @@ -49,4 +70,4 @@ def receiveMessage(self, currentTime, msg): self.current_asks = asks def getWakeFrequency(self): - return pd.Timedelta('1s') \ No newline at end of file + return pd.Timedelta("1s") diff --git a/agent/examples/SumClientAgent.py b/agent/examples/SumClientAgent.py index bc109112d..aa86e6213 100644 --- a/agent/examples/SumClientAgent.py +++ b/agent/examples/SumClientAgent.py @@ -1,107 +1,121 @@ +import pandas as pd + from agent.Agent import Agent from agent.examples.SumServiceAgent import SumServiceAgent from message.Message import Message from util.util import log_print -import pandas as pd - - # The SumClientAgent class inherits from the base Agent class. It is intended # to serve as an example in which a service agent performs some aggregated # computation for multiple clients and returns the result to all clients. 
-class SumClientAgent(Agent): - - def __init__(self, id, name, type, peer_list=None, random_state=None): - # Base class init. - super().__init__(id, name, type, random_state) - - self.peer_list = peer_list - self.peer_exchange_complete = False - - self.peers_received = {} - self.peer_sum = 0 - - - ### Simulation lifecycle messages. - - def kernelStarting(self, startTime): - # self.kernel is set in Agent.kernelInitializing() - - # Find an SumServiceAgent which can answer our queries. It is guaranteed - # to exist by now (if there is one). - self.serviceAgentID = self.kernel.findAgentByType(SumServiceAgent) - - log_print ("Agent {} requested agent of type Agent.SumServiceAgent. Given Agent ID: {}", - self.id, self.serviceAgentID) - - # Request a wake-up call as in the base Agent (spread across five seconds). - super().kernelStarting(startTime + pd.Timedelta(self.random_state.randint(low = 0, high = 5000000000), unit='ns')) - - - ### Simulation participation messages. - - def wakeup (self, currentTime): - # Allow the base Agent to do whatever it needs to. - super().wakeup(currentTime) - # This agent only needs one wakeup call at simulation start. At this time, - # each client agent will send a number to each agent in its peer list. - # Each number will be sampled independently. That is, client agent 1 will - # send n2 to agent 2, n3 to agent 3, and so forth. - - # Once a client agent has received these initial random numbers from all - # agents in the peer list, it will make its first request from the sum - # service. Afterwards, it will simply request new sums when answers are - # delivered to previous queries. - - # At the first wakeup, initiate peer exchange. 
- if not self.peer_exchange_complete: - n = [self.random_state.randint(low = 0, high = 100) for i in range(len(self.peer_list))] - log_print ("agent {} peer list: {}", self.id, self.peer_list) - log_print ("agent {} numbers to exchange: {}", self.id, n) - - for idx, peer in enumerate(self.peer_list): - self.sendMessage(peer, Message({ "msg" : "PEER_EXCHANGE", "sender": self.id, "n" : n[idx] })) - - else: - # For subsequent (self-induced) wakeups, place a sum query. - n1, n2 = [self.random_state.randint(low = 0, high = 100) for i in range(2)] - - log_print ("agent {} transmitting numbers {} and {} with peer sum {}", self.id, n1, n2, self.peer_sum) - - # Add the sum of the peer exchange values to both numbers. - n1 += self.peer_sum - n2 += self.peer_sum - - self.sendMessage(self.serviceAgentID, Message({ "msg" : "SUM_QUERY", "sender": self.id, - "n1" : n1, "n2" : n2 })) - - return - - - def receiveMessage (self, currentTime, msg): - # Allow the base Agent to do whatever it needs to. - super().receiveMessage(currentTime, msg) - - if msg.body['msg'] == "PEER_EXCHANGE": - - # Ensure we don't somehow record the same peer twice. - if msg.body['sender'] not in self.peers_received: - self.peers_received[msg.body['sender']] = True - self.peer_sum += msg.body['n'] - - if len(self.peers_received) == len(self.peer_list): - # We just heard from the final peer. Initiate our first sum request. - log_print ("agent {} heard from final peer. peers_received = {}, peer_sum = {}", - self.id, self.peers_received, self.peer_sum) - - self.peer_exchange_complete = True - self.setWakeup(currentTime + pd.Timedelta('1ns')) - - elif msg.body['msg'] == "SUM_QUERY_RESPONSE": - log_print("Agent {} received sum query response: {}", self.id, msg) - - # Now schedule a new query. - self.setWakeup(currentTime + pd.Timedelta('1m')) +class SumClientAgent(Agent): + def __init__(self, id, name, type, peer_list=None, random_state=None): + # Base class init. 
+ super().__init__(id, name, type, random_state) + + self.peer_list = peer_list + self.peer_exchange_complete = False + + self.peers_received = {} + self.peer_sum = 0 + + ### Simulation lifecycle messages. + + def kernelStarting(self, startTime): + # self.kernel is set in Agent.kernelInitializing() + + # Find an SumServiceAgent which can answer our queries. It is guaranteed + # to exist by now (if there is one). + self.serviceAgentID = self.kernel.findAgentByType(SumServiceAgent) + + log_print( + "Agent {} requested agent of type Agent.SumServiceAgent. Given Agent ID: {}", + self.id, + self.serviceAgentID, + ) + + # Request a wake-up call as in the base Agent (spread across five seconds). + super().kernelStarting(startTime + pd.Timedelta(self.random_state.randint(low=0, high=5000000000), unit="ns")) + + ### Simulation participation messages. + + def wakeup(self, currentTime): + # Allow the base Agent to do whatever it needs to. + super().wakeup(currentTime) + + # This agent only needs one wakeup call at simulation start. At this time, + # each client agent will send a number to each agent in its peer list. + # Each number will be sampled independently. That is, client agent 1 will + # send n2 to agent 2, n3 to agent 3, and so forth. + + # Once a client agent has received these initial random numbers from all + # agents in the peer list, it will make its first request from the sum + # service. Afterwards, it will simply request new sums when answers are + # delivered to previous queries. + + # At the first wakeup, initiate peer exchange. 
+ if not self.peer_exchange_complete: + n = [self.random_state.randint(low=0, high=100) for i in range(len(self.peer_list))] + log_print("agent {} peer list: {}", self.id, self.peer_list) + log_print("agent {} numbers to exchange: {}", self.id, n) + + for idx, peer in enumerate(self.peer_list): + self.sendMessage( + peer, + Message({"msg": "PEER_EXCHANGE", "sender": self.id, "n": n[idx]}), + ) + + else: + # For subsequent (self-induced) wakeups, place a sum query. + n1, n2 = [self.random_state.randint(low=0, high=100) for i in range(2)] + + log_print( + "agent {} transmitting numbers {} and {} with peer sum {}", + self.id, + n1, + n2, + self.peer_sum, + ) + + # Add the sum of the peer exchange values to both numbers. + n1 += self.peer_sum + n2 += self.peer_sum + + self.sendMessage( + self.serviceAgentID, + Message({"msg": "SUM_QUERY", "sender": self.id, "n1": n1, "n2": n2}), + ) + + return + + def receiveMessage(self, currentTime, msg): + # Allow the base Agent to do whatever it needs to. + super().receiveMessage(currentTime, msg) + + if msg.body["msg"] == "PEER_EXCHANGE": + + # Ensure we don't somehow record the same peer twice. + if msg.body["sender"] not in self.peers_received: + self.peers_received[msg.body["sender"]] = True + self.peer_sum += msg.body["n"] + + if len(self.peers_received) == len(self.peer_list): + # We just heard from the final peer. Initiate our first sum request. + log_print( + "agent {} heard from final peer. peers_received = {}, peer_sum = {}", + self.id, + self.peers_received, + self.peer_sum, + ) + + self.peer_exchange_complete = True + self.setWakeup(currentTime + pd.Timedelta("1ns")) + + elif msg.body["msg"] == "SUM_QUERY_RESPONSE": + log_print("Agent {} received sum query response: {}", self.id, msg) + + # Now schedule a new query. 
+ self.setWakeup(currentTime + pd.Timedelta("1m")) diff --git a/agent/examples/SumServiceAgent.py b/agent/examples/SumServiceAgent.py index e2b5d0c6e..79e9c362a 100644 --- a/agent/examples/SumServiceAgent.py +++ b/agent/examples/SumServiceAgent.py @@ -2,76 +2,72 @@ from message.Message import Message from util.util import log_print - # The SumServiceAgent class inherits from the base Agent class. It is intended # to serve as an example in which a service agent performs some aggregated # computation for multiple clients and returns the result to all clients. -class SumServiceAgent(Agent): - - def __init__(self, id, name, type, random_state=None, num_clients=10): - # Base class init. - super().__init__(id, name, type, random_state) - - # How many clients should we wait for? - self.num_clients = num_clients - # A list of the numbers to sum: dictionary keyed by agentID. - self.numbers = {} - - # We track the total sum for the entire day to print at the end. - self.total = 0 - - - ### Simulation lifecycle messages. - def kernelStarting(self, startTime): - # self.kernel is set in Agent.kernelInitializing() +class SumServiceAgent(Agent): - # This agent should have negligible computation delay. - self.setComputationDelay(1000000) # 1 ms + def __init__(self, id, name, type, random_state=None, num_clients=10): + # Base class init. + super().__init__(id, name, type, random_state) - # Request a wake-up call as in the base Agent. - super().kernelStarting(startTime) + # How many clients should we wait for? + self.num_clients = num_clients + # A list of the numbers to sum: dictionary keyed by agentID. + self.numbers = {} - def kernelStopping(self): - # Print the total sum for the day, only for completed sum requests. - print("Agent {} reports total sum: {}".format(self.id, self.total)) + # We track the total sum for the entire day to print at the end. + self.total = 0 - # Allow the base class to perform stopping activities. 
- super().kernelStopping() + ### Simulation lifecycle messages. + def kernelStarting(self, startTime): + # self.kernel is set in Agent.kernelInitializing() + # This agent should have negligible computation delay. + self.setComputationDelay(1000000) # 1 ms - ### Simulation participation messages. + # Request a wake-up call as in the base Agent. + super().kernelStarting(startTime) - # The service agent does not require wakeup calls. + def kernelStopping(self): + # Print the total sum for the day, only for completed sum requests. + print("Agent {} reports total sum: {}".format(self.id, self.total)) - def receiveMessage (self, currentTime, msg): - # Allow the base Agent to do whatever it needs to. - super().receiveMessage(currentTime, msg) + # Allow the base class to perform stopping activities. + super().kernelStopping() - if msg.body['msg'] == "SUM_QUERY": - log_print("Agent {} received sum query: {}", self.id, msg) - self.numbers[msg.body['sender']] = (msg.body['n1'], msg.body['n2']) + ### Simulation participation messages. - if len(self.numbers.keys()) >= self.num_clients: - # It is time to sum the numbers. - self.processSum() + # The service agent does not require wakeup calls. - # Then clear the pending queries. - self.numbers = {} + def receiveMessage(self, currentTime, msg): + # Allow the base Agent to do whatever it needs to. + super().receiveMessage(currentTime, msg) + if msg.body["msg"] == "SUM_QUERY": + log_print("Agent {} received sum query: {}", self.id, msg) + self.numbers[msg.body["sender"]] = (msg.body["n1"], msg.body["n2"]) - ### Sum client numbers and respond to each client. - def processSum (self): + if len(self.numbers.keys()) >= self.num_clients: + # It is time to sum the numbers. + self.processSum() - current_sum = sum([ x[0] + x[1] for x in self.numbers.values() ]) - self.total += current_sum + # Then clear the pending queries. 
+ self.numbers = {} - log_print("Agent {} computed sum: {}", self.id, current_sum) + ### Sum client numbers and respond to each client. + def processSum(self): - for sender in self.numbers.keys(): - self.sendMessage(sender, Message({ "msg" : "SUM_QUERY_RESPONSE", "sender": self.id, - "sum" : current_sum })) + current_sum = sum([x[0] + x[1] for x in self.numbers.values()]) + self.total += current_sum + log_print("Agent {} computed sum: {}", self.id, current_sum) + for sender in self.numbers.keys(): + self.sendMessage( + sender, + Message({"msg": "SUM_QUERY_RESPONSE", "sender": self.id, "sum": current_sum}), + ) diff --git a/agent/examples/crypto/PPFL_ClientAgent.py b/agent/examples/crypto/PPFL_ClientAgent.py index ebb2c47f3..3d5197d82 100755 --- a/agent/examples/crypto/PPFL_ClientAgent.py +++ b/agent/examples/crypto/PPFL_ClientAgent.py @@ -1,380 +1,450 @@ -from agent.Agent import Agent -from agent.examples.crypto.PPFL_ServiceAgent import PPFL_ServiceAgent -from message.Message import Message -from util.util import log_print - -from util.crypto.logReg import getWeights, reportStats -import util.crypto.diffieHellman as dh +import random +from os.path import exists import numpy as np -from os.path import exists import pandas as pd -import random +import util.crypto.diffieHellman as dh +from agent.Agent import Agent +from agent.examples.crypto.PPFL_ServiceAgent import PPFL_ServiceAgent +from message.Message import Message +from util.crypto.logReg import getWeights, reportStats +from util.util import log_print # The PPFL_ClientAgent class inherits from the base Agent class. It implements # a secure federated learning protocol with basic differential privacy plus # secure multiparty communication. 
-class PPFL_ClientAgent(Agent): - - def __init__(self, id, name, type, peer_list=None, iterations=4, multiplier=10000, secret_scale = 100000, - X_train = None, y_train = None, X_test = None, y_test = None, split_size = None, - learning_rate = None, clear_learning = None, num_clients = None, num_subgraphs = None, - epsilon = None, max_logreg_iterations = None, collusion = False, random_state=None): - - # Base class init. - super().__init__(id, name, type, random_state) - - - # Store the client's peer list (subgraph, neighborhood) with which it should communicate. - self.peer_list = peer_list - - # Initialize a tracking attribute for the initial peer exchange and record the subgraph size. - self.peer_exchange_complete = False - self.num_peers = len(self.peer_list) - - # Record the total number of clients participating in the protocol and the number of subgraphs. - # Neither of these are part of the protocol, or necessary for real-world implementation, but do - # allow for convenient logging of progress and results in simulation. - self.num_clients = num_clients - self.num_subgraphs = num_subgraphs - - # Record whether the clients should be recording information about the potential accuracy of - # peer data reconstruction via collusion among the clients. - self.collusion = collusion - - # Record the number of protocol (federated learning) iterations the clients will perform. - self.no_of_iterations = iterations - - # Record the multiplier that will be used to protect against floating point accuracy loss and - # the scale of the client shared secrets. - self.multiplier = multiplier - self.secret_scale = secret_scale - - # Record the number of local iterations of logistic regression each client will run during - # each protocol iteration and what local learning rate will be used. 
- self.max_logreg_iterations = max_logreg_iterations - self.learning_rate = learning_rate - - # Record whether clients will do federated learning in the clear (no privacy, no encryption) - # and, if needed, the epsilon value for differential privacy. - self.clear_learning = clear_learning - self.epsilon = epsilon - - # Record the training and testing splits for the data set to be learned. - self.X_train = X_train - self.y_train = y_train - - self.X_test = X_test - self.y_test = y_test - - # Record the number of features in the data set. - self.no_of_weights = X_train.shape[1] - - # Initialize an attribute to remember the shared weights returned from the server. - self.prevWeight = None - - # Each client receives only a portion of the training data each protocol iteration. - self.split_size = split_size - - # Initialize a dictionary to remember which peers we have heard from during peer exchange. - self.peers_received = {} - - # Initialize a dictionary to accumulate this client's timing information by task. - self.elapsed_time = { 'DH_OFFLINE' : pd.Timedelta(0), 'DH_ONLINE' : pd.Timedelta(0), - 'TRAINING' : pd.Timedelta(0), 'ENCRYPTION' : pd.Timedelta(0) } - - - # Pre-generate this client's local training data for each iteration (for the sake of simulation speed). - self.trainX = [] - self.trainY = [] - - # This is a faster PRNG than the default, for times when we must select a large quantity of randomness. - self.prng = np.random.Generator(np.random.SFC64()) - - ### Data randomly selected from total training set each iteration, simulating online behavior. - for i in range(iterations): - slice = self.prng.choice(range(self.X_train.shape[0]), size = split_size, replace = False) - - # Pull together the current local training set. - self.trainX.append(self.X_train[slice].copy()) - self.trainY.append(self.y_train[slice].copy()) - - - # Create dictionaries to hold the public and secure keys for this client, and the public keys shared - # by its peers. 
- self.pubkeys = {} - self.seckeys = {} - self.peer_public_keys = {} - - # Create dictionaries to hold the shared key for each peer each iteration and the seed for the - # following iteration. - self.r = {} - self.R = {} - - - # Specify the parameters used for generation of randomness. - self.px_reg = 1 - self.px_epsilon = epsilon - self.px_min_rows = self.split_size - - self.px_shape = 1 / ( self.num_peers + 1) - self.px_scale = 2 / (( self.num_peers + 1 ) * self.px_min_rows * self.px_reg * self.px_epsilon ) - - if self.id == 1: print (f"px_shape is {self.px_shape}") - if self.id == 1: print (f"px_scale is {self.px_scale}") - - # Specify the required shape for vectorized generation of randomness. - self.px_dims = ( self.num_peers, self.no_of_iterations, self.no_of_weights ) - - - # Iteration counter. - self.current_iteration = 0 - - - - - ### Simulation lifecycle messages. - - def kernelStarting(self, startTime): - - # Initialize custom state properties into which we will later accumulate results. - # To avoid redundancy, we allow only the first client to handle initialization. - if self.id == 1: - self.kernel.custom_state['dh_offline'] = pd.Timedelta(0) - self.kernel.custom_state['dh_online'] = pd.Timedelta(0) - self.kernel.custom_state['training'] = pd.Timedelta(0) - self.kernel.custom_state['encryption'] = pd.Timedelta(0) - - # Find the PPFL service agent, so messages can be directed there. - self.serviceAgentID = self.kernel.findAgentByType(PPFL_ServiceAgent) - - # Request a wake-up call as in the base Agent. Noise is kept small because - # the overall protocol duration is so short right now. (up to one microsecond) - super().kernelStarting(startTime + pd.Timedelta(self.random_state.randint(low = 0, high = 1000), unit='ns')) - - - def kernelStopping(self): - - # Accumulate into the Kernel's "custom state" this client's elapsed times per category. - # Note that times which should be reported in the mean per iteration are already so computed. 
- # These will be output to the config (experiment) file at the end of the simulation. - - self.kernel.custom_state['dh_offline'] += self.elapsed_time['DH_OFFLINE'] - self.kernel.custom_state['dh_online'] += (self.elapsed_time['DH_ONLINE'] / self.no_of_iterations) - self.kernel.custom_state['training'] += (self.elapsed_time['TRAINING'] / self.no_of_iterations) - self.kernel.custom_state['encryption'] += (self.elapsed_time['ENCRYPTION'] / self.no_of_iterations) - - super().kernelStopping() - - - ### Simulation participation messages. - - def wakeup (self, currentTime): - super().wakeup(currentTime) - - # Record start of wakeup for real-time computation delay.. - dt_wake_start = pd.Timestamp('now') - - # Check if the clients are still performing the one-time peer exchange. - if not self.peer_exchange_complete: - - # Generate DH keys. - if not self.clear_learning: self.pubkeys, self.seckeys = dh.dict_keygeneration( self.peer_list ) - - # Record elapsed wallclock for Diffie Hellman offline. - dt_wake_end = pd.Timestamp('now') - self.elapsed_time['DH_OFFLINE'] += dt_wake_end - dt_wake_start - - # Set computation delay to elapsed wallclock time. - self.setComputationDelay(int((dt_wake_end - dt_wake_start).to_timedelta64())) - - # Send generated values to peers. - if not self.clear_learning: - for idx, peer in enumerate(self.peer_list): - # We assume a star network configuration where all messages between peers must be forwarded - # through the server. - self.sendMessage(self.serviceAgentID, Message({ "msg" : "FWD_MSG", "msgToForward" : "PEER_EXCHANGE", - "sender": self.id, "recipient": peer, "pubkey" : self.pubkeys[peer] })) - - if self.clear_learning: - self.peer_exchange_complete = True - self.setWakeup(currentTime + pd.Timedelta('1ns')) - - else: - - # We are waking up to start a new iteration of the protocol. - # (Peer exchange is done before all this.) 
- - if (self.current_iteration == 0): - # During iteration 0 (only) we complete the key exchange and prepare the - # common key list, because at this point we know we have received keys - # from all peers. - - # R is the common key dictionary (by peer agent id). - if not self.clear_learning: self.R = dh.dict_keyexchange(self.peer_list, self.id, self.pubkeys, - self.seckeys, self.peer_public_keys) - - # Pre-generate all of this client's local differential privacy noise (for simulation speed). - # We will need one per weight per protocol iteration. - self.my_noise = np.random.laplace(scale = self.px_scale, size = (self.no_of_iterations, self.no_of_weights)) - - - # Diffie Hellman is done in every iteration. - if not self.clear_learning: - for peer_id, common_key in self.R.items(): - - random.seed(common_key) - rand = random.getrandbits(512) - - rand_b_raw = format(rand, '0512b') - rand_b_rawr = rand_b_raw[:256] - rand_b_rawR = rand_b_raw[256:] - - - # Negate offsets below this agent's id. This ensures each offset will be - # added once and subtracted once. - r = int(rand_b_rawr,2) % (2**32) - - log_print ("SELECTED r: {}", r) - - # Update dictionary of shared secrets for this iteration. - self.r[peer_id] = r if peer_id < self.id else -r - - # Store the shared seeds for the next iteration. - self.R[peer_id] = int(rand_b_rawR,2) - - - # Record elapsed wallclock for Diffie Hellman online. - dt_online_complete = pd.Timestamp('now') - - # For convenience of things indexed by iteration... - i = self.current_iteration - - # Perform the local training for this client, using only its local (private) data. The configured learning - # rate might need to be increased if there are very many clients, each with very little data, otherwise - # convergence may take a really long time. - # - # max_iter controls how many iterations of gradient descent to perform on the logistic - # regression model. previous_weight should be passed as None for the first iteration. 
- weight = getWeights(previous_weight = self.prevWeight, max_iter = self.max_logreg_iterations, lr = self.learning_rate, - trainX = self.trainX[i], trainY = self.trainY[i], self_id = self.id) - - # If in collusion analysis mode, write out the weights we will need to evaluate reconstruction. - if self.collusion: - with open('results/collusion_weights.csv', 'a') as results_file: - results_file.write(f"{self.id},{self.current_iteration},{','.join([str(x) for x in weight])}\n") - - # Record elapsed wallclock for training model. - dt_training_complete = pd.Timestamp('now') - - if not self.clear_learning: - # Add a random sample from Laplace to each of the weights. - noise = self.my_noise[i] - - if self.collusion: - with open('results/collusion_selected_noise.csv', 'a') as results_file: - # Write out the noise added to each weight by this client. - results_file.write(f"{self.id},{self.current_iteration},{','.join([str(x) for x in noise])}\n") - - log_print ("weight {}", weight) - log_print ("noise {}", noise) - - if self.clear_learning: n = np.array(weight) * self.multiplier - else: n = (np.array(weight) + noise) * self.multiplier - - log_print ("n {}", n) - log_print ("r {}", self.r) - - weights_to_send = n + sum(self.r.values()) - - log_print ("weights_to_send {}", weights_to_send) - - - # Record elapsed wallclock for encryption. - dt_encryption_complete = pd.Timestamp('now') - - # Set computation delay to elapsed wallclock time. - self.setComputationDelay(int((dt_encryption_complete - dt_wake_start).to_timedelta64())) - - # Send the message to the server. - self.sendMessage(self.serviceAgentID, Message({ "msg" : "CLIENT_WEIGHTS", "sender": self.id, - "weights" : weights_to_send })) - - self.current_iteration += 1 - - # Store elapsed times by category. 
- self.elapsed_time['DH_ONLINE'] += dt_online_complete - dt_wake_start - self.elapsed_time['TRAINING'] += dt_training_complete - dt_online_complete - self.elapsed_time['ENCRYPTION'] += dt_encryption_complete - dt_training_complete - - - - def receiveMessage (self, currentTime, msg): - super().receiveMessage(currentTime, msg) - - if msg.body['msg'] == "PEER_EXCHANGE": - - # Record start of message processing. - dt_rcv_start = pd.Timestamp('now') - - # Ensure we don't somehow record the same peer twice. These all come from the - # service provider, relayed from other clients, but are "fixed up" to appear - # as if they come straight from the relevant peer. - if msg.body['sender'] not in self.peers_received: - - # Record the content of the message and that we received it. - self.peers_received[msg.body['sender']] = True - self.peer_public_keys[msg.body['sender']] = msg.body['pubkey'] - - # Record end of message processing. - dt_rcv_end = pd.Timestamp('now') - - # Store elapsed times by category. - self.elapsed_time['DH_OFFLINE'] += dt_rcv_end - dt_rcv_start - - # Set computation delay to elapsed wallclock time. - self.setComputationDelay(int((dt_rcv_end - dt_rcv_start).to_timedelta64())) - - # If this is the last peer from whom we expect to hear, move on with the protocol. - if len(self.peers_received) == self.num_peers: - self.peer_exchange_complete = True - self.setWakeup(currentTime + pd.Timedelta('1ns')) - - elif msg.body['msg'] == "SHARED_WEIGHTS": - # Reset computation delay. - self.setComputationDelay(0) - - # Extract the shared weights from the message. - self.prevWeight = msg.body['weights'] - - # Remove the multiplier that was helping guard against floating point error. - self.prevWeight /= self.multiplier - - log_print ("Client weights received for iteration {} by {}: {}", self.current_iteration, self.id, self.prevWeight) - - # Client number 1 (arbitrary choice) records the shared learning progress each iteration - # for later visualization and analysis. 
- if self.id == 1: - is_acc, is_mcc, is_f1, is_mse, is_auprc, oos_acc, oos_mcc, oos_f1, oos_mse, oos_auprc = reportStats(self.prevWeight, self.current_iteration, self.X_train, self.y_train, self.X_test, self.y_test) - - if not exists("results/all_results.csv"): - with open('results/all_results.csv', 'a') as results_file: - # Write out the header. - results_file.write(f"Clients,Peers,Subgraphs,Iterations,Train Rows,Learning Rate,In The Clear?,Local Iterations,Epsilon,Iteration,IS ACC,OOS ACC,IS MCC,OOS MCC,IS MSE,OOS MSE,IS F1,OOS F1,IS AUPRC,OOS AUPRC\n") - - with open('results/all_results.csv', 'a') as results_file: - # Write out the current protocol iteration weights and metadata. - results_file.write(f"{self.num_clients},{self.num_peers},{self.num_subgraphs},{self.no_of_iterations},{self.split_size},{self.learning_rate},{self.clear_learning},{self.max_logreg_iterations},{self.epsilon},{self.current_iteration},{is_acc},{oos_acc},{is_mcc},{oos_mcc},{is_mse},{oos_mse},{is_f1},{oos_f1},{is_auprc},{oos_auprc}\n") - - if self.collusion: - with open('results/collusion_consensus.csv', 'a') as results_file: - # Agent 1 also writes out the consensus weights each iteration (for collusion analysis). - results_file.write(f"{self.current_iteration},{','.join([str(x) for x in self.prevWeight])}\n") +class PPFL_ClientAgent(Agent): - # Start a new iteration if we are not at the end of the protocol. - if self.current_iteration < self.no_of_iterations: - self.setWakeup(currentTime + pd.Timedelta('1ns')) + def __init__( + self, + id, + name, + type, + peer_list=None, + iterations=4, + multiplier=10000, + secret_scale=100000, + X_train=None, + y_train=None, + X_test=None, + y_test=None, + split_size=None, + learning_rate=None, + clear_learning=None, + num_clients=None, + num_subgraphs=None, + epsilon=None, + max_logreg_iterations=None, + collusion=False, + random_state=None, + ): + + # Base class init. 
+ super().__init__(id, name, type, random_state) + + # Store the client's peer list (subgraph, neighborhood) with which it should communicate. + self.peer_list = peer_list + + # Initialize a tracking attribute for the initial peer exchange and record the subgraph size. + self.peer_exchange_complete = False + self.num_peers = len(self.peer_list) + + # Record the total number of clients participating in the protocol and the number of subgraphs. + # Neither of these are part of the protocol, or necessary for real-world implementation, but do + # allow for convenient logging of progress and results in simulation. + self.num_clients = num_clients + self.num_subgraphs = num_subgraphs + + # Record whether the clients should be recording information about the potential accuracy of + # peer data reconstruction via collusion among the clients. + self.collusion = collusion + + # Record the number of protocol (federated learning) iterations the clients will perform. + self.no_of_iterations = iterations + + # Record the multiplier that will be used to protect against floating point accuracy loss and + # the scale of the client shared secrets. + self.multiplier = multiplier + self.secret_scale = secret_scale + + # Record the number of local iterations of logistic regression each client will run during + # each protocol iteration and what local learning rate will be used. + self.max_logreg_iterations = max_logreg_iterations + self.learning_rate = learning_rate + + # Record whether clients will do federated learning in the clear (no privacy, no encryption) + # and, if needed, the epsilon value for differential privacy. + self.clear_learning = clear_learning + self.epsilon = epsilon + + # Record the training and testing splits for the data set to be learned. + self.X_train = X_train + self.y_train = y_train + + self.X_test = X_test + self.y_test = y_test + + # Record the number of features in the data set. 
+ self.no_of_weights = X_train.shape[1] + + # Initialize an attribute to remember the shared weights returned from the server. + self.prevWeight = None + + # Each client receives only a portion of the training data each protocol iteration. + self.split_size = split_size + + # Initialize a dictionary to remember which peers we have heard from during peer exchange. + self.peers_received = {} + + # Initialize a dictionary to accumulate this client's timing information by task. + self.elapsed_time = { + "DH_OFFLINE": pd.Timedelta(0), + "DH_ONLINE": pd.Timedelta(0), + "TRAINING": pd.Timedelta(0), + "ENCRYPTION": pd.Timedelta(0), + } + + # Pre-generate this client's local training data for each iteration (for the sake of simulation speed). + self.trainX = [] + self.trainY = [] + + # This is a faster PRNG than the default, for times when we must select a large quantity of randomness. + self.prng = np.random.Generator(np.random.SFC64()) + + ### Data randomly selected from total training set each iteration, simulating online behavior. + for i in range(iterations): + slice = self.prng.choice(range(self.X_train.shape[0]), size=split_size, replace=False) + + # Pull together the current local training set. + self.trainX.append(self.X_train[slice].copy()) + self.trainY.append(self.y_train[slice].copy()) + + # Create dictionaries to hold the public and secure keys for this client, and the public keys shared + # by its peers. + self.pubkeys = {} + self.seckeys = {} + self.peer_public_keys = {} + + # Create dictionaries to hold the shared key for each peer each iteration and the seed for the + # following iteration. + self.r = {} + self.R = {} + + # Specify the parameters used for generation of randomness. 
+ self.px_reg = 1 + self.px_epsilon = epsilon + self.px_min_rows = self.split_size + + self.px_shape = 1 / (self.num_peers + 1) + self.px_scale = 2 / ((self.num_peers + 1) * self.px_min_rows * self.px_reg * self.px_epsilon) + + if self.id == 1: + print(f"px_shape is {self.px_shape}") + if self.id == 1: + print(f"px_scale is {self.px_scale}") + + # Specify the required shape for vectorized generation of randomness. + self.px_dims = (self.num_peers, self.no_of_iterations, self.no_of_weights) + + # Iteration counter. + self.current_iteration = 0 + + ### Simulation lifecycle messages. + + def kernelStarting(self, startTime): + + # Initialize custom state properties into which we will later accumulate results. + # To avoid redundancy, we allow only the first client to handle initialization. + if self.id == 1: + self.kernel.custom_state["dh_offline"] = pd.Timedelta(0) + self.kernel.custom_state["dh_online"] = pd.Timedelta(0) + self.kernel.custom_state["training"] = pd.Timedelta(0) + self.kernel.custom_state["encryption"] = pd.Timedelta(0) + + # Find the PPFL service agent, so messages can be directed there. + self.serviceAgentID = self.kernel.findAgentByType(PPFL_ServiceAgent) + + # Request a wake-up call as in the base Agent. Noise is kept small because + # the overall protocol duration is so short right now. (up to one microsecond) + super().kernelStarting(startTime + pd.Timedelta(self.random_state.randint(low=0, high=1000), unit="ns")) + + def kernelStopping(self): + + # Accumulate into the Kernel's "custom state" this client's elapsed times per category. + # Note that times which should be reported in the mean per iteration are already so computed. + # These will be output to the config (experiment) file at the end of the simulation. 
+ + self.kernel.custom_state["dh_offline"] += self.elapsed_time["DH_OFFLINE"] + self.kernel.custom_state["dh_online"] += self.elapsed_time["DH_ONLINE"] / self.no_of_iterations + self.kernel.custom_state["training"] += self.elapsed_time["TRAINING"] / self.no_of_iterations + self.kernel.custom_state["encryption"] += self.elapsed_time["ENCRYPTION"] / self.no_of_iterations + + super().kernelStopping() + ### Simulation participation messages. + + def wakeup(self, currentTime): + super().wakeup(currentTime) + + # Record start of wakeup for real-time computation delay.. + dt_wake_start = pd.Timestamp("now") + + # Check if the clients are still performing the one-time peer exchange. + if not self.peer_exchange_complete: + + # Generate DH keys. + if not self.clear_learning: + self.pubkeys, self.seckeys = dh.dict_keygeneration(self.peer_list) + + # Record elapsed wallclock for Diffie Hellman offline. + dt_wake_end = pd.Timestamp("now") + self.elapsed_time["DH_OFFLINE"] += dt_wake_end - dt_wake_start + + # Set computation delay to elapsed wallclock time. + self.setComputationDelay(int((dt_wake_end - dt_wake_start).to_timedelta64())) + + # Send generated values to peers. + if not self.clear_learning: + for idx, peer in enumerate(self.peer_list): + # We assume a star network configuration where all messages between peers must be forwarded + # through the server. + self.sendMessage( + self.serviceAgentID, + Message( + { + "msg": "FWD_MSG", + "msgToForward": "PEER_EXCHANGE", + "sender": self.id, + "recipient": peer, + "pubkey": self.pubkeys[peer], + } + ), + ) + + if self.clear_learning: + self.peer_exchange_complete = True + self.setWakeup(currentTime + pd.Timedelta("1ns")) + + else: + + # We are waking up to start a new iteration of the protocol. + # (Peer exchange is done before all this.) 
+ + if self.current_iteration == 0: + # During iteration 0 (only) we complete the key exchange and prepare the + # common key list, because at this point we know we have received keys + # from all peers. + + # R is the common key dictionary (by peer agent id). + if not self.clear_learning: + self.R = dh.dict_keyexchange( + self.peer_list, + self.id, + self.pubkeys, + self.seckeys, + self.peer_public_keys, + ) + + # Pre-generate all of this client's local differential privacy noise (for simulation speed). + # We will need one per weight per protocol iteration. + self.my_noise = np.random.laplace( + scale=self.px_scale, + size=(self.no_of_iterations, self.no_of_weights), + ) + + # Diffie Hellman is done in every iteration. + if not self.clear_learning: + for peer_id, common_key in self.R.items(): + + random.seed(common_key) + rand = random.getrandbits(512) + + rand_b_raw = format(rand, "0512b") + rand_b_rawr = rand_b_raw[:256] + rand_b_rawR = rand_b_raw[256:] + + # Negate offsets below this agent's id. This ensures each offset will be + # added once and subtracted once. + r = int(rand_b_rawr, 2) % (2**32) + + log_print("SELECTED r: {}", r) + + # Update dictionary of shared secrets for this iteration. + self.r[peer_id] = r if peer_id < self.id else -r + + # Store the shared seeds for the next iteration. + self.R[peer_id] = int(rand_b_rawR, 2) + + # Record elapsed wallclock for Diffie Hellman online. + dt_online_complete = pd.Timestamp("now") + + # For convenience of things indexed by iteration... + i = self.current_iteration + + # Perform the local training for this client, using only its local (private) data. The configured learning + # rate might need to be increased if there are very many clients, each with very little data, otherwise + # convergence may take a really long time. + # + # max_iter controls how many iterations of gradient descent to perform on the logistic + # regression model. previous_weight should be passed as None for the first iteration. 
+ weight = getWeights( + previous_weight=self.prevWeight, + max_iter=self.max_logreg_iterations, + lr=self.learning_rate, + trainX=self.trainX[i], + trainY=self.trainY[i], + self_id=self.id, + ) + + # If in collusion analysis mode, write out the weights we will need to evaluate reconstruction. + if self.collusion: + with open("results/collusion_weights.csv", "a") as results_file: + results_file.write(f"{self.id},{self.current_iteration},{','.join([str(x) for x in weight])}\n") + + # Record elapsed wallclock for training model. + dt_training_complete = pd.Timestamp("now") + + if not self.clear_learning: + # Add a random sample from Laplace to each of the weights. + noise = self.my_noise[i] + + if self.collusion: + with open("results/collusion_selected_noise.csv", "a") as results_file: + # Write out the noise added to each weight by this client. + results_file.write(f"{self.id},{self.current_iteration},{','.join([str(x) for x in noise])}\n") + + log_print("weight {}", weight) + log_print("noise {}", noise) + + if self.clear_learning: + n = np.array(weight) * self.multiplier + else: + n = (np.array(weight) + noise) * self.multiplier + + log_print("n {}", n) + log_print("r {}", self.r) + + weights_to_send = n + sum(self.r.values()) + + log_print("weights_to_send {}", weights_to_send) + + # Record elapsed wallclock for encryption. + dt_encryption_complete = pd.Timestamp("now") + + # Set computation delay to elapsed wallclock time. + self.setComputationDelay(int((dt_encryption_complete - dt_wake_start).to_timedelta64())) + + # Send the message to the server. + self.sendMessage( + self.serviceAgentID, + Message( + { + "msg": "CLIENT_WEIGHTS", + "sender": self.id, + "weights": weights_to_send, + } + ), + ) + + self.current_iteration += 1 + + # Store elapsed times by category. 
+ self.elapsed_time["DH_ONLINE"] += dt_online_complete - dt_wake_start + self.elapsed_time["TRAINING"] += dt_training_complete - dt_online_complete + self.elapsed_time["ENCRYPTION"] += dt_encryption_complete - dt_training_complete + + def receiveMessage(self, currentTime, msg): + super().receiveMessage(currentTime, msg) + + if msg.body["msg"] == "PEER_EXCHANGE": + + # Record start of message processing. + dt_rcv_start = pd.Timestamp("now") + + # Ensure we don't somehow record the same peer twice. These all come from the + # service provider, relayed from other clients, but are "fixed up" to appear + # as if they come straight from the relevant peer. + if msg.body["sender"] not in self.peers_received: + + # Record the content of the message and that we received it. + self.peers_received[msg.body["sender"]] = True + self.peer_public_keys[msg.body["sender"]] = msg.body["pubkey"] + + # Record end of message processing. + dt_rcv_end = pd.Timestamp("now") + + # Store elapsed times by category. + self.elapsed_time["DH_OFFLINE"] += dt_rcv_end - dt_rcv_start + + # Set computation delay to elapsed wallclock time. + self.setComputationDelay(int((dt_rcv_end - dt_rcv_start).to_timedelta64())) + + # If this is the last peer from whom we expect to hear, move on with the protocol. + if len(self.peers_received) == self.num_peers: + self.peer_exchange_complete = True + self.setWakeup(currentTime + pd.Timedelta("1ns")) + + elif msg.body["msg"] == "SHARED_WEIGHTS": + # Reset computation delay. + self.setComputationDelay(0) + + # Extract the shared weights from the message. + self.prevWeight = msg.body["weights"] + + # Remove the multiplier that was helping guard against floating point error. 
+ self.prevWeight /= self.multiplier + + log_print( + "Client weights received for iteration {} by {}: {}", + self.current_iteration, + self.id, + self.prevWeight, + ) + + # Client number 1 (arbitrary choice) records the shared learning progress each iteration + # for later visualization and analysis. + if self.id == 1: + ( + is_acc, + is_mcc, + is_f1, + is_mse, + is_auprc, + oos_acc, + oos_mcc, + oos_f1, + oos_mse, + oos_auprc, + ) = reportStats( + self.prevWeight, + self.current_iteration, + self.X_train, + self.y_train, + self.X_test, + self.y_test, + ) + + if not exists("results/all_results.csv"): + with open("results/all_results.csv", "a") as results_file: + # Write out the header. + results_file.write( + f"Clients,Peers,Subgraphs,Iterations,Train Rows,Learning Rate,In The Clear?,Local Iterations,Epsilon,Iteration,IS ACC,OOS ACC,IS MCC,OOS MCC,IS MSE,OOS MSE,IS F1,OOS F1,IS AUPRC,OOS AUPRC\n" + ) + + with open("results/all_results.csv", "a") as results_file: + # Write out the current protocol iteration weights and metadata. + results_file.write( + f"{self.num_clients},{self.num_peers},{self.num_subgraphs},{self.no_of_iterations},{self.split_size},{self.learning_rate},{self.clear_learning},{self.max_logreg_iterations},{self.epsilon},{self.current_iteration},{is_acc},{oos_acc},{is_mcc},{oos_mcc},{is_mse},{oos_mse},{is_f1},{oos_f1},{is_auprc},{oos_auprc}\n" + ) + + if self.collusion: + with open("results/collusion_consensus.csv", "a") as results_file: + # Agent 1 also writes out the consensus weights each iteration (for collusion analysis). + results_file.write(f"{self.current_iteration},{','.join([str(x) for x in self.prevWeight])}\n") + + # Start a new iteration if we are not at the end of the protocol. 
+ if self.current_iteration < self.no_of_iterations: + self.setWakeup(currentTime + pd.Timedelta("1ns")) diff --git a/agent/examples/crypto/PPFL_ServiceAgent.py b/agent/examples/crypto/PPFL_ServiceAgent.py index 4e4078e10..b005d8a1f 100755 --- a/agent/examples/crypto/PPFL_ServiceAgent.py +++ b/agent/examples/crypto/PPFL_ServiceAgent.py @@ -1,155 +1,174 @@ -from agent.Agent import Agent -from message.Message import Message -from util.util import log_print -from util.crypto.logReg import getWeights - from copy import deepcopy + import numpy as np import pandas as pd +from agent.Agent import Agent +from message.Message import Message +from util.crypto.logReg import getWeights +from util.util import log_print + ### NEW MESSAGES: CLIENT_WEIGHTS : weights, SHARED_WEIGHTS : weights # The PPFL_ServiceAgent class inherits from the base Agent class. It provides # the simple shared service necessary for model combination under secure # federated learning. -class PPFL_ServiceAgent(Agent): - - def __init__(self, id, name, type, random_state=None, msg_fwd_delay=1000000, - iterations=4, num_clients=10): - - # Base class init. - super().__init__(id, name, type, random_state) - - # From how many clients do we expect to hear in each protocol iteration? - self.num_clients = num_clients - - # How long does it take us to forward a peer-to-peer client relay message? - self.msg_fwd_delay = msg_fwd_delay - - # Agent accumulation of elapsed times by category of task. - self.elapsed_time = { 'STORE_MODEL' : pd.Timedelta(0), 'COMBINE_MODEL' : pd.Timedelta(0) } - - # How many iterations of the protocol should be run? - self.no_of_iterations = iterations - - # Create a dictionary keyed by agentID to record which clients we have - # already heard from during the current protocol iteration. This can - # also guard against double accumulation from duplicate messages. - self.received = {} - # Create a list to accumulate received values. 
We don't need to know which came from which - # client, what the values mean, or indeed anything about them. - self.total = [] - - # Track the current iteration of the protocol. - self.current_iteration = 0 +class PPFL_ServiceAgent(Agent): + def __init__( + self, + id, + name, + type, + random_state=None, + msg_fwd_delay=1000000, + iterations=4, + num_clients=10, + ): + + # Base class init. + super().__init__(id, name, type, random_state) + + # From how many clients do we expect to hear in each protocol iteration? + self.num_clients = num_clients + + # How long does it take us to forward a peer-to-peer client relay message? + self.msg_fwd_delay = msg_fwd_delay + + # Agent accumulation of elapsed times by category of task. + self.elapsed_time = { + "STORE_MODEL": pd.Timedelta(0), + "COMBINE_MODEL": pd.Timedelta(0), + } + + # How many iterations of the protocol should be run? + self.no_of_iterations = iterations + + # Create a dictionary keyed by agentID to record which clients we have + # already heard from during the current protocol iteration. This can + # also guard against double accumulation from duplicate messages. + self.received = {} - ### Simulation lifecycle messages. - def kernelStarting(self, startTime): - # self.kernel is set in Agent.kernelInitializing() + # Create a list to accumulate received values. We don't need to know which came from which + # client, what the values mean, or indeed anything about them. + self.total = [] - # Initialize custom state properties into which we will accumulate results later. - self.kernel.custom_state['srv_store_model'] = pd.Timedelta(0) - self.kernel.custom_state['srv_combine_model'] = pd.Timedelta(0) + # Track the current iteration of the protocol. + self.current_iteration = 0 - # This agent should have negligible (or no) computation delay until otherwise specified. - self.setComputationDelay(0) + ### Simulation lifecycle messages. 
+ def kernelStarting(self, startTime): + # self.kernel is set in Agent.kernelInitializing() - # Request a wake-up call as in the base Agent. - super().kernelStarting(startTime) + # Initialize custom state properties into which we will accumulate results later. + self.kernel.custom_state["srv_store_model"] = pd.Timedelta(0) + self.kernel.custom_state["srv_combine_model"] = pd.Timedelta(0) + # This agent should have negligible (or no) computation delay until otherwise specified. + self.setComputationDelay(0) - def kernelStopping(self): - # Add the server time components to the custom state in the Kernel, for output to the config. - # Note that times which should be reported in the mean per iteration are already so computed. - self.kernel.custom_state['srv_store_model'] += (self.elapsed_time['STORE_MODEL'] / self.no_of_iterations) - self.kernel.custom_state['srv_combine_model'] += (self.elapsed_time['COMBINE_MODEL'] / self.no_of_iterations) + # Request a wake-up call as in the base Agent. + super().kernelStarting(startTime) - # Allow the base class to perform stopping activities. - super().kernelStopping() - + def kernelStopping(self): + # Add the server time components to the custom state in the Kernel, for output to the config. + # Note that times which should be reported in the mean per iteration are already so computed. + self.kernel.custom_state["srv_store_model"] += self.elapsed_time["STORE_MODEL"] / self.no_of_iterations + self.kernel.custom_state["srv_combine_model"] += self.elapsed_time["COMBINE_MODEL"] / self.no_of_iterations - ### Simulation participation messages. + # Allow the base class to perform stopping activities. + super().kernelStopping() - # The service agent does not require wakeup calls. + ### Simulation participation messages. - def receiveMessage (self, currentTime, msg): - # Allow the base Agent to do whatever it needs to. - super().receiveMessage(currentTime, msg) + # The service agent does not require wakeup calls. 
- # Logic for receiving weights from client agents. The weights are almost certainly - # noisy and encrypted, but that doesn't matter to us. - if msg.body['msg'] == "CLIENT_WEIGHTS": - - # Start wallclock timing for message handling. - dt_combine_complete = None - dt_start_rcv = pd.Timestamp('now') + def receiveMessage(self, currentTime, msg): + # Allow the base Agent to do whatever it needs to. + super().receiveMessage(currentTime, msg) - sender = msg.body['sender'] - if sender in self.received: return + # Logic for receiving weights from client agents. The weights are almost certainly + # noisy and encrypted, but that doesn't matter to us. + if msg.body["msg"] == "CLIENT_WEIGHTS": - self.received[sender] = True - self.total.append(msg.body['weights'].tolist()) + # Start wallclock timing for message handling. + dt_combine_complete = None + dt_start_rcv = pd.Timestamp("now") - # Capture elapsed wallclock for model storage. - dt_store_complete = pd.Timestamp('now') + sender = msg.body["sender"] + if sender in self.received: + return - log_print ("Server received {} from {}.", msg.body['weights'], msg.body['sender']) + self.received[sender] = True + self.total.append(msg.body["weights"].tolist()) - if len(self.received.keys()) >= self.num_clients: - # This is the last client on whom we were waiting. - self.combineWeights() + # Capture elapsed wallclock for model storage. + dt_store_complete = pd.Timestamp("now") - # Capture elapsed wallclock for model combination. - dt_combine_complete = pd.Timestamp('now') + log_print("Server received {} from {}.", msg.body["weights"], msg.body["sender"]) - # Then clear the protocol attributes for the next round. - self.received = {} - self.total = [] + if len(self.received.keys()) >= self.num_clients: + # This is the last client on whom we were waiting. + self.combineWeights() - # Capture elapsed wallclock at end of CLIENT_WEIGHTS. - dt_end_rcv = pd.Timestamp('now') + # Capture elapsed wallclock for model combination. 
+ dt_combine_complete = pd.Timestamp("now") - # Compute time deltas only after all elapsed times are captured. - if dt_combine_complete is not None: self.elapsed_time['COMBINE_MODEL'] += dt_combine_complete - dt_store_complete - self.elapsed_time['STORE_MODEL'] += dt_store_complete - dt_start_rcv - elapsed_total = int((dt_end_rcv - dt_start_rcv).to_timedelta64()) + # Then clear the protocol attributes for the next round. + self.received = {} + self.total = [] - # Use total elapsed wallclock as computation delay. - self.setComputationDelay(elapsed_total) + # Capture elapsed wallclock at end of CLIENT_WEIGHTS. + dt_end_rcv = pd.Timestamp("now") - elif msg.body['msg'] == "FWD_MSG": - # In our star topology, all client messages are forwarded securely through the server. - sender = msg.body['sender'] - recipient = msg.body['recipient'] - msg.body['msg'] = msg.body['msgToForward'] + # Compute time deltas only after all elapsed times are captured. + if dt_combine_complete is not None: + self.elapsed_time["COMBINE_MODEL"] += dt_combine_complete - dt_store_complete + self.elapsed_time["STORE_MODEL"] += dt_store_complete - dt_start_rcv + elapsed_total = int((dt_end_rcv - dt_start_rcv).to_timedelta64()) - self.setComputationDelay(self.msg_fwd_delay) + # Use total elapsed wallclock as computation delay. + self.setComputationDelay(elapsed_total) - # Normally not advisable, but here we need to fix up the sender so the - # server can be a silent proxy. - self.kernel.sendMessage(sender, recipient, msg) + elif msg.body["msg"] == "FWD_MSG": + # In our star topology, all client messages are forwarded securely through the server. + sender = msg.body["sender"] + recipient = msg.body["recipient"] + msg.body["msg"] = msg.body["msgToForward"] + self.setComputationDelay(self.msg_fwd_delay) - ### Combine client weights and respond to each client. - def combineWeights (self): + # Normally not advisable, but here we need to fix up the sender so the + # server can be a silent proxy. 
+ self.kernel.sendMessage(sender, recipient, msg) - log_print ("total: {}", self.total) + ### Combine client weights and respond to each client. + def combineWeights(self): - # Don't respond after the final iteration. - if (self.current_iteration < self.no_of_iterations): + log_print("total: {}", self.total) - # Take the mean weights across the clients. - self.total = np.array(self.total) - totals = np.mean(self.total, axis=0) + # Don't respond after the final iteration. + if self.current_iteration < self.no_of_iterations: - # Send the combined weights back to each client who participated. - for sender in self.received.keys(): - log_print ("Sending {} to {}", totals, sender) - self.sendMessage(sender, Message({ "msg" : "SHARED_WEIGHTS", "sender": self.id, "weights" : deepcopy(totals) })) + # Take the mean weights across the clients. + self.total = np.array(self.total) + totals = np.mean(self.total, axis=0) - # This is the end of one round of the protocol. - self.current_iteration += 1 + # Send the combined weights back to each client who participated. + for sender in self.received.keys(): + log_print("Sending {} to {}", totals, sender) + self.sendMessage( + sender, + Message( + { + "msg": "SHARED_WEIGHTS", + "sender": self.id, + "weights": deepcopy(totals), + } + ), + ) + # This is the end of one round of the protocol. 
+ self.current_iteration += 1 diff --git a/agent/examples/crypto/PPFL_TemplateClientAgent.py b/agent/examples/crypto/PPFL_TemplateClientAgent.py index eda1f7eaa..c8ebf2f14 100755 --- a/agent/examples/crypto/PPFL_TemplateClientAgent.py +++ b/agent/examples/crypto/PPFL_TemplateClientAgent.py @@ -1,16 +1,15 @@ -from agent.Agent import Agent -from agent.examples.crypto.PPFL_ServiceAgent import PPFL_ServiceAgent -from message.Message import Message -from util.util import log_print - -from util.crypto.logReg import getWeights, reportStats -import util.crypto.diffieHellman as dh +import random +from os.path import exists import numpy as np -from os.path import exists import pandas as pd -import random +import util.crypto.diffieHellman as dh +from agent.Agent import Agent +from agent.examples.crypto.PPFL_ServiceAgent import PPFL_ServiceAgent +from message.Message import Message +from util.crypto.logReg import getWeights, reportStats +from util.util import log_print # The PPFL_TemplateClientAgent class inherits from the base Agent class. It has the # structure of a secure federated learning protocol with secure multiparty communication, @@ -18,294 +17,327 @@ # which the client parties simply pass around arbitrary data. Sections that would need # to be completed are clearly marked. -class PPFL_TemplateClientAgent(Agent): - - def __init__(self, id, name, type, peer_list=None, iterations=4, multiplier=10000, secret_scale = 100000, - X_train = None, y_train = None, X_test = None, y_test = None, split_size = None, - num_clients = None, num_subgraphs = None, random_state=None): - - # Base class init. - super().__init__(id, name, type, random_state) - - - # Store the client's peer list (subgraph, neighborhood) with which it should communicate. - self.peer_list = peer_list - - # Initialize a tracking attribute for the initial peer exchange and record the subgraph size. 
- self.peer_exchange_complete = False - self.num_peers = len(self.peer_list) - - # Record the total number of clients participating in the protocol and the number of subgraphs. - # Neither of these are part of the protocol, or necessary for real-world implementation, but do - # allow for convenient logging of progress and results in simulation. - self.num_clients = num_clients - self.num_subgraphs = num_subgraphs - - # Record the number of protocol (federated learning) iterations the clients will perform. - self.no_of_iterations = iterations - - # Record the multiplier that will be used to protect against floating point accuracy loss and - # the scale of the client shared secrets. - self.multiplier = multiplier - self.secret_scale = secret_scale - - # Record the training and testing splits for the data set to be learned. - self.X_train = X_train - self.y_train = y_train - - self.X_test = X_test - self.y_test = y_test - - # Record the number of features in the data set. - self.no_of_weights = X_train.shape[1] - - # Initialize an attribute to remember the shared weights returned from the server. - self.prevWeight = np.zeros(self.no_of_weights) - - # Each client receives only a portion of the training data each protocol iteration. - self.split_size = split_size - - # Initialize a dictionary to remember which peers we have heard from during peer exchange. - self.peers_received = {} - - # Initialize a dictionary to accumulate this client's timing information by task. - self.elapsed_time = { 'DH_OFFLINE' : pd.Timedelta(0), 'DH_ONLINE' : pd.Timedelta(0), - 'TRAINING' : pd.Timedelta(0), 'ENCRYPTION' : pd.Timedelta(0) } - - # Pre-generate this client's local training data for each iteration (for the sake of simulation speed). - self.trainX = [] - self.trainY = [] - - # This is a faster PRNG than the default, for times when we must select a large quantity of randomness. 
- self.prng = np.random.Generator(np.random.SFC64()) - - ### Data randomly selected from total training set each iteration, simulating online behavior. - for i in range(iterations): - slice = self.prng.choice(range(self.X_train.shape[0]), size = split_size, replace = False) - - # Pull together the current local training set. - self.trainX.append(self.X_train[slice].copy()) - self.trainY.append(self.y_train[slice].copy()) - - - # Create dictionaries to hold the public and secure keys for this client, and the public keys shared - # by its peers. - self.pubkeys = {} - self.seckeys = {} - self.peer_public_keys = {} - - # Create dictionaries to hold the shared key for each peer each iteration and the seed for the - # following iteration. - self.r = {} - self.R = {} - - - ### ADD DIFFERENTIAL PRIVACY CONSTANTS AND CONFIGURATION HERE, IF NEEDED. - # - # - - - # Iteration counter. - self.current_iteration = 0 - - - - - ### Simulation lifecycle messages. - - def kernelStarting(self, startTime): - - # Initialize custom state properties into which we will later accumulate results. - # To avoid redundancy, we allow only the first client to handle initialization. - if self.id == 1: - self.kernel.custom_state['dh_offline'] = pd.Timedelta(0) - self.kernel.custom_state['dh_online'] = pd.Timedelta(0) - self.kernel.custom_state['training'] = pd.Timedelta(0) - self.kernel.custom_state['encryption'] = pd.Timedelta(0) - - # Find the PPFL service agent, so messages can be directed there. - self.serviceAgentID = self.kernel.findAgentByType(PPFL_ServiceAgent) - - # Request a wake-up call as in the base Agent. Noise is kept small because - # the overall protocol duration is so short right now. (up to one microsecond) - super().kernelStarting(startTime + pd.Timedelta(self.random_state.randint(low = 0, high = 1000), unit='ns')) - - - def kernelStopping(self): - - # Accumulate into the Kernel's "custom state" this client's elapsed times per category. 
- # Note that times which should be reported in the mean per iteration are already so computed. - # These will be output to the config (experiment) file at the end of the simulation. - - self.kernel.custom_state['dh_offline'] += self.elapsed_time['DH_OFFLINE'] - self.kernel.custom_state['dh_online'] += (self.elapsed_time['DH_ONLINE'] / self.no_of_iterations) - self.kernel.custom_state['training'] += (self.elapsed_time['TRAINING'] / self.no_of_iterations) - self.kernel.custom_state['encryption'] += (self.elapsed_time['ENCRYPTION'] / self.no_of_iterations) - - super().kernelStopping() - - - ### Simulation participation messages. - - def wakeup (self, currentTime): - super().wakeup(currentTime) - - # Record start of wakeup for real-time computation delay.. - dt_wake_start = pd.Timestamp('now') - - # Check if the clients are still performing the one-time peer exchange. - if not self.peer_exchange_complete: +class PPFL_TemplateClientAgent(Agent): - # Generate DH keys. - self.pubkeys, self.seckeys = dh.dict_keygeneration( self.peer_list ) + def __init__( + self, + id, + name, + type, + peer_list=None, + iterations=4, + multiplier=10000, + secret_scale=100000, + X_train=None, + y_train=None, + X_test=None, + y_test=None, + split_size=None, + num_clients=None, + num_subgraphs=None, + random_state=None, + ): + + # Base class init. + super().__init__(id, name, type, random_state) + + # Store the client's peer list (subgraph, neighborhood) with which it should communicate. + self.peer_list = peer_list + + # Initialize a tracking attribute for the initial peer exchange and record the subgraph size. + self.peer_exchange_complete = False + self.num_peers = len(self.peer_list) + + # Record the total number of clients participating in the protocol and the number of subgraphs. + # Neither of these are part of the protocol, or necessary for real-world implementation, but do + # allow for convenient logging of progress and results in simulation. 
+ self.num_clients = num_clients + self.num_subgraphs = num_subgraphs + + # Record the number of protocol (federated learning) iterations the clients will perform. + self.no_of_iterations = iterations + + # Record the multiplier that will be used to protect against floating point accuracy loss and + # the scale of the client shared secrets. + self.multiplier = multiplier + self.secret_scale = secret_scale + + # Record the training and testing splits for the data set to be learned. + self.X_train = X_train + self.y_train = y_train + + self.X_test = X_test + self.y_test = y_test + + # Record the number of features in the data set. + self.no_of_weights = X_train.shape[1] + + # Initialize an attribute to remember the shared weights returned from the server. + self.prevWeight = np.zeros(self.no_of_weights) + + # Each client receives only a portion of the training data each protocol iteration. + self.split_size = split_size + + # Initialize a dictionary to remember which peers we have heard from during peer exchange. + self.peers_received = {} + + # Initialize a dictionary to accumulate this client's timing information by task. + self.elapsed_time = { + "DH_OFFLINE": pd.Timedelta(0), + "DH_ONLINE": pd.Timedelta(0), + "TRAINING": pd.Timedelta(0), + "ENCRYPTION": pd.Timedelta(0), + } + + # Pre-generate this client's local training data for each iteration (for the sake of simulation speed). + self.trainX = [] + self.trainY = [] + + # This is a faster PRNG than the default, for times when we must select a large quantity of randomness. + self.prng = np.random.Generator(np.random.SFC64()) + + ### Data randomly selected from total training set each iteration, simulating online behavior. + for i in range(iterations): + slice = self.prng.choice(range(self.X_train.shape[0]), size=split_size, replace=False) + + # Pull together the current local training set. 
+ self.trainX.append(self.X_train[slice].copy()) + self.trainY.append(self.y_train[slice].copy()) + + # Create dictionaries to hold the public and secure keys for this client, and the public keys shared + # by its peers. + self.pubkeys = {} + self.seckeys = {} + self.peer_public_keys = {} + + # Create dictionaries to hold the shared key for each peer each iteration and the seed for the + # following iteration. + self.r = {} + self.R = {} + + ### ADD DIFFERENTIAL PRIVACY CONSTANTS AND CONFIGURATION HERE, IF NEEDED. + # + # - # Record elapsed wallclock for Diffie Hellman offline. - dt_wake_end = pd.Timestamp('now') - self.elapsed_time['DH_OFFLINE'] += dt_wake_end - dt_wake_start + # Iteration counter. + self.current_iteration = 0 - # Set computation delay to elapsed wallclock time. - self.setComputationDelay(int((dt_wake_end - dt_wake_start).to_timedelta64())) + ### Simulation lifecycle messages. - # Send generated values to peers. - for idx, peer in enumerate(self.peer_list): - # We assume a star network configuration where all messages between peers must be forwarded - # through the server. - self.sendMessage(self.serviceAgentID, Message({ "msg" : "FWD_MSG", "msgToForward" : "PEER_EXCHANGE", - "sender": self.id, "recipient": peer, "pubkey" : self.pubkeys[peer] })) + def kernelStarting(self, startTime): - else: - - # We are waking up to start a new iteration of the protocol. - # (Peer exchange is done before all this.) + # Initialize custom state properties into which we will later accumulate results. + # To avoid redundancy, we allow only the first client to handle initialization. 
+ if self.id == 1: + self.kernel.custom_state["dh_offline"] = pd.Timedelta(0) + self.kernel.custom_state["dh_online"] = pd.Timedelta(0) + self.kernel.custom_state["training"] = pd.Timedelta(0) + self.kernel.custom_state["encryption"] = pd.Timedelta(0) - if (self.current_iteration == 0): - # During iteration 0 (only) we complete the key exchange and prepare the - # common key list, because at this point we know we have received keys - # from all peers. + # Find the PPFL service agent, so messages can be directed there. + self.serviceAgentID = self.kernel.findAgentByType(PPFL_ServiceAgent) - # R is the common key dictionary (by peer agent id). - dh.dict_keyexchange(self.peer_list, self.id, self.pubkeys, self.seckeys, self.peer_public_keys) + # Request a wake-up call as in the base Agent. Noise is kept small because + # the overall protocol duration is so short right now. (up to one microsecond) + super().kernelStarting(startTime + pd.Timedelta(self.random_state.randint(low=0, high=1000), unit="ns")) + def kernelStopping(self): - # CREATE AND CACHE LOCAL DIFFERENTIAL PRIVACY NOISE HERE, IF NEEDED. - # - # + # Accumulate into the Kernel's "custom state" this client's elapsed times per category. + # Note that times which should be reported in the mean per iteration are already so computed. + # These will be output to the config (experiment) file at the end of the simulation. + self.kernel.custom_state["dh_offline"] += self.elapsed_time["DH_OFFLINE"] + self.kernel.custom_state["dh_online"] += self.elapsed_time["DH_ONLINE"] / self.no_of_iterations + self.kernel.custom_state["training"] += self.elapsed_time["TRAINING"] / self.no_of_iterations + self.kernel.custom_state["encryption"] += self.elapsed_time["ENCRYPTION"] / self.no_of_iterations - # Diffie Hellman is done in every iteration. - for peer_id, common_key in self.R.items(): + super().kernelStopping() - random.seed(common_key) - rand = random.getrandbits(512) + ### Simulation participation messages. 
- rand_b_raw = format(rand, '0512b') - rand_b_rawr = rand_b_raw[:256] - rand_b_rawR = rand_b_raw[256:] + def wakeup(self, currentTime): + super().wakeup(currentTime) + # Record start of wakeup for real-time computation delay.. + dt_wake_start = pd.Timestamp("now") - # Negate offsets below this agent's id. This ensures each offset will be - # added once and subtracted once. - r = int(rand_b_rawr,2) % (2**32) + # Check if the clients are still performing the one-time peer exchange. + if not self.peer_exchange_complete: - # Update dictionary of shared secrets for this iteration. - self.r[peer_id] = r if peer_id < self.id else -r + # Generate DH keys. + self.pubkeys, self.seckeys = dh.dict_keygeneration(self.peer_list) - # Store the shared seeds for the next iteration. - self.R[peer_id] = int(rand_b_rawR,2) + # Record elapsed wallclock for Diffie Hellman offline. + dt_wake_end = pd.Timestamp("now") + self.elapsed_time["DH_OFFLINE"] += dt_wake_end - dt_wake_start + # Set computation delay to elapsed wallclock time. + self.setComputationDelay(int((dt_wake_end - dt_wake_start).to_timedelta64())) - # Record elapsed wallclock for Diffie Hellman online. - dt_online_complete = pd.Timestamp('now') + # Send generated values to peers. + for idx, peer in enumerate(self.peer_list): + # We assume a star network configuration where all messages between peers must be forwarded + # through the server. + self.sendMessage( + self.serviceAgentID, + Message( + { + "msg": "FWD_MSG", + "msgToForward": "PEER_EXCHANGE", + "sender": self.id, + "recipient": peer, + "pubkey": self.pubkeys[peer], + } + ), + ) - # For convenience of things indexed by iteration... - i = self.current_iteration + else: + # We are waking up to start a new iteration of the protocol. + # (Peer exchange is done before all this.) - ### ADD LOCAL LEARNING METHOD HERE, IF NEEDED. 
- # - # + if self.current_iteration == 0: + # During iteration 0 (only) we complete the key exchange and prepare the + # common key list, because at this point we know we have received keys + # from all peers. - weight = np.random.normal (loc = self.prevWeight, scale = self.prevWeight / 10, size = self.prevWeight.shape) + # R is the common key dictionary (by peer agent id). + dh.dict_keyexchange( + self.peer_list, + self.id, + self.pubkeys, + self.seckeys, + self.peer_public_keys, + ) + # CREATE AND CACHE LOCAL DIFFERENTIAL PRIVACY NOISE HERE, IF NEEDED. + # + # - # Record elapsed wallclock for training model. - dt_training_complete = pd.Timestamp('now') + # Diffie Hellman is done in every iteration. + for peer_id, common_key in self.R.items(): + random.seed(common_key) + rand = random.getrandbits(512) - ### ADD NOISE TO THE WEIGHTS HERE, IF NEEDED. - # - # + rand_b_raw = format(rand, "0512b") + rand_b_rawr = rand_b_raw[:256] + rand_b_rawR = rand_b_raw[256:] - n = np.array(weight) * self.multiplier - weights_to_send = n + sum(self.r.values()) + # Negate offsets below this agent's id. This ensures each offset will be + # added once and subtracted once. + r = int(rand_b_rawr, 2) % (2**32) - - # Record elapsed wallclock for encryption. - dt_encryption_complete = pd.Timestamp('now') + # Update dictionary of shared secrets for this iteration. + self.r[peer_id] = r if peer_id < self.id else -r - # Set computation delay to elapsed wallclock time. - self.setComputationDelay(int((dt_encryption_complete - dt_wake_start).to_timedelta64())) + # Store the shared seeds for the next iteration. + self.R[peer_id] = int(rand_b_rawR, 2) - # Send the message to the server. - self.sendMessage(self.serviceAgentID, Message({ "msg" : "CLIENT_WEIGHTS", "sender": self.id, - "weights" : weights_to_send })) - - self.current_iteration += 1 + # Record elapsed wallclock for Diffie Hellman online. + dt_online_complete = pd.Timestamp("now") - # Store elapsed times by category. 
- self.elapsed_time['DH_ONLINE'] += dt_online_complete - dt_wake_start - self.elapsed_time['TRAINING'] += dt_training_complete - dt_online_complete - self.elapsed_time['ENCRYPTION'] += dt_encryption_complete - dt_training_complete + # For convenience of things indexed by iteration... + i = self.current_iteration + ### ADD LOCAL LEARNING METHOD HERE, IF NEEDED. + # + # + + weight = np.random.normal( + loc=self.prevWeight, + scale=self.prevWeight / 10, + size=self.prevWeight.shape, + ) + # Record elapsed wallclock for training model. + dt_training_complete = pd.Timestamp("now") - def receiveMessage (self, currentTime, msg): - super().receiveMessage(currentTime, msg) + ### ADD NOISE TO THE WEIGHTS HERE, IF NEEDED. + # + # + + n = np.array(weight) * self.multiplier + weights_to_send = n + sum(self.r.values()) + + # Record elapsed wallclock for encryption. + dt_encryption_complete = pd.Timestamp("now") + + # Set computation delay to elapsed wallclock time. + self.setComputationDelay(int((dt_encryption_complete - dt_wake_start).to_timedelta64())) + + # Send the message to the server. + self.sendMessage( + self.serviceAgentID, + Message( + { + "msg": "CLIENT_WEIGHTS", + "sender": self.id, + "weights": weights_to_send, + } + ), + ) + + self.current_iteration += 1 + + # Store elapsed times by category. + self.elapsed_time["DH_ONLINE"] += dt_online_complete - dt_wake_start + self.elapsed_time["TRAINING"] += dt_training_complete - dt_online_complete + self.elapsed_time["ENCRYPTION"] += dt_encryption_complete - dt_training_complete - if msg.body['msg'] == "PEER_EXCHANGE": + def receiveMessage(self, currentTime, msg): + super().receiveMessage(currentTime, msg) - # Record start of message processing. - dt_rcv_start = pd.Timestamp('now') + if msg.body["msg"] == "PEER_EXCHANGE": - # Ensure we don't somehow record the same peer twice. 
These all come from the - # service provider, relayed from other clients, but are "fixed up" to appear - # as if they come straight from the relevant peer. - if msg.body['sender'] not in self.peers_received: + # Record start of message processing. + dt_rcv_start = pd.Timestamp("now") - # Record the content of the message and that we received it. - self.peers_received[msg.body['sender']] = True - self.peer_public_keys[msg.body['sender']] = msg.body['pubkey'] + # Ensure we don't somehow record the same peer twice. These all come from the + # service provider, relayed from other clients, but are "fixed up" to appear + # as if they come straight from the relevant peer. + if msg.body["sender"] not in self.peers_received: - # Record end of message processing. - dt_rcv_end = pd.Timestamp('now') + # Record the content of the message and that we received it. + self.peers_received[msg.body["sender"]] = True + self.peer_public_keys[msg.body["sender"]] = msg.body["pubkey"] - # Store elapsed times by category. - self.elapsed_time['DH_OFFLINE'] += dt_rcv_end - dt_rcv_start + # Record end of message processing. + dt_rcv_end = pd.Timestamp("now") - # Set computation delay to elapsed wallclock time. - self.setComputationDelay(int((dt_rcv_end - dt_rcv_start).to_timedelta64())) + # Store elapsed times by category. + self.elapsed_time["DH_OFFLINE"] += dt_rcv_end - dt_rcv_start - # If this is the last peer from whom we expect to hear, move on with the protocol. - if len(self.peers_received) == self.num_peers: - self.peer_exchange_complete = True - self.setWakeup(currentTime + pd.Timedelta('1ns')) + # Set computation delay to elapsed wallclock time. + self.setComputationDelay(int((dt_rcv_end - dt_rcv_start).to_timedelta64())) - elif msg.body['msg'] == "SHARED_WEIGHTS": - # Reset computation delay. - self.setComputationDelay(0) + # If this is the last peer from whom we expect to hear, move on with the protocol. 
+ if len(self.peers_received) == self.num_peers: + self.peer_exchange_complete = True + self.setWakeup(currentTime + pd.Timedelta("1ns")) - # Extract the shared weights from the message. - self.prevWeight = msg.body['weights'] + elif msg.body["msg"] == "SHARED_WEIGHTS": + # Reset computation delay. + self.setComputationDelay(0) - # Remove the multiplier that was helping guard against floating point error. - self.prevWeight /= self.multiplier + # Extract the shared weights from the message. + self.prevWeight = msg.body["weights"] - log_print ("Client weights received for iteration {} by {}: {}", self.current_iteration, self.id, self.prevWeight) + # Remove the multiplier that was helping guard against floating point error. + self.prevWeight /= self.multiplier - if self.id == 1: print (f"Protocol iteration {self.current_iteration} complete.") + log_print( + "Client weights received for iteration {} by {}: {}", + self.current_iteration, + self.id, + self.prevWeight, + ) - # Start a new iteration if we are not at the end of the protocol. - if self.current_iteration < self.no_of_iterations: - self.setWakeup(currentTime + pd.Timedelta('1ns')) + if self.id == 1: + print(f"Protocol iteration {self.current_iteration} complete.") + # Start a new iteration if we are not at the end of the protocol. 
+ if self.current_iteration < self.no_of_iterations: + self.setWakeup(currentTime + pd.Timedelta("1ns")) diff --git a/agent/execution/ExecutionAgent.py b/agent/execution/ExecutionAgent.py index cca80fbc4..690edff66 100644 --- a/agent/execution/ExecutionAgent.py +++ b/agent/execution/ExecutionAgent.py @@ -1,16 +1,35 @@ -import pandas as pd import datetime +import pandas as pd + from agent.TradingAgent import TradingAgent from util.util import log_print class ExecutionAgent(TradingAgent): - def __init__(self, id, name, type, symbol, starting_cash, - direction, quantity, execution_time_horizon, - trade=True, log_orders=False, random_state=None): - super().__init__(id, name, type, starting_cash=starting_cash, log_orders=log_orders, random_state=random_state) + def __init__( + self, + id, + name, + type, + symbol, + starting_cash, + direction, + quantity, + execution_time_horizon, + trade=True, + log_orders=False, + random_state=None, + ): + super().__init__( + id, + name, + type, + starting_cash=starting_cash, + log_orders=log_orders, + random_state=random_state, + ) self.symbol = symbol self.direction = direction self.quantity = quantity @@ -27,23 +46,27 @@ def __init__(self, id, name, type, symbol, starting_cash, self.trade = trade self.log_orders = log_orders - self.state = 'AWAITING_WAKEUP' + self.state = "AWAITING_WAKEUP" def kernelStopping(self): super().kernelStopping() if self.trade: - slippage = self.get_average_transaction_price() - self.arrival_price if self.direction == 'BUY' else \ - self.arrival_price - self.get_average_transaction_price() - self.logEvent('DIRECTION', self.direction, True) - self.logEvent('TOTAL_QTY', self.quantity, True) - self.logEvent('REM_QTY', self.rem_quantity, True) - self.logEvent('ARRIVAL_MID', self.arrival_price, True) - self.logEvent('AVG_TXN_PRICE', self.get_average_transaction_price(), True) - self.logEvent('SLIPPAGE', slippage, True) + slippage = ( + self.get_average_transaction_price() - self.arrival_price + if 
self.direction == "BUY" + else self.arrival_price - self.get_average_transaction_price() + ) + self.logEvent("DIRECTION", self.direction, True) + self.logEvent("TOTAL_QTY", self.quantity, True) + self.logEvent("REM_QTY", self.rem_quantity, True) + self.logEvent("ARRIVAL_MID", self.arrival_price, True) + self.logEvent("AVG_TXN_PRICE", self.get_average_transaction_price(), True) + self.logEvent("SLIPPAGE", slippage, True) def wakeup(self, currentTime): can_trade = super().wakeup(currentTime) - if not can_trade: return + if not can_trade: + return if self.trade: try: self.setWakeup([time for time in self.execution_time_horizon if time > currentTime][0]) @@ -51,56 +74,81 @@ def wakeup(self, currentTime): pass self.getCurrentSpread(self.symbol, depth=1000) - self.state = 'AWAITING_SPREAD' + self.state = "AWAITING_SPREAD" def receiveMessage(self, currentTime, msg): super().receiveMessage(currentTime, msg) - if msg.body['msg'] == 'ORDER_EXECUTED': self.handleOrderExecution(currentTime, msg) - elif msg.body['msg'] == 'ORDER_ACCEPTED': self.handleOrderAcceptance(currentTime, msg) - if self.rem_quantity > 0 and self.state == 'AWAITING_SPREAD' and msg.body['msg'] == 'QUERY_SPREAD': + if msg.body["msg"] == "ORDER_EXECUTED": + self.handleOrderExecution(currentTime, msg) + elif msg.body["msg"] == "ORDER_ACCEPTED": + self.handleOrderAcceptance(currentTime, msg) + if self.rem_quantity > 0 and self.state == "AWAITING_SPREAD" and msg.body["msg"] == "QUERY_SPREAD": self.cancelOrders() self.placeOrders(currentTime) def handleOrderExecution(self, currentTime, msg): - executed_order = msg.body['order'] + executed_order = msg.body["order"] self.executed_orders.append(executed_order) executed_qty = sum(executed_order.quantity for executed_order in self.executed_orders) self.rem_quantity = self.quantity - executed_qty - log_print('[---- {} - {} ----]: LIMIT ORDER EXECUTED - {} @ {}'.format(self.name, currentTime, - executed_order.quantity, - executed_order.fill_price)) - log_print('[---- 
{} - {} ----]: EXECUTED QUANTITY: {}'.format(self.name, currentTime, executed_qty)) - log_print('[---- {} - {} ----]: REMAINING QUANTITY: {}'.format(self.name, currentTime, self.rem_quantity)) - log_print('[---- {} - {} ----]: % EXECUTED: {} \n'.format(self.name, currentTime, - round((1 - self.rem_quantity / self.quantity) * 100, 2))) + log_print( + "[---- {} - {} ----]: LIMIT ORDER EXECUTED - {} @ {}".format( + self.name, + currentTime, + executed_order.quantity, + executed_order.fill_price, + ) + ) + log_print("[---- {} - {} ----]: EXECUTED QUANTITY: {}".format(self.name, currentTime, executed_qty)) + log_print("[---- {} - {} ----]: REMAINING QUANTITY: {}".format(self.name, currentTime, self.rem_quantity)) + log_print( + "[---- {} - {} ----]: % EXECUTED: {} \n".format( + self.name, + currentTime, + round((1 - self.rem_quantity / self.quantity) * 100, 2), + ) + ) def handleOrderAcceptance(self, currentTime, msg): - accepted_order = msg.body['order'] + accepted_order = msg.body["order"] self.accepted_orders.append(accepted_order) accepted_qty = sum(accepted_order.quantity for accepted_order in self.accepted_orders) - log_print('[---- {} - {} ----]: ACCEPTED QUANTITY : {}'.format(self.name, currentTime, accepted_qty)) + log_print("[---- {} - {} ----]: ACCEPTED QUANTITY : {}".format(self.name, currentTime, accepted_qty)) def placeOrders(self, currentTime): - if currentTime.floor('1s') == self.execution_time_horizon[-2]: - self.placeMarketOrder(symbol=self.symbol, quantity=self.rem_quantity, is_buy_order=self.direction == 'BUY') - elif currentTime.floor('1s') in self.execution_time_horizon[:-2]: + if currentTime.floor("1s") == self.execution_time_horizon[-2]: + self.placeMarketOrder( + symbol=self.symbol, + quantity=self.rem_quantity, + is_buy_order=self.direction == "BUY", + ) + elif currentTime.floor("1s") in self.execution_time_horizon[:-2]: bid, _, ask, _ = self.getKnownBidAsk(self.symbol) - if currentTime.floor('1s') == self.start_time: + if 
currentTime.floor("1s") == self.start_time: self.arrival_price = (bid + ask) / 2 - log_print("[---- {} - {} ----]: Arrival Mid Price {}".format(self.name, currentTime, - self.arrival_price)) - - qty = self.schedule[pd.Interval(currentTime.floor('1s'), - currentTime.floor('1s')+datetime.timedelta(minutes=1))] - price = ask if self.direction == 'BUY' else bid - self.placeLimitOrder(symbol=self.symbol, quantity=qty, - is_buy_order=self.direction == 'BUY', limit_price=price) - log_print('[---- {} - {} ----]: LIMIT ORDER PLACED - {} @ {}'.format(self.name, currentTime, qty, price)) + log_print( + "[---- {} - {} ----]: Arrival Mid Price {}".format(self.name, currentTime, self.arrival_price) + ) + + qty = self.schedule[ + pd.Interval( + currentTime.floor("1s"), + currentTime.floor("1s") + datetime.timedelta(minutes=1), + ) + ] + price = ask if self.direction == "BUY" else bid + self.placeLimitOrder( + symbol=self.symbol, + quantity=qty, + is_buy_order=self.direction == "BUY", + limit_price=price, + ) + log_print("[---- {} - {} ----]: LIMIT ORDER PLACED - {} @ {}".format(self.name, currentTime, qty, price)) def cancelOrders(self): for _, order in self.orders.items(): self.cancelOrder(order) def getWakeFrequency(self): - return self.execution_time_horizon[0] - self.mkt_open \ No newline at end of file + return self.execution_time_horizon[0] - self.mkt_open diff --git a/agent/execution/POVExecutionAgent.py b/agent/execution/POVExecutionAgent.py index a0ab1e5ab..17c5c1108 100644 --- a/agent/execution/POVExecutionAgent.py +++ b/agent/execution/POVExecutionAgent.py @@ -1,20 +1,45 @@ import sys import warnings + import pandas as pd from agent.TradingAgent import TradingAgent from util.util import log_print -POVExecutionWarning_msg = "Running a configuration using POVExecutionAgent requires an ExchangeAgent with " \ - "attribute `stream_history` set to a large value, recommended at sys.maxsize." 
+POVExecutionWarning_msg = ( + "Running a configuration using POVExecutionAgent requires an ExchangeAgent with " + "attribute `stream_history` set to a large value, recommended at sys.maxsize." +) class POVExecutionAgent(TradingAgent): - def __init__(self, id, name, type, symbol, starting_cash, - direction, quantity, pov, start_time, freq, lookback_period, end_time=None, - trade=True, log_orders=False, random_state=None): - super().__init__(id, name, type, starting_cash=starting_cash, log_orders=log_orders, random_state=random_state) + def __init__( + self, + id, + name, + type, + symbol, + starting_cash, + direction, + quantity, + pov, + start_time, + freq, + lookback_period, + end_time=None, + trade=True, + log_orders=False, + random_state=None, + ): + super().__init__( + id, + name, + type, + starting_cash=starting_cash, + log_orders=log_orders, + random_state=random_state, + ) self.log_events = True # save events for plotting self.symbol = symbol self.direction = direction @@ -27,67 +52,79 @@ def __init__(self, id, name, type, symbol, starting_cash, self.look_back_period = lookback_period self.trade = trade self.accepted_orders = [] - self.state = 'AWAITING_WAKEUP' + self.state = "AWAITING_WAKEUP" warnings.warn(POVExecutionWarning_msg, UserWarning, stacklevel=1) self.processEndTime() def processEndTime(self): - """ Make end time of POV order sensible, i.e. if a time is given leave it alone; else, add 24 hours to start.""" + """Make end time of POV order sensible, i.e. 
if a time is given leave it alone; else, add 24 hours to start.""" if self.end_time is None: - self.end_time = self.start_time + pd.to_timedelta('24 hours') + self.end_time = self.start_time + pd.to_timedelta("24 hours") def wakeup(self, currentTime): can_trade = super().wakeup(currentTime) self.setWakeup(currentTime + self.getWakeFrequency()) - if not can_trade: return + if not can_trade: + return if self.trade and self.rem_quantity > 0 and self.start_time < currentTime < self.end_time: self.cancelOrders() self.getCurrentSpread(self.symbol, depth=sys.maxsize) self.get_transacted_volume(self.symbol, lookback_period=self.look_back_period) - self.state = 'AWAITING_TRANSACTED_VOLUME' + self.state = "AWAITING_TRANSACTED_VOLUME" def getWakeFrequency(self): return pd.Timedelta(self.freq) def receiveMessage(self, currentTime, msg): super().receiveMessage(currentTime, msg) - if msg.body['msg'] == 'ORDER_EXECUTED': self.handleOrderExecution(currentTime, msg) - elif msg.body['msg'] == 'ORDER_ACCEPTED': self.handleOrderAcceptance(currentTime, msg) + if msg.body["msg"] == "ORDER_EXECUTED": + self.handleOrderExecution(currentTime, msg) + elif msg.body["msg"] == "ORDER_ACCEPTED": + self.handleOrderAcceptance(currentTime, msg) if currentTime > self.end_time: log_print( - f'[---- {self.name} - {currentTime} ----]: current time {currentTime} is after specified end time of POV order ' - f'{self.end_time}. TRADING CONCLUDED. ') + f"[---- {self.name} - {currentTime} ----]: current time {currentTime} is after specified end time of POV order " + f"{self.end_time}. TRADING CONCLUDED. 
" + ) return - if self.rem_quantity > 0 and \ - self.state == 'AWAITING_TRANSACTED_VOLUME' \ - and msg.body['msg'] == 'QUERY_TRANSACTED_VOLUME' \ - and self.transacted_volume[self.symbol] is not None\ - and currentTime > self.start_time: + if ( + self.rem_quantity > 0 + and self.state == "AWAITING_TRANSACTED_VOLUME" + and msg.body["msg"] == "QUERY_TRANSACTED_VOLUME" + and self.transacted_volume[self.symbol] is not None + and currentTime > self.start_time + ): qty = round(self.pov * self.transacted_volume[self.symbol]) self.cancelOrders() - self.placeMarketOrder(self.symbol, qty, self.direction == 'BUY') - log_print(f'[---- {self.name} - {currentTime} ----]: TOTAL TRANSACTED VOLUME IN THE LAST {self.look_back_period} = {self.transacted_volume[self.symbol]}') - log_print(f'[---- {self.name} - {currentTime} ----]: MARKET ORDER PLACED - {qty}') + self.placeMarketOrder(self.symbol, qty, self.direction == "BUY") + log_print( + f"[---- {self.name} - {currentTime} ----]: TOTAL TRANSACTED VOLUME IN THE LAST {self.look_back_period} = {self.transacted_volume[self.symbol]}" + ) + log_print(f"[---- {self.name} - {currentTime} ----]: MARKET ORDER PLACED - {qty}") def handleOrderAcceptance(self, currentTime, msg): - accepted_order = msg.body['order'] + accepted_order = msg.body["order"] self.accepted_orders.append(accepted_order) accepted_qty = sum(accepted_order.quantity for accepted_order in self.accepted_orders) - log_print(f'[---- {self.name} - {currentTime} ----]: ACCEPTED QUANTITY : {accepted_qty}') + log_print(f"[---- {self.name} - {currentTime} ----]: ACCEPTED QUANTITY : {accepted_qty}") def handleOrderExecution(self, currentTime, msg): - executed_order = msg.body['order'] + executed_order = msg.body["order"] self.executed_orders.append(executed_order) executed_qty = sum(executed_order.quantity for executed_order in self.executed_orders) self.rem_quantity = self.quantity - executed_qty - log_print(f'[---- {self.name} - {currentTime} ----]: LIMIT ORDER EXECUTED - 
{executed_order.quantity} @ {executed_order.fill_price}') - log_print(f'[---- {self.name} - {currentTime} ----]: EXECUTED QUANTITY: {executed_qty}') - log_print(f'[---- {self.name} - {currentTime} ----]: REMAINING QUANTITY (NOT EXECUTED): {self.rem_quantity}') - log_print(f'[---- {self.name} - {currentTime} ----]: % EXECUTED: {round((1 - self.rem_quantity / self.quantity) * 100, 2)} \n') + log_print( + f"[---- {self.name} - {currentTime} ----]: LIMIT ORDER EXECUTED - {executed_order.quantity} @ {executed_order.fill_price}" + ) + log_print(f"[---- {self.name} - {currentTime} ----]: EXECUTED QUANTITY: {executed_qty}") + log_print(f"[---- {self.name} - {currentTime} ----]: REMAINING QUANTITY (NOT EXECUTED): {self.rem_quantity}") + log_print( + f"[---- {self.name} - {currentTime} ----]: % EXECUTED: {round((1 - self.rem_quantity / self.quantity) * 100, 2)} \n" + ) def cancelOrders(self): for _, order in self.orders.items(): - self.cancelOrder(order) \ No newline at end of file + self.cancelOrder(order) diff --git a/agent/execution/TWAPExecutionAgent.py b/agent/execution/TWAPExecutionAgent.py index ccade3f2e..eee67d97d 100644 --- a/agent/execution/TWAPExecutionAgent.py +++ b/agent/execution/TWAPExecutionAgent.py @@ -1,4 +1,5 @@ import pandas as pd + from agent.execution.ExecutionAgent import ExecutionAgent from util.util import log_print @@ -14,12 +15,34 @@ class TWAPExecutionAgent(ExecutionAgent): - > 18,000 shares over 3 minutes = 100 shares per second """ - def __init__(self, id, name, type, symbol, starting_cash, - direction, quantity, execution_time_horizon, freq, - trade=True, log_orders=False, random_state=None): - super().__init__(id, name, type, symbol, starting_cash, - direction=direction, quantity=quantity, execution_time_horizon=execution_time_horizon, - trade=trade, log_orders=log_orders, random_state=random_state) + def __init__( + self, + id, + name, + type, + symbol, + starting_cash, + direction, + quantity, + execution_time_horizon, + freq, + trade=True, 
+ log_orders=False, + random_state=None, + ): + super().__init__( + id, + name, + type, + symbol, + starting_cash, + direction=direction, + quantity=quantity, + execution_time_horizon=execution_time_horizon, + trade=trade, + log_orders=log_orders, + random_state=random_state, + ) self.freq = freq self.schedule = self.generate_schedule() @@ -31,8 +54,8 @@ def generate_schedule(self): child_quantity = int(self.quantity / len(self.execution_time_horizon)) for b in bins: schedule[b] = child_quantity - log_print('[---- {} {} - Schedule ----]:'.format(self.name, self.currentTime)) - log_print('[---- {} {} - Total Number of Orders ----]: {}'.format(self.name, self.currentTime, len(schedule))) + log_print("[---- {} {} - Schedule ----]:".format(self.name, self.currentTime)) + log_print("[---- {} {} - Total Number of Orders ----]: {}".format(self.name, self.currentTime, len(schedule))) for t, q in schedule.items(): log_print("From: {}, To: {}, Quantity: {}".format(t.left.time(), t.right.time(), q)) - return schedule \ No newline at end of file + return schedule diff --git a/agent/execution/VWAPExecutionAgent.py b/agent/execution/VWAPExecutionAgent.py index 8058d2306..a4349d118 100644 --- a/agent/execution/VWAPExecutionAgent.py +++ b/agent/execution/VWAPExecutionAgent.py @@ -12,12 +12,35 @@ class VWAPExecutionAgent(ExecutionAgent): VWAP average price """ - def __init__(self, id, name, type, symbol, starting_cash, - direction, quantity, execution_time_horizon, freq, volume_profile_path, - trade=True, log_orders=False, random_state=None): - super().__init__(id, name, type, symbol, starting_cash, - direction=direction, quantity=quantity, execution_time_horizon=execution_time_horizon, - trade=trade, log_orders=log_orders, random_state=random_state) + def __init__( + self, + id, + name, + type, + symbol, + starting_cash, + direction, + quantity, + execution_time_horizon, + freq, + volume_profile_path, + trade=True, + log_orders=False, + random_state=None, + ): + super().__init__( 
+ id, + name, + type, + symbol, + starting_cash, + direction=direction, + quantity=quantity, + execution_time_horizon=execution_time_horizon, + trade=trade, + log_orders=log_orders, + random_state=random_state, + ) self.freq = freq self.volume_profile_path = volume_profile_path self.schedule = self.generate_schedule() @@ -33,23 +56,23 @@ def generate_schedule(self): bins = pd.interval_range(start=self.start_time, end=self.end_time, freq=self.freq) for b in bins: schedule[b] = round(volume_profile[b.left] * self.quantity) - log_print('[---- {} {} - Schedule ----]:'.format(self.name, self.currentTime)) - log_print('[---- {} {} - Total Number of Orders ----]: {}'.format(self.name, self.currentTime, len(schedule))) + log_print("[---- {} {} - Schedule ----]:".format(self.name, self.currentTime)) + log_print("[---- {} {} - Total Number of Orders ----]: {}".format(self.name, self.currentTime, len(schedule))) for t, q in schedule.items(): log_print("From: {}, To: {}, Quantity: {}".format(t.left.time(), t.right.time(), q)) return schedule @staticmethod def synthetic_volume_profile(date, freq): - mkt_open = pd.to_datetime(date.date()) + pd.to_timedelta('09:30:00') - mkt_close = pd.to_datetime(date.date()) + pd.to_timedelta('16:00:00') + mkt_open = pd.to_datetime(date.date()) + pd.to_timedelta("09:30:00") + mkt_close = pd.to_datetime(date.date()) + pd.to_timedelta("16:00:00") day_range = pd.date_range(mkt_open, mkt_close, freq=freq) vol_profile = {} for t, x in zip(day_range, range(int(-len(day_range) / 2), int(len(day_range) / 2), 1)): - vol_profile[t] = x ** 2 + 2 * x + 2 + vol_profile[t] = x**2 + 2 * x + 2 factor = 1.0 / sum(vol_profile.values()) vol_profile = {k: v * factor for k, v in vol_profile.items()} - return vol_profile \ No newline at end of file + return vol_profile diff --git a/agent/market_makers/AdaptiveMarketMakerAgent.py b/agent/market_makers/AdaptiveMarketMakerAgent.py index 40d9de049..0596704e4 100644 --- a/agent/market_makers/AdaptiveMarketMakerAgent.py 
+++ b/agent/market_makers/AdaptiveMarketMakerAgent.py @@ -1,51 +1,83 @@ -from agent.TradingAgent import TradingAgent +from collections import deque, namedtuple +from math import ceil, floor + import pandas as pd -from util.util import log_print -from collections import namedtuple, deque -from util.util import ignored, sigmoid -from math import floor, ceil +from agent.TradingAgent import TradingAgent +from util.util import ignored, log_print, sigmoid -ANCHOR_TOP_STR = 'top' -ANCHOR_BOTTOM_STR = 'bottom' -ANCHOR_MIDDLE_STR = 'middle' +ANCHOR_TOP_STR = "top" +ANCHOR_BOTTOM_STR = "bottom" +ANCHOR_MIDDLE_STR = "middle" -ADAPTIVE_SPREAD_STR = 'adaptive' +ADAPTIVE_SPREAD_STR = "adaptive" INITIAL_SPREAD_VALUE = 50 class AdaptiveMarketMakerAgent(TradingAgent): - """ This class implements a modification of the Chakraborty-Kearns `ladder` market-making strategy, wherein the - the size of order placed at each level is set as a fraction of measured transacted volume in the previous time - period. + """This class implements a modification of the Chakraborty-Kearns `ladder` market-making strategy, wherein the + the size of order placed at each level is set as a fraction of measured transacted volume in the previous time + period. - Can skew orders to size of current inventory using beta parameter, whence beta == 0 represents inventory being - ignored and beta == infinity represents all liquidity placed on one side of book. + Can skew orders to size of current inventory using beta parameter, whence beta == 0 represents inventory being + ignored and beta == infinity represents all liquidity placed on one side of book. 
- ADAPTIVE SPREAD: the market maker's spread can be set either as a fixed or value or can be adaptive, + ADAPTIVE SPREAD: the market maker's spread can be set either as a fixed or value or can be adaptive, """ - def __init__(self, id, name, type, symbol, starting_cash, pov=0.05, min_order_size=20, window_size=5, anchor=ANCHOR_MIDDLE_STR, - num_ticks=20, level_spacing=0.5, wake_up_freq='1s', subscribe=False, subscribe_freq=10e9, subscribe_num_levels=1, cancel_limit_delay=50, - skew_beta=0, spread_alpha=0.85, backstop_quantity=None, log_orders=False, random_state=None): - - super().__init__(id, name, type, starting_cash=starting_cash, log_orders=log_orders, random_state=random_state) + def __init__( + self, + id, + name, + type, + symbol, + starting_cash, + pov=0.05, + min_order_size=20, + window_size=5, + anchor=ANCHOR_MIDDLE_STR, + num_ticks=20, + level_spacing=0.5, + wake_up_freq="1s", + subscribe=False, + subscribe_freq=10e9, + subscribe_num_levels=1, + cancel_limit_delay=50, + skew_beta=0, + spread_alpha=0.85, + backstop_quantity=None, + log_orders=False, + random_state=None, + ): + + super().__init__( + id, + name, + type, + starting_cash=starting_cash, + log_orders=log_orders, + random_state=random_state, + ) self.is_adaptive = False - self.symbol = symbol # Symbol traded + self.symbol = symbol # Symbol traded self.pov = pov # fraction of transacted volume placed at each price level self.min_order_size = min_order_size # minimum size order to place at each level, if pov <= min self.anchor = self.validateAnchor(anchor) # anchor either top of window or bottom of window to mid-price - self.window_size = self.validateWindowSize(window_size) # Size in ticks (cents) of how wide the window around mid price is. If equal to - # string 'adaptive' then ladder starts at best bid and ask + self.window_size = self.validateWindowSize( + window_size + ) # Size in ticks (cents) of how wide the window around mid price is. 
If equal to + # string 'adaptive' then ladder starts at best bid and ask self.num_ticks = num_ticks # number of ticks on each side of window in which to place liquidity self.level_spacing = level_spacing # level spacing as a fraction of the spread self.wake_up_freq = wake_up_freq # Frequency of agent wake up self.subscribe = subscribe # Flag to determine whether to subscribe to data or use polling mechanism self.subscribe_freq = subscribe_freq # Frequency in nanoseconds^-1 at which to receive market updates - # in subscribe mode + # in subscribe mode self.subscribe_num_levels = subscribe_num_levels # Number of orderbook levels in subscription mode - self.cancel_limit_delay = cancel_limit_delay # delay in nanoseconds between order cancellations and new limit order placements + self.cancel_limit_delay = ( + cancel_limit_delay # delay in nanoseconds between order cancellations and new limit order placements + ) self.skew_beta = skew_beta # parameter for determining order placement imbalance self.spread_alpha = spread_alpha # parameter for exponentially weighted moving average of spread. 1 corresponds to ignoring old values, 0 corresponds to no updates @@ -64,34 +96,29 @@ def __init__(self, id, name, type, symbol, starting_cash, pov=0.05, min_order_si self.tick_size = None if self.is_adaptive else ceil(self.last_spread * self.level_spacing) self.LIQUIDITY_DROPOUT_WARNING = f"Liquidity dropout for agent {self.name}." - def initialiseState(self): - """ Returns variables that keep track of whether spread and transacted volume have been observed. 
""" + """Returns variables that keep track of whether spread and transacted volume have been observed.""" if not self.subscribe: - return { - "AWAITING_SPREAD": True, - "AWAITING_TRANSACTED_VOLUME": True - } + return {"AWAITING_SPREAD": True, "AWAITING_TRANSACTED_VOLUME": True} else: - return { - "AWAITING_MARKET_DATA": True, - "AWAITING_TRANSACTED_VOLUME": True - } + return {"AWAITING_MARKET_DATA": True, "AWAITING_TRANSACTED_VOLUME": True} def validateAnchor(self, anchor): - """ Checks that input parameter anchor takes allowed value, raises ValueError if not. + """Checks that input parameter anchor takes allowed value, raises ValueError if not. :param anchor: str :return: """ if anchor not in [ANCHOR_TOP_STR, ANCHOR_BOTTOM_STR, ANCHOR_MIDDLE_STR]: - raise ValueError(f"Variable anchor must take the value `{ANCHOR_BOTTOM_STR}`, `{ANCHOR_MIDDLE_STR}` or " - f"`{ANCHOR_TOP_STR}`") + raise ValueError( + f"Variable anchor must take the value `{ANCHOR_BOTTOM_STR}`, `{ANCHOR_MIDDLE_STR}` or " + f"`{ANCHOR_TOP_STR}`" + ) else: return anchor def validateWindowSize(self, window_size): - """ Checks that input parameter window_size takes allowed value, raises ValueError if not + """Checks that input parameter window_size takes allowed value, raises ValueError if not :param window_size: :return: @@ -99,7 +126,7 @@ def validateWindowSize(self, window_size): try: # fixed window size specified return int(window_size) except: - if window_size.lower() == 'adaptive': + if window_size.lower() == "adaptive": self.is_adaptive = True self.anchor = ANCHOR_MIDDLE_STR return None @@ -110,11 +137,14 @@ def kernelStarting(self, startTime): super().kernelStarting(startTime) def wakeup(self, currentTime): - """ Agent wakeup is determined by self.wake_up_freq """ + """Agent wakeup is determined by self.wake_up_freq""" can_trade = super().wakeup(currentTime) if self.subscribe and not self.subscription_requested: - super().requestDataSubscription(self.symbol, levels=self.subscribe_num_levels, - 
freq=pd.Timedelta(self.subscribe_freq, unit='ns')) + super().requestDataSubscription( + self.symbol, + levels=self.subscribe_num_levels, + freq=pd.Timedelta(self.subscribe_freq, unit="ns"), + ) self.subscription_requested = True self.get_transacted_volume(self.symbol, lookback_period=self.subscribe_freq) self.state = self.initialiseState() @@ -127,7 +157,7 @@ def wakeup(self, currentTime): self.initialiseState() def receiveMessage(self, currentTime, msg): - """ Processes message from exchange. Main function is to update orders in orderbook relative to mid-price. + """Processes message from exchange. Main function is to update orders in orderbook relative to mid-price. :param simulation current time :param message received by self from ExchangeAgent @@ -145,12 +175,12 @@ def receiveMessage(self, currentTime, msg): if self.last_spread is not None and self.is_adaptive: self._adaptive_update_window_and_tick_size() - if msg.body['msg'] == 'QUERY_TRANSACTED_VOLUME' and self.state['AWAITING_TRANSACTED_VOLUME'] is True: + if msg.body["msg"] == "QUERY_TRANSACTED_VOLUME" and self.state["AWAITING_TRANSACTED_VOLUME"] is True: self.updateOrderSize() - self.state['AWAITING_TRANSACTED_VOLUME'] = False + self.state["AWAITING_TRANSACTED_VOLUME"] = False if not self.subscribe: - if msg.body['msg'] == 'QUERY_SPREAD' and self.state['AWAITING_SPREAD'] is True: + if msg.body["msg"] == "QUERY_SPREAD" and self.state["AWAITING_SPREAD"] is True: bid, _, ask, _ = self.getKnownBidAsk(self.symbol) if bid and ask: mid = int((ask + bid) / 2) @@ -159,18 +189,18 @@ def receiveMessage(self, currentTime, msg): spread = int(ask - bid) self._adaptive_update_spread(spread) - self.state['AWAITING_SPREAD'] = False + self.state["AWAITING_SPREAD"] = False else: log_print("SPREAD MISSING at time {}", currentTime) - self.state['AWAITING_SPREAD'] = False # use last mid price and spread + self.state["AWAITING_SPREAD"] = False # use last mid price and spread - if self.state['AWAITING_SPREAD'] is False and 
self.state['AWAITING_TRANSACTED_VOLUME'] is False: + if self.state["AWAITING_SPREAD"] is False and self.state["AWAITING_TRANSACTED_VOLUME"] is False: self.placeOrders(mid) self.state = self.initialiseState() self.setWakeup(currentTime + self.getWakeFrequency()) else: # subscription mode - if msg.body['msg'] == 'MARKET_DATA' and self.state['AWAITING_MARKET_DATA'] is True: + if msg.body["msg"] == "MARKET_DATA" and self.state["AWAITING_MARKET_DATA"] is True: bid = self.known_bids[self.symbol][0][0] if self.known_bids[self.symbol] else None ask = self.known_asks[self.symbol][0][0] if self.known_asks[self.symbol] else None if bid and ask: @@ -180,17 +210,17 @@ def receiveMessage(self, currentTime, msg): spread = int(ask - bid) self._adaptive_update_spread(spread) - self.state['AWAITING_MARKET_DATA'] = False + self.state["AWAITING_MARKET_DATA"] = False else: log_print("SPREAD MISSING at time {}", currentTime) - self.state['AWAITING_MARKET_DATA'] = False + self.state["AWAITING_MARKET_DATA"] = False - if self.state['MARKET_DATA'] is False and self.state['AWAITING_TRANSACTED_VOLUME'] is False: + if self.state["MARKET_DATA"] is False and self.state["AWAITING_TRANSACTED_VOLUME"] is False: self.placeOrders(mid) self.state = self.initialiseState() def _adaptive_update_spread(self, spread): - """ Update internal spread estimate with exponentially weighted moving average + """Update internal spread estimate with exponentially weighted moving average :param spread: :return: """ @@ -199,7 +229,7 @@ def _adaptive_update_spread(self, spread): self.last_spread = spread_ewma def _adaptive_update_window_and_tick_size(self): - """ Update window size and tick size relative to internal spread estimate. + """Update window size and tick size relative to internal spread estimate. :return: """ @@ -209,7 +239,7 @@ def _adaptive_update_window_and_tick_size(self): self.tick_size = 1 def updateOrderSize(self): - """ Updates size of order to be placed. 
""" + """Updates size of order to be placed.""" qty = round(self.pov * self.transacted_volume[self.symbol]) if self.skew_beta == 0: # ignore inventory self.buy_order_size = qty if qty >= self.min_order_size else self.min_order_size @@ -224,7 +254,7 @@ def updateOrderSize(self): self.sell_order_size = sell_size if sell_size >= self.min_order_size else self.min_order_size def computeOrdersToPlace(self, mid): - """ Given a mid price, computes the orders that need to be removed from orderbook, and adds these orders to + """Given a mid price, computes the orders that need to be removed from orderbook, and adds these orders to bid and ask deques. :param mid: mid-price @@ -252,10 +282,10 @@ def computeOrdersToPlace(self, mid): return bids_to_place, asks_to_place def placeOrders(self, mid): - """ Given a mid-price, compute new orders that need to be placed, then send the orders to the Exchange. + """Given a mid-price, compute new orders that need to be placed, then send the orders to the Exchange. 
- :param mid: mid-price - :type mid: int + :param mid: mid-price + :type mid: int """ @@ -263,28 +293,48 @@ def placeOrders(self, mid): if self.backstop_quantity is not None: bid_price = bid_orders[0] - log_print('{}: Placing BUY limit order of size {} @ price {}', self.name, self.backstop_quantity, bid_price) + log_print( + "{}: Placing BUY limit order of size {} @ price {}", + self.name, + self.backstop_quantity, + bid_price, + ) self.placeLimitOrder(self.symbol, self.backstop_quantity, True, bid_price) bid_orders = bid_orders[1:] ask_price = ask_orders[-1] - log_print('{}: Placing SELL limit order of size {} @ price {}', self.name, self.backstop_quantity, ask_price) + log_print( + "{}: Placing SELL limit order of size {} @ price {}", + self.name, + self.backstop_quantity, + ask_price, + ) self.placeLimitOrder(self.symbol, self.backstop_quantity, False, ask_price) ask_orders = ask_orders[:-1] for bid_price in bid_orders: - log_print('{}: Placing BUY limit order of size {} @ price {}', self.name, self.buy_order_size, bid_price) + log_print( + "{}: Placing BUY limit order of size {} @ price {}", + self.name, + self.buy_order_size, + bid_price, + ) self.placeLimitOrder(self.symbol, self.buy_order_size, True, bid_price) for ask_price in ask_orders: - log_print('{}: Placing SELL limit order of size {} @ price {}', self.name, self.sell_order_size, ask_price) + log_print( + "{}: Placing SELL limit order of size {} @ price {}", + self.name, + self.sell_order_size, + ask_price, + ) self.placeLimitOrder(self.symbol, self.sell_order_size, False, ask_price) def getWakeFrequency(self): - """ Get time increment corresponding to wakeup period. 
""" + """Get time increment corresponding to wakeup period.""" return pd.Timedelta(self.wake_up_freq) def cancelAllOrders(self): - """ Cancels all resting limit orders placed by the market maker """ + """Cancels all resting limit orders placed by the market maker""" for _, order in self.orders.items(): self.cancelOrder(order) diff --git a/agent/market_makers/MarketMakerAgent.py b/agent/market_makers/MarketMakerAgent.py index 93175b19e..e41e4c69b 100644 --- a/agent/market_makers/MarketMakerAgent.py +++ b/agent/market_makers/MarketMakerAgent.py @@ -1,13 +1,15 @@ -from agent.TradingAgent import TradingAgent import pandas as pd -from util.util import log_print +from agent.TradingAgent import TradingAgent +from util.util import log_print -DEFAULT_LEVELS_QUOTE_DICT = {1: [1, 0, 0, 0, 0], - 2: [.5, .5, 0, 0, 0], - 3: [.34, .33, .33, 0, 0], - 4: [.25, .25, .25, .25, 0], - 5: [.20, .20, .20, .20, .20]} +DEFAULT_LEVELS_QUOTE_DICT = { + 1: [1, 0, 0, 0, 0], + 2: [0.5, 0.5, 0, 0, 0], + 3: [0.34, 0.33, 0.33, 0, 0], + 4: [0.25, 0.25, 0.25, 0.25, 0], + 5: [0.20, 0.20, 0.20, 0.20, 0.20], +} class MarketMakerAgent(TradingAgent): @@ -25,18 +27,39 @@ class MarketMakerAgent(TradingAgent): """ - def __init__(self, id, name, type, symbol, starting_cash, min_size, max_size , wake_up_freq='1s', - subscribe=False, subscribe_freq=10e9, subscribe_num_levels=5, log_orders=False, random_state=None): - - super().__init__(id, name, type, starting_cash=starting_cash, log_orders=log_orders, random_state=random_state) - self.symbol = symbol # Symbol traded + def __init__( + self, + id, + name, + type, + symbol, + starting_cash, + min_size, + max_size, + wake_up_freq="1s", + subscribe=False, + subscribe_freq=10e9, + subscribe_num_levels=5, + log_orders=False, + random_state=None, + ): + + super().__init__( + id, + name, + type, + starting_cash=starting_cash, + log_orders=log_orders, + random_state=random_state, + ) + self.symbol = symbol # Symbol traded self.min_size = min_size # Minimum order size 
self.max_size = max_size # Maximum order size - self.size = round(self.random_state.randint(self.min_size, self.max_size) / 2) # order size per LOB side + self.size = round(self.random_state.randint(self.min_size, self.max_size) / 2) # order size per LOB side self.wake_up_freq = wake_up_freq # Frequency of agent wake up self.subscribe = subscribe # Flag to determine whether to subscribe to data or use polling mechanism self.subscribe_freq = subscribe_freq # Frequency in nanoseconds^-1 at which to receive market updates - # in subscribe mode + # in subscribe mode self.subscribe_num_levels = subscribe_num_levels # Number of orderbook levels in subscription mode self.subscription_requested = False self.log_orders = log_orders @@ -51,52 +74,56 @@ def kernelStarting(self, startTime): super().kernelStarting(startTime) def wakeup(self, currentTime): - """ Agent wakeup is determined by self.wake_up_freq """ + """Agent wakeup is determined by self.wake_up_freq""" can_trade = super().wakeup(currentTime) if self.subscribe and not self.subscription_requested: super().requestDataSubscription(self.symbol, levels=self.subscribe_num_levels, freq=self.subscribe_freq) self.subscription_requested = True - self.state = 'AWAITING_MARKET_DATA' + self.state = "AWAITING_MARKET_DATA" elif can_trade and not self.subscribe: self.cancelOrders() self.getCurrentSpread(self.symbol, depth=self.subscribe_num_levels) - self.state = 'AWAITING_SPREAD' + self.state = "AWAITING_SPREAD" def receiveMessage(self, currentTime, msg): - """ Market Maker actions are determined after obtaining the bids and asks in the LOB """ + """Market Maker actions are determined after obtaining the bids and asks in the LOB""" super().receiveMessage(currentTime, msg) - if not self.subscribe and self.state == 'AWAITING_SPREAD' and msg.body['msg'] == 'QUERY_SPREAD': + if not self.subscribe and self.state == "AWAITING_SPREAD" and msg.body["msg"] == "QUERY_SPREAD": self.cancelOrders() mid = self.last_trade[self.symbol] - 
self.num_levels = 2 * self.subscribe_num_levels # Number of price levels to place the trades in + self.num_levels = 2 * self.subscribe_num_levels # Number of price levels to place the trades in bid, bid_vol, ask, ask_vol = self.getKnownBidAsk(self.symbol) if bid and ask: mid = int((ask + bid) / 2) - spread = int(abs(ask - bid)/2) + spread = int(abs(ask - bid) / 2) else: log_print(f"SPREAD MISSING at time {currentTime}") spread = self.last_spread for i in range(self.num_levels): self.size = round(self.random_state.randint(self.min_size, self.max_size) / 2) - #bids + # bids self.placeLimitOrder(self.symbol, self.size, True, mid - spread - i) - #asks + # asks self.placeLimitOrder(self.symbol, self.size, False, mid + spread + i) self.setWakeup(currentTime + self.getWakeFrequency()) - self.state = 'AWAITING_WAKEUP' + self.state = "AWAITING_WAKEUP" - elif self.subscribe and self.state == 'AWAITING_MARKET_DATA' and msg.body['msg'] == 'MARKET_DATA': + elif self.subscribe and self.state == "AWAITING_MARKET_DATA" and msg.body["msg"] == "MARKET_DATA": self.cancelOrders() num_levels_place = len(self.levels_quote_dict.keys()) - self.num_levels = self.random_state.randint(1, num_levels_place) # Number of price levels to place the trades in - self.size_split = self.levels_quote_dict.get(self.num_levels) # % of the order size to be placed at different levels + self.num_levels = self.random_state.randint( + 1, num_levels_place + ) # Number of price levels to place the trades in + self.size_split = self.levels_quote_dict.get( + self.num_levels + ) # % of the order size to be placed at different levels self.placeOrders(self.known_bids[self.symbol], self.known_asks[self.symbol]) - self.state = 'AWAITING_MARKET_DATA' + self.state = "AWAITING_MARKET_DATA" def placeOrders(self, bids, asks): if bids and asks: @@ -120,11 +147,10 @@ def placeOrders(self, bids, asks): for price, vol in sell_quotes.items(): self.placeLimitOrder(self.symbol, vol, False, price) - def cancelOrders(self): - """ 
cancels all resting limit orders placed by the market maker """ + """cancels all resting limit orders placed by the market maker""" for _, order in self.orders.items(): self.cancelOrder(order) def getWakeFrequency(self): - return pd.Timedelta(self.wake_up_freq) \ No newline at end of file + return pd.Timedelta(self.wake_up_freq) diff --git a/agent/market_makers/POVMarketMakerAgent.py b/agent/market_makers/POVMarketMakerAgent.py index 6218e3ad8..91675af70 100644 --- a/agent/market_makers/POVMarketMakerAgent.py +++ b/agent/market_makers/POVMarketMakerAgent.py @@ -1,26 +1,49 @@ -from agent.TradingAgent import TradingAgent +from collections import deque, namedtuple + import pandas as pd -from util.util import log_print -from collections import namedtuple, deque -from util.util import ignored +from agent.TradingAgent import TradingAgent +from util.util import ignored, log_print -ANCHOR_TOP_STR = 'top' -ANCHOR_BOTTOM_STR = 'bottom' +ANCHOR_TOP_STR = "top" +ANCHOR_BOTTOM_STR = "bottom" class POVMarketMakerAgent(TradingAgent): - """ This class implements a modification of the Chakraborty-Kearns `ladder` market-making strategy, wherein the - the size of order placed at each level is set as a fraction of measured transacted volume in the previous time - period. + """This class implements a modification of the Chakraborty-Kearns `ladder` market-making strategy, wherein the + the size of order placed at each level is set as a fraction of measured transacted volume in the previous time + period. 
""" - def __init__(self, id, name, type, symbol, starting_cash, pov=0.05, min_order_size=20, window_size=5, anchor=ANCHOR_BOTTOM_STR, - num_ticks=20, wake_up_freq='1s', subscribe=False, subscribe_freq=10e9, subscribe_num_levels=1, - log_orders=False, random_state=None): - - super().__init__(id, name, type, starting_cash=starting_cash, log_orders=log_orders, random_state=random_state) - self.symbol = symbol # Symbol traded + def __init__( + self, + id, + name, + type, + symbol, + starting_cash, + pov=0.05, + min_order_size=20, + window_size=5, + anchor=ANCHOR_BOTTOM_STR, + num_ticks=20, + wake_up_freq="1s", + subscribe=False, + subscribe_freq=10e9, + subscribe_num_levels=1, + log_orders=False, + random_state=None, + ): + + super().__init__( + id, + name, + type, + starting_cash=starting_cash, + log_orders=log_orders, + random_state=random_state, + ) + self.symbol = symbol # Symbol traded self.pov = pov # fraction of transacted volume placed at each price level self.min_order_size = min_order_size # minimum size order to place at each level, if pov <= min self.window_size = window_size # Size in ticks (cents) of how wide the window around mid price is @@ -30,7 +53,7 @@ def __init__(self, id, name, type, symbol, starting_cash, pov=0.05, min_order_si self.wake_up_freq = wake_up_freq # Frequency of agent wake up self.subscribe = subscribe # Flag to determine whether to subscribe to data or use polling mechanism self.subscribe_freq = subscribe_freq # Frequency in nanoseconds^-1 at which to receive market updates - # in subscribe mode + # in subscribe mode self.subscribe_num_levels = subscribe_num_levels # Number of orderbook levels in subscription mode self.log_orders = log_orders @@ -44,20 +67,14 @@ def __init__(self, id, name, type, symbol, starting_cash, pov=0.05, min_order_si self.LIQUIDITY_DROPOUT_WARNING = f"Liquidity dropout for agent {self.name}." 
def initialiseState(self): - """ Returns variables that keep track of whether spread and transacted volume have been observed. """ + """Returns variables that keep track of whether spread and transacted volume have been observed.""" if not self.subscribe: - return { - "AWAITING_SPREAD": True, - "AWAITING_TRANSACTED_VOLUME": True - } + return {"AWAITING_SPREAD": True, "AWAITING_TRANSACTED_VOLUME": True} else: - return { - "AWAITING_MARKET_DATA": True, - "AWAITING_TRANSACTED_VOLUME": True - } + return {"AWAITING_MARKET_DATA": True, "AWAITING_TRANSACTED_VOLUME": True} def validateAnchor(self, anchor): - """ Checks that input parameter anchor takes allowed value, raises ValueError if not. + """Checks that input parameter anchor takes allowed value, raises ValueError if not. :param anchor: str :return: @@ -71,11 +88,14 @@ def kernelStarting(self, startTime): super().kernelStarting(startTime) def wakeup(self, currentTime): - """ Agent wakeup is determined by self.wake_up_freq """ + """Agent wakeup is determined by self.wake_up_freq""" can_trade = super().wakeup(currentTime) if self.subscribe and not self.subscription_requested: - super().requestDataSubscription(self.symbol, levels=self.subscribe_num_levels, - freq=pd.Timedelta(self.subscribe_freq, unit='ns')) + super().requestDataSubscription( + self.symbol, + levels=self.subscribe_num_levels, + freq=pd.Timedelta(self.subscribe_freq, unit="ns"), + ) self.subscription_requested = True self.get_transacted_volume(self.symbol, lookback_period=self.subscribe_freq) self.state = self.initialiseState() @@ -86,7 +106,7 @@ def wakeup(self, currentTime): self.initialiseState() def receiveMessage(self, currentTime, msg): - """ Processes message from exchange. Main function is to update orders in orderbook relative to mid-price. + """Processes message from exchange. Main function is to update orders in orderbook relative to mid-price. 
:param simulation current time :param message received by self from ExchangeAgent @@ -102,49 +122,49 @@ def receiveMessage(self, currentTime, msg): if self.last_mid is not None: mid = self.last_mid - if msg.body['msg'] == 'QUERY_TRANSACTED_VOLUME' and self.state['AWAITING_TRANSACTED_VOLUME'] is True: + if msg.body["msg"] == "QUERY_TRANSACTED_VOLUME" and self.state["AWAITING_TRANSACTED_VOLUME"] is True: self.updateOrderSize() - self.state['AWAITING_TRANSACTED_VOLUME'] = False + self.state["AWAITING_TRANSACTED_VOLUME"] = False if not self.subscribe: - if msg.body['msg'] == 'QUERY_SPREAD' and self.state['AWAITING_SPREAD'] is True: + if msg.body["msg"] == "QUERY_SPREAD" and self.state["AWAITING_SPREAD"] is True: bid, _, ask, _ = self.getKnownBidAsk(self.symbol) if bid and ask: mid = int((ask + bid) / 2) self.last_mid = mid - self.state['AWAITING_SPREAD'] = False + self.state["AWAITING_SPREAD"] = False else: log_print(f"SPREAD MISSING at time {currentTime}") - if self.state['AWAITING_SPREAD'] is False and self.state['AWAITING_TRANSACTED_VOLUME'] is False: + if self.state["AWAITING_SPREAD"] is False and self.state["AWAITING_TRANSACTED_VOLUME"] is False: self.cancelAllOrders() self.placeOrders(mid) self.state = self.initialiseState() self.setWakeup(currentTime + self.getWakeFrequency()) else: # subscription mode - if msg.body['msg'] == 'MARKET_DATA' and self.state['AWAITING_MARKET_DATA'] is True: + if msg.body["msg"] == "MARKET_DATA" and self.state["AWAITING_MARKET_DATA"] is True: bid = self.known_bids[self.symbol][0][0] if self.known_bids[self.symbol] else None ask = self.known_asks[self.symbol][0][0] if self.known_asks[self.symbol] else None if bid and ask: mid = int((ask + bid) / 2) self.last_mid = mid - self.state['AWAITING_MARKET_DATA'] = False + self.state["AWAITING_MARKET_DATA"] = False else: log_print(f"SPREAD MISSING at time {currentTime}") - self.state['AWAITING_MARKET_DATA'] = False + self.state["AWAITING_MARKET_DATA"] = False - if self.state['MARKET_DATA'] is 
False and self.state['AWAITING_TRANSACTED_VOLUME'] is False: + if self.state["MARKET_DATA"] is False and self.state["AWAITING_TRANSACTED_VOLUME"] is False: self.placeOrders(mid) self.state = self.initialiseState() def updateOrderSize(self): - """ Updates size of order to be placed. """ + """Updates size of order to be placed.""" qty = round(self.pov * self.transacted_volume[self.symbol]) self.order_size = qty if qty >= self.min_order_size else self.min_order_size def computeOrdersToPlace(self, mid): - """ Given a mid price, computes the orders that need to be removed from orderbook, and adds these orders to + """Given a mid price, computes the orders that need to be removed from orderbook, and adds these orders to bid and ask deques. :param mid: mid-price @@ -169,27 +189,27 @@ def computeOrdersToPlace(self, mid): return bids_to_place, asks_to_place def placeOrders(self, mid): - """ Given a mid-price, compute new orders that need to be placed, then send the orders to the Exchange. + """Given a mid-price, compute new orders that need to be placed, then send the orders to the Exchange. - :param mid: mid-price - :type mid: int + :param mid: mid-price + :type mid: int """ bid_orders, ask_orders = self.computeOrdersToPlace(mid) for bid_price in bid_orders: - log_print(f'{self.name}: Placing BUY limit order of size {self.order_size} @ price {bid_price}') + log_print(f"{self.name}: Placing BUY limit order of size {self.order_size} @ price {bid_price}") self.placeLimitOrder(self.symbol, self.order_size, True, bid_price) for ask_price in ask_orders: - log_print(f'{self.name}: Placing SELL limit order of size {self.order_size} @ price {ask_price}') + log_print(f"{self.name}: Placing SELL limit order of size {self.order_size} @ price {ask_price}") self.placeLimitOrder(self.symbol, self.order_size, False, ask_price) def getWakeFrequency(self): - """ Get time increment corresponding to wakeup period. 
""" + """Get time increment corresponding to wakeup period.""" return pd.Timedelta(self.wake_up_freq) def cancelAllOrders(self): - """ Cancels all resting limit orders placed by the market maker """ + """Cancels all resting limit orders placed by the market maker""" for _, order in self.orders.items(): self.cancelOrder(order) diff --git a/agent/market_makers/SpreadBasedMarketMakerAgent.py b/agent/market_makers/SpreadBasedMarketMakerAgent.py index c55c3a0fc..edf3766c6 100644 --- a/agent/market_makers/SpreadBasedMarketMakerAgent.py +++ b/agent/market_makers/SpreadBasedMarketMakerAgent.py @@ -1,25 +1,47 @@ -from agent.TradingAgent import TradingAgent +from collections import deque, namedtuple + import pandas as pd -from util.util import log_print -from collections import namedtuple, deque -from util.util import ignored +from agent.TradingAgent import TradingAgent +from util.util import ignored, log_print -ANCHOR_TOP_STR = 'top' -ANCHOR_BOTTOM_STR = 'bottom' +ANCHOR_TOP_STR = "top" +ANCHOR_BOTTOM_STR = "bottom" class SpreadBasedMarketMakerAgent(TradingAgent): - """ This class implements the Chakraborty-Kearns `ladder` market-making strategy. 
""" - - _Order = namedtuple('_Order', ['price', 'id']) # Internal data structure used to describe a placed order - - def __init__(self, id, name, type, symbol, starting_cash, order_size=1, window_size=5, anchor=ANCHOR_BOTTOM_STR, - num_ticks=20, wake_up_freq='1s', subscribe=True, subscribe_freq=10e9, subscribe_num_levels=1, - log_orders=False, random_state=None): - - super().__init__(id, name, type, starting_cash=starting_cash, log_orders=log_orders, random_state=random_state) - self.symbol = symbol # Symbol traded + """This class implements the Chakraborty-Kearns `ladder` market-making strategy.""" + + _Order = namedtuple("_Order", ["price", "id"]) # Internal data structure used to describe a placed order + + def __init__( + self, + id, + name, + type, + symbol, + starting_cash, + order_size=1, + window_size=5, + anchor=ANCHOR_BOTTOM_STR, + num_ticks=20, + wake_up_freq="1s", + subscribe=True, + subscribe_freq=10e9, + subscribe_num_levels=1, + log_orders=False, + random_state=None, + ): + + super().__init__( + id, + name, + type, + starting_cash=starting_cash, + log_orders=log_orders, + random_state=random_state, + ) + self.symbol = symbol # Symbol traded self.order_size = order_size # order size per price level self.window_size = window_size # Size in ticks (cents) of how wide the window around mid price is self.anchor = self.validateAnchor(anchor) # anchor either top of window or bottom of window to mid-price @@ -28,7 +50,7 @@ def __init__(self, id, name, type, symbol, starting_cash, order_size=1, window_s self.wake_up_freq = wake_up_freq # Frequency of agent wake up self.subscribe = subscribe # Flag to determine whether to subscribe to data or use polling mechanism self.subscribe_freq = subscribe_freq # Frequency in nanoseconds^-1 at which to receive market updates - # in subscribe mode + # in subscribe mode self.subscribe_num_levels = subscribe_num_levels # Number of orderbook levels in subscription mode self.log_orders = log_orders @@ -44,7 +66,7 @@ def 
__init__(self, id, name, type, symbol, starting_cash, order_size=1, window_s self.LIQUIDITY_DROPOUT_WARNING = f"Liquidity dropout for agent {self.name}." def validateAnchor(self, anchor): - """ Checks that input parameter anchor takes allowed value, raises ValueError if not. + """Checks that input parameter anchor takes allowed value, raises ValueError if not. :param anchor: str :return: @@ -58,18 +80,18 @@ def kernelStarting(self, startTime): super().kernelStarting(startTime) def wakeup(self, currentTime): - """ Agent wakeup is determined by self.wake_up_freq """ + """Agent wakeup is determined by self.wake_up_freq""" can_trade = super().wakeup(currentTime) if self.subscribe and not self.subscription_requested: super().requestDataSubscription(self.symbol, levels=self.subscribe_num_levels, freq=self.subscribe_freq) self.subscription_requested = True - self.state = 'AWAITING_MARKET_DATA' + self.state = "AWAITING_MARKET_DATA" elif can_trade and not self.subscribe: self.getCurrentSpread(self.symbol, depth=self.subscribe_num_levels) - self.state = 'AWAITING_SPREAD' + self.state = "AWAITING_SPREAD" def receiveMessage(self, currentTime, msg): - """ Processes message from exchange. Main function is to update orders in orderbook relative to mid-price. + """Processes message from exchange. Main function is to update orders in orderbook relative to mid-price. 
:param simulation current time :param message received by self from ExchangeAgent @@ -85,7 +107,7 @@ def receiveMessage(self, currentTime, msg): if self.last_mid is not None: mid = self.last_mid - if not self.subscribe and self.state == 'AWAITING_SPREAD' and msg.body['msg'] == 'QUERY_SPREAD': + if not self.subscribe and self.state == "AWAITING_SPREAD" and msg.body["msg"] == "QUERY_SPREAD": bid, _, ask, _ = self.getKnownBidAsk(self.symbol) if bid and ask: @@ -97,10 +119,10 @@ def receiveMessage(self, currentTime, msg): self.cancelOrders(orders_to_cancel) self.placeOrders(mid) self.setWakeup(currentTime + self.getWakeFrequency()) - self.state = 'AWAITING_WAKEUP' + self.state = "AWAITING_WAKEUP" self.last_mid = mid - elif self.subscribe and self.state == 'AWAITING_MARKET_DATA' and msg.body['msg'] == 'MARKET_DATA': + elif self.subscribe and self.state == "AWAITING_MARKET_DATA" and msg.body["msg"] == "MARKET_DATA": bid = self.known_bids[self.symbol][0][0] if self.known_bids[self.symbol] else None ask = self.known_asks[self.symbol][0][0] if self.known_asks[self.symbol] else None @@ -113,11 +135,11 @@ def receiveMessage(self, currentTime, msg): orders_to_cancel = self.computeOrdersToCancel(mid) self.cancelOrders(orders_to_cancel) self.placeOrders(mid) - self.state = 'AWAITING_MARKET_DATA' + self.state = "AWAITING_MARKET_DATA" self.last_mid = mid def computeOrdersToCancel(self, mid): - """ Given a mid price, computes the orders that need to be removed from orderbook, and pops these orders from + """Given a mid price, computes the orders that need to be removed from orderbook, and pops these orders from bid and ask deques. 
:param mid: mid-price @@ -140,7 +162,7 @@ def computeOrdersToCancel(self, mid): with ignored(self.LIQUIDITY_DROPOUT_WARNING, IndexError): orders_to_cancel.append(self.current_asks.popleft()) elif num_ticks_to_increase < 0: - for _ in range(- num_ticks_to_increase): + for _ in range(-num_ticks_to_increase): with ignored(self.LIQUIDITY_DROPOUT_WARNING, IndexError): orders_to_cancel.append(self.current_bids.pop()) with ignored(self.LIQUIDITY_DROPOUT_WARNING, IndexError): @@ -149,7 +171,7 @@ def computeOrdersToCancel(self, mid): return orders_to_cancel def cancelOrders(self, orders_to_cancel): - """ Given a list of _Order objects, remove the corresponding orders from ExchangeAgent's orderbook + """Given a list of _Order objects, remove the corresponding orders from ExchangeAgent's orderbook :param orders_to_cancel: orders to remove from orderbook :type orders_to_cancel: list(_Order) @@ -164,7 +186,7 @@ def cancelOrders(self, orders_to_cancel): continue def computeOrdersToPlace(self, mid): - """ Given a mid price, computes the orders that need to be removed from orderbook, and adds these orders to + """Given a mid price, computes the orders that need to be removed from orderbook, and adds these orders to bid and ask deques. :param mid: mid-price @@ -223,28 +245,40 @@ def computeOrdersToPlace(self, mid): return bids_to_place, asks_to_place def placeOrders(self, mid): - """ Given a mid-price, compute new orders that need to be placed, then send the orders to the Exchange. + """Given a mid-price, compute new orders that need to be placed, then send the orders to the Exchange. 
- :param mid: mid-price - :type mid: int + :param mid: mid-price + :type mid: int """ bid_orders, ask_orders = self.computeOrdersToPlace(mid) for bid_order in bid_orders: - log_print(f'{self.name}: Placing BUY limit order of size {self.order_size} @ price {bid_order.price}') - self.placeLimitOrder(self.symbol, self.order_size, True, bid_order.price, order_id=bid_order.id) + log_print(f"{self.name}: Placing BUY limit order of size {self.order_size} @ price {bid_order.price}") + self.placeLimitOrder( + self.symbol, + self.order_size, + True, + bid_order.price, + order_id=bid_order.id, + ) for ask_order in ask_orders: - log_print(f'{self.name}: Placing SELL limit order of size {self.order_size} @ price {ask_order.price}') - self.placeLimitOrder(self.symbol, self.order_size, False, ask_order.price, order_id=ask_order.id) + log_print(f"{self.name}: Placing SELL limit order of size {self.order_size} @ price {ask_order.price}") + self.placeLimitOrder( + self.symbol, + self.order_size, + False, + ask_order.price, + order_id=ask_order.id, + ) def initialiseBidsAsksDeques(self, mid): - """ Initialise the current_bids and current_asks object attributes, which internally keep track of the limit - orders sent to the Exchange. + """Initialise the current_bids and current_asks object attributes, which internally keep track of the limit + orders sent to the Exchange. - :param mid: mid-price - :type mid: int + :param mid: mid-price + :type mid: int """ @@ -262,7 +296,7 @@ def initialiseBidsAsksDeques(self, mid): self.current_asks = deque([self.generateNewOrderId(price) for price in range(lowest_ask, highest_ask + 1)]) def generateNewOrderId(self, price): - """ Generate a _Order object for a particular price level + """Generate a _Order object for a particular price level :param price: :type price: int @@ -273,10 +307,10 @@ def generateNewOrderId(self, price): return self._Order(price, order_id) def getWakeFrequency(self): - """ Get time increment corresponding to wakeup period. 
""" + """Get time increment corresponding to wakeup period.""" return pd.Timedelta(self.wake_up_freq) def cancelAllOrders(self): - """ Cancels all resting limit orders placed by the market maker """ + """Cancels all resting limit orders placed by the market maker""" for _, order in self.orders.items(): self.cancelOrder(order) diff --git a/cli/book_plot.py b/cli/book_plot.py index e1c0dd16a..67dca2d87 100644 --- a/cli/book_plot.py +++ b/cli/book_plot.py @@ -1,16 +1,17 @@ import copy -from mpl_toolkits.mplot3d import Axes3D + import matplotlib -matplotlib.use('TkAgg') +from mpl_toolkits.mplot3d import Axes3D + +matplotlib.use("TkAgg") +import sys + import matplotlib.pyplot as plt -import seaborn as sns import numpy as np import pandas as pd -import sys - -from matplotlib.colors import LogNorm - +import seaborn as sns from joblib import Memory +from matplotlib.colors import LogNorm # Auto-detect terminal width. pd.options.display.width = None @@ -18,9 +19,9 @@ pd.options.display.max_colwidth = 200 # Initialize a persistent memcache. -mem = Memory(cachedir='./.cached_plot_book', verbose=0) -mem_hist = Memory(cachedir='./.cached_plot_book_historical', verbose=0) -mem_hist_plot = Memory(cachedir='./.cached_plot_book_historical_heatmap', verbose=0) +mem = Memory(cachedir="./.cached_plot_book", verbose=0) +mem_hist = Memory(cachedir="./.cached_plot_book_historical", verbose=0) +mem_hist_plot = Memory(cachedir="./.cached_plot_book_historical_heatmap", verbose=0) # Turn these into command line parameters. @@ -31,94 +32,95 @@ # Used to read and cache simulated quotes (best bid/ask). # Doesn't actually pay attention to symbols yet. -#@mem.cache -def read_book_quotes (file): - print ("Simulated quotes were not cached. This will take a minute.") - df = pd.read_pickle(file, compression='bz2') +# @mem.cache +def read_book_quotes(file): + print("Simulated quotes were not cached. 
This will take a minute.") + df = pd.read_pickle(file, compression="bz2") - if len(df) <= 0: - print ("There appear to be no simulated quotes.") - sys.exit() + if len(df) <= 0: + print("There appear to be no simulated quotes.") + sys.exit() - print ("Cached simulated quotes.") - return df + print("Cached simulated quotes.") + return df # Used to read historical national best bid/ask spread. @mem_hist.cache -def read_historical_quotes (file, symbol): - print ("Historical quotes were not cached. This will take a minute.") - df = pd.read_pickle(file, compression='bz2') +def read_historical_quotes(file, symbol): + print("Historical quotes were not cached. This will take a minute.") + df = pd.read_pickle(file, compression="bz2") - if len(df) <= 0: - print ("There appear to be no historical quotes.") - sys.exit() + if len(df) <= 0: + print("There appear to be no historical quotes.") + sys.exit() - df = df.loc[symbol] + df = df.loc[symbol] - return df + return df # Used to cache the transformed historical dataframe for a symbol. @mem_hist_plot.cache -def prepare_histogram (df_hist): - print ("Historical dataframe transformation was not cached. This will take a minute.") +def prepare_histogram(df_hist): + print("Historical dataframe transformation was not cached. 
This will take a minute.") - min_quote = df_hist['BEST_BID'].min() - max_quote = df_hist['BEST_ASK'].max() + min_quote = df_hist["BEST_BID"].min() + max_quote = df_hist["BEST_ASK"].max() - quote_range = pd.Series(np.arange(min_quote, max_quote + 0.01, 0.01)).round(2).map(str) - quote_range = quote_range.str.pad(6, side='right', fillchar='0') + quote_range = pd.Series(np.arange(min_quote, max_quote + 0.01, 0.01)).round(2).map(str) + quote_range = quote_range.str.pad(6, side="right", fillchar="0") - df = pd.DataFrame(index=df_hist.index, columns=quote_range) - df[:] = 0 + df = pd.DataFrame(index=df_hist.index, columns=quote_range) + df[:] = 0 - i = 0 + i = 0 - for idx in df.index: - if i % 1000 == 0: print ("Caching {}".format(idx)) + for idx in df.index: + if i % 1000 == 0: + print("Caching {}".format(idx)) - col = '{:0.2f}'.format(round(df_hist.loc[idx].BEST_BID, 2)) - val = -df_hist.loc[idx].BEST_BIDSIZ - df.loc[idx,col] = val + col = "{:0.2f}".format(round(df_hist.loc[idx].BEST_BID, 2)) + val = -df_hist.loc[idx].BEST_BIDSIZ + df.loc[idx, col] = val - col = '{:0.2f}'.format(round(df_hist.loc[idx].BEST_ASK, 2)) - val = df_hist.loc[idx].BEST_ASKSIZ - df.loc[idx,col] = val + col = "{:0.2f}".format(round(df_hist.loc[idx].BEST_ASK, 2)) + val = df_hist.loc[idx].BEST_ASKSIZ + df.loc[idx, col] = val - i += 1 + i += 1 - return df + return df # Main program starts here. 
if len(sys.argv) < 2: - print ("Usage: python book_plot.py ") - sys.exit() + print("Usage: python book_plot.py ") + sys.exit() book_file = sys.argv[1] -print ("Visualizing order book from {}".format(book_file)) +print("Visualizing order book from {}".format(book_file)) sns.set() df_book = read_book_quotes(book_file) -#df_hist = read_historical_quotes('./data/nbbo/nbbo_2018/nbbom_20180518.bgz', 'IBM') +# df_hist = read_historical_quotes('./data/nbbo/nbbo_2018/nbbom_20180518.bgz', 'IBM') -fig = plt.figure(figsize=(12,9)) +fig = plt.figure(figsize=(12, 9)) # Use this to make all volume positive (ASK volume is negative in the dataframe). -#df_book.Volume = df_book.Volume.abs() +# df_book.Volume = df_book.Volume.abs() # Use this to swap the sign of BID vs ASK volume (to better fit a colormap, perhaps). -#df_book.Volume = df_book.Volume * -1 +# df_book.Volume = df_book.Volume * -1 # Use this to clip volume to an upper limit. -#df_book.Volume = df_book.Volume.clip(lower=-400,upper=400) +# df_book.Volume = df_book.Volume.clip(lower=-400,upper=400) # Use this to turn zero volume into np.nan (useful for some plot types). -#df_book.Volume[df_book.Volume == 0] = np.nan +# df_book.Volume[df_book.Volume == 0] = np.nan # This section colors the best bid, best ask, and bid/ask midpoint # differently from the rest of the heatmap below, by substituting @@ -135,125 +137,137 @@ def prepare_histogram (df_hist): # This converts the DateTimeIndex to integer nanoseconds since market open. We use # these as our time steps for discrete time simulations (e.g. SRG config). 
if TIME_STEPS: - df_book = df_book.unstack(1) - t = df_book.index.get_level_values(0) - df_book.index.get_level_values(0)[0] - df_book.index = (t / np.timedelta64(1, 'ns')).astype(np.int64) - df_book = df_book.stack() + df_book = df_book.unstack(1) + t = df_book.index.get_level_values(0) - df_book.index.get_level_values(0)[0] + df_book.index = (t / np.timedelta64(1, "ns")).astype(np.int64) + df_book = df_book.stack() # Use this to restrict plotting to a certain time of day. Depending on quote frequency, # plotting could be very slow without this. -#df_book = df_book.unstack(1) -#df_book = df_book.between_time('11:50:00', '12:10:00') -#df_book = df_book.stack() - +# df_book = df_book.unstack(1) +# df_book = df_book.between_time('11:50:00', '12:10:00') +# df_book = df_book.stack() if SHOW_BEST: - df_book = df_book.unstack(1) - df_book.columns = df_book.columns.droplevel(0) - - # Now row (single) index is time. Column (single) index is quote price. + df_book = df_book.unstack(1) + df_book.columns = df_book.columns.droplevel(0) - # In temporary data frame, find best bid per (time) row. - # Copy bids only. - best_bid = df_book[df_book < 0].copy() + # Now row (single) index is time. Column (single) index is quote price. - # Replace every non-zero bid volume with the column header (quote price) instead. - for col in best_bid.columns: - c = best_bid[col] - c[c < 0] = col + # In temporary data frame, find best bid per (time) row. + # Copy bids only. + best_bid = df_book[df_book < 0].copy() - # Copy asks only. - best_ask = df_book[df_book > 0].copy() + # Replace every non-zero bid volume with the column header (quote price) instead. + for col in best_bid.columns: + c = best_bid[col] + c[c < 0] = col - # Replace every non-zero ask volume with the column header (quote price) instead. - for col in best_ask.columns: - c = best_ask[col] - c[c > 0] = col + # Copy asks only. 
+ best_ask = df_book[df_book > 0].copy() - # In a new column in each temporary data frame, compute the best bid or ask. - best_bid['best'] = best_bid.idxmax(axis=1) - best_ask['best'] = best_ask.idxmin(axis=1) + # Replace every non-zero ask volume with the column header (quote price) instead. + for col in best_ask.columns: + c = best_ask[col] + c[c > 0] = col - # Iterate over the index (all three DF have the same index) and set the special - # best bid/ask value in the correct column(s) per row. Also compute and include - # the midpoint where possible. - for idx in df_book.index: - bb = best_bid.loc[idx,'best'] - #if bb: df_book.loc[idx,bb] = best_bid_value + # In a new column in each temporary data frame, compute the best bid or ask. + best_bid["best"] = best_bid.idxmax(axis=1) + best_ask["best"] = best_ask.idxmin(axis=1) - ba = best_ask.loc[idx,'best'] - #if ba: df_book.loc[idx,ba] = best_ask_value + # Iterate over the index (all three DF have the same index) and set the special + # best bid/ask value in the correct column(s) per row. Also compute and include + # the midpoint where possible. + for idx in df_book.index: + bb = best_bid.loc[idx, "best"] + # if bb: df_book.loc[idx,bb] = best_bid_value - if ba and bb: df_book.loc[idx,round((ba+bb)/2)] = midpoint_value + ba = best_ask.loc[idx, "best"] + # if ba: df_book.loc[idx,ba] = best_ask_value + if ba and bb: + df_book.loc[idx, round((ba + bb) / 2)] = midpoint_value - # Put the data frame indices back the way they were and ensure it is a DataFrame, - # not a Series. - df_book = df_book.stack() - df_book = pd.DataFrame(data=df_book) - df_book.columns = ['Volume'] + # Put the data frame indices back the way they were and ensure it is a DataFrame, + # not a Series. + df_book = df_book.stack() + df_book = pd.DataFrame(data=df_book) + df_book.columns = ["Volume"] # Change the MultiIndex to time and dollars. 
-df_book['Time'] = df_book.index.get_level_values(0) -df_book['Price'] = df_book.index.get_level_values(1) +df_book["Time"] = df_book.index.get_level_values(0) +df_book["Price"] = df_book.index.get_level_values(1) # Use this to restrict plotting to a certain range of prices. -#df_book = df_book.loc[(df_book.Price > 98500) & (df_book.Price < 101500)] +# df_book = df_book.loc[(df_book.Price > 98500) & (df_book.Price < 101500)] # Use this to pad price strings for appearance. -#df_book.Price = df_book.Price.map(str) -#df_book.Price = df_book.Price.str.pad(6, side='right', fillchar='0') +# df_book.Price = df_book.Price.map(str) +# df_book.Price = df_book.Price.str.pad(6, side='right', fillchar='0') -df_book.set_index(['Time', 'Price'], inplace=True) +df_book.set_index(["Time", "Price"], inplace=True) # This section makes a 2-D histogram (time vs price, color == volume) unstacked = df_book.unstack(1) -if not TIME_STEPS: unstacked.index = unstacked.index.time +if not TIME_STEPS: + unstacked.index = unstacked.index.time unstacked.columns = unstacked.columns.droplevel(0) with sns.axes_style("white"): - ax = sns.heatmap(unstacked, cmap='seismic', mask=unstacked < min_volume, vmin=min_volume, cbar_kws={'label': 'Shares Available'}, center=0, antialiased = False) - -ax.set(xlabel='Quoted Price', ylabel='Quote Time') + ax = sns.heatmap( + unstacked, + cmap="seismic", + mask=unstacked < min_volume, + vmin=min_volume, + cbar_kws={"label": "Shares Available"}, + center=0, + antialiased=False, + ) + +ax.set(xlabel="Quoted Price", ylabel="Quote Time") # Plot layers of best bid, best ask, and midpoint in special colors. 
-#best_bids = unstacked[unstacked == best_bid_value].copy().notnull() +# best_bids = unstacked[unstacked == best_bid_value].copy().notnull() midpoints = unstacked[unstacked == midpoint_value].copy().notnull() -#best_asks = unstacked[unstacked == best_ask_value].copy().notnull() +# best_asks = unstacked[unstacked == best_ask_value].copy().notnull() if SHOW_BEST: - #sns.heatmap(best_bids, cmap=['xkcd:hot purple'], mask=~best_bids, cbar=False, ax=ax) - #sns.heatmap(midpoints, cmap=['xkcd:hot green'], mask=~midpoints, cbar=False, ax=ax) - sns.heatmap(midpoints, cmap=['black'], mask=~midpoints, cbar=False, ax=ax) - #sns.heatmap(best_asks, cmap=['xkcd:hot pink'], mask=~best_asks, cbar=False, ax=ax) + # sns.heatmap(best_bids, cmap=['xkcd:hot purple'], mask=~best_bids, cbar=False, ax=ax) + # sns.heatmap(midpoints, cmap=['xkcd:hot green'], mask=~midpoints, cbar=False, ax=ax) + sns.heatmap(midpoints, cmap=["black"], mask=~midpoints, cbar=False, ax=ax) + # sns.heatmap(best_asks, cmap=['xkcd:hot pink'], mask=~best_asks, cbar=False, ax=ax) plt.tight_layout() # This section plots the historical order book (no depth available). if PLOT_HISTORICAL: - fig = plt.figure(figsize=(12,9)) + fig = plt.figure(figsize=(12, 9)) - df_hist = df_hist.between_time('9:30', '16:00') - #df_hist = df_hist.between_time('10:00', '10:05') - df_hist = df_hist.resample('1S').last().ffill() + df_hist = df_hist.between_time("9:30", "16:00") + # df_hist = df_hist.between_time('10:00', '10:05') + df_hist = df_hist.resample("1S").last().ffill() - df = prepare_histogram(df_hist) - df.index = df.index.time + df = prepare_histogram(df_hist) + df.index = df.index.time - # There's no order book depth anyway, so make all bids the same volume - # and all asks the same volume, so they're easy to see. - df[df > 0] = 1 - df[df < 0] = -1 + # There's no order book depth anyway, so make all bids the same volume + # and all asks the same volume, so they're easy to see. 
+ df[df > 0] = 1 + df[df < 0] = -1 - ax = sns.heatmap(df, cmap=sns.color_palette("coolwarm", 7), cbar_kws={'label': 'Shares Available'}, center=0) - ax.set(xlabel='Quoted Price', ylabel='Quote Time') + ax = sns.heatmap( + df, + cmap=sns.color_palette("coolwarm", 7), + cbar_kws={"label": "Shares Available"}, + center=0, + ) + ax.set(xlabel="Quoted Price", ylabel="Quote Time") - plt.tight_layout() + plt.tight_layout() # Show all the plots. plt.show() - diff --git a/cli/dump.py b/cli/dump.py index c3c9ae1f1..328b90aa3 100644 --- a/cli/dump.py +++ b/cli/dump.py @@ -1,23 +1,23 @@ -import pandas as pd import sys +import pandas as pd + # Auto-detect terminal width. pd.options.display.width = None pd.options.display.max_rows = 500000 pd.options.display.max_colwidth = 200 if len(sys.argv) < 2: - print ("Usage: python dump.py [List of Event Types]") - sys.exit() + print("Usage: python dump.py [List of Event Types]") + sys.exit() file = sys.argv[1] -df = pd.read_pickle(file, compression='bz2') +df = pd.read_pickle(file, compression="bz2") if len(sys.argv) > 2: - events = sys.argv[2:] - event = "|".join(events) - df = df[df['EventType'].str.contains(event)] + events = sys.argv[2:] + event = "|".join(events) + df = df[df["EventType"].str.contains(event)] print(df) - diff --git a/cli/event_midpoint.py b/cli/event_midpoint.py index 67d9d14a4..19d6bf6e8 100644 --- a/cli/event_midpoint.py +++ b/cli/event_midpoint.py @@ -1,12 +1,14 @@ import ast + import matplotlib -matplotlib.use('TkAgg') -import matplotlib.pyplot as plt -import pandas as pd + +matplotlib.use("TkAgg") import os import re import sys +import matplotlib.pyplot as plt +import pandas as pd from joblib import Memory # Auto-detect terminal width. @@ -15,79 +17,77 @@ pd.options.display.max_colwidth = 200 # Initialize a persistent memcache. -#mem_sim = Memory(cachedir='./.cached_plot_sim', verbose=0) +# mem_sim = Memory(cachedir='./.cached_plot_sim', verbose=0) # Linewidth for plots. LW = 2 # Rolling window for smoothing. 
-#SIM_WINDOW = 250 +# SIM_WINDOW = 250 SIM_WINDOW = 1 # Used to read and cache simulated quotes. # Doesn't actually pay attention to symbols yet. -#@mem_sim.cache -def read_simulated_quotes (file, symbol): - print ("Simulated quotes were not cached. This will take a minute.") - df = pd.read_pickle(file, compression='bz2') - df['Timestamp'] = df.index +# @mem_sim.cache +def read_simulated_quotes(file, symbol): + print("Simulated quotes were not cached. This will take a minute.") + df = pd.read_pickle(file, compression="bz2") + df["Timestamp"] = df.index - # Keep only the last bid and last ask event at each timestamp. - df = df.drop_duplicates(subset=['Timestamp','EventType'], keep='last') + # Keep only the last bid and last ask event at each timestamp. + df = df.drop_duplicates(subset=["Timestamp", "EventType"], keep="last") - del df['Timestamp'] + del df["Timestamp"] - df_bid = df[df['EventType'] == 'BEST_BID'].copy() - df_ask = df[df['EventType'] == 'BEST_ASK'].copy() + df_bid = df[df["EventType"] == "BEST_BID"].copy() + df_ask = df[df["EventType"] == "BEST_ASK"].copy() - if len(df) <= 0: - print ("There appear to be no simulated quotes.") - sys.exit() - - df_bid['BEST_BID'] = [b for s,b,bv in df_bid['Event'].str.split(',')] - df_bid['BEST_BID_VOL'] = [bv for s,b,bv in df_bid['Event'].str.split(',')] - df_ask['BEST_ASK'] = [a for s,a,av in df_ask['Event'].str.split(',')] - df_ask['BEST_ASK_VOL'] = [av for s,a,av in df_ask['Event'].str.split(',')] + if len(df) <= 0: + print("There appear to be no simulated quotes.") + sys.exit() - df_bid['BEST_BID'] = df_bid['BEST_BID'].str.replace('$','').astype('float64') - df_ask['BEST_ASK'] = df_ask['BEST_ASK'].str.replace('$','').astype('float64') + df_bid["BEST_BID"] = [b for s, b, bv in df_bid["Event"].str.split(",")] + df_bid["BEST_BID_VOL"] = [bv for s, b, bv in df_bid["Event"].str.split(",")] + df_ask["BEST_ASK"] = [a for s, a, av in df_ask["Event"].str.split(",")] + df_ask["BEST_ASK_VOL"] = [av for s, a, av in 
df_ask["Event"].str.split(",")] - df_bid['BEST_BID_VOL'] = df_bid['BEST_BID_VOL'].astype('float64') - df_ask['BEST_ASK_VOL'] = df_ask['BEST_ASK_VOL'].astype('float64') + df_bid["BEST_BID"] = df_bid["BEST_BID"].str.replace("$", "").astype("float64") + df_ask["BEST_ASK"] = df_ask["BEST_ASK"].str.replace("$", "").astype("float64") - df = df_bid.join(df_ask, how='outer', lsuffix='.bid', rsuffix='.ask') - df['BEST_BID'] = df['BEST_BID'].ffill().bfill() - df['BEST_ASK'] = df['BEST_ASK'].ffill().bfill() - df['BEST_BID_VOL'] = df['BEST_BID_VOL'].ffill().bfill() - df['BEST_ASK_VOL'] = df['BEST_ASK_VOL'].ffill().bfill() + df_bid["BEST_BID_VOL"] = df_bid["BEST_BID_VOL"].astype("float64") + df_ask["BEST_ASK_VOL"] = df_ask["BEST_ASK_VOL"].astype("float64") - df['MIDPOINT'] = (df['BEST_BID'] + df['BEST_ASK']) / 2.0 + df = df_bid.join(df_ask, how="outer", lsuffix=".bid", rsuffix=".ask") + df["BEST_BID"] = df["BEST_BID"].ffill().bfill() + df["BEST_ASK"] = df["BEST_ASK"].ffill().bfill() + df["BEST_BID_VOL"] = df["BEST_BID_VOL"].ffill().bfill() + df["BEST_ASK_VOL"] = df["BEST_ASK_VOL"].ffill().bfill() - return df + df["MIDPOINT"] = (df["BEST_BID"] + df["BEST_ASK"]) / 2.0 + return df # Main program starts here. if len(sys.argv) < 3: - print ("Usage: python event_midpoint.py ") - sys.exit() + print("Usage: python event_midpoint.py ") + sys.exit() # TODO: only really works for one symbol right now. symbol = sys.argv[1] sim_files = sys.argv[2:] -fig,ax = plt.subplots(figsize=(12,9), nrows=1, ncols=1) - +fig, ax = plt.subplots(figsize=(12, 9), nrows=1, ncols=1) # Plot each impact simulation with the baseline subtracted (i.e. residual effect). i = 1 legend = [] -#legend = ['baseline'] +# legend = ['baseline'] # Events is now a dictionary of event lists (key == greed parameter). events = {} @@ -97,54 +97,56 @@ def read_simulated_quotes (file, symbol): for sim_file in sim_files: - # Skip baseline files. - if 'baseline' in sim_file: continue + # Skip baseline files. 
+ if "baseline" in sim_file: + continue - if 'greed' in os.path.dirname(sim_file): - # Group plots by greed parameter. - m = re.search("greed(\d\d\d)_", sim_file) - g = m.group(1) - else: - g = 'greed' + if "greed" in os.path.dirname(sim_file): + # Group plots by greed parameter. + m = re.search("greed(\d\d\d)_", sim_file) + g = m.group(1) + else: + g = "greed" - baseline_file = os.path.join(os.path.dirname(sim_file) + '_baseline', os.path.basename(sim_file)) - print ("Visualizing simulation baseline from {}".format(baseline_file)) + baseline_file = os.path.join(os.path.dirname(sim_file) + "_baseline", os.path.basename(sim_file)) + print("Visualizing simulation baseline from {}".format(baseline_file)) - df_baseline = read_simulated_quotes(baseline_file, symbol) + df_baseline = read_simulated_quotes(baseline_file, symbol) - # Read the event file. - print ("Visualizing simulated {} from {}".format(symbol, sim_file)) + # Read the event file. + print("Visualizing simulated {} from {}".format(symbol, sim_file)) - df_sim = read_simulated_quotes(sim_file, symbol) + df_sim = read_simulated_quotes(sim_file, symbol) - plt.rcParams.update({'font.size': 12}) + plt.rcParams.update({"font.size": 12}) - # Given nanosecond ("time step") data, we can just force everything to - # fill out an integer index of nanoseconds. - rng = pd.date_range(start=df_sim.index[0], end=df_sim.index[-1], freq='1N') + # Given nanosecond ("time step") data, we can just force everything to + # fill out an integer index of nanoseconds. 
+ rng = pd.date_range(start=df_sim.index[0], end=df_sim.index[-1], freq="1N") - df_baseline = df_baseline[~df_baseline.index.duplicated(keep='last')] - df_baseline = df_baseline.reindex(rng,method='ffill') - df_baseline = df_baseline.reset_index(drop=True) + df_baseline = df_baseline[~df_baseline.index.duplicated(keep="last")] + df_baseline = df_baseline.reindex(rng, method="ffill") + df_baseline = df_baseline.reset_index(drop=True) - df_sim = df_sim[~df_sim.index.duplicated(keep='last')] - df_sim = df_sim.reindex(rng,method='ffill') - df_sim = df_sim.reset_index(drop=True) + df_sim = df_sim[~df_sim.index.duplicated(keep="last")] + df_sim = df_sim.reindex(rng, method="ffill") + df_sim = df_sim.reset_index(drop=True) - # Absolute price difference. - #s = df_sim['MIDPOINT'] - df_baseline['MIDPOINT'] + # Absolute price difference. + # s = df_sim['MIDPOINT'] - df_baseline['MIDPOINT'] - # Relative price difference. - s = (df_sim['MIDPOINT'] / df_baseline['MIDPOINT']) - 1.0 + # Relative price difference. + s = (df_sim["MIDPOINT"] / df_baseline["MIDPOINT"]) - 1.0 - s = s.rolling(window=SIM_WINDOW).mean() - s.name = sim_file + s = s.rolling(window=SIM_WINDOW).mean() + s.name = sim_file - if g not in events: events[g] = [] + if g not in events: + events[g] = [] - events[g].append(s.copy()) + events[g].append(s.copy()) - i += 1 + i += 1 # Now have a list of series (each an event) that are time-aligned. BUT the data is @@ -153,56 +155,55 @@ def read_simulated_quotes (file, symbol): legend = [] for g in events: - df = pd.DataFrame() - legend.append("greed = " + str(g)) - - for s in events[g]: - print ("Joining {}".format(s.name)) - df = df.join(s, how='outer') - - df.dropna(how='all', inplace=True) - df = df.ffill().bfill() - - # Smooth after combining means at each instant-of-trade. - #df.mean(axis=1).rolling(window=250).mean().plot(grid=True, linewidth=LW, ax=ax) - - # No additional smoothing. - m = df.mean(axis=1) - s = df.std(axis=1) - - # Plot mean and std. 
- m.plot(grid=True, linewidth=LW, ax=ax, fontsize=12, label="Relative mean mid-price") - - # Fill std region? - #ax.fill_between(m.index, m-s, m+s, alpha=0.2) - + df = pd.DataFrame() + legend.append("greed = " + str(g)) + + for s in events[g]: + print("Joining {}".format(s.name)) + df = df.join(s, how="outer") + + df.dropna(how="all", inplace=True) + df = df.ffill().bfill() + + # Smooth after combining means at each instant-of-trade. + # df.mean(axis=1).rolling(window=250).mean().plot(grid=True, linewidth=LW, ax=ax) + + # No additional smoothing. + m = df.mean(axis=1) + s = df.std(axis=1) + + # Plot mean and std. + m.plot(grid=True, linewidth=LW, ax=ax, fontsize=12, label="Relative mean mid-price") + + # Fill std region? + # ax.fill_between(m.index, m-s, m+s, alpha=0.2) + # Do the rest a single time for the whole plot. # If we need a vertical "time of event" line... -ax.axvline(x=200, color='0.5', linestyle='--', linewidth=2, label="Order placement time") - +ax.axvline(x=200, color="0.5", linestyle="--", linewidth=2, label="Order placement time") + # Absolute or relative time labels... -ax.set_xticklabels(['0','10000','20000','30000','40000','50000','60000','70000']) -#ax.set_xticklabels(['T-30', 'T-20', 'T-10', 'T', 'T+10', 'T+20', 'T+30']) +ax.set_xticklabels(["0", "10000", "20000", "30000", "40000", "50000", "60000", "70000"]) +# ax.set_xticklabels(['T-30', 'T-20', 'T-10', 'T', 'T+10', 'T+20', 'T+30']) ax.legend(legend) -#ax.legend() - +# ax.legend() + # Force y axis limits to make multiple plots line up exactly... -#ax.set_ylim(-0.0065,0.0010) -#ax.set_ylim(-0.0010,0.0065) +# ax.set_ylim(-0.0065,0.0010) +# ax.set_ylim(-0.0010,0.0065) # If an in-figure super title is required... 
-#plt.suptitle('Impact Event Study: {}'.format(symbol)) - -ax.set_xlabel('Relative Time (ms)', fontsize=12, fontweight='bold') -ax.set_ylabel('Baseline-Relative Price', fontsize=12, fontweight='bold') - -#plt.savefig('IABS_SELL_100_multi_size.png') -#plt.savefig('abides_impact_sell.png') -#plt.savefig('abides_multi_buy.png') -#plt.savefig('abides_multi_sell.png') - +# plt.suptitle('Impact Event Study: {}'.format(symbol)) + +ax.set_xlabel("Relative Time (ms)", fontsize=12, fontweight="bold") +ax.set_ylabel("Baseline-Relative Price", fontsize=12, fontweight="bold") + +# plt.savefig('IABS_SELL_100_multi_size.png') +# plt.savefig('abides_impact_sell.png') +# plt.savefig('abides_multi_buy.png') +# plt.savefig('abides_multi_sell.png') + plt.show() - diff --git a/cli/event_ticker.py b/cli/event_ticker.py index dd7a7c918..aa50313d3 100644 --- a/cli/event_ticker.py +++ b/cli/event_ticker.py @@ -1,12 +1,14 @@ import ast + import matplotlib -matplotlib.use('TkAgg') -import matplotlib.pyplot as plt -import pandas as pd + +matplotlib.use("TkAgg") import os import re import sys +import matplotlib.pyplot as plt +import pandas as pd from joblib import Memory # Auto-detect terminal width. @@ -15,57 +17,56 @@ pd.options.display.max_colwidth = 200 # Initialize a persistent memcache. -mem_sim = Memory(cachedir='./.cached_plot_sim', verbose=0) +mem_sim = Memory(cachedir="./.cached_plot_sim", verbose=0) # Linewidth for plots. LW = 2 # Rolling window for smoothing. -#SIM_WINDOW = 250 +# SIM_WINDOW = 250 SIM_WINDOW = 1 # Used to read and cache simulated trades. # Doesn't actually pay attention to symbols yet. -#@mem_sim.cache -def read_simulated_trades (file, symbol): - #print ("Simulated trades were not cached. 
This will take a minute.") - df = pd.read_pickle(file, compression='bz2') - df = df[df['EventType'] == 'LAST_TRADE'] - - if len(df) <= 0: - print ("There appear to be no simulated trades.") - sys.exit() +# @mem_sim.cache +def read_simulated_trades(file, symbol): + # print ("Simulated trades were not cached. This will take a minute.") + df = pd.read_pickle(file, compression="bz2") + df = df[df["EventType"] == "LAST_TRADE"] + + if len(df) <= 0: + print("There appear to be no simulated trades.") + sys.exit() - df['PRICE'] = [y for x,y in df['Event'].str.split(',')] - df['SIZE'] = [x for x,y in df['Event'].str.split(',')] + df["PRICE"] = [y for x, y in df["Event"].str.split(",")] + df["SIZE"] = [x for x, y in df["Event"].str.split(",")] - df['PRICE'] = df['PRICE'].str.replace('$','').astype('float64') - df['SIZE'] = df['SIZE'].astype('float64') + df["PRICE"] = df["PRICE"].str.replace("$", "").astype("float64") + df["SIZE"] = df["SIZE"].astype("float64") - return df + return df # Main program starts here. if len(sys.argv) < 3: - print ("Usage: python mean_std_event.py ") - sys.exit() + print("Usage: python mean_std_event.py ") + sys.exit() # TODO: only really works for one symbol right now. symbol = sys.argv[1] sim_files = sys.argv[2:] -fig,ax = plt.subplots(figsize=(12,9), nrows=1, ncols=1) - +fig, ax = plt.subplots(figsize=(12, 9), nrows=1, ncols=1) # Plot each impact simulation with the baseline subtracted (i.e. residual effect). i = 1 legend = [] -#legend = ['baseline'] +# legend = ['baseline'] events = [] @@ -74,54 +75,55 @@ def read_simulated_trades (file, symbol): for sim_file in sim_files: - # Skip baseline files. - if 'baseline' in sim_file: continue + # Skip baseline files. 
+ if "baseline" in sim_file: + continue - baseline_file = os.path.join(os.path.dirname(sim_file) + '_baseline', os.path.basename(sim_file)) - print ("Visualizing simulation baseline from {}".format(baseline_file)) + baseline_file = os.path.join(os.path.dirname(sim_file) + "_baseline", os.path.basename(sim_file)) + print("Visualizing simulation baseline from {}".format(baseline_file)) - df_baseline = read_simulated_trades(baseline_file, symbol) + df_baseline = read_simulated_trades(baseline_file, symbol) - # Read the event file. - print ("Visualizing simulated {} from {}".format(symbol, sim_file)) + # Read the event file. + print("Visualizing simulated {} from {}".format(symbol, sim_file)) - df_sim = read_simulated_trades(sim_file, symbol) + df_sim = read_simulated_trades(sim_file, symbol) - plt.rcParams.update({'font.size': 12}) + plt.rcParams.update({"font.size": 12}) - # Given nanosecond ("time step") data, we can just force everything to - # fill out an integer index of nanoseconds. - rng = pd.date_range(start=df_sim.index[0], end=df_sim.index[-1], freq='1N') + # Given nanosecond ("time step") data, we can just force everything to + # fill out an integer index of nanoseconds. 
+ rng = pd.date_range(start=df_sim.index[0], end=df_sim.index[-1], freq="1N") - df_baseline = df_baseline[~df_baseline.index.duplicated(keep='last')] - df_baseline = df_baseline.reindex(rng,method='ffill') - df_baseline = df_baseline.reset_index(drop=True) + df_baseline = df_baseline[~df_baseline.index.duplicated(keep="last")] + df_baseline = df_baseline.reindex(rng, method="ffill") + df_baseline = df_baseline.reset_index(drop=True) - df_sim = df_sim[~df_sim.index.duplicated(keep='last')] - df_sim = df_sim.reindex(rng,method='ffill') - df_sim = df_sim.reset_index(drop=True) + df_sim = df_sim[~df_sim.index.duplicated(keep="last")] + df_sim = df_sim.reindex(rng, method="ffill") + df_sim = df_sim.reset_index(drop=True) - s = df_sim['PRICE'] - df_baseline['PRICE'] - s = s.rolling(window=SIM_WINDOW).mean() + s = df_sim["PRICE"] - df_baseline["PRICE"] + s = s.rolling(window=SIM_WINDOW).mean() - s.name = sim_file - events.append(s.copy()) + s.name = sim_file + events.append(s.copy()) - i += 1 + i += 1 # Now have a list of series (each an event) that are time-aligned. df = pd.DataFrame() for s in events: - print ("Joining {}".format(s.name)) - df = df.join(s, how='outer') + print("Joining {}".format(s.name)) + df = df.join(s, how="outer") -df.dropna(how='all', inplace=True) +df.dropna(how="all", inplace=True) df = df.ffill().bfill() # Smooth after combining means at each instant-of-trade. -#df.mean(axis=1).rolling(window=250).mean().plot(grid=True, linewidth=LW, ax=ax) +# df.mean(axis=1).rolling(window=250).mean().plot(grid=True, linewidth=LW, ax=ax) # No additional smoothing. m = df.mean(axis=1) @@ -131,22 +133,20 @@ def read_simulated_trades (file, symbol): m.plot(grid=True, linewidth=LW, ax=ax) # Shade the stdev region? -ax.fill_between(m.index, m-s, m+s, alpha=0.2) +ax.fill_between(m.index, m - s, m + s, alpha=0.2) # Override prettier axis ticks... 
-#ax.set_xticklabels(['T-30', 'T-20', 'T-10', 'T', 'T+10', 'T+20', 'T+30']) +# ax.set_xticklabels(['T-30', 'T-20', 'T-10', 'T', 'T+10', 'T+20', 'T+30']) # Force y axis limits to match some other plot. -#ax.set_ylim(-0.1, 0.5) +# ax.set_ylim(-0.1, 0.5) # Set a super title if required. -plt.suptitle('Impact Event Study: {}'.format(symbol)) +plt.suptitle("Impact Event Study: {}".format(symbol)) -ax.set_xlabel('Relative Time') -ax.set_ylabel('Baseline-Relative Price') +ax.set_xlabel("Relative Time") +ax.set_ylabel("Baseline-Relative Price") -#plt.savefig('background_{}.png'.format(b)) +# plt.savefig('background_{}.png'.format(b)) plt.show() - - diff --git a/cli/intraday_index.py b/cli/intraday_index.py index 85ecd3847..48a0dec7a 100644 --- a/cli/intraday_index.py +++ b/cli/intraday_index.py @@ -5,14 +5,16 @@ # Also graphs the mid of the underlying symbols import ast + import matplotlib -matplotlib.use('Agg') -import matplotlib.pyplot as plt -import pandas as pd + +matplotlib.use("Agg") import os import sys -import numpy as np +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd from joblib import Memory # Auto-detect terminal width. @@ -21,235 +23,253 @@ pd.options.display.max_colwidth = 200 # Initialize a persistent memcache. -mem_hist = Memory(cachedir='./.cached_plot_hist', verbose=0) -mem_sim = Memory(cachedir='./.cached_plot_sim', verbose=0) +mem_hist = Memory(cachedir="./.cached_plot_hist", verbose=0) +mem_sim = Memory(cachedir="./.cached_plot_sim", verbose=0) PRINT_BASELINE = False PRINT_DELTA_ONLY = False -BETWEEN_START = pd.to_datetime('09:30').time() -BETWEEN_END = pd.to_datetime('09:30:00.000001').time() +BETWEEN_START = pd.to_datetime("09:30").time() +BETWEEN_END = pd.to_datetime("09:30:00.000001").time() # Linewidth for plots. LW = 2 + # Used to read and cache simulated quotes. # Doesn't actually pay attention to symbols yet. -#@mem_sim.cache -def read_simulated_quotes (file): - print ("Simulated quotes were not cached. 
This will take a minute.") - df = pd.read_pickle(file, compression='bz2') - df['Timestamp'] = df.index - - df_bid = df[df['EventType'] == 'BEST_BID'].copy() - df_ask = df[df['EventType'] == 'BEST_ASK'].copy() - - if len(df) <= 0: - print ("There appear to be no simulated quotes.") - sys.exit() - - df_bid['SYM'] = [s for s,b,bv in df_bid['Event'].str.split(',')] - df_bid['BEST_BID'] = [b for s,b,bv in df_bid['Event'].str.split(',')] - df_bid['BEST_BID_VOL'] = [bv for s,b,bv in df_bid['Event'].str.split(',')] - df_ask['SYM'] = [s for s,a,av in df_ask['Event'].str.split(',')] - df_ask['BEST_ASK'] = [a for s,a,av in df_ask['Event'].str.split(',')] - df_ask['BEST_ASK_VOL'] = [av for s,a,av in df_ask['Event'].str.split(',')] - - df_bid['BEST_BID'] = df_bid['BEST_BID'].str.replace('$','').astype('float64') - df_ask['BEST_ASK'] = df_ask['BEST_ASK'].str.replace('$','').astype('float64') - - df_bid['BEST_BID_VOL'] = df_bid['BEST_BID_VOL'].astype('float64') - df_ask['BEST_ASK_VOL'] = df_ask['BEST_ASK_VOL'].astype('float64') - - - # Keep only the last bid and last ask event at each timestamp. - df_bid = df_bid.drop_duplicates(subset=['Timestamp', 'SYM'], keep='last') - df_ask = df_ask.drop_duplicates(subset=['Timestamp', 'SYM'], keep='last') - - #df = df_bid.join(df_ask, how='outer', lsuffix='.bid', rsuffix='.ask') - - # THIS ISN'T TRUE, YOU CAN'T GET THE MID FROM FUTURE ORDERS!!! 
- #df['BEST_BID'] = df['BEST_BID'].ffill().bfill() - #df['BEST_ASK'] = df['BEST_ASK'].ffill().bfill() - #df['BEST_BID_VOL'] = df['BEST_BID_VOL'].ffill().bfill() - #df['BEST_ASK_VOL'] = df['BEST_ASK_VOL'].ffill().bfill() - df = pd.merge(df_bid, df_ask, how='left', left_on=['Timestamp','SYM'], right_on = ['Timestamp','SYM']) - - df['MIDPOINT'] = (df['BEST_BID'] + df['BEST_ASK']) / 2.0 - - #ts = df['Timestamp.bid'] - ts = df['Timestamp'] - #print(df) - df['INTRADAY_INDEX'] = 0 - #df['Timestamp.bid'] = df.index.to_series() - #df['Timestamp'] = df.index.to_series() - #df['SYM.bid'] = df['SYM.bid'].fillna(df['SYM.ask']) - #symbols = df['SYM.bid'].unique() - symbols = df['SYM'].unique() - #print(df) - for i,x in enumerate(symbols): - #df_one_sym = df[df['SYM.bid']==x] - df_one_sym = df[df['SYM']==x] - #df_one_sym = df_one_sym[['Timestamp.bid','MIDPOINT','BEST_BID','BEST_ASK']] - df_one_sym = df_one_sym[['Timestamp','MIDPOINT','BEST_BID','BEST_ASK']] - #md = pd.merge_asof(ts, df_one_sym, on='Timestamp.bid') - #print(ts) - #print(df_one_sym) - md = pd.merge_asof(ts, df_one_sym, on='Timestamp') - md = md.set_index(df.index) - df['MIDPOINT.' + x] = md['MIDPOINT'] - df['BID.' + x] = md['BEST_BID'] - df['ASK.' + x] = md['BEST_ASK'] - if x != 'ETF': - df['INTRADAY_INDEX'] = df['INTRADAY_INDEX'] + md['MIDPOINT'] - df['MSE'] = (df['INTRADAY_INDEX'] - df['MIDPOINT.ETF'])**2 - - #del df['Timestamp.bid'] - #del df['Timestamp.ask'] - df = df.set_index(df['Timestamp']) - del df['Timestamp'] - - return df - +# @mem_sim.cache +def read_simulated_quotes(file): + print("Simulated quotes were not cached. 
This will take a minute.") + df = pd.read_pickle(file, compression="bz2") + df["Timestamp"] = df.index + + df_bid = df[df["EventType"] == "BEST_BID"].copy() + df_ask = df[df["EventType"] == "BEST_ASK"].copy() + + if len(df) <= 0: + print("There appear to be no simulated quotes.") + sys.exit() + + df_bid["SYM"] = [s for s, b, bv in df_bid["Event"].str.split(",")] + df_bid["BEST_BID"] = [b for s, b, bv in df_bid["Event"].str.split(",")] + df_bid["BEST_BID_VOL"] = [bv for s, b, bv in df_bid["Event"].str.split(",")] + df_ask["SYM"] = [s for s, a, av in df_ask["Event"].str.split(",")] + df_ask["BEST_ASK"] = [a for s, a, av in df_ask["Event"].str.split(",")] + df_ask["BEST_ASK_VOL"] = [av for s, a, av in df_ask["Event"].str.split(",")] + + df_bid["BEST_BID"] = df_bid["BEST_BID"].str.replace("$", "").astype("float64") + df_ask["BEST_ASK"] = df_ask["BEST_ASK"].str.replace("$", "").astype("float64") + + df_bid["BEST_BID_VOL"] = df_bid["BEST_BID_VOL"].astype("float64") + df_ask["BEST_ASK_VOL"] = df_ask["BEST_ASK_VOL"].astype("float64") + + # Keep only the last bid and last ask event at each timestamp. + df_bid = df_bid.drop_duplicates(subset=["Timestamp", "SYM"], keep="last") + df_ask = df_ask.drop_duplicates(subset=["Timestamp", "SYM"], keep="last") + + # df = df_bid.join(df_ask, how='outer', lsuffix='.bid', rsuffix='.ask') + + # THIS ISN'T TRUE, YOU CAN'T GET THE MID FROM FUTURE ORDERS!!! 
+ # df['BEST_BID'] = df['BEST_BID'].ffill().bfill() + # df['BEST_ASK'] = df['BEST_ASK'].ffill().bfill() + # df['BEST_BID_VOL'] = df['BEST_BID_VOL'].ffill().bfill() + # df['BEST_ASK_VOL'] = df['BEST_ASK_VOL'].ffill().bfill() + df = pd.merge( + df_bid, + df_ask, + how="left", + left_on=["Timestamp", "SYM"], + right_on=["Timestamp", "SYM"], + ) + + df["MIDPOINT"] = (df["BEST_BID"] + df["BEST_ASK"]) / 2.0 + + # ts = df['Timestamp.bid'] + ts = df["Timestamp"] + # print(df) + df["INTRADAY_INDEX"] = 0 + # df['Timestamp.bid'] = df.index.to_series() + # df['Timestamp'] = df.index.to_series() + # df['SYM.bid'] = df['SYM.bid'].fillna(df['SYM.ask']) + # symbols = df['SYM.bid'].unique() + symbols = df["SYM"].unique() + # print(df) + for i, x in enumerate(symbols): + # df_one_sym = df[df['SYM.bid']==x] + df_one_sym = df[df["SYM"] == x] + # df_one_sym = df_one_sym[['Timestamp.bid','MIDPOINT','BEST_BID','BEST_ASK']] + df_one_sym = df_one_sym[["Timestamp", "MIDPOINT", "BEST_BID", "BEST_ASK"]] + # md = pd.merge_asof(ts, df_one_sym, on='Timestamp.bid') + # print(ts) + # print(df_one_sym) + md = pd.merge_asof(ts, df_one_sym, on="Timestamp") + md = md.set_index(df.index) + df["MIDPOINT." + x] = md["MIDPOINT"] + df["BID." + x] = md["BEST_BID"] + df["ASK." + x] = md["BEST_ASK"] + if x != "ETF": + df["INTRADAY_INDEX"] = df["INTRADAY_INDEX"] + md["MIDPOINT"] + df["MSE"] = (df["INTRADAY_INDEX"] - df["MIDPOINT.ETF"]) ** 2 + + # del df['Timestamp.bid'] + # del df['Timestamp.ask'] + df = df.set_index(df["Timestamp"]) + del df["Timestamp"] + + return df # Main program starts here. if len(sys.argv) < 2: - print ("Usage: python midpoint_plot.py ") - sys.exit() + print("Usage: python midpoint_plot.py ") + sys.exit() # TODO: only really works for one symbol right now. 
-#symbols = sys.argv[1] +# symbols = sys.argv[1] sim_file = sys.argv[1] -print ("Visualizing simulated {} from {}".format(12,sim_file)) +print("Visualizing simulated {} from {}".format(12, sim_file)) df_sim = read_simulated_quotes(sim_file) if PRINT_BASELINE: - baseline_file = os.path.join(os.path.dirname(sim_file) + '_baseline', os.path.basename(sim_file)) - print (baseline_file) - df_baseline = read_simulated_quotes(baseline_file) + baseline_file = os.path.join(os.path.dirname(sim_file) + "_baseline", os.path.basename(sim_file)) + print(baseline_file) + df_baseline = read_simulated_quotes(baseline_file) -plt.rcParams.update({'font.size': 12}) +plt.rcParams.update({"font.size": 12}) # Use to restrict time to plot. df_sim = df_sim.between_time(BETWEEN_START, BETWEEN_END) if PRINT_BASELINE: - df_baseline = df_baseline.between_time(BETWEEN_START, BETWEEN_END) + df_baseline = df_baseline.between_time(BETWEEN_START, BETWEEN_END) -fig,ax = plt.subplots(figsize=(12,9), nrows=1, ncols=1) +fig, ax = plt.subplots(figsize=(12, 9), nrows=1, ncols=1) axes = [ax] # For smoothing... -#hist_window = 100 -#sim_window = 100 +# hist_window = 100 +# sim_window = 100 hist_window = 1 sim_window = 1 if PRINT_BASELINE: - # For nanosecond experiments, turn it into int index. Pandas gets weird if all - # the times vary only by a few nanoseconds. - rng = pd.date_range(start=df_sim.index[0], end=df_sim.index[-1], freq='1N') + # For nanosecond experiments, turn it into int index. Pandas gets weird if all + # the times vary only by a few nanoseconds. 
+ rng = pd.date_range(start=df_sim.index[0], end=df_sim.index[-1], freq="1N") - df_baseline = df_baseline[~df_baseline.index.duplicated(keep='last')] - df_baseline = df_baseline.reindex(rng,method='ffill') - df_baseline = df_baseline.reset_index(drop=True) + df_baseline = df_baseline[~df_baseline.index.duplicated(keep="last")] + df_baseline = df_baseline.reindex(rng, method="ffill") + df_baseline = df_baseline.reset_index(drop=True) - df_sim = df_sim[~df_sim.index.duplicated(keep='last')] - df_sim = df_sim.reindex(rng,method='ffill') - df_sim = df_sim.reset_index(drop=True) + df_sim = df_sim[~df_sim.index.duplicated(keep="last")] + df_sim = df_sim.reindex(rng, method="ffill") + df_sim = df_sim.reset_index(drop=True) - # Print both separately. - if PRINT_DELTA_ONLY: - # Print the difference as a single series. - df_diff = df_sim['MIDPOINT'] - df_baseline['MIDPOINT'] + # Print both separately. + if PRINT_DELTA_ONLY: + # Print the difference as a single series. + df_diff = df_sim["MIDPOINT"] - df_baseline["MIDPOINT"] - # Smoothing. - df_diff = df_diff.rolling(window=10).mean() + # Smoothing. + df_diff = df_diff.rolling(window=10).mean() - df_diff.plot(color='C0', grid=True, linewidth=LW, ax=axes[0]) + df_diff.plot(color="C0", grid=True, linewidth=LW, ax=axes[0]) - axes[0].legend(['Bid-ask Midpoint Delta']) - else: - df_baseline['MIDPOINT'].plot(color='C0', grid=True, linewidth=LW, ax=axes[0]) - df_sim['MIDPOINT'].plot(color='C1', grid=True, linewidth=LW, alpha=0.9, ax=axes[0]) + axes[0].legend(["Bid-ask Midpoint Delta"]) + else: + df_baseline["MIDPOINT"].plot(color="C0", grid=True, linewidth=LW, ax=axes[0]) + df_sim["MIDPOINT"].plot(color="C1", grid=True, linewidth=LW, alpha=0.9, ax=axes[0]) - axes[0].legend(['Baseline', 'With Impact']) + axes[0].legend(["Baseline", "With Impact"]) else: - #df_sim['PRICE'] = df_sim['PRICE'].rolling(window=sim_window).mean() - - # For nanosecond experiments, turn it into int index. 
Pandas gets weird if all - # the times vary only by a few nanoseconds. - rng = pd.date_range(start=df_sim.index[0], end=df_sim.index[-1], freq='1N') - df_sim = df_sim[~df_sim.index.duplicated(keep='last')] - df_sim = df_sim.reindex(rng,method='ffill') - df_time = df_sim.copy() - df_sim = df_sim.reset_index(drop=True) - - #symbols = df_sim['SYM.bid'].unique() - symbols = df_sim['SYM'].unique() - for i,x in enumerate(symbols): - #df_sim[df_sim['SYM.bid']==x]['MIDPOINT.' + x].plot(color='C1', grid=True, linewidth=LW, alpha=0.9, ax=axes[0]) - df_sim['MIDPOINT.' + x].plot(color='C1', grid=True, linewidth=LW, alpha=0.9, ax=axes[0]) - #df_sim['BID.' + x].plot(color='C2', grid=True, linewidth=LW, alpha=0.9, ax=axes[0]) - #df_sim['ASK.' + x].plot(color='C3', grid=True, linewidth=LW, alpha=0.9, ax=axes[0]) - if x != 'ETF': - axes[0].legend(['Simulated']) - - plt.suptitle('Bid-Ask Midpoint: {}'.format(x)) - - axes[0].set_ylabel('Quote Price') - axes[0].set_xlabel('Quote Time') - - plt.savefig('graphs/background_' + str(x) + '_{}.png'.format('png')) - plt.cla() - - if x == 'ETF': - df_sim['MIDPOINT.' + x].plot(color='C1', grid=True, linewidth=LW, alpha=0.9, ax=axes[0], label = 'ETF Mid') - i = np.argwhere(symbols=='ETF') - symbols_portfolio = np.delete(symbols, i) - df_sim['INTRADAY_INDEX'].plot(color='C4', grid=True, linewidth=LW, alpha=0.9, ax=axes[0], label = 'Index') - #axes[0].legend(['Simulated']) - plt.suptitle('65 ZI, 0 ETF Arb, gamma = 500: {}'.format(symbols_portfolio)) - - axes[0].set_ylabel('Quote Price') - axes[0].set_xlabel('Quote Time') - axes[0].legend() - ymin = 249000 - ymax = 251000 - axes[0].set_ylim([ymin,ymax]) - - plt.savefig('graphs/index_vs_etf_ten_arb_gamma_500'.format('png')) + # df_sim['PRICE'] = df_sim['PRICE'].rolling(window=sim_window).mean() + + # For nanosecond experiments, turn it into int index. Pandas gets weird if all + # the times vary only by a few nanoseconds. 
+ rng = pd.date_range(start=df_sim.index[0], end=df_sim.index[-1], freq="1N") + df_sim = df_sim[~df_sim.index.duplicated(keep="last")] + df_sim = df_sim.reindex(rng, method="ffill") + df_time = df_sim.copy() + df_sim = df_sim.reset_index(drop=True) + + # symbols = df_sim['SYM.bid'].unique() + symbols = df_sim["SYM"].unique() + for i, x in enumerate(symbols): + # df_sim[df_sim['SYM.bid']==x]['MIDPOINT.' + x].plot(color='C1', grid=True, linewidth=LW, alpha=0.9, ax=axes[0]) + df_sim["MIDPOINT." + x].plot(color="C1", grid=True, linewidth=LW, alpha=0.9, ax=axes[0]) + # df_sim['BID.' + x].plot(color='C2', grid=True, linewidth=LW, alpha=0.9, ax=axes[0]) + # df_sim['ASK.' + x].plot(color='C3', grid=True, linewidth=LW, alpha=0.9, ax=axes[0]) + if x != "ETF": + axes[0].legend(["Simulated"]) + + plt.suptitle("Bid-Ask Midpoint: {}".format(x)) + + axes[0].set_ylabel("Quote Price") + axes[0].set_xlabel("Quote Time") + + plt.savefig("graphs/background_" + str(x) + "_{}.png".format("png")) plt.cla() - - i = np.argwhere(symbols=='ETF') - symbols_portfolio = np.delete(symbols, i) - df_sim['INTRADAY_INDEX'].plot(color='C4', grid=True, linewidth=LW, alpha=0.9, ax=axes[0]) - axes[0].legend(['Simulated']) - plt.suptitle('Intraday Index: {}'.format(symbols_portfolio)) - - plt.savefig('graphs/intraday_index_' + str(symbols_portfolio) + '_{}.png'.format('png')) - plt.cla() - - df_sim['MSE'].plot(color='C5', grid=True, linewidth=LW, alpha=0.9, ax=axes[0]) - #axes[0].legend(['Simulated']) - plt.suptitle('65 ZI, 10 ETF Arb, gamma = 500') - axes[0].set_ylabel('Mean Squared Error') - axes[0].set_xlabel('Quote Time') - ymin = -1000 - ymax = 700000 - axes[0].set_ylim([ymin,ymax]) - - plt.savefig('graphs/mse_index_etf_ten_arb_gamma_500'.format('png')) - plt.close() - #df_time.to_csv('test.csv') - -#plt.show() + if x == "ETF": + df_sim["MIDPOINT." 
+ x].plot( + color="C1", + grid=True, + linewidth=LW, + alpha=0.9, + ax=axes[0], + label="ETF Mid", + ) + i = np.argwhere(symbols == "ETF") + symbols_portfolio = np.delete(symbols, i) + df_sim["INTRADAY_INDEX"].plot( + color="C4", + grid=True, + linewidth=LW, + alpha=0.9, + ax=axes[0], + label="Index", + ) + # axes[0].legend(['Simulated']) + plt.suptitle("65 ZI, 0 ETF Arb, gamma = 500: {}".format(symbols_portfolio)) + + axes[0].set_ylabel("Quote Price") + axes[0].set_xlabel("Quote Time") + axes[0].legend() + ymin = 249000 + ymax = 251000 + axes[0].set_ylim([ymin, ymax]) + + plt.savefig("graphs/index_vs_etf_ten_arb_gamma_500".format("png")) + plt.cla() + + i = np.argwhere(symbols == "ETF") + symbols_portfolio = np.delete(symbols, i) + df_sim["INTRADAY_INDEX"].plot(color="C4", grid=True, linewidth=LW, alpha=0.9, ax=axes[0]) + axes[0].legend(["Simulated"]) + plt.suptitle("Intraday Index: {}".format(symbols_portfolio)) + + plt.savefig("graphs/intraday_index_" + str(symbols_portfolio) + "_{}.png".format("png")) + plt.cla() + + df_sim["MSE"].plot(color="C5", grid=True, linewidth=LW, alpha=0.9, ax=axes[0]) + # axes[0].legend(['Simulated']) + plt.suptitle("65 ZI, 10 ETF Arb, gamma = 500") + axes[0].set_ylabel("Mean Squared Error") + axes[0].set_xlabel("Quote Time") + ymin = -1000 + ymax = 700000 + axes[0].set_ylim([ymin, ymax]) + + plt.savefig("graphs/mse_index_etf_ten_arb_gamma_500".format("png")) + plt.close() + # df_time.to_csv('test.csv') + +# plt.show() diff --git a/cli/midpoint_plot.py b/cli/midpoint_plot.py index 033521397..18a594e53 100644 --- a/cli/midpoint_plot.py +++ b/cli/midpoint_plot.py @@ -1,11 +1,13 @@ import ast + import matplotlib -matplotlib.use('TkAgg') -import matplotlib.pyplot as plt -import pandas as pd + +matplotlib.use("TkAgg") import os import sys +import matplotlib.pyplot as plt +import pandas as pd from joblib import Memory # Auto-detect terminal width. 
@@ -14,150 +16,148 @@ pd.options.display.max_colwidth = 200 # Initialize a persistent memcache. -mem_hist = Memory(cachedir='./.cached_plot_hist', verbose=0) -mem_sim = Memory(cachedir='./.cached_plot_sim', verbose=0) +mem_hist = Memory(cachedir="./.cached_plot_hist", verbose=0) +mem_sim = Memory(cachedir="./.cached_plot_sim", verbose=0) PRINT_BASELINE = True PRINT_DELTA_ONLY = True -BETWEEN_START = pd.to_datetime('09:30').time() -BETWEEN_END = pd.to_datetime('09:30:00.000001').time() +BETWEEN_START = pd.to_datetime("09:30").time() +BETWEEN_END = pd.to_datetime("09:30:00.000001").time() # Linewidth for plots. LW = 2 + # Used to read and cache simulated quotes. # Doesn't actually pay attention to symbols yet. -#@mem_sim.cache -def read_simulated_quotes (file, symbol): - print ("Simulated quotes were not cached. This will take a minute.") - df = pd.read_pickle(file, compression='bz2') - df['Timestamp'] = df.index - - # Keep only the last bid and last ask event at each timestamp. - df = df.drop_duplicates(subset=['Timestamp','EventType'], keep='last') +# @mem_sim.cache +def read_simulated_quotes(file, symbol): + print("Simulated quotes were not cached. This will take a minute.") + df = pd.read_pickle(file, compression="bz2") + df["Timestamp"] = df.index - del df['Timestamp'] + # Keep only the last bid and last ask event at each timestamp. 
+ df = df.drop_duplicates(subset=["Timestamp", "EventType"], keep="last") - df_bid = df[df['EventType'] == 'BEST_BID'].copy() - df_ask = df[df['EventType'] == 'BEST_ASK'].copy() + del df["Timestamp"] - if len(df) <= 0: - print ("There appear to be no simulated quotes.") - sys.exit() + df_bid = df[df["EventType"] == "BEST_BID"].copy() + df_ask = df[df["EventType"] == "BEST_ASK"].copy() - df_bid['BEST_BID'] = [b for s,b,bv in df_bid['Event'].str.split(',')] - df_bid['BEST_BID_VOL'] = [bv for s,b,bv in df_bid['Event'].str.split(',')] - df_ask['BEST_ASK'] = [a for s,a,av in df_ask['Event'].str.split(',')] - df_ask['BEST_ASK_VOL'] = [av for s,a,av in df_ask['Event'].str.split(',')] + if len(df) <= 0: + print("There appear to be no simulated quotes.") + sys.exit() - df_bid['BEST_BID'] = df_bid['BEST_BID'].str.replace('$','').astype('float64') - df_ask['BEST_ASK'] = df_ask['BEST_ASK'].str.replace('$','').astype('float64') + df_bid["BEST_BID"] = [b for s, b, bv in df_bid["Event"].str.split(",")] + df_bid["BEST_BID_VOL"] = [bv for s, b, bv in df_bid["Event"].str.split(",")] + df_ask["BEST_ASK"] = [a for s, a, av in df_ask["Event"].str.split(",")] + df_ask["BEST_ASK_VOL"] = [av for s, a, av in df_ask["Event"].str.split(",")] - df_bid['BEST_BID_VOL'] = df_bid['BEST_BID_VOL'].astype('float64') - df_ask['BEST_ASK_VOL'] = df_ask['BEST_ASK_VOL'].astype('float64') + df_bid["BEST_BID"] = df_bid["BEST_BID"].str.replace("$", "").astype("float64") + df_ask["BEST_ASK"] = df_ask["BEST_ASK"].str.replace("$", "").astype("float64") - df = df_bid.join(df_ask, how='outer', lsuffix='.bid', rsuffix='.ask') - df['BEST_BID'] = df['BEST_BID'].ffill().bfill() - df['BEST_ASK'] = df['BEST_ASK'].ffill().bfill() - df['BEST_BID_VOL'] = df['BEST_BID_VOL'].ffill().bfill() - df['BEST_ASK_VOL'] = df['BEST_ASK_VOL'].ffill().bfill() + df_bid["BEST_BID_VOL"] = df_bid["BEST_BID_VOL"].astype("float64") + df_ask["BEST_ASK_VOL"] = df_ask["BEST_ASK_VOL"].astype("float64") - df['MIDPOINT'] = (df['BEST_BID'] + 
df['BEST_ASK']) / 2.0 + df = df_bid.join(df_ask, how="outer", lsuffix=".bid", rsuffix=".ask") + df["BEST_BID"] = df["BEST_BID"].ffill().bfill() + df["BEST_ASK"] = df["BEST_ASK"].ffill().bfill() + df["BEST_BID_VOL"] = df["BEST_BID_VOL"].ffill().bfill() + df["BEST_ASK_VOL"] = df["BEST_ASK_VOL"].ffill().bfill() - return df + df["MIDPOINT"] = (df["BEST_BID"] + df["BEST_ASK"]) / 2.0 + return df # Main program starts here. if len(sys.argv) < 3: - print ("Usage: python midpoint_plot.py ") - sys.exit() + print("Usage: python midpoint_plot.py ") + sys.exit() # TODO: only really works for one symbol right now. symbol = sys.argv[1] sim_file = sys.argv[2] -print ("Visualizing simulated {} from {}".format(symbol, sim_file)) +print("Visualizing simulated {} from {}".format(symbol, sim_file)) df_sim = read_simulated_quotes(sim_file, symbol) if PRINT_BASELINE: - baseline_file = os.path.join(os.path.dirname(sim_file) + '_baseline', os.path.basename(sim_file)) - print (baseline_file) - df_baseline = read_simulated_quotes(baseline_file, symbol) - -plt.rcParams.update({'font.size': 12}) + baseline_file = os.path.join(os.path.dirname(sim_file) + "_baseline", os.path.basename(sim_file)) + print(baseline_file) + df_baseline = read_simulated_quotes(baseline_file, symbol) +plt.rcParams.update({"font.size": 12}) # Use to restrict time to plot. df_sim = df_sim.between_time(BETWEEN_START, BETWEEN_END) if PRINT_BASELINE: - df_baseline = df_baseline.between_time(BETWEEN_START, BETWEEN_END) + df_baseline = df_baseline.between_time(BETWEEN_START, BETWEEN_END) -fig,ax = plt.subplots(figsize=(12,9), nrows=1, ncols=1) +fig, ax = plt.subplots(figsize=(12, 9), nrows=1, ncols=1) axes = [ax] # For smoothing... -#hist_window = 100 -#sim_window = 100 +# hist_window = 100 +# sim_window = 100 hist_window = 1 sim_window = 1 if PRINT_BASELINE: - # For nanosecond experiments, turn it into int index. Pandas gets weird if all - # the times vary only by a few nanoseconds. 
- rng = pd.date_range(start=df_sim.index[0], end=df_sim.index[-1], freq='1N') + # For nanosecond experiments, turn it into int index. Pandas gets weird if all + # the times vary only by a few nanoseconds. + rng = pd.date_range(start=df_sim.index[0], end=df_sim.index[-1], freq="1N") - df_baseline = df_baseline[~df_baseline.index.duplicated(keep='last')] - df_baseline = df_baseline.reindex(rng,method='ffill') - df_baseline = df_baseline.reset_index(drop=True) + df_baseline = df_baseline[~df_baseline.index.duplicated(keep="last")] + df_baseline = df_baseline.reindex(rng, method="ffill") + df_baseline = df_baseline.reset_index(drop=True) - df_sim = df_sim[~df_sim.index.duplicated(keep='last')] - df_sim = df_sim.reindex(rng,method='ffill') - df_sim = df_sim.reset_index(drop=True) + df_sim = df_sim[~df_sim.index.duplicated(keep="last")] + df_sim = df_sim.reindex(rng, method="ffill") + df_sim = df_sim.reset_index(drop=True) - # Print both separately. - if PRINT_DELTA_ONLY: - # Print the difference as a single series. - df_diff = df_sim['MIDPOINT'] - df_baseline['MIDPOINT'] + # Print both separately. + if PRINT_DELTA_ONLY: + # Print the difference as a single series. + df_diff = df_sim["MIDPOINT"] - df_baseline["MIDPOINT"] - # Smoothing. - df_diff = df_diff.rolling(window=10).mean() + # Smoothing. 
+ df_diff = df_diff.rolling(window=10).mean() - df_diff.plot(color='C0', grid=True, linewidth=LW, ax=axes[0]) + df_diff.plot(color="C0", grid=True, linewidth=LW, ax=axes[0]) - axes[0].legend(['Bid-ask Midpoint Delta']) - else: - df_baseline['MIDPOINT'].plot(color='C0', grid=True, linewidth=LW, ax=axes[0]) - df_sim['MIDPOINT'].plot(color='C1', grid=True, linewidth=LW, alpha=0.9, ax=axes[0]) + axes[0].legend(["Bid-ask Midpoint Delta"]) + else: + df_baseline["MIDPOINT"].plot(color="C0", grid=True, linewidth=LW, ax=axes[0]) + df_sim["MIDPOINT"].plot(color="C1", grid=True, linewidth=LW, alpha=0.9, ax=axes[0]) - axes[0].legend(['Baseline', 'With Impact']) + axes[0].legend(["Baseline", "With Impact"]) else: - #df_sim['PRICE'] = df_sim['PRICE'].rolling(window=sim_window).mean() + # df_sim['PRICE'] = df_sim['PRICE'].rolling(window=sim_window).mean() - # For nanosecond experiments, turn it into int index. Pandas gets weird if all - # the times vary only by a few nanoseconds. - rng = pd.date_range(start=df_sim.index[0], end=df_sim.index[-1], freq='1N') - df_sim = df_sim[~df_sim.index.duplicated(keep='last')] - df_sim = df_sim.reindex(rng,method='ffill') - df_sim = df_sim.reset_index(drop=True) - df_sim['MIDPOINT'].plot(color='C1', grid=True, linewidth=LW, alpha=0.9, ax=axes[0]) - axes[0].legend(['Simulated']) + # For nanosecond experiments, turn it into int index. Pandas gets weird if all + # the times vary only by a few nanoseconds. 
+ rng = pd.date_range(start=df_sim.index[0], end=df_sim.index[-1], freq="1N") + df_sim = df_sim[~df_sim.index.duplicated(keep="last")] + df_sim = df_sim.reindex(rng, method="ffill") + df_sim = df_sim.reset_index(drop=True) + df_sim["MIDPOINT"].plot(color="C1", grid=True, linewidth=LW, alpha=0.9, ax=axes[0]) + axes[0].legend(["Simulated"]) -plt.suptitle('Bid-Ask Midpoint: {}'.format(symbol)) +plt.suptitle("Bid-Ask Midpoint: {}".format(symbol)) -axes[0].set_ylabel('Quote Price') -axes[0].set_xlabel('Quote Time') +axes[0].set_ylabel("Quote Price") +axes[0].set_xlabel("Quote Time") -#plt.savefig('background_{}.png'.format(b)) +# plt.savefig('background_{}.png'.format(b)) plt.show() - diff --git a/cli/plot_exchange.py b/cli/plot_exchange.py index b4bb8d602..46ee40c9f 100644 --- a/cli/plot_exchange.py +++ b/cli/plot_exchange.py @@ -1,16 +1,18 @@ import matplotlib -matplotlib.use('Agg') + +matplotlib.use("Agg") +import sys + import matplotlib.pyplot as plt import pandas as pd -import sys # Auto-detect terminal width. pd.options.display.width = None pd.options.display.max_rows = 1000 pd.options.display.max_colwidth = 200 -BETWEEN_START = pd.to_datetime('09:30').time() -BETWEEN_END = pd.to_datetime('16:00:00').time() +BETWEEN_START = pd.to_datetime("09:30").time() +BETWEEN_END = pd.to_datetime("16:00:00").time() # Linewidth for plots. LW = 2 @@ -18,41 +20,44 @@ # Main program starts here. 
if len(sys.argv) < 2: - print ("Usage: python sparse_fundamental.py ") - sys.exit() + print("Usage: python sparse_fundamental.py ") + sys.exit() sim_file = sys.argv[1] -df_sim = pd.read_pickle(sim_file, compression='bz2') +df_sim = pd.read_pickle(sim_file, compression="bz2") -#print(df_sim) +# print(df_sim) -df_bid = df_sim.loc[df_sim['EventType'] == 'BEST_BID'] -df_bid = df_bid.assign( BID_PRICE = lambda x: x['Event'].str.split(',').str[1].astype('float64')) +df_bid = df_sim.loc[df_sim["EventType"] == "BEST_BID"] +df_bid = df_bid.assign(BID_PRICE=lambda x: x["Event"].str.split(",").str[1].astype("float64")) -df_ask = df_sim.loc[df_sim['EventType'] == 'BEST_ASK'] -df_ask = df_ask.assign( ASK_PRICE = lambda x: x['Event'].str.split(',').str[1].astype('float64')) +df_ask = df_sim.loc[df_sim["EventType"] == "BEST_ASK"] +df_ask = df_ask.assign(ASK_PRICE=lambda x: x["Event"].str.split(",").str[1].astype("float64")) -df_trade = df_sim.loc[df_sim['EventType'] == 'LAST_TRADE'] -df_trade = df_trade.assign( TRADE_PRICE = lambda x: x['Event'].str.replace("$", " ").str.split(',').str[1].astype('float64')) -df_trade = df_trade.assign( TRADE_SIZE = lambda x: x['Event'].str.replace("$", " ").str.split(',').str[0].astype('float64')) +df_trade = df_sim.loc[df_sim["EventType"] == "LAST_TRADE"] +df_trade = df_trade.assign( + TRADE_PRICE=lambda x: x["Event"].str.replace("$", " ").str.split(",").str[1].astype("float64") +) +df_trade = df_trade.assign( + TRADE_SIZE=lambda x: x["Event"].str.replace("$", " ").str.split(",").str[0].astype("float64") +) -#print(df_trade) +# print(df_trade) -plt.rcParams.update({'font.size': 12}) +plt.rcParams.update({"font.size": 12}) -fig,ax = plt.subplots(figsize=(12,9), nrows=1, ncols=1) +fig, ax = plt.subplots(figsize=(12, 9), nrows=1, ncols=1) axes = [ax] -df_bid['BID_PRICE'].plot(color='C1', grid=True, linewidth=LW, alpha=0.9, ax=axes[0]) -df_ask['ASK_PRICE'].plot(color='C2', grid=True, linewidth=LW, alpha=0.9, ax=axes[0]) 
-df_trade['TRADE_PRICE'].plot(color='C3', marker = 'o', markersize = 10, linewidth =0, ax=axes[0]) +df_bid["BID_PRICE"].plot(color="C1", grid=True, linewidth=LW, alpha=0.9, ax=axes[0]) +df_ask["ASK_PRICE"].plot(color="C2", grid=True, linewidth=LW, alpha=0.9, ax=axes[0]) +df_trade["TRADE_PRICE"].plot(color="C3", marker="o", markersize=10, linewidth=0, ax=axes[0]) -axes[0].legend(['BID_PRICE', 'ASK_PRICE', "TRADE_PRICE"]) +axes[0].legend(["BID_PRICE", "ASK_PRICE", "TRADE_PRICE"]) ax.set_title("Exchange Agent") -axes[0].set_ylabel('Price') -axes[0].set_xlabel('Time') +axes[0].set_ylabel("Price") +axes[0].set_xlabel("Time") -plt.savefig('value_noise_MM_2' - '.png') +plt.savefig("value_noise_MM_2" ".png") diff --git a/cli/profile.py b/cli/profile.py index c13c1046c..2c7ab963a 100644 --- a/cli/profile.py +++ b/cli/profile.py @@ -2,15 +2,14 @@ import sys if len(sys.argv) < 2: - print ('Usage: python cli/profile.py ') - sys.exit() + print("Usage: python cli/profile.py ") + sys.exit() field = sys.argv[1] -if field not in ['time', 'cumulative', 'tottime', 'cumtime', 'ncalls']: - print ('Sort by field must be one of: time, cumulative, tottime, cumtime, ncalls.') - sys.exit() +if field not in ["time", "cumulative", "tottime", "cumtime", "ncalls"]: + print("Sort by field must be one of: time, cumulative, tottime, cumtime, ncalls.") + sys.exit() -p = pstats.Stats('runstats.prof') +p = pstats.Stats("runstats.prof") p.strip_dirs().sort_stats(field).print_stats(50) - diff --git a/cli/quote_plot.py b/cli/quote_plot.py index 20180a444..c40fd2e78 100644 --- a/cli/quote_plot.py +++ b/cli/quote_plot.py @@ -1,9 +1,10 @@ import matplotlib -matplotlib.use('TkAgg') -import matplotlib.pyplot as plt -import pandas as pd + +matplotlib.use("TkAgg") import sys +import matplotlib.pyplot as plt +import pandas as pd from joblib import Memory # Auto-detect terminal width. @@ -12,97 +13,96 @@ pd.options.display.max_colwidth = 200 # Initialize a persistent memcache. 
-mem_sim = Memory(cachedir='./.cached_plot_sim', verbose=0) +mem_sim = Memory(cachedir="./.cached_plot_sim", verbose=0) # Used to read and cache simulated quotes (best bid/ask). # Doesn't actually pay attention to symbols yet. -#@mem_sim.cache -def read_simulated_quotes (file, symbol): - print ("Simulated quotes were not cached. This will take a minute.") - df = pd.read_pickle(file, compression='bz2') - df['Timestamp'] = df.index +# @mem_sim.cache +def read_simulated_quotes(file, symbol): + print("Simulated quotes were not cached. This will take a minute.") + df = pd.read_pickle(file, compression="bz2") + df["Timestamp"] = df.index - # Keep only the last bid and last ask event at each timestamp. - df = df.drop_duplicates(subset=['Timestamp','EventType'], keep='last') + # Keep only the last bid and last ask event at each timestamp. + df = df.drop_duplicates(subset=["Timestamp", "EventType"], keep="last") - del df['Timestamp'] + del df["Timestamp"] - df_bid = df[df['EventType'] == 'BEST_BID'].copy() - df_ask = df[df['EventType'] == 'BEST_ASK'].copy() + df_bid = df[df["EventType"] == "BEST_BID"].copy() + df_ask = df[df["EventType"] == "BEST_ASK"].copy() - if len(df) <= 0: - print ("There appear to be no simulated quotes.") - sys.exit() + if len(df) <= 0: + print("There appear to be no simulated quotes.") + sys.exit() - df_bid['BEST_BID'] = [b for s,b,bv in df_bid['Event'].str.split(',')] - df_bid['BEST_BID_VOL'] = [bv for s,b,bv in df_bid['Event'].str.split(',')] - df_ask['BEST_ASK'] = [a for s,a,av in df_ask['Event'].str.split(',')] - df_ask['BEST_ASK_VOL'] = [av for s,a,av in df_ask['Event'].str.split(',')] + df_bid["BEST_BID"] = [b for s, b, bv in df_bid["Event"].str.split(",")] + df_bid["BEST_BID_VOL"] = [bv for s, b, bv in df_bid["Event"].str.split(",")] + df_ask["BEST_ASK"] = [a for s, a, av in df_ask["Event"].str.split(",")] + df_ask["BEST_ASK_VOL"] = [av for s, a, av in df_ask["Event"].str.split(",")] - df_bid['BEST_BID'] = 
df_bid['BEST_BID'].str.replace('$','').astype('float64') - df_ask['BEST_ASK'] = df_ask['BEST_ASK'].str.replace('$','').astype('float64') + df_bid["BEST_BID"] = df_bid["BEST_BID"].str.replace("$", "").astype("float64") + df_ask["BEST_ASK"] = df_ask["BEST_ASK"].str.replace("$", "").astype("float64") - df_bid['BEST_BID_VOL'] = df_bid['BEST_BID_VOL'].astype('float64') - df_ask['BEST_ASK_VOL'] = df_ask['BEST_ASK_VOL'].astype('float64') + df_bid["BEST_BID_VOL"] = df_bid["BEST_BID_VOL"].astype("float64") + df_ask["BEST_ASK_VOL"] = df_ask["BEST_ASK_VOL"].astype("float64") - df = df_bid.join(df_ask, how='outer', lsuffix='.bid', rsuffix='.ask') - df['BEST_BID'] = df['BEST_BID'].ffill().bfill() - df['BEST_ASK'] = df['BEST_ASK'].ffill().bfill() - df['BEST_BID_VOL'] = df['BEST_BID_VOL'].ffill().bfill() - df['BEST_ASK_VOL'] = df['BEST_ASK_VOL'].ffill().bfill() + df = df_bid.join(df_ask, how="outer", lsuffix=".bid", rsuffix=".ask") + df["BEST_BID"] = df["BEST_BID"].ffill().bfill() + df["BEST_ASK"] = df["BEST_ASK"].ffill().bfill() + df["BEST_BID_VOL"] = df["BEST_BID_VOL"].ffill().bfill() + df["BEST_ASK_VOL"] = df["BEST_ASK_VOL"].ffill().bfill() - df['MIDPOINT'] = (df['BEST_BID'] + df['BEST_ASK']) / 2.0 + df["MIDPOINT"] = (df["BEST_BID"] + df["BEST_ASK"]) / 2.0 - return df + return df # Main program starts here. if len(sys.argv) < 2: - print ("Usage: python ticker_plot.py ") - sys.exit() + print("Usage: python ticker_plot.py ") + sys.exit() # TODO: only really works for one symbol right now. symbol = sys.argv[1] sim_file = sys.argv[2] -print ("Visualizing {} from {}".format(symbol, sim_file)) +print("Visualizing {} from {}".format(symbol, sim_file)) -plt.rcParams.update({'font.size': 12}) +plt.rcParams.update({"font.size": 12}) df_sim = read_simulated_quotes(sim_file, symbol) -fig,axes = plt.subplots(figsize=(12,9), nrows=2, ncols=1) +fig, axes = plt.subplots(figsize=(12, 9), nrows=2, ncols=1) # Crop figures to desired times and price scales. 
-#df_hist = df_hist.between_time('9:46', '13:30') -#df_sim = df_sim.between_time('10:00:00', '10:00:30') +# df_hist = df_hist.between_time('9:46', '13:30') +# df_sim = df_sim.between_time('10:00:00', '10:00:30') # For nanosecond experiments, turn it into int index. Pandas gets weird if all # the times vary only by a few nanoseconds. df_sim = df_sim.reset_index(drop=True) -ax = df_sim['BEST_BID'].plot(color='C0', grid=True, linewidth=1, ax=axes[0]) -df_sim['BEST_ASK'].plot(color='C1', grid=True, linewidth=1, ax=axes[0]) -#df_sim['MIDPOINT'].plot(color='C2', grid=True, linewidth=1, ax=axes[0]) +ax = df_sim["BEST_BID"].plot(color="C0", grid=True, linewidth=1, ax=axes[0]) +df_sim["BEST_ASK"].plot(color="C1", grid=True, linewidth=1, ax=axes[0]) +# df_sim['MIDPOINT'].plot(color='C2', grid=True, linewidth=1, ax=axes[0]) -df_sim['BEST_BID_VOL'].plot(color='C3', linewidth=1, ax=axes[1]) -df_sim['BEST_ASK_VOL'].plot(color='C4', linewidth=1, ax=axes[1]) +df_sim["BEST_BID_VOL"].plot(color="C3", linewidth=1, ax=axes[1]) +df_sim["BEST_ASK_VOL"].plot(color="C4", linewidth=1, ax=axes[1]) -axes[0].legend(['Best Bid', 'Best Ask', 'Midpoint']) -axes[1].legend(['Best Bid Vol', 'Best Ask Vol']) +axes[0].legend(["Best Bid", "Best Ask", "Midpoint"]) +axes[1].legend(["Best Bid Vol", "Best Ask Vol"]) -plt.suptitle('Best Bid/Ask: {}'.format(symbol)) +plt.suptitle("Best Bid/Ask: {}".format(symbol)) -axes[0].set_ylabel('Quote Price') -axes[1].set_xlabel('Quote Time') -axes[1].set_ylabel('Quote Volume') +axes[0].set_ylabel("Quote Price") +axes[1].set_xlabel("Quote Time") +axes[1].set_ylabel("Quote Volume") axes[0].get_xaxis().set_visible(False) -#plt.savefig('background_{}.png'.format(b)) +# plt.savefig('background_{}.png'.format(b)) plt.show() - diff --git a/cli/read_agent_logs.py b/cli/read_agent_logs.py index 74ffd9e06..dc3ab179b 100644 --- a/cli/read_agent_logs.py +++ b/cli/read_agent_logs.py @@ -1,15 +1,16 @@ import os -import pandas as pd import sys +import pandas as pd + # Auto-detect 
terminal width. pd.options.display.width = None pd.options.display.max_rows = 500000 pd.options.display.max_colwidth = 200 if len(sys.argv) < 2: - print ("Usage: python read_agent_logs.py ") - sys.exit() + print("Usage: python read_agent_logs.py ") + sys.exit() # read_agent_logs.py takes a log directory, reads all agent log files, and produces a summary of @@ -25,36 +26,43 @@ file_count = 0 for log_dir in log_dirs: - if dir_count % 100 == 0: print ("Completed {} directories".format(dir_count)) - dir_count += 1 - for file in os.listdir(log_dir): - try: - df = pd.read_pickle(os.path.join(log_dir,file), compression='bz2') - # print(df) - events = [ 'AGENT_TYPE', 'STARTING_CASH', 'ENDING_CASH', 'FINAL_CASH_POSITION', 'MARKED_TO_MARKET' ] - event = "|".join(events) - df = df[df['EventType'].str.contains(event)] - - at = df.loc[df['EventType'] == 'AGENT_TYPE', 'Event'][0] - if 'Exchange' in at: - # There may be different fields to look at later on. - continue - - file_count += 1 - - sc = df.loc[df['EventType'] == 'STARTING_CASH', 'Event'][0] - ec = df.loc[df['EventType'] == 'ENDING_CASH', 'Event'][0] - fcp = df.loc[df['EventType'] == 'FINAL_CASH_POSITION', 'Event'][0] - fv = df.loc[df['EventType'] == 'MARKED_TO_MARKET', 'Event'][0] - - ret = fcp - sc - surp = fv - sc - stats.append({ 'AgentType' : at, 'Return' : ret, 'Surplus' : surp }) - except (IndexError, KeyError): - continue + if dir_count % 100 == 0: + print("Completed {} directories".format(dir_count)) + dir_count += 1 + for file in os.listdir(log_dir): + try: + df = pd.read_pickle(os.path.join(log_dir, file), compression="bz2") + # print(df) + events = [ + "AGENT_TYPE", + "STARTING_CASH", + "ENDING_CASH", + "FINAL_CASH_POSITION", + "MARKED_TO_MARKET", + ] + event = "|".join(events) + df = df[df["EventType"].str.contains(event)] + + at = df.loc[df["EventType"] == "AGENT_TYPE", "Event"][0] + if "Exchange" in at: + # There may be different fields to look at later on. 
+ continue + + file_count += 1 + + sc = df.loc[df["EventType"] == "STARTING_CASH", "Event"][0] + ec = df.loc[df["EventType"] == "ENDING_CASH", "Event"][0] + fcp = df.loc[df["EventType"] == "FINAL_CASH_POSITION", "Event"][0] + fv = df.loc[df["EventType"] == "MARKED_TO_MARKET", "Event"][0] + + ret = fcp - sc + surp = fv - sc + stats.append({"AgentType": at, "Return": ret, "Surplus": surp}) + except (IndexError, KeyError): + continue df_stats = pd.DataFrame(stats) -print (df_stats.groupby('AgentType').mean()) +print(df_stats.groupby("AgentType").mean()) -print ("\nRead {} files in {} log directories.".format(file_count, dir_count)) +print("\nRead {} files in {} log directories.".format(file_count, dir_count)) diff --git a/cli/sparse_fundamental.py b/cli/sparse_fundamental.py index f86cfebff..6dd0bf35a 100644 --- a/cli/sparse_fundamental.py +++ b/cli/sparse_fundamental.py @@ -1,12 +1,14 @@ import ast + import matplotlib -matplotlib.use('TkAgg') -import matplotlib.pyplot as plt -import pandas as pd + +matplotlib.use("TkAgg") import os import re import sys +import matplotlib.pyplot as plt +import pandas as pd from joblib import Memory # Auto-detect terminal width. @@ -14,8 +16,8 @@ pd.options.display.max_rows = 1000 pd.options.display.max_colwidth = 200 -BETWEEN_START = pd.to_datetime('09:30').time() -BETWEEN_END = pd.to_datetime('16:00:00').time() +BETWEEN_START = pd.to_datetime("09:30").time() +BETWEEN_END = pd.to_datetime("16:00:00").time() # Linewidth for plots. LW = 2 @@ -23,54 +25,53 @@ # Main program starts here. if len(sys.argv) < 2: - print ("Usage: python sparse_fundamental.py ") - sys.exit() + print("Usage: python sparse_fundamental.py ") + sys.exit() # TODO: only really works for one symbol right now. 
sim_file = sys.argv[1] -m = re.search(r'fundamental_(.+?)\.', sim_file) +m = re.search(r"fundamental_(.+?)\.", sim_file) if not m: - print ("Usage: python sparse_fundamental.py ") - print ("{} does not appear to be a fundamental value log.".format(sim_file)) - print () - sys.exit() + print("Usage: python sparse_fundamental.py ") + print("{} does not appear to be a fundamental value log.".format(sim_file)) + print() + sys.exit() symbol = m.group(1) -print ("Visualizing simulated fundamental from {}".format(sim_file)) -df_sim = pd.read_pickle(sim_file, compression='bz2') +print("Visualizing simulated fundamental from {}".format(sim_file)) +df_sim = pd.read_pickle(sim_file, compression="bz2") -plt.rcParams.update({'font.size': 12}) +plt.rcParams.update({"font.size": 12}) -print (df_sim.head()) +print(df_sim.head()) # Use to restrict time to plot. -#df_sim = df_sim.between_time(BETWEEN_START, BETWEEN_END) +# df_sim = df_sim.between_time(BETWEEN_START, BETWEEN_END) -fig,ax = plt.subplots(figsize=(12,9), nrows=1, ncols=1) +fig, ax = plt.subplots(figsize=(12, 9), nrows=1, ncols=1) axes = [ax] # For smoothing... 
-#hist_window = 100 -#sim_window = 100 +# hist_window = 100 +# sim_window = 100 hist_window = 1 sim_window = 1 -#df_sim['PRICE'] = df_sim['PRICE'].rolling(window=sim_window).mean() +# df_sim['PRICE'] = df_sim['PRICE'].rolling(window=sim_window).mean() -df_sim['FundamentalValue'].plot(color='C1', grid=True, linewidth=LW, alpha=0.9, ax=axes[0]) -axes[0].legend(['Simulated']) +df_sim["FundamentalValue"].plot(color="C1", grid=True, linewidth=LW, alpha=0.9, ax=axes[0]) +axes[0].legend(["Simulated"]) -plt.suptitle('Fundamental Value: {}'.format(symbol)) +plt.suptitle("Fundamental Value: {}".format(symbol)) -axes[0].set_ylabel('Fundamental Value') -axes[0].set_xlabel('Fundamental Time') +axes[0].set_ylabel("Fundamental Value") +axes[0].set_xlabel("Fundamental Time") -#plt.savefig('background_{}.png'.format(b)) +# plt.savefig('background_{}.png'.format(b)) plt.show() - diff --git a/cli/sparse_midpoint.py b/cli/sparse_midpoint.py index 8fe8d6231..67d2073ff 100644 --- a/cli/sparse_midpoint.py +++ b/cli/sparse_midpoint.py @@ -1,11 +1,13 @@ import ast + import matplotlib -matplotlib.use('TkAgg') -import matplotlib.pyplot as plt -import pandas as pd + +matplotlib.use("TkAgg") import os import sys +import matplotlib.pyplot as plt +import pandas as pd from joblib import Memory # Auto-detect terminal width. @@ -14,144 +16,142 @@ pd.options.display.max_colwidth = 200 # Initialize a persistent memcache. -mem_hist = Memory(cachedir='./.cached_plot_hist', verbose=0) -mem_sim = Memory(cachedir='./.cached_plot_sim', verbose=0) +mem_hist = Memory(cachedir="./.cached_plot_hist", verbose=0) +mem_sim = Memory(cachedir="./.cached_plot_sim", verbose=0) PRINT_BASELINE = False PRINT_DELTA_ONLY = False -BETWEEN_START = pd.to_datetime('09:30').time() -BETWEEN_END = pd.to_datetime('16:00:00').time() +BETWEEN_START = pd.to_datetime("09:30").time() +BETWEEN_END = pd.to_datetime("16:00:00").time() # Linewidth for plots. LW = 2 + # Used to read and cache simulated quotes. 
# Doesn't actually pay attention to symbols yet. -#@mem_sim.cache -def read_simulated_quotes (file, symbol): - print ("Simulated quotes were not cached. This will take a minute.") - df = pd.read_pickle(file, compression='bz2') - df['Timestamp'] = df.index - - # Keep only the last bid and last ask event at each timestamp. - df = df.drop_duplicates(subset=['Timestamp','EventType'], keep='last') +# @mem_sim.cache +def read_simulated_quotes(file, symbol): + print("Simulated quotes were not cached. This will take a minute.") + df = pd.read_pickle(file, compression="bz2") + df["Timestamp"] = df.index - del df['Timestamp'] + # Keep only the last bid and last ask event at each timestamp. + df = df.drop_duplicates(subset=["Timestamp", "EventType"], keep="last") - df_bid = df[df['EventType'] == 'BEST_BID'].copy() - df_ask = df[df['EventType'] == 'BEST_ASK'].copy() + del df["Timestamp"] - if len(df) <= 0: - print ("There appear to be no simulated quotes.") - sys.exit() + df_bid = df[df["EventType"] == "BEST_BID"].copy() + df_ask = df[df["EventType"] == "BEST_ASK"].copy() - df_bid['BEST_BID'] = [b for s,b,bv in df_bid['Event'].str.split(',')] - df_bid['BEST_BID_VOL'] = [bv for s,b,bv in df_bid['Event'].str.split(',')] - df_ask['BEST_ASK'] = [a for s,a,av in df_ask['Event'].str.split(',')] - df_ask['BEST_ASK_VOL'] = [av for s,a,av in df_ask['Event'].str.split(',')] + if len(df) <= 0: + print("There appear to be no simulated quotes.") + sys.exit() - df_bid['BEST_BID'] = df_bid['BEST_BID'].str.replace('$','').astype('float64') - df_ask['BEST_ASK'] = df_ask['BEST_ASK'].str.replace('$','').astype('float64') + df_bid["BEST_BID"] = [b for s, b, bv in df_bid["Event"].str.split(",")] + df_bid["BEST_BID_VOL"] = [bv for s, b, bv in df_bid["Event"].str.split(",")] + df_ask["BEST_ASK"] = [a for s, a, av in df_ask["Event"].str.split(",")] + df_ask["BEST_ASK_VOL"] = [av for s, a, av in df_ask["Event"].str.split(",")] - df_bid['BEST_BID_VOL'] = df_bid['BEST_BID_VOL'].astype('float64') - 
df_ask['BEST_ASK_VOL'] = df_ask['BEST_ASK_VOL'].astype('float64') + df_bid["BEST_BID"] = df_bid["BEST_BID"].str.replace("$", "").astype("float64") + df_ask["BEST_ASK"] = df_ask["BEST_ASK"].str.replace("$", "").astype("float64") - df = df_bid.join(df_ask, how='outer', lsuffix='.bid', rsuffix='.ask') - df['BEST_BID'] = df['BEST_BID'].ffill().bfill() - df['BEST_ASK'] = df['BEST_ASK'].ffill().bfill() - df['BEST_BID_VOL'] = df['BEST_BID_VOL'].ffill().bfill() - df['BEST_ASK_VOL'] = df['BEST_ASK_VOL'].ffill().bfill() + df_bid["BEST_BID_VOL"] = df_bid["BEST_BID_VOL"].astype("float64") + df_ask["BEST_ASK_VOL"] = df_ask["BEST_ASK_VOL"].astype("float64") - df['MIDPOINT'] = (df['BEST_BID'] + df['BEST_ASK']) / 2.0 + df = df_bid.join(df_ask, how="outer", lsuffix=".bid", rsuffix=".ask") + df["BEST_BID"] = df["BEST_BID"].ffill().bfill() + df["BEST_ASK"] = df["BEST_ASK"].ffill().bfill() + df["BEST_BID_VOL"] = df["BEST_BID_VOL"].ffill().bfill() + df["BEST_ASK_VOL"] = df["BEST_ASK_VOL"].ffill().bfill() - return df + df["MIDPOINT"] = (df["BEST_BID"] + df["BEST_ASK"]) / 2.0 + return df # Main program starts here. if len(sys.argv) < 3: - print ("Usage: python midpoint_plot.py ") - sys.exit() + print("Usage: python midpoint_plot.py ") + sys.exit() # TODO: only really works for one symbol right now. 
symbol = sys.argv[1] sim_file = sys.argv[2] -print ("Visualizing simulated {} from {}".format(symbol, sim_file)) +print("Visualizing simulated {} from {}".format(symbol, sim_file)) df_sim = read_simulated_quotes(sim_file, symbol) if PRINT_BASELINE: - baseline_file = os.path.join(os.path.dirname(sim_file) + '_baseline', os.path.basename(sim_file)) - print (baseline_file) - df_baseline = read_simulated_quotes(baseline_file, symbol) - -plt.rcParams.update({'font.size': 12}) + baseline_file = os.path.join(os.path.dirname(sim_file) + "_baseline", os.path.basename(sim_file)) + print(baseline_file) + df_baseline = read_simulated_quotes(baseline_file, symbol) +plt.rcParams.update({"font.size": 12}) # Use to restrict time to plot. df_sim = df_sim.between_time(BETWEEN_START, BETWEEN_END) if PRINT_BASELINE: - df_baseline = df_baseline.between_time(BETWEEN_START, BETWEEN_END) + df_baseline = df_baseline.between_time(BETWEEN_START, BETWEEN_END) -fig,ax = plt.subplots(figsize=(12,9), nrows=1, ncols=1) +fig, ax = plt.subplots(figsize=(12, 9), nrows=1, ncols=1) axes = [ax] # For smoothing... -#hist_window = 100 -#sim_window = 100 +# hist_window = 100 +# sim_window = 100 hist_window = 1 sim_window = 1 if PRINT_BASELINE: - # For nanosecond experiments, turn it into int index. Pandas gets weird if all - # the times vary only by a few nanoseconds. - rng = pd.date_range(start=df_sim.index[0], end=df_sim.index[-1], freq='1N') + # For nanosecond experiments, turn it into int index. Pandas gets weird if all + # the times vary only by a few nanoseconds. 
+ rng = pd.date_range(start=df_sim.index[0], end=df_sim.index[-1], freq="1N") - df_baseline = df_baseline[~df_baseline.index.duplicated(keep='last')] - df_baseline = df_baseline.reindex(rng,method='ffill') - df_baseline = df_baseline.reset_index(drop=True) + df_baseline = df_baseline[~df_baseline.index.duplicated(keep="last")] + df_baseline = df_baseline.reindex(rng, method="ffill") + df_baseline = df_baseline.reset_index(drop=True) - df_sim = df_sim[~df_sim.index.duplicated(keep='last')] - df_sim = df_sim.reindex(rng,method='ffill') - df_sim = df_sim.reset_index(drop=True) + df_sim = df_sim[~df_sim.index.duplicated(keep="last")] + df_sim = df_sim.reindex(rng, method="ffill") + df_sim = df_sim.reset_index(drop=True) - # Print both separately. - if PRINT_DELTA_ONLY: - # Print the difference as a single series. - df_diff = df_sim['MIDPOINT'] - df_baseline['MIDPOINT'] + # Print both separately. + if PRINT_DELTA_ONLY: + # Print the difference as a single series. + df_diff = df_sim["MIDPOINT"] - df_baseline["MIDPOINT"] - # Smoothing. - df_diff = df_diff.rolling(window=10).mean() + # Smoothing. 
+ df_diff = df_diff.rolling(window=10).mean() - df_diff.plot(color='C0', grid=True, linewidth=LW, ax=axes[0]) + df_diff.plot(color="C0", grid=True, linewidth=LW, ax=axes[0]) - axes[0].legend(['Bid-ask Midpoint Delta']) - else: - df_baseline['MIDPOINT'].plot(color='C0', grid=True, linewidth=LW, ax=axes[0]) - df_sim['MIDPOINT'].plot(color='C1', grid=True, linewidth=LW, alpha=0.9, ax=axes[0]) + axes[0].legend(["Bid-ask Midpoint Delta"]) + else: + df_baseline["MIDPOINT"].plot(color="C0", grid=True, linewidth=LW, ax=axes[0]) + df_sim["MIDPOINT"].plot(color="C1", grid=True, linewidth=LW, alpha=0.9, ax=axes[0]) - axes[0].legend(['Baseline', 'With Impact']) + axes[0].legend(["Baseline", "With Impact"]) else: - #df_sim['PRICE'] = df_sim['PRICE'].rolling(window=sim_window).mean() + # df_sim['PRICE'] = df_sim['PRICE'].rolling(window=sim_window).mean() - df_sim['MIDPOINT'].plot(color='C1', grid=True, linewidth=LW, alpha=0.9, ax=axes[0]) - axes[0].legend(['Simulated']) + df_sim["MIDPOINT"].plot(color="C1", grid=True, linewidth=LW, alpha=0.9, ax=axes[0]) + axes[0].legend(["Simulated"]) -plt.suptitle('Bid-Ask Midpoint: {}'.format(symbol)) +plt.suptitle("Bid-Ask Midpoint: {}".format(symbol)) -axes[0].set_ylabel('Quote Price') -axes[0].set_xlabel('Quote Time') +axes[0].set_ylabel("Quote Price") +axes[0].set_xlabel("Quote Time") -#plt.savefig('background_{}.png'.format(b)) +# plt.savefig('background_{}.png'.format(b)) plt.show() - diff --git a/cli/sparse_ticker.py b/cli/sparse_ticker.py index 2f89238b4..32f251b28 100644 --- a/cli/sparse_ticker.py +++ b/cli/sparse_ticker.py @@ -1,11 +1,13 @@ import ast + import matplotlib -matplotlib.use('TkAgg') -import matplotlib.pyplot as plt -import pandas as pd + +matplotlib.use("TkAgg") import os import sys +import matplotlib.pyplot as plt +import pandas as pd from joblib import Memory # Auto-detect terminal width. @@ -14,8 +16,8 @@ pd.options.display.max_colwidth = 200 # Initialize a persistent memcache. 
-mem_hist = Memory(cachedir='./.cached_plot_hist', verbose=0) -mem_sim = Memory(cachedir='./.cached_plot_sim', verbose=0) +mem_hist = Memory(cachedir="./.cached_plot_hist", verbose=0) +mem_sim = Memory(cachedir="./.cached_plot_sim", verbose=0) # We could use some good argparse parameters here instead of # a bunch of constants to fiddle with. @@ -23,51 +25,52 @@ PRINT_BASELINE = False PRINT_VOLUME = False -BETWEEN_START = pd.to_datetime('09:30').time() -#BETWEEN_END = pd.to_datetime('09:30:00.000001').time() -BETWEEN_END = pd.to_datetime('16:00:00').time() +BETWEEN_START = pd.to_datetime("09:30").time() +# BETWEEN_END = pd.to_datetime('09:30:00.000001').time() +BETWEEN_END = pd.to_datetime("16:00:00").time() # Linewidth for plots. LW = 2 + # Used to read and cache real historical trades. -#@mem_hist.cache -def read_historical_trades (file, symbol): - print ("Historical trades were not cached. This will take a minute.") - df = pd.read_pickle(file, compression='bz2') +# @mem_hist.cache +def read_historical_trades(file, symbol): + print("Historical trades were not cached. This will take a minute.") + df = pd.read_pickle(file, compression="bz2") - df = df.loc[symbol] - df = df.between_time('9:30', '16:00') + df = df.loc[symbol] + df = df.between_time("9:30", "16:00") - return df + return df # Used to read and cache simulated trades. # Doesn't actually pay attention to symbols yet. -#@mem_sim.cache -def read_simulated_trades (file, symbol): - print ("Simulated trades were not cached. This will take a minute.") - df = pd.read_pickle(file, compression='bz2') - df = df[df['EventType'] == 'LAST_TRADE'] - - if len(df) <= 0: - print ("There appear to be no simulated trades.") - sys.exit() +# @mem_sim.cache +def read_simulated_trades(file, symbol): + print("Simulated trades were not cached. 
This will take a minute.") + df = pd.read_pickle(file, compression="bz2") + df = df[df["EventType"] == "LAST_TRADE"] + + if len(df) <= 0: + print("There appear to be no simulated trades.") + sys.exit() - df['PRICE'] = [y for x,y in df['Event'].str.split(',')] - df['SIZE'] = [x for x,y in df['Event'].str.split(',')] + df["PRICE"] = [y for x, y in df["Event"].str.split(",")] + df["SIZE"] = [x for x, y in df["Event"].str.split(",")] - df['PRICE'] = df['PRICE'].str.replace('$','').astype('float64') - df['SIZE'] = df['SIZE'].astype('float64') + df["PRICE"] = df["PRICE"].str.replace("$", "").astype("float64") + df["SIZE"] = df["SIZE"].astype("float64") - return df + return df # Main program starts here. if len(sys.argv) < 3: - print ("Usage: python ticker_plot.py [agent trade log]") - sys.exit() + print("Usage: python ticker_plot.py [agent trade log]") + sys.exit() # TODO: only really works for one symbol right now. @@ -75,154 +78,155 @@ def read_simulated_trades (file, symbol): sim_file = sys.argv[2] agent_log = None -if len(sys.argv) >= 4: agent_log = sys.argv[3] +if len(sys.argv) >= 4: + agent_log = sys.argv[3] -print ("Visualizing simulated {} from {}".format(symbol, sim_file)) +print("Visualizing simulated {} from {}".format(symbol, sim_file)) df_sim = read_simulated_trades(sim_file, symbol) -print (df_sim) +print(df_sim) if PRINT_BASELINE: - baseline_file = os.path.join(os.path.dirname(sim_file) + '_baseline', os.path.basename(sim_file)) - print (baseline_file) - df_baseline = read_simulated_trades(baseline_file, symbol) + baseline_file = os.path.join(os.path.dirname(sim_file) + "_baseline", os.path.basename(sim_file)) + print(baseline_file) + df_baseline = read_simulated_trades(baseline_file, symbol) # Take the date from the first index and use that to pick the correct historical date for comparison. 
-if PRINT_HISTORICAL: - hist_date = pd.to_datetime(df_sim.index[0]) - hist_year = hist_date.strftime('%Y') - hist_date = hist_date.strftime('%Y%m%d') - hist_file = "/nethome/cb107/emh/data/trades/trades_{}/ct{}_{}.bgz".format(hist_year, 'm' if int(hist_year) > 2014 else '', hist_date) - - print ("Visualizing historical {} from {}".format(symbol, hist_file)) - df_hist = read_historical_trades(hist_file, symbol) +if PRINT_HISTORICAL: + hist_date = pd.to_datetime(df_sim.index[0]) + hist_year = hist_date.strftime("%Y") + hist_date = hist_date.strftime("%Y%m%d") + hist_file = "/nethome/cb107/emh/data/trades/trades_{}/ct{}_{}.bgz".format( + hist_year, "m" if int(hist_year) > 2014 else "", hist_date + ) -plt.rcParams.update({'font.size': 12}) + print("Visualizing historical {} from {}".format(symbol, hist_file)) + df_hist = read_historical_trades(hist_file, symbol) +plt.rcParams.update({"font.size": 12}) # Use to restrict time to plot. df_sim = df_sim.between_time(BETWEEN_START, BETWEEN_END) -print ("Total simulated volume:", df_sim['SIZE'].sum()) +print("Total simulated volume:", df_sim["SIZE"].sum()) if PRINT_BASELINE: - df_baseline = df_baseline.between_time(BETWEEN_START, BETWEEN_END) - print ("Total baseline volume:", df_baseline['SIZE'].sum()) + df_baseline = df_baseline.between_time(BETWEEN_START, BETWEEN_END) + print("Total baseline volume:", df_baseline["SIZE"].sum()) if PRINT_VOLUME: - fig,axes = plt.subplots(figsize=(12,9), nrows=2, ncols=1) + fig, axes = plt.subplots(figsize=(12, 9), nrows=2, ncols=1) else: - fig,ax = plt.subplots(figsize=(12,9), nrows=1, ncols=1) - axes = [ax] + fig, ax = plt.subplots(figsize=(12, 9), nrows=1, ncols=1) + axes = [ax] # Crop figures to desired times and price scales. -#df_hist = df_hist.between_time('9:46', '13:30') +# df_hist = df_hist.between_time('9:46', '13:30') # For smoothing... 
-#hist_window = 100 -#sim_window = 100 +# hist_window = 100 +# sim_window = 100 hist_window = 1 sim_window = 1 if PRINT_HISTORICAL: - df_hist = df_hist.between_time(BETWEEN_START, BETWEEN_END) - print ("Total historical volume:", df_hist['SIZE'].sum()) + df_hist = df_hist.between_time(BETWEEN_START, BETWEEN_END) + print("Total historical volume:", df_hist["SIZE"].sum()) - df_hist['PRICE'] = df_hist['PRICE'].rolling(window=hist_window).mean() - df_sim['PRICE'] = df_sim['PRICE'].rolling(window=sim_window).mean() + df_hist["PRICE"] = df_hist["PRICE"].rolling(window=hist_window).mean() + df_sim["PRICE"] = df_sim["PRICE"].rolling(window=sim_window).mean() - df_hist['PRICE'].plot(color='C0', grid=True, linewidth=LW, ax=axes[0]) - df_sim['PRICE'].plot(color='C1', grid=True, linewidth=LW, alpha=0.9, ax=axes[0]) - axes[0].legend(['Historical', 'Simulated']) + df_hist["PRICE"].plot(color="C0", grid=True, linewidth=LW, ax=axes[0]) + df_sim["PRICE"].plot(color="C1", grid=True, linewidth=LW, alpha=0.9, ax=axes[0]) + axes[0].legend(["Historical", "Simulated"]) - if PRINT_VOLUME: - df_hist['SIZE'].plot(color='C0', linewidth=LW, ax=axes[1]) - df_sim['SIZE'].plot(color='C1', linewidth=LW, alpha=0.9, ax=axes[1]) - axes[1].legend(['Historical Vol', 'Simulated Vol']) + if PRINT_VOLUME: + df_hist["SIZE"].plot(color="C0", linewidth=LW, ax=axes[1]) + df_sim["SIZE"].plot(color="C1", linewidth=LW, alpha=0.9, ax=axes[1]) + axes[1].legend(["Historical Vol", "Simulated Vol"]) elif PRINT_BASELINE: - # For nanosecond experiments, turn it into int index. Pandas gets weird if all - # the times vary only by a few nanoseconds. - rng = pd.date_range(start=df_sim.index[0], end=df_sim.index[-1], freq='1N') + # For nanosecond experiments, turn it into int index. Pandas gets weird if all + # the times vary only by a few nanoseconds. 
+ rng = pd.date_range(start=df_sim.index[0], end=df_sim.index[-1], freq="1N") - df_baseline = df_baseline[~df_baseline.index.duplicated(keep='last')] - df_baseline = df_baseline.reindex(rng,method='ffill') - df_baseline = df_baseline.reset_index(drop=True) + df_baseline = df_baseline[~df_baseline.index.duplicated(keep="last")] + df_baseline = df_baseline.reindex(rng, method="ffill") + df_baseline = df_baseline.reset_index(drop=True) - df_sim = df_sim[~df_sim.index.duplicated(keep='last')] - df_sim = df_sim.reindex(rng,method='ffill') - df_sim = df_sim.reset_index(drop=True) + df_sim = df_sim[~df_sim.index.duplicated(keep="last")] + df_sim = df_sim.reindex(rng, method="ffill") + df_sim = df_sim.reset_index(drop=True) - df_baseline['PRICE'].plot(color='C0', grid=True, linewidth=LW, ax=axes[0]) - df_sim['PRICE'].plot(color='C1', grid=True, linewidth=LW, alpha=0.9, ax=axes[0]) + df_baseline["PRICE"].plot(color="C0", grid=True, linewidth=LW, ax=axes[0]) + df_sim["PRICE"].plot(color="C1", grid=True, linewidth=LW, alpha=0.9, ax=axes[0]) - axes[0].legend(['Baseline', 'With Impact']) + axes[0].legend(["Baseline", "With Impact"]) else: - #df_sim['PRICE'] = df_sim['PRICE'].rolling(window=sim_window).mean() + # df_sim['PRICE'] = df_sim['PRICE'].rolling(window=sim_window).mean() - # For nanosecond experiments, turn it into int index. Pandas gets weird if all - # the times vary only by a few nanoseconds. + # For nanosecond experiments, turn it into int index. Pandas gets weird if all + # the times vary only by a few nanoseconds. - # Frequency needs to be a CLI arg. - #rng = pd.date_range(start=df_sim.index[0], end=df_sim.index[-1], freq='1N') - #rng = pd.date_range(start=df_sim.index[0], end=df_sim.index[-1], freq='1S') + # Frequency needs to be a CLI arg. + # rng = pd.date_range(start=df_sim.index[0], end=df_sim.index[-1], freq='1N') + # rng = pd.date_range(start=df_sim.index[0], end=df_sim.index[-1], freq='1S') - # Resample obviates this need. 
- #df_sim = df_sim[~df_sim.index.duplicated(keep='last')] - #df_sim = df_sim.resample('1S').mean() + # Resample obviates this need. + # df_sim = df_sim[~df_sim.index.duplicated(keep='last')] + # df_sim = df_sim.resample('1S').mean() - # When printing volume, we'll need to split series, because price can be mean - # (or avg share price) but volume should be sum. + # When printing volume, we'll need to split series, because price can be mean + # (or avg share price) but volume should be sum. - #df_sim = df_sim.reindex(rng,method='ffill') - #df_sim = df_sim.reset_index(drop=True) - df_sim['PRICE'].plot(color='C1', grid=True, linewidth=LW, alpha=0.9, ax=axes[0]) - axes[0].legend(['Simulated']) + # df_sim = df_sim.reindex(rng,method='ffill') + # df_sim = df_sim.reset_index(drop=True) + df_sim["PRICE"].plot(color="C1", grid=True, linewidth=LW, alpha=0.9, ax=axes[0]) + axes[0].legend(["Simulated"]) - if PRINT_VOLUME: - df_sim['SIZE'].plot(color='C1', linewidth=LW, alpha=0.9, ax=axes[1]) - axes[1].legend(['Simulated Vol']) + if PRINT_VOLUME: + df_sim["SIZE"].plot(color="C1", linewidth=LW, alpha=0.9, ax=axes[1]) + axes[1].legend(["Simulated Vol"]) # Superimpose a particular trading agent's trade decisions on top of the ticker # plot to make it easy to visually see if it is making sensible choices. 
if agent_log: - df_agent = pd.read_pickle(agent_log, compression='bz2') - df_agent = df_agent.between_time(BETWEEN_START, BETWEEN_END) - df_agent = df_agent[df_agent.EventType == 'HOLDINGS_UPDATED'] - - first = True - - for idx in df_agent.index: - event = df_agent.loc[idx,'Event'] - if symbol in event: - shares = event[symbol] - if shares > 0: - print ("LONG at {}".format(idx)) - axes[0].axvline(x=idx, linewidth=LW, color='g') - elif shares < 0: - print ("SHORT at {}".format(idx)) - axes[0].axvline(x=idx, linewidth=LW, color='r') - else: - print ("EXIT at {}".format(idx)) - axes[0].axvline(x=idx, linewidth=LW, color='k') - else: - print ("EXIT at {}".format(idx)) - axes[0].axvline(x=idx, linewidth=LW, color='k') - -plt.suptitle('Execution Price/Volume: {}'.format(symbol)) - -axes[0].set_ylabel('Executed Price') + df_agent = pd.read_pickle(agent_log, compression="bz2") + df_agent = df_agent.between_time(BETWEEN_START, BETWEEN_END) + df_agent = df_agent[df_agent.EventType == "HOLDINGS_UPDATED"] + + first = True + + for idx in df_agent.index: + event = df_agent.loc[idx, "Event"] + if symbol in event: + shares = event[symbol] + if shares > 0: + print("LONG at {}".format(idx)) + axes[0].axvline(x=idx, linewidth=LW, color="g") + elif shares < 0: + print("SHORT at {}".format(idx)) + axes[0].axvline(x=idx, linewidth=LW, color="r") + else: + print("EXIT at {}".format(idx)) + axes[0].axvline(x=idx, linewidth=LW, color="k") + else: + print("EXIT at {}".format(idx)) + axes[0].axvline(x=idx, linewidth=LW, color="k") + +plt.suptitle("Execution Price/Volume: {}".format(symbol)) + +axes[0].set_ylabel("Executed Price") if PRINT_VOLUME: - axes[1].set_xlabel('Execution Time') - axes[1].set_ylabel('Executed Volume') - axes[0].get_xaxis().set_visible(False) + axes[1].set_xlabel("Execution Time") + axes[1].set_ylabel("Executed Volume") + axes[0].get_xaxis().set_visible(False) else: - axes[0].set_xlabel('Execution Time') + axes[0].set_xlabel("Execution Time") 
-#plt.savefig('background_{}.png'.format(b)) +# plt.savefig('background_{}.png'.format(b)) plt.show() - diff --git a/cli/stats.py b/cli/stats.py index 8df158194..3350d0fcd 100644 --- a/cli/stats.py +++ b/cli/stats.py @@ -1,15 +1,16 @@ import os -import pandas as pd import sys +import pandas as pd + # Auto-detect terminal width. pd.options.display.width = None pd.options.display.max_rows = 500000 pd.options.display.max_colwidth = 200 if len(sys.argv) < 2: - print ("Usage: python dump.py ") - sys.exit() + print("Usage: python dump.py ") + sys.exit() # stats.py takes one or more log directories, reads the summary log files, and produces a summary of @@ -26,57 +27,65 @@ dir_count = 0 for log_dir in log_dirs: - if dir_count % 100 == 0: print ("Completed {} directories".format(dir_count)) - dir_count += 1 - for file in os.listdir(log_dir): - if 'summary' not in file: continue + if dir_count % 100 == 0: + print("Completed {} directories".format(dir_count)) + dir_count += 1 + for file in os.listdir(log_dir): + if "summary" not in file: + continue + + df = pd.read_pickle(os.path.join(log_dir, file), compression="bz2") + + events = [ + "STARTING_CASH", + "ENDING_CASH", + "FINAL_CASH_POSITION", + "FINAL_VALUATION", + ] + event = "|".join(events) + df = df[df["EventType"].str.contains(event)] + + for x in df.itertuples(): + id = x.AgentID + if id not in agents: + agents[id] = {"AGENT_TYPE": x.AgentStrategy} + agents[id][x.EventType] = x.Event - df = pd.read_pickle(os.path.join(log_dir,file), compression='bz2') - - events = [ 'STARTING_CASH', 'ENDING_CASH', 'FINAL_CASH_POSITION', 'FINAL_VALUATION' ] - event = "|".join(events) - df = df[df['EventType'].str.contains(event)] - - for x in df.itertuples(): - id = x.AgentID - if id not in agents: - agents[id] = { 'AGENT_TYPE' : x.AgentStrategy } - agents[id][x.EventType] = x.Event + game_ret = 0 + game_surp = 0 - game_ret = 0 - game_surp = 0 + for id, agent in agents.items(): + at = agent["AGENT_TYPE"] - for id, agent in 
agents.items(): - at = agent['AGENT_TYPE'] + if "Impact" in at: + continue - if 'Impact' in at: continue + sc = agent["STARTING_CASH"] + ec = agent["ENDING_CASH"] + fcp = agent["FINAL_CASH_POSITION"] + fv = agent["FINAL_VALUATION"] - sc = agent['STARTING_CASH'] - ec = agent['ENDING_CASH'] - fcp = agent['FINAL_CASH_POSITION'] - fv = agent['FINAL_VALUATION'] - - ret = ec - sc - surp = fcp - sc + fv + ret = ec - sc + surp = fcp - sc + fv - game_ret += ret - game_surp += surp + game_ret += ret + game_surp += surp - stats.append({ 'AgentType' : at, 'Return' : ret, 'Surplus' : surp }) + stats.append({"AgentType": at, "Return": ret, "Surplus": surp}) - games.append({ 'GameReturn' : game_ret, 'GameSurplus' : game_surp }) + games.append({"GameReturn": game_ret, "GameSurplus": game_surp}) df_stats = pd.DataFrame(stats) df_game = pd.DataFrame(games) -print ("Agent Mean") -print (df_stats.groupby('AgentType').mean()) -print ("Agent Std") -print (df_stats.groupby('AgentType').std()) -print ("Game Mean") -print (df_game.mean()) -print ("Game Std") -print (df_game.std()) +print("Agent Mean") +print(df_stats.groupby("AgentType").mean()) +print("Agent Std") +print(df_stats.groupby("AgentType").std()) +print("Game Mean") +print(df_game.mean()) +print("Game Std") +print(df_game.std()) -print ("\nRead summary files in {} log directories.".format(dir_count)) +print("\nRead summary files in {} log directories.".format(dir_count)) diff --git a/cli/ticker_plot.py b/cli/ticker_plot.py index 1bd63f807..c857e125a 100644 --- a/cli/ticker_plot.py +++ b/cli/ticker_plot.py @@ -1,11 +1,13 @@ import ast + import matplotlib -matplotlib.use('TkAgg') -import matplotlib.pyplot as plt -import pandas as pd + +matplotlib.use("TkAgg") import os import sys +import matplotlib.pyplot as plt +import pandas as pd from joblib import Memory # Auto-detect terminal width. @@ -14,8 +16,8 @@ pd.options.display.max_colwidth = 200 # Initialize a persistent memcache. 
-mem_hist = Memory(cachedir='./.cached_plot_hist', verbose=0) -mem_sim = Memory(cachedir='./.cached_plot_sim', verbose=0) +mem_hist = Memory(cachedir="./.cached_plot_hist", verbose=0) +mem_sim = Memory(cachedir="./.cached_plot_sim", verbose=0) # We could use some good argparse parameters here instead of # a bunch of constants to fiddle with. @@ -23,50 +25,51 @@ PRINT_BASELINE = False PRINT_VOLUME = False -BETWEEN_START = pd.to_datetime('09:30').time() -BETWEEN_END = pd.to_datetime('09:30:00.000001').time() +BETWEEN_START = pd.to_datetime("09:30").time() +BETWEEN_END = pd.to_datetime("09:30:00.000001").time() # Linewidth for plots. LW = 2 + # Used to read and cache real historical trades. -#@mem_hist.cache -def read_historical_trades (file, symbol): - print ("Historical trades were not cached. This will take a minute.") - df = pd.read_pickle(file, compression='bz2') +# @mem_hist.cache +def read_historical_trades(file, symbol): + print("Historical trades were not cached. This will take a minute.") + df = pd.read_pickle(file, compression="bz2") - df = df.loc[symbol] - df = df.between_time('9:30', '16:00') + df = df.loc[symbol] + df = df.between_time("9:30", "16:00") - return df + return df # Used to read and cache simulated trades. # Doesn't actually pay attention to symbols yet. -#@mem_sim.cache -def read_simulated_trades (file, symbol): - print ("Simulated trades were not cached. This will take a minute.") - df = pd.read_pickle(file, compression='bz2') - df = df[df['EventType'] == 'LAST_TRADE'] - - if len(df) <= 0: - print ("There appear to be no simulated trades.") - sys.exit() +# @mem_sim.cache +def read_simulated_trades(file, symbol): + print("Simulated trades were not cached. 
This will take a minute.") + df = pd.read_pickle(file, compression="bz2") + df = df[df["EventType"] == "LAST_TRADE"] + + if len(df) <= 0: + print("There appear to be no simulated trades.") + sys.exit() - df['PRICE'] = [y for x,y in df['Event'].str.split(',')] - df['SIZE'] = [x for x,y in df['Event'].str.split(',')] + df["PRICE"] = [y for x, y in df["Event"].str.split(",")] + df["SIZE"] = [x for x, y in df["Event"].str.split(",")] - df['PRICE'] = df['PRICE'].str.replace('$','').astype('float64') - df['SIZE'] = df['SIZE'].astype('float64') + df["PRICE"] = df["PRICE"].str.replace("$", "").astype("float64") + df["SIZE"] = df["SIZE"].astype("float64") - return df + return df # Main program starts here. if len(sys.argv) < 3: - print ("Usage: python ticker_plot.py [agent trade log]") - sys.exit() + print("Usage: python ticker_plot.py [agent trade log]") + sys.exit() # TODO: only really works for one symbol right now. @@ -74,143 +77,144 @@ def read_simulated_trades (file, symbol): sim_file = sys.argv[2] agent_log = None -if len(sys.argv) >= 4: agent_log = sys.argv[3] +if len(sys.argv) >= 4: + agent_log = sys.argv[3] -print ("Visualizing simulated {} from {}".format(symbol, sim_file)) +print("Visualizing simulated {} from {}".format(symbol, sim_file)) df_sim = read_simulated_trades(sim_file, symbol) if PRINT_BASELINE: - baseline_file = os.path.join(os.path.dirname(sim_file) + '_baseline', os.path.basename(sim_file)) - print (baseline_file) - df_baseline = read_simulated_trades(baseline_file, symbol) + baseline_file = os.path.join(os.path.dirname(sim_file) + "_baseline", os.path.basename(sim_file)) + print(baseline_file) + df_baseline = read_simulated_trades(baseline_file, symbol) # Take the date from the first index and use that to pick the correct historical date for comparison. 
-if PRINT_HISTORICAL: - hist_date = pd.to_datetime(df_sim.index[0]) - hist_year = hist_date.strftime('%Y') - hist_date = hist_date.strftime('%Y%m%d') - hist_file = "/nethome/cb107/emh/data/trades/trades_{}/ct{}_{}.bgz".format(hist_year, 'm' if int(hist_year) > 2014 else '', hist_date) - - print ("Visualizing historical {} from {}".format(symbol, hist_file)) - df_hist = read_historical_trades(hist_file, symbol) +if PRINT_HISTORICAL: + hist_date = pd.to_datetime(df_sim.index[0]) + hist_year = hist_date.strftime("%Y") + hist_date = hist_date.strftime("%Y%m%d") + hist_file = "/nethome/cb107/emh/data/trades/trades_{}/ct{}_{}.bgz".format( + hist_year, "m" if int(hist_year) > 2014 else "", hist_date + ) -plt.rcParams.update({'font.size': 12}) + print("Visualizing historical {} from {}".format(symbol, hist_file)) + df_hist = read_historical_trades(hist_file, symbol) +plt.rcParams.update({"font.size": 12}) # Use to restrict time to plot. df_sim = df_sim.between_time(BETWEEN_START, BETWEEN_END) -print ("Total simulated volume:", df_sim['SIZE'].sum()) +print("Total simulated volume:", df_sim["SIZE"].sum()) if PRINT_BASELINE: - df_baseline = df_baseline.between_time(BETWEEN_START, BETWEEN_END) - print ("Total baseline volume:", df_baseline['SIZE'].sum()) + df_baseline = df_baseline.between_time(BETWEEN_START, BETWEEN_END) + print("Total baseline volume:", df_baseline["SIZE"].sum()) if PRINT_VOLUME: - fig,axes = plt.subplots(figsize=(12,9), nrows=2, ncols=1) + fig, axes = plt.subplots(figsize=(12, 9), nrows=2, ncols=1) else: - fig,ax = plt.subplots(figsize=(12,9), nrows=1, ncols=1) - axes = [ax] + fig, ax = plt.subplots(figsize=(12, 9), nrows=1, ncols=1) + axes = [ax] # Crop figures to desired times and price scales. -#df_hist = df_hist.between_time('9:46', '13:30') +# df_hist = df_hist.between_time('9:46', '13:30') # For smoothing... 
-#hist_window = 100 -#sim_window = 100 +# hist_window = 100 +# sim_window = 100 hist_window = 1 sim_window = 1 if PRINT_HISTORICAL: - df_hist = df_hist.between_time(BETWEEN_START, BETWEEN_END) - print ("Total historical volume:", df_hist['SIZE'].sum()) + df_hist = df_hist.between_time(BETWEEN_START, BETWEEN_END) + print("Total historical volume:", df_hist["SIZE"].sum()) - df_hist['PRICE'] = df_hist['PRICE'].rolling(window=hist_window).mean() - df_sim['PRICE'] = df_sim['PRICE'].rolling(window=sim_window).mean() + df_hist["PRICE"] = df_hist["PRICE"].rolling(window=hist_window).mean() + df_sim["PRICE"] = df_sim["PRICE"].rolling(window=sim_window).mean() - df_hist['PRICE'].plot(color='C0', grid=True, linewidth=LW, ax=axes[0]) - df_sim['PRICE'].plot(color='C1', grid=True, linewidth=LW, alpha=0.9, ax=axes[0]) - axes[0].legend(['Historical', 'Simulated']) + df_hist["PRICE"].plot(color="C0", grid=True, linewidth=LW, ax=axes[0]) + df_sim["PRICE"].plot(color="C1", grid=True, linewidth=LW, alpha=0.9, ax=axes[0]) + axes[0].legend(["Historical", "Simulated"]) - if PRINT_VOLUME: - df_hist['SIZE'].plot(color='C0', linewidth=LW, ax=axes[1]) - df_sim['SIZE'].plot(color='C1', linewidth=LW, alpha=0.9, ax=axes[1]) - axes[1].legend(['Historical Vol', 'Simulated Vol']) + if PRINT_VOLUME: + df_hist["SIZE"].plot(color="C0", linewidth=LW, ax=axes[1]) + df_sim["SIZE"].plot(color="C1", linewidth=LW, alpha=0.9, ax=axes[1]) + axes[1].legend(["Historical Vol", "Simulated Vol"]) elif PRINT_BASELINE: - # For nanosecond experiments, turn it into int index. Pandas gets weird if all - # the times vary only by a few nanoseconds. - rng = pd.date_range(start=df_sim.index[0], end=df_sim.index[-1], freq='1N') + # For nanosecond experiments, turn it into int index. Pandas gets weird if all + # the times vary only by a few nanoseconds. 
+ rng = pd.date_range(start=df_sim.index[0], end=df_sim.index[-1], freq="1N") - df_baseline = df_baseline[~df_baseline.index.duplicated(keep='last')] - df_baseline = df_baseline.reindex(rng,method='ffill') - df_baseline = df_baseline.reset_index(drop=True) + df_baseline = df_baseline[~df_baseline.index.duplicated(keep="last")] + df_baseline = df_baseline.reindex(rng, method="ffill") + df_baseline = df_baseline.reset_index(drop=True) - df_sim = df_sim[~df_sim.index.duplicated(keep='last')] - df_sim = df_sim.reindex(rng,method='ffill') - df_sim = df_sim.reset_index(drop=True) + df_sim = df_sim[~df_sim.index.duplicated(keep="last")] + df_sim = df_sim.reindex(rng, method="ffill") + df_sim = df_sim.reset_index(drop=True) - df_baseline['PRICE'].plot(color='C0', grid=True, linewidth=LW, ax=axes[0]) - df_sim['PRICE'].plot(color='C1', grid=True, linewidth=LW, alpha=0.9, ax=axes[0]) + df_baseline["PRICE"].plot(color="C0", grid=True, linewidth=LW, ax=axes[0]) + df_sim["PRICE"].plot(color="C1", grid=True, linewidth=LW, alpha=0.9, ax=axes[0]) - axes[0].legend(['Baseline', 'With Impact']) + axes[0].legend(["Baseline", "With Impact"]) else: - #df_sim['PRICE'] = df_sim['PRICE'].rolling(window=sim_window).mean() + # df_sim['PRICE'] = df_sim['PRICE'].rolling(window=sim_window).mean() - # For nanosecond experiments, turn it into int index. Pandas gets weird if all - # the times vary only by a few nanoseconds. + # For nanosecond experiments, turn it into int index. Pandas gets weird if all + # the times vary only by a few nanoseconds. 
- rng = pd.date_range(start=df_sim.index[0], end=df_sim.index[-1], freq='1N') - df_sim = df_sim[~df_sim.index.duplicated(keep='last')] - df_sim = df_sim.reindex(rng,method='ffill') - df_sim = df_sim.reset_index(drop=True) - df_sim['PRICE'].plot(color='C1', grid=True, linewidth=LW, alpha=0.9, ax=axes[0]) - axes[0].legend(['Simulated']) + rng = pd.date_range(start=df_sim.index[0], end=df_sim.index[-1], freq="1N") + df_sim = df_sim[~df_sim.index.duplicated(keep="last")] + df_sim = df_sim.reindex(rng, method="ffill") + df_sim = df_sim.reset_index(drop=True) + df_sim["PRICE"].plot(color="C1", grid=True, linewidth=LW, alpha=0.9, ax=axes[0]) + axes[0].legend(["Simulated"]) - if PRINT_VOLUME: - df_sim['SIZE'].plot(color='C1', linewidth=LW, alpha=0.9, ax=axes[1]) - axes[1].legend(['Simulated Vol']) + if PRINT_VOLUME: + df_sim["SIZE"].plot(color="C1", linewidth=LW, alpha=0.9, ax=axes[1]) + axes[1].legend(["Simulated Vol"]) # Superimpose a particular trading agent's trade decisions on top of the ticker # plot to make it easy to visually see if it is making sensible choices. 
if agent_log: - df_agent = pd.read_pickle(agent_log, compression='bz2') - df_agent = df_agent.between_time(BETWEEN_START, BETWEEN_END) - df_agent = df_agent[df_agent.EventType == 'HOLDINGS_UPDATED'] - - first = True - - for idx in df_agent.index: - event = df_agent.loc[idx,'Event'] - if symbol in event: - shares = event[symbol] - if shares > 0: - print ("LONG at {}".format(idx)) - axes[0].axvline(x=idx, linewidth=LW, color='g') - elif shares < 0: - print ("SHORT at {}".format(idx)) - axes[0].axvline(x=idx, linewidth=LW, color='r') - else: - print ("EXIT at {}".format(idx)) - axes[0].axvline(x=idx, linewidth=LW, color='k') - else: - print ("EXIT at {}".format(idx)) - axes[0].axvline(x=idx, linewidth=LW, color='k') - -plt.suptitle('Execution Price/Volume: {}'.format(symbol)) - -axes[0].set_ylabel('Executed Price') + df_agent = pd.read_pickle(agent_log, compression="bz2") + df_agent = df_agent.between_time(BETWEEN_START, BETWEEN_END) + df_agent = df_agent[df_agent.EventType == "HOLDINGS_UPDATED"] + + first = True + + for idx in df_agent.index: + event = df_agent.loc[idx, "Event"] + if symbol in event: + shares = event[symbol] + if shares > 0: + print("LONG at {}".format(idx)) + axes[0].axvline(x=idx, linewidth=LW, color="g") + elif shares < 0: + print("SHORT at {}".format(idx)) + axes[0].axvline(x=idx, linewidth=LW, color="r") + else: + print("EXIT at {}".format(idx)) + axes[0].axvline(x=idx, linewidth=LW, color="k") + else: + print("EXIT at {}".format(idx)) + axes[0].axvline(x=idx, linewidth=LW, color="k") + +plt.suptitle("Execution Price/Volume: {}".format(symbol)) + +axes[0].set_ylabel("Executed Price") if PRINT_VOLUME: - axes[1].set_xlabel('Execution Time') - axes[1].set_ylabel('Executed Volume') - axes[0].get_xaxis().set_visible(False) + axes[1].set_xlabel("Execution Time") + axes[1].set_ylabel("Executed Volume") + axes[0].get_xaxis().set_visible(False) else: - axes[0].set_xlabel('Execution Time') + axes[0].set_xlabel("Execution Time") 
-#plt.savefig('background_{}.png'.format(b)) +# plt.savefig('background_{}.png'.format(b)) plt.show() - diff --git a/config/execution.py b/config/execution.py index 9876fc442..9b13825c0 100644 --- a/config/execution.py +++ b/config/execution.py @@ -1,68 +1,65 @@ import argparse +import datetime as dt +import sys + import numpy as np import pandas as pd -import sys -import datetime as dt from dateutil.parser import parse -from Kernel import Kernel -from util import util -from util.order import LimitOrder -from util.oracle.SparseMeanRevertingOracle import SparseMeanRevertingOracle -from util.oracle.ExternalFileOracle import ExternalFileOracle -from model.LatencyModel import LatencyModel - -from agent.ExchangeAgent import ExchangeAgent -from agent.NoiseAgent import NoiseAgent -from agent.ValueAgent import ValueAgent -from agent.market_makers.AdaptiveMarketMakerAgent import AdaptiveMarketMakerAgent from agent.examples.MomentumAgent import MomentumAgent - +from agent.ExchangeAgent import ExchangeAgent +from agent.execution.POVExecutionAgent import POVExecutionAgent from agent.execution.TWAPExecutionAgent import TWAPExecutionAgent from agent.execution.VWAPExecutionAgent import VWAPExecutionAgent -from agent.execution.POVExecutionAgent import POVExecutionAgent +from agent.market_makers.AdaptiveMarketMakerAgent import AdaptiveMarketMakerAgent +from agent.NoiseAgent import NoiseAgent +from agent.ValueAgent import ValueAgent +from Kernel import Kernel +from model.LatencyModel import LatencyModel +from util import util +from util.oracle.ExternalFileOracle import ExternalFileOracle +from util.oracle.SparseMeanRevertingOracle import SparseMeanRevertingOracle +from util.order import LimitOrder ######################################################################################################################## ############################################### GENERAL CONFIG ######################################################### -parser = 
argparse.ArgumentParser(description='Detailed options for the config.') - -parser.add_argument('-c', - '--config', - required=True, - help='Name of config file to execute') -parser.add_argument('-t', - '--ticker', - required=True, - help='Ticker (symbol) to use for simulation') -parser.add_argument('-d', '--historical-date', - required=True, - type=parse, - help='historical date being simulated in format YYYYMMDD.') -parser.add_argument('-f', - '--fundamental-file-path', - required=False, - help="Path to external fundamental file.") -parser.add_argument('-e', - '--execution_agents', - action='store_true', - help='Flag to add the execution agents') -parser.add_argument('-s', - '--seed', - type=int, - default=None, - help='numpy.random.seed() for simulation') -parser.add_argument('-l', - '--log_dir', - default=None, - help='Log directory name (default: unix timestamp at program start)') -parser.add_argument('-v', - '--verbose', - action='store_true', - help='Maximum verbosity!') -parser.add_argument('--config_help', - action='store_true', - help='Print argument options for this config file') +parser = argparse.ArgumentParser(description="Detailed options for the config.") + +parser.add_argument("-c", "--config", required=True, help="Name of config file to execute") +parser.add_argument("-t", "--ticker", required=True, help="Ticker (symbol) to use for simulation") +parser.add_argument( + "-d", + "--historical-date", + required=True, + type=parse, + help="historical date being simulated in format YYYYMMDD.", +) +parser.add_argument( + "-f", + "--fundamental-file-path", + required=False, + help="Path to external fundamental file.", +) +parser.add_argument( + "-e", + "--execution_agents", + action="store_true", + help="Flag to add the execution agents", +) +parser.add_argument("-s", "--seed", type=int, default=None, help="numpy.random.seed() for simulation") +parser.add_argument( + "-l", + "--log_dir", + default=None, + help="Log directory name (default: unix timestamp 
at program start)", +) +parser.add_argument("-v", "--verbose", action="store_true", help="Maximum verbosity!") +parser.add_argument( + "--config_help", + action="store_true", + help="Print argument options for this config file", +) args, remaining_args = parser.parse_known_args() @@ -71,7 +68,8 @@ sys.exit() seed = args.seed # Random seed specification on the command line. -if not seed: seed = int(pd.Timestamp.now().timestamp() * 1000000) % (2 ** 32 - 1) +if not seed: + seed = int(pd.Timestamp.now().timestamp() * 1000000) % (2**32 - 1) np.random.seed(seed) util.silent_mode = not args.verbose @@ -85,8 +83,8 @@ # Historical date to simulate. historical_date = pd.to_datetime(args.historical_date) -mkt_open = historical_date + pd.to_timedelta('09:30:00') -mkt_close = historical_date + pd.to_timedelta('11:30:00') +mkt_open = historical_date + pd.to_timedelta("09:30:00") +mkt_close = historical_date + pd.to_timedelta("11:30:00") agent_count, agents, agent_types = 0, [], [] @@ -98,14 +96,18 @@ # (1) Sparse Mean Reverting Oracle r_bar = 1e5 -symbols = {symbol: {'r_bar': r_bar, - 'kappa': 1.67e-16, - 'sigma_s': 0, - 'fund_vol': 1e-8, # volatility of fundamental time series. - 'megashock_lambda_a': 2.77778e-18, - 'megashock_mean': 1e3, - 'megashock_var': 5e4, - 'random_state': np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32))}} +symbols = { + symbol: { + "r_bar": r_bar, + "kappa": 1.67e-16, + "sigma_s": 0, + "fund_vol": 1e-8, # volatility of fundamental time series. 
+ "megashock_lambda_a": 2.77778e-18, + "megashock_mean": 1e3, + "megashock_var": 5e4, + "random_state": np.random.RandomState(seed=np.random.randint(low=0, high=2**32)), + } +} oracle = SparseMeanRevertingOracle(mkt_open, mkt_close, symbols) @@ -128,19 +130,25 @@ # How many orders in the past to store for transacted volume computation # stream_history_length = int(pd.to_timedelta(args.mm_wake_up_freq).total_seconds() * 100) stream_history_length = 25000 -agents.extend([ExchangeAgent(id=0, - name="ExchangeAgent", - type="ExchangeAgent", - mkt_open=mkt_open, - mkt_close=mkt_close, - symbols=[symbol], - log_orders=True, - pipeline_delay=0, - computation_delay=0, - stream_history=stream_history_length, - book_freq=0, - wide_book=True, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32)))]) +agents.extend( + [ + ExchangeAgent( + id=0, + name="ExchangeAgent", + type="ExchangeAgent", + mkt_open=mkt_open, + mkt_close=mkt_close, + symbols=[symbol], + log_orders=True, + pipeline_delay=0, + computation_delay=0, + stream_history=stream_history_length, + book_freq=0, + wide_book=True, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32)), + ) + ] +) agent_types.extend("ExchangeAgent") agent_count += 1 @@ -148,36 +156,48 @@ num_noise = 5000 noise_mkt_open = historical_date + pd.to_timedelta("09:00:00") noise_mkt_close = historical_date + pd.to_timedelta("16:00:00") -agents.extend([NoiseAgent(id=j, - name="NoiseAgent_{}".format(j), - type="NoiseAgent", - symbol=symbol, - starting_cash=starting_cash, - wakeup_time=util.get_wake_time(noise_mkt_open, noise_mkt_close), - log_orders=False, - log_to_file=False, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32))) - for j in range(agent_count, agent_count + num_noise)]) +agents.extend( + [ + NoiseAgent( + id=j, + name="NoiseAgent_{}".format(j), + type="NoiseAgent", + symbol=symbol, + starting_cash=starting_cash, + 
wakeup_time=util.get_wake_time(noise_mkt_open, noise_mkt_close), + log_orders=False, + log_to_file=False, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32)), + ) + for j in range(agent_count, agent_count + num_noise) + ] +) agent_count += num_noise -agent_types.extend(['NoiseAgent']) +agent_types.extend(["NoiseAgent"]) # 3) Value Agents num_value = 100 -agents.extend([ValueAgent(id=j, - name="ValueAgent_{}".format(j), - type="ValueAgent", - symbol=symbol, - starting_cash=starting_cash, - sigma_n=r_bar / 10, - r_bar=r_bar, - kappa=1.67e-15, - lambda_a=7e-11, - log_orders=False, - log_to_file=False, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32))) - for j in range(agent_count, agent_count + num_value)]) +agents.extend( + [ + ValueAgent( + id=j, + name="ValueAgent_{}".format(j), + type="ValueAgent", + symbol=symbol, + starting_cash=starting_cash, + sigma_n=r_bar / 10, + r_bar=r_bar, + kappa=1.67e-15, + lambda_a=7e-11, + log_orders=False, + log_to_file=False, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32)), + ) + for j in range(agent_count, agent_count + num_value) + ] +) agent_count += num_value -agent_types.extend(['ValueAgent']) +agent_types.extend(["ValueAgent"]) # 4) Market Maker Agents @@ -191,45 +211,56 @@ """ # each elem of mm_params is tuple (window_size, pov, num_ticks, wake_up_freq, min_order_size) -mm_params = [('adaptive', 0.025, 10, '10S', 1), - ('adaptive', 0.025, 10, '10S', 1)] +mm_params = [("adaptive", 0.025, 10, "10S", 1), ("adaptive", 0.025, 10, "10S", 1)] num_mm_agents = len(mm_params) -agents.extend([AdaptiveMarketMakerAgent(id=j, - name="AdaptiveMarketMakerAgent_{}".format(j), - type='AdaptiveMarketMakerAgent', - symbol=symbol, - starting_cash=starting_cash, - pov=mm_params[idx][1], - min_order_size=mm_params[idx][4], - window_size=mm_params[idx][0], - num_ticks=mm_params[idx][2], - wake_up_freq=mm_params[idx][3], - cancel_limit_delay=50, - skew_beta=0, 
- level_spacing=5, - spread_alpha=0.75, - backstop_quantity=50000, - log_orders=True, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32))) - for idx, j in enumerate(range(agent_count, agent_count + num_mm_agents))]) +agents.extend( + [ + AdaptiveMarketMakerAgent( + id=j, + name="AdaptiveMarketMakerAgent_{}".format(j), + type="AdaptiveMarketMakerAgent", + symbol=symbol, + starting_cash=starting_cash, + pov=mm_params[idx][1], + min_order_size=mm_params[idx][4], + window_size=mm_params[idx][0], + num_ticks=mm_params[idx][2], + wake_up_freq=mm_params[idx][3], + cancel_limit_delay=50, + skew_beta=0, + level_spacing=5, + spread_alpha=0.75, + backstop_quantity=50000, + log_orders=True, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32)), + ) + for idx, j in enumerate(range(agent_count, agent_count + num_mm_agents)) + ] +) agent_count += num_mm_agents -agent_types.extend('AdaptiveMarketMakerAgent') +agent_types.extend("AdaptiveMarketMakerAgent") # 5) Momentum Agents num_momentum_agents = 25 -agents.extend([MomentumAgent(id=j, - name="MOMENTUM_AGENT_{}".format(j), - type="MomentumAgent", - symbol=symbol, - starting_cash=starting_cash, - min_size=1, - max_size=10, - wake_up_freq='20s', - log_orders=True, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32))) - for j in range(agent_count, agent_count + num_momentum_agents)]) +agents.extend( + [ + MomentumAgent( + id=j, + name="MOMENTUM_AGENT_{}".format(j), + type="MomentumAgent", + symbol=symbol, + starting_cash=starting_cash, + min_size=1, + max_size=10, + wake_up_freq="20s", + log_orders=True, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32)), + ) + for j in range(agent_count, agent_count + num_momentum_agents) + ] +) agent_count += num_momentum_agents agent_types.extend("MomentumAgent") @@ -239,23 +270,28 @@ execution_agent_start_time = historical_date + pd.to_timedelta("10:00:00") execution_agent_end_time = 
historical_date + pd.to_timedelta("11:00:00") execution_quantity = 12e5 -execution_frequency = '1min' +execution_frequency = "1min" execution_direction = "BUY" -execution_time_horizon = pd.date_range(start=execution_agent_start_time, end=execution_agent_end_time, - freq=execution_frequency) - -twap_agent = TWAPExecutionAgent(id=agent_count, - name='TWAPExecutionAgent', - type='ExecutionAgent', - symbol=symbol, - starting_cash=0, - direction=execution_direction, - quantity=execution_quantity, - execution_time_horizon=execution_time_horizon, - freq=execution_frequency, - trade=trade, - log_orders=True, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32))) +execution_time_horizon = pd.date_range( + start=execution_agent_start_time, + end=execution_agent_end_time, + freq=execution_frequency, +) + +twap_agent = TWAPExecutionAgent( + id=agent_count, + name="TWAPExecutionAgent", + type="ExecutionAgent", + symbol=symbol, + starting_cash=0, + direction=execution_direction, + quantity=execution_quantity, + execution_time_horizon=execution_time_horizon, + freq=execution_frequency, + trade=trade, + log_orders=True, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32)), +) execution_agents = [twap_agent] """ @@ -299,40 +335,41 @@ ######################################################################################################################## ########################################### KERNEL AND OTHER CONFIG #################################################### -kernel = Kernel("Kernel", random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32))) +kernel = Kernel( + "Kernel", + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32)), +) kernelStartTime = historical_date -kernelStopTime = mkt_close + pd.to_timedelta('00:01:00') +kernelStopTime = mkt_close + pd.to_timedelta("00:01:00") defaultComputationDelay = 50 # 50 nanoseconds # LATENCY -latency_rstate = 
np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32)) +latency_rstate = np.random.RandomState(seed=np.random.randint(low=0, high=2**32)) pairwise = (agent_count, agent_count) # All agents sit on line from Seattle to NYC nyc_to_seattle_meters = 3866660 -pairwise_distances = util.generate_uniform_random_pairwise_dist_on_line(0.0, nyc_to_seattle_meters, agent_count, - random_state=latency_rstate) +pairwise_distances = util.generate_uniform_random_pairwise_dist_on_line( + 0.0, nyc_to_seattle_meters, agent_count, random_state=latency_rstate +) pairwise_latencies = util.meters_to_light_ns(pairwise_distances) -model_args = { - 'connected': True, - 'min_latency': pairwise_latencies -} +model_args = {"connected": True, "min_latency": pairwise_latencies} -latency_model = LatencyModel(latency_model='deterministic', - random_state=latency_rstate, - kwargs=model_args) +latency_model = LatencyModel(latency_model="deterministic", random_state=latency_rstate, kwargs=model_args) # KERNEL -kernel.runner(agents=agents, - startTime=kernelStartTime, - stopTime=kernelStopTime, - agentLatencyModel=latency_model, - defaultComputationDelay=defaultComputationDelay, - oracle=oracle, - log_dir=args.log_dir) +kernel.runner( + agents=agents, + startTime=kernelStartTime, + stopTime=kernelStopTime, + agentLatencyModel=latency_model, + defaultComputationDelay=defaultComputationDelay, + oracle=oracle, + log_dir=args.log_dir, +) simulation_end_time = dt.datetime.now() print("Simulation End Time: {}".format(simulation_end_time)) -print("Time taken to run simulation: {}".format(simulation_end_time - simulation_start_time)) \ No newline at end of file +print("Time taken to run simulation: {}".format(simulation_end_time - simulation_start_time)) diff --git a/config/exp_agent_demo.py b/config/exp_agent_demo.py index 05a3d7a2c..83c075875 100644 --- a/config/exp_agent_demo.py +++ b/config/exp_agent_demo.py @@ -7,87 +7,81 @@ # - 1 (Optional) Example Experimental agent import argparse +import 
datetime as dt +import sys + import numpy as np import pandas as pd -import sys -import datetime as dt from dateutil.parser import parse -from Kernel import Kernel -from util import util -from util.order import LimitOrder -from util.oracle.SparseMeanRevertingOracle import SparseMeanRevertingOracle - +from agent.examples.ExampleExperimentalAgent import ( + ExampleExperimentalAgent, + ExampleExperimentalAgentTemplate, +) +from agent.examples.MomentumAgent import MomentumAgent from agent.ExchangeAgent import ExchangeAgent +from agent.market_makers.AdaptiveMarketMakerAgent import AdaptiveMarketMakerAgent from agent.NoiseAgent import NoiseAgent from agent.ValueAgent import ValueAgent -from agent.market_makers.AdaptiveMarketMakerAgent import AdaptiveMarketMakerAgent -from agent.examples.MomentumAgent import MomentumAgent -from agent.examples.ExampleExperimentalAgent import ExampleExperimentalAgentTemplate, ExampleExperimentalAgent +from Kernel import Kernel from model.LatencyModel import LatencyModel +from util import util +from util.oracle.SparseMeanRevertingOracle import SparseMeanRevertingOracle +from util.order import LimitOrder ######################################################################################################################## ############################################### GENERAL CONFIG ######################################################### -parser = argparse.ArgumentParser(description='Detailed options for RMSC03 config.') - -parser.add_argument('-c', - '--config', - required=True, - help='Name of config file to execute') -parser.add_argument('-t', - '--ticker', - required=True, - help='Ticker (symbol) to use for simulation') -parser.add_argument('-d', '--historical-date', - required=True, - type=parse, - help='historical date being simulated in format YYYYMMDD.') -parser.add_argument('--start-time', - default='09:30:00', - type=parse, - help='Starting time of simulation.' 
- ) -parser.add_argument('--end-time', - default='10:30:00', - type=parse, - help='Ending time of simulation.' - ) -parser.add_argument('-l', - '--log_dir', - default=None, - help='Log directory name (default: unix timestamp at program start)') -parser.add_argument('-s', - '--seed', - type=int, - default=None, - help='numpy.random.seed() for simulation') -parser.add_argument('-v', - '--verbose', - action='store_true', - help='Maximum verbosity!') -parser.add_argument('--config_help', - action='store_true', - help='Print argument options for this config file') -parser.add_argument('--fund-vol', - type=float, - default=1e-8, - help='Volatility of fundamental time series.' - ) -parser.add_argument('-e', - '--experimental-agent', - action='store_true', - help='Switch to allow presence of ExampleExperimentalAgent in market') -parser.add_argument('--ea-short-window', - type=pd.to_timedelta, - default='1s', - help='Length of short window for use in experimental agent mean-reversion strategy.' - ) -parser.add_argument('--ea-long-window', - type=pd.to_timedelta, - default='30s', - help='Length of long window for use in experimental agent mean-reversion strategy.' 
- ) +parser = argparse.ArgumentParser(description="Detailed options for RMSC03 config.") + +parser.add_argument("-c", "--config", required=True, help="Name of config file to execute") +parser.add_argument("-t", "--ticker", required=True, help="Ticker (symbol) to use for simulation") +parser.add_argument( + "-d", + "--historical-date", + required=True, + type=parse, + help="historical date being simulated in format YYYYMMDD.", +) +parser.add_argument("--start-time", default="09:30:00", type=parse, help="Starting time of simulation.") +parser.add_argument("--end-time", default="10:30:00", type=parse, help="Ending time of simulation.") +parser.add_argument( + "-l", + "--log_dir", + default=None, + help="Log directory name (default: unix timestamp at program start)", +) +parser.add_argument("-s", "--seed", type=int, default=None, help="numpy.random.seed() for simulation") +parser.add_argument("-v", "--verbose", action="store_true", help="Maximum verbosity!") +parser.add_argument( + "--config_help", + action="store_true", + help="Print argument options for this config file", +) +parser.add_argument( + "--fund-vol", + type=float, + default=1e-8, + help="Volatility of fundamental time series.", +) +parser.add_argument( + "-e", + "--experimental-agent", + action="store_true", + help="Switch to allow presence of ExampleExperimentalAgent in market", +) +parser.add_argument( + "--ea-short-window", + type=pd.to_timedelta, + default="1s", + help="Length of short window for use in experimental agent mean-reversion strategy.", +) +parser.add_argument( + "--ea-long-window", + type=pd.to_timedelta, + default="30s", + help="Length of long window for use in experimental agent mean-reversion strategy.", +) args, remaining_args = parser.parse_known_args() @@ -97,7 +91,8 @@ log_dir = args.log_dir # Requested log directory. seed = args.seed # Random seed specification on the command line. 
-if not seed: seed = int(pd.Timestamp.now().timestamp() * 1000000) % (2 ** 32 - 1) +if not seed: + seed = int(pd.Timestamp.now().timestamp() * 1000000) % (2**32 - 1) np.random.seed(seed) util.silent_mode = not args.verbose @@ -115,8 +110,8 @@ # Historical date to simulate. historical_date = pd.to_datetime(args.historical_date) -mkt_open = historical_date + pd.to_timedelta(args.start_time.strftime('%H:%M:%S')) -mkt_close = historical_date + pd.to_timedelta(args.end_time.strftime('%H:%M:%S')) +mkt_open = historical_date + pd.to_timedelta(args.start_time.strftime("%H:%M:%S")) +mkt_close = historical_date + pd.to_timedelta(args.end_time.strftime("%H:%M:%S")) agent_count, agents, agent_types = 0, [], [] # Hyperparameters @@ -129,14 +124,18 @@ lambda_a = 7e-11 # Oracle -symbols = {symbol: {'r_bar': r_bar, - 'kappa': 1.67e-16, - 'sigma_s': 0, - 'fund_vol': args.fund_vol, - 'megashock_lambda_a': 2.77778e-18, - 'megashock_mean': 1e3, - 'megashock_var': 5e4, - 'random_state': np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, dtype='uint64'))}} +symbols = { + symbol: { + "r_bar": r_bar, + "kappa": 1.67e-16, + "sigma_s": 0, + "fund_vol": args.fund_vol, + "megashock_lambda_a": 2.77778e-18, + "megashock_mean": 1e3, + "megashock_var": 5e4, + "random_state": np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + } +} oracle = SparseMeanRevertingOracle(mkt_open, mkt_close, symbols) @@ -146,55 +145,73 @@ # stream_history_length = int(pd.to_timedelta(args.mm_wake_up_freq).total_seconds() * 100) stream_history_length = 25000 -agents.extend([ExchangeAgent(id=0, - name="EXCHANGE_AGENT", - type="ExchangeAgent", - mkt_open=mkt_open, - mkt_close=mkt_close, - symbols=[symbol], - log_orders=exchange_log_orders, - pipeline_delay=0, - computation_delay=0, - stream_history=stream_history_length, - book_freq=book_freq, - wide_book=True, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, dtype='uint64')))]) +agents.extend( 
+ [ + ExchangeAgent( + id=0, + name="EXCHANGE_AGENT", + type="ExchangeAgent", + mkt_open=mkt_open, + mkt_close=mkt_close, + symbols=[symbol], + log_orders=exchange_log_orders, + pipeline_delay=0, + computation_delay=0, + stream_history=stream_history_length, + book_freq=book_freq, + wide_book=True, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + ] +) agent_types.extend("ExchangeAgent") agent_count += 1 # 2) Noise Agents num_noise = 5000 noise_mkt_open = historical_date + pd.to_timedelta("09:00:00") # These times needed for distribution of arrival times - # of Noise Agents +# of Noise Agents noise_mkt_close = historical_date + pd.to_timedelta("16:00:00") -agents.extend([NoiseAgent(id=j, - name="NoiseAgent {}".format(j), - type="NoiseAgent", - symbol=symbol, - starting_cash=starting_cash, - wakeup_time=util.get_wake_time(noise_mkt_open, noise_mkt_close), - log_orders=log_orders, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, dtype='uint64'))) - for j in range(agent_count, agent_count + num_noise)]) +agents.extend( + [ + NoiseAgent( + id=j, + name="NoiseAgent {}".format(j), + type="NoiseAgent", + symbol=symbol, + starting_cash=starting_cash, + wakeup_time=util.get_wake_time(noise_mkt_open, noise_mkt_close), + log_orders=log_orders, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + for j in range(agent_count, agent_count + num_noise) + ] +) agent_count += num_noise -agent_types.extend(['NoiseAgent']) +agent_types.extend(["NoiseAgent"]) # 3) Value Agents num_value = 100 -agents.extend([ValueAgent(id=j, - name="Value Agent {}".format(j), - type="ValueAgent", - symbol=symbol, - starting_cash=starting_cash, - sigma_n=sigma_n, - r_bar=r_bar, - kappa=kappa, - lambda_a=lambda_a, - log_orders=log_orders, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, dtype='uint64'))) - for j in range(agent_count, 
agent_count + num_value)]) +agents.extend( + [ + ValueAgent( + id=j, + name="Value Agent {}".format(j), + type="ValueAgent", + symbol=symbol, + starting_cash=starting_cash, + sigma_n=sigma_n, + r_bar=r_bar, + kappa=kappa, + lambda_a=lambda_a, + log_orders=log_orders, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + for j in range(agent_count, agent_count + num_value) + ] +) agent_count += num_value -agent_types.extend(['ValueAgent']) +agent_types.extend(["ValueAgent"]) # 4) Market Maker Agents @@ -208,51 +225,59 @@ """ # each elem of mm_params is tuple (window_size, pov, num_ticks, wake_up_freq, min_order_size) -mm_params = [('adaptive', 0.025, 10, '10S', 1), - ('adaptive', 0.025, 10, '10S', 1) - ] +mm_params = [("adaptive", 0.025, 10, "10S", 1), ("adaptive", 0.025, 10, "10S", 1)] num_mm_agents = len(mm_params) mm_cancel_limit_delay = 50 # 50 nanoseconds -agents.extend([AdaptiveMarketMakerAgent(id=j, - name="ADAPTIVE_POV_MARKET_MAKER_AGENT_{}".format(j), - type='AdaptivePOVMarketMakerAgent', - symbol=symbol, - starting_cash=starting_cash, - pov=mm_params[idx][1], - min_order_size=mm_params[idx][4], - window_size=mm_params[idx][0], - num_ticks=mm_params[idx][2], - wake_up_freq=mm_params[idx][3], - cancel_limit_delay=mm_cancel_limit_delay, - skew_beta=0, - level_spacing=5, - spread_alpha=0.75, - backstop_quantity=50000, - log_orders=log_orders, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, - dtype='uint64'))) - for idx, j in enumerate(range(agent_count, agent_count + num_mm_agents))]) +agents.extend( + [ + AdaptiveMarketMakerAgent( + id=j, + name="ADAPTIVE_POV_MARKET_MAKER_AGENT_{}".format(j), + type="AdaptivePOVMarketMakerAgent", + symbol=symbol, + starting_cash=starting_cash, + pov=mm_params[idx][1], + min_order_size=mm_params[idx][4], + window_size=mm_params[idx][0], + num_ticks=mm_params[idx][2], + wake_up_freq=mm_params[idx][3], + cancel_limit_delay=mm_cancel_limit_delay, + 
skew_beta=0, + level_spacing=5, + spread_alpha=0.75, + backstop_quantity=50000, + log_orders=log_orders, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + for idx, j in enumerate(range(agent_count, agent_count + num_mm_agents)) + ] +) agent_count += num_mm_agents -agent_types.extend('POVMarketMakerAgent') +agent_types.extend("POVMarketMakerAgent") # 5) Momentum Agents num_momentum_agents = 25 -agents.extend([MomentumAgent(id=j, - name="MOMENTUM_AGENT_{}".format(j), - type="MomentumAgent", - symbol=symbol, - starting_cash=starting_cash, - min_size=1, - max_size=10, - wake_up_freq='20s', - log_orders=log_orders, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, - dtype='uint64'))) - for j in range(agent_count, agent_count + num_momentum_agents)]) +agents.extend( + [ + MomentumAgent( + id=j, + name="MOMENTUM_AGENT_{}".format(j), + type="MomentumAgent", + symbol=symbol, + starting_cash=starting_cash, + min_size=1, + max_size=10, + wake_up_freq="20s", + log_orders=log_orders, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + for j in range(agent_count, agent_count + num_momentum_agents) + ] +) agent_count += num_momentum_agents agent_types.extend("MomentumAgent") @@ -263,30 +288,30 @@ if args.experimental_agent: experimental_agent = ExampleExperimentalAgent( id=agent_count, - name='EXAMPLE_EXPERIMENTAL_AGENT', - type='ExampleExperimentalAgent', + name="EXAMPLE_EXPERIMENTAL_AGENT", + type="ExampleExperimentalAgent", symbol=symbol, starting_cash=starting_cash, levels=5, subscription_freq=1e9, - wake_freq='10s', + wake_freq="10s", order_size=100, short_window=args.ea_short_window, long_window=args.ea_long_window, log_orders=True, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, dtype='uint64')) + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), ) else: 
experimental_agent = ExampleExperimentalAgentTemplate( id=agent_count, - name='EXAMPLE_EXPERIMENTAL_AGENT', - type='ExampleExperimentalAgent', + name="EXAMPLE_EXPERIMENTAL_AGENT", + type="ExampleExperimentalAgent", symbol=symbol, starting_cash=starting_cash, levels=5, subscription_freq=1e9, log_orders=True, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, dtype='uint64')) + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), ) experimental_agents = [experimental_agent] @@ -298,11 +323,13 @@ ######################################################################################################################## ########################################### KERNEL AND OTHER CONFIG #################################################### -kernel = Kernel("RMSC03 Kernel", random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, - dtype='uint64'))) +kernel = Kernel( + "RMSC03 Kernel", + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), +) kernelStartTime = historical_date -kernelStopTime = mkt_close + pd.to_timedelta('00:01:00') +kernelStopTime = mkt_close + pd.to_timedelta("00:01:00") defaultComputationDelay = 50 # 50 nanoseconds @@ -313,28 +340,25 @@ # All agents sit on line from Seattle to NYC nyc_to_seattle_meters = 3866660 -pairwise_distances = util.generate_uniform_random_pairwise_dist_on_line(0.0, nyc_to_seattle_meters, agent_count, - random_state=latency_rstate) +pairwise_distances = util.generate_uniform_random_pairwise_dist_on_line( + 0.0, nyc_to_seattle_meters, agent_count, random_state=latency_rstate +) pairwise_latencies = util.meters_to_light_ns(pairwise_distances) -model_args = { - 'connected': True, - 'min_latency': pairwise_latencies -} +model_args = {"connected": True, "min_latency": pairwise_latencies} -latency_model = LatencyModel(latency_model='deterministic', - random_state=latency_rstate, - kwargs=model_args - ) 
+latency_model = LatencyModel(latency_model="deterministic", random_state=latency_rstate, kwargs=model_args) # KERNEL -kernel.runner(agents=agents, - startTime=kernelStartTime, - stopTime=kernelStopTime, - agentLatencyModel=latency_model, - defaultComputationDelay=defaultComputationDelay, - oracle=oracle, - log_dir=args.log_dir) +kernel.runner( + agents=agents, + startTime=kernelStartTime, + stopTime=kernelStopTime, + agentLatencyModel=latency_model, + defaultComputationDelay=defaultComputationDelay, + oracle=oracle, + log_dir=args.log_dir, +) simulation_end_time = dt.datetime.now() print("Simulation End Time: {}".format(simulation_end_time)) diff --git a/config/hist_fund_diverse.py b/config/hist_fund_diverse.py index c4e269397..e3c2ca22e 100644 --- a/config/hist_fund_diverse.py +++ b/config/hist_fund_diverse.py @@ -1,59 +1,55 @@ import argparse +import datetime as dt +import sys + import numpy as np import pandas as pd -import sys -import datetime as dt from dateutil.parser import parse -from Kernel import Kernel -from util import util -from util.order import LimitOrder -from util.oracle.ExternalFileOracle import ExternalFileOracle -from model.LatencyModel import LatencyModel - +from agent.examples.MomentumAgent import MomentumAgent from agent.ExchangeAgent import ExchangeAgent +from agent.market_makers.MarketMakerAgent import MarketMakerAgent from agent.NoiseAgent import NoiseAgent from agent.ValueAgent import ValueAgent -from agent.market_makers.MarketMakerAgent import MarketMakerAgent -from agent.examples.MomentumAgent import MomentumAgent +from Kernel import Kernel +from model.LatencyModel import LatencyModel +from util import util +from util.oracle.ExternalFileOracle import ExternalFileOracle +from util.order import LimitOrder ######################################################################################################################## ############################################### GENERAL CONFIG 
######################################################### -parser = argparse.ArgumentParser(description='Detailed options for hist_fund_diverse config.') - -parser.add_argument('-c', - '--config', - required=True, - help='Name of config file to execute') -parser.add_argument('-t', - '--ticker', - required=True, - help='Ticker (symbol) to use for simulation') -parser.add_argument('-d', '--historical-date', - required=True, - type=parse, - help='historical date being simulated in format YYYYMMDD.') -parser.add_argument('-f', - '--fundamental-file-path', - required=True, - help="Path to external fundamental file.") -parser.add_argument('-l', - '--log_dir', - default=None, - help='Log directory name (default: unix timestamp at program start)') -parser.add_argument('-s', - '--seed', - type=int, - default=None, - help='numpy.random.seed() for simulation') -parser.add_argument('-v', - '--verbose', - action='store_true', - help='Maximum verbosity!') -parser.add_argument('--config_help', - action='store_true', - help='Print argument options for this config file') +parser = argparse.ArgumentParser(description="Detailed options for hist_fund_diverse config.") + +parser.add_argument("-c", "--config", required=True, help="Name of config file to execute") +parser.add_argument("-t", "--ticker", required=True, help="Ticker (symbol) to use for simulation") +parser.add_argument( + "-d", + "--historical-date", + required=True, + type=parse, + help="historical date being simulated in format YYYYMMDD.", +) +parser.add_argument( + "-f", + "--fundamental-file-path", + required=True, + help="Path to external fundamental file.", +) +parser.add_argument( + "-l", + "--log_dir", + default=None, + help="Log directory name (default: unix timestamp at program start)", +) +parser.add_argument("-s", "--seed", type=int, default=None, help="numpy.random.seed() for simulation") +parser.add_argument("-v", "--verbose", action="store_true", help="Maximum verbosity!") +parser.add_argument( + 
"--config_help", + action="store_true", + help="Print argument options for this config file", +) args, remaining_args = parser.parse_known_args() @@ -63,7 +59,8 @@ log_dir = args.log_dir # Requested log directory. seed = args.seed # Random seed specification on the command line. -if not seed: seed = int(pd.Timestamp.now().timestamp() * 1000000) % (2 ** 32 - 1) +if not seed: + seed = int(pd.Timestamp.now().timestamp() * 1000000) % (2**32 - 1) np.random.seed(seed) util.silent_mode = not args.verbose @@ -80,8 +77,8 @@ # Historical date to simulate. historical_date = pd.to_datetime(args.historical_date) -mkt_open = historical_date + pd.to_timedelta('09:30:00') -mkt_close = historical_date + pd.to_timedelta('16:00:00') +mkt_open = historical_date + pd.to_timedelta("09:30:00") +mkt_close = historical_date + pd.to_timedelta("16:00:00") agent_count, agents, agent_types = 0, [], [] # Hyperparameters @@ -90,9 +87,9 @@ # Oracle symbols = { - symbol : { - 'fundamental_file_path': args.fundamental_file_path, - 'random_state': np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, dtype='uint64')) + symbol: { + "fundamental_file_path": args.fundamental_file_path, + "random_state": np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), } } oracle = ExternalFileOracle(symbols) @@ -103,93 +100,123 @@ lambda_a = 1e-12 # 1) Exchange Agent -agents.extend([ExchangeAgent(id=0, - name="EXCHANGE_AGENT", - type="ExchangeAgent", - mkt_open=mkt_open, - mkt_close=mkt_close, - symbols=[symbol], - log_orders=True, - pipeline_delay=0, - computation_delay=0, - stream_history=10, - book_freq=book_freq, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, dtype='uint64')))]) +agents.extend( + [ + ExchangeAgent( + id=0, + name="EXCHANGE_AGENT", + type="ExchangeAgent", + mkt_open=mkt_open, + mkt_close=mkt_close, + symbols=[symbol], + log_orders=True, + pipeline_delay=0, + computation_delay=0, + stream_history=10, + book_freq=book_freq, 
+ random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + ] +) agent_types.extend("ExchangeAgent") agent_count += 1 # 2) Noise Agents num_noise = 5000 -agents.extend([NoiseAgent(id=j, - name="NoiseAgent {}".format(j), - type="NoiseAgent", - symbol=symbol, - starting_cash=starting_cash, - wakeup_time=util.get_wake_time(mkt_open, mkt_close), - log_orders=log_orders, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, dtype='uint64'))) - for j in range(agent_count, agent_count + num_noise)]) +agents.extend( + [ + NoiseAgent( + id=j, + name="NoiseAgent {}".format(j), + type="NoiseAgent", + symbol=symbol, + starting_cash=starting_cash, + wakeup_time=util.get_wake_time(mkt_open, mkt_close), + log_orders=log_orders, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + for j in range(agent_count, agent_count + num_noise) + ] +) agent_count += num_noise -agent_types.extend(['NoiseAgent']) +agent_types.extend(["NoiseAgent"]) # 3) Value Agents num_value = 100 -agents.extend([ValueAgent(id=j, - name="Value Agent {}".format(j), - type="ValueAgent", - symbol=symbol, - starting_cash=starting_cash, - sigma_n=sigma_n, - r_bar=r_bar, - kappa=kappa, - lambda_a=lambda_a, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, dtype='uint64'))) - for j in range(agent_count, agent_count + num_value)]) +agents.extend( + [ + ValueAgent( + id=j, + name="Value Agent {}".format(j), + type="ValueAgent", + symbol=symbol, + starting_cash=starting_cash, + sigma_n=sigma_n, + r_bar=r_bar, + kappa=kappa, + lambda_a=lambda_a, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + for j in range(agent_count, agent_count + num_value) + ] +) agent_count += num_value -agent_types.extend(['ValueAgent']) +agent_types.extend(["ValueAgent"]) # 4) Market Maker Agent num_mm_agents = 1 
-agents.extend([MarketMakerAgent(id=j, - name="MARKET_MAKER_AGENT_{}".format(j), - type='MarketMakerAgent', - symbol=symbol, - starting_cash=starting_cash, - min_size=100, - max_size=101, - wake_up_freq="1min", - log_orders=log_orders, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, - dtype='uint64'))) - for j in range(agent_count, agent_count + num_mm_agents)]) +agents.extend( + [ + MarketMakerAgent( + id=j, + name="MARKET_MAKER_AGENT_{}".format(j), + type="MarketMakerAgent", + symbol=symbol, + starting_cash=starting_cash, + min_size=100, + max_size=101, + wake_up_freq="1min", + log_orders=log_orders, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + for j in range(agent_count, agent_count + num_mm_agents) + ] +) agent_count += num_mm_agents -agent_types.extend('MarketMakerAgent') +agent_types.extend("MarketMakerAgent") # 5) Momentum Agents num_momentum_agents = 25 -agents.extend([MomentumAgent(id=j, - name="MOMENTUM_AGENT_{}".format(j), - type="MomentumAgent", - symbol=symbol, - starting_cash=starting_cash, - min_size=1, - max_size=10, - log_orders=log_orders, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, - dtype='uint64'))) - for j in range(agent_count, agent_count + num_momentum_agents)]) +agents.extend( + [ + MomentumAgent( + id=j, + name="MOMENTUM_AGENT_{}".format(j), + type="MomentumAgent", + symbol=symbol, + starting_cash=starting_cash, + min_size=1, + max_size=10, + log_orders=log_orders, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + for j in range(agent_count, agent_count + num_momentum_agents) + ] +) agent_count += num_momentum_agents agent_types.extend("MomentumAgent") ######################################################################################################################## ########################################### KERNEL AND OTHER CONFIG 
#################################################### -kernel = Kernel("hist_fund_diverse Kernel", random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, - dtype='uint64'))) +kernel = Kernel( + "hist_fund_diverse Kernel", + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), +) kernelStartTime = historical_date -kernelStopTime = mkt_close + pd.to_timedelta('00:01:00') +kernelStopTime = mkt_close + pd.to_timedelta("00:01:00") defaultComputationDelay = 50 # 50 nanoseconds @@ -200,28 +227,25 @@ # All agents sit on line from Seattle to NYC nyc_to_seattle_meters = 3866660 -pairwise_distances = util.generate_uniform_random_pairwise_dist_on_line(0.0, nyc_to_seattle_meters, agent_count, - random_state=latency_rstate) +pairwise_distances = util.generate_uniform_random_pairwise_dist_on_line( + 0.0, nyc_to_seattle_meters, agent_count, random_state=latency_rstate +) pairwise_latencies = util.meters_to_light_ns(pairwise_distances) -model_args = { - 'connected': True, - 'min_latency': pairwise_latencies -} +model_args = {"connected": True, "min_latency": pairwise_latencies} -latency_model = LatencyModel(latency_model='deterministic', - random_state=latency_rstate, - kwargs=model_args - ) +latency_model = LatencyModel(latency_model="deterministic", random_state=latency_rstate, kwargs=model_args) # KERNEL -kernel.runner(agents=agents, - startTime=kernelStartTime, - stopTime=kernelStopTime, - agentLatencyModel=latency_model, - defaultComputationDelay=defaultComputationDelay, - oracle=oracle, - log_dir=args.log_dir) +kernel.runner( + agents=agents, + startTime=kernelStartTime, + stopTime=kernelStopTime, + agentLatencyModel=latency_model, + defaultComputationDelay=defaultComputationDelay, + oracle=oracle, + log_dir=args.log_dir, +) simulation_end_time = dt.datetime.now() print("Simulation End Time: {}".format(simulation_end_time)) diff --git a/config/hist_fund_value.py b/config/hist_fund_value.py index 
f89055227..60e2b1053 100644 --- a/config/hist_fund_value.py +++ b/config/hist_fund_value.py @@ -1,57 +1,53 @@ import argparse +import datetime as dt +import sys + import numpy as np import pandas as pd -import sys -import datetime as dt from dateutil.parser import parse -from Kernel import Kernel -from util import util -from util.order import LimitOrder -from util.oracle.ExternalFileOracle import ExternalFileOracle -from model.LatencyModel import LatencyModel - from agent.ExchangeAgent import ExchangeAgent from agent.NoiseAgent import NoiseAgent from agent.ValueAgent import ValueAgent +from Kernel import Kernel +from model.LatencyModel import LatencyModel +from util import util +from util.oracle.ExternalFileOracle import ExternalFileOracle +from util.order import LimitOrder ######################################################################################################################## ############################################### GENERAL CONFIG ######################################################### -parser = argparse.ArgumentParser(description='Detailed options for hist_fund_value config.') - -parser.add_argument('-c', - '--config', - required=True, - help='Name of config file to execute') -parser.add_argument('-t', - '--ticker', - required=True, - help='Ticker (symbol) to use for simulation') -parser.add_argument('-d', '--historical-date', - required=True, - type=parse, - help='historical date being simulated in format YYYYMMDD.') -parser.add_argument('-f', - '--fundamental-file-path', - required=True, - help="Path to external fundamental file.") -parser.add_argument('-l', - '--log_dir', - default=None, - help='Log directory name (default: unix timestamp at program start)') -parser.add_argument('-s', - '--seed', - type=int, - default=None, - help='numpy.random.seed() for simulation') -parser.add_argument('-v', - '--verbose', - action='store_true', - help='Maximum verbosity!') -parser.add_argument('--config_help', - action='store_true', - help='Print 
argument options for this config file') +parser = argparse.ArgumentParser(description="Detailed options for hist_fund_value config.") + +parser.add_argument("-c", "--config", required=True, help="Name of config file to execute") +parser.add_argument("-t", "--ticker", required=True, help="Ticker (symbol) to use for simulation") +parser.add_argument( + "-d", + "--historical-date", + required=True, + type=parse, + help="historical date being simulated in format YYYYMMDD.", +) +parser.add_argument( + "-f", + "--fundamental-file-path", + required=True, + help="Path to external fundamental file.", +) +parser.add_argument( + "-l", + "--log_dir", + default=None, + help="Log directory name (default: unix timestamp at program start)", +) +parser.add_argument("-s", "--seed", type=int, default=None, help="numpy.random.seed() for simulation") +parser.add_argument("-v", "--verbose", action="store_true", help="Maximum verbosity!") +parser.add_argument( + "--config_help", + action="store_true", + help="Print argument options for this config file", +) args, remaining_args = parser.parse_known_args() @@ -61,7 +57,8 @@ log_dir = args.log_dir # Requested log directory. seed = args.seed # Random seed specification on the command line. -if not seed: seed = int(pd.Timestamp.now().timestamp() * 1000000) % (2 ** 32 - 1) +if not seed: + seed = int(pd.Timestamp.now().timestamp() * 1000000) % (2**32 - 1) np.random.seed(seed) util.silent_mode = not args.verbose @@ -78,8 +75,8 @@ # Historical date to simulate. 
historical_date = pd.to_datetime(args.historical_date) -mkt_open = historical_date + pd.to_timedelta('09:30:00') -mkt_close = historical_date + pd.to_timedelta('16:00:00') +mkt_open = historical_date + pd.to_timedelta("09:30:00") +mkt_close = historical_date + pd.to_timedelta("16:00:00") agent_count, agents, agent_types = 0, [], [] # Hyperparameters @@ -88,9 +85,9 @@ # Oracle symbols = { - symbol : { - 'fundamental_file_path': args.fundamental_file_path, - 'random_state': np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, dtype='uint64')) + symbol: { + "fundamental_file_path": args.fundamental_file_path, + "random_state": np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), } } oracle = ExternalFileOracle(symbols) @@ -101,59 +98,79 @@ lambda_a = 1e-12 # 1) Exchange Agent -agents.extend([ExchangeAgent(id=0, - name="EXCHANGE_AGENT", - type="ExchangeAgent", - mkt_open=mkt_open, - mkt_close=mkt_close, - symbols=[symbol], - log_orders=True, - pipeline_delay=0, - computation_delay=0, - stream_history=10, - book_freq=book_freq, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, dtype='uint64')))]) +agents.extend( + [ + ExchangeAgent( + id=0, + name="EXCHANGE_AGENT", + type="ExchangeAgent", + mkt_open=mkt_open, + mkt_close=mkt_close, + symbols=[symbol], + log_orders=True, + pipeline_delay=0, + computation_delay=0, + stream_history=10, + book_freq=book_freq, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + ] +) agent_types.extend("ExchangeAgent") agent_count += 1 # 2) Noise Agents num_noise = 5000 -agents.extend([NoiseAgent(id=j, - name="NoiseAgent {}".format(j), - type="NoiseAgent", - symbol=symbol, - starting_cash=starting_cash, - wakeup_time=util.get_wake_time(mkt_open, mkt_close), - log_orders=log_orders, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, dtype='uint64'))) - for j in range(agent_count, agent_count 
+ num_noise)]) +agents.extend( + [ + NoiseAgent( + id=j, + name="NoiseAgent {}".format(j), + type="NoiseAgent", + symbol=symbol, + starting_cash=starting_cash, + wakeup_time=util.get_wake_time(mkt_open, mkt_close), + log_orders=log_orders, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + for j in range(agent_count, agent_count + num_noise) + ] +) agent_count += num_noise -agent_types.extend(['NoiseAgent']) +agent_types.extend(["NoiseAgent"]) # 3) Value Agents num_value = 100 -agents.extend([ValueAgent(id=j, - name="Value Agent {}".format(j), - type="ValueAgent", - symbol=symbol, - starting_cash=starting_cash, - sigma_n=sigma_n, - r_bar=r_bar, - kappa=kappa, - lambda_a=lambda_a, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, dtype='uint64'))) - for j in range(agent_count, agent_count + num_value)]) +agents.extend( + [ + ValueAgent( + id=j, + name="Value Agent {}".format(j), + type="ValueAgent", + symbol=symbol, + starting_cash=starting_cash, + sigma_n=sigma_n, + r_bar=r_bar, + kappa=kappa, + lambda_a=lambda_a, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + for j in range(agent_count, agent_count + num_value) + ] +) agent_count += num_value -agent_types.extend(['ValueAgent']) +agent_types.extend(["ValueAgent"]) ######################################################################################################################## ########################################### KERNEL AND OTHER CONFIG #################################################### -kernel = Kernel("hist_fund_diverse Kernel", random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, - dtype='uint64'))) +kernel = Kernel( + "hist_fund_diverse Kernel", + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), +) kernelStartTime = historical_date -kernelStopTime = mkt_close + pd.to_timedelta('00:01:00') 
+kernelStopTime = mkt_close + pd.to_timedelta("00:01:00") defaultComputationDelay = 50 # 50 nanoseconds @@ -164,28 +181,25 @@ # All agents sit on line from Seattle to NYC nyc_to_seattle_meters = 3866660 -pairwise_distances = util.generate_uniform_random_pairwise_dist_on_line(0.0, nyc_to_seattle_meters, agent_count, - random_state=latency_rstate) +pairwise_distances = util.generate_uniform_random_pairwise_dist_on_line( + 0.0, nyc_to_seattle_meters, agent_count, random_state=latency_rstate +) pairwise_latencies = util.meters_to_light_ns(pairwise_distances) -model_args = { - 'connected': True, - 'min_latency': pairwise_latencies -} +model_args = {"connected": True, "min_latency": pairwise_latencies} -latency_model = LatencyModel(latency_model='deterministic', - random_state=latency_rstate, - kwargs=model_args - ) +latency_model = LatencyModel(latency_model="deterministic", random_state=latency_rstate, kwargs=model_args) # KERNEL -kernel.runner(agents=agents, - startTime=kernelStartTime, - stopTime=kernelStopTime, - agentLatencyModel=latency_model, - defaultComputationDelay=defaultComputationDelay, - oracle=oracle, - log_dir=args.log_dir) +kernel.runner( + agents=agents, + startTime=kernelStartTime, + stopTime=kernelStopTime, + agentLatencyModel=latency_model, + defaultComputationDelay=defaultComputationDelay, + oracle=oracle, + log_dir=args.log_dir, +) simulation_end_time = dt.datetime.now() print("Simulation End Time: {}".format(simulation_end_time)) diff --git a/config/impact.py b/config/impact.py index 24d12d531..44930a61f 100644 --- a/config/impact.py +++ b/config/impact.py @@ -1,16 +1,16 @@ -from Kernel import Kernel -from agent.ExchangeAgent import ExchangeAgent -from agent.HeuristicBeliefLearningAgent import HeuristicBeliefLearningAgent -from agent.examples.ImpactAgent import ImpactAgent -from agent.ZeroIntelligenceAgent import ZeroIntelligenceAgent -from util.order import LimitOrder -from util.oracle.MeanRevertingOracle import MeanRevertingOracle -from util 
import util +import sys import numpy as np import pandas as pd -import sys +from agent.examples.ImpactAgent import ImpactAgent +from agent.ExchangeAgent import ExchangeAgent +from agent.HeuristicBeliefLearningAgent import HeuristicBeliefLearningAgent +from agent.ZeroIntelligenceAgent import ZeroIntelligenceAgent +from Kernel import Kernel +from util import util +from util.oracle.MeanRevertingOracle import MeanRevertingOracle +from util.order import LimitOrder DATA_DIR = "~/data" @@ -19,38 +19,58 @@ # control agent or simulation hyperparameters during coarse parallelization. import argparse -parser = argparse.ArgumentParser(description='Detailed options for momentum config.') -parser.add_argument('-b', '--book_freq', default=None, - help='Frequency at which to archive order book for visualization') -parser.add_argument('-c', '--config', required=True, - help='Name of config file to execute') -parser.add_argument('-g', '--greed', type=float, default=0.25, - help='Impact agent greed') -parser.add_argument('-i', '--impact', action='store_false', - help='Do not actually fire an impact trade.') -parser.add_argument('-l', '--log_dir', default=None, - help='Log directory name (default: unix timestamp at program start)') -parser.add_argument('-n', '--obs_noise', type=float, default=1000000, - help='Observation noise variance for zero intelligence agents (sigma^2_n)') -parser.add_argument('-r', '--shock_variance', type=float, default=500000, - help='Shock variance for mean reversion process (sigma^2_s)') -parser.add_argument('-o', '--log_orders', action='store_true', - help='Log every order-related action by every agent.') -parser.add_argument('-s', '--seed', type=int, default=None, - help='numpy.random.seed() for simulation') -parser.add_argument('-v', '--verbose', action='store_true', - help='Maximum verbosity!') -parser.add_argument('--config_help', action='store_true', - help='Print argument options for this config file') +parser = 
argparse.ArgumentParser(description="Detailed options for momentum config.") +parser.add_argument( + "-b", + "--book_freq", + default=None, + help="Frequency at which to archive order book for visualization", +) +parser.add_argument("-c", "--config", required=True, help="Name of config file to execute") +parser.add_argument("-g", "--greed", type=float, default=0.25, help="Impact agent greed") +parser.add_argument("-i", "--impact", action="store_false", help="Do not actually fire an impact trade.") +parser.add_argument( + "-l", + "--log_dir", + default=None, + help="Log directory name (default: unix timestamp at program start)", +) +parser.add_argument( + "-n", + "--obs_noise", + type=float, + default=1000000, + help="Observation noise variance for zero intelligence agents (sigma^2_n)", +) +parser.add_argument( + "-r", + "--shock_variance", + type=float, + default=500000, + help="Shock variance for mean reversion process (sigma^2_s)", +) +parser.add_argument( + "-o", + "--log_orders", + action="store_true", + help="Log every order-related action by every agent.", +) +parser.add_argument("-s", "--seed", type=int, default=None, help="numpy.random.seed() for simulation") +parser.add_argument("-v", "--verbose", action="store_true", help="Maximum verbosity!") +parser.add_argument( + "--config_help", + action="store_true", + help="Print argument options for this config file", +) args, remaining_args = parser.parse_known_args() if args.config_help: - parser.print_help() - sys.exit() + parser.print_help() + sys.exit() # Historical date to simulate. Required even if not relevant. -historical_date = pd.to_datetime('2014-01-28') +historical_date = pd.to_datetime("2014-01-28") # Requested log directory. 
log_dir = args.log_dir @@ -84,7 +104,8 @@ # before) seed = args.seed -if not seed: seed = int(pd.Timestamp.now().timestamp() * 1000000) % (2**32 - 1) +if not seed: + seed = int(pd.Timestamp.now().timestamp() * 1000000) % (2**32 - 1) np.random.seed(seed) # Config parameter that causes util.util.print to suppress most output. @@ -98,15 +119,14 @@ log_orders = args.log_orders -print ("Silent mode: {}".format(util.silent_mode)) -print ("Logging orders: {}".format(log_orders)) -print ("Book freq: {}".format(book_freq)) -print ("ZeroIntelligenceAgent noise: {:0.4f}".format(sigma_n)) -print ("ImpactAgent greed: {:0.2f}".format(greed)) -print ("ImpactAgent firing: {}".format(impact)) -print ("Shock variance: {:0.4f}".format(sigma_s)) -print ("Configuration seed: {}\n".format(seed)) - +print("Silent mode: {}".format(util.silent_mode)) +print("Logging orders: {}".format(log_orders)) +print("Book freq: {}".format(book_freq)) +print("ZeroIntelligenceAgent noise: {:0.4f}".format(sigma_n)) +print("ImpactAgent greed: {:0.2f}".format(greed)) +print("ImpactAgent firing: {}".format(impact)) +print("Shock variance: {:0.4f}".format(sigma_s)) +print("Configuration seed: {}\n".format(seed)) # Since the simulator often pulls historical data, we use a real-world @@ -122,12 +142,12 @@ # When should the Kernel shut down? (This should be after market close.) # Here we go for 5 PM the same day. -kernelStopTime = midnight + pd.to_timedelta('17:00:00') +kernelStopTime = midnight + pd.to_timedelta("17:00:00") # This will configure the kernel with a default computation delay # (time penalty) for each agent's wakeup and recvMsg. An agent # can change this at any time for itself. (nanoseconds) -defaultComputationDelay = 0 # no delay for this config +defaultComputationDelay = 0 # no delay for this config # IMPORTANT NOTE CONCERNING AGENT IDS: the id passed to each agent must: @@ -142,13 +162,14 @@ # only IBM. This config uses generated data, so the symbol doesn't really matter. 
# If shock variance must differ for each traded symbol, it can be overridden here. -symbols = { 'IBM' : { 'r_bar' : 100000, 'kappa' : 0.05, 'sigma_s' : sigma_s } } - +symbols = {"IBM": {"r_bar": 100000, "kappa": 0.05, "sigma_s": sigma_s}} ### Configure the Kernel. -kernel = Kernel("Base Kernel", random_state = np.random.RandomState(seed=np.random.randint(low=0,high=2**32))) - +kernel = Kernel( + "Base Kernel", + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32)), +) ### Configure the agents. When conducting "agent of change" experiments, the @@ -161,10 +182,10 @@ ### Configure an exchange agent. # Let's open the exchange at 9:30 AM. -mkt_open = midnight + pd.to_timedelta('09:30:00') +mkt_open = midnight + pd.to_timedelta("09:30:00") # And close it at 9:30:00.000001 (i.e. 1,000 nanoseconds or "time steps") -mkt_close = midnight + pd.to_timedelta('09:30:00.000001') +mkt_close = midnight + pd.to_timedelta("09:30:00.000001") # Configure an appropriate oracle for all traded stocks. @@ -174,20 +195,36 @@ # Create the exchange. 
num_exchanges = 1 -agents.extend([ ExchangeAgent(j, "Exchange Agent {}".format(j), "ExchangeAgent", mkt_open, mkt_close, [s for s in symbols], log_orders=log_orders, book_freq=book_freq, pipeline_delay = 0, computation_delay = 0, stream_history = 10, random_state = np.random.RandomState(seed=np.random.randint(low=0,high=2**32))) - for j in range(agent_count, agent_count + num_exchanges) ]) +agents.extend( + [ + ExchangeAgent( + j, + "Exchange Agent {}".format(j), + "ExchangeAgent", + mkt_open, + mkt_close, + [s for s in symbols], + log_orders=log_orders, + book_freq=book_freq, + pipeline_delay=0, + computation_delay=0, + stream_history=10, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32)), + ) + for j in range(agent_count, agent_count + num_exchanges) + ] +) agent_types.extend(["ExchangeAgent" for j in range(num_exchanges)]) agent_count += num_exchanges - ### Configure some zero intelligence agents. # Cash in this simulator is always in CENTS. starting_cash = 10000000 # Here are the zero intelligence agents. -symbol = 'IBM' +symbol = "IBM" s = symbols[symbol] # Tuples are: (# agents, R_min, R_max, eta, L). L for HBL only. @@ -195,84 +232,154 @@ # Some configs for ZI agents only (among seven parameter settings). 
# 4 agents -#zi = [ (1, 0, 250, 1), (1, 0, 500, 1), (1, 0, 1000, 0.8), (1, 0, 1000, 1), (0, 0, 2000, 0.8), (0, 250, 500, 0.8), (0, 250, 500, 1) ] -#hbl = [] +# zi = [ (1, 0, 250, 1), (1, 0, 500, 1), (1, 0, 1000, 0.8), (1, 0, 1000, 1), (0, 0, 2000, 0.8), (0, 250, 500, 0.8), (0, 250, 500, 1) ] +# hbl = [] # 28 agents -#zi = [ (4, 0, 250, 1), (4, 0, 500, 1), (4, 0, 1000, 0.8), (4, 0, 1000, 1), (4, 0, 2000, 0.8), (4, 250, 500, 0.8), (4, 250, 500, 1) ] -#hbl = [] +# zi = [ (4, 0, 250, 1), (4, 0, 500, 1), (4, 0, 1000, 0.8), (4, 0, 1000, 1), (4, 0, 2000, 0.8), (4, 250, 500, 0.8), (4, 250, 500, 1) ] +# hbl = [] # 65 agents -#zi = [ (10, 0, 250, 1), (10, 0, 500, 1), (9, 0, 1000, 0.8), (9, 0, 1000, 1), (9, 0, 2000, 0.8), (9, 250, 500, 0.8), (9, 250, 500, 1) ] -#hbl = [] +# zi = [ (10, 0, 250, 1), (10, 0, 500, 1), (9, 0, 1000, 0.8), (9, 0, 1000, 1), (9, 0, 2000, 0.8), (9, 250, 500, 0.8), (9, 250, 500, 1) ] +# hbl = [] # 100 agents -#zi = [ (15, 0, 250, 1), (15, 0, 500, 1), (14, 0, 1000, 0.8), (14, 0, 1000, 1), (14, 0, 2000, 0.8), (14, 250, 500, 0.8), (14, 250, 500, 1) ] -#hbl = [] +# zi = [ (15, 0, 250, 1), (15, 0, 500, 1), (14, 0, 1000, 0.8), (14, 0, 1000, 1), (14, 0, 2000, 0.8), (14, 250, 500, 0.8), (14, 250, 500, 1) ] +# hbl = [] # 1000 agents -#zi = [ (143, 0, 250, 1), (143, 0, 500, 1), (143, 0, 1000, 0.8), (143, 0, 1000, 1), (143, 0, 2000, 0.8), (143, 250, 500, 0.8), (142, 250, 500, 1) ] -#hbl = [] +# zi = [ (143, 0, 250, 1), (143, 0, 500, 1), (143, 0, 1000, 0.8), (143, 0, 1000, 1), (143, 0, 2000, 0.8), (143, 250, 500, 0.8), (142, 250, 500, 1) ] +# hbl = [] # 10000 agents -#zi = [ (1429, 0, 250, 1), (1429, 0, 500, 1), (1429, 0, 1000, 0.8), (1429, 0, 1000, 1), (1428, 0, 2000, 0.8), (1428, 250, 500, 0.8), (1428, 250, 500, 1) ] -#hbl = [] +# zi = [ (1429, 0, 250, 1), (1429, 0, 500, 1), (1429, 0, 1000, 0.8), (1429, 0, 1000, 1), (1428, 0, 2000, 0.8), (1428, 250, 500, 0.8), (1428, 250, 500, 1) ] +# hbl = [] # Some configs for HBL agents only (among four parameter settings). 
# 4 agents -#zi = [] -#hbl = [ (1, 250, 500, 1, 2), (1, 250, 500, 1, 3), (1, 250, 500, 1, 5), (1, 250, 500, 1, 8) ] +# zi = [] +# hbl = [ (1, 250, 500, 1, 2), (1, 250, 500, 1, 3), (1, 250, 500, 1, 5), (1, 250, 500, 1, 8) ] # 28 agents -#zi = [] -#hbl = [ (7, 250, 500, 1, 2), (7, 250, 500, 1, 3), (7, 250, 500, 1, 5), (7, 250, 500, 1, 8) ] +# zi = [] +# hbl = [ (7, 250, 500, 1, 2), (7, 250, 500, 1, 3), (7, 250, 500, 1, 5), (7, 250, 500, 1, 8) ] # 1000 agents -#zi = [] -#hbl = [ (250, 250, 500, 1, 2), (250, 250, 500, 1, 3), (250, 250, 500, 1, 5), (250, 250, 500, 1, 8) ] +# zi = [] +# hbl = [ (250, 250, 500, 1, 2), (250, 250, 500, 1, 3), (250, 250, 500, 1, 5), (250, 250, 500, 1, 8) ] # Some configs that mix both types of agents. # 28 agents -#zi = [ (3, 0, 250, 1), (3, 0, 500, 1), (3, 0, 1000, 0.8), (3, 0, 1000, 1), (3, 0, 2000, 0.8), (3, 250, 500, 0.8), (2, 250, 500, 1) ] -#hbl = [ (2, 250, 500, 1, 2), (2, 250, 500, 1, 3), (2, 250, 500, 1, 5), (2, 250, 500, 1, 8) ] +# zi = [ (3, 0, 250, 1), (3, 0, 500, 1), (3, 0, 1000, 0.8), (3, 0, 1000, 1), (3, 0, 2000, 0.8), (3, 250, 500, 0.8), (2, 250, 500, 1) ] +# hbl = [ (2, 250, 500, 1, 2), (2, 250, 500, 1, 3), (2, 250, 500, 1, 5), (2, 250, 500, 1, 8) ] # 65 agents -#zi = [ (7, 0, 250, 1), (7, 0, 500, 1), (7, 0, 1000, 0.8), (7, 0, 1000, 1), (7, 0, 2000, 0.8), (7, 250, 500, 0.8), (7, 250, 500, 1) ] -#hbl = [ (4, 250, 500, 1, 2), (4, 250, 500, 1, 3), (4, 250, 500, 1, 5), (4, 250, 500, 1, 8) ] +# zi = [ (7, 0, 250, 1), (7, 0, 500, 1), (7, 0, 1000, 0.8), (7, 0, 1000, 1), (7, 0, 2000, 0.8), (7, 250, 500, 0.8), (7, 250, 500, 1) ] +# hbl = [ (4, 250, 500, 1, 2), (4, 250, 500, 1, 3), (4, 250, 500, 1, 5), (4, 250, 500, 1, 8) ] # 1000 agents -zi = [ (100, 0, 250, 1), (100, 0, 500, 1), (100, 0, 1000, 0.8), (100, 0, 1000, 1), (100, 0, 2000, 0.8), (100, 250, 500, 0.8), (100, 250, 500, 1) ] -hbl = [ (75, 250, 500, 1, 2), (75, 250, 500, 1, 3), (75, 250, 500, 1, 5), (75, 250, 500, 1, 8) ] - +zi = [ + (100, 0, 250, 1), + (100, 0, 500, 1), + 
(100, 0, 1000, 0.8), + (100, 0, 1000, 1), + (100, 0, 2000, 0.8), + (100, 250, 500, 0.8), + (100, 250, 500, 1), +] +hbl = [ + (75, 250, 500, 1, 2), + (75, 250, 500, 1, 3), + (75, 250, 500, 1, 5), + (75, 250, 500, 1, 8), +] # ZI strategy split. -for i,x in enumerate(zi): - strat_name = "Type {} [{} <= R <= {}, eta={}]".format(i+1, x[1], x[2], x[3]) - agents.extend([ ZeroIntelligenceAgent(j, "ZI Agent {} {}".format(j, strat_name), "ZeroIntelligenceAgent {}".format(strat_name), random_state = np.random.RandomState(seed=np.random.randint(low=0,high=2**32)),log_orders=log_orders, symbol=symbol, starting_cash=starting_cash, sigma_n=sigma_n, r_bar=s['r_bar'], kappa=s['kappa'], sigma_s=s['sigma_s'], q_max=10, sigma_pv=5000000, R_min=x[1], R_max=x[2], eta=x[3], lambda_a=0.005) for j in range(agent_count,agent_count+x[0]) ]) - agent_types.extend([ "ZeroIntelligenceAgent {}".format(strat_name) for j in range(x[0]) ]) - agent_count += x[0] +for i, x in enumerate(zi): + strat_name = "Type {} [{} <= R <= {}, eta={}]".format(i + 1, x[1], x[2], x[3]) + agents.extend( + [ + ZeroIntelligenceAgent( + j, + "ZI Agent {} {}".format(j, strat_name), + "ZeroIntelligenceAgent {}".format(strat_name), + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32)), + log_orders=log_orders, + symbol=symbol, + starting_cash=starting_cash, + sigma_n=sigma_n, + r_bar=s["r_bar"], + kappa=s["kappa"], + sigma_s=s["sigma_s"], + q_max=10, + sigma_pv=5000000, + R_min=x[1], + R_max=x[2], + eta=x[3], + lambda_a=0.005, + ) + for j in range(agent_count, agent_count + x[0]) + ] + ) + agent_types.extend(["ZeroIntelligenceAgent {}".format(strat_name) for j in range(x[0])]) + agent_count += x[0] # HBL strategy split. 
-for i,x in enumerate(hbl): - strat_name = "Type {} [{} <= R <= {}, eta={}, L={}]".format(i+1, x[1], x[2], x[3], x[4]) - agents.extend([ HeuristicBeliefLearningAgent(j, "HBL Agent {} {}".format(j, strat_name), "HeuristicBeliefLearningAgent {}".format(strat_name), random_state = np.random.RandomState(seed=np.random.randint(low=0,high=2**32)), log_orders=log_orders, symbol=symbol, starting_cash=starting_cash, sigma_n=sigma_n, r_bar=s['r_bar'], kappa=s['kappa'], sigma_s=s['sigma_s'], q_max=10, sigma_pv=5000000, R_min=x[1], R_max=x[2], eta=x[3], lambda_a=0.005, L=x[4]) for j in range(agent_count,agent_count+x[0]) ]) - agent_types.extend([ "HeuristicBeliefLearningAgent {}".format(strat_name) for j in range(x[0]) ]) - agent_count += x[0] - +for i, x in enumerate(hbl): + strat_name = "Type {} [{} <= R <= {}, eta={}, L={}]".format(i + 1, x[1], x[2], x[3], x[4]) + agents.extend( + [ + HeuristicBeliefLearningAgent( + j, + "HBL Agent {} {}".format(j, strat_name), + "HeuristicBeliefLearningAgent {}".format(strat_name), + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32)), + log_orders=log_orders, + symbol=symbol, + starting_cash=starting_cash, + sigma_n=sigma_n, + r_bar=s["r_bar"], + kappa=s["kappa"], + sigma_s=s["sigma_s"], + q_max=10, + sigma_pv=5000000, + R_min=x[1], + R_max=x[2], + eta=x[3], + lambda_a=0.005, + L=x[4], + ) + for j in range(agent_count, agent_count + x[0]) + ] + ) + agent_types.extend(["HeuristicBeliefLearningAgent {}".format(strat_name) for j in range(x[0])]) + agent_count += x[0] # Impact agent. # 200 time steps in... 
-impact_time = midnight + pd.to_timedelta('09:30:00.0000002') +impact_time = midnight + pd.to_timedelta("09:30:00.0000002") i = agent_count -agents.append(ImpactAgent(i, "Impact Agent {}".format(i), "ImpactAgent", symbol = "IBM", starting_cash = starting_cash, greed = greed, impact = impact, impact_time = impact_time, random_state = np.random.RandomState(seed=np.random.randint(low=0,high=2**32)))) +agents.append( + ImpactAgent( + i, + "Impact Agent {}".format(i), + "ImpactAgent", + symbol="IBM", + starting_cash=starting_cash, + greed=greed, + impact=impact, + impact_time=impact_time, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32)), + ) +) agent_types.append("Impact Agent {}".format(i)) agent_count += 1 @@ -282,19 +389,22 @@ # Square numpy array with dimensions equal to total agent count. In this config, # there should not be any communication delay. -latency = np.zeros((len(agent_types),len(agent_types))) +latency = np.zeros((len(agent_types), len(agent_types))) # Configure a simple latency noise model for the agents. # Index is ns extra delay, value is probability of this delay being applied. # In this config, there is no latency (noisy or otherwise). -noise = [ 1.0 ] - +noise = [1.0] # Start the kernel running. 
-kernel.runner(agents = agents, startTime = kernelStartTime, - stopTime = kernelStopTime, agentLatency = latency, - latencyNoise = noise, - defaultComputationDelay = defaultComputationDelay, - oracle = oracle, log_dir = log_dir) - +kernel.runner( + agents=agents, + startTime=kernelStartTime, + stopTime=kernelStopTime, + agentLatency=latency, + latencyNoise=noise, + defaultComputationDelay=defaultComputationDelay, + oracle=oracle, + log_dir=log_dir, +) diff --git a/config/loop_obi.py b/config/loop_obi.py index 50964e250..748ac8d08 100644 --- a/config/loop_obi.py +++ b/config/loop_obi.py @@ -1,34 +1,34 @@ -from Kernel import Kernel +import sys +from math import ceil, floor +from statistics import mean, median, stdev + +import numpy as np +import pandas as pd + +from agent.examples.MarketMakerAgent import MarketMakerAgent +from agent.examples.MomentumAgent import MomentumAgent from agent.ExchangeAgent import ExchangeAgent from agent.OrderBookImbalanceAgent import OrderBookImbalanceAgent from agent.ValueAgent import ValueAgent from agent.ZeroIntelligenceAgent import ZeroIntelligenceAgent -from agent.examples.MarketMakerAgent import MarketMakerAgent -from agent.examples.MomentumAgent import MomentumAgent -from statistics import median, mean, stdev -from util.order import LimitOrder -from util.oracle.SparseMeanRevertingOracle import SparseMeanRevertingOracle +from Kernel import Kernel from util import util from util.model.QTable import QTable - -import numpy as np -import pandas as pd -import sys - -from math import ceil, floor +from util.oracle.SparseMeanRevertingOracle import SparseMeanRevertingOracle +from util.order import LimitOrder ###### Helper functions for this configuration file. Just commonly-used code ###### ###### that would otherwise have been repeated many times. 
###### + def get_rand_obj(seed_obj): - return np.random.RandomState(seed = seed_obj.randint(low = 0, high = 2**32)) + return np.random.RandomState(seed=seed_obj.randint(low=0, high=2**32)) ###### Wallclock tracking for overall experimental scheduling to CPUs. -wallclock_start = pd.Timestamp('now') +wallclock_start = pd.Timestamp("now") -print ("\n====== Experimental wallclock elapsed: {} ======\n".format( - pd.Timestamp('now') - wallclock_start)) +print("\n====== Experimental wallclock elapsed: {} ======\n".format(pd.Timestamp("now") - wallclock_start)) ###### One-time configuration section. This section sets up definitions that ###### @@ -48,14 +48,22 @@ def get_rand_obj(seed_obj): # Thus our discrete time stamps are effectively nanoseconds, although # they can be interepreted otherwise for ahistorical (e.g. generated) # simulations. These timestamps do require a valid date component. -midnight = pd.to_datetime('2014-01-28') +midnight = pd.to_datetime("2014-01-28") ### STOCK SYMBOL CONFIGURATION. -symbols = { 'IBM' : { 'r_bar' : 1e5, 'kappa' : 1.67e-12, 'agent_kappa' : 1.67e-15, - 'sigma_s' : 0, 'fund_vol' : 1e-8, 'megashock_lambda_a' : 2.77778e-13, - 'megashock_mean' : 1e3, 'megashock_var' : 5e4 } - } +symbols = { + "IBM": { + "r_bar": 1e5, + "kappa": 1.67e-12, + "agent_kappa": 1.67e-15, + "sigma_s": 0, + "fund_vol": 1e-8, + "megashock_lambda_a": 2.77778e-13, + "megashock_mean": 1e3, + "megashock_var": 5e4, + } +} ### INITIAL AGENT DISTRIBUTION. @@ -79,52 +87,59 @@ def get_rand_obj(seed_obj): ### EXCHANGE AGENTS -mkt_open = midnight + pd.to_timedelta('09:30:00') -mkt_close = midnight + pd.to_timedelta('16:00:00') +mkt_open = midnight + pd.to_timedelta("09:30:00") +mkt_close = midnight + pd.to_timedelta("16:00:00") ### Record the type and strategy of the agents for reporting purposes. 
for i in range(num_exch): - agent_types.append("ExchangeAgent") - agent_strats.append("ExchangeAgent") + agent_types.append("ExchangeAgent") + agent_strats.append("ExchangeAgent") ### ZERO INTELLIGENCE AGENTS ### ZeroIntelligence fixed parameters (i.e. not strategic). -zi_obs_noise = 1000000 # a property of the agent, not an individual stock +zi_obs_noise = 1000000 # a property of the agent, not an individual stock ### Lay out the ZI strategies (parameter settings) that will be used in this ### experiment, so we can assign particular numbers of agents to each strategy. ### Tuples are: (R_min, R_max, eta). -zi_strategy = [ (0, 250, 1), (0, 500, 1), (0, 1000, 0.8), (0, 1000, 1), - (0, 2000, 0.8), (250, 500, 0.8), (250, 500, 1) ] +zi_strategy = [ + (0, 250, 1), + (0, 500, 1), + (0, 1000, 0.8), + (0, 1000, 1), + (0, 2000, 0.8), + (250, 500, 0.8), + (250, 500, 1), +] ### Record the initial distribution of agents to ZI strategies. ### Split the agents as evenly as possible among the strategy settings. -zi = [ floor(num_zi / len(zi_strategy)) ] * len(zi_strategy) +zi = [floor(num_zi / len(zi_strategy))] * len(zi_strategy) i = 0 while sum(zi) < num_zi: - zi[i] += 1 - i += 1 + zi[i] += 1 + i += 1 ### Record the type and strategy of the agents for reporting purposes. for i in range(len(zi_strategy)): - x = zi_strategy[i] - strat_name = "Type {} [{} <= R <= {}, eta={}]".format(i+1, x[0], x[1], x[2]) - agent_types.extend([ 'ZeroIntelligenceAgent' ] * zi[i]) - agent_strats.extend([ 'ZeroIntelligenceAgent ({})'.format(strat_name) ] * zi[i]) + x = zi_strategy[i] + strat_name = "Type {} [{} <= R <= {}, eta={}]".format(i + 1, x[0], x[1], x[2]) + agent_types.extend(["ZeroIntelligenceAgent"] * zi[i]) + agent_strats.extend(["ZeroIntelligenceAgent ({})".format(strat_name)] * zi[i]) ### VALUE AGENTS ### Value agent fixed parameters (i.e. not strategic). 
-zi_obs_noise = 1000000 # a property of the agent, not an individual stock +zi_obs_noise = 1000000 # a property of the agent, not an individual stock for i in range(num_val): - agent_types.extend([ 'ValueAgent' ]) - agent_strats.extend([ 'ValueAgent' ]) + agent_types.extend(["ValueAgent"]) + agent_strats.extend(["ValueAgent"]) ### OBI AGENTS @@ -133,8 +148,8 @@ def get_rand_obj(seed_obj): ### Record the type and strategy of the agents for reporting purposes. for i in range(num_obi): - agent_types.append("OBIAgent") - agent_strats.append("OBIAgent") + agent_types.append("OBIAgent") + agent_strats.append("OBIAgent") ### MARKET MAKER AGENTS @@ -143,8 +158,8 @@ def get_rand_obj(seed_obj): ### Record the type and strategy of the agents for reporting purposes. for i in range(num_mm): - agent_types.append("MarketMakerAgent") - agent_strats.append("MarketMakerAgent") + agent_types.append("MarketMakerAgent") + agent_strats.append("MarketMakerAgent") ### MOMENTUM AGENTS @@ -153,8 +168,8 @@ def get_rand_obj(seed_obj): ### Record the type and strategy of the agents for reporting purposes. for i in range(num_mom): - agent_types.append("MomentumAgent") - agent_strats.append("MomentumAgent") + agent_types.append("MomentumAgent") + agent_strats.append("MomentumAgent") ### FINAL AGENT PREPARATION @@ -175,49 +190,74 @@ def get_rand_obj(seed_obj): # (i.e. the entire "experiment"), rather than a single instance of the simulation. 
import argparse -parser = argparse.ArgumentParser(description='Detailed options for sparse_zi config.') -parser.add_argument('-b', '--book_freq', default=None, - help='Frequency at which to archive order book for visualization') -parser.add_argument('-c', '--config', required=True, - help='Name of config file to execute') -parser.add_argument('-l', '--log_dir', default=None, - help='Log directory name (default: unix timestamp at program start)') -parser.add_argument('-o', '--log_orders', action='store_true', - help='Log every order-related action by every agent.') -parser.add_argument('-s', '--seed', type=int, default=None, - help='numpy.random.seed() for simulation') -parser.add_argument('-v', '--verbose', action='store_true', - help='Maximum verbosity!') -parser.add_argument('--config_help', action='store_true', - help='Print argument options for this config file') +parser = argparse.ArgumentParser(description="Detailed options for sparse_zi config.") +parser.add_argument( + "-b", + "--book_freq", + default=None, + help="Frequency at which to archive order book for visualization", +) +parser.add_argument("-c", "--config", required=True, help="Name of config file to execute") +parser.add_argument( + "-l", + "--log_dir", + default=None, + help="Log directory name (default: unix timestamp at program start)", +) +parser.add_argument( + "-o", + "--log_orders", + action="store_true", + help="Log every order-related action by every agent.", +) +parser.add_argument("-s", "--seed", type=int, default=None, help="numpy.random.seed() for simulation") +parser.add_argument("-v", "--verbose", action="store_true", help="Maximum verbosity!") +parser.add_argument( + "--config_help", + action="store_true", + help="Print argument options for this config file", +) # Specialized for loop_obi. 
-parser.add_argument('-f', '--obi_freq', type=int, default=1e9 * 60, - help='OBI subscription frequency') -parser.add_argument('-r', '--flat_range', type=float, default=0.1, - help='OBI dead zone for staying flat') -parser.add_argument('-y', '--entry_threshold', type=float, default=0.1, - help='OBI imbalance to enter a position') -parser.add_argument('-t', '--trail_dist', type=float, default=0.05, - help='OBI trailing stop distance to exit positions') -parser.add_argument('-e', '--levels', type=int, default=10, - help='OBI order book levels to consider') -parser.add_argument('-n', '--num_simulations', type=int, default=5, - help='Number of consecutive simulations in one episode.') +parser.add_argument("-f", "--obi_freq", type=int, default=1e9 * 60, help="OBI subscription frequency") +parser.add_argument("-r", "--flat_range", type=float, default=0.1, help="OBI dead zone for staying flat") +parser.add_argument( + "-y", + "--entry_threshold", + type=float, + default=0.1, + help="OBI imbalance to enter a position", +) +parser.add_argument( + "-t", + "--trail_dist", + type=float, + default=0.05, + help="OBI trailing stop distance to exit positions", +) +parser.add_argument("-e", "--levels", type=int, default=10, help="OBI order book levels to consider") +parser.add_argument( + "-n", + "--num_simulations", + type=int, + default=5, + help="Number of consecutive simulations in one episode.", +) args, remaining_args = parser.parse_known_args() if args.config_help: - parser.print_help() - sys.exit() + parser.print_help() + sys.exit() # If nothing specifically requested, use starting timestamp. In either case, successive # simulations will have simulation number appended. log_dir = args.log_dir -if log_dir is None: log_dir = str(int(pd.Timestamp('now').timestamp())) +if log_dir is None: + log_dir = str(int(pd.Timestamp("now").timestamp())) # Requested order book snapshot archive frequency. 
@@ -237,7 +277,8 @@ def get_rand_obj(seed_obj): # before) seed = args.seed -if not seed: seed = int(pd.Timestamp.now().timestamp() * 1000000) % (2**32 - 1) +if not seed: + seed = int(pd.Timestamp.now().timestamp() * 1000000) % (2**32 - 1) np.random.seed(seed) # Config parameter that causes util.util.print to suppress most output. @@ -259,13 +300,15 @@ def get_rand_obj(seed_obj): num_consecutive_simulations = args.num_simulations -print ("Silent mode: {}".format(util.silent_mode)) -print ("Logging orders: {}".format(log_orders)) -print ("Book freq: {}".format(book_freq)) -print ("OBI Freq: {}, OB Levels: {}, Entry Thresh: {}, Trail Dist: {}".format( - obi_freq, levels, entry_threshold, trail_dist)) -print ("Configuration seed: {}\n".format(seed)) - +print("Silent mode: {}".format(util.silent_mode)) +print("Logging orders: {}".format(log_orders)) +print("Book freq: {}".format(book_freq)) +print( + "OBI Freq: {}, OB Levels: {}, Entry Thresh: {}, Trail Dist: {}".format( + obi_freq, levels, entry_threshold, trail_dist + ) +) +print("Configuration seed: {}\n".format(seed)) ### STOCHASTIC CONTROL @@ -276,13 +319,13 @@ def get_rand_obj(seed_obj): ### seed for each simulation, but the entire experiment will still be deterministic ### given the same initial (global) seed. 
-kernel_seeds = np.random.RandomState(seed=np.random.randint(low=0,high=2**32)) +kernel_seeds = np.random.RandomState(seed=np.random.randint(low=0, high=2**32)) symbol_seeds = {} -for sym in symbols: symbol_seeds[sym] = np.random.RandomState(seed=np.random.randint(low=0,high=2**32)) - -agent_seeds = [ np.random.RandomState(seed=np.random.randint(low=0,high=2**32)) ] * num_agents +for sym in symbols: + symbol_seeds[sym] = np.random.RandomState(seed=np.random.randint(low=0, high=2**32)) +agent_seeds = [np.random.RandomState(seed=np.random.randint(low=0, high=2**32))] * num_agents ### LATENCY CONFIGURATION @@ -297,33 +340,32 @@ def get_rand_obj(seed_obj): # Other agents can be explicitly set afterward (and the mirror half of the matrix is also). # This configures all agents to a starting latency as described above. -latency = np.random.uniform(low = 21000, high = 13000000, size=(len(agent_types),len(agent_types))) +latency = np.random.uniform(low=21000, high=13000000, size=(len(agent_types), len(agent_types))) # Overriding the latency for certain agent pairs happens below, as does forcing mirroring # of the matrix to be symmetric. for i, t1 in zip(range(latency.shape[0]), agent_types): - for j, t2 in zip(range(latency.shape[1]), agent_types): - # Three cases for symmetric array. Set latency when j > i, copy it when i > j, same agent when i == j. - if j > i: - # Presently, strategy agents shouldn't be talking to each other, so we set them to extremely high latency. - if (t1 == "ZeroIntelligenceAgent" and t2 == "ZeroIntelligenceAgent"): - latency[i,j] = 1000000000 * 60 * 60 * 24 # Twenty-four hours. - elif i > j: - # This "bottom" half of the matrix simply mirrors the top. - latency[i,j] = latency[j,i] - else: - # This is the same agent. How long does it take to reach localhost? In our data center, it actually - # takes about 20 microseconds. - latency[i,j] = 20000 + for j, t2 in zip(range(latency.shape[1]), agent_types): + # Three cases for symmetric array. 
Set latency when j > i, copy it when i > j, same agent when i == j. + if j > i: + # Presently, strategy agents shouldn't be talking to each other, so we set them to extremely high latency. + if t1 == "ZeroIntelligenceAgent" and t2 == "ZeroIntelligenceAgent": + latency[i, j] = 1000000000 * 60 * 60 * 24 # Twenty-four hours. + elif i > j: + # This "bottom" half of the matrix simply mirrors the top. + latency[i, j] = latency[j, i] + else: + # This is the same agent. How long does it take to reach localhost? In our data center, it actually + # takes about 20 microseconds. + latency[i, j] = 20000 # OBI to Exchange and back. -latency[0,101] = 1 -latency[101,0] = 1 +latency[0, 101] = 1 +latency[101, 0] = 1 # Configure a simple latency noise model for the agents. # Index is ns extra delay, value is probability of this delay being applied. -noise = [ 0.25, 0.25, 0.20, 0.15, 0.10, 0.05 ] - +noise = [0.25, 0.25, 0.20, 0.15, 0.10, 0.05] ### FINAL GLOBAL CONFIGURATION FOR ALL SIMULATIONS @@ -335,111 +377,201 @@ def get_rand_obj(seed_obj): # There is no requirement these times be on the same date, although # none of the current agents handle markets closing and reopening. kernelStartTime = midnight -kernelStopTime = midnight + pd.to_timedelta('17:00:00') +kernelStopTime = midnight + pd.to_timedelta("17:00:00") # This will configure the kernel with a default computation delay # (time penalty) for each agent's wakeup and recvMsg. An agent # can change this at any time for itself. (nanoseconds) -defaultComputationDelay = 1000000000 # one second - +defaultComputationDelay = 1000000000 # one second ###### Per-simulation configuration section. This section initializes ###### ###### from scratch those objects and settings that should be reset withr ###### ###### each "run" of the simulation within an overall experiment. ###### -for sim in range(num_consecutive_simulations): # eventually make this a stopping criteria - - # Flush the agent population and start over for each simulation. 
- agents = [] - - # The random state of each symbol needs to be set for each simulation, so the - # stocks won't always do the same thing. Note that the entire experiment - # should still be fully repeatable with the same initial seed, because the - # list of random seeds for a symbol is fixed at the start, based on the initial - # seed. - for symbol in symbols: symbols[symbol]['random_state'] = get_rand_obj(symbol_seeds[symbol]) - - # Obtain a fresh simulation Kernel with the next appropriate random_state, seeded - # from the list obtained before the first simulation. - kernel = Kernel("Base Kernel", random_state = get_rand_obj(kernel_seeds)) - - # Configure an appropriate oracle for all traded stocks. - # All agents requiring the same type of Oracle will use the same oracle instance. - # The oracle does not require its own source of randomness, because each symbol - # and agent has those, and the oracle will always use on of those sources, as appropriate. - oracle = SparseMeanRevertingOracle(mkt_open, mkt_close, symbols) - - - # Create the agents in the same order they were specified in the first configuration - # section (outside the simulation loop). It is very important they be in the same - # order. - - agent_id = 0 - - # Create the exchange. - for i in range(num_exch): - agents.append( ExchangeAgent(agent_id, "{} {}".format(agent_types[agent_id], agent_id), - agent_strats[agent_id], mkt_open, mkt_close, - [s for s in symbols], log_orders = log_orders, - book_freq = book_freq, pipeline_delay = 0, - computation_delay = 0, stream_history = 10, - random_state = get_rand_obj(agent_seeds[agent_id])) ) - agent_id += 1 - - - # Configure some zero intelligence agents. - starting_cash = 10000000 # Cash in this simulator is always in CENTS. - symbol = 'IBM' - s = symbols[symbol] - - # ZI strategy split. Note that agent arrival rates are quite small, because our minimum - # time step is a nanosecond, and we want the agents to arrive more on the order of - # minutes. 
- for n, x in zip(zi, zi_strategy): - strat_name = agent_strats[agent_id] - while n > 0: - agents.append(ZeroIntelligenceAgent(agent_id, "ZI Agent {}".format(agent_id), strat_name, random_state = get_rand_obj(agent_seeds[agent_id]), log_orders=log_orders, symbol=symbol, starting_cash=starting_cash, sigma_n=zi_obs_noise, r_bar=s['r_bar'], kappa=s['agent_kappa'], sigma_s=s['fund_vol'], q_max=10, sigma_pv=5e6, R_min=x[0], R_max=x[1], eta=x[2], lambda_a=1e-12)) - agent_id += 1 - n -= 1 - - # Add value agents. - for i in range(num_val): - agents.extend([ ValueAgent(agent_id, "Value Agent {}".format(agent_id), "ValueAgent", symbol = symbol, random_state = get_rand_obj(agent_seeds[agent_id]), log_orders=log_orders, starting_cash=starting_cash, sigma_n=zi_obs_noise, r_bar=s['r_bar'], kappa=s['agent_kappa'], sigma_s=s['fund_vol'], lambda_a=1e-12) ]) - agent_id += 1 - - - # Add an OBI agent to try to beat this market. - for i in range(num_obi): - random_state = get_rand_obj(agent_seeds[agent_id]) - agents.extend([ OrderBookImbalanceAgent(agent_id, "OBI Agent {}".format(agent_id), "OrderBookImbalanceAgent", symbol = symbol, starting_cash = starting_cash, levels = levels, entry_threshold = entry_threshold, trail_dist = trail_dist, freq = obi_freq, random_state = random_state) ]) - agent_id += 1 - - # Add market maker agents. - for i in range(num_mm): - random_state = get_rand_obj(agent_seeds[agent_id]) - agents.extend([ MarketMakerAgent(agent_id, "Market Maker Agent {}".format(agent_id), "MarketMakerAgent", symbol=symbol, starting_cash=starting_cash, min_size=500, max_size=1000, subscribe=True, log_orders=False, random_state = random_state) ]) - agent_id += 1 - - # Add momentum agents. 
- for i in range(num_mom): - random_state = get_rand_obj(agent_seeds[agent_id]) - agents.extend([ MomentumAgent(agent_id, "Momentum Agent {}".format(agent_id), "MomentumAgent", symbol=symbol, starting_cash=starting_cash, min_size=1, max_size=10, subscribe=True, log_orders=False, random_state = random_state) ]) - agent_id += 1 - - - # Start the kernel running. This call will not return until the - # simulation is complete. (Eventually this should be made - # parallel for learning.) - agent_saved_states = kernel.runner( - agents = agents, startTime = kernelStartTime, - stopTime = kernelStopTime, agentLatency = latency, - latencyNoise = noise, - defaultComputationDelay = defaultComputationDelay, - oracle = oracle, log_dir = "{}_{}".format(log_dir,sim)) - - obi_perf.append(agent_saved_states['agent_state'][0]) - - print ("\n====== Experimental wallclock elapsed: {} ======\n".format( - pd.Timestamp('now') - wallclock_start)) - +for sim in range(num_consecutive_simulations): # eventually make this a stopping criteria + + # Flush the agent population and start over for each simulation. + agents = [] + + # The random state of each symbol needs to be set for each simulation, so the + # stocks won't always do the same thing. Note that the entire experiment + # should still be fully repeatable with the same initial seed, because the + # list of random seeds for a symbol is fixed at the start, based on the initial + # seed. + for symbol in symbols: + symbols[symbol]["random_state"] = get_rand_obj(symbol_seeds[symbol]) + + # Obtain a fresh simulation Kernel with the next appropriate random_state, seeded + # from the list obtained before the first simulation. + kernel = Kernel("Base Kernel", random_state=get_rand_obj(kernel_seeds)) + + # Configure an appropriate oracle for all traded stocks. + # All agents requiring the same type of Oracle will use the same oracle instance. 
+    # The oracle does not require its own source of randomness, because each symbol
+    # and agent has those, and the oracle will always use one of those sources, as appropriate.
+    oracle = SparseMeanRevertingOracle(mkt_open, mkt_close, symbols)
+
+    # Create the agents in the same order they were specified in the first configuration
+    # section (outside the simulation loop). It is very important they be in the same
+    # order.
+
+    agent_id = 0
+
+    # Create the exchange.
+    for i in range(num_exch):
+        agents.append(
+            ExchangeAgent(
+                agent_id,
+                "{} {}".format(agent_types[agent_id], agent_id),
+                agent_strats[agent_id],
+                mkt_open,
+                mkt_close,
+                [s for s in symbols],
+                log_orders=log_orders,
+                book_freq=book_freq,
+                pipeline_delay=0,
+                computation_delay=0,
+                stream_history=10,
+                random_state=get_rand_obj(agent_seeds[agent_id]),
+            )
+        )
+        agent_id += 1
+
+    # Configure some zero intelligence agents.
+    starting_cash = 10000000  # Cash in this simulator is always in CENTS.
+    symbol = "IBM"
+    s = symbols[symbol]
+
+    # ZI strategy split. Note that agent arrival rates are quite small, because our minimum
+    # time step is a nanosecond, and we want the agents to arrive more on the order of
+    # minutes.
+    for n, x in zip(zi, zi_strategy):
+        strat_name = agent_strats[agent_id]
+        while n > 0:
+            agents.append(
+                ZeroIntelligenceAgent(
+                    agent_id,
+                    "ZI Agent {}".format(agent_id),
+                    strat_name,
+                    random_state=get_rand_obj(agent_seeds[agent_id]),
+                    log_orders=log_orders,
+                    symbol=symbol,
+                    starting_cash=starting_cash,
+                    sigma_n=zi_obs_noise,
+                    r_bar=s["r_bar"],
+                    kappa=s["agent_kappa"],
+                    sigma_s=s["fund_vol"],
+                    q_max=10,
+                    sigma_pv=5e6,
+                    R_min=x[0],
+                    R_max=x[1],
+                    eta=x[2],
+                    lambda_a=1e-12,
+                )
+            )
+            agent_id += 1
+            n -= 1
+
+    # Add value agents.
+ for i in range(num_val): + agents.extend( + [ + ValueAgent( + agent_id, + "Value Agent {}".format(agent_id), + "ValueAgent", + symbol=symbol, + random_state=get_rand_obj(agent_seeds[agent_id]), + log_orders=log_orders, + starting_cash=starting_cash, + sigma_n=zi_obs_noise, + r_bar=s["r_bar"], + kappa=s["agent_kappa"], + sigma_s=s["fund_vol"], + lambda_a=1e-12, + ) + ] + ) + agent_id += 1 + + # Add an OBI agent to try to beat this market. + for i in range(num_obi): + random_state = get_rand_obj(agent_seeds[agent_id]) + agents.extend( + [ + OrderBookImbalanceAgent( + agent_id, + "OBI Agent {}".format(agent_id), + "OrderBookImbalanceAgent", + symbol=symbol, + starting_cash=starting_cash, + levels=levels, + entry_threshold=entry_threshold, + trail_dist=trail_dist, + freq=obi_freq, + random_state=random_state, + ) + ] + ) + agent_id += 1 + + # Add market maker agents. + for i in range(num_mm): + random_state = get_rand_obj(agent_seeds[agent_id]) + agents.extend( + [ + MarketMakerAgent( + agent_id, + "Market Maker Agent {}".format(agent_id), + "MarketMakerAgent", + symbol=symbol, + starting_cash=starting_cash, + min_size=500, + max_size=1000, + subscribe=True, + log_orders=False, + random_state=random_state, + ) + ] + ) + agent_id += 1 + + # Add momentum agents. + for i in range(num_mom): + random_state = get_rand_obj(agent_seeds[agent_id]) + agents.extend( + [ + MomentumAgent( + agent_id, + "Momentum Agent {}".format(agent_id), + "MomentumAgent", + symbol=symbol, + starting_cash=starting_cash, + min_size=1, + max_size=10, + subscribe=True, + log_orders=False, + random_state=random_state, + ) + ] + ) + agent_id += 1 + + # Start the kernel running. This call will not return until the + # simulation is complete. (Eventually this should be made + # parallel for learning.) 
+ agent_saved_states = kernel.runner( + agents=agents, + startTime=kernelStartTime, + stopTime=kernelStopTime, + agentLatency=latency, + latencyNoise=noise, + defaultComputationDelay=defaultComputationDelay, + oracle=oracle, + log_dir="{}_{}".format(log_dir, sim), + ) + + obi_perf.append(agent_saved_states["agent_state"][0]) + + print("\n====== Experimental wallclock elapsed: {} ======\n".format(pd.Timestamp("now") - wallclock_start)) diff --git a/config/marketreplay.py b/config/marketreplay.py index 33fac18f8..5d46a1aa6 100644 --- a/config/marketreplay.py +++ b/config/marketreplay.py @@ -1,48 +1,37 @@ import argparse +import datetime as dt +import sys + import numpy as np import pandas as pd -import sys -import datetime as dt +from agent.examples.MarketReplayAgent import MarketReplayAgent +from agent.ExchangeAgent import ExchangeAgent from Kernel import Kernel from util import util from util.order import LimitOrder -from agent.ExchangeAgent import ExchangeAgent -from agent.examples.MarketReplayAgent import MarketReplayAgent ######################################################################################################################## ############################################### GENERAL CONFIG ######################################################### -parser = argparse.ArgumentParser(description='Detailed options for market replay config.') - -parser.add_argument('-c', - '--config', - required=True, - help='Name of config file to execute') -parser.add_argument('-t', - '--ticker', - required=True, - help='Name of the stock/symbol') -parser.add_argument('-d', - '--date', - required=True, - help='Historical date') -parser.add_argument('-l', - '--log_dir', - default=None, - help='Log directory name (default: unix timestamp at program start)') -parser.add_argument('-s', - '--seed', - type=int, - default=None, - help='numpy.random.seed() for simulation') -parser.add_argument('-v', - '--verbose', - action='store_true', - help='Maximum verbosity!') 
-parser.add_argument('--config_help', - action='store_true', - help='Print argument options for this config file') +parser = argparse.ArgumentParser(description="Detailed options for market replay config.") + +parser.add_argument("-c", "--config", required=True, help="Name of config file to execute") +parser.add_argument("-t", "--ticker", required=True, help="Name of the stock/symbol") +parser.add_argument("-d", "--date", required=True, help="Historical date") +parser.add_argument( + "-l", + "--log_dir", + default=None, + help="Log directory name (default: unix timestamp at program start)", +) +parser.add_argument("-s", "--seed", type=int, default=None, help="numpy.random.seed() for simulation") +parser.add_argument("-v", "--verbose", action="store_true", help="Maximum verbosity!") +parser.add_argument( + "--config_help", + action="store_true", + help="Print argument options for this config file", +) args, remaining_args = parser.parse_known_args() @@ -52,7 +41,8 @@ log_dir = args.log_dir # Requested log directory. seed = args.seed # Random seed specification on the command line. 
-if not seed: seed = int(pd.Timestamp.now().timestamp() * 1000000) % (2 ** 32 - 1) +if not seed: + seed = int(pd.Timestamp.now().timestamp() * 1000000) % (2**32 - 1) np.random.seed(seed) util.silent_mode = not args.verbose @@ -75,70 +65,84 @@ agent_count, agents, agent_types = 0, [], [] # 1) Exchange Agent -mkt_open = historical_date_pd + pd.to_timedelta('09:00:00') -mkt_close = historical_date_pd + pd.to_timedelta('16:00:00') +mkt_open = historical_date_pd + pd.to_timedelta("09:00:00") +mkt_close = historical_date_pd + pd.to_timedelta("16:00:00") print("Market Open : {}".format(mkt_open)) print("Market Close: {}".format(mkt_close)) -agents.extend([ExchangeAgent(id=0, - name="EXCHANGE_AGENT", - type="ExchangeAgent", - mkt_open=mkt_open, - mkt_close=mkt_close, - symbols=[symbol], - log_orders=True, - pipeline_delay=0, - computation_delay=0, - stream_history=10, - book_freq='all', - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, - dtype='uint64')))]) +agents.extend( + [ + ExchangeAgent( + id=0, + name="EXCHANGE_AGENT", + type="ExchangeAgent", + mkt_open=mkt_open, + mkt_close=mkt_close, + symbols=[symbol], + log_orders=True, + pipeline_delay=0, + computation_delay=0, + stream_history=10, + book_freq="all", + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + ] +) agent_types.extend("ExchangeAgent") agent_count += 1 # 2) Market Replay Agent -file_name = f'DOW30/{symbol}/{symbol}.{historical_date}' -orders_file_path = f'/efs/data/{file_name}' - -agents.extend([MarketReplayAgent(id=1, - name="MARKET_REPLAY_AGENT", - type='MarketReplayAgent', - symbol=symbol, - log_orders=False, - date=historical_date_pd, - start_time=mkt_open, - end_time=mkt_close, - orders_file_path=orders_file_path, - processed_orders_folder_path='/efs/data/marketreplay/', - starting_cash=0, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, - dtype='uint64')))]) +file_name = 
f"DOW30/{symbol}/{symbol}.{historical_date}" +orders_file_path = f"/efs/data/{file_name}" + +agents.extend( + [ + MarketReplayAgent( + id=1, + name="MARKET_REPLAY_AGENT", + type="MarketReplayAgent", + symbol=symbol, + log_orders=False, + date=historical_date_pd, + start_time=mkt_open, + end_time=mkt_close, + orders_file_path=orders_file_path, + processed_orders_folder_path="/efs/data/marketreplay/", + starting_cash=0, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + ] +) agent_types.extend("MarketReplayAgent") agent_count += 1 ######################################################################################################################## ########################################### KERNEL AND OTHER CONFIG #################################################### -kernel = Kernel("Market Replay Kernel", random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, - dtype='uint64'))) +kernel = Kernel( + "Market Replay Kernel", + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), +) kernelStartTime = historical_date_pd -kernelStopTime = historical_date_pd + pd.to_timedelta('17:00:00') +kernelStopTime = historical_date_pd + pd.to_timedelta("17:00:00") defaultComputationDelay = 0 latency = np.zeros((agent_count, agent_count)) noise = [0.0] -kernel.runner(agents=agents, - startTime=kernelStartTime, - stopTime=kernelStopTime, - agentLatency=latency, - latencyNoise=noise, - defaultComputationDelay=defaultComputationDelay, - defaultLatency=0, - oracle=None, - log_dir=args.log_dir) +kernel.runner( + agents=agents, + startTime=kernelStartTime, + stopTime=kernelStopTime, + agentLatency=latency, + latencyNoise=noise, + defaultComputationDelay=defaultComputationDelay, + defaultLatency=0, + oracle=None, + log_dir=args.log_dir, +) simulation_end_time = dt.datetime.now() print("Simulation End Time: {}".format(simulation_end_time)) diff --git a/config/obi_rmsc02.py 
b/config/obi_rmsc02.py index d93d1b4af..21e985fb4 100755 --- a/config/obi_rmsc02.py +++ b/config/obi_rmsc02.py @@ -8,55 +8,46 @@ # - 5 Momentum Agent import argparse -import numpy as np -import pandas as pd -import sys import datetime as dt import importlib +import sys -from Kernel import Kernel -from util import util -from util.order import LimitOrder -from util.oracle.SparseMeanRevertingOracle import SparseMeanRevertingOracle +import numpy as np +import pandas as pd -from agent.ExchangeAgent import ExchangeAgent from agent.examples.MarketMakerAgent import MarketMakerAgent from agent.examples.MomentumAgent import MomentumAgent from agent.examples.SubscriptionAgent import SubscriptionAgent - -from agent.ZeroIntelligenceAgent import ZeroIntelligenceAgent +from agent.ExchangeAgent import ExchangeAgent from agent.OrderBookImbalanceAgent import OrderBookImbalanceAgent +from agent.ZeroIntelligenceAgent import ZeroIntelligenceAgent +from Kernel import Kernel +from util import util +from util.oracle.SparseMeanRevertingOracle import SparseMeanRevertingOracle +from util.order import LimitOrder ######################################################################################################################## ############################################### GENERAL CONFIG ######################################################### parser = argparse.ArgumentParser( - description='Detailed options for RMSC-2 (Reference Market Simulation Configuration) config.') - -parser.add_argument('-c', - '--config', - required=True, - help='Name of config file to execute') -parser.add_argument('-l', - '--log_dir', - default=None, - help='Log directory name (default: unix timestamp at program start)') -parser.add_argument('-s', - '--seed', - type=int, - default=None, - help='numpy.random.seed() for simulation') -parser.add_argument('-v', - '--verbose', - action='store_true', - help='Maximum verbosity!') -parser.add_argument('--config_help', - action='store_true', - help='Print argument 
options for this config file') -parser.add_argument('-a', - '--agent_name', - default=None, - help='Specify the agent to test with') + description="Detailed options for RMSC-2 (Reference Market Simulation Configuration) config." +) + +parser.add_argument("-c", "--config", required=True, help="Name of config file to execute") +parser.add_argument( + "-l", + "--log_dir", + default=None, + help="Log directory name (default: unix timestamp at program start)", +) +parser.add_argument("-s", "--seed", type=int, default=None, help="numpy.random.seed() for simulation") +parser.add_argument("-v", "--verbose", action="store_true", help="Maximum verbosity!") +parser.add_argument( + "--config_help", + action="store_true", + help="Print argument options for this config file", +) +parser.add_argument("-a", "--agent_name", default=None, help="Specify the agent to test with") args, remaining_args = parser.parse_known_args() @@ -66,7 +57,8 @@ log_dir = args.log_dir # Requested log directory. seed = args.seed # Random seed specification on the command line. -if not seed: seed = int(pd.Timestamp.now().timestamp() * 1000000) % (2 ** 32 - 1) +if not seed: + seed = int(pd.Timestamp.now().timestamp() * 1000000) % (2**32 - 1) np.random.seed(seed) util.silent_mode = not args.verbose @@ -79,114 +71,142 @@ ############################################### AGENTS CONFIG ########################################################## # Historical date to simulate. -historical_date = pd.to_datetime('2019-06-28') -symbol = 'JPM' +historical_date = pd.to_datetime("2019-06-28") +symbol = "JPM" agent_count, agents, agent_types = 0, [], [] starting_cash = 10000000 # Cash in this simulator is always in CENTS. 
# 1) 1 Exchange Agent -mkt_open = historical_date + pd.to_timedelta('09:30:00') -mkt_close = historical_date + pd.to_timedelta('16:00:00') - -agents.extend([ExchangeAgent(id=0, - name="EXCHANGE_AGENT", - type="ExchangeAgent", - mkt_open=mkt_open, - mkt_close=mkt_close, - symbols=[symbol], - log_orders=False, - pipeline_delay=0, - computation_delay=0, - stream_history=10, - book_freq='all', - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, - dtype='uint64')))]) +mkt_open = historical_date + pd.to_timedelta("09:30:00") +mkt_close = historical_date + pd.to_timedelta("16:00:00") + +agents.extend( + [ + ExchangeAgent( + id=0, + name="EXCHANGE_AGENT", + type="ExchangeAgent", + mkt_open=mkt_open, + mkt_close=mkt_close, + symbols=[symbol], + log_orders=False, + pipeline_delay=0, + computation_delay=0, + stream_history=10, + book_freq="all", + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + ] +) agent_types.extend("ExchangeAgent") agent_count += 1 # 2) 1 Market Maker Agent num_mm_agents = 1 -agents.extend([MarketMakerAgent(id=j, - name="MARKET_MAKER_AGENT_{}".format(j), - type='MarketMakerAgent', - symbol=symbol, - starting_cash=starting_cash, - min_size=500, - max_size=1000, - subscribe=True, - log_orders=False, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, - dtype='uint64'))) - for j in range(agent_count, agent_count + num_mm_agents)]) - -agent_types.extend('MarketMakerAgent') +agents.extend( + [ + MarketMakerAgent( + id=j, + name="MARKET_MAKER_AGENT_{}".format(j), + type="MarketMakerAgent", + symbol=symbol, + starting_cash=starting_cash, + min_size=500, + max_size=1000, + subscribe=True, + log_orders=False, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + for j in range(agent_count, agent_count + num_mm_agents) + ] +) + +agent_types.extend("MarketMakerAgent") agent_count += num_mm_agents # 3) 50 Zero 
Intelligence Agents -symbols = {symbol: {'r_bar': 1e5, - 'kappa': 1.67e-12, - 'agent_kappa': 1.67e-15, - 'sigma_s': 0, - 'fund_vol': 1e-8, - 'megashock_lambda_a': 2.77778e-13, - 'megashock_mean': 1e3, - 'megashock_var': 5e4, - 'random_state': np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, - dtype='uint64'))}} +symbols = { + symbol: { + "r_bar": 1e5, + "kappa": 1.67e-12, + "agent_kappa": 1.67e-15, + "sigma_s": 0, + "fund_vol": 1e-8, + "megashock_lambda_a": 2.77778e-13, + "megashock_mean": 1e3, + "megashock_var": 5e4, + "random_state": np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + } +} oracle = SparseMeanRevertingOracle(mkt_open, mkt_close, symbols) num_zi_agents = 89 -agents.extend([ZeroIntelligenceAgent(id=j, - name="ZI_AGENT_{}".format(j), - type="ZeroIntelligenceAgent", - symbol=symbol, - starting_cash=starting_cash, - sigma_n=10000, - sigma_s=symbols[symbol]['fund_vol'], - kappa=symbols[symbol]['agent_kappa'], - r_bar=symbols[symbol]['r_bar'], - q_max=10, - sigma_pv=5e4, - R_min=0, - R_max=100, - eta=1, - lambda_a=1e-12, - log_orders=False, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, - dtype='uint64'))) - for j in range(agent_count, agent_count + num_zi_agents)]) +agents.extend( + [ + ZeroIntelligenceAgent( + id=j, + name="ZI_AGENT_{}".format(j), + type="ZeroIntelligenceAgent", + symbol=symbol, + starting_cash=starting_cash, + sigma_n=10000, + sigma_s=symbols[symbol]["fund_vol"], + kappa=symbols[symbol]["agent_kappa"], + r_bar=symbols[symbol]["r_bar"], + q_max=10, + sigma_pv=5e4, + R_min=0, + R_max=100, + eta=1, + lambda_a=1e-12, + log_orders=False, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + for j in range(agent_count, agent_count + num_zi_agents) + ] +) agent_types.extend("ZeroIntelligenceAgent") agent_count += num_zi_agents # 4) 5 Order Book Imbalance agents num_obi_agents = 5 
-agents.extend([OrderBookImbalanceAgent(id=j, - name="OBI_AGENT_{}".format(j), - type="OrderBookImbalanceAgent", - symbol=symbol, - starting_cash=starting_cash, - log_orders=False, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, - dtype='uint64'))) - for j in range(agent_count, agent_count + num_obi_agents)]) +agents.extend( + [ + OrderBookImbalanceAgent( + id=j, + name="OBI_AGENT_{}".format(j), + type="OrderBookImbalanceAgent", + symbol=symbol, + starting_cash=starting_cash, + log_orders=False, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + for j in range(agent_count, agent_count + num_obi_agents) + ] +) agent_types.extend("OrderBookImbalanceAgent") agent_count += num_obi_agents # 5) 5 Momentum Agents: num_momentum_agents = 5 -agents.extend([MomentumAgent(id=j, - name="MOMENTUM_AGENT_{}".format(j), - type="MomentumAgent", - symbol=symbol, - starting_cash=starting_cash, - min_size=1, - max_size=10, - subscribe=True, - log_orders=False, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, - dtype='uint64'))) - for j in range(agent_count, agent_count + num_momentum_agents)]) +agents.extend( + [ + MomentumAgent( + id=j, + name="MOMENTUM_AGENT_{}".format(j), + type="MomentumAgent", + symbol=symbol, + starting_cash=starting_cash, + min_size=1, + max_size=10, + subscribe=True, + log_orders=False, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + for j in range(agent_count, agent_count + num_momentum_agents) + ] +) agent_types.extend("MomentumAgent") agent_count += num_momentum_agents @@ -206,48 +226,57 @@ agent_count += 1 """ # 7) User defined agent -# Load the agent to evaluate against the market +# Load the agent to evaluate against the market if args.agent_name: - mod_name = args.agent_name.rsplit('.', 1)[0] - class_name = args.agent_name.split('.')[-1] + mod_name = args.agent_name.rsplit(".", 1)[0] + 
class_name = args.agent_name.split(".")[-1] m = importlib.import_module(args.agent_name, package=None) testagent = getattr(m, class_name) - agents.extend([testagent(id=agent_count, - name=args.agent_name, - type="AgentUnderTest", - symbol=symbol, - starting_cash=starting_cash, - min_size=1, - max_size=10, - log_orders=False, - random_state=np.random.RandomState( - seed=np.random.randint(low=0, high=2 ** 32, dtype='uint64')))]) + agents.extend( + [ + testagent( + id=agent_count, + name=args.agent_name, + type="AgentUnderTest", + symbol=symbol, + starting_cash=starting_cash, + min_size=1, + max_size=10, + log_orders=False, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + ] + ) agent_count += 1 - agent_types.extend('AgentUnderTest') + agent_types.extend("AgentUnderTest") ######################################################################################################################## ########################################### KERNEL AND OTHER CONFIG #################################################### -kernel = Kernel("Market Replay Kernel", random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, - dtype='uint64'))) +kernel = Kernel( + "Market Replay Kernel", + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), +) kernelStartTime = historical_date -kernelStopTime = historical_date + pd.to_timedelta('17:00:00') +kernelStopTime = historical_date + pd.to_timedelta("17:00:00") defaultComputationDelay = 0 -latency = np.random.uniform(low = 21000, high = 13000000, size=(agent_count, agent_count)) -noise = [ 0.25, 0.25, 0.20, 0.15, 0.10, 0.05 ] - -kernel.runner(agents=agents, - startTime=kernelStartTime, - stopTime=kernelStopTime, - agentLatency=latency, - latencyNoise=noise, - defaultComputationDelay=defaultComputationDelay, - defaultLatency=0, - oracle=oracle, - log_dir=args.log_dir) +latency = np.random.uniform(low=21000, high=13000000, 
size=(agent_count, agent_count)) +noise = [0.25, 0.25, 0.20, 0.15, 0.10, 0.05] + +kernel.runner( + agents=agents, + startTime=kernelStartTime, + stopTime=kernelStopTime, + agentLatency=latency, + latencyNoise=noise, + defaultComputationDelay=defaultComputationDelay, + defaultLatency=0, + oracle=oracle, + log_dir=args.log_dir, +) simulation_end_time = dt.datetime.now() print("Simulation End Time: {}".format(simulation_end_time)) diff --git a/config/parallel.py b/config/parallel.py index 9989ddb94..36a0b62fd 100644 --- a/config/parallel.py +++ b/config/parallel.py @@ -1,18 +1,21 @@ import argparse +import datetime as dt import os from multiprocessing import Pool -import psutil -import datetime as dt + import numpy as np +import psutil def run_in_parallel(num_simulations, num_parallel, config, log_folder, verbose): - global_seeds = np.random.randint(0, 2 ** 32, num_simulations) - print(f'Global Seeds: {global_seeds}') + global_seeds = np.random.randint(0, 2**32, num_simulations) + print(f"Global Seeds: {global_seeds}") - processes = [f'python -u abides.py -c {config} -l {log_folder}_seed_{seed} {"-v" if verbose else ""} -s {seed}' - for seed in global_seeds] + processes = [ + f'python -u abides.py -c {config} -l {log_folder}_seed_{seed} {"-v" if verbose else ""} -s {seed}' + for seed in global_seeds + ] pool = Pool(processes=num_parallel) pool.map(run_process, processes) @@ -25,40 +28,51 @@ def run_process(process): if __name__ == "__main__": start_time = dt.datetime.now() - parser = argparse.ArgumentParser(description='Main config to run multiple ABIDES simulations in parallel') - parser.add_argument('--seed', type=int, default=None, - help='Seed controlling the generated global seeds') - parser.add_argument('--num_simulations', type=int, default=1, - help='Total number of simulations to run') - parser.add_argument('--num_parallel', type=int, default=None, - help='Number of simulations to run in parallel') - parser.add_argument('--config', required=True, - help='Name 
of config file to execute') - parser.add_argument('--log_folder', required=True, - help='Log directory name') - parser.add_argument('-v', '--verbose', action='store_true', - help='Maximum verbosity!') + parser = argparse.ArgumentParser(description="Main config to run multiple ABIDES simulations in parallel") + parser.add_argument( + "--seed", + type=int, + default=None, + help="Seed controlling the generated global seeds", + ) + parser.add_argument( + "--num_simulations", + type=int, + default=1, + help="Total number of simulations to run", + ) + parser.add_argument( + "--num_parallel", + type=int, + default=None, + help="Number of simulations to run in parallel", + ) + parser.add_argument("--config", required=True, help="Name of config file to execute") + parser.add_argument("--log_folder", required=True, help="Log directory name") + parser.add_argument("-v", "--verbose", action="store_true", help="Maximum verbosity!") args, remaining_args = parser.parse_known_args() seed = args.seed num_simulations = args.num_simulations - num_parallel = args.num_parallel if args.num_parallel else psutil.cpu_count() # count of the CPUs on the machine + num_parallel = args.num_parallel if args.num_parallel else psutil.cpu_count() # count of the CPUs on the machine config = args.config log_folder = args.log_folder verbose = args.verbose - print(f'Total number of simulation: {num_simulations}') - print(f'Number of simulations to run in parallel: {num_parallel}') - print(f'Configuration: {config}') + print(f"Total number of simulation: {num_simulations}") + print(f"Number of simulations to run in parallel: {num_parallel}") + print(f"Configuration: {config}") np.random.seed(seed) - run_in_parallel(num_simulations=num_simulations, - num_parallel=num_parallel, - config=config, - log_folder=log_folder, - verbose=verbose) + run_in_parallel( + num_simulations=num_simulations, + num_parallel=num_parallel, + config=config, + log_folder=log_folder, + verbose=verbose, + ) end_time = 
dt.datetime.now() - print(f'Total time taken to run in parallel: {end_time - start_time}') \ No newline at end of file + print(f"Total time taken to run in parallel: {end_time - start_time}") diff --git a/config/ppfl_icaif20.py b/config/ppfl_icaif20.py index 820fa3a27..7a1da3951 100755 --- a/config/ppfl_icaif20.py +++ b/config/ppfl_icaif20.py @@ -1,66 +1,106 @@ # Our custom modules. -from Kernel import Kernel -from agent.examples.crypto.PPFL_ClientAgent import PPFL_ClientAgent -from agent.examples.crypto.PPFL_ServiceAgent import PPFL_ServiceAgent -from model.LatencyModel import LatencyModel -from util import util -from util.crypto import logReg +# Some config files require additional command line parameters to easily +# control agent or simulation hyperparameters during coarse parallelization. +import argparse # Standard modules. from datetime import timedelta from math import floor -import numpy as np from os.path import exists -import pandas as pd -from sklearn.model_selection import train_test_split from sys import exit from time import time +import numpy as np +import pandas as pd +from sklearn.model_selection import train_test_split -# Some config files require additional command line parameters to easily -# control agent or simulation hyperparameters during coarse parallelization. 
-import argparse +from agent.examples.crypto.PPFL_ClientAgent import PPFL_ClientAgent +from agent.examples.crypto.PPFL_ServiceAgent import PPFL_ServiceAgent +from Kernel import Kernel +from model.LatencyModel import LatencyModel +from util import util +from util.crypto import logReg -parser = argparse.ArgumentParser(description='Detailed options for PPFL config.') -parser.add_argument('-a', '--clear_learning', action='store_true', - help='Learning in the clear (vs SMP protocol)') -parser.add_argument('-c', '--config', required=True, - help='Name of config file to execute') -parser.add_argument('-e', '--epsilon', type=float, default=1.0, - help='Privacy loss epsilon') -parser.add_argument('-g', '--num_subgraphs', type=int, default=1, - help='Number of connected subgraphs into which to place client agents') -parser.add_argument('-i', '--num_iterations', type=int, default=5, - help='Number of iterations for the secure multiparty protocol)') -parser.add_argument('-k', '--skip_log', action='store_true', - help='Skip writing agent logs to disk') -parser.add_argument('-l', '--log_dir', default=None, - help='Log directory name (default: unix timestamp at program start)') -parser.add_argument('-m', '--max_logreg_iterations', type=int, default=50, - help='Number of iterations for client local LogReg'), -parser.add_argument('-n', '--num_clients', type=int, default=5, - help='Number of clients for the secure multiparty protocol)') -parser.add_argument('-o', '--collusion', action='store_true', - help='Compute collusion analysis (big and slow!)') -parser.add_argument('-p', '--split_size', type=int, default=20, - help='Local training size per client per iteration') -parser.add_argument('-r', '--learning_rate', type=float, default=10.0, - help='Local learning rate for training on client data') -parser.add_argument('-s', '--seed', type=int, default=None, - help='numpy.random.seed() for simulation') -parser.add_argument('-v', '--verbose', action='store_true', - help='Maximum 
verbosity!') -parser.add_argument('--config_help', action='store_true', - help='Print argument options for this config file') +parser = argparse.ArgumentParser(description="Detailed options for PPFL config.") +parser.add_argument( + "-a", + "--clear_learning", + action="store_true", + help="Learning in the clear (vs SMP protocol)", +) +parser.add_argument("-c", "--config", required=True, help="Name of config file to execute") +parser.add_argument("-e", "--epsilon", type=float, default=1.0, help="Privacy loss epsilon") +parser.add_argument( + "-g", + "--num_subgraphs", + type=int, + default=1, + help="Number of connected subgraphs into which to place client agents", +) +parser.add_argument( + "-i", + "--num_iterations", + type=int, + default=5, + help="Number of iterations for the secure multiparty protocol)", +) +parser.add_argument("-k", "--skip_log", action="store_true", help="Skip writing agent logs to disk") +parser.add_argument( + "-l", + "--log_dir", + default=None, + help="Log directory name (default: unix timestamp at program start)", +) +parser.add_argument( + "-m", + "--max_logreg_iterations", + type=int, + default=50, + help="Number of iterations for client local LogReg", +), +parser.add_argument( + "-n", + "--num_clients", + type=int, + default=5, + help="Number of clients for the secure multiparty protocol)", +) +parser.add_argument( + "-o", + "--collusion", + action="store_true", + help="Compute collusion analysis (big and slow!)", +) +parser.add_argument( + "-p", + "--split_size", + type=int, + default=20, + help="Local training size per client per iteration", +) +parser.add_argument( + "-r", + "--learning_rate", + type=float, + default=10.0, + help="Local learning rate for training on client data", +) +parser.add_argument("-s", "--seed", type=int, default=None, help="numpy.random.seed() for simulation") +parser.add_argument("-v", "--verbose", action="store_true", help="Maximum verbosity!") +parser.add_argument( + "--config_help", + 
action="store_true", + help="Print argument options for this config file", +) args, remaining_args = parser.parse_known_args() if args.config_help: - parser.print_help() - exit() + parser.print_help() + exit() # Historical date to simulate. Required even if not relevant. -historical_date = pd.to_datetime('2014-01-28') +historical_date = pd.to_datetime("2014-01-28") # Requested log directory. log_dir = args.log_dir @@ -80,7 +120,8 @@ # before) seed = args.seed -if not seed: seed = int(pd.Timestamp.now().timestamp() * 1000000) % (2**32 - 1) +if not seed: + seed = int(pd.Timestamp.now().timestamp() * 1000000) % (2**32 - 1) np.random.seed(seed) # Config parameter that causes util.util.print to suppress most output. @@ -98,9 +139,8 @@ ### How many client agents will there be? 1000 in 125 subgraphs of 8 fits ln(n), for example num_subgraphs = args.num_subgraphs -print ("Silent mode: {}".format(util.silent_mode)) -print ("Configuration seed: {}\n".format(seed)) - +print("Silent mode: {}".format(util.silent_mode)) +print("Configuration seed: {}\n".format(seed)) # Since the simulator often pulls historical data, we use a real-world @@ -115,13 +155,13 @@ kernelStartTime = midnight # When should the Kernel shut down? -kernelStopTime = midnight + pd.to_timedelta('17:00:00') +kernelStopTime = midnight + pd.to_timedelta("17:00:00") # This will configure the kernel with a default computation delay # (time penalty) for each agent's wakeup and recvMsg. An agent # can change this at any time for itself. (nanoseconds) -defaultComputationDelay = 1000000000 * 5 # five seconds +defaultComputationDelay = 1000000000 * 5 # five seconds # IMPORTANT NOTE CONCERNING AGENT IDS: the id passed to each agent must: # 1. be unique @@ -131,13 +171,16 @@ ### Configure the Kernel. 
-kernel = Kernel("Base Kernel", random_state = np.random.RandomState(seed=np.random.randint(low=0,high=2**32))) +kernel = Kernel( + "Base Kernel", + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32)), +) ### Obtain random state for whatever latency model will be used. -latency_rstate = np.random.RandomState(seed=np.random.randint(low=0,high=2**32)) +latency_rstate = np.random.RandomState(seed=np.random.randint(low=0, high=2**32)) ### Obtain a seed for the train-test split shuffling. -shuffle_seed = np.random.randint(low=0,high=2**32) +shuffle_seed = np.random.randint(low=0, high=2**32) ### Configure the agents. When conducting "agent of change" experiments, the ### new agents should be added at the END only. @@ -162,27 +205,37 @@ # Note that time and amount columns are not preprocessed, which might affect # how we would like to approach them. We exclude Time (time since first record) # since we are not using a time-sensitive method. -nd1 = np.loadtxt('util/crypto/datasets/creditcard/creditcard.csv', delimiter=',', skiprows=1) -X_data = nd1[:,1:-1] -y_data = nd1[:,-1] +nd1 = np.loadtxt("util/crypto/datasets/creditcard/creditcard.csv", delimiter=",", skiprows=1) +X_data = nd1[:, 1:-1] +y_data = nd1[:, -1] # We add a feature zero, always with value 1, to allow the intercept to be just # another weight for purposes of vector math. X_data = np.insert(X_data, 0, 1.0, axis=1) -print (X_data.shape,y_data.shape) -print (np.unique(y_data)) +print(X_data.shape, y_data.shape) +print(np.unique(y_data)) # Randomly shuffle and split the data for training and testing. -X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, test_size=0.25, random_state = shuffle_seed) +X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, test_size=0.25, random_state=shuffle_seed) ### Configure a service agent. 
-agents.extend([ PPFL_ServiceAgent(0, "PPFL Service Agent 0", "PPFL_ServiceAgent", - random_state = np.random.RandomState(seed=np.random.randint(low=0,high=2**32)), - msg_fwd_delay=0, iterations = num_iterations, num_clients = num_clients) ]) +agents.extend( + [ + PPFL_ServiceAgent( + 0, + "PPFL Service Agent 0", + "PPFL_ServiceAgent", + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32)), + msg_fwd_delay=0, + iterations=num_iterations, + num_clients=num_clients, + ) + ] +) agent_types.extend(["PPFL_ServiceAgent"]) agent_count += 1 @@ -193,89 +246,115 @@ client_init_start = time() # Iterate over all client IDs. -for i in range (a, b): - - # Determine subgraph. - subgraph = int(floor((i - a) / subgraph_size)) - - #print ("Neighborhood for agent {} is {}".format(i, subgraph)) - - # Determine agents in subgraph. - subgraph_start = a + (subgraph * subgraph_size) - subgraph_end = a + ((subgraph + 1) * subgraph_size) - - neighbors = range(subgraph_start, subgraph_end) - - #print ("Peers for {} are {}".format(i, [x for x in neighbors if x != i])) - - # Peer list is all agents in subgraph except self. - agents.append(PPFL_ClientAgent(i, "PPFL Client Agent {}".format(i), "PPFL_ClientAgent", - peer_list = [ x for x in neighbors if x != i ], iterations = num_iterations, - max_logreg_iterations = max_logreg_iterations, epsilon = epsilon, learning_rate = learning_rate, - clear_learning = clear_learning, num_clients = num_clients, num_subgraphs = num_subgraphs, - multiplier = accy_multiplier, X_train = X_train, y_train = y_train, X_test = X_test, y_test = y_test, - split_size = split_size, secret_scale = secret_scale, collusion = collusion, - random_state = np.random.RandomState(seed=np.random.randint(low=0,high=2**32)))) - -agent_types.extend([ "PPFL_ClientAgent" for i in range(a,b) ]) +for i in range(a, b): + + # Determine subgraph. 
+ subgraph = int(floor((i - a) / subgraph_size)) + + # print ("Neighborhood for agent {} is {}".format(i, subgraph)) + + # Determine agents in subgraph. + subgraph_start = a + (subgraph * subgraph_size) + subgraph_end = a + ((subgraph + 1) * subgraph_size) + + neighbors = range(subgraph_start, subgraph_end) + + # print ("Peers for {} are {}".format(i, [x for x in neighbors if x != i])) + + # Peer list is all agents in subgraph except self. + agents.append( + PPFL_ClientAgent( + i, + "PPFL Client Agent {}".format(i), + "PPFL_ClientAgent", + peer_list=[x for x in neighbors if x != i], + iterations=num_iterations, + max_logreg_iterations=max_logreg_iterations, + epsilon=epsilon, + learning_rate=learning_rate, + clear_learning=clear_learning, + num_clients=num_clients, + num_subgraphs=num_subgraphs, + multiplier=accy_multiplier, + X_train=X_train, + y_train=y_train, + X_test=X_test, + y_test=y_test, + split_size=split_size, + secret_scale=secret_scale, + collusion=collusion, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32)), + ) + ) + +agent_types.extend(["PPFL_ClientAgent" for i in range(a, b)]) agent_count += num_clients client_init_end = time() init_seconds = client_init_end - client_init_start -td_init = timedelta(seconds = init_seconds) -print (f"Client init took {td_init}") +td_init = timedelta(seconds=init_seconds) +print(f"Client init took {td_init}") ### Configure a latency model for the agents. # Get a new-style cubic LatencyModel from the networking literature. -pairwise = (len(agent_types),len(agent_types)) +pairwise = (len(agent_types), len(agent_types)) -model_args = { 'connected' : True, +model_args = { + "connected": True, + # All in NYC. Only matters for evaluating "real world" protocol duration, + # not for accuracy, collusion, or reconstruction. + "min_latency": np.random.uniform(low=21000, high=100000, size=pairwise), + "jitter": 0.3, + "jitter_clip": 0.05, + "jitter_unit": 5, +} - # All in NYC. 
Only matters for evaluating "real world" protocol duration, - # not for accuracy, collusion, or reconstruction. - 'min_latency' : np.random.uniform(low = 21000, high = 100000, size = pairwise), - 'jitter' : 0.3, - 'jitter_clip' : 0.05, - 'jitter_unit' : 5, - } - -latency_model = LatencyModel ( latency_model = 'cubic', random_state = latency_rstate, kwargs = model_args ) +latency_model = LatencyModel(latency_model="cubic", random_state=latency_rstate, kwargs=model_args) # Start the kernel running. -results = kernel.runner(agents = agents, startTime = kernelStartTime, stopTime = kernelStopTime, - agentLatencyModel = latency_model, - defaultComputationDelay = defaultComputationDelay, - skip_log = skip_log, log_dir = log_dir) +results = kernel.runner( + agents=agents, + startTime=kernelStartTime, + stopTime=kernelStopTime, + agentLatencyModel=latency_model, + defaultComputationDelay=defaultComputationDelay, + skip_log=skip_log, + log_dir=log_dir, +) # Print parameter summary and elapsed times by category for this experimental trial. -print () -print (f"Protocol Iterations: {num_iterations}, Clients: {num_clients}, Split Size: {split_size}, " \ - f"Local Iterations {max_logreg_iterations}, Learning Rate: {learning_rate}.") -print (f"Learning in the clear? 
{clear_learning}, Privacy Epsilon: {epsilon}.") -print () -print ("Service Agent mean time per iteration...") -print (f" Storing models: {results['srv_store_model'] / num_iterations}") -print (f" Combining models: {results['srv_combine_model'] / num_iterations}") -print () -print ("Client Agent mean time per iteration (except DH Offline)...") -print (f" DH Offline: {results['dh_offline'] / num_clients}") -print (f" DH Online: {results['dh_online'] / num_clients}") -print (f" Training: {results['training'] / num_clients}") -print (f" Encryption: {results['encryption'] / num_clients}") -print () -print (f"Slowest agent simulated time: {results['kernel_slowest_agent_finish_time']}") +print() +print( + f"Protocol Iterations: {num_iterations}, Clients: {num_clients}, Split Size: {split_size}, " + f"Local Iterations {max_logreg_iterations}, Learning Rate: {learning_rate}." +) +print(f"Learning in the clear? {clear_learning}, Privacy Epsilon: {epsilon}.") +print() +print("Service Agent mean time per iteration...") +print(f" Storing models: {results['srv_store_model'] / num_iterations}") +print(f" Combining models: {results['srv_combine_model'] / num_iterations}") +print() +print("Client Agent mean time per iteration (except DH Offline)...") +print(f" DH Offline: {results['dh_offline'] / num_clients}") +print(f" DH Online: {results['dh_online'] / num_clients}") +print(f" Training: {results['training'] / num_clients}") +print(f" Encryption: {results['encryption'] / num_clients}") +print() +print(f"Slowest agent simulated time: {results['kernel_slowest_agent_finish_time']}") # Write out the timing log to disk. 
if not exists("results/timing_log.csv"): - with open('results/timing_log.csv', 'a') as results_file: - results_file.write(f"Clients,Peers,Subgraphs,Iterations,Train Rows,Learning Rate,In The Clear?,Local Iterations,Epsilon,DH Offline,DH Online,Training,Encryption,Store Model,Combine Model,Last Agent Finish,Time to Simulate\n") - - with open('results/timing_log.csv', 'a') as results_file: - results_file.write(f"{num_clients},{subgraph_size-1},{num_subgraphs},{num_iterations},{split_size},{learning_rate},{clear_learning},{max_logreg_iterations},{epsilon},{results['dh_offline'] / num_clients},{results['dh_online'] / num_clients},{results['training'] / num_clients},{results['encryption'] / num_clients},{results['srv_store_model']},{results['srv_combine_model']},{results['kernel_event_queue_elapsed_wallclock']},{results['kernel_slowest_agent_finish_time']}\n") - - + with open("results/timing_log.csv", "a") as results_file: + results_file.write( + f"Clients,Peers,Subgraphs,Iterations,Train Rows,Learning Rate,In The Clear?,Local Iterations,Epsilon,DH Offline,DH Online,Training,Encryption,Store Model,Combine Model,Last Agent Finish,Time to Simulate\n" + ) + + with open("results/timing_log.csv", "a") as results_file: + results_file.write( + f"{num_clients},{subgraph_size-1},{num_subgraphs},{num_iterations},{split_size},{learning_rate},{clear_learning},{max_logreg_iterations},{epsilon},{results['dh_offline'] / num_clients},{results['dh_online'] / num_clients},{results['training'] / num_clients},{results['encryption'] / num_clients},{results['srv_store_model']},{results['srv_combine_model']},{results['kernel_event_queue_elapsed_wallclock']},{results['kernel_slowest_agent_finish_time']}\n" + ) diff --git a/config/ppfl_template.py b/config/ppfl_template.py index f1287efd1..46f7e6679 100755 --- a/config/ppfl_template.py +++ b/config/ppfl_template.py @@ -1,66 +1,106 @@ # Our custom modules. 
-from Kernel import Kernel -from agent.examples.crypto.PPFL_TemplateClientAgent import PPFL_TemplateClientAgent as PPFL_ClientAgent -from agent.examples.crypto.PPFL_ServiceAgent import PPFL_ServiceAgent -from model.LatencyModel import LatencyModel -from util import util -from util.crypto import logReg +# Some config files require additional command line parameters to easily +# control agent or simulation hyperparameters during coarse parallelization. +import argparse # Standard modules. from datetime import timedelta from math import floor -import numpy as np from os.path import exists -import pandas as pd -from sklearn.model_selection import train_test_split from sys import exit from time import time +import numpy as np +import pandas as pd +from sklearn.model_selection import train_test_split -# Some config files require additional command line parameters to easily -# control agent or simulation hyperparameters during coarse parallelization. -import argparse +from agent.examples.crypto.PPFL_ServiceAgent import PPFL_ServiceAgent +from agent.examples.crypto.PPFL_TemplateClientAgent import PPFL_TemplateClientAgent as PPFL_ClientAgent +from Kernel import Kernel +from model.LatencyModel import LatencyModel +from util import util +from util.crypto import logReg -parser = argparse.ArgumentParser(description='Detailed options for PPFL config.') -parser.add_argument('-a', '--clear_learning', action='store_true', - help='Learning in the clear (vs SMP protocol)') -parser.add_argument('-c', '--config', required=True, - help='Name of config file to execute') -parser.add_argument('-e', '--epsilon', type=float, default=1.0, - help='Privacy loss epsilon') -parser.add_argument('-g', '--num_subgraphs', type=int, default=1, - help='Number of connected subgraphs into which to place client agents') -parser.add_argument('-i', '--num_iterations', type=int, default=5, - help='Number of iterations for the secure multiparty protocol)') -parser.add_argument('-k', '--skip_log', 
action='store_true', - help='Skip writing agent logs to disk') -parser.add_argument('-l', '--log_dir', default=None, - help='Log directory name (default: unix timestamp at program start)') -parser.add_argument('-m', '--max_logreg_iterations', type=int, default=50, - help='Number of iterations for client local LogReg'), -parser.add_argument('-n', '--num_clients', type=int, default=5, - help='Number of clients for the secure multiparty protocol)') -parser.add_argument('-o', '--collusion', action='store_true', - help='Compute collusion analysis (big and slow!)') -parser.add_argument('-p', '--split_size', type=int, default=20, - help='Local training size per client per iteration') -parser.add_argument('-r', '--learning_rate', type=float, default=10.0, - help='Local learning rate for training on client data') -parser.add_argument('-s', '--seed', type=int, default=None, - help='numpy.random.seed() for simulation') -parser.add_argument('-v', '--verbose', action='store_true', - help='Maximum verbosity!') -parser.add_argument('--config_help', action='store_true', - help='Print argument options for this config file') +parser = argparse.ArgumentParser(description="Detailed options for PPFL config.") +parser.add_argument( + "-a", + "--clear_learning", + action="store_true", + help="Learning in the clear (vs SMP protocol)", +) +parser.add_argument("-c", "--config", required=True, help="Name of config file to execute") +parser.add_argument("-e", "--epsilon", type=float, default=1.0, help="Privacy loss epsilon") +parser.add_argument( + "-g", + "--num_subgraphs", + type=int, + default=1, + help="Number of connected subgraphs into which to place client agents", +) +parser.add_argument( + "-i", + "--num_iterations", + type=int, + default=5, + help="Number of iterations for the secure multiparty protocol)", +) +parser.add_argument("-k", "--skip_log", action="store_true", help="Skip writing agent logs to disk") +parser.add_argument( + "-l", + "--log_dir", + default=None, + help="Log 
directory name (default: unix timestamp at program start)", +) +parser.add_argument( + "-m", + "--max_logreg_iterations", + type=int, + default=50, + help="Number of iterations for client local LogReg", +), +parser.add_argument( + "-n", + "--num_clients", + type=int, + default=5, + help="Number of clients for the secure multiparty protocol)", +) +parser.add_argument( + "-o", + "--collusion", + action="store_true", + help="Compute collusion analysis (big and slow!)", +) +parser.add_argument( + "-p", + "--split_size", + type=int, + default=20, + help="Local training size per client per iteration", +) +parser.add_argument( + "-r", + "--learning_rate", + type=float, + default=10.0, + help="Local learning rate for training on client data", +) +parser.add_argument("-s", "--seed", type=int, default=None, help="numpy.random.seed() for simulation") +parser.add_argument("-v", "--verbose", action="store_true", help="Maximum verbosity!") +parser.add_argument( + "--config_help", + action="store_true", + help="Print argument options for this config file", +) args, remaining_args = parser.parse_known_args() if args.config_help: - parser.print_help() - exit() + parser.print_help() + exit() # Historical date to simulate. Required even if not relevant. -historical_date = pd.to_datetime('2014-01-28') +historical_date = pd.to_datetime("2014-01-28") # Requested log directory. log_dir = args.log_dir @@ -80,7 +120,8 @@ # before) seed = args.seed -if not seed: seed = int(pd.Timestamp.now().timestamp() * 1000000) % (2**32 - 1) +if not seed: + seed = int(pd.Timestamp.now().timestamp() * 1000000) % (2**32 - 1) np.random.seed(seed) # Config parameter that causes util.util.print to suppress most output. @@ -98,9 +139,8 @@ ### How many client agents will there be? 
1000 in 125 subgraphs of 8 fits ln(n), for example num_subgraphs = args.num_subgraphs -print ("Silent mode: {}".format(util.silent_mode)) -print ("Configuration seed: {}\n".format(seed)) - +print("Silent mode: {}".format(util.silent_mode)) +print("Configuration seed: {}\n".format(seed)) # Since the simulator often pulls historical data, we use a real-world @@ -115,13 +155,13 @@ kernelStartTime = midnight # When should the Kernel shut down? -kernelStopTime = midnight + pd.to_timedelta('17:00:00') +kernelStopTime = midnight + pd.to_timedelta("17:00:00") # This will configure the kernel with a default computation delay # (time penalty) for each agent's wakeup and recvMsg. An agent # can change this at any time for itself. (nanoseconds) -defaultComputationDelay = 1000000000 * 5 # five seconds +defaultComputationDelay = 1000000000 * 5 # five seconds # IMPORTANT NOTE CONCERNING AGENT IDS: the id passed to each agent must: # 1. be unique @@ -131,10 +171,13 @@ ### Configure the Kernel. -kernel = Kernel("Base Kernel", random_state = np.random.RandomState(seed=np.random.randint(low=0,high=2**32))) +kernel = Kernel( + "Base Kernel", + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32)), +) ### Obtain random state for whatever latency model will be used. -latency_rstate = np.random.RandomState(seed=np.random.randint(low=0,high=2**32)) +latency_rstate = np.random.RandomState(seed=np.random.randint(low=0, high=2**32)) ### Configure the agents. When conducting "agent of change" experiments, the ### new agents should be added at the END only. @@ -174,8 +217,8 @@ # for data of a specific format. example_features = 10 example_rows = 10000 -X_data = np.random.uniform(size=(example_rows,example_features)) -y_data = np.random.uniform(size=(example_rows,1)) +X_data = np.random.uniform(size=(example_rows, example_features)) +y_data = np.random.uniform(size=(example_rows, 1)) # Randomly shuffle and split the data for training and testing. 
X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, test_size=0.25) @@ -187,9 +230,19 @@ ### Configure a service agent. -agents.extend([ PPFL_ServiceAgent(0, "PPFL Service Agent 0", "PPFL_ServiceAgent", - random_state = np.random.RandomState(seed=np.random.randint(low=0,high=2**32)), - msg_fwd_delay=0, iterations = num_iterations, num_clients = num_clients) ]) +agents.extend( + [ + PPFL_ServiceAgent( + 0, + "PPFL Service Agent 0", + "PPFL_ServiceAgent", + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32)), + msg_fwd_delay=0, + iterations=num_iterations, + num_clients=num_clients, + ) + ] +) agent_types.extend(["PPFL_ServiceAgent"]) agent_count += 1 @@ -200,88 +253,110 @@ client_init_start = time() # Iterate over all client IDs. -for i in range (a, b): - - # Determine subgraph. - subgraph = int(floor((i - a) / subgraph_size)) - - #print ("Neighborhood for agent {} is {}".format(i, subgraph)) - - # Determine agents in subgraph. - subgraph_start = a + (subgraph * subgraph_size) - subgraph_end = a + ((subgraph + 1) * subgraph_size) - - neighbors = range(subgraph_start, subgraph_end) - - #print ("Peers for {} are {}".format(i, [x for x in neighbors if x != i])) - - # Peer list is all agents in subgraph except self. - agents.append(PPFL_ClientAgent(i, "PPFL Client Agent {}".format(i), "PPFL_ClientAgent", - peer_list = [ x for x in neighbors if x != i ], iterations = num_iterations, - num_clients = num_clients, num_subgraphs = num_subgraphs, - multiplier = accy_multiplier, X_train = X_train, y_train = y_train, X_test = X_test, y_test = y_test, - split_size = split_size, secret_scale = secret_scale, - random_state = np.random.RandomState(seed=np.random.randint(low=0,high=2**32)))) - -agent_types.extend([ "PPFL_ClientAgent" for i in range(a,b) ]) +for i in range(a, b): + + # Determine subgraph. 
+ subgraph = int(floor((i - a) / subgraph_size)) + + # print ("Neighborhood for agent {} is {}".format(i, subgraph)) + + # Determine agents in subgraph. + subgraph_start = a + (subgraph * subgraph_size) + subgraph_end = a + ((subgraph + 1) * subgraph_size) + + neighbors = range(subgraph_start, subgraph_end) + + # print ("Peers for {} are {}".format(i, [x for x in neighbors if x != i])) + + # Peer list is all agents in subgraph except self. + agents.append( + PPFL_ClientAgent( + i, + "PPFL Client Agent {}".format(i), + "PPFL_ClientAgent", + peer_list=[x for x in neighbors if x != i], + iterations=num_iterations, + num_clients=num_clients, + num_subgraphs=num_subgraphs, + multiplier=accy_multiplier, + X_train=X_train, + y_train=y_train, + X_test=X_test, + y_test=y_test, + split_size=split_size, + secret_scale=secret_scale, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32)), + ) + ) + +agent_types.extend(["PPFL_ClientAgent" for i in range(a, b)]) agent_count += num_clients client_init_end = time() init_seconds = client_init_end - client_init_start -td_init = timedelta(seconds = init_seconds) -print (f"Client init took {td_init}") +td_init = timedelta(seconds=init_seconds) +print(f"Client init took {td_init}") ### Configure a latency model for the agents. # Get a new-style cubic LatencyModel from the networking literature. -pairwise = (len(agent_types),len(agent_types)) +pairwise = (len(agent_types), len(agent_types)) -model_args = { 'connected' : True, +model_args = { + "connected": True, + # All in NYC. Only matters for evaluating "real world" protocol duration, + # not for accuracy, collusion, or reconstruction. + "min_latency": np.random.uniform(low=21000, high=100000, size=pairwise), + "jitter": 0.3, + "jitter_clip": 0.05, + "jitter_unit": 5, +} - # All in NYC. Only matters for evaluating "real world" protocol duration, - # not for accuracy, collusion, or reconstruction. 
- 'min_latency' : np.random.uniform(low = 21000, high = 100000, size = pairwise), - 'jitter' : 0.3, - 'jitter_clip' : 0.05, - 'jitter_unit' : 5, - } - -latency_model = LatencyModel ( latency_model = 'cubic', random_state = latency_rstate, kwargs = model_args ) +latency_model = LatencyModel(latency_model="cubic", random_state=latency_rstate, kwargs=model_args) # Start the kernel running. -results = kernel.runner(agents = agents, startTime = kernelStartTime, stopTime = kernelStopTime, - agentLatencyModel = latency_model, - defaultComputationDelay = defaultComputationDelay, - skip_log = skip_log, log_dir = log_dir) +results = kernel.runner( + agents=agents, + startTime=kernelStartTime, + stopTime=kernelStopTime, + agentLatencyModel=latency_model, + defaultComputationDelay=defaultComputationDelay, + skip_log=skip_log, + log_dir=log_dir, +) # Print parameter summary and elapsed times by category for this experimental trial. -print () -print (f"Protocol Iterations: {num_iterations}, Clients: {num_clients}, Split Size: {split_size}, " \ - f"Local Iterations {max_logreg_iterations}, Learning Rate: {learning_rate}.") -print (f"Learning in the clear? 
{clear_learning}, Privacy Epsilon: {epsilon}.") -print () -print ("Service Agent mean time per iteration...") -print (f" Storing models: {results['srv_store_model'] / num_iterations}") -print (f" Combining models: {results['srv_combine_model'] / num_iterations}") -print () -print ("Client Agent mean time per iteration (except DH Offline)...") -print (f" DH Offline: {results['dh_offline'] / num_clients}") -print (f" DH Online: {results['dh_online'] / num_clients}") -print (f" Training: {results['training'] / num_clients}") -print (f" Encryption: {results['encryption'] / num_clients}") -print () -print (f"Slowest agent simulated time: {results['kernel_slowest_agent_finish_time']}") +print() +print( + f"Protocol Iterations: {num_iterations}, Clients: {num_clients}, Split Size: {split_size}, " + f"Local Iterations {max_logreg_iterations}, Learning Rate: {learning_rate}." +) +print(f"Learning in the clear? {clear_learning}, Privacy Epsilon: {epsilon}.") +print() +print("Service Agent mean time per iteration...") +print(f" Storing models: {results['srv_store_model'] / num_iterations}") +print(f" Combining models: {results['srv_combine_model'] / num_iterations}") +print() +print("Client Agent mean time per iteration (except DH Offline)...") +print(f" DH Offline: {results['dh_offline'] / num_clients}") +print(f" DH Online: {results['dh_online'] / num_clients}") +print(f" Training: {results['training'] / num_clients}") +print(f" Encryption: {results['encryption'] / num_clients}") +print() +print(f"Slowest agent simulated time: {results['kernel_slowest_agent_finish_time']}") # Write out the timing log to disk. 
if not exists("results/timing_log.csv"): - with open('results/timing_log.csv', 'a') as results_file: - results_file.write(f"Clients,Peers,Subgraphs,Iterations,Train Rows,Learning Rate,In The Clear?,Local Iterations,Epsilon,DH Offline,DH Online,Training,Encryption,Store Model,Combine Model,Last Agent Finish,Time to Simulate\n") - - with open('results/timing_log.csv', 'a') as results_file: - results_file.write(f"{num_clients},{subgraph_size-1},{num_subgraphs},{num_iterations},{split_size},{learning_rate},{clear_learning},{max_logreg_iterations},{epsilon},{results['dh_offline'] / num_clients},{results['dh_online'] / num_clients},{results['training'] / num_clients},{results['encryption'] / num_clients},{results['srv_store_model']},{results['srv_combine_model']},{results['kernel_event_queue_elapsed_wallclock']},{results['kernel_slowest_agent_finish_time']}\n") - - + with open("results/timing_log.csv", "a") as results_file: + results_file.write( + f"Clients,Peers,Subgraphs,Iterations,Train Rows,Learning Rate,In The Clear?,Local Iterations,Epsilon,DH Offline,DH Online,Training,Encryption,Store Model,Combine Model,Last Agent Finish,Time to Simulate\n" + ) + + with open("results/timing_log.csv", "a") as results_file: + results_file.write( + f"{num_clients},{subgraph_size-1},{num_subgraphs},{num_iterations},{split_size},{learning_rate},{clear_learning},{max_logreg_iterations},{epsilon},{results['dh_offline'] / num_clients},{results['dh_online'] / num_clients},{results['training'] / num_clients},{results['encryption'] / num_clients},{results['srv_store_model']},{results['srv_combine_model']},{results['kernel_event_queue_elapsed_wallclock']},{results['kernel_slowest_agent_finish_time']}\n" + ) diff --git a/config/qlearning.py b/config/qlearning.py index 5ef9ed995..9a08f3446 100644 --- a/config/qlearning.py +++ b/config/qlearning.py @@ -1,23 +1,24 @@ -from Kernel import Kernel -from agent.ExchangeAgent import ExchangeAgent -from agent.examples.QLearningAgent import 
QLearningAgent -from agent.ZeroIntelligenceAgent import ZeroIntelligenceAgent -from util.order import LimitOrder -from util.oracle.SparseMeanRevertingOracle import SparseMeanRevertingOracle -from util import util -from util.model.QTable import QTable +import sys +from math import ceil, floor import numpy as np import pandas as pd -import sys -from math import ceil, floor +from agent.examples.QLearningAgent import QLearningAgent +from agent.ExchangeAgent import ExchangeAgent +from agent.ZeroIntelligenceAgent import ZeroIntelligenceAgent +from Kernel import Kernel +from util import util +from util.model.QTable import QTable +from util.oracle.SparseMeanRevertingOracle import SparseMeanRevertingOracle +from util.order import LimitOrder ###### Helper functions for this configuration file. Just commonly-used code ###### ###### that would otherwise have been repeated many times. ###### + def get_rand_obj(seed_obj): - return np.random.RandomState(seed = seed_obj.randint(low = 0, high = 2**32)) + return np.random.RandomState(seed=seed_obj.randint(low=0, high=2**32)) ###### One-time configuration section. This section sets up definitions that ###### @@ -37,14 +38,22 @@ def get_rand_obj(seed_obj): # Thus our discrete time stamps are effectively nanoseconds, although # they can be interepreted otherwise for ahistorical (e.g. generated) # simulations. These timestamps do require a valid date component. -midnight = pd.to_datetime('2014-01-28') +midnight = pd.to_datetime("2014-01-28") ### STOCK SYMBOL CONFIGURATION. -symbols = { 'IBM' : { 'r_bar' : 1e5, 'kappa' : 1.67e-12, 'agent_kappa' : 1.67e-15, - 'sigma_s' : 0, 'fund_vol' : 1e-8, 'megashock_lambda_a' : 2.77778e-13, - 'megashock_mean' : 1e3, 'megashock_var' : 5e4 } - } +symbols = { + "IBM": { + "r_bar": 1e5, + "kappa": 1.67e-12, + "agent_kappa": 1.67e-15, + "sigma_s": 0, + "fund_vol": 1e-8, + "megashock_lambda_a": 2.77778e-13, + "megashock_mean": 1e3, + "megashock_var": 5e4, + } +} ### INITIAL AGENT DISTRIBUTION. 
@@ -65,42 +74,49 @@ def get_rand_obj(seed_obj): ### EXCHANGE AGENTS -mkt_open = midnight + pd.to_timedelta('09:30:00') -mkt_close = midnight + pd.to_timedelta('16:00:00') +mkt_open = midnight + pd.to_timedelta("09:30:00") +mkt_close = midnight + pd.to_timedelta("16:00:00") ### Record the type and strategy of the agents for reporting purposes. for i in range(num_exch): - agent_types.append("ExchangeAgent") - agent_strats.append("ExchangeAgent") + agent_types.append("ExchangeAgent") + agent_strats.append("ExchangeAgent") ### ZERO INTELLIGENCE AGENTS ### ZeroIntelligence fixed parameters (i.e. not strategic). -zi_obs_noise = 1000000 # a property of the agent, not an individual stock +zi_obs_noise = 1000000 # a property of the agent, not an individual stock ### Lay out the ZI strategies (parameter settings) that will be used in this ### experiment, so we can assign particular numbers of agents to each strategy. ### Tuples are: (R_min, R_max, eta). -zi_strategy = [ (0, 250, 1), (0, 500, 1), (0, 1000, 0.8), (0, 1000, 1), - (0, 2000, 0.8), (250, 500, 0.8), (250, 500, 1) ] +zi_strategy = [ + (0, 250, 1), + (0, 500, 1), + (0, 1000, 0.8), + (0, 1000, 1), + (0, 2000, 0.8), + (250, 500, 0.8), + (250, 500, 1), +] ### Record the initial distribution of agents to ZI strategies. ### Split the agents as evenly as possible among the strategy settings. -zi = [ floor(num_zi / len(zi_strategy)) ] * len(zi_strategy) +zi = [floor(num_zi / len(zi_strategy))] * len(zi_strategy) i = 0 while sum(zi) < num_zi: - zi[i] += 1 - i += 1 + zi[i] += 1 + i += 1 ### Record the type and strategy of the agents for reporting purposes. 
for i in range(len(zi_strategy)): - x = zi_strategy[i] - strat_name = "Type {} [{} <= R <= {}, eta={}]".format(i+1, x[0], x[1], x[2]) - agent_types.extend([ 'ZeroIntelligenceAgent' ] * zi[i]) - agent_strats.extend([ 'ZeroIntelligenceAgent ({})'.format(strat_name) ] * zi[i]) + x = zi_strategy[i] + strat_name = "Type {} [{} <= R <= {}, eta={}]".format(i + 1, x[0], x[1], x[2]) + agent_types.extend(["ZeroIntelligenceAgent"] * zi[i]) + agent_strats.extend(["ZeroIntelligenceAgent ({})".format(strat_name)] * zi[i]) ### Q-LEARNING AGENTS @@ -109,8 +125,8 @@ def get_rand_obj(seed_obj): ### Record the type and strategy of the agents for reporting purposes. for i in range(num_qlearners): - agent_types.append("QLearningAgent") - agent_strats.append("QLearningAgent") + agent_types.append("QLearningAgent") + agent_strats.append("QLearningAgent") ### FINAL AGENT PREPARATION @@ -123,7 +139,7 @@ def get_rand_obj(seed_obj): num_agents = num_exch + num_zi + num_qlearners agent_saved_states = {} -agent_saved_states['agent_state'] = [None] * num_agents +agent_saved_states["agent_state"] = [None] * num_agents ### SIMULATION CONTROL SETTINGS. @@ -134,34 +150,47 @@ def get_rand_obj(seed_obj): # (i.e. the entire "experiment"), rather than a single instance of the simulation. 
import argparse -parser = argparse.ArgumentParser(description='Detailed options for sparse_zi config.') -parser.add_argument('-b', '--book_freq', default=None, - help='Frequency at which to archive order book for visualization') -parser.add_argument('-c', '--config', required=True, - help='Name of config file to execute') -parser.add_argument('-l', '--log_dir', default=None, - help='Log directory name (default: unix timestamp at program start)') -parser.add_argument('-o', '--log_orders', action='store_true', - help='Log every order-related action by every agent.') -parser.add_argument('-s', '--seed', type=int, default=None, - help='numpy.random.seed() for simulation') -parser.add_argument('-v', '--verbose', action='store_true', - help='Maximum verbosity!') -parser.add_argument('--config_help', action='store_true', - help='Print argument options for this config file') +parser = argparse.ArgumentParser(description="Detailed options for sparse_zi config.") +parser.add_argument( + "-b", + "--book_freq", + default=None, + help="Frequency at which to archive order book for visualization", +) +parser.add_argument("-c", "--config", required=True, help="Name of config file to execute") +parser.add_argument( + "-l", + "--log_dir", + default=None, + help="Log directory name (default: unix timestamp at program start)", +) +parser.add_argument( + "-o", + "--log_orders", + action="store_true", + help="Log every order-related action by every agent.", +) +parser.add_argument("-s", "--seed", type=int, default=None, help="numpy.random.seed() for simulation") +parser.add_argument("-v", "--verbose", action="store_true", help="Maximum verbosity!") +parser.add_argument( + "--config_help", + action="store_true", + help="Print argument options for this config file", +) args, remaining_args = parser.parse_known_args() if args.config_help: - parser.print_help() - sys.exit() + parser.print_help() + sys.exit() # If nothing specifically requested, use starting timestamp. 
In either case, successive # simulations will have simulation number appended. log_dir = args.log_dir -if log_dir is None: log_dir = str(int(pd.Timestamp('now').timestamp())) +if log_dir is None: + log_dir = str(int(pd.Timestamp("now").timestamp())) # Requested order book snapshot archive frequency. book_freq = args.book_freq @@ -180,7 +209,8 @@ def get_rand_obj(seed_obj): # before) seed = args.seed -if not seed: seed = int(pd.Timestamp.now().timestamp() * 1000000) % (2**32 - 1) +if not seed: + seed = int(pd.Timestamp.now().timestamp() * 1000000) % (2**32 - 1) np.random.seed(seed) # Config parameter that causes util.util.print to suppress most output. @@ -194,11 +224,10 @@ def get_rand_obj(seed_obj): log_orders = args.log_orders -print ("Silent mode: {}".format(util.silent_mode)) -print ("Logging orders: {}".format(log_orders)) -print ("Book freq: {}".format(book_freq)) -print ("Configuration seed: {}\n".format(seed)) - +print("Silent mode: {}".format(util.silent_mode)) +print("Logging orders: {}".format(log_orders)) +print("Book freq: {}".format(book_freq)) +print("Configuration seed: {}\n".format(seed)) ### STOCHASTIC CONTROL @@ -209,13 +238,13 @@ def get_rand_obj(seed_obj): ### seed for each simulation, but the entire experiment will still be deterministic ### given the same initial (global) seed. 
-kernel_seeds = np.random.RandomState(seed=np.random.randint(low=0,high=2**32)) +kernel_seeds = np.random.RandomState(seed=np.random.randint(low=0, high=2**32)) symbol_seeds = {} -for sym in symbols: symbol_seeds[sym] = np.random.RandomState(seed=np.random.randint(low=0,high=2**32)) - -agent_seeds = [ np.random.RandomState(seed=np.random.randint(low=0,high=2**32)) ] * num_agents +for sym in symbols: + symbol_seeds[sym] = np.random.RandomState(seed=np.random.randint(low=0, high=2**32)) +agent_seeds = [np.random.RandomState(seed=np.random.randint(low=0, high=2**32))] * num_agents ### LATENCY CONFIGURATION @@ -230,30 +259,29 @@ def get_rand_obj(seed_obj): # Other agents can be explicitly set afterward (and the mirror half of the matrix is also). # This configures all agents to a starting latency as described above. -latency = np.random.uniform(low = 21000, high = 13000000, size=(len(agent_types),len(agent_types))) +latency = np.random.uniform(low=21000, high=13000000, size=(len(agent_types), len(agent_types))) # Overriding the latency for certain agent pairs happens below, as does forcing mirroring # of the matrix to be symmetric. for i, t1 in zip(range(latency.shape[0]), agent_types): - for j, t2 in zip(range(latency.shape[1]), agent_types): - # Three cases for symmetric array. Set latency when j > i, copy it when i > j, same agent when i == j. - if j > i: - # Presently, strategy agents shouldn't be talking to each other, so we set them to extremely high latency. - if (t1 == "ZeroIntelligenceAgent" and t2 == "ZeroIntelligenceAgent"): - latency[i,j] = 1000000000 * 60 * 60 * 24 # Twenty-four hours. - elif i > j: - # This "bottom" half of the matrix simply mirrors the top. - latency[i,j] = latency[j,i] - else: - # This is the same agent. How long does it take to reach localhost? In our data center, it actually - # takes about 20 microseconds. - latency[i,j] = 20000 + for j, t2 in zip(range(latency.shape[1]), agent_types): + # Three cases for symmetric array. 
Set latency when j > i, copy it when i > j, same agent when i == j. + if j > i: + # Presently, strategy agents shouldn't be talking to each other, so we set them to extremely high latency. + if t1 == "ZeroIntelligenceAgent" and t2 == "ZeroIntelligenceAgent": + latency[i, j] = 1000000000 * 60 * 60 * 24 # Twenty-four hours. + elif i > j: + # This "bottom" half of the matrix simply mirrors the top. + latency[i, j] = latency[j, i] + else: + # This is the same agent. How long does it take to reach localhost? In our data center, it actually + # takes about 20 microseconds. + latency[i, j] = 20000 # Configure a simple latency noise model for the agents. # Index is ns extra delay, value is probability of this delay being applied. -noise = [ 0.25, 0.25, 0.20, 0.15, 0.10, 0.05 ] - +noise = [0.25, 0.25, 0.20, 0.15, 0.10, 0.05] ### FINAL GLOBAL CONFIGURATION FOR ALL SIMULATIONS @@ -265,96 +293,144 @@ def get_rand_obj(seed_obj): # There is no requirement these times be on the same date, although # none of the current agents handle markets closing and reopening. kernelStartTime = midnight -kernelStopTime = midnight + pd.to_timedelta('17:00:00') +kernelStopTime = midnight + pd.to_timedelta("17:00:00") # This will configure the kernel with a default computation delay # (time penalty) for each agent's wakeup and recvMsg. An agent # can change this at any time for itself. (nanoseconds) -defaultComputationDelay = 1000000000 # one second - +defaultComputationDelay = 1000000000 # one second ###### Per-simulation configuration section. This section initializes ###### ###### from scratch those objects and settings that should be reset withr ###### ###### each "run" of the simulation within an overall experiment. ###### -for sim in range(num_consecutive_simulations): # eventually make this a stopping criteria - - # Flush the agent population and start over for each simulation. 
- agents = [] - - # The random state of each symbol needs to be set for each simulation, so the - # stocks won't always do the same thing. Note that the entire experiment - # should still be fully repeatable with the same initial seed, because the - # list of random seeds for a symbol is fixed at the start, based on the initial - # seed. - for symbol in symbols: symbols[symbol]['random_state'] = get_rand_obj(symbol_seeds[symbol]) - - # Obtain a fresh simulation Kernel with the next appropriate random_state, seeded - # from the list obtained before the first simulation. - kernel = Kernel("Base Kernel", random_state = get_rand_obj(kernel_seeds)) - - # Configure an appropriate oracle for all traded stocks. - # All agents requiring the same type of Oracle will use the same oracle instance. - # The oracle does not require its own source of randomness, because each symbol - # and agent has those, and the oracle will always use on of those sources, as appropriate. - oracle = SparseMeanRevertingOracle(mkt_open, mkt_close, symbols) - - - # Create the agents in the same order they were specified in the first configuration - # section (outside the simulation loop). It is very important they be in the same - # order. - - agent_id = 0 - - # Create the exchange. - for i in range(num_exch): - agents.append( ExchangeAgent(agent_id, "{} {}".format(agent_types[agent_id], agent_id), - agent_strats[agent_id], mkt_open, mkt_close, - [s for s in symbols], log_orders = log_orders, - book_freq = book_freq, pipeline_delay = 0, - computation_delay = 0, stream_history = 10, - random_state = get_rand_obj(agent_seeds[agent_id])) ) - agent_id += 1 - - - # Configure some zero intelligence agents. - starting_cash = 10000000 # Cash in this simulator is always in CENTS. - symbol = 'IBM' - s = symbols[symbol] - - # ZI strategy split. Note that agent arrival rates are quite small, because our minimum - # time step is a nanosecond, and we want the agents to arrive more on the order of - # minutes. 
- for n, x in zip(zi, zi_strategy): - strat_name = agent_strats[agent_id] - while n > 0: - agents.append(ZeroIntelligenceAgent(agent_id, "ZI Agent {}".format(agent_id), strat_name, random_state = get_rand_obj(agent_seeds[agent_id]), log_orders=log_orders, symbol=symbol, starting_cash=starting_cash, sigma_n=zi_obs_noise, r_bar=s['r_bar'], kappa=s['agent_kappa'], sigma_s=s['fund_vol'], q_max=10, sigma_pv=5e6, R_min=x[0], R_max=x[1], eta=x[2], lambda_a=1e-12)) - agent_id += 1 - n -= 1 - - # Add a QLearning agent to try to beat this market. - for i in range(num_qlearners): - if agent_saved_states['agent_state'][agent_id] is None: - random_state = get_rand_obj(agent_seeds[agent_id]) - qtable = QTable(dims = (2201, 3), alpha = 0.99, alpha_decay = 0.999, - alpha_min = 0, epsilon = 0.99, epsilon_decay = 0.999, epsilon_min = 0, - gamma = 0.98, random_state = random_state) - else: - qtable = agent_saved_states['agent_state'][agent_id] - - agents.extend([ QLearningAgent(agent_id, "QLearning Agent {}".format(agent_id), "QLearningAgent", starting_cash = starting_cash, qtable = qtable, random_state = random_state) ]) - agent_id += 1 - - - - # Start the kernel running. This call will not return until the - # simulation is complete. (Eventually this should be made - # parallel for learning.) - agent_saved_states = kernel.runner( - agents = agents, startTime = kernelStartTime, - stopTime = kernelStopTime, agentLatency = latency, - latencyNoise = noise, - defaultComputationDelay = defaultComputationDelay, - oracle = oracle, log_dir = "{}_{}".format(log_dir,sim)) - +for sim in range(num_consecutive_simulations): # eventually make this a stopping criteria + + # Flush the agent population and start over for each simulation. + agents = [] + + # The random state of each symbol needs to be set for each simulation, so the + # stocks won't always do the same thing. 
Note that the entire experiment
+    # should still be fully repeatable with the same initial seed, because the
+    # list of random seeds for a symbol is fixed at the start, based on the initial
+    # seed.
+    for symbol in symbols:
+        symbols[symbol]["random_state"] = get_rand_obj(symbol_seeds[symbol])
+
+    # Obtain a fresh simulation Kernel with the next appropriate random_state, seeded
+    # from the list obtained before the first simulation.
+    kernel = Kernel("Base Kernel", random_state=get_rand_obj(kernel_seeds))
+
+    # Configure an appropriate oracle for all traded stocks.
+    # All agents requiring the same type of Oracle will use the same oracle instance.
+    # The oracle does not require its own source of randomness, because each symbol
+    # and agent has those, and the oracle will always use one of those sources, as appropriate.
+    oracle = SparseMeanRevertingOracle(mkt_open, mkt_close, symbols)
+
+    # Create the agents in the same order they were specified in the first configuration
+    # section (outside the simulation loop). It is very important they be in the same
+    # order.
+
+    agent_id = 0
+
+    # Create the exchange.
+    for i in range(num_exch):
+        agents.append(
+            ExchangeAgent(
+                agent_id,
+                "{} {}".format(agent_types[agent_id], agent_id),
+                agent_strats[agent_id],
+                mkt_open,
+                mkt_close,
+                [s for s in symbols],
+                log_orders=log_orders,
+                book_freq=book_freq,
+                pipeline_delay=0,
+                computation_delay=0,
+                stream_history=10,
+                random_state=get_rand_obj(agent_seeds[agent_id]),
+            )
+        )
+        agent_id += 1
+
+    # Configure some zero intelligence agents.
+    starting_cash = 10000000  # Cash in this simulator is always in CENTS.
+    symbol = "IBM"
+    s = symbols[symbol]
+
+    # ZI strategy split. Note that agent arrival rates are quite small, because our minimum
+    # time step is a nanosecond, and we want the agents to arrive more on the order of
+    # minutes.
+ for n, x in zip(zi, zi_strategy): + strat_name = agent_strats[agent_id] + while n > 0: + agents.append( + ZeroIntelligenceAgent( + agent_id, + "ZI Agent {}".format(agent_id), + strat_name, + random_state=get_rand_obj(agent_seeds[agent_id]), + log_orders=log_orders, + symbol=symbol, + starting_cash=starting_cash, + sigma_n=zi_obs_noise, + r_bar=s["r_bar"], + kappa=s["agent_kappa"], + sigma_s=s["fund_vol"], + q_max=10, + sigma_pv=5e6, + R_min=x[0], + R_max=x[1], + eta=x[2], + lambda_a=1e-12, + ) + ) + agent_id += 1 + n -= 1 + + # Add a QLearning agent to try to beat this market. + for i in range(num_qlearners): + if agent_saved_states["agent_state"][agent_id] is None: + random_state = get_rand_obj(agent_seeds[agent_id]) + qtable = QTable( + dims=(2201, 3), + alpha=0.99, + alpha_decay=0.999, + alpha_min=0, + epsilon=0.99, + epsilon_decay=0.999, + epsilon_min=0, + gamma=0.98, + random_state=random_state, + ) + else: + qtable = agent_saved_states["agent_state"][agent_id] + + agents.extend( + [ + QLearningAgent( + agent_id, + "QLearning Agent {}".format(agent_id), + "QLearningAgent", + starting_cash=starting_cash, + qtable=qtable, + random_state=random_state, + ) + ] + ) + agent_id += 1 + + # Start the kernel running. This call will not return until the + # simulation is complete. (Eventually this should be made + # parallel for learning.) 
+ agent_saved_states = kernel.runner( + agents=agents, + startTime=kernelStartTime, + stopTime=kernelStopTime, + agentLatency=latency, + latencyNoise=noise, + defaultComputationDelay=defaultComputationDelay, + oracle=oracle, + log_dir="{}_{}".format(log_dir, sim), + ) diff --git a/config/random_fund_diverse.py b/config/random_fund_diverse.py index 3f5b917c0..66154a3c2 100644 --- a/config/random_fund_diverse.py +++ b/config/random_fund_diverse.py @@ -1,55 +1,49 @@ import argparse +import datetime as dt +import sys + import numpy as np import pandas as pd -import sys -import datetime as dt from dateutil.parser import parse -from Kernel import Kernel -from util import util -from util.order import LimitOrder -from util.oracle.SparseMeanRevertingOracle import SparseMeanRevertingOracle -from model.LatencyModel import LatencyModel - +from agent.examples.MomentumAgent import MomentumAgent from agent.ExchangeAgent import ExchangeAgent +from agent.market_makers.MarketMakerAgent import MarketMakerAgent from agent.NoiseAgent import NoiseAgent from agent.ValueAgent import ValueAgent -from agent.market_makers.MarketMakerAgent import MarketMakerAgent -from agent.examples.MomentumAgent import MomentumAgent +from Kernel import Kernel +from model.LatencyModel import LatencyModel +from util import util +from util.oracle.SparseMeanRevertingOracle import SparseMeanRevertingOracle +from util.order import LimitOrder ######################################################################################################################## ############################################### GENERAL CONFIG ######################################################### -parser = argparse.ArgumentParser(description='Detailed options for random_fund_diverse config.') - -parser.add_argument('-c', - '--config', - required=True, - help='Name of config file to execute') -parser.add_argument('-t', - '--ticker', - required=True, - help='Ticker (symbol) to use for simulation') -parser.add_argument('-d', 
'--historical-date', - required=True, - type=parse, - help='historical date being simulated in format YYYYMMDD.') -parser.add_argument('-l', - '--log_dir', - default=None, - help='Log directory name (default: unix timestamp at program start)') -parser.add_argument('-s', - '--seed', - type=int, - default=None, - help='numpy.random.seed() for simulation') -parser.add_argument('-v', - '--verbose', - action='store_true', - help='Maximum verbosity!') -parser.add_argument('--config_help', - action='store_true', - help='Print argument options for this config file') +parser = argparse.ArgumentParser(description="Detailed options for random_fund_diverse config.") + +parser.add_argument("-c", "--config", required=True, help="Name of config file to execute") +parser.add_argument("-t", "--ticker", required=True, help="Ticker (symbol) to use for simulation") +parser.add_argument( + "-d", + "--historical-date", + required=True, + type=parse, + help="historical date being simulated in format YYYYMMDD.", +) +parser.add_argument( + "-l", + "--log_dir", + default=None, + help="Log directory name (default: unix timestamp at program start)", +) +parser.add_argument("-s", "--seed", type=int, default=None, help="numpy.random.seed() for simulation") +parser.add_argument("-v", "--verbose", action="store_true", help="Maximum verbosity!") +parser.add_argument( + "--config_help", + action="store_true", + help="Print argument options for this config file", +) args, remaining_args = parser.parse_known_args() @@ -59,7 +53,8 @@ log_dir = args.log_dir # Requested log directory. seed = args.seed # Random seed specification on the command line. -if not seed: seed = int(pd.Timestamp.now().timestamp() * 1000000) % (2 ** 32 - 1) +if not seed: + seed = int(pd.Timestamp.now().timestamp() * 1000000) % (2**32 - 1) np.random.seed(seed) util.silent_mode = not args.verbose @@ -76,8 +71,8 @@ # Historical date to simulate. 
historical_date = pd.to_datetime(args.historical_date) -mkt_open = historical_date + pd.to_timedelta('09:30:00') -mkt_close = historical_date + pd.to_timedelta('16:00:00') +mkt_open = historical_date + pd.to_timedelta("09:30:00") +mkt_close = historical_date + pd.to_timedelta("16:00:00") agent_count, agents, agent_types = 0, [], [] # Hyperparameters @@ -90,106 +85,140 @@ lambda_a = 1e-12 # Oracle -symbols = {symbol: {'r_bar': r_bar, - 'kappa': 1.67e-12, - 'agent_kappa': kappa, - 'sigma_s': 0, - 'fund_vol': 1e-8, - 'megashock_lambda_a': 2.77778e-13, - 'megashock_mean': 1e3, - 'megashock_var': 5e4, - 'random_state': np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, dtype='uint64'))}} +symbols = { + symbol: { + "r_bar": r_bar, + "kappa": 1.67e-12, + "agent_kappa": kappa, + "sigma_s": 0, + "fund_vol": 1e-8, + "megashock_lambda_a": 2.77778e-13, + "megashock_mean": 1e3, + "megashock_var": 5e4, + "random_state": np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + } +} oracle = SparseMeanRevertingOracle(mkt_open, mkt_close, symbols) # 1) Exchange Agent -agents.extend([ExchangeAgent(id=0, - name="EXCHANGE_AGENT", - type="ExchangeAgent", - mkt_open=mkt_open, - mkt_close=mkt_close, - symbols=[symbol], - log_orders=True, - pipeline_delay=0, - computation_delay=0, - stream_history=10, - book_freq=book_freq, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, dtype='uint64')))]) +agents.extend( + [ + ExchangeAgent( + id=0, + name="EXCHANGE_AGENT", + type="ExchangeAgent", + mkt_open=mkt_open, + mkt_close=mkt_close, + symbols=[symbol], + log_orders=True, + pipeline_delay=0, + computation_delay=0, + stream_history=10, + book_freq=book_freq, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + ] +) agent_types.extend("ExchangeAgent") agent_count += 1 # 2) Noise Agents num_noise = 5000 -agents.extend([NoiseAgent(id=j, - name="NoiseAgent {}".format(j), - 
type="NoiseAgent", - symbol=symbol, - starting_cash=starting_cash, - wakeup_time=util.get_wake_time(mkt_open, mkt_close), - log_orders=log_orders, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, dtype='uint64'))) - for j in range(agent_count, agent_count + num_noise)]) +agents.extend( + [ + NoiseAgent( + id=j, + name="NoiseAgent {}".format(j), + type="NoiseAgent", + symbol=symbol, + starting_cash=starting_cash, + wakeup_time=util.get_wake_time(mkt_open, mkt_close), + log_orders=log_orders, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + for j in range(agent_count, agent_count + num_noise) + ] +) agent_count += num_noise -agent_types.extend(['NoiseAgent']) +agent_types.extend(["NoiseAgent"]) # 3) Value Agents num_value = 100 -agents.extend([ValueAgent(id=j, - name="Value Agent {}".format(j), - type="ValueAgent", - symbol=symbol, - starting_cash=starting_cash, - sigma_n=sigma_n, - r_bar=r_bar, - kappa=kappa, - lambda_a=lambda_a, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, dtype='uint64'))) - for j in range(agent_count, agent_count + num_value)]) +agents.extend( + [ + ValueAgent( + id=j, + name="Value Agent {}".format(j), + type="ValueAgent", + symbol=symbol, + starting_cash=starting_cash, + sigma_n=sigma_n, + r_bar=r_bar, + kappa=kappa, + lambda_a=lambda_a, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + for j in range(agent_count, agent_count + num_value) + ] +) agent_count += num_value -agent_types.extend(['ValueAgent']) +agent_types.extend(["ValueAgent"]) # 4) Market Maker Agent num_mm_agents = 1 -agents.extend([MarketMakerAgent(id=j, - name="MARKET_MAKER_AGENT_{}".format(j), - type='MarketMakerAgent', - symbol=symbol, - starting_cash=starting_cash, - min_size=100, - max_size=101, - wake_up_freq="1min", - log_orders=log_orders, - 
random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, - dtype='uint64'))) - for j in range(agent_count, agent_count + num_mm_agents)]) +agents.extend( + [ + MarketMakerAgent( + id=j, + name="MARKET_MAKER_AGENT_{}".format(j), + type="MarketMakerAgent", + symbol=symbol, + starting_cash=starting_cash, + min_size=100, + max_size=101, + wake_up_freq="1min", + log_orders=log_orders, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + for j in range(agent_count, agent_count + num_mm_agents) + ] +) agent_count += num_mm_agents -agent_types.extend('MarketMakerAgent') +agent_types.extend("MarketMakerAgent") # 5) Momentum Agents num_momentum_agents = 25 -agents.extend([MomentumAgent(id=j, - name="MOMENTUM_AGENT_{}".format(j), - type="MomentumAgent", - symbol=symbol, - starting_cash=starting_cash, - min_size=1, - max_size=10, - log_orders=log_orders, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, - dtype='uint64'))) - for j in range(agent_count, agent_count + num_momentum_agents)]) +agents.extend( + [ + MomentumAgent( + id=j, + name="MOMENTUM_AGENT_{}".format(j), + type="MomentumAgent", + symbol=symbol, + starting_cash=starting_cash, + min_size=1, + max_size=10, + log_orders=log_orders, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + for j in range(agent_count, agent_count + num_momentum_agents) + ] +) agent_count += num_momentum_agents agent_types.extend("MomentumAgent") ######################################################################################################################## ########################################### KERNEL AND OTHER CONFIG #################################################### -kernel = Kernel("random_fund_diverse Kernel", random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, - dtype='uint64'))) +kernel = Kernel( + "random_fund_diverse Kernel", + 
random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), +) kernelStartTime = historical_date -kernelStopTime = mkt_close + pd.to_timedelta('00:01:00') +kernelStopTime = mkt_close + pd.to_timedelta("00:01:00") defaultComputationDelay = 50 # 50 nanoseconds @@ -200,28 +229,25 @@ # All agents sit on line from Seattle to NYC nyc_to_seattle_meters = 3866660 -pairwise_distances = util.generate_uniform_random_pairwise_dist_on_line(0.0, nyc_to_seattle_meters, agent_count, - random_state=latency_rstate) +pairwise_distances = util.generate_uniform_random_pairwise_dist_on_line( + 0.0, nyc_to_seattle_meters, agent_count, random_state=latency_rstate +) pairwise_latencies = util.meters_to_light_ns(pairwise_distances) -model_args = { - 'connected': True, - 'min_latency': pairwise_latencies -} +model_args = {"connected": True, "min_latency": pairwise_latencies} -latency_model = LatencyModel(latency_model='deterministic', - random_state=latency_rstate, - kwargs=model_args - ) +latency_model = LatencyModel(latency_model="deterministic", random_state=latency_rstate, kwargs=model_args) # KERNEL -kernel.runner(agents=agents, - startTime=kernelStartTime, - stopTime=kernelStopTime, - agentLatencyModel=latency_model, - defaultComputationDelay=defaultComputationDelay, - oracle=oracle, - log_dir=args.log_dir) +kernel.runner( + agents=agents, + startTime=kernelStartTime, + stopTime=kernelStopTime, + agentLatencyModel=latency_model, + defaultComputationDelay=defaultComputationDelay, + oracle=oracle, + log_dir=args.log_dir, +) simulation_end_time = dt.datetime.now() diff --git a/config/random_fund_value.py b/config/random_fund_value.py index a3db69d1a..cc1a7e8f0 100644 --- a/config/random_fund_value.py +++ b/config/random_fund_value.py @@ -1,53 +1,47 @@ import argparse +import datetime as dt +import sys + import numpy as np import pandas as pd -import sys -import datetime as dt from dateutil.parser import parse -from Kernel import Kernel -from util import 
util -from util.order import LimitOrder -from util.oracle.SparseMeanRevertingOracle import SparseMeanRevertingOracle -from model.LatencyModel import LatencyModel - from agent.ExchangeAgent import ExchangeAgent from agent.NoiseAgent import NoiseAgent from agent.ValueAgent import ValueAgent +from Kernel import Kernel +from model.LatencyModel import LatencyModel +from util import util +from util.oracle.SparseMeanRevertingOracle import SparseMeanRevertingOracle +from util.order import LimitOrder ######################################################################################################################## ############################################### GENERAL CONFIG ######################################################### -parser = argparse.ArgumentParser(description='Detailed options for random_fund_value config.') - -parser.add_argument('-c', - '--config', - required=True, - help='Name of config file to execute') -parser.add_argument('-t', - '--ticker', - required=True, - help='Ticker (symbol) to use for simulation') -parser.add_argument('-d', '--historical-date', - required=True, - type=parse, - help='historical date being simulated in format YYYYMMDD.') -parser.add_argument('-l', - '--log_dir', - default=None, - help='Log directory name (default: unix timestamp at program start)') -parser.add_argument('-s', - '--seed', - type=int, - default=None, - help='numpy.random.seed() for simulation') -parser.add_argument('-v', - '--verbose', - action='store_true', - help='Maximum verbosity!') -parser.add_argument('--config_help', - action='store_true', - help='Print argument options for this config file') +parser = argparse.ArgumentParser(description="Detailed options for random_fund_value config.") + +parser.add_argument("-c", "--config", required=True, help="Name of config file to execute") +parser.add_argument("-t", "--ticker", required=True, help="Ticker (symbol) to use for simulation") +parser.add_argument( + "-d", + "--historical-date", + required=True, + 
type=parse, + help="historical date being simulated in format YYYYMMDD.", +) +parser.add_argument( + "-l", + "--log_dir", + default=None, + help="Log directory name (default: unix timestamp at program start)", +) +parser.add_argument("-s", "--seed", type=int, default=None, help="numpy.random.seed() for simulation") +parser.add_argument("-v", "--verbose", action="store_true", help="Maximum verbosity!") +parser.add_argument( + "--config_help", + action="store_true", + help="Print argument options for this config file", +) args, remaining_args = parser.parse_known_args() @@ -57,7 +51,8 @@ log_dir = args.log_dir # Requested log directory. seed = args.seed # Random seed specification on the command line. -if not seed: seed = int(pd.Timestamp.now().timestamp() * 1000000) % (2 ** 32 - 1) +if not seed: + seed = int(pd.Timestamp.now().timestamp() * 1000000) % (2**32 - 1) np.random.seed(seed) util.silent_mode = not args.verbose @@ -74,8 +69,8 @@ # Historical date to simulate. historical_date = pd.to_datetime(args.historical_date) -mkt_open = historical_date + pd.to_timedelta('09:30:00') -mkt_close = historical_date + pd.to_timedelta('16:00:00') +mkt_open = historical_date + pd.to_timedelta("09:30:00") +mkt_close = historical_date + pd.to_timedelta("16:00:00") agent_count, agents, agent_types = 0, [], [] # Hyperparameters @@ -88,72 +83,96 @@ lambda_a = 1e-12 # Oracle -symbols = {symbol: {'r_bar': r_bar, - 'kappa': 1.67e-12, - 'agent_kappa': kappa, - 'sigma_s': 0, - 'fund_vol': 1e-8, - 'megashock_lambda_a': 2.77778e-13, - 'megashock_mean': 1e3, - 'megashock_var': 5e4, - 'random_state': np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, dtype='uint64'))}} +symbols = { + symbol: { + "r_bar": r_bar, + "kappa": 1.67e-12, + "agent_kappa": kappa, + "sigma_s": 0, + "fund_vol": 1e-8, + "megashock_lambda_a": 2.77778e-13, + "megashock_mean": 1e3, + "megashock_var": 5e4, + "random_state": np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + } 
+} oracle = SparseMeanRevertingOracle(mkt_open, mkt_close, symbols) # 1) Exchange Agent -agents.extend([ExchangeAgent(id=0, - name="EXCHANGE_AGENT", - type="ExchangeAgent", - mkt_open=mkt_open, - mkt_close=mkt_close, - symbols=[symbol], - log_orders=True, - pipeline_delay=0, - computation_delay=0, - stream_history=10, - book_freq=book_freq, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, dtype='uint64')))]) +agents.extend( + [ + ExchangeAgent( + id=0, + name="EXCHANGE_AGENT", + type="ExchangeAgent", + mkt_open=mkt_open, + mkt_close=mkt_close, + symbols=[symbol], + log_orders=True, + pipeline_delay=0, + computation_delay=0, + stream_history=10, + book_freq=book_freq, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + ] +) agent_types.extend("ExchangeAgent") agent_count += 1 # 2) Noise Agents num_noise = 5000 -agents.extend([NoiseAgent(id=j, - name="NoiseAgent {}".format(j), - type="NoiseAgent", - symbol=symbol, - starting_cash=starting_cash, - wakeup_time=util.get_wake_time(mkt_open, mkt_close), - log_orders=log_orders, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, dtype='uint64'))) - for j in range(agent_count, agent_count + num_noise)]) +agents.extend( + [ + NoiseAgent( + id=j, + name="NoiseAgent {}".format(j), + type="NoiseAgent", + symbol=symbol, + starting_cash=starting_cash, + wakeup_time=util.get_wake_time(mkt_open, mkt_close), + log_orders=log_orders, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + for j in range(agent_count, agent_count + num_noise) + ] +) agent_count += num_noise -agent_types.extend(['NoiseAgent']) +agent_types.extend(["NoiseAgent"]) # 3) Value Agents num_value = 100 -agents.extend([ValueAgent(id=j, - name="Value Agent {}".format(j), - type="ValueAgent", - symbol=symbol, - starting_cash=starting_cash, - sigma_n=sigma_n, - r_bar=r_bar, - kappa=kappa, - 
lambda_a=lambda_a, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, dtype='uint64'))) - for j in range(agent_count, agent_count + num_value)]) +agents.extend( + [ + ValueAgent( + id=j, + name="Value Agent {}".format(j), + type="ValueAgent", + symbol=symbol, + starting_cash=starting_cash, + sigma_n=sigma_n, + r_bar=r_bar, + kappa=kappa, + lambda_a=lambda_a, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + for j in range(agent_count, agent_count + num_value) + ] +) agent_count += num_value -agent_types.extend(['ValueAgent']) +agent_types.extend(["ValueAgent"]) ######################################################################################################################## ########################################### KERNEL AND OTHER CONFIG #################################################### -kernel = Kernel("random_fund_value Kernel", random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, - dtype='uint64'))) +kernel = Kernel( + "random_fund_value Kernel", + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), +) kernelStartTime = historical_date -kernelStopTime = mkt_close + pd.to_timedelta('00:01:00') +kernelStopTime = mkt_close + pd.to_timedelta("00:01:00") defaultComputationDelay = 50 # 50 nanoseconds @@ -164,28 +183,25 @@ # All agents sit on line from Seattle to NYC nyc_to_seattle_meters = 3866660 -pairwise_distances = util.generate_uniform_random_pairwise_dist_on_line(0.0, nyc_to_seattle_meters, agent_count, - random_state=latency_rstate) +pairwise_distances = util.generate_uniform_random_pairwise_dist_on_line( + 0.0, nyc_to_seattle_meters, agent_count, random_state=latency_rstate +) pairwise_latencies = util.meters_to_light_ns(pairwise_distances) -model_args = { - 'connected': True, - 'min_latency': pairwise_latencies -} +model_args = {"connected": True, "min_latency": pairwise_latencies} -latency_model = 
LatencyModel(latency_model='deterministic', - random_state=latency_rstate, - kwargs=model_args - ) +latency_model = LatencyModel(latency_model="deterministic", random_state=latency_rstate, kwargs=model_args) # KERNEL -kernel.runner(agents=agents, - startTime=kernelStartTime, - stopTime=kernelStopTime, - agentLatencyModel=latency_model, - defaultComputationDelay=defaultComputationDelay, - oracle=oracle, - log_dir=args.log_dir) +kernel.runner( + agents=agents, + startTime=kernelStartTime, + stopTime=kernelStopTime, + agentLatencyModel=latency_model, + defaultComputationDelay=defaultComputationDelay, + oracle=oracle, + log_dir=args.log_dir, +) simulation_end_time = dt.datetime.now() print("Simulation End Time: {}".format(simulation_end_time)) diff --git a/config/rmsc01.py b/config/rmsc01.py index eefdc574b..7b89a4569 100755 --- a/config/rmsc01.py +++ b/config/rmsc01.py @@ -6,55 +6,46 @@ # - 24 Momentum Agent import argparse -import numpy as np -import pandas as pd -import sys import datetime as dt import importlib +import sys -from Kernel import Kernel -from util import util -from util.order import LimitOrder -from util.oracle.SparseMeanRevertingOracle import SparseMeanRevertingOracle -from model.LatencyModel import LatencyModel +import numpy as np +import pandas as pd +from agent.examples.MomentumAgent import MomentumAgent from agent.ExchangeAgent import ExchangeAgent +from agent.HeuristicBeliefLearningAgent import HeuristicBeliefLearningAgent from agent.market_makers.MarketMakerAgent import MarketMakerAgent -from agent.examples.MomentumAgent import MomentumAgent - from agent.ZeroIntelligenceAgent import ZeroIntelligenceAgent -from agent.HeuristicBeliefLearningAgent import HeuristicBeliefLearningAgent +from Kernel import Kernel +from model.LatencyModel import LatencyModel +from util import util +from util.oracle.SparseMeanRevertingOracle import SparseMeanRevertingOracle +from util.order import LimitOrder 
######################################################################################################################## ############################################### GENERAL CONFIG ######################################################### parser = argparse.ArgumentParser( - description='Detailed options for RMSC-1 (Reference Market Simulation Configuration) config.') - -parser.add_argument('-c', - '--config', - required=True, - help='Name of config file to execute') -parser.add_argument('-l', - '--log_dir', - default=None, - help='Log directory name (default: unix timestamp at program start)') -parser.add_argument('-s', - '--seed', - type=int, - default=None, - help='numpy.random.seed() for simulation') -parser.add_argument('-v', - '--verbose', - action='store_true', - help='Maximum verbosity!') -parser.add_argument('--config_help', - action='store_true', - help='Print argument options for this config file') -parser.add_argument('-a', - '--agent_name', - default=None, - help='Specify the agent to test with') + description="Detailed options for RMSC-1 (Reference Market Simulation Configuration) config." +) + +parser.add_argument("-c", "--config", required=True, help="Name of config file to execute") +parser.add_argument( + "-l", + "--log_dir", + default=None, + help="Log directory name (default: unix timestamp at program start)", +) +parser.add_argument("-s", "--seed", type=int, default=None, help="numpy.random.seed() for simulation") +parser.add_argument("-v", "--verbose", action="store_true", help="Maximum verbosity!") +parser.add_argument( + "--config_help", + action="store_true", + help="Print argument options for this config file", +) +parser.add_argument("-a", "--agent_name", default=None, help="Specify the agent to test with") args, remaining_args = parser.parse_known_args() @@ -65,7 +56,8 @@ log_dir = args.log_dir # Requested log directory. seed = args.seed # Random seed specification on the command line. 
-if not seed: seed = int(pd.Timestamp.now().timestamp() * 1000000) % (2 ** 32 - 1) +if not seed: + seed = int(pd.Timestamp.now().timestamp() * 1000000) % (2**32 - 1) np.random.seed(seed) util.silent_mode = not args.verbose @@ -78,153 +70,189 @@ ############################################### AGENTS CONFIG ########################################################## # Historical date to simulate. -historical_date = pd.to_datetime('2019-06-28') -symbol = 'JPM' +historical_date = pd.to_datetime("2019-06-28") +symbol = "JPM" agent_count, agents, agent_types = 0, [], [] starting_cash = 10000000 # Cash in this simulator is always in CENTS. # 1) 1 Exchange Agent -mkt_open = historical_date + pd.to_timedelta('09:30:00') -mkt_close = historical_date + pd.to_timedelta('16:00:00') - -agents.extend([ExchangeAgent(id=0, - name="EXCHANGE_AGENT", - type="ExchangeAgent", - mkt_open=mkt_open, - mkt_close=mkt_close, - symbols=[symbol], - log_orders=False, - pipeline_delay=0, - computation_delay=0, - stream_history=10, - book_freq=0, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, - dtype='uint64')))]) +mkt_open = historical_date + pd.to_timedelta("09:30:00") +mkt_close = historical_date + pd.to_timedelta("16:00:00") + +agents.extend( + [ + ExchangeAgent( + id=0, + name="EXCHANGE_AGENT", + type="ExchangeAgent", + mkt_open=mkt_open, + mkt_close=mkt_close, + symbols=[symbol], + log_orders=False, + pipeline_delay=0, + computation_delay=0, + stream_history=10, + book_freq=0, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + ] +) agent_types.extend("ExchangeAgent") agent_count += 1 # 2) 1 Market Maker Agent num_mm_agents = 1 -agents.extend([MarketMakerAgent(id=j, - name="MARKET_MAKER_AGENT_{}".format(j), - type='MarketMakerAgent', - symbol=symbol, - starting_cash=starting_cash, - min_size=500, - max_size=1000, - log_orders=False, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 
32, - dtype='uint64'))) - for j in range(agent_count, agent_count + num_mm_agents)]) - -agent_types.extend('MarketMakerAgent') +agents.extend( + [ + MarketMakerAgent( + id=j, + name="MARKET_MAKER_AGENT_{}".format(j), + type="MarketMakerAgent", + symbol=symbol, + starting_cash=starting_cash, + min_size=500, + max_size=1000, + log_orders=False, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + for j in range(agent_count, agent_count + num_mm_agents) + ] +) + +agent_types.extend("MarketMakerAgent") agent_count += num_mm_agents # 3) 50 Zero Intelligence Agents -symbols = {symbol: {'r_bar': 1e5, - 'kappa': 1.67e-12, - 'agent_kappa': 1.67e-15, - 'sigma_s': 0, - 'fund_vol': 1e-8, - 'megashock_lambda_a': 2.77778e-13, - 'megashock_mean': 1e3, - 'megashock_var': 5e4, - 'random_state': np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, - dtype='uint64'))}} +symbols = { + symbol: { + "r_bar": 1e5, + "kappa": 1.67e-12, + "agent_kappa": 1.67e-15, + "sigma_s": 0, + "fund_vol": 1e-8, + "megashock_lambda_a": 2.77778e-13, + "megashock_mean": 1e3, + "megashock_var": 5e4, + "random_state": np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + } +} oracle = SparseMeanRevertingOracle(mkt_open, mkt_close, symbols) num_zi_agents = 50 -agents.extend([ZeroIntelligenceAgent(id=j, - name="ZI_AGENT_{}".format(j), - type="ZeroIntelligenceAgent", - symbol=symbol, - starting_cash=starting_cash, - sigma_n=10000, - sigma_s=symbols[symbol]['fund_vol'], - kappa=symbols[symbol]['agent_kappa'], - r_bar=symbols[symbol]['r_bar'], - q_max=10, - sigma_pv=5e4, - R_min=0, - R_max=100, - eta=1, - lambda_a=1e-12, - log_orders=False, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, - dtype='uint64'))) - for j in range(agent_count, agent_count + num_zi_agents)]) +agents.extend( + [ + ZeroIntelligenceAgent( + id=j, + name="ZI_AGENT_{}".format(j), + type="ZeroIntelligenceAgent", + 
symbol=symbol, + starting_cash=starting_cash, + sigma_n=10000, + sigma_s=symbols[symbol]["fund_vol"], + kappa=symbols[symbol]["agent_kappa"], + r_bar=symbols[symbol]["r_bar"], + q_max=10, + sigma_pv=5e4, + R_min=0, + R_max=100, + eta=1, + lambda_a=1e-12, + log_orders=False, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + for j in range(agent_count, agent_count + num_zi_agents) + ] +) agent_types.extend("ZeroIntelligenceAgent") agent_count += num_zi_agents # 4) 25 Heuristic Belief Learning Agents num_hbl_agents = 25 -agents.extend([HeuristicBeliefLearningAgent(id=j, - name="HBL_AGENT_{}".format(j), - type="HeuristicBeliefLearningAgent", - symbol=symbol, - starting_cash=starting_cash, - sigma_n=10000, - sigma_s=symbols[symbol]['fund_vol'], - kappa=symbols[symbol]['agent_kappa'], - r_bar=symbols[symbol]['r_bar'], - q_max=10, - sigma_pv=5e4, - R_min=0, - R_max=100, - eta=1, - lambda_a=1e-12, - L=2, - log_orders=False, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, - dtype='uint64'))) - for j in range(agent_count, agent_count + num_hbl_agents)]) +agents.extend( + [ + HeuristicBeliefLearningAgent( + id=j, + name="HBL_AGENT_{}".format(j), + type="HeuristicBeliefLearningAgent", + symbol=symbol, + starting_cash=starting_cash, + sigma_n=10000, + sigma_s=symbols[symbol]["fund_vol"], + kappa=symbols[symbol]["agent_kappa"], + r_bar=symbols[symbol]["r_bar"], + q_max=10, + sigma_pv=5e4, + R_min=0, + R_max=100, + eta=1, + lambda_a=1e-12, + L=2, + log_orders=False, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + for j in range(agent_count, agent_count + num_hbl_agents) + ] +) agent_types.extend("HeuristicBeliefLearningAgent") agent_count += num_hbl_agents # 5) 24 Momentum Agents: num_momentum_agents = 24 -agents.extend([MomentumAgent(id=j, - name="MOMENTUM_AGENT_{}".format(j), - type="MomentumAgent", - symbol=symbol, - 
starting_cash=starting_cash, - min_size=1, - max_size=10, - log_orders=False, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, - dtype='uint64'))) - for j in range(agent_count, agent_count + num_momentum_agents)]) +agents.extend( + [ + MomentumAgent( + id=j, + name="MOMENTUM_AGENT_{}".format(j), + type="MomentumAgent", + symbol=symbol, + starting_cash=starting_cash, + min_size=1, + max_size=10, + log_orders=False, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + for j in range(agent_count, agent_count + num_momentum_agents) + ] +) agent_types.extend("MomentumAgent") agent_count += num_momentum_agents # 6) User defined agent -# Load the agent to evaluate against the market +# Load the agent to evaluate against the market if args.agent_name: - mod_name = args.agent_name.rsplit('.', 1)[0] - class_name = args.agent_name.split('.')[-1] + mod_name = args.agent_name.rsplit(".", 1)[0] + class_name = args.agent_name.split(".")[-1] m = importlib.import_module(args.agent_name, package=None) testagent = getattr(m, class_name) - agents.extend([testagent(id=agent_count, - name=args.agent_name, - type="AgentUnderTest", - symbol=symbol, - starting_cash=starting_cash, - min_size=1, - max_size=10, - log_orders=False, - random_state=np.random.RandomState(seed=np.random.randint(low=0,high=2**32,dtype='uint64')))]) + agents.extend( + [ + testagent( + id=agent_count, + name=args.agent_name, + type="AgentUnderTest", + symbol=symbol, + starting_cash=starting_cash, + min_size=1, + max_size=10, + log_orders=False, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + ] + ) agent_count += 1 ######################################################################################################################## ########################################### KERNEL AND OTHER CONFIG #################################################### -kernel = Kernel("RMSC01 
Kernel", random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, - dtype='uint64'))) +kernel = Kernel( + "RMSC01 Kernel", + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), +) kernelStartTime = historical_date -kernelStopTime = mkt_close + pd.to_timedelta('00:01:00') +kernelStopTime = mkt_close + pd.to_timedelta("00:01:00") defaultComputationDelay = 50 # 50 nanoseconds @@ -235,28 +263,25 @@ # All agents sit on line from Seattle to NYC nyc_to_seattle_meters = 3866660 -pairwise_distances = util.generate_uniform_random_pairwise_dist_on_line(0.0, nyc_to_seattle_meters, agent_count, - random_state=latency_rstate) +pairwise_distances = util.generate_uniform_random_pairwise_dist_on_line( + 0.0, nyc_to_seattle_meters, agent_count, random_state=latency_rstate +) pairwise_latencies = util.meters_to_light_ns(pairwise_distances) -model_args = { - 'connected': True, - 'min_latency': pairwise_latencies -} +model_args = {"connected": True, "min_latency": pairwise_latencies} -latency_model = LatencyModel(latency_model='deterministic', - random_state=latency_rstate, - kwargs=model_args - ) +latency_model = LatencyModel(latency_model="deterministic", random_state=latency_rstate, kwargs=model_args) # KERNEL -kernel.runner(agents=agents, - startTime=kernelStartTime, - stopTime=kernelStopTime, - agentLatencyModel=latency_model, - defaultComputationDelay=defaultComputationDelay, - oracle=oracle, - log_dir=args.log_dir) +kernel.runner( + agents=agents, + startTime=kernelStartTime, + stopTime=kernelStopTime, + agentLatencyModel=latency_model, + defaultComputationDelay=defaultComputationDelay, + oracle=oracle, + log_dir=args.log_dir, +) simulation_end_time = dt.datetime.now() diff --git a/config/rmsc02.py b/config/rmsc02.py index a1906b935..0245b5848 100755 --- a/config/rmsc02.py +++ b/config/rmsc02.py @@ -6,56 +6,47 @@ # - 24 Momentum Agent import argparse -import numpy as np -import pandas as pd -import sys import 
datetime as dt import importlib +import sys -from Kernel import Kernel -from util import util -from util.order import LimitOrder -from util.oracle.SparseMeanRevertingOracle import SparseMeanRevertingOracle -from model.LatencyModel import LatencyModel +import numpy as np +import pandas as pd -from agent.ExchangeAgent import ExchangeAgent -from agent.market_makers.MarketMakerAgent import MarketMakerAgent from agent.examples.MomentumAgent import MomentumAgent from agent.examples.SubscriptionAgent import SubscriptionAgent - -from agent.ZeroIntelligenceAgent import ZeroIntelligenceAgent +from agent.ExchangeAgent import ExchangeAgent from agent.HeuristicBeliefLearningAgent import HeuristicBeliefLearningAgent +from agent.market_makers.MarketMakerAgent import MarketMakerAgent +from agent.ZeroIntelligenceAgent import ZeroIntelligenceAgent +from Kernel import Kernel +from model.LatencyModel import LatencyModel +from util import util +from util.oracle.SparseMeanRevertingOracle import SparseMeanRevertingOracle +from util.order import LimitOrder ######################################################################################################################## ############################################### GENERAL CONFIG ######################################################### parser = argparse.ArgumentParser( - description='Detailed options for RMSC-1 (Reference Market Simulation Configuration) config.') - -parser.add_argument('-c', - '--config', - required=True, - help='Name of config file to execute') -parser.add_argument('-l', - '--log_dir', - default=None, - help='Log directory name (default: unix timestamp at program start)') -parser.add_argument('-s', - '--seed', - type=int, - default=None, - help='numpy.random.seed() for simulation') -parser.add_argument('-v', - '--verbose', - action='store_true', - help='Maximum verbosity!') -parser.add_argument('--config_help', - action='store_true', - help='Print argument options for this config file') 
-parser.add_argument('-a', - '--agent_name', - default=None, - help='Specify the agent to test with') + description="Detailed options for RMSC-1 (Reference Market Simulation Configuration) config." +) + +parser.add_argument("-c", "--config", required=True, help="Name of config file to execute") +parser.add_argument( + "-l", + "--log_dir", + default=None, + help="Log directory name (default: unix timestamp at program start)", +) +parser.add_argument("-s", "--seed", type=int, default=None, help="numpy.random.seed() for simulation") +parser.add_argument("-v", "--verbose", action="store_true", help="Maximum verbosity!") +parser.add_argument( + "--config_help", + action="store_true", + help="Print argument options for this config file", +) +parser.add_argument("-a", "--agent_name", default=None, help="Specify the agent to test with") args, remaining_args = parser.parse_known_args() @@ -65,7 +56,8 @@ log_dir = args.log_dir # Requested log directory. seed = args.seed # Random seed specification on the command line. -if not seed: seed = int(pd.Timestamp.now().timestamp() * 1000000) % (2 ** 32 - 1) +if not seed: + seed = int(pd.Timestamp.now().timestamp() * 1000000) % (2**32 - 1) np.random.seed(seed) util.silent_mode = not args.verbose @@ -78,126 +70,153 @@ ############################################### AGENTS CONFIG ########################################################## # Historical date to simulate. -historical_date = pd.to_datetime('2019-06-28') -symbol = 'JPM' +historical_date = pd.to_datetime("2019-06-28") +symbol = "JPM" agent_count, agents, agent_types = 0, [], [] starting_cash = 10000000 # Cash in this simulator is always in CENTS. 
# 1) 1 Exchange Agent -mkt_open = historical_date + pd.to_timedelta('09:30:00') -mkt_close = historical_date + pd.to_timedelta('16:00:00') - -agents.extend([ExchangeAgent(id=0, - name="EXCHANGE_AGENT", - type="ExchangeAgent", - mkt_open=mkt_open, - mkt_close=mkt_close, - symbols=[symbol], - log_orders=False, - pipeline_delay=0, - computation_delay=0, - stream_history=10, - book_freq=0, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, - dtype='uint64')))]) +mkt_open = historical_date + pd.to_timedelta("09:30:00") +mkt_close = historical_date + pd.to_timedelta("16:00:00") + +agents.extend( + [ + ExchangeAgent( + id=0, + name="EXCHANGE_AGENT", + type="ExchangeAgent", + mkt_open=mkt_open, + mkt_close=mkt_close, + symbols=[symbol], + log_orders=False, + pipeline_delay=0, + computation_delay=0, + stream_history=10, + book_freq=0, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + ] +) agent_types.extend("ExchangeAgent") agent_count += 1 # 2) 1 Market Maker Agent num_mm_agents = 1 -agents.extend([MarketMakerAgent(id=j, - name="MARKET_MAKER_AGENT_{}".format(j), - type='MarketMakerAgent', - symbol=symbol, - starting_cash=starting_cash, - min_size=500, - max_size=1000, - subscribe=True, - log_orders=False, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, - dtype='uint64'))) - for j in range(agent_count, agent_count + num_mm_agents)]) - -agent_types.extend('MarketMakerAgent') +agents.extend( + [ + MarketMakerAgent( + id=j, + name="MARKET_MAKER_AGENT_{}".format(j), + type="MarketMakerAgent", + symbol=symbol, + starting_cash=starting_cash, + min_size=500, + max_size=1000, + subscribe=True, + log_orders=False, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + for j in range(agent_count, agent_count + num_mm_agents) + ] +) + +agent_types.extend("MarketMakerAgent") agent_count += num_mm_agents # 3) 50 Zero 
Intelligence Agents -symbols = {symbol: {'r_bar': 1e5, - 'kappa': 1.67e-12, - 'agent_kappa': 1.67e-15, - 'sigma_s': 0, - 'fund_vol': 1e-8, - 'megashock_lambda_a': 2.77778e-13, - 'megashock_mean': 1e3, - 'megashock_var': 5e4, - 'random_state': np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, - dtype='uint64'))}} +symbols = { + symbol: { + "r_bar": 1e5, + "kappa": 1.67e-12, + "agent_kappa": 1.67e-15, + "sigma_s": 0, + "fund_vol": 1e-8, + "megashock_lambda_a": 2.77778e-13, + "megashock_mean": 1e3, + "megashock_var": 5e4, + "random_state": np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + } +} oracle = SparseMeanRevertingOracle(mkt_open, mkt_close, symbols) num_zi_agents = 50 -agents.extend([ZeroIntelligenceAgent(id=j, - name="ZI_AGENT_{}".format(j), - type="ZeroIntelligenceAgent", - symbol=symbol, - starting_cash=starting_cash, - sigma_n=10000, - sigma_s=symbols[symbol]['fund_vol'], - kappa=symbols[symbol]['agent_kappa'], - r_bar=symbols[symbol]['r_bar'], - q_max=10, - sigma_pv=5e4, - R_min=0, - R_max=100, - eta=1, - lambda_a=1e-12, - log_orders=False, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, - dtype='uint64'))) - for j in range(agent_count, agent_count + num_zi_agents)]) +agents.extend( + [ + ZeroIntelligenceAgent( + id=j, + name="ZI_AGENT_{}".format(j), + type="ZeroIntelligenceAgent", + symbol=symbol, + starting_cash=starting_cash, + sigma_n=10000, + sigma_s=symbols[symbol]["fund_vol"], + kappa=symbols[symbol]["agent_kappa"], + r_bar=symbols[symbol]["r_bar"], + q_max=10, + sigma_pv=5e4, + R_min=0, + R_max=100, + eta=1, + lambda_a=1e-12, + log_orders=False, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + for j in range(agent_count, agent_count + num_zi_agents) + ] +) agent_types.extend("ZeroIntelligenceAgent") agent_count += num_zi_agents # 4) 25 Heuristic Belief Learning Agents num_hbl_agents = 25 
-agents.extend([HeuristicBeliefLearningAgent(id=j, - name="HBL_AGENT_{}".format(j), - type="HeuristicBeliefLearningAgent", - symbol=symbol, - starting_cash=starting_cash, - sigma_n=10000, - sigma_s=symbols[symbol]['fund_vol'], - kappa=symbols[symbol]['agent_kappa'], - r_bar=symbols[symbol]['r_bar'], - q_max=10, - sigma_pv=5e4, - R_min=0, - R_max=100, - eta=1, - lambda_a=1e-12, - L=2, - log_orders=False, - random_state=np.random.RandomState( - seed=np.random.randint(low=0, high=2 ** 32, - dtype='uint64'))) - for j in range(agent_count, agent_count + num_hbl_agents)]) +agents.extend( + [ + HeuristicBeliefLearningAgent( + id=j, + name="HBL_AGENT_{}".format(j), + type="HeuristicBeliefLearningAgent", + symbol=symbol, + starting_cash=starting_cash, + sigma_n=10000, + sigma_s=symbols[symbol]["fund_vol"], + kappa=symbols[symbol]["agent_kappa"], + r_bar=symbols[symbol]["r_bar"], + q_max=10, + sigma_pv=5e4, + R_min=0, + R_max=100, + eta=1, + lambda_a=1e-12, + L=2, + log_orders=False, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + for j in range(agent_count, agent_count + num_hbl_agents) + ] +) agent_types.extend("HeuristicBeliefLearningAgent") agent_count += num_hbl_agents # 5) 24 Momentum Agents: num_momentum_agents = 24 -agents.extend([MomentumAgent(id=j, - name="MOMENTUM_AGENT_{}".format(j), - type="MomentumAgent", - symbol=symbol, - starting_cash=starting_cash, - min_size=1, - max_size=10, - subscribe=True, - log_orders=False, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, - dtype='uint64'))) - for j in range(agent_count, agent_count + num_momentum_agents)]) +agents.extend( + [ + MomentumAgent( + id=j, + name="MOMENTUM_AGENT_{}".format(j), + type="MomentumAgent", + symbol=symbol, + starting_cash=starting_cash, + min_size=1, + max_size=10, + subscribe=True, + log_orders=False, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + for j 
in range(agent_count, agent_count + num_momentum_agents) + ] +) agent_types.extend("MomentumAgent") agent_count += num_momentum_agents @@ -217,34 +236,41 @@ agent_count += 1 """ # 7) User defined agent -# Load the agent to evaluate against the market +# Load the agent to evaluate against the market if args.agent_name: - mod_name = args.agent_name.rsplit('.', 1)[0] - class_name = args.agent_name.split('.')[-1] + mod_name = args.agent_name.rsplit(".", 1)[0] + class_name = args.agent_name.split(".")[-1] m = importlib.import_module(args.agent_name, package=None) testagent = getattr(m, class_name) - agents.extend([testagent(id=agent_count, - name=args.agent_name, - type="AgentUnderTest", - symbol=symbol, - starting_cash=starting_cash, - min_size=1, - max_size=10, - log_orders=False, - random_state=np.random.RandomState( - seed=np.random.randint(low=0, high=2 ** 32, dtype='uint64')))]) + agents.extend( + [ + testagent( + id=agent_count, + name=args.agent_name, + type="AgentUnderTest", + symbol=symbol, + starting_cash=starting_cash, + min_size=1, + max_size=10, + log_orders=False, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + ] + ) agent_count += 1 - agent_types.extend('AgentUnderTest') + agent_types.extend("AgentUnderTest") ######################################################################################################################## ########################################### KERNEL AND OTHER CONFIG #################################################### -kernel = Kernel("RMSC02 Kernel", random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, - dtype='uint64'))) +kernel = Kernel( + "RMSC02 Kernel", + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), +) kernelStartTime = historical_date -kernelStopTime = mkt_close + pd.to_timedelta('00:01:00') +kernelStopTime = mkt_close + pd.to_timedelta("00:01:00") defaultComputationDelay = 50 # 50 
nanoseconds @@ -255,28 +281,25 @@ # All agents sit on line from Seattle to NYC nyc_to_seattle_meters = 3866660 -pairwise_distances = util.generate_uniform_random_pairwise_dist_on_line(0.0, nyc_to_seattle_meters, agent_count, - random_state=latency_rstate) +pairwise_distances = util.generate_uniform_random_pairwise_dist_on_line( + 0.0, nyc_to_seattle_meters, agent_count, random_state=latency_rstate +) pairwise_latencies = util.meters_to_light_ns(pairwise_distances) -model_args = { - 'connected': True, - 'min_latency': pairwise_latencies -} +model_args = {"connected": True, "min_latency": pairwise_latencies} -latency_model = LatencyModel(latency_model='deterministic', - random_state=latency_rstate, - kwargs=model_args - ) +latency_model = LatencyModel(latency_model="deterministic", random_state=latency_rstate, kwargs=model_args) # KERNEL -kernel.runner(agents=agents, - startTime=kernelStartTime, - stopTime=kernelStopTime, - agentLatencyModel=latency_model, - defaultComputationDelay=defaultComputationDelay, - oracle=oracle, - log_dir=args.log_dir) +kernel.runner( + agents=agents, + startTime=kernelStartTime, + stopTime=kernelStopTime, + agentLatencyModel=latency_model, + defaultComputationDelay=defaultComputationDelay, + oracle=oracle, + log_dir=args.log_dir, +) simulation_end_time = dt.datetime.now() diff --git a/config/rmsc03.py b/config/rmsc03.py index 2f49ef795..33f87715a 100644 --- a/config/rmsc03.py +++ b/config/rmsc03.py @@ -7,120 +7,85 @@ # - 1 (Optional) POV Execution agent import argparse +import datetime as dt +import sys + import numpy as np import pandas as pd -import sys -import datetime as dt from dateutil.parser import parse -from Kernel import Kernel -from util import util -from util.order import LimitOrder -from util.oracle.SparseMeanRevertingOracle import SparseMeanRevertingOracle - +from agent.examples.MomentumAgent import MomentumAgent from agent.ExchangeAgent import ExchangeAgent +from agent.execution.POVExecutionAgent import POVExecutionAgent 
+from agent.market_makers.AdaptiveMarketMakerAgent import AdaptiveMarketMakerAgent from agent.NoiseAgent import NoiseAgent from agent.ValueAgent import ValueAgent -from agent.market_makers.AdaptiveMarketMakerAgent import AdaptiveMarketMakerAgent -from agent.examples.MomentumAgent import MomentumAgent -from agent.execution.POVExecutionAgent import POVExecutionAgent +from Kernel import Kernel from model.LatencyModel import LatencyModel +from util import util +from util.oracle.SparseMeanRevertingOracle import SparseMeanRevertingOracle +from util.order import LimitOrder ######################################################################################################################## ############################################### GENERAL CONFIG ######################################################### -parser = argparse.ArgumentParser(description='Detailed options for RMSC03 config.') - -parser.add_argument('-c', - '--config', - required=True, - help='Name of config file to execute') -parser.add_argument('-t', - '--ticker', - required=True, - help='Ticker (symbol) to use for simulation') -parser.add_argument('-d', '--historical-date', - required=True, - type=parse, - help='historical date being simulated in format YYYYMMDD.') -parser.add_argument('--start-time', - default='09:30:00', - type=parse, - help='Starting time of simulation.' - ) -parser.add_argument('--end-time', - default='11:30:00', - type=parse, - help='Ending time of simulation.' 
- ) -parser.add_argument('-l', - '--log_dir', - default=None, - help='Log directory name (default: unix timestamp at program start)') -parser.add_argument('-s', - '--seed', - type=int, - default=None, - help='numpy.random.seed() for simulation') -parser.add_argument('-v', - '--verbose', - action='store_true', - help='Maximum verbosity!') -parser.add_argument('--config_help', - action='store_true', - help='Print argument options for this config file') +parser = argparse.ArgumentParser(description="Detailed options for RMSC03 config.") + +parser.add_argument("-c", "--config", required=True, help="Name of config file to execute") +parser.add_argument("-t", "--ticker", required=True, help="Ticker (symbol) to use for simulation") +parser.add_argument( + "-d", + "--historical-date", + required=True, + type=parse, + help="historical date being simulated in format YYYYMMDD.", +) +parser.add_argument("--start-time", default="09:30:00", type=parse, help="Starting time of simulation.") +parser.add_argument("--end-time", default="11:30:00", type=parse, help="Ending time of simulation.") +parser.add_argument( + "-l", + "--log_dir", + default=None, + help="Log directory name (default: unix timestamp at program start)", +) +parser.add_argument("-s", "--seed", type=int, default=None, help="numpy.random.seed() for simulation") +parser.add_argument("-v", "--verbose", action="store_true", help="Maximum verbosity!") +parser.add_argument( + "--config_help", + action="store_true", + help="Print argument options for this config file", +) # Execution agent config -parser.add_argument('-e', - '--execution-agents', - action='store_true', - help='Flag to allow the execution agent to trade.') -parser.add_argument('-p', - '--execution-pov', - type=float, - default=0.1, - help='Participation of Volume level for execution agent') +parser.add_argument( + "-e", + "--execution-agents", + action="store_true", + help="Flag to allow the execution agent to trade.", +) +parser.add_argument( + "-p", + 
"--execution-pov", + type=float, + default=0.1, + help="Participation of Volume level for execution agent", +) # market maker config -parser.add_argument('--mm-pov', - type=float, - default=0.025 - ) -parser.add_argument('--mm-window-size', - type=util.validate_window_size, - default='adaptive' - ) -parser.add_argument('--mm-min-order-size', - type=int, - default=1 - ) -parser.add_argument('--mm-num-ticks', - type=int, - default=10 - ) -parser.add_argument('--mm-wake-up-freq', - type=str, - default='10S' - ) -parser.add_argument('--mm-skew-beta', - type=float, - default=0 - ) -parser.add_argument('--mm-level-spacing', - type=float, - default=5 - ) -parser.add_argument('--mm-spread-alpha', - type=float, - default=0.75 - ) -parser.add_argument('--mm-backstop-quantity', - type=float, - default=50000) - -parser.add_argument('--fund-vol', - type=float, - default=1e-8, - help='Volatility of fundamental time series.' - ) +parser.add_argument("--mm-pov", type=float, default=0.025) +parser.add_argument("--mm-window-size", type=util.validate_window_size, default="adaptive") +parser.add_argument("--mm-min-order-size", type=int, default=1) +parser.add_argument("--mm-num-ticks", type=int, default=10) +parser.add_argument("--mm-wake-up-freq", type=str, default="10S") +parser.add_argument("--mm-skew-beta", type=float, default=0) +parser.add_argument("--mm-level-spacing", type=float, default=5) +parser.add_argument("--mm-spread-alpha", type=float, default=0.75) +parser.add_argument("--mm-backstop-quantity", type=float, default=50000) + +parser.add_argument( + "--fund-vol", + type=float, + default=1e-8, + help="Volatility of fundamental time series.", +) args, remaining_args = parser.parse_known_args() @@ -130,7 +95,8 @@ log_dir = args.log_dir # Requested log directory. seed = args.seed # Random seed specification on the command line. 
-if not seed: seed = int(pd.Timestamp.now().timestamp() * 1000000) % (2 ** 32 - 1) +if not seed: + seed = int(pd.Timestamp.now().timestamp() * 1000000) % (2**32 - 1) np.random.seed(seed) util.silent_mode = not args.verbose @@ -148,8 +114,8 @@ # Historical date to simulate. historical_date = pd.to_datetime(args.historical_date) -mkt_open = historical_date + pd.to_timedelta(args.start_time.strftime('%H:%M:%S')) -mkt_close = historical_date + pd.to_timedelta(args.end_time.strftime('%H:%M:%S')) +mkt_open = historical_date + pd.to_timedelta(args.start_time.strftime("%H:%M:%S")) +mkt_close = historical_date + pd.to_timedelta(args.end_time.strftime("%H:%M:%S")) agent_count, agents, agent_types = 0, [], [] # Hyperparameters @@ -162,14 +128,18 @@ lambda_a = 7e-11 # Oracle -symbols = {symbol: {'r_bar': r_bar, - 'kappa': 1.67e-16, - 'sigma_s': 0, - 'fund_vol': args.fund_vol, - 'megashock_lambda_a': 2.77778e-18, - 'megashock_mean': 1e3, - 'megashock_var': 5e4, - 'random_state': np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, dtype='uint64'))}} +symbols = { + symbol: { + "r_bar": r_bar, + "kappa": 1.67e-16, + "sigma_s": 0, + "fund_vol": args.fund_vol, + "megashock_lambda_a": 2.77778e-18, + "megashock_mean": 1e3, + "megashock_var": 5e4, + "random_state": np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + } +} oracle = SparseMeanRevertingOracle(mkt_open, mkt_close, symbols) @@ -179,55 +149,73 @@ # stream_history_length = int(pd.to_timedelta(args.mm_wake_up_freq).total_seconds() * 100) stream_history_length = 25000 -agents.extend([ExchangeAgent(id=0, - name="EXCHANGE_AGENT", - type="ExchangeAgent", - mkt_open=mkt_open, - mkt_close=mkt_close, - symbols=[symbol], - log_orders=exchange_log_orders, - pipeline_delay=0, - computation_delay=0, - stream_history=stream_history_length, - book_freq=book_freq, - wide_book=True, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, dtype='uint64')))]) +agents.extend( 
+ [ + ExchangeAgent( + id=0, + name="EXCHANGE_AGENT", + type="ExchangeAgent", + mkt_open=mkt_open, + mkt_close=mkt_close, + symbols=[symbol], + log_orders=exchange_log_orders, + pipeline_delay=0, + computation_delay=0, + stream_history=stream_history_length, + book_freq=book_freq, + wide_book=True, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + ] +) agent_types.extend("ExchangeAgent") agent_count += 1 # 2) Noise Agents num_noise = 5000 noise_mkt_open = historical_date + pd.to_timedelta("09:00:00") # These times needed for distribution of arrival times - # of Noise Agents +# of Noise Agents noise_mkt_close = historical_date + pd.to_timedelta("16:00:00") -agents.extend([NoiseAgent(id=j, - name="NoiseAgent {}".format(j), - type="NoiseAgent", - symbol=symbol, - starting_cash=starting_cash, - wakeup_time=util.get_wake_time(noise_mkt_open, noise_mkt_close), - log_orders=log_orders, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, dtype='uint64'))) - for j in range(agent_count, agent_count + num_noise)]) +agents.extend( + [ + NoiseAgent( + id=j, + name="NoiseAgent {}".format(j), + type="NoiseAgent", + symbol=symbol, + starting_cash=starting_cash, + wakeup_time=util.get_wake_time(noise_mkt_open, noise_mkt_close), + log_orders=log_orders, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + for j in range(agent_count, agent_count + num_noise) + ] +) agent_count += num_noise -agent_types.extend(['NoiseAgent']) +agent_types.extend(["NoiseAgent"]) # 3) Value Agents num_value = 100 -agents.extend([ValueAgent(id=j, - name="Value Agent {}".format(j), - type="ValueAgent", - symbol=symbol, - starting_cash=starting_cash, - sigma_n=sigma_n, - r_bar=r_bar, - kappa=kappa, - lambda_a=lambda_a, - log_orders=log_orders, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, dtype='uint64'))) - for j in range(agent_count, 
agent_count + num_value)]) +agents.extend( + [ + ValueAgent( + id=j, + name="Value Agent {}".format(j), + type="ValueAgent", + symbol=symbol, + starting_cash=starting_cash, + sigma_n=sigma_n, + r_bar=r_bar, + kappa=kappa, + lambda_a=lambda_a, + log_orders=log_orders, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + for j in range(agent_count, agent_count + num_value) + ] +) agent_count += num_value -agent_types.extend(['ValueAgent']) +agent_types.extend(["ValueAgent"]) # 4) Market Maker Agents @@ -241,51 +229,74 @@ """ # each elem of mm_params is tuple (window_size, pov, num_ticks, wake_up_freq, min_order_size) -mm_params = [(args.mm_window_size, args.mm_pov, args.mm_num_ticks, args.mm_wake_up_freq, args.mm_min_order_size), - (args.mm_window_size, args.mm_pov, args.mm_num_ticks, args.mm_wake_up_freq, args.mm_min_order_size) - ] +mm_params = [ + ( + args.mm_window_size, + args.mm_pov, + args.mm_num_ticks, + args.mm_wake_up_freq, + args.mm_min_order_size, + ), + ( + args.mm_window_size, + args.mm_pov, + args.mm_num_ticks, + args.mm_wake_up_freq, + args.mm_min_order_size, + ), +] num_mm_agents = len(mm_params) mm_cancel_limit_delay = 50 # 50 nanoseconds -agents.extend([AdaptiveMarketMakerAgent(id=j, - name="ADAPTIVE_POV_MARKET_MAKER_AGENT_{}".format(j), - type='AdaptivePOVMarketMakerAgent', - symbol=symbol, - starting_cash=starting_cash, - pov=mm_params[idx][1], - min_order_size=mm_params[idx][4], - window_size=mm_params[idx][0], - num_ticks=mm_params[idx][2], - wake_up_freq=mm_params[idx][3], - cancel_limit_delay=mm_cancel_limit_delay, - skew_beta=args.mm_skew_beta, - level_spacing=args.mm_level_spacing, - spread_alpha=args.mm_spread_alpha, - backstop_quantity=args.mm_backstop_quantity, - log_orders=log_orders, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, - dtype='uint64'))) - for idx, j in enumerate(range(agent_count, agent_count + num_mm_agents))]) +agents.extend( + [ + 
AdaptiveMarketMakerAgent( + id=j, + name="ADAPTIVE_POV_MARKET_MAKER_AGENT_{}".format(j), + type="AdaptivePOVMarketMakerAgent", + symbol=symbol, + starting_cash=starting_cash, + pov=mm_params[idx][1], + min_order_size=mm_params[idx][4], + window_size=mm_params[idx][0], + num_ticks=mm_params[idx][2], + wake_up_freq=mm_params[idx][3], + cancel_limit_delay=mm_cancel_limit_delay, + skew_beta=args.mm_skew_beta, + level_spacing=args.mm_level_spacing, + spread_alpha=args.mm_spread_alpha, + backstop_quantity=args.mm_backstop_quantity, + log_orders=log_orders, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + for idx, j in enumerate(range(agent_count, agent_count + num_mm_agents)) + ] +) agent_count += num_mm_agents -agent_types.extend('POVMarketMakerAgent') +agent_types.extend("POVMarketMakerAgent") # 5) Momentum Agents num_momentum_agents = 25 -agents.extend([MomentumAgent(id=j, - name="MOMENTUM_AGENT_{}".format(j), - type="MomentumAgent", - symbol=symbol, - starting_cash=starting_cash, - min_size=1, - max_size=10, - wake_up_freq='20s', - log_orders=log_orders, - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, - dtype='uint64'))) - for j in range(agent_count, agent_count + num_momentum_agents)]) +agents.extend( + [ + MomentumAgent( + id=j, + name="MOMENTUM_AGENT_{}".format(j), + type="MomentumAgent", + symbol=symbol, + starting_cash=starting_cash, + min_size=1, + max_size=10, + wake_up_freq="20s", + log_orders=log_orders, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + for j in range(agent_count, agent_count + num_momentum_agents) + ] +) agent_count += num_momentum_agents agent_types.extend("MomentumAgent") @@ -295,29 +306,30 @@ #### Participation of Volume Agent parameters -pov_agent_start_time = mkt_open + pd.to_timedelta('00:30:00') -pov_agent_end_time = mkt_close - pd.to_timedelta('00:30:00') +pov_agent_start_time = mkt_open + 
pd.to_timedelta("00:30:00") +pov_agent_end_time = mkt_close - pd.to_timedelta("00:30:00") pov_proportion_of_volume = args.execution_pov pov_quantity = 12e5 -pov_frequency = '1min' +pov_frequency = "1min" pov_direction = "BUY" -pov_agent = POVExecutionAgent(id=agent_count, - name='POV_EXECUTION_AGENT', - type='ExecutionAgent', - symbol=symbol, - starting_cash=starting_cash, - start_time=pov_agent_start_time, - end_time=pov_agent_end_time, - freq=pov_frequency, - lookback_period=pov_frequency, - pov=pov_proportion_of_volume, - direction=pov_direction, - quantity=pov_quantity, - trade=trade, - log_orders=True, # needed for plots so conflicts with others - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, - dtype='uint64'))) +pov_agent = POVExecutionAgent( + id=agent_count, + name="POV_EXECUTION_AGENT", + type="ExecutionAgent", + symbol=symbol, + starting_cash=starting_cash, + start_time=pov_agent_start_time, + end_time=pov_agent_end_time, + freq=pov_frequency, + lookback_period=pov_frequency, + pov=pov_proportion_of_volume, + direction=pov_direction, + quantity=pov_quantity, + trade=trade, + log_orders=True, # needed for plots so conflicts with others + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), +) execution_agents = [pov_agent] agents.extend(execution_agents) @@ -328,11 +340,13 @@ ######################################################################################################################## ########################################### KERNEL AND OTHER CONFIG #################################################### -kernel = Kernel("RMSC03 Kernel", random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, - dtype='uint64'))) +kernel = Kernel( + "RMSC03 Kernel", + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), +) kernelStartTime = historical_date -kernelStopTime = mkt_close + pd.to_timedelta('00:01:00') 
+kernelStopTime = mkt_close + pd.to_timedelta("00:01:00") defaultComputationDelay = 50 # 50 nanoseconds @@ -343,28 +357,25 @@ # All agents sit on line from Seattle to NYC nyc_to_seattle_meters = 3866660 -pairwise_distances = util.generate_uniform_random_pairwise_dist_on_line(0.0, nyc_to_seattle_meters, agent_count, - random_state=latency_rstate) +pairwise_distances = util.generate_uniform_random_pairwise_dist_on_line( + 0.0, nyc_to_seattle_meters, agent_count, random_state=latency_rstate +) pairwise_latencies = util.meters_to_light_ns(pairwise_distances) -model_args = { - 'connected': True, - 'min_latency': pairwise_latencies -} +model_args = {"connected": True, "min_latency": pairwise_latencies} -latency_model = LatencyModel(latency_model='deterministic', - random_state=latency_rstate, - kwargs=model_args - ) +latency_model = LatencyModel(latency_model="deterministic", random_state=latency_rstate, kwargs=model_args) # KERNEL -kernel.runner(agents=agents, - startTime=kernelStartTime, - stopTime=kernelStopTime, - agentLatencyModel=latency_model, - defaultComputationDelay=defaultComputationDelay, - oracle=oracle, - log_dir=args.log_dir) +kernel.runner( + agents=agents, + startTime=kernelStartTime, + stopTime=kernelStopTime, + agentLatencyModel=latency_model, + defaultComputationDelay=defaultComputationDelay, + oracle=oracle, + log_dir=args.log_dir, +) simulation_end_time = dt.datetime.now() diff --git a/config/sparse_zi_100.py b/config/sparse_zi_100.py index 22410e4c8..986a6ad5f 100755 --- a/config/sparse_zi_100.py +++ b/config/sparse_zi_100.py @@ -1,46 +1,62 @@ -from Kernel import Kernel -from agent.ExchangeAgent import ExchangeAgent -from agent.ZeroIntelligenceAgent import ZeroIntelligenceAgent -from model.LatencyModel import LatencyModel -from util.order import LimitOrder -from util.oracle.SparseMeanRevertingOracle import SparseMeanRevertingOracle -from util import util +# Some config files require additional command line parameters to easily +# control agent or 
simulation hyperparameters during coarse parallelization. +import argparse +import sys import numpy as np import pandas as pd -import sys +from agent.ExchangeAgent import ExchangeAgent +from agent.ZeroIntelligenceAgent import ZeroIntelligenceAgent +from Kernel import Kernel +from model.LatencyModel import LatencyModel +from util import util +from util.oracle.SparseMeanRevertingOracle import SparseMeanRevertingOracle +from util.order import LimitOrder -# Some config files require additional command line parameters to easily -# control agent or simulation hyperparameters during coarse parallelization. -import argparse - -parser = argparse.ArgumentParser(description='Detailed options for sparse_zi config.') -parser.add_argument('-b', '--book_freq', default=None, - help='Frequency at which to archive order book for visualization') -parser.add_argument('-c', '--config', required=True, - help='Name of config file to execute') -parser.add_argument('-l', '--log_dir', default=None, - help='Log directory name (default: unix timestamp at program start)') -parser.add_argument('-n', '--obs_noise', type=float, default=1000000, - help='Observation noise variance for zero intelligence agents (sigma^2_n)') -parser.add_argument('-o', '--log_orders', action='store_true', - help='Log every order-related action by every agent.') -parser.add_argument('-s', '--seed', type=int, default=None, - help='numpy.random.seed() for simulation') -parser.add_argument('-v', '--verbose', action='store_true', - help='Maximum verbosity!') -parser.add_argument('--config_help', action='store_true', - help='Print argument options for this config file') +parser = argparse.ArgumentParser(description="Detailed options for sparse_zi config.") +parser.add_argument( + "-b", + "--book_freq", + default=None, + help="Frequency at which to archive order book for visualization", +) +parser.add_argument("-c", "--config", required=True, help="Name of config file to execute") +parser.add_argument( + "-l", + "--log_dir", 
+ default=None, + help="Log directory name (default: unix timestamp at program start)", +) +parser.add_argument( + "-n", + "--obs_noise", + type=float, + default=1000000, + help="Observation noise variance for zero intelligence agents (sigma^2_n)", +) +parser.add_argument( + "-o", + "--log_orders", + action="store_true", + help="Log every order-related action by every agent.", +) +parser.add_argument("-s", "--seed", type=int, default=None, help="numpy.random.seed() for simulation") +parser.add_argument("-v", "--verbose", action="store_true", help="Maximum verbosity!") +parser.add_argument( + "--config_help", + action="store_true", + help="Print argument options for this config file", +) args, remaining_args = parser.parse_known_args() if args.config_help: - parser.print_help() - sys.exit() + parser.print_help() + sys.exit() # Historical date to simulate. Required even if not relevant. -historical_date = pd.to_datetime('2019-06-28') +historical_date = pd.to_datetime("2019-06-28") # Requested log directory. log_dir = args.log_dir @@ -67,7 +83,8 @@ # before) seed = args.seed -if not seed: seed = int(pd.Timestamp.now().timestamp() * 1000000) % (2**32 - 1) +if not seed: + seed = int(pd.Timestamp.now().timestamp() * 1000000) % (2**32 - 1) np.random.seed(seed) # Config parameter that causes util.util.print to suppress most output. 
@@ -81,12 +98,11 @@ log_orders = args.log_orders -print ("Silent mode: {}".format(util.silent_mode)) -print ("Logging orders: {}".format(log_orders)) -print ("Book freq: {}".format(book_freq)) -print ("ZeroIntelligenceAgent noise: {:0.4f}".format(sigma_n)) -print ("Configuration seed: {}\n".format(seed)) - +print("Silent mode: {}".format(util.silent_mode)) +print("Logging orders: {}".format(log_orders)) +print("Book freq: {}".format(book_freq)) +print("ZeroIntelligenceAgent noise: {:0.4f}".format(sigma_n)) +print("Configuration seed: {}\n".format(seed)) # Since the simulator often pulls historical data, we use a real-world @@ -102,12 +118,12 @@ # When should the Kernel shut down? (This should be after market close.) # Here we go for 5 PM the same day. -kernelStopTime = midnight + pd.to_timedelta('17:00:00') +kernelStopTime = midnight + pd.to_timedelta("17:00:00") # This will configure the kernel with a default computation delay # (time penalty) for each agent's wakeup and recvMsg. An agent # can change this at any time for itself. (nanoseconds) -defaultComputationDelay = 1000000000 # one second +defaultComputationDelay = 1000000000 # one second # IMPORTANT NOTE CONCERNING AGENT IDS: the id passed to each agent must: @@ -127,15 +143,29 @@ # Note: sigma_s is no longer used by the agents or the fundamental (for sparse discrete simulation). 
-symbols = { 'JPM' : { 'r_bar' : 1e5, 'kappa' : 1.67e-12, 'agent_kappa' : 1.67e-15, 'sigma_s' : 0, 'fund_vol' : 1e-8, 'megashock_lambda_a' : 2.77778e-13, 'megashock_mean' : 1e3, 'megashock_var' : 5e4, 'random_state' : np.random.RandomState(seed=np.random.randint(low=0,high=2**32, dtype='uint64')) } } - +symbols = { + "JPM": { + "r_bar": 1e5, + "kappa": 1.67e-12, + "agent_kappa": 1.67e-15, + "sigma_s": 0, + "fund_vol": 1e-8, + "megashock_lambda_a": 2.77778e-13, + "megashock_mean": 1e3, + "megashock_var": 5e4, + "random_state": np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + } +} + ### Configure the Kernel. -kernel = Kernel("Base Kernel", random_state = np.random.RandomState(seed=np.random.randint(low=0,high=2**32, dtype='uint64'))) +kernel = Kernel( + "Base Kernel", + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), +) ### Obtain random state for whatever latency model will be used. -latency_rstate = np.random.RandomState(seed=np.random.randint(low=0,high=2**32)) - +latency_rstate = np.random.RandomState(seed=np.random.randint(low=0, high=2**32)) ### Configure the agents. When conducting "agent of change" experiments, the @@ -148,10 +178,10 @@ ### Configure an exchange agent. # Let's open the exchange at 9:30 AM. -mkt_open = midnight + pd.to_timedelta('09:30:00') +mkt_open = midnight + pd.to_timedelta("09:30:00") # And close it at 4:00 PM. -mkt_close = midnight + pd.to_timedelta('16:00:00') +mkt_close = midnight + pd.to_timedelta("16:00:00") # Configure an appropriate oracle for all traded stocks. @@ -161,20 +191,36 @@ # Create the exchange. 
num_exchanges = 1 -agents.extend([ ExchangeAgent(j, "Exchange Agent {}".format(j), "ExchangeAgent", mkt_open, mkt_close, [s for s in symbols], log_orders=log_orders, book_freq=book_freq, pipeline_delay = 0, computation_delay = 0, stream_history = 10, random_state = np.random.RandomState(seed=np.random.randint(low=0,high=2**32, dtype='uint64'))) - for j in range(agent_count, agent_count + num_exchanges) ]) +agents.extend( + [ + ExchangeAgent( + j, + "Exchange Agent {}".format(j), + "ExchangeAgent", + mkt_open, + mkt_close, + [s for s in symbols], + log_orders=log_orders, + book_freq=book_freq, + pipeline_delay=0, + computation_delay=0, + stream_history=10, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + for j in range(agent_count, agent_count + num_exchanges) + ] +) agent_types.extend(["ExchangeAgent" for j in range(num_exchanges)]) agent_count += num_exchanges - ### Configure some zero intelligence agents. # Cash in this simulator is always in CENTS. starting_cash = 10000000 # Here are the zero intelligence agents. -symbol = 'JPM' +symbol = "JPM" s = symbols[symbol] # Tuples are: (# agents, R_min, R_max, eta). @@ -182,16 +228,47 @@ # Some configs for ZI agents only (among seven parameter settings). # 100 agents -zi = [ (15, 0, 250, 1), (15, 0, 500, 1), (14, 0, 1000, 0.8), (14, 0, 1000, 1), (14, 0, 2000, 0.8), (14, 250, 500, 0.8), (14, 250, 500, 1) ] +zi = [ + (15, 0, 250, 1), + (15, 0, 500, 1), + (14, 0, 1000, 0.8), + (14, 0, 1000, 1), + (14, 0, 2000, 0.8), + (14, 250, 500, 0.8), + (14, 250, 500, 1), +] # ZI strategy split. Note that agent arrival rates are quite small, because our minimum # time step is a nanosecond, and we want the agents to arrive more on the order of # minutes. 
-for i,x in enumerate(zi): - strat_name = "Type {} [{} <= R <= {}, eta={}]".format(i+1, x[1], x[2], x[3]) - agents.extend([ ZeroIntelligenceAgent(j, "ZI Agent {} {}".format(j, strat_name), "ZeroIntelligenceAgent {}".format(strat_name), random_state = np.random.RandomState(seed=np.random.randint(low=0,high=2**32, dtype='uint64')),log_orders=log_orders, symbol=symbol, starting_cash=starting_cash, sigma_n=sigma_n, r_bar=s['r_bar'], kappa=s['agent_kappa'], sigma_s=s['fund_vol'], q_max=10, sigma_pv=5e6, R_min=x[1], R_max=x[2], eta=x[3], lambda_a=1e-12) for j in range(agent_count,agent_count+x[0]) ]) - agent_types.extend([ "ZeroIntelligenceAgent {}".format(strat_name) for j in range(x[0]) ]) - agent_count += x[0] +for i, x in enumerate(zi): + strat_name = "Type {} [{} <= R <= {}, eta={}]".format(i + 1, x[1], x[2], x[3]) + agents.extend( + [ + ZeroIntelligenceAgent( + j, + "ZI Agent {} {}".format(j, strat_name), + "ZeroIntelligenceAgent {}".format(strat_name), + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + log_orders=log_orders, + symbol=symbol, + starting_cash=starting_cash, + sigma_n=sigma_n, + r_bar=s["r_bar"], + kappa=s["agent_kappa"], + sigma_s=s["fund_vol"], + q_max=10, + sigma_pv=5e6, + R_min=x[1], + R_max=x[2], + eta=x[3], + lambda_a=1e-12, + ) + for j in range(agent_count, agent_count + x[0]) + ] + ) + agent_types.extend(["ZeroIntelligenceAgent {}".format(strat_name) for j in range(x[0])]) + agent_count += x[0] latency = None @@ -212,30 +289,29 @@ # Other agents can be explicitly set afterward (and the mirror half of the matrix is also). if not USE_NEW_MODEL: - # This configures all agents to a starting latency as described above. - latency = np.random.uniform(low = 21000, high = 13000000, size=(len(agent_types),len(agent_types))) - - # Overriding the latency for certain agent pairs happens below, as does forcing mirroring - # of the matrix to be symmetric. 
- for i, t1 in zip(range(latency.shape[0]), agent_types): - for j, t2 in zip(range(latency.shape[1]), agent_types): - # Three cases for symmetric array. Set latency when j > i, copy it when i > j, same agent when i == j. - if j > i: - # Presently, strategy agents shouldn't be talking to each other, so we set them to extremely high latency. - if (t1 == "ZeroIntelligenceAgent" and t2 == "ZeroIntelligenceAgent"): - latency[i,j] = 1000000000 * 60 * 60 * 24 # Twenty-four hours. - elif i > j: - # This "bottom" half of the matrix simply mirrors the top. - latency[i,j] = latency[j,i] - else: - # This is the same agent. How long does it take to reach localhost? In our data center, it actually - # takes about 20 microseconds. - latency[i,j] = 20000 - - - # Configure a simple latency noise model for the agents. - # Index is ns extra delay, value is probability of this delay being applied. - noise = [ 0.25, 0.25, 0.20, 0.15, 0.10, 0.05 ] + # This configures all agents to a starting latency as described above. + latency = np.random.uniform(low=21000, high=13000000, size=(len(agent_types), len(agent_types))) + + # Overriding the latency for certain agent pairs happens below, as does forcing mirroring + # of the matrix to be symmetric. + for i, t1 in zip(range(latency.shape[0]), agent_types): + for j, t2 in zip(range(latency.shape[1]), agent_types): + # Three cases for symmetric array. Set latency when j > i, copy it when i > j, same agent when i == j. + if j > i: + # Presently, strategy agents shouldn't be talking to each other, so we set them to extremely high latency. + if t1 == "ZeroIntelligenceAgent" and t2 == "ZeroIntelligenceAgent": + latency[i, j] = 1000000000 * 60 * 60 * 24 # Twenty-four hours. + elif i > j: + # This "bottom" half of the matrix simply mirrors the top. + latency[i, j] = latency[j, i] + else: + # This is the same agent. How long does it take to reach localhost? In our data center, it actually + # takes about 20 microseconds. 
+ latency[i, j] = 20000 + + # Configure a simple latency noise model for the agents. + # Index is ns extra delay, value is probability of this delay being applied. + noise = [0.25, 0.25, 0.20, 0.15, 0.10, 0.05] ### END OLD LATENCY ATTRIBUTE CONFIGURATION ### @@ -243,28 +319,32 @@ ### BEGIN NEW LATENCY MODEL CONFIGURATION ### else: - # Get a new-style cubic LatencyModel from the networking literature. - pairwise = (len(agent_types),len(agent_types)) - - model_args = { 'connected' : True, + # Get a new-style cubic LatencyModel from the networking literature. + pairwise = (len(agent_types), len(agent_types)) - # All in NYC. - 'min_latency' : np.random.uniform(low = 21000, high = 100000, size = pairwise), - 'jitter' : 0.3, - 'jitter_clip' : 0.05, - 'jitter_unit' : 5, - } + model_args = { + "connected": True, + # All in NYC. + "min_latency": np.random.uniform(low=21000, high=100000, size=pairwise), + "jitter": 0.3, + "jitter_clip": 0.05, + "jitter_unit": 5, + } - latency_model = LatencyModel ( latency_model = 'cubic', random_state = latency_rstate, kwargs = model_args ) + latency_model = LatencyModel(latency_model="cubic", random_state=latency_rstate, kwargs=model_args) ### END NEW LATENCY MODEL CONFIGURATION ### # Start the kernel running. 
-kernel.runner(agents = agents, startTime = kernelStartTime, - stopTime = kernelStopTime, - agentLatencyModel = latency_model, - agentLatency = latency, latencyNoise = noise, - defaultComputationDelay = defaultComputationDelay, - oracle = oracle, log_dir = log_dir) - +kernel.runner( + agents=agents, + startTime=kernelStartTime, + stopTime=kernelStopTime, + agentLatencyModel=latency_model, + agentLatency=latency, + latencyNoise=noise, + defaultComputationDelay=defaultComputationDelay, + oracle=oracle, + log_dir=log_dir, +) diff --git a/config/sparse_zi_1000.py b/config/sparse_zi_1000.py index 8b8d3d0c4..186b8a39b 100755 --- a/config/sparse_zi_1000.py +++ b/config/sparse_zi_1000.py @@ -1,45 +1,61 @@ -from Kernel import Kernel -from agent.ExchangeAgent import ExchangeAgent -from agent.ZeroIntelligenceAgent import ZeroIntelligenceAgent -from util.order import LimitOrder -from util.oracle.SparseMeanRevertingOracle import SparseMeanRevertingOracle -from util import util +# Some config files require additional command line parameters to easily +# control agent or simulation hyperparameters during coarse parallelization. +import argparse +import sys import numpy as np import pandas as pd -import sys +from agent.ExchangeAgent import ExchangeAgent +from agent.ZeroIntelligenceAgent import ZeroIntelligenceAgent +from Kernel import Kernel +from util import util +from util.oracle.SparseMeanRevertingOracle import SparseMeanRevertingOracle +from util.order import LimitOrder -# Some config files require additional command line parameters to easily -# control agent or simulation hyperparameters during coarse parallelization. 
-import argparse - -parser = argparse.ArgumentParser(description='Detailed options for sparse_zi config.') -parser.add_argument('-b', '--book_freq', default=None, - help='Frequency at which to archive order book for visualization') -parser.add_argument('-c', '--config', required=True, - help='Name of config file to execute') -parser.add_argument('-l', '--log_dir', default=None, - help='Log directory name (default: unix timestamp at program start)') -parser.add_argument('-n', '--obs_noise', type=float, default=1000000, - help='Observation noise variance for zero intelligence agents (sigma^2_n)') -parser.add_argument('-o', '--log_orders', action='store_true', - help='Log every order-related action by every agent.') -parser.add_argument('-s', '--seed', type=int, default=None, - help='numpy.random.seed() for simulation') -parser.add_argument('-v', '--verbose', action='store_true', - help='Maximum verbosity!') -parser.add_argument('--config_help', action='store_true', - help='Print argument options for this config file') +parser = argparse.ArgumentParser(description="Detailed options for sparse_zi config.") +parser.add_argument( + "-b", + "--book_freq", + default=None, + help="Frequency at which to archive order book for visualization", +) +parser.add_argument("-c", "--config", required=True, help="Name of config file to execute") +parser.add_argument( + "-l", + "--log_dir", + default=None, + help="Log directory name (default: unix timestamp at program start)", +) +parser.add_argument( + "-n", + "--obs_noise", + type=float, + default=1000000, + help="Observation noise variance for zero intelligence agents (sigma^2_n)", +) +parser.add_argument( + "-o", + "--log_orders", + action="store_true", + help="Log every order-related action by every agent.", +) +parser.add_argument("-s", "--seed", type=int, default=None, help="numpy.random.seed() for simulation") +parser.add_argument("-v", "--verbose", action="store_true", help="Maximum verbosity!") +parser.add_argument( + 
"--config_help", + action="store_true", + help="Print argument options for this config file", +) args, remaining_args = parser.parse_known_args() if args.config_help: - parser.print_help() - sys.exit() + parser.print_help() + sys.exit() # Historical date to simulate. Required even if not relevant. -historical_date = pd.to_datetime('2019-06-28') +historical_date = pd.to_datetime("2019-06-28") # Requested log directory. log_dir = args.log_dir @@ -66,7 +82,8 @@ # before) seed = args.seed -if not seed: seed = int(pd.Timestamp.now().timestamp() * 1000000) % (2**32 - 1) +if not seed: + seed = int(pd.Timestamp.now().timestamp() * 1000000) % (2**32 - 1) np.random.seed(seed) # Config parameter that causes util.util.print to suppress most output. @@ -80,12 +97,11 @@ log_orders = args.log_orders -print ("Silent mode: {}".format(util.silent_mode)) -print ("Logging orders: {}".format(log_orders)) -print ("Book freq: {}".format(book_freq)) -print ("ZeroIntelligenceAgent noise: {:0.4f}".format(sigma_n)) -print ("Configuration seed: {}\n".format(seed)) - +print("Silent mode: {}".format(util.silent_mode)) +print("Logging orders: {}".format(log_orders)) +print("Book freq: {}".format(book_freq)) +print("ZeroIntelligenceAgent noise: {:0.4f}".format(sigma_n)) +print("Configuration seed: {}\n".format(seed)) # Since the simulator often pulls historical data, we use a real-world @@ -101,12 +117,12 @@ # When should the Kernel shut down? (This should be after market close.) # Here we go for 5 PM the same day. -kernelStopTime = midnight + pd.to_timedelta('17:00:00') +kernelStopTime = midnight + pd.to_timedelta("17:00:00") # This will configure the kernel with a default computation delay # (time penalty) for each agent's wakeup and recvMsg. An agent # can change this at any time for itself. 
(nanoseconds) -defaultComputationDelay = 1000000000 # one second +defaultComputationDelay = 1000000000 # one second # IMPORTANT NOTE CONCERNING AGENT IDS: the id passed to each agent must: @@ -126,12 +142,26 @@ # Note: sigma_s is no longer used by the agents or the fundamental (for sparse discrete simulation). -symbols = { 'JPM' : { 'r_bar' : 1e5, 'kappa' : 1.67e-12, 'agent_kappa' : 1.67e-15, 'sigma_s' : 0, 'fund_vol' : 1e-8, 'megashock_lambda_a' : 2.77778e-13, 'megashock_mean' : 1e3, 'megashock_var' : 5e4, 'random_state' : np.random.RandomState(seed=np.random.randint(low=0,high=2**32, dtype='uint64')) } } - +symbols = { + "JPM": { + "r_bar": 1e5, + "kappa": 1.67e-12, + "agent_kappa": 1.67e-15, + "sigma_s": 0, + "fund_vol": 1e-8, + "megashock_lambda_a": 2.77778e-13, + "megashock_mean": 1e3, + "megashock_var": 5e4, + "random_state": np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + } +} -### Configure the Kernel. -kernel = Kernel("Base Kernel", random_state = np.random.RandomState(seed=np.random.randint(low=0,high=2**32, dtype='uint64'))) +### Configure the Kernel. +kernel = Kernel( + "Base Kernel", + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), +) ### Configure the agents. When conducting "agent of change" experiments, the @@ -144,10 +174,10 @@ ### Configure an exchange agent. # Let's open the exchange at 9:30 AM. -mkt_open = midnight + pd.to_timedelta('09:30:00') +mkt_open = midnight + pd.to_timedelta("09:30:00") # And close it at 4:00 PM. -mkt_close = midnight + pd.to_timedelta('16:00:00') +mkt_close = midnight + pd.to_timedelta("16:00:00") # Configure an appropriate oracle for all traded stocks. @@ -157,20 +187,36 @@ # Create the exchange. 
num_exchanges = 1 -agents.extend([ ExchangeAgent(j, "Exchange Agent {}".format(j), "ExchangeAgent", mkt_open, mkt_close, [s for s in symbols], log_orders=log_orders, book_freq=book_freq, pipeline_delay = 0, computation_delay = 0, stream_history = 10, random_state = np.random.RandomState(seed=np.random.randint(low=0,high=2**32, dtype='uint64'))) - for j in range(agent_count, agent_count + num_exchanges) ]) +agents.extend( + [ + ExchangeAgent( + j, + "Exchange Agent {}".format(j), + "ExchangeAgent", + mkt_open, + mkt_close, + [s for s in symbols], + log_orders=log_orders, + book_freq=book_freq, + pipeline_delay=0, + computation_delay=0, + stream_history=10, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + for j in range(agent_count, agent_count + num_exchanges) + ] +) agent_types.extend(["ExchangeAgent" for j in range(num_exchanges)]) agent_count += num_exchanges - ### Configure some zero intelligence agents. # Cash in this simulator is always in CENTS. starting_cash = 10000000 # Here are the zero intelligence agents. -symbol = 'JPM' +symbol = "JPM" s = symbols[symbol] # Tuples are: (# agents, R_min, R_max, eta). @@ -178,16 +224,47 @@ # Some configs for ZI agents only (among seven parameter settings). # 1000 agents -zi = [ (143, 0, 250, 1), (143, 0, 500, 1), (143, 0, 1000, 0.8), (143, 0, 1000, 1), (143, 0, 2000, 0.8), (143, 250, 500, 0.8), (142, 250, 500, 1) ] +zi = [ + (143, 0, 250, 1), + (143, 0, 500, 1), + (143, 0, 1000, 0.8), + (143, 0, 1000, 1), + (143, 0, 2000, 0.8), + (143, 250, 500, 0.8), + (142, 250, 500, 1), +] # ZI strategy split. Note that agent arrival rates are quite small, because our minimum # time step is a nanosecond, and we want the agents to arrive more on the order of # minutes. 
-for i,x in enumerate(zi): - strat_name = "Type {} [{} <= R <= {}, eta={}]".format(i+1, x[1], x[2], x[3]) - agents.extend([ ZeroIntelligenceAgent(j, "ZI Agent {} {}".format(j, strat_name), "ZeroIntelligenceAgent {}".format(strat_name), random_state = np.random.RandomState(seed=np.random.randint(low=0,high=2**32, dtype='uint64')),log_orders=log_orders, symbol=symbol, starting_cash=starting_cash, sigma_n=sigma_n, r_bar=s['r_bar'], kappa=s['agent_kappa'], sigma_s=s['fund_vol'], q_max=10, sigma_pv=5e6, R_min=x[1], R_max=x[2], eta=x[3], lambda_a=1e-12) for j in range(agent_count,agent_count+x[0]) ]) - agent_types.extend([ "ZeroIntelligenceAgent {}".format(strat_name) for j in range(x[0]) ]) - agent_count += x[0] +for i, x in enumerate(zi): + strat_name = "Type {} [{} <= R <= {}, eta={}]".format(i + 1, x[1], x[2], x[3]) + agents.extend( + [ + ZeroIntelligenceAgent( + j, + "ZI Agent {} {}".format(j, strat_name), + "ZeroIntelligenceAgent {}".format(strat_name), + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + log_orders=log_orders, + symbol=symbol, + starting_cash=starting_cash, + sigma_n=sigma_n, + r_bar=s["r_bar"], + kappa=s["agent_kappa"], + sigma_s=s["fund_vol"], + q_max=10, + sigma_pv=5e6, + R_min=x[1], + R_max=x[2], + eta=x[3], + lambda_a=1e-12, + ) + for j in range(agent_count, agent_count + x[0]) + ] + ) + agent_types.extend(["ZeroIntelligenceAgent {}".format(strat_name) for j in range(x[0])]) + agent_count += x[0] ### Configure a simple message latency matrix for the agents. Each entry is the minimum @@ -200,36 +277,39 @@ # Other agents can be explicitly set afterward (and the mirror half of the matrix is also). # This configures all agents to a starting latency as described above. 
-latency = np.random.uniform(low = 21000, high = 13000000, size=(len(agent_types),len(agent_types))) +latency = np.random.uniform(low=21000, high=13000000, size=(len(agent_types), len(agent_types))) # Overriding the latency for certain agent pairs happens below, as does forcing mirroring # of the matrix to be symmetric. for i, t1 in zip(range(latency.shape[0]), agent_types): - for j, t2 in zip(range(latency.shape[1]), agent_types): - # Three cases for symmetric array. Set latency when j > i, copy it when i > j, same agent when i == j. - if j > i: - # Presently, strategy agents shouldn't be talking to each other, so we set them to extremely high latency. - if (t1 == "ZeroIntelligenceAgent" and t2 == "ZeroIntelligenceAgent"): - latency[i,j] = 1000000000 * 60 * 60 * 24 # Twenty-four hours. - elif i > j: - # This "bottom" half of the matrix simply mirrors the top. - latency[i,j] = latency[j,i] - else: - # This is the same agent. How long does it take to reach localhost? In our data center, it actually - # takes about 20 microseconds. - latency[i,j] = 20000 + for j, t2 in zip(range(latency.shape[1]), agent_types): + # Three cases for symmetric array. Set latency when j > i, copy it when i > j, same agent when i == j. + if j > i: + # Presently, strategy agents shouldn't be talking to each other, so we set them to extremely high latency. + if t1 == "ZeroIntelligenceAgent" and t2 == "ZeroIntelligenceAgent": + latency[i, j] = 1000000000 * 60 * 60 * 24 # Twenty-four hours. + elif i > j: + # This "bottom" half of the matrix simply mirrors the top. + latency[i, j] = latency[j, i] + else: + # This is the same agent. How long does it take to reach localhost? In our data center, it actually + # takes about 20 microseconds. + latency[i, j] = 20000 # Configure a simple latency noise model for the agents. # Index is ns extra delay, value is probability of this delay being applied. 
-noise = [ 0.25, 0.25, 0.20, 0.15, 0.10, 0.05 ] - +noise = [0.25, 0.25, 0.20, 0.15, 0.10, 0.05] # Start the kernel running. -kernel.runner(agents = agents, startTime = kernelStartTime, - stopTime = kernelStopTime, agentLatency = latency, - latencyNoise = noise, - defaultComputationDelay = defaultComputationDelay, - oracle = oracle, log_dir = log_dir) - +kernel.runner( + agents=agents, + startTime=kernelStartTime, + stopTime=kernelStopTime, + agentLatency=latency, + latencyNoise=noise, + defaultComputationDelay=defaultComputationDelay, + oracle=oracle, + log_dir=log_dir, +) diff --git a/config/sum.py b/config/sum.py index a79c75ab2..2c259f62a 100644 --- a/config/sum.py +++ b/config/sum.py @@ -1,37 +1,40 @@ -from Kernel import Kernel -from agent.examples.SumClientAgent import SumClientAgent -from agent.examples.SumServiceAgent import SumServiceAgent -from util import util +# Some config files require additional command line parameters to easily +# control agent or simulation hyperparameters during coarse parallelization. +import argparse +import sys import numpy as np import pandas as pd -import sys +from agent.examples.SumClientAgent import SumClientAgent +from agent.examples.SumServiceAgent import SumServiceAgent +from Kernel import Kernel +from util import util -# Some config files require additional command line parameters to easily -# control agent or simulation hyperparameters during coarse parallelization. 
-import argparse - -parser = argparse.ArgumentParser(description='Detailed options for example sum config.') -parser.add_argument('-c', '--config', required=True, - help='Name of config file to execute') -parser.add_argument('-l', '--log_dir', default=None, - help='Log directory name (default: unix timestamp at program start)') -parser.add_argument('-s', '--seed', type=int, default=None, - help='numpy.random.seed() for simulation') -parser.add_argument('-v', '--verbose', action='store_true', - help='Maximum verbosity!') -parser.add_argument('--config_help', action='store_true', - help='Print argument options for this config file') +parser = argparse.ArgumentParser(description="Detailed options for example sum config.") +parser.add_argument("-c", "--config", required=True, help="Name of config file to execute") +parser.add_argument( + "-l", + "--log_dir", + default=None, + help="Log directory name (default: unix timestamp at program start)", +) +parser.add_argument("-s", "--seed", type=int, default=None, help="numpy.random.seed() for simulation") +parser.add_argument("-v", "--verbose", action="store_true", help="Maximum verbosity!") +parser.add_argument( + "--config_help", + action="store_true", + help="Print argument options for this config file", +) args, remaining_args = parser.parse_known_args() if args.config_help: - parser.print_help() - sys.exit() + parser.print_help() + sys.exit() # Historical date to simulate. Required even if not relevant. -historical_date = pd.to_datetime('2014-01-28') +historical_date = pd.to_datetime("2014-01-28") # Requested log directory. log_dir = args.log_dir @@ -50,15 +53,15 @@ # before) seed = args.seed -if not seed: seed = int(pd.Timestamp.now().timestamp() * 1000000) % (2**32 - 1) +if not seed: + seed = int(pd.Timestamp.now().timestamp() * 1000000) % (2**32 - 1) np.random.seed(seed) # Config parameter that causes util.util.print to suppress most output. 
util.silent_mode = not args.verbose -print ("Silent mode: {}".format(util.silent_mode)) -print ("Configuration seed: {}\n".format(seed)) - +print("Silent mode: {}".format(util.silent_mode)) +print("Configuration seed: {}\n".format(seed)) # Since the simulator often pulls historical data, we use a real-world @@ -73,12 +76,12 @@ kernelStartTime = midnight # When should the Kernel shut down? -kernelStopTime = midnight + pd.to_timedelta('17:00:00') +kernelStopTime = midnight + pd.to_timedelta("17:00:00") # This will configure the kernel with a default computation delay # (time penalty) for each agent's wakeup and recvMsg. An agent # can change this at any time for itself. (nanoseconds) -defaultComputationDelay = 1000000000 * 5 # five seconds +defaultComputationDelay = 1000000000 * 5 # five seconds # IMPORTANT NOTE CONCERNING AGENT IDS: the id passed to each agent must: @@ -89,7 +92,10 @@ ### Configure the Kernel. -kernel = Kernel("Base Kernel", random_state = np.random.RandomState(seed=np.random.randint(low=0,high=2**32))) +kernel = Kernel( + "Base Kernel", + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32)), +) ### Configure the agents. When conducting "agent of change" experiments, the @@ -105,23 +111,40 @@ ### Configure a sum service agent. -agents.extend([ SumServiceAgent(0, "Sum Service Agent 0", "SumServiceAgent", - random_state = np.random.RandomState(seed=np.random.randint(low=0,high=2**32)), - num_clients = num_clients) ]) +agents.extend( + [ + SumServiceAgent( + 0, + "Sum Service Agent 0", + "SumServiceAgent", + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32)), + num_clients=num_clients, + ) + ] +) agent_types.extend(["SumServiceAgent"]) agent_count += 1 - ### Configure a population of sum client agents. 
a, b = agent_count, agent_count + num_clients -agents.extend([ SumClientAgent(i, "Sum Client Agent {}".format(i), "SumClientAgent", peer_list = [ x for x in range(a,b) if x != i ], random_state = np.random.RandomState(seed=np.random.randint(low=0,high=2**32))) for i in range(a,b) ]) -agent_types.extend([ "SumClientAgent" for i in range(a,b) ]) +agents.extend( + [ + SumClientAgent( + i, + "Sum Client Agent {}".format(i), + "SumClientAgent", + peer_list=[x for x in range(a, b) if x != i], + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32)), + ) + for i in range(a, b) + ] +) +agent_types.extend(["SumClientAgent" for i in range(a, b)]) agent_count += num_clients - ### Configure a simple message latency matrix for the agents. Each entry is the minimum ### nanosecond delay on communication [from][to] agent ID. @@ -132,32 +155,35 @@ # Other agents can be explicitly set afterward (and the mirror half of the matrix is also). # This configures all agents to a starting latency as described above. -latency = np.random.uniform(low = 21000, high = 13000000, size=(len(agent_types),len(agent_types))) +latency = np.random.uniform(low=21000, high=13000000, size=(len(agent_types), len(agent_types))) # Overriding the latency for certain agent pairs happens below, as does forcing mirroring # of the matrix to be symmetric. for i, t1 in zip(range(latency.shape[0]), agent_types): - for j, t2 in zip(range(latency.shape[1]), agent_types): - # Three cases for symmetric array. Set latency when j > i, copy it when i > j, same agent when i == j. - # The j > i case is handled in the initialization above, unless we need to override specific agents. - if i > j: - # This "bottom" half of the matrix simply mirrors the top. - latency[i,j] = latency[j,i] - elif i == j: - # This is the same agent. How long does it take to reach localhost? In our data center, it actually - # takes about 20 microseconds. 
- latency[i,j] = 20000 + for j, t2 in zip(range(latency.shape[1]), agent_types): + # Three cases for symmetric array. Set latency when j > i, copy it when i > j, same agent when i == j. + # The j > i case is handled in the initialization above, unless we need to override specific agents. + if i > j: + # This "bottom" half of the matrix simply mirrors the top. + latency[i, j] = latency[j, i] + elif i == j: + # This is the same agent. How long does it take to reach localhost? In our data center, it actually + # takes about 20 microseconds. + latency[i, j] = 20000 # Configure a simple latency noise model for the agents. # Index is ns extra delay, value is probability of this delay being applied. -noise = [ 0.25, 0.25, 0.20, 0.15, 0.10, 0.05 ] - +noise = [0.25, 0.25, 0.20, 0.15, 0.10, 0.05] # Start the kernel running. -kernel.runner(agents = agents, startTime = kernelStartTime, stopTime = kernelStopTime, - agentLatency = latency, latencyNoise = noise, - defaultComputationDelay = defaultComputationDelay, - log_dir = log_dir) - +kernel.runner( + agents=agents, + startTime=kernelStartTime, + stopTime=kernelStopTime, + agentLatency=latency, + latencyNoise=noise, + defaultComputationDelay=defaultComputationDelay, + log_dir=log_dir, +) diff --git a/config/twoSymbols.py b/config/twoSymbols.py index e947cb26e..a1afcede1 100644 --- a/config/twoSymbols.py +++ b/config/twoSymbols.py @@ -1,20 +1,20 @@ -from Kernel import Kernel -from agent.ExchangeAgent import ExchangeAgent -from agent.etf.EtfPrimaryAgent import EtfPrimaryAgent -from agent.HeuristicBeliefLearningAgent import HeuristicBeliefLearningAgent -from agent.examples.ImpactAgent import ImpactAgent -from agent.ZeroIntelligenceAgent import ZeroIntelligenceAgent -from agent.examples.MomentumAgent import MomentumAgent -from agent.etf.EtfArbAgent import EtfArbAgent -from agent.etf.EtfMarketMakerAgent import EtfMarketMakerAgent -from util.order import LimitOrder -from util.oracle.MeanRevertingOracle import MeanRevertingOracle 
-from util import util +import sys import numpy as np import pandas as pd -import sys +from agent.etf.EtfArbAgent import EtfArbAgent +from agent.etf.EtfMarketMakerAgent import EtfMarketMakerAgent +from agent.etf.EtfPrimaryAgent import EtfPrimaryAgent +from agent.examples.ImpactAgent import ImpactAgent +from agent.examples.MomentumAgent import MomentumAgent +from agent.ExchangeAgent import ExchangeAgent +from agent.HeuristicBeliefLearningAgent import HeuristicBeliefLearningAgent +from agent.ZeroIntelligenceAgent import ZeroIntelligenceAgent +from Kernel import Kernel +from util import util +from util.oracle.MeanRevertingOracle import MeanRevertingOracle +from util.order import LimitOrder DATA_DIR = "~/data" @@ -23,38 +23,58 @@ # control agent or simulation hyperparameters during coarse parallelization. import argparse -parser = argparse.ArgumentParser(description='Detailed options for momentum config.') -parser.add_argument('-b', '--book_freq', default=None, - help='Frequency at which to archive order book for visualization') -parser.add_argument('-c', '--config', required=True, - help='Name of config file to execute') -parser.add_argument('-g', '--greed', type=float, default=0.25, - help='Impact agent greed') -parser.add_argument('-i', '--impact', action='store_false', - help='Do not actually fire an impact trade.') -parser.add_argument('-l', '--log_dir', default=None, - help='Log directory name (default: unix timestamp at program start)') -parser.add_argument('-n', '--obs_noise', type=float, default=1000000, - help='Observation noise variance for zero intelligence agents (sigma^2_n)') -parser.add_argument('-r', '--shock_variance', type=float, default=500000, - help='Shock variance for mean reversion process (sigma^2_s)') -parser.add_argument('-o', '--log_orders', action='store_true', - help='Log every order-related action by every agent.') -parser.add_argument('-s', '--seed', type=int, default=None, - help='numpy.random.seed() for simulation') 
-parser.add_argument('-v', '--verbose', action='store_true', - help='Maximum verbosity!') -parser.add_argument('--config_help', action='store_true', - help='Print argument options for this config file') +parser = argparse.ArgumentParser(description="Detailed options for momentum config.") +parser.add_argument( + "-b", + "--book_freq", + default=None, + help="Frequency at which to archive order book for visualization", +) +parser.add_argument("-c", "--config", required=True, help="Name of config file to execute") +parser.add_argument("-g", "--greed", type=float, default=0.25, help="Impact agent greed") +parser.add_argument("-i", "--impact", action="store_false", help="Do not actually fire an impact trade.") +parser.add_argument( + "-l", + "--log_dir", + default=None, + help="Log directory name (default: unix timestamp at program start)", +) +parser.add_argument( + "-n", + "--obs_noise", + type=float, + default=1000000, + help="Observation noise variance for zero intelligence agents (sigma^2_n)", +) +parser.add_argument( + "-r", + "--shock_variance", + type=float, + default=500000, + help="Shock variance for mean reversion process (sigma^2_s)", +) +parser.add_argument( + "-o", + "--log_orders", + action="store_true", + help="Log every order-related action by every agent.", +) +parser.add_argument("-s", "--seed", type=int, default=None, help="numpy.random.seed() for simulation") +parser.add_argument("-v", "--verbose", action="store_true", help="Maximum verbosity!") +parser.add_argument( + "--config_help", + action="store_true", + help="Print argument options for this config file", +) args, remaining_args = parser.parse_known_args() if args.config_help: - parser.print_help() - sys.exit() + parser.print_help() + sys.exit() # Historical date to simulate. Required even if not relevant. -historical_date = pd.to_datetime('2014-01-28') +historical_date = pd.to_datetime("2014-01-28") # Requested log directory. 
log_dir = args.log_dir @@ -88,7 +108,8 @@ # before) seed = args.seed -if not seed: seed = int(pd.Timestamp.now().timestamp() * 1000000) % (2**32 - 1) +if not seed: + seed = int(pd.Timestamp.now().timestamp() * 1000000) % (2**32 - 1) np.random.seed(seed) # Config parameter that causes util.util.print to suppress most output. @@ -102,15 +123,14 @@ log_orders = args.log_orders -print ("Silent mode: {}".format(util.silent_mode)) -print ("Logging orders: {}".format(log_orders)) -print ("Book freq: {}".format(book_freq)) -print ("ZeroIntelligenceAgent noise: {:0.4f}".format(sigma_n)) -print ("ImpactAgent greed: {:0.2f}".format(greed)) -print ("ImpactAgent firing: {}".format(impact)) -print ("Shock variance: {:0.4f}".format(sigma_s)) -print ("Configuration seed: {}\n".format(seed)) - +print("Silent mode: {}".format(util.silent_mode)) +print("Logging orders: {}".format(log_orders)) +print("Book freq: {}".format(book_freq)) +print("ZeroIntelligenceAgent noise: {:0.4f}".format(sigma_n)) +print("ImpactAgent greed: {:0.2f}".format(greed)) +print("ImpactAgent firing: {}".format(impact)) +print("Shock variance: {:0.4f}".format(sigma_s)) +print("Configuration seed: {}\n".format(seed)) # Since the simulator often pulls historical data, we use a real-world @@ -126,12 +146,12 @@ # When should the Kernel shut down? (This should be after market close.) # Here we go for 8:00 PM the same day to reflect the ETF primary market -kernelStopTime = midnight + pd.to_timedelta('20:00:00') +kernelStopTime = midnight + pd.to_timedelta("20:00:00") # This will configure the kernel with a default computation delay # (time penalty) for each agent's wakeup and recvMsg. An agent # can change this at any time for itself. (nanoseconds) -defaultComputationDelay = 0 # no delay for this config +defaultComputationDelay = 0 # no delay for this config # IMPORTANT NOTE CONCERNING AGENT IDS: the id passed to each agent must: @@ -146,16 +166,27 @@ # only IBM. 
This config uses generated data, so the symbol doesn't really matter. # If shock variance must differ for each traded symbol, it can be overridden here. -symbols = { 'SYM1' : { 'r_bar' : 100000, 'kappa' : 0.05, 'sigma_s' : sigma_s }, 'SYM2': { 'r_bar' : 150000, 'kappa' : 0.05, 'sigma_s' : sigma_s } , 'ETF' : { 'r_bar' : 250000, 'kappa' : 0.10, 'sigma_s' : np.sqrt(2) * sigma_s, 'portfolio': ['SYM1', 'SYM2']}} -#symbols = { 'IBM' : { 'r_bar' : 100000, 'kappa' : 0.05, 'sigma_s' : sigma_s }, 'GOOG' : { 'r_bar' : 150000, 'kappa' : 0.05, 'sigma_s' : sigma_s } } +symbols = { + "SYM1": {"r_bar": 100000, "kappa": 0.05, "sigma_s": sigma_s}, + "SYM2": {"r_bar": 150000, "kappa": 0.05, "sigma_s": sigma_s}, + "ETF": { + "r_bar": 250000, + "kappa": 0.10, + "sigma_s": np.sqrt(2) * sigma_s, + "portfolio": ["SYM1", "SYM2"], + }, +} +# symbols = { 'IBM' : { 'r_bar' : 100000, 'kappa' : 0.05, 'sigma_s' : sigma_s }, 'GOOG' : { 'r_bar' : 150000, 'kappa' : 0.05, 'sigma_s' : sigma_s } } symbols_full = symbols.copy() -#seed=np.random.randint(low=0,high=2**32) -#seed = 2000 +# seed=np.random.randint(low=0,high=2**32) +# seed = 2000 ### Configure the Kernel. -kernel = Kernel("Base Kernel", random_state = np.random.RandomState(seed=np.random.randint(low=0,high=2**32))) - +kernel = Kernel( + "Base Kernel", + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32)), +) ### Configure the agents. When conducting "agent of change" experiments, the @@ -168,11 +199,11 @@ ### Configure an exchange agent. # Let's open the exchange at 9:30 AM. -mkt_open = midnight + pd.to_timedelta('09:30:00') +mkt_open = midnight + pd.to_timedelta("09:30:00") # And close it at 9:30:00.000001 (i.e. 
1,000 nanoseconds or "time steps") -#mkt_close = midnight + pd.to_timedelta('09:30:00.001') -mkt_close = midnight + pd.to_timedelta('9:30:00.000001') +# mkt_close = midnight + pd.to_timedelta('09:30:00.001') +mkt_close = midnight + pd.to_timedelta("9:30:00.000001") # Configure an appropriate oracle for all traded stocks. @@ -182,22 +213,53 @@ # Create the exchange. num_exchanges = 1 -agents.extend([ ExchangeAgent(j, "Exchange Agent {}".format(j), "ExchangeAgent", mkt_open, mkt_close, [s for s in symbols_full], log_orders=log_orders, book_freq=book_freq, pipeline_delay = 0, computation_delay = 0, stream_history = 10, random_state = np.random.RandomState(seed=np.random.randint(low=0,high=2**32))) - for j in range(agent_count, agent_count + num_exchanges) ]) +agents.extend( + [ + ExchangeAgent( + j, + "Exchange Agent {}".format(j), + "ExchangeAgent", + mkt_open, + mkt_close, + [s for s in symbols_full], + log_orders=log_orders, + book_freq=book_freq, + pipeline_delay=0, + computation_delay=0, + stream_history=10, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32)), + ) + for j in range(agent_count, agent_count + num_exchanges) + ] +) agent_types.extend(["ExchangeAgent" for j in range(num_exchanges)]) agent_count += num_exchanges # Let's open the exchange at 5:00 PM. -prime_open = midnight + pd.to_timedelta('17:00:00') +prime_open = midnight + pd.to_timedelta("17:00:00") # And close it at 5:00:01 PM -prime_close = midnight + pd.to_timedelta('17:00:01') +prime_close = midnight + pd.to_timedelta("17:00:01") # Create the primary. 
num_primes = 1 -agents.extend([ EtfPrimaryAgent(j, "ETF Primary Agent {}".format(j), "EtfPrimaryAgent", prime_open, prime_close, 'ETF', pipeline_delay = 0, computation_delay = 0, random_state = np.random.RandomState(seed=np.random.randint(low=0,high=2**32))) - for j in range(agent_count, agent_count + num_primes) ]) +agents.extend( + [ + EtfPrimaryAgent( + j, + "ETF Primary Agent {}".format(j), + "EtfPrimaryAgent", + prime_open, + prime_close, + "ETF", + pipeline_delay=0, + computation_delay=0, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32)), + ) + for j in range(agent_count, agent_count + num_primes) + ] +) agent_types.extend(["EtfPrimeAgent" for j in range(num_primes)]) agent_count += num_primes @@ -208,9 +270,9 @@ starting_cash = 10000000 # Here are the zero intelligence agents. -symbol1 = 'SYM1' -symbol2 = 'SYM2' -symbol3 = 'ETF' +symbol1 = "SYM1" +symbol2 = "SYM2" +symbol3 = "ETF" print(symbols_full) s1 = symbols_full[symbol1] s2 = symbols_full[symbol2] @@ -221,170 +283,370 @@ # Some configs for ZI agents only (among seven parameter settings). 
# 4 agents -#zi = [ (1, 0, 250, 1), (1, 0, 500, 1), (1, 0, 1000, 0.8), (1, 0, 1000, 1), (0, 0, 2000, 0.8), (0, 250, 500, 0.8), (0, 250, 500, 1) ] -#hbl = [] +# zi = [ (1, 0, 250, 1), (1, 0, 500, 1), (1, 0, 1000, 0.8), (1, 0, 1000, 1), (0, 0, 2000, 0.8), (0, 250, 500, 0.8), (0, 250, 500, 1) ] +# hbl = [] # 28 agents -#zi = [ (4, 0, 250, 1), (4, 0, 500, 1), (4, 0, 1000, 0.8), (4, 0, 1000, 1), (4, 0, 2000, 0.8), (4, 250, 500, 0.8), (4, 250, 500, 1) ] -#hbl = [] +# zi = [ (4, 0, 250, 1), (4, 0, 500, 1), (4, 0, 1000, 0.8), (4, 0, 1000, 1), (4, 0, 2000, 0.8), (4, 250, 500, 0.8), (4, 250, 500, 1) ] +# hbl = [] # 65 agents -#zi = [ (10, 0, 250, 1), (10, 0, 500, 1), (9, 0, 1000, 0.8), (9, 0, 1000, 1), (9, 0, 2000, 0.8), (9, 250, 500, 0.8), (9, 250, 500, 1) ] -#hbl = [] +# zi = [ (10, 0, 250, 1), (10, 0, 500, 1), (9, 0, 1000, 0.8), (9, 0, 1000, 1), (9, 0, 2000, 0.8), (9, 250, 500, 0.8), (9, 250, 500, 1) ] +# hbl = [] # 100 agents -#zi = [ (15, 0, 250, 1), (15, 0, 500, 1), (14, 0, 1000, 0.8), (14, 0, 1000, 1), (14, 0, 2000, 0.8), (14, 250, 500, 0.8), (14, 250, 500, 1) ] -#hbl = [] +# zi = [ (15, 0, 250, 1), (15, 0, 500, 1), (14, 0, 1000, 0.8), (14, 0, 1000, 1), (14, 0, 2000, 0.8), (14, 250, 500, 0.8), (14, 250, 500, 1) ] +# hbl = [] # 1000 agents -#zi = [ (143, 0, 250, 1), (143, 0, 500, 1), (143, 0, 1000, 0.8), (143, 0, 1000, 1), (143, 0, 2000, 0.8), (143, 250, 500, 0.8), (142, 250, 500, 1) ] -#hbl = [] +# zi = [ (143, 0, 250, 1), (143, 0, 500, 1), (143, 0, 1000, 0.8), (143, 0, 1000, 1), (143, 0, 2000, 0.8), (143, 250, 500, 0.8), (142, 250, 500, 1) ] +# hbl = [] # 10000 agents -#zi = [ (1429, 0, 250, 1), (1429, 0, 500, 1), (1429, 0, 1000, 0.8), (1429, 0, 1000, 1), (1428, 0, 2000, 0.8), (1428, 250, 500, 0.8), (1428, 250, 500, 1) ] -#hbl = [] +# zi = [ (1429, 0, 250, 1), (1429, 0, 500, 1), (1429, 0, 1000, 0.8), (1429, 0, 1000, 1), (1428, 0, 2000, 0.8), (1428, 250, 500, 0.8), (1428, 250, 500, 1) ] +# hbl = [] # Some configs for HBL agents only (among four parameter settings). 
# 4 agents -#zi = [] -#hbl = [ (1, 250, 500, 1, 2), (1, 250, 500, 1, 3), (1, 250, 500, 1, 5), (1, 250, 500, 1, 8) ] +# zi = [] +# hbl = [ (1, 250, 500, 1, 2), (1, 250, 500, 1, 3), (1, 250, 500, 1, 5), (1, 250, 500, 1, 8) ] # 28 agents -#zi = [] -#hbl = [ (7, 250, 500, 1, 2), (7, 250, 500, 1, 3), (7, 250, 500, 1, 5), (7, 250, 500, 1, 8) ] +# zi = [] +# hbl = [ (7, 250, 500, 1, 2), (7, 250, 500, 1, 3), (7, 250, 500, 1, 5), (7, 250, 500, 1, 8) ] # 1000 agents -#zi = [] -#hbl = [ (250, 250, 500, 1, 2), (250, 250, 500, 1, 3), (250, 250, 500, 1, 5), (250, 250, 500, 1, 8) ] +# zi = [] +# hbl = [ (250, 250, 500, 1, 2), (250, 250, 500, 1, 3), (250, 250, 500, 1, 5), (250, 250, 500, 1, 8) ] # Some configs that mix both types of agents. # 28 agents -#zi = [ (3, 0, 250, 1), (3, 0, 500, 1), (3, 0, 1000, 0.8), (3, 0, 1000, 1), (3, 0, 2000, 0.8), (3, 250, 500, 0.8), (2, 250, 500, 1) ] -#hbl = [ (2, 250, 500, 1, 2), (2, 250, 500, 1, 3), (2, 250, 500, 1, 5), (2, 250, 500, 1, 8) ] +# zi = [ (3, 0, 250, 1), (3, 0, 500, 1), (3, 0, 1000, 0.8), (3, 0, 1000, 1), (3, 0, 2000, 0.8), (3, 250, 500, 0.8), (2, 250, 500, 1) ] +# hbl = [ (2, 250, 500, 1, 2), (2, 250, 500, 1, 3), (2, 250, 500, 1, 5), (2, 250, 500, 1, 8) ] # 65 agents -#zi = [ (7, 0, 250, 1), (7, 0, 500, 1), (7, 0, 1000, 0.8), (7, 0, 1000, 1), (7, 0, 2000, 0.8), (7, 250, 500, 0.8), (7, 250, 500, 1) ] -#hbl = [ (4, 250, 500, 1, 2), (4, 250, 500, 1, 3), (4, 250, 500, 1, 5), (4, 250, 500, 1, 8) ] +# zi = [ (7, 0, 250, 1), (7, 0, 500, 1), (7, 0, 1000, 0.8), (7, 0, 1000, 1), (7, 0, 2000, 0.8), (7, 250, 500, 0.8), (7, 250, 500, 1) ] +# hbl = [ (4, 250, 500, 1, 2), (4, 250, 500, 1, 3), (4, 250, 500, 1, 5), (4, 250, 500, 1, 8) ] # 1000 agents -zi = [ (100, 0, 250, 1), (100, 0, 500, 1), (100, 0, 10000, 0.8), (100, 0, 10000, 1), (100, 0, 2000, 0.8), (100, 250, 500, 0.8), (100, 250, 500, 1) ] -hbl = [ (75, 250, 500, 1, 2), (75, 250, 500, 1, 3), (75, 250, 500, 1, 5), (75, 250, 500, 1, 8) ] - +zi = [ + (100, 0, 250, 1), + (100, 0, 500, 1), + 
(100, 0, 10000, 0.8), + (100, 0, 10000, 1), + (100, 0, 2000, 0.8), + (100, 250, 500, 0.8), + (100, 250, 500, 1), +] +hbl = [ + (75, 250, 500, 1, 2), + (75, 250, 500, 1, 3), + (75, 250, 500, 1, 5), + (75, 250, 500, 1, 8), +] # ZI strategy split. -for i,x in enumerate(zi): - strat_name = "Type {} [{} <= R <= {}, eta={}]".format(i+1, x[1], x[2], x[3]) - agents.extend([ ZeroIntelligenceAgent(j, "ZI Agent {} {}".format(j, strat_name), "ZeroIntelligenceAgent {}".format(strat_name), random_state = np.random.RandomState(seed=np.random.randint(low=0,high=2**32)),log_orders=log_orders, symbol=symbol1, starting_cash=starting_cash, sigma_n=sigma_n, r_bar=s1['r_bar'], q_max=10, sigma_pv=5000000, R_min=x[1], R_max=x[2], eta=x[3], lambda_a=0.005) for j in range(agent_count,agent_count+x[0]) ]) - agent_types.extend([ "ZeroIntelligenceAgent {}".format(strat_name) for j in range(x[0]) ]) - agent_count += x[0] - -for i,x in enumerate(zi): - strat_name = "Type {} [{} <= R <= {}, eta={}]".format(i+1, x[1], x[2], x[3]) - agents.extend([ ZeroIntelligenceAgent(j, "ZI Agent {} {}".format(j, strat_name), "ZeroIntelligenceAgent {}".format(strat_name), random_state = np.random.RandomState(seed=np.random.randint(low=0,high=2**32)),log_orders=log_orders, symbol=symbol2, starting_cash=starting_cash, sigma_n=sigma_n, r_bar=s2['r_bar'], q_max=10, sigma_pv=5000000, R_min=x[1], R_max=x[2], eta=x[3], lambda_a=0.005) for j in range(agent_count,agent_count+x[0]) ]) - agent_types.extend([ "ZeroIntelligenceAgent {}".format(strat_name) for j in range(x[0]) ]) - agent_count += x[0] - -for i,x in enumerate(zi): - strat_name = "Type {} [{} <= R <= {}, eta={}]".format(i+1, x[1], x[2], x[3]) - agents.extend([ ZeroIntelligenceAgent(j, "ZI Agent {} {}".format(j, strat_name), "ZeroIntelligenceAgent {}".format(strat_name), random_state = np.random.RandomState(seed=np.random.randint(low=0,high=2**32)),log_orders=log_orders, symbol=symbol3, starting_cash=starting_cash, sigma_n=sigma_n, portfolio = 
{'SYM1':s1['r_bar'], 'SYM2': s2['r_bar']}, q_max=10, sigma_pv=5000000, R_min=x[1], R_max=x[2], eta=x[3], lambda_a=0.005) for j in range(agent_count,agent_count+x[0]) ]) - agent_types.extend([ "ZeroIntelligenceAgent {}".format(strat_name) for j in range(x[0]) ]) - agent_count += x[0] - - +for i, x in enumerate(zi): + strat_name = "Type {} [{} <= R <= {}, eta={}]".format(i + 1, x[1], x[2], x[3]) + agents.extend( + [ + ZeroIntelligenceAgent( + j, + "ZI Agent {} {}".format(j, strat_name), + "ZeroIntelligenceAgent {}".format(strat_name), + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32)), + log_orders=log_orders, + symbol=symbol1, + starting_cash=starting_cash, + sigma_n=sigma_n, + r_bar=s1["r_bar"], + q_max=10, + sigma_pv=5000000, + R_min=x[1], + R_max=x[2], + eta=x[3], + lambda_a=0.005, + ) + for j in range(agent_count, agent_count + x[0]) + ] + ) + agent_types.extend(["ZeroIntelligenceAgent {}".format(strat_name) for j in range(x[0])]) + agent_count += x[0] + +for i, x in enumerate(zi): + strat_name = "Type {} [{} <= R <= {}, eta={}]".format(i + 1, x[1], x[2], x[3]) + agents.extend( + [ + ZeroIntelligenceAgent( + j, + "ZI Agent {} {}".format(j, strat_name), + "ZeroIntelligenceAgent {}".format(strat_name), + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32)), + log_orders=log_orders, + symbol=symbol2, + starting_cash=starting_cash, + sigma_n=sigma_n, + r_bar=s2["r_bar"], + q_max=10, + sigma_pv=5000000, + R_min=x[1], + R_max=x[2], + eta=x[3], + lambda_a=0.005, + ) + for j in range(agent_count, agent_count + x[0]) + ] + ) + agent_types.extend(["ZeroIntelligenceAgent {}".format(strat_name) for j in range(x[0])]) + agent_count += x[0] + +for i, x in enumerate(zi): + strat_name = "Type {} [{} <= R <= {}, eta={}]".format(i + 1, x[1], x[2], x[3]) + agents.extend( + [ + ZeroIntelligenceAgent( + j, + "ZI Agent {} {}".format(j, strat_name), + "ZeroIntelligenceAgent {}".format(strat_name), + 
random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32)), + log_orders=log_orders, + symbol=symbol3, + starting_cash=starting_cash, + sigma_n=sigma_n, + portfolio={"SYM1": s1["r_bar"], "SYM2": s2["r_bar"]}, + q_max=10, + sigma_pv=5000000, + R_min=x[1], + R_max=x[2], + eta=x[3], + lambda_a=0.005, + ) + for j in range(agent_count, agent_count + x[0]) + ] + ) + agent_types.extend(["ZeroIntelligenceAgent {}".format(strat_name) for j in range(x[0])]) + agent_count += x[0] + + # HBL strategy split. -for i,x in enumerate(hbl): - strat_name = "Type {} [{} <= R <= {}, eta={}, L={}]".format(i+1, x[1], x[2], x[3], x[4]) - agents.extend([ HeuristicBeliefLearningAgent(j, "HBL Agent {} {}".format(j, strat_name), "HeuristicBeliefLearningAgent {}".format(strat_name), random_state = np.random.RandomState(seed=np.random.randint(low=0,high=2**32)), log_orders=log_orders, symbol=symbol1, starting_cash=starting_cash, sigma_n=sigma_n, r_bar=s1['r_bar'], q_max=10, sigma_pv=5000000, R_min=x[1], R_max=x[2], eta=x[3], lambda_a=0.005, L=x[4]) for j in range(agent_count,agent_count+x[0]) ]) - agent_types.extend([ "HeuristicBeliefLearningAgent {}".format(strat_name) for j in range(x[0]) ]) - agent_count += x[0] - -for i,x in enumerate(hbl): - strat_name = "Type {} [{} <= R <= {}, eta={}, L={}]".format(i+1, x[1], x[2], x[3], x[4]) - agents.extend([ HeuristicBeliefLearningAgent(j, "HBL Agent {} {}".format(j, strat_name), "HeuristicBeliefLearningAgent {}".format(strat_name), random_state = np.random.RandomState(seed=np.random.randint(low=0,high=2**32)), log_orders=log_orders, symbol=symbol2, starting_cash=starting_cash, sigma_n=sigma_n, r_bar=s2['r_bar'], q_max=10, sigma_pv=5000000, R_min=x[1], R_max=x[2], eta=x[3], lambda_a=0.005, L=x[4]) for j in range(agent_count,agent_count+x[0]) ]) - agent_types.extend([ "HeuristicBeliefLearningAgent {}".format(strat_name) for j in range(x[0]) ]) - agent_count += x[0] - -for i,x in enumerate(hbl): - strat_name = "Type {} [{} <= R <= {}, 
eta={}, L={}]".format(i+1, x[1], x[2], x[3], x[4]) - agents.extend([ HeuristicBeliefLearningAgent(j, "HBL Agent {} {}".format(j, strat_name), "HeuristicBeliefLearningAgent {}".format(strat_name), random_state = np.random.RandomState(seed=np.random.randint(low=0,high=2**32)), log_orders=log_orders, symbol=symbol3, starting_cash=starting_cash, sigma_n=sigma_n, portfolio = {'SYM1':s1['r_bar'], 'SYM2': s2['r_bar']}, q_max=10, sigma_pv=5000000, R_min=x[1], R_max=x[2], eta=x[3], lambda_a=0.005, L=x[4]) for j in range(agent_count,agent_count+x[0]) ]) - agent_types.extend([ "HeuristicBeliefLearningAgent {}".format(strat_name) for j in range(x[0]) ]) - agent_count += x[0] - -# Trend followers agent +for i, x in enumerate(hbl): + strat_name = "Type {} [{} <= R <= {}, eta={}, L={}]".format(i + 1, x[1], x[2], x[3], x[4]) + agents.extend( + [ + HeuristicBeliefLearningAgent( + j, + "HBL Agent {} {}".format(j, strat_name), + "HeuristicBeliefLearningAgent {}".format(strat_name), + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32)), + log_orders=log_orders, + symbol=symbol1, + starting_cash=starting_cash, + sigma_n=sigma_n, + r_bar=s1["r_bar"], + q_max=10, + sigma_pv=5000000, + R_min=x[1], + R_max=x[2], + eta=x[3], + lambda_a=0.005, + L=x[4], + ) + for j in range(agent_count, agent_count + x[0]) + ] + ) + agent_types.extend(["HeuristicBeliefLearningAgent {}".format(strat_name) for j in range(x[0])]) + agent_count += x[0] + +for i, x in enumerate(hbl): + strat_name = "Type {} [{} <= R <= {}, eta={}, L={}]".format(i + 1, x[1], x[2], x[3], x[4]) + agents.extend( + [ + HeuristicBeliefLearningAgent( + j, + "HBL Agent {} {}".format(j, strat_name), + "HeuristicBeliefLearningAgent {}".format(strat_name), + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32)), + log_orders=log_orders, + symbol=symbol2, + starting_cash=starting_cash, + sigma_n=sigma_n, + r_bar=s2["r_bar"], + q_max=10, + sigma_pv=5000000, + R_min=x[1], + R_max=x[2], + 
eta=x[3], + lambda_a=0.005, + L=x[4], + ) + for j in range(agent_count, agent_count + x[0]) + ] + ) + agent_types.extend(["HeuristicBeliefLearningAgent {}".format(strat_name) for j in range(x[0])]) + agent_count += x[0] + +for i, x in enumerate(hbl): + strat_name = "Type {} [{} <= R <= {}, eta={}, L={}]".format(i + 1, x[1], x[2], x[3], x[4]) + agents.extend( + [ + HeuristicBeliefLearningAgent( + j, + "HBL Agent {} {}".format(j, strat_name), + "HeuristicBeliefLearningAgent {}".format(strat_name), + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32)), + log_orders=log_orders, + symbol=symbol3, + starting_cash=starting_cash, + sigma_n=sigma_n, + portfolio={"SYM1": s1["r_bar"], "SYM2": s2["r_bar"]}, + q_max=10, + sigma_pv=5000000, + R_min=x[1], + R_max=x[2], + eta=x[3], + lambda_a=0.005, + L=x[4], + ) + for j in range(agent_count, agent_count + x[0]) + ] + ) + agent_types.extend(["HeuristicBeliefLearningAgent {}".format(strat_name) for j in range(x[0])]) + agent_count += x[0] + +# Trend followers agent i = agent_count lookback = 10 num_tf = 20 for j in range(num_tf): - agents.append(MomentumAgent(i, "Momentum Agent {}".format(i), symbol=symbol1, starting_cash = starting_cash, lookback=lookback, random_state = np.random.RandomState(seed=np.random.randint(low=0,high=2**32)), log_orders = log_orders)) - agent_types.append("MomentumAgent {}".format(i)) - i+=1 + agents.append( + MomentumAgent( + i, + "Momentum Agent {}".format(i), + symbol=symbol1, + starting_cash=starting_cash, + lookback=lookback, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32)), + log_orders=log_orders, + ) + ) + agent_types.append("MomentumAgent {}".format(i)) + i += 1 agent_count += num_tf -#for j in range(num_tf): - #agents.append(MomentumAgent(i, "Momentum Agent {}".format(i), symbol=symbol2, startingCash = starting_cash, lookback=lookback)) - #agent_types.append("MomentumAgent {}".format(i)) - #i+=1 -#agent_count += num_tf - -#for j in 
range(num_tf): - #agents.append(MomentumAgent(i, "Momentum Agent {}".format(i), symbol=symbol3, startingCash = starting_cash, lookback=lookback)) - #agent_types.append("MomentumAgent {}".format(i)) - #i+=1 -#agent_count += num_tf - -# ETF arbitrage agent +# for j in range(num_tf): +# agents.append(MomentumAgent(i, "Momentum Agent {}".format(i), symbol=symbol2, startingCash = starting_cash, lookback=lookback)) +# agent_types.append("MomentumAgent {}".format(i)) +# i+=1 +# agent_count += num_tf + +# for j in range(num_tf): +# agents.append(MomentumAgent(i, "Momentum Agent {}".format(i), symbol=symbol3, startingCash = starting_cash, lookback=lookback)) +# agent_types.append("MomentumAgent {}".format(i)) +# i+=1 +# agent_count += num_tf + +# ETF arbitrage agent i = agent_count gamma = 0 num_arb = 25 for j in range(num_arb): - agents.append(EtfArbAgent(i, "Etf Arb Agent {}".format(i), "EtfArbAgent", portfolio = ['SYM1','SYM2'], gamma = gamma, starting_cash = starting_cash, lambda_a=0.005, log_orders=log_orders, random_state = np.random.RandomState(seed=np.random.randint(low=0,high=2**32)))) - agent_types.append("EtfArbAgent {}".format(i)) - i+=1 + agents.append( + EtfArbAgent( + i, + "Etf Arb Agent {}".format(i), + "EtfArbAgent", + portfolio=["SYM1", "SYM2"], + gamma=gamma, + starting_cash=starting_cash, + lambda_a=0.005, + log_orders=log_orders, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32)), + ) + ) + agent_types.append("EtfArbAgent {}".format(i)) + i += 1 agent_count += num_arb -# ETF market maker agent -#i = agent_count -#gamma = 100 -#num_mm = 10 -mm = [(5,0),(5,50),(5,100),(5,200),(5,300)] -#for j in range(num_mm): - #agents.append(EtfMarketMakerAgent(i, "Etf MM Agent {}".format(i), "EtfMarketMakerAgent", portfolio = ['IBM','GOOG'], gamma = gamma, starting_cash = starting_cash, lambda_a=0.005, log_orders=log_orders, random_state = np.random.RandomState(seed=np.random.randint(low=0,high=2**32)))) - 
#agent_types.append("EtfMarketMakerAgent {}".format(i)) - #i+=1 -#agent_count += num_mm - -for i,x in enumerate(mm): - strat_name = "Type {} [gamma = {}]".format(i+1, x[1]) - print(strat_name) - agents.extend([ EtfMarketMakerAgent(j, "Etf MM Agent {} {}".format(j, strat_name), "EtfMarketMakerAgent {}".format(strat_name), portfolio = ['SYM1','SYM2'], gamma = x[1], starting_cash = starting_cash, lambda_a=0.005, log_orders=log_orders, random_state = np.random.RandomState(seed=np.random.randint(low=0,high=2**32))) for j in range(agent_count,agent_count+x[0]) ]) - agent_types.extend([ "EtfMarketMakerAgent {}".format(strat_name) for j in range(x[0]) ]) - agent_count += x[0] +# ETF market maker agent +# i = agent_count +# gamma = 100 +# num_mm = 10 +mm = [(5, 0), (5, 50), (5, 100), (5, 200), (5, 300)] +# for j in range(num_mm): +# agents.append(EtfMarketMakerAgent(i, "Etf MM Agent {}".format(i), "EtfMarketMakerAgent", portfolio = ['IBM','GOOG'], gamma = gamma, starting_cash = starting_cash, lambda_a=0.005, log_orders=log_orders, random_state = np.random.RandomState(seed=np.random.randint(low=0,high=2**32)))) +# agent_types.append("EtfMarketMakerAgent {}".format(i)) +# i+=1 +# agent_count += num_mm + +for i, x in enumerate(mm): + strat_name = "Type {} [gamma = {}]".format(i + 1, x[1]) + print(strat_name) + agents.extend( + [ + EtfMarketMakerAgent( + j, + "Etf MM Agent {} {}".format(j, strat_name), + "EtfMarketMakerAgent {}".format(strat_name), + portfolio=["SYM1", "SYM2"], + gamma=x[1], + starting_cash=starting_cash, + lambda_a=0.005, + log_orders=log_orders, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32)), + ) + for j in range(agent_count, agent_count + x[0]) + ] + ) + agent_types.extend(["EtfMarketMakerAgent {}".format(strat_name) for j in range(x[0])]) + agent_count += x[0] # Impact agent. # 200 time steps in... 
-impact_time = midnight + pd.to_timedelta('09:30:00.0000002') +impact_time = midnight + pd.to_timedelta("09:30:00.0000002") i = agent_count -agents.append(ImpactAgent(i, "Impact Agent1 {}".format(i), "ImpactAgent1", symbol = "SYM1", starting_cash = starting_cash, impact = impact, impact_time = impact_time, random_state = np.random.RandomState(seed=np.random.randint(low=0,high=2**32)))) +agents.append( + ImpactAgent( + i, + "Impact Agent1 {}".format(i), + "ImpactAgent1", + symbol="SYM1", + starting_cash=starting_cash, + impact=impact, + impact_time=impact_time, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32)), + ) +) agent_types.append("ImpactAgent 1 {}".format(i)) agent_count += 1 -impact_time = midnight + pd.to_timedelta('09:30:00.0000005') +impact_time = midnight + pd.to_timedelta("09:30:00.0000005") i = agent_count -agents.append(ImpactAgent(i, "Impact Agent2 {}".format(i), "ImpactAgent2", symbol = "SYM1", starting_cash = starting_cash, impact = impact, impact_time = impact_time, random_state = np.random.RandomState(seed=np.random.randint(low=0,high=2**32)))) +agents.append( + ImpactAgent( + i, + "Impact Agent2 {}".format(i), + "ImpactAgent2", + symbol="SYM1", + starting_cash=starting_cash, + impact=impact, + impact_time=impact_time, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32)), + ) +) agent_types.append("ImpactAgent 2 {}".format(i)) agent_count += 1 -#i = agent_count -#agents.append(ImpactAgent(i, "Impact Agent3 {}".format(i), "ImpactAgent3", symbol = "ETF", starting_cash = starting_cash, greed = greed, impact = impact, impact_time = impact_time, random_state = np.random.RandomState(seed=np.random.randint(low=0,high=2**32)))) -#agent_types.append("ImpactAgent 3 {}".format(i)) -#agent_count += 1 +# i = agent_count +# agents.append(ImpactAgent(i, "Impact Agent3 {}".format(i), "ImpactAgent3", symbol = "ETF", starting_cash = starting_cash, greed = greed, impact = impact, impact_time = impact_time, 
random_state = np.random.RandomState(seed=np.random.randint(low=0,high=2**32)))) +# agent_types.append("ImpactAgent 3 {}".format(i)) +# agent_count += 1 ### Configure a simple message latency matrix for the agents. Each entry is the minimum ### nanosecond delay on communication [from][to] agent ID. @@ -396,61 +658,63 @@ # Other agents can be explicitly set afterward (and the mirror half of the matrix is also). # This configures all agents to a starting latency as described above. -#latency = np.random.uniform(low = 21000, high = 13000000, size=(len(agent_types),len(agent_types))) -latency = np.random.uniform(low = 10, high = 100, size=(len(agent_types),len(agent_types))) +# latency = np.random.uniform(low = 21000, high = 13000000, size=(len(agent_types),len(agent_types))) +latency = np.random.uniform(low=10, high=100, size=(len(agent_types), len(agent_types))) # Overriding the latency for certain agent pairs happens below, as does forcing mirroring # of the matrix to be symmetric. for i, t1 in zip(range(latency.shape[0]), agent_types): - for j, t2 in zip(range(latency.shape[1]), agent_types): - # Three cases for symmetric array. Set latency when j > i, copy it when i > j, same agent when i == j. - if j > i: - # Arb agents should be the fastest in the market. - if (("ExchangeAgent" in t1 and "EtfArbAgent" in t2) - or ("ExchangeAgent" in t2 and "EtfArbAgent" in t1)): - #latency[i,j] = 20000 - latency[i,j] = 5 - elif (("ExchangeAgent" in t1 and "EtfMarketMakerAgent" in t2) - or ("ExchangeAgent" in t2 and "EtfMarketMakerAgent" in t1)): - #latency[i,j] = 20000 - latency[i,j] = 1 - elif (("ExchangeAgent" in t1 and "ImpactAgent" in t2) - or ("ExchangeAgent" in t2 and "ImpactAgent" in t1)): - #latency[i,j] = 20000 - latency[i,j] = 1 - - elif i > j: - # This "bottom" half of the matrix simply mirrors the top. 
- if (("ExchangeAgent" in t1 and "EtfArbAgent" in t2) - or ("ExchangeAgent" in t2 and "EtfArbAgent" in t1)): - #latency[i,j] = 20000 - latency[i,j] = 5 - elif (("ExchangeAgent" in t1 and "EtfMarketMakerAgent" in t2) - or ("ExchangeAgent" in t2 and "EtfMarketMakerAgent" in t1)): - #latency[i,j] = 20000 - latency[i,j] = 1 - elif (("ExchangeAgent" in t1 and "ImpactAgent" in t2) - or ("ExchangeAgent" in t2 and "ImpactAgent" in t1)): - #latency[i,j] = 20000 - latency[i,j] = 1 - else: latency[i,j] = latency[j,i] - else: - # This is the same agent. How long does it take to reach localhost? In our data center, it actually - # takes about 20 microseconds. - #latency[i,j] = 10000 - latency[i,j] = 1 + for j, t2 in zip(range(latency.shape[1]), agent_types): + # Three cases for symmetric array. Set latency when j > i, copy it when i > j, same agent when i == j. + if j > i: + # Arb agents should be the fastest in the market. + if ("ExchangeAgent" in t1 and "EtfArbAgent" in t2) or ("ExchangeAgent" in t2 and "EtfArbAgent" in t1): + # latency[i,j] = 20000 + latency[i, j] = 5 + elif ("ExchangeAgent" in t1 and "EtfMarketMakerAgent" in t2) or ( + "ExchangeAgent" in t2 and "EtfMarketMakerAgent" in t1 + ): + # latency[i,j] = 20000 + latency[i, j] = 1 + elif ("ExchangeAgent" in t1 and "ImpactAgent" in t2) or ("ExchangeAgent" in t2 and "ImpactAgent" in t1): + # latency[i,j] = 20000 + latency[i, j] = 1 + + elif i > j: + # This "bottom" half of the matrix simply mirrors the top. 
+ if ("ExchangeAgent" in t1 and "EtfArbAgent" in t2) or ("ExchangeAgent" in t2 and "EtfArbAgent" in t1): + # latency[i,j] = 20000 + latency[i, j] = 5 + elif ("ExchangeAgent" in t1 and "EtfMarketMakerAgent" in t2) or ( + "ExchangeAgent" in t2 and "EtfMarketMakerAgent" in t1 + ): + # latency[i,j] = 20000 + latency[i, j] = 1 + elif ("ExchangeAgent" in t1 and "ImpactAgent" in t2) or ("ExchangeAgent" in t2 and "ImpactAgent" in t1): + # latency[i,j] = 20000 + latency[i, j] = 1 + else: + latency[i, j] = latency[j, i] + else: + # This is the same agent. How long does it take to reach localhost? In our data center, it actually + # takes about 20 microseconds. + # latency[i,j] = 10000 + latency[i, j] = 1 # Configure a simple latency noise model for the agents. # Index is ns extra delay, value is probability of this delay being applied. # In this config, there is no latency (noisy or otherwise). -noise = [ 0.0 ] - +noise = [0.0] # Start the kernel running. -kernel.runner(agents = agents, startTime = kernelStartTime, - stopTime = kernelStopTime, agentLatency = latency, - latencyNoise = noise, - defaultComputationDelay = defaultComputationDelay, - oracle = oracle, log_dir = log_dir) - +kernel.runner( + agents=agents, + startTime=kernelStartTime, + stopTime=kernelStopTime, + agentLatency=latency, + latencyNoise=noise, + defaultComputationDelay=defaultComputationDelay, + oracle=oracle, + log_dir=log_dir, +) diff --git a/config/value_noise.py b/config/value_noise.py index 42569fde5..e1b0be35a 100644 --- a/config/value_noise.py +++ b/config/value_noise.py @@ -1,37 +1,54 @@ -from Kernel import Kernel -from agent.ExchangeAgent import ExchangeAgent -from agent.NoiseAgent import NoiseAgent -from agent.ValueAgent import ValueAgent -from agent.market_makers.MarketMakerAgent import MarketMakerAgent -from util.order import LimitOrder -from util.oracle.SparseMeanRevertingOracle import SparseMeanRevertingOracle -from util import util +# Some config files require additional command line 
parameters to easily +# control agent or simulation hyperparameters during coarse parallelization. +import argparse +import sys import numpy as np import pandas as pd -import sys -# Some config files require additional command line parameters to easily -# control agent or simulation hyperparameters during coarse parallelization. -import argparse +from agent.ExchangeAgent import ExchangeAgent +from agent.market_makers.MarketMakerAgent import MarketMakerAgent +from agent.NoiseAgent import NoiseAgent +from agent.ValueAgent import ValueAgent +from Kernel import Kernel +from util import util +from util.oracle.SparseMeanRevertingOracle import SparseMeanRevertingOracle +from util.order import LimitOrder -parser = argparse.ArgumentParser(description='Detailed options for sparse_zi config.') -parser.add_argument('-b', '--book_freq', default=None, - help='Frequency at which to archive order book for visualization') -parser.add_argument('-c', '--config', required=True, - help='Name of config file to execute') -parser.add_argument('-l', '--log_dir', default=None, - help='Log directory name (default: unix timestamp at program start)') -parser.add_argument('-n', '--obs_noise', type=float, default=1000000, - help='Observation noise variance for zero intelligence agents (sigma^2_n)') -parser.add_argument('-o', '--log_orders', action='store_true', - help='Log every order-related action by every agent.') -parser.add_argument('-s', '--seed', type=int, default=None, - help='numpy.random.seed() for simulation') -parser.add_argument('-v', '--verbose', action='store_true', - help='Maximum verbosity!') -parser.add_argument('--config_help', action='store_true', - help='Print argument options for this config file') +parser = argparse.ArgumentParser(description="Detailed options for sparse_zi config.") +parser.add_argument( + "-b", + "--book_freq", + default=None, + help="Frequency at which to archive order book for visualization", +) +parser.add_argument("-c", "--config", required=True, 
help="Name of config file to execute") +parser.add_argument( + "-l", + "--log_dir", + default=None, + help="Log directory name (default: unix timestamp at program start)", +) +parser.add_argument( + "-n", + "--obs_noise", + type=float, + default=1000000, + help="Observation noise variance for zero intelligence agents (sigma^2_n)", +) +parser.add_argument( + "-o", + "--log_orders", + action="store_true", + help="Log every order-related action by every agent.", +) +parser.add_argument("-s", "--seed", type=int, default=None, help="numpy.random.seed() for simulation") +parser.add_argument("-v", "--verbose", action="store_true", help="Maximum verbosity!") +parser.add_argument( + "--config_help", + action="store_true", + help="Print argument options for this config file", +) args, remaining_args = parser.parse_known_args() @@ -40,7 +57,7 @@ sys.exit() # Historical date to simulate. Required even if not relevant. -historical_date = pd.to_datetime('2019-06-28') +historical_date = pd.to_datetime("2019-06-28") # Requested log directory. log_dir = args.log_dir @@ -67,7 +84,8 @@ # before) seed = args.seed -if not seed: seed = int(pd.Timestamp.now().timestamp() * 1000000) % (2 ** 32 - 1) +if not seed: + seed = int(pd.Timestamp.now().timestamp() * 1000000) % (2**32 - 1) np.random.seed(seed) # Config parameter that causes util.util.print to suppress most output. @@ -99,7 +117,7 @@ # When should the Kernel shut down? (This should be after market close.) # Here we go for 5 PM the same day. -kernelStopTime = midnight + pd.to_timedelta('17:00:00') +kernelStopTime = midnight + pd.to_timedelta("17:00:00") # This will configure the kernel with a default computation delay # (time penalty) for each agent's wakeup and recvMsg. An agent @@ -123,13 +141,25 @@ # Note: sigma_s is no longer used by the agents or the fundamental (for sparse discrete simulation). 
-symbols = {'JPM': {'r_bar': 1e5, 'kappa': 1.67e-12, 'agent_kappa': 1.67e-15, 'sigma_s': 0, 'fund_vol': 1e-8, - 'megashock_lambda_a': 2.77778e-13, 'megashock_mean': 1e3, 'megashock_var': 5e4, - 'random_state': np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, dtype='uint64'))}} +symbols = { + "JPM": { + "r_bar": 1e5, + "kappa": 1.67e-12, + "agent_kappa": 1.67e-15, + "sigma_s": 0, + "fund_vol": 1e-8, + "megashock_lambda_a": 2.77778e-13, + "megashock_mean": 1e3, + "megashock_var": 5e4, + "random_state": np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + } +} ### Configure the Kernel. -kernel = Kernel("Base Kernel", - random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2 ** 32, dtype='uint64'))) +kernel = Kernel( + "Base Kernel", + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), +) ### Configure the agents. When conducting "agent of change" experiments, the ### new agents should be added at the END only. @@ -140,10 +170,10 @@ ### Configure an exchange agent. # Let's open the exchange at 9:30 AM. -mkt_open = midnight + pd.to_timedelta('09:30:00') +mkt_open = midnight + pd.to_timedelta("09:30:00") # And close it at 4:00 PM. -mkt_close = midnight + pd.to_timedelta('10:30:00') +mkt_close = midnight + pd.to_timedelta("10:30:00") # Configure an appropriate oracle for all traded stocks. # All agents requiring the same type of Oracle will use the same oracle instance. @@ -151,11 +181,25 @@ # Create the exchange. 
num_exchanges = 1 -agents.extend([ExchangeAgent(j, "Exchange Agent {}".format(j), "ExchangeAgent", mkt_open, mkt_close, - [s for s in symbols], log_orders=log_orders, book_freq=book_freq, pipeline_delay=0, - computation_delay=0, stream_history=10, random_state=np.random.RandomState( - seed=np.random.randint(low=0, high=2 ** 32, dtype='uint64'))) - for j in range(agent_count, agent_count + num_exchanges)]) +agents.extend( + [ + ExchangeAgent( + j, + "Exchange Agent {}".format(j), + "ExchangeAgent", + mkt_open, + mkt_close, + [s for s in symbols], + log_orders=log_orders, + book_freq=book_freq, + pipeline_delay=0, + computation_delay=0, + stream_history=10, + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + ) + for j in range(agent_count, agent_count + num_exchanges) + ] +) agent_types.extend(["ExchangeAgent" for j in range(num_exchanges)]) agent_count += num_exchanges @@ -165,28 +209,37 @@ starting_cash = 10000000 # Here are the zero intelligence agents. -symbol = 'JPM' +symbol = "JPM" s = symbols[symbol] # Tuples are: (# agents, R_min, R_max, eta). # Some configs for ZI agents only (among seven parameter settings). -#number of noise agents +# number of noise agents num_noise = 100 # ZI strategy split. Note that agent arrival rates are quite small, because our minimum # time step is a nanosecond, and we want the agents to arrive more on the order of # minutes. 
-agents.extend( [NoiseAgent(j, "NoiseAgent {}".format(j), - "NoiseAgent", - random_state=np.random.RandomState( - seed=np.random.randint(low=0, high=2 ** 32, dtype='uint64')), - log_orders=log_orders, symbol=symbol, starting_cash=starting_cash, - wakeup_time = mkt_open + np.random.rand() * (mkt_close - mkt_open) ) for j in range(agent_count, agent_count + num_noise )]) +agents.extend( + [ + NoiseAgent( + j, + "NoiseAgent {}".format(j), + "NoiseAgent", + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + log_orders=log_orders, + symbol=symbol, + starting_cash=starting_cash, + wakeup_time=mkt_open + np.random.rand() * (mkt_close - mkt_open), + ) + for j in range(agent_count, agent_count + num_noise) + ] +) agent_count += num_noise -agent_types.extend(['NoiseAgent' for j in range(agent_count, agent_count + num_noise) ]) +agent_types.extend(["NoiseAgent" for j in range(agent_count, agent_count + num_noise)]) # 100 agents num_value = 50 @@ -194,14 +247,24 @@ # ZI strategy split. Note that agent arrival rates are quite small, because our minimum # time step is a nanosecond, and we want the agents to arrive more on the order of # minutes. 
-agents.extend([ValueAgent(j, "Value Agent {}".format(j), - "ValueAgent {}".format(j), - random_state=np.random.RandomState( - seed=np.random.randint(low=0, high=2 ** 32, dtype='uint64')), - log_orders=log_orders, symbol=symbol, #starting_cash=starting_cash, - sigma_n=sigma_n, r_bar=s['r_bar'], kappa=s['agent_kappa'], - sigma_s=s['fund_vol'], - lambda_a=1e-12) for j in range(agent_count, agent_count + num_value )]) +agents.extend( + [ + ValueAgent( + j, + "Value Agent {}".format(j), + "ValueAgent {}".format(j), + random_state=np.random.RandomState(seed=np.random.randint(low=0, high=2**32, dtype="uint64")), + log_orders=log_orders, + symbol=symbol, # starting_cash=starting_cash, + sigma_n=sigma_n, + r_bar=s["r_bar"], + kappa=s["agent_kappa"], + sigma_s=s["fund_vol"], + lambda_a=1e-12, + ) + for j in range(agent_count, agent_count + num_value) + ] +) agent_types.extend(["ValueAgent {}".format(j) for j in range(num_value)]) agent_count += num_value @@ -224,7 +287,7 @@ # Three cases for symmetric array. Set latency when j > i, copy it when i > j, same agent when i == j. if j > i: # Presently, strategy agents shouldn't be talking to each other, so we set them to extremely high latency. - if (t1 == "ZeroIntelligenceAgent" and t2 == "ZeroIntelligenceAgent"): + if t1 == "ZeroIntelligenceAgent" and t2 == "ZeroIntelligenceAgent": latency[i, j] = 1000000000 * 60 * 60 * 24 # Twenty-four hours. elif i > j: # This "bottom" half of the matrix simply mirrors the top. @@ -239,9 +302,13 @@ noise = [0.25, 0.25, 0.20, 0.15, 0.10, 0.05] # Start the kernel running. 
-kernel.runner(agents=agents, startTime=kernelStartTime, - stopTime=kernelStopTime, agentLatency=latency, - latencyNoise=noise, - defaultComputationDelay=defaultComputationDelay, - oracle=oracle, log_dir=log_dir) - +kernel.runner( + agents=agents, + startTime=kernelStartTime, + stopTime=kernelStopTime, + agentLatency=latency, + latencyNoise=noise, + defaultComputationDelay=defaultComputationDelay, + oracle=oracle, + log_dir=log_dir, +) diff --git a/contributed_traders/SimpleAgent.py b/contributed_traders/SimpleAgent.py index d1a9f67fd..08442c638 100644 --- a/contributed_traders/SimpleAgent.py +++ b/contributed_traders/SimpleAgent.py @@ -1,9 +1,12 @@ -from agent.TradingAgent import TradingAgent -import pandas as pd -import numpy as np import os + +import numpy as np +import pandas as pd + +from agent.TradingAgent import TradingAgent from contributed_traders.util import get_file + class SimpleAgent(TradingAgent): """ Simple Trading Agent that compares the past mid-price observations and places a @@ -11,11 +14,28 @@ class SimpleAgent(TradingAgent): sell limit order if the first window mid-price exponential average < the second window mid-price exponential average """ - def __init__(self, id, name, type, symbol, starting_cash, - min_size, max_size, wake_up_freq='60s', - log_orders=False, random_state=None): + def __init__( + self, + id, + name, + type, + symbol, + starting_cash, + min_size, + max_size, + wake_up_freq="60s", + log_orders=False, + random_state=None, + ): - super().__init__(id, name, type, starting_cash=starting_cash, log_orders=log_orders, random_state=random_state) + super().__init__( + id, + name, + type, + starting_cash=starting_cash, + log_orders=log_orders, + random_state=random_state, + ) self.symbol = symbol self.min_size = min_size # Minimum order size self.max_size = max_size # Maximum order size @@ -24,22 +44,23 @@ def __init__(self, id, name, type, symbol, starting_cash, self.mid_list, self.avg_win1_list, self.avg_win2_list = [], [], [] 
self.log_orders = log_orders self.state = "AWAITING_WAKEUP" - #self.window1 = 100 - #self.window2 = 5 + # self.window1 = 100 + # self.window2 = 5 def kernelStarting(self, startTime): super().kernelStarting(startTime) # Read in the configuration through util - with open(get_file('simple_agent.cfg'), 'r') as f: + with open(get_file("simple_agent.cfg"), "r") as f: self.window1, self.window2 = [int(w) for w in f.readline().split()] - #print(f"{self.window1} {self.window2}") + # print(f"{self.window1} {self.window2}") def wakeup(self, currentTime): - """ Agent wakeup is determined by self.wake_up_freq """ + """Agent wakeup is determined by self.wake_up_freq""" can_trade = super().wakeup(currentTime) - if not can_trade: return + if not can_trade: + return self.getCurrentSpread(self.symbol) - self.state = 'AWAITING_SPREAD' + self.state = "AWAITING_SPREAD" def dump_shares(self): # get rid of any outstanding shares we have @@ -50,29 +71,45 @@ def dump_shares(self): self.placeLimitOrder(self.symbol, quantity=order_size, is_buy_order=False, limit_price=0) def receiveMessage(self, currentTime, msg): - """ Momentum agent actions are determined after obtaining the best bid and ask in the LOB """ + """Momentum agent actions are determined after obtaining the best bid and ask in the LOB""" super().receiveMessage(currentTime, msg) - if self.state == 'AWAITING_SPREAD' and msg.body['msg'] == 'QUERY_SPREAD': - dt = (self.mkt_close - currentTime) / np.timedelta64(1, 'm') + if self.state == "AWAITING_SPREAD" and msg.body["msg"] == "QUERY_SPREAD": + dt = (self.mkt_close - currentTime) / np.timedelta64(1, "m") if dt < 25: self.dump_shares() else: bid, _, ask, _ = self.getKnownBidAsk(self.symbol) if bid and ask: self.mid_list.append((bid + ask) / 2) - if len(self.mid_list) > self.window1: self.avg_win1_list.append(pd.Series(self.mid_list).ewm(span=self.window1).mean().values[-1].round(2)) - if len(self.mid_list) > self.window2: 
self.avg_win2_list.append(pd.Series(self.mid_list).ewm(span=self.window2).mean().values[-1].round(2)) + if len(self.mid_list) > self.window1: + self.avg_win1_list.append( + pd.Series(self.mid_list).ewm(span=self.window1).mean().values[-1].round(2) + ) + if len(self.mid_list) > self.window2: + self.avg_win2_list.append( + pd.Series(self.mid_list).ewm(span=self.window2).mean().values[-1].round(2) + ) if len(self.avg_win1_list) > 0 and len(self.avg_win2_list) > 0 and len(self.orders) == 0: if self.avg_win1_list[-1] >= self.avg_win2_list[-1]: # Check that we have enough cash to place the order - if self.holdings['CASH'] >= (self.size * ask): - self.placeLimitOrder(self.symbol, quantity=self.size, is_buy_order=True, limit_price=ask) + if self.holdings["CASH"] >= (self.size * ask): + self.placeLimitOrder( + self.symbol, + quantity=self.size, + is_buy_order=True, + limit_price=ask, + ) else: if self.symbol in self.holdings and self.holdings[self.symbol] > 0: order_size = min(self.size, self.holdings[self.symbol]) - self.placeLimitOrder(self.symbol, quantity=order_size, is_buy_order=False, limit_price=bid) + self.placeLimitOrder( + self.symbol, + quantity=order_size, + is_buy_order=False, + limit_price=bid, + ) self.setWakeup(currentTime + self.getWakeFrequency()) - self.state = 'AWAITING_WAKEUP' + self.state = "AWAITING_WAKEUP" def getWakeFrequency(self): return pd.Timedelta(self.wake_up_freq) diff --git a/contributed_traders/util.py b/contributed_traders/util.py index b69ffc5b5..f70e9ab41 100644 --- a/contributed_traders/util.py +++ b/contributed_traders/util.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 from pathlib import Path + def get_file(fname): return Path(__file__).resolve().parent / fname - - diff --git a/message/Message.py b/message/Message.py index b12870ace..dbfbc946a 100644 --- a/message/Message.py +++ b/message/Message.py @@ -1,50 +1,49 @@ from enum import Enum, unique + @unique class MessageType(Enum): - MESSAGE = 1 - WAKEUP = 2 + MESSAGE = 1 + WAKEUP = 2 - 
def __lt__(self, other): - return self.value < other.value + def __lt__(self, other): + return self.value < other.value class Message: - uniq = 0 - - def __init__ (self, body = None): - # The base Message class no longer holds envelope/header information, - # however any desired information can be placed in the arbitrary - # body. Delivery metadata is now handled outside the message itself. - # The body may be overridden by specific message type subclasses. - # It is acceptable for WAKEUP type messages to have no body. - self.body = body - - # The autoincrementing variable here will ensure that, when Messages are - # due for delivery at the same time step, the Message that was created - # first is delivered first. (Which is not important, but Python 3 - # requires a fully resolved chain of priority in all cases, so we need - # something consistent.) We might want to generate these with stochasticity, - # but guarantee uniqueness somehow, to make delivery of orders at the same - # exact timestamp "random" instead of "arbitrary" (FIFO among tied times) - # as it currently is. - self.uniq = Message.uniq - Message.uniq += 1 - - # The base Message class can no longer do any real error checking. - # Subclasses are strongly encouraged to do so based on their body. - - - def __lt__(self, other): - # Required by Python3 for this object to be placed in a priority queue. - # If we ever decide to place something on the queue other than Messages, - # we will need to alter the below to not assume the other object is - # also a Message. - - return (self.uniq < other.uniq) - - - def __str__(self): - # Make a printable representation of this message. - return str(self.body) + uniq = 0 + + def __init__(self, body=None): + # The base Message class no longer holds envelope/header information, + # however any desired information can be placed in the arbitrary + # body. Delivery metadata is now handled outside the message itself. 
+ # The body may be overridden by specific message type subclasses. + # It is acceptable for WAKEUP type messages to have no body. + self.body = body + + # The autoincrementing variable here will ensure that, when Messages are + # due for delivery at the same time step, the Message that was created + # first is delivered first. (Which is not important, but Python 3 + # requires a fully resolved chain of priority in all cases, so we need + # something consistent.) We might want to generate these with stochasticity, + # but guarantee uniqueness somehow, to make delivery of orders at the same + # exact timestamp "random" instead of "arbitrary" (FIFO among tied times) + # as it currently is. + self.uniq = Message.uniq + Message.uniq += 1 + + # The base Message class can no longer do any real error checking. + # Subclasses are strongly encouraged to do so based on their body. + + def __lt__(self, other): + # Required by Python3 for this object to be placed in a priority queue. + # If we ever decide to place something on the queue other than Messages, + # we will need to alter the below to not assume the other object is + # also a Message. + + return self.uniq < other.uniq + + def __str__(self): + # Make a printable representation of this message. + return str(self.body) diff --git a/model/LatencyModel.py b/model/LatencyModel.py index 7ec829a3c..e2642c5c8 100644 --- a/model/LatencyModel.py +++ b/model/LatencyModel.py @@ -1,168 +1,170 @@ -import numpy as np import sys -class LatencyModel: - - """ - LatencyModel provides a latency model for messages in the ABIDES simulation. The default - is a cubic model as described herein. - - Model parameters may either be passed as kwargs or a single dictionary with a key named 'kwargs'. - - Using the 'cubic' model, the final latency for a message is computed as: min_latency + [ a / (x^3) ], - where 'x' is randomly drawn from a uniform distribution (jitter_clip,1], and 'a' is the jitter - parameter defined below. 
- - The 'cubic' model requires five parameters (there are defaults for four). Scalar values - apply to all messages between all agents. Numpy array parameters are all indexed by simulation - agent_id. Vector arrays (1-D) are indexed to the sending agent. For 2-D arrays of directional - pairwise values, row index is the sending agent and column index is the receiving agent. - These do not have to be symmetric. - - 'connected' must be either scalar True or a 2-D numpy array. A False array entry prohibits - communication regardless of values in other parameters. Boolean. Default is scalar True. - - 'min_latency' requires a 2-D numpy array of pairwise minimum latency. Integer nanoseconds. - No default value. - - 'jitter' requires a scalar, a 1-D numpy vector, or a 2-D numpy array. Controls shape of cubic - curve for per-message additive latency noise. This is the 'a' parameter in the cubic equation above. - Float in range [0,1]. Default is scalar 0.5. - - 'jitter_clip' requires a scalar, a 1-D numpy vector, or a 2-D numpy array. Controls the minimum value - of the uniform range from which 'x' is selected when applying per-message noise. Higher values - create a LOWER maximum value for latency noise (clipping the cubic curve). Parameter is exclusive: - 'x' is drawn from (jitter_clip,1]. Float in range [0,1]. Default is scalar 0.1. - - 'jitter_unit' requires a scalar, a 1-D numpy vector, or a 2-D numpy array. This is the fraction of - min_latency that will be considered the unit of measurement for jitter. For example, - if this parameter is 10, an agent pair with min_latency of 333ns will have a 33.3ns unit of measurement - for jitter, and an agent pair with min_latency of 13ms will have a 1.3ms unit of measurement for jitter. 
- Assuming 'jitter' = 0.5 and 'jitter_clip' = 0, the first agent pair will have 50th percentile (median) - jitter of 133.3ns and 90th percentile jitter of 16.65us, and the second agent pair will have 50th percentile - (median) jitter of 5.2ms and 90th percentile jitter of 650ms. Float. Default is scalar 10. - - All values except min_latency may be specified as a single scalar for simplicity, and have defaults to - allow ease of use as: latency = LatencyModel('cubic', min_latency = some_array). - - All values may be specified with directional pairwise granularity to permit quite complex network models, - varying quality of service, or asymmetric capabilities when these are necessary. - - Selection within the range is from a cubic distribution, so extreme high values will be - quite rare. The table below shows example values based on the jitter parameter a (column - header) and x drawn from a uniform distribution from [0,1] (row header). - - x \ a 0.001 0.10 0.20 0.30 0.40 0.50 0.60 0.70 0.80 0.90 1.00 - 0.001 1M 100M 200M 300M 400M 500M 600M 700M 800M 900M 1B - 0.01 1K 100K 200K 300K 400K 500K 600K 700K 800K 900K 1M - 0.05 8.00 800.00 1.6K 2.4K 3.2K 4.0K 4.8K 5.6K 6.4K 7.2K 8.0K - 0.10 1.00 100.00 200.00 300.00 400.00 500.00 600.00 700.00 800.00 900.00 1,000.00 - 0.20 0.13 12.50 25.00 37.50 50.00 62.50 75.00 87.50 100.00 112.50 125.00 - 0.30 0.04 3.70 7.41 11.11 14.81 18.52 22.22 25.93 29.63 33.33 37.04 - 0.40 0.02 1.56 3.13 4.69 6.25 7.81 9.38 10.94 12.50 14.06 15.63 - 0.50 0.01 0.80 1.60 2.40 3.20 4.00 4.80 5.60 6.40 7.20 8.00 - 0.60 0.00 0.46 0.93 1.39 1.85 2.31 2.78 3.24 3.70 4.17 4.63 - 0.70 0.00 0.29 0.58 0.87 1.17 1.46 1.75 2.04 2.33 2.62 2.92 - 0.80 0.00 0.20 0.39 0.59 0.78 0.98 1.17 1.37 1.56 1.76 1.95 - 0.90 0.00 0.14 0.27 0.41 0.55 0.69 0.82 0.96 1.10 1.23 1.37 - 0.95 0.00 0.12 0.23 0.35 0.47 0.58 0.70 0.82 0.93 1.05 1.17 - 0.99 0.00 0.10 0.21 0.31 0.41 0.52 0.62 0.72 0.82 0.93 1.03 - 1.00 0.00 0.10 0.20 0.30 0.40 0.50 0.60 0.70 0.80 0.90 1.00 - """ - - - 
def __init__(self, latency_model = 'cubic', random_state = None, **kwargs): - """ - Model-specific parameters may be specified as keyword args or a dictionary with key 'kwargs'. - - Required keyword parameters: - 'latency_model' : 'cubic' - - Optional keyword parameters: - 'random_state' : an initialized np.random.RandomState object. - """ - - self.latency_model = latency_model.lower() - self.random_state = random_state - - # This permits either keyword args or a dictionary of kwargs. The two cannot be mixed. - if 'kwargs' in kwargs: kwargs = kwargs['kwargs'] - - # Check required parameters and apply defaults for the selected model. - if (latency_model.lower() == 'cubic'): - if 'min_latency' not in kwargs: - print ("Config error: cubic latency model requires parameter 'min_latency' as 2-D ndarray.") - sys.exit() - - # Set defaults. - kwargs.setdefault('connected', True) - kwargs.setdefault('jitter', 0.5) - kwargs.setdefault('jitter_clip', 0.1) - kwargs.setdefault('jitter_unit', 10.0) - elif (latency_model.lower() == 'deterministic'): - if 'min_latency' not in kwargs: - print("Config error: deterministic latency model requires parameter 'min_latency' as 2-D ndarray.") - sys.exit() - else: - print (f"Config error: unknown latency model requested ({latency_model.lower()})") - sys.exit() - - # Remember the kwargs for use generating jitter (latency noise). - self.kwargs = kwargs - - def get_latency(self, sender_id = None, recipient_id = None): - """ - LatencyModel.get_latency() samples and returns the final latency for a single Message according to the - model specified during initialization. - - Required parameters: - 'sender_id' : simulation agent_id for the agent sending the message - 'recipient_id' : simulation agent_id for the agent receiving the message - """ - - kw = self.kwargs - min_latency = self._extract(kw['min_latency'], sender_id, recipient_id) - - if self.latency_model == 'cubic': - # Generate latency for a single message using the cubic model. 
- - # If agents cannot communicate in this direction, return special latency -1. - if not self._extract( kw['connected'], sender_id, recipient_id ): return -1 - - # Extract the cubic parameters and compute the final latency. - a = self._extract( kw['jitter'], sender_id, recipient_id ) - clip = self._extract( kw['jitter_clip'], sender_id, recipient_id ) - unit = self._extract( kw['jitter_unit'], sender_id, recipient_id ) - # Jitter requires a uniform random draw. - x = self.random_state.uniform( low = clip, high = 1.0 ) - - # Now apply the cubic model to compute jitter and the final message latency. - latency = min_latency + ((a / x**3) * (min_latency / unit)) - - elif self.latency_model == 'deterministic': - return min_latency - - return latency +import numpy as np - def _extract(self, param, sid, rid): +class LatencyModel: """ - Internal function to extract correct values for a sender->recipient pair from parameters that can - be specified as scalar, 1-D ndarray, or 2-D ndarray. - - Required parameters: - 'param' : the parameter (not parameter name) from which to extract a value - 'sid' : the simulation sender agent id - 'rid' : the simulation recipient agent id + LatencyModel provides a latency model for messages in the ABIDES simulation. The default + is a cubic model as described herein. + + Model parameters may either be passed as kwargs or a single dictionary with a key named 'kwargs'. + + Using the 'cubic' model, the final latency for a message is computed as: min_latency + [ a / (x^3) ], + where 'x' is randomly drawn from a uniform distribution (jitter_clip,1], and 'a' is the jitter + parameter defined below. + + The 'cubic' model requires five parameters (there are defaults for four). Scalar values + apply to all messages between all agents. Numpy array parameters are all indexed by simulation + agent_id. Vector arrays (1-D) are indexed to the sending agent. 
For 2-D arrays of directional + pairwise values, row index is the sending agent and column index is the receiving agent. + These do not have to be symmetric. + + 'connected' must be either scalar True or a 2-D numpy array. A False array entry prohibits + communication regardless of values in other parameters. Boolean. Default is scalar True. + + 'min_latency' requires a 2-D numpy array of pairwise minimum latency. Integer nanoseconds. + No default value. + + 'jitter' requires a scalar, a 1-D numpy vector, or a 2-D numpy array. Controls shape of cubic + curve for per-message additive latency noise. This is the 'a' parameter in the cubic equation above. + Float in range [0,1]. Default is scalar 0.5. + + 'jitter_clip' requires a scalar, a 1-D numpy vector, or a 2-D numpy array. Controls the minimum value + of the uniform range from which 'x' is selected when applying per-message noise. Higher values + create a LOWER maximum value for latency noise (clipping the cubic curve). Parameter is exclusive: + 'x' is drawn from (jitter_clip,1]. Float in range [0,1]. Default is scalar 0.1. + + 'jitter_unit' requires a scalar, a 1-D numpy vector, or a 2-D numpy array. This is the fraction of + min_latency that will be considered the unit of measurement for jitter. For example, + if this parameter is 10, an agent pair with min_latency of 333ns will have a 33.3ns unit of measurement + for jitter, and an agent pair with min_latency of 13ms will have a 1.3ms unit of measurement for jitter. + Assuming 'jitter' = 0.5 and 'jitter_clip' = 0, the first agent pair will have 50th percentile (median) + jitter of 133.3ns and 90th percentile jitter of 16.65us, and the second agent pair will have 50th percentile + (median) jitter of 5.2ms and 90th percentile jitter of 650ms. Float. Default is scalar 10. + + All values except min_latency may be specified as a single scalar for simplicity, and have defaults to + allow ease of use as: latency = LatencyModel('cubic', min_latency = some_array). 
+ + All values may be specified with directional pairwise granularity to permit quite complex network models, + varying quality of service, or asymmetric capabilities when these are necessary. + + Selection within the range is from a cubic distribution, so extreme high values will be + quite rare. The table below shows example values based on the jitter parameter a (column + header) and x drawn from a uniform distribution from [0,1] (row header). + + x \ a 0.001 0.10 0.20 0.30 0.40 0.50 0.60 0.70 0.80 0.90 1.00 + 0.001 1M 100M 200M 300M 400M 500M 600M 700M 800M 900M 1B + 0.01 1K 100K 200K 300K 400K 500K 600K 700K 800K 900K 1M + 0.05 8.00 800.00 1.6K 2.4K 3.2K 4.0K 4.8K 5.6K 6.4K 7.2K 8.0K + 0.10 1.00 100.00 200.00 300.00 400.00 500.00 600.00 700.00 800.00 900.00 1,000.00 + 0.20 0.13 12.50 25.00 37.50 50.00 62.50 75.00 87.50 100.00 112.50 125.00 + 0.30 0.04 3.70 7.41 11.11 14.81 18.52 22.22 25.93 29.63 33.33 37.04 + 0.40 0.02 1.56 3.13 4.69 6.25 7.81 9.38 10.94 12.50 14.06 15.63 + 0.50 0.01 0.80 1.60 2.40 3.20 4.00 4.80 5.60 6.40 7.20 8.00 + 0.60 0.00 0.46 0.93 1.39 1.85 2.31 2.78 3.24 3.70 4.17 4.63 + 0.70 0.00 0.29 0.58 0.87 1.17 1.46 1.75 2.04 2.33 2.62 2.92 + 0.80 0.00 0.20 0.39 0.59 0.78 0.98 1.17 1.37 1.56 1.76 1.95 + 0.90 0.00 0.14 0.27 0.41 0.55 0.69 0.82 0.96 1.10 1.23 1.37 + 0.95 0.00 0.12 0.23 0.35 0.47 0.58 0.70 0.82 0.93 1.05 1.17 + 0.99 0.00 0.10 0.21 0.31 0.41 0.52 0.62 0.72 0.82 0.93 1.03 + 1.00 0.00 0.10 0.20 0.30 0.40 0.50 0.60 0.70 0.80 0.90 1.00 """ - if np.isscalar(param): return param - - if type(param) is np.ndarray: - if param.ndim == 1: return param[sid] - elif param.ndim == 2: return param[sid, rid] - - print("Config error: LatencyModel parameter is not scalar, 1-D ndarray, or 2-D ndarray.") - sys.exit() - - + def __init__(self, latency_model="cubic", random_state=None, **kwargs): + """ + Model-specific parameters may be specified as keyword args or a dictionary with key 'kwargs'. 
+ + Required keyword parameters: + 'latency_model' : 'cubic' + + Optional keyword parameters: + 'random_state' : an initialized np.random.RandomState object. + """ + + self.latency_model = latency_model.lower() + self.random_state = random_state + + # This permits either keyword args or a dictionary of kwargs. The two cannot be mixed. + if "kwargs" in kwargs: + kwargs = kwargs["kwargs"] + + # Check required parameters and apply defaults for the selected model. + if latency_model.lower() == "cubic": + if "min_latency" not in kwargs: + print("Config error: cubic latency model requires parameter 'min_latency' as 2-D ndarray.") + sys.exit() + + # Set defaults. + kwargs.setdefault("connected", True) + kwargs.setdefault("jitter", 0.5) + kwargs.setdefault("jitter_clip", 0.1) + kwargs.setdefault("jitter_unit", 10.0) + elif latency_model.lower() == "deterministic": + if "min_latency" not in kwargs: + print("Config error: deterministic latency model requires parameter 'min_latency' as 2-D ndarray.") + sys.exit() + else: + print(f"Config error: unknown latency model requested ({latency_model.lower()})") + sys.exit() + + # Remember the kwargs for use generating jitter (latency noise). + self.kwargs = kwargs + + def get_latency(self, sender_id=None, recipient_id=None): + """ + LatencyModel.get_latency() samples and returns the final latency for a single Message according to the + model specified during initialization. + + Required parameters: + 'sender_id' : simulation agent_id for the agent sending the message + 'recipient_id' : simulation agent_id for the agent receiving the message + """ + + kw = self.kwargs + min_latency = self._extract(kw["min_latency"], sender_id, recipient_id) + + if self.latency_model == "cubic": + # Generate latency for a single message using the cubic model. + + # If agents cannot communicate in this direction, return special latency -1. 
+ if not self._extract(kw["connected"], sender_id, recipient_id): + return -1 + + # Extract the cubic parameters and compute the final latency. + a = self._extract(kw["jitter"], sender_id, recipient_id) + clip = self._extract(kw["jitter_clip"], sender_id, recipient_id) + unit = self._extract(kw["jitter_unit"], sender_id, recipient_id) + # Jitter requires a uniform random draw. + x = self.random_state.uniform(low=clip, high=1.0) + + # Now apply the cubic model to compute jitter and the final message latency. + latency = min_latency + ((a / x**3) * (min_latency / unit)) + + elif self.latency_model == "deterministic": + return min_latency + + return latency + + def _extract(self, param, sid, rid): + """ + Internal function to extract correct values for a sender->recipient pair from parameters that can + be specified as scalar, 1-D ndarray, or 2-D ndarray. + + Required parameters: + 'param' : the parameter (not parameter name) from which to extract a value + 'sid' : the simulation sender agent id + 'rid' : the simulation recipient agent id + """ + + if np.isscalar(param): + return param + + if type(param) is np.ndarray: + if param.ndim == 1: + return param[sid] + elif param.ndim == 2: + return param[sid, rid] + + print("Config error: LatencyModel parameter is not scalar, 1-D ndarray, or 2-D ndarray.") + sys.exit() diff --git a/pyproject.toml b/pyproject.toml new file mode 100755 index 000000000..65a714480 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,34 @@ +[tool.isort] +profile = "black" +line_length = 120 +skip = [".gitignore", "env", "venv", ".git", ".mypy_cache", "__pycache__", "sandbox", "build", "dist"] + +[tool.black] +line-length = 120 +force-exclude = ''' env| + venv| + .mypy_cache| + build| + dist| + __pycache__ + sandbox + ''' + +[tool.flake8] +max-line-length = 120 + +ignore = [ + # Missing docstring in __init__ + "D107", + # Missing docstring in public package + "D104", + # Whitespace before ':' - for black + "E203", + # Line break occurred before a binary 
operator + "W503", + # Module level import not at top of file + "E402" +] +exclude = ["env", "venv", "sandbox", "build", "dist"] + + diff --git a/realism/asset_returns_stylized_facts.py b/realism/asset_returns_stylized_facts.py index 06b6e8b51..ba4e65ffe 100644 --- a/realism/asset_returns_stylized_facts.py +++ b/realism/asset_returns_stylized_facts.py @@ -1,40 +1,52 @@ -import sys +import argparse +import math import os -import random import pickle -import math -import pandas as pd +import random +import sys +from glob import glob +from pathlib import Path + import matplotlib.pyplot as plt import numpy as np +import pandas as pd from realism_utils import get_trades -from glob import glob -from pathlib import Path -import argparse from tqdm import tqdm p = str(Path(__file__).resolve().parents[1]) # directory one level up from this file sys.path.append(p) from realism_utils import get_plot_colors + from util.formatting.convert_order_stream import dir_path # Create cache folder if it does not exist -try: os.mkdir("cache") -except: pass +try: + os.mkdir("cache") +except: + pass -from metrics.minutely_returns import MinutelyReturns from metrics.aggregation_normality import AggregationNormality from metrics.autocorrelation import Autocorrelation -from metrics.volatility_clustering import VolatilityClustering from metrics.kurtosis import Kurtosis -from metrics.volume_volatility_correlation import VolumeVolatilityCorrelation +from metrics.minutely_returns import MinutelyReturns from metrics.returns_volatility_correlation import ReturnsVolatilityCorrelation +from metrics.volatility_clustering import VolatilityClustering +from metrics.volume_volatility_correlation import VolumeVolatilityCorrelation -all_metrics = [MinutelyReturns, AggregationNormality, Autocorrelation, VolatilityClustering, Kurtosis, VolumeVolatilityCorrelation, ReturnsVolatilityCorrelation] +all_metrics = [ + MinutelyReturns, + AggregationNormality, + Autocorrelation, + VolatilityClustering, + Kurtosis, + 
VolumeVolatilityCorrelation, + ReturnsVolatilityCorrelation, +] def get_sims(sim_dir, my_metric, ohlcv_dict): sims = [] - exchanges = [a for a in Path(sim_dir).rglob('*.bz2') if "exchange" in str(a).lower()] + exchanges = [a for a in Path(sim_dir).rglob("*.bz2") if "exchange" in str(a).lower()] for exchange in exchanges: ohlcv = ohlcv_dict[exchange] @@ -50,15 +62,13 @@ def get_ohlcvs(sim_dirs, recompute): print("Loading simulation data...") exchanges = [] for sim_dir in sim_dirs: - exchanges += [a for a in Path(sim_dir).rglob('*.bz2') if "exchange" in str(a).lower()] + exchanges += [a for a in Path(sim_dir).rglob("*.bz2") if "exchange" in str(a).lower()] pickled_ohclv = "cache/{}_ohclv.pickle".format("_".join(sim_dirs).replace("/", "")) if (not os.path.exists(pickled_ohclv)) or recompute: # Pickled simulated metric not found in cache. ohclv_dict = dict() for exchange in tqdm(exchanges, desc="Files loaded"): - ohclv_dict.update( - {exchange: get_trades(exchange)} - ) + ohclv_dict.update({exchange: get_trades(exchange)}) pickle.dump(ohclv_dict, open(pickled_ohclv, "wb")) else: # Pickled ohclv found in cache. ohclv_dict = pickle.load(open(pickled_ohclv, "rb")) @@ -74,36 +84,60 @@ def plot_metrics(sim_dirs, sim_colors, output_dir, ohclv_dict, recompute): result = dict() for i, sim_dir in enumerate(sim_dirs): # Calculate metrics for simulated data (via sampling) - pickled_sim = "cache/{}_{}.pickle".format(my_metric.__class__.__name__, sim_dir.replace("/","")) - if (not os.path.exists(pickled_sim)) or recompute: # Pickled simulated metric not found in cache. + pickled_sim = "cache/{}_{}.pickle".format(my_metric.__class__.__name__, sim_dir.replace("/", "")) + if (not os.path.exists(pickled_sim)) or recompute: # Pickled simulated metric not found in cache. sims = get_sims(sim_dir, my_metric, ohclv_dict) pickle.dump(sims, open(pickled_sim, "wb")) - else: # Pickled simulated metric found in cache. + else: # Pickled simulated metric found in cache. 
sims = pickle.load(open(pickled_sim, "rb")) - sim_name = sim_dir.rstrip('/').split("/")[-1] + sim_name = sim_dir.rstrip("/").split("/")[-1] result.update({(sim_name, sim_colors[i]): sims}) # Create plot for each config and metric my_metric.visualize(result) plt.title(plt.gca().title.get_text()) - try: os.mkdir(output_dir) - except: pass - plt.savefig("{}/{}.png".format(output_dir, my_metric.__class__.__name__), bbox_inches='tight') + try: + os.mkdir(output_dir) + except: + pass + plt.savefig( + "{}/{}.png".format(output_dir, my_metric.__class__.__name__), + bbox_inches="tight", + ) plt.clf() if __name__ == "__main__": - parser = argparse.ArgumentParser(description='Processes historical data and simulated stream files and produce plots' - ' of stylized fact metrics for asset return distributions.') - parser.add_argument('-s', '--simulated-data-dir', type=dir_path, action='append', required=True, - help="Directory containing .bz2 output log files from ABIDES Exchange Agent. Note that the " - "filenames MUST contain the word 'Exchange' in any case. One can add many simulated data " - "directories") - parser.add_argument('-z', '--recompute', action="store_true", help="Rerun computations without caching.") - parser.add_argument('-o', '--output-dir', default='visualizations', help='Path to output directory', type=dir_path) + parser = argparse.ArgumentParser( + description="Processes historical data and simulated stream files and produce plots" + " of stylized fact metrics for asset return distributions." + ) + parser.add_argument( + "-s", + "--simulated-data-dir", + type=dir_path, + action="append", + required=True, + help="Directory containing .bz2 output log files from ABIDES Exchange Agent. Note that the " + "filenames MUST contain the word 'Exchange' in any case. 
One can add many simulated data " + "directories", + ) + parser.add_argument( + "-z", + "--recompute", + action="store_true", + help="Rerun computations without caching.", + ) + parser.add_argument( + "-o", + "--output-dir", + default="visualizations", + help="Path to output directory", + type=dir_path, + ) args, remaining_args = parser.parse_known_args() diff --git a/realism/execution_aggregate_statistics.py b/realism/execution_aggregate_statistics.py index aa0b605b6..e683d43ab 100755 --- a/realism/execution_aggregate_statistics.py +++ b/realism/execution_aggregate_statistics.py @@ -1,57 +1,61 @@ -from glob import glob -import pandas as pd +import argparse import pickle +from glob import glob from pprint import pprint -import argparse + +import pandas as pd def main(path_glob, out_filepath): - """ Computes aggregated statistics (median) across multiple market impact experiments and saves the result. + """Computes aggregated statistics (median) across multiple market impact experiments and saves the result. - :param path_glob: Glob pattern for paths to cached experiment files to be aggregated. Note by `cached` the output of the function impact_single_day_pov.prep_data is meant. - :param out_filepath: path to file storing aggregated statistics (.csv extension) + :param path_glob: Glob pattern for paths to cached experiment files to be aggregated. Note by `cached` the output of the function impact_single_day_pov.prep_data is meant. 
+ :param out_filepath: path to file storing aggregated statistics (.csv extension) - :type path_glob: str - :type out_filepath: str + :type path_glob: str + :type out_filepath: str """ impact_stats = [] for path in glob(path_glob): - print(f'Processing file {path}') + print(f"Processing file {path}") try: - with open(path, 'rb') as f: + with open(path, "rb") as f: consolidated = pickle.load(f) except pickle.UnpicklingError: continue for elem in consolidated: - stats_dict = elem['impact_statistics'] - pov = elem['pov'] - stats_dict.update({ - 'pov': pov, - }) + stats_dict = elem["impact_statistics"] + pov = elem["pov"] + stats_dict.update( + { + "pov": pov, + } + ) impact_stats.append(stats_dict) stats_df_aggregate = pd.DataFrame(impact_stats) - median = stats_df_aggregate.groupby(['pov']).apply(pd.DataFrame.median).drop(columns=['pov']) - print(f'Aggregated statistics (median) for glob {path_glob}:') + median = stats_df_aggregate.groupby(["pov"]).apply(pd.DataFrame.median).drop(columns=["pov"]) + print(f"Aggregated statistics (median) for glob {path_glob}:") print(median) median.to_csv(out_filepath, index=True) -if __name__ == '__main__': +if __name__ == "__main__": - parser = argparse.ArgumentParser(description='CLI utility for aggregating statistics of multi-day POV experiments.') + parser = argparse.ArgumentParser(description="CLI utility for aggregating statistics of multi-day POV experiments.") - parser.add_argument('path_glob', - help='Glob pattern for paths to cached experiment files to be aggregated. Note by `cached` the output of the function impact_single_day_pov.prep_data is meant. ', - type=str) - parser.add_argument('out_file', - help='Path to csv output file.') + parser.add_argument( + "path_glob", + help="Glob pattern for paths to cached experiment files to be aggregated. Note by `cached` the output of the function impact_single_day_pov.prep_data is meant. 
", + type=str, + ) + parser.add_argument("out_file", help="Path to csv output file.") args, remaining_args = parser.parse_known_args() diff --git a/realism/get_quotes.py b/realism/get_quotes.py index 4969cbab5..42da62f58 100644 --- a/realism/get_quotes.py +++ b/realism/get_quotes.py @@ -1,47 +1,50 @@ import sys + import pandas as pd -def read_simulated_quotes (file): - df = pd.read_pickle(file, compression='bz2') - df['Timestamp'] = df.index + +def read_simulated_quotes(file): + df = pd.read_pickle(file, compression="bz2") + df["Timestamp"] = df.index # Keep only the last bid and last ask event at each timestamp. - df = df.drop_duplicates(subset=['Timestamp','EventType'], keep='last') + df = df.drop_duplicates(subset=["Timestamp", "EventType"], keep="last") - del df['Timestamp'] + del df["Timestamp"] - df_bid = df[df['EventType'] == 'BEST_BID'].copy() - df_ask = df[df['EventType'] == 'BEST_ASK'].copy() + df_bid = df[df["EventType"] == "BEST_BID"].copy() + df_ask = df[df["EventType"] == "BEST_ASK"].copy() if len(df) <= 0: print("There appear to be no simulated quotes.") sys.exit() - df_bid['BEST_BID'] = [b for s,b,bv in df_bid['Event'].str.split(',')] - df_bid['BEST_BID_VOL'] = [bv for s,b,bv in df_bid['Event'].str.split(',')] - df_ask['BEST_ASK'] = [a for s,a,av in df_ask['Event'].str.split(',')] - df_ask['BEST_ASK_VOL'] = [av for s,a,av in df_ask['Event'].str.split(',')] + df_bid["BEST_BID"] = [b for s, b, bv in df_bid["Event"].str.split(",")] + df_bid["BEST_BID_VOL"] = [bv for s, b, bv in df_bid["Event"].str.split(",")] + df_ask["BEST_ASK"] = [a for s, a, av in df_ask["Event"].str.split(",")] + df_ask["BEST_ASK_VOL"] = [av for s, a, av in df_ask["Event"].str.split(",")] - df_bid['BEST_BID'] = df_bid['BEST_BID'].str.replace('$','').astype('float64') - df_ask['BEST_ASK'] = df_ask['BEST_ASK'].str.replace('$','').astype('float64') + df_bid["BEST_BID"] = df_bid["BEST_BID"].str.replace("$", "").astype("float64") + df_ask["BEST_ASK"] = 
df_ask["BEST_ASK"].str.replace("$", "").astype("float64") - df_bid['BEST_BID_VOL'] = df_bid['BEST_BID_VOL'].astype('float64') - df_ask['BEST_ASK_VOL'] = df_ask['BEST_ASK_VOL'].astype('float64') + df_bid["BEST_BID_VOL"] = df_bid["BEST_BID_VOL"].astype("float64") + df_ask["BEST_ASK_VOL"] = df_ask["BEST_ASK_VOL"].astype("float64") - df = df_bid.join(df_ask, how='outer', lsuffix='.bid', rsuffix='.ask') - df['BEST_BID'] = df['BEST_BID'].ffill().bfill() - df['BEST_ASK'] = df['BEST_ASK'].ffill().bfill() - df['BEST_BID_VOL'] = df['BEST_BID_VOL'].ffill().bfill() - df['BEST_ASK_VOL'] = df['BEST_ASK_VOL'].ffill().bfill() - df['MIDPOINT'] = (df['BEST_BID'] + df['BEST_ASK']) / 2.0 + df = df_bid.join(df_ask, how="outer", lsuffix=".bid", rsuffix=".ask") + df["BEST_BID"] = df["BEST_BID"].ffill().bfill() + df["BEST_ASK"] = df["BEST_ASK"].ffill().bfill() + df["BEST_BID_VOL"] = df["BEST_BID_VOL"].ffill().bfill() + df["BEST_ASK_VOL"] = df["BEST_ASK_VOL"].ffill().bfill() + df["MIDPOINT"] = (df["BEST_BID"] + df["BEST_ASK"]) / 2.0 return df + sim_file = sys.argv[1] df_sim = read_simulated_quotes(sim_file) df_sim = df_sim.drop(["EventType.bid", "Event.bid", "EventType.ask", "Event.ask"], axis=1).resample("1T").ffill() df_sim.to_csv("simulated_quotes.csv") -''' +""" import matplotlib.pyplot as plt plt.plot(df_sim["BEST_BID"], color="g") plt.plot(df_sim["BEST_ASK"], color="r") @@ -49,4 +52,4 @@ def read_simulated_quotes (file): plt.plot(df_sim["BEST_ASK"]-df_sim["BEST_BID"]) plt.show() -''' \ No newline at end of file +""" diff --git a/realism/impact_multiday_pov.py b/realism/impact_multiday_pov.py index ec7f69104..6bcddc581 100755 --- a/realism/impact_multiday_pov.py +++ b/realism/impact_multiday_pov.py @@ -1,33 +1,31 @@ -import pandas as pd -import pickle +import argparse import glob as glob +import json +import pickle import re +from functools import reduce +from multiprocessing import Pool from pprint import pprint -import numpy as np + import matplotlib.dates as mdates import 
matplotlib.pyplot as plt - +import numpy as np +import pandas as pd from impact_single_day_pov import prep_data -from realism_utils import forward_fill_series, make_cache_and_visualisation_dir -from multiprocessing import Pool - -from functools import reduce - -import json -import argparse from pandas.plotting import register_matplotlib_converters +from realism_utils import forward_fill_series, make_cache_and_visualisation_dir -RESAMPLE_RATE = '0.5S' # times at which to sample mid price series +RESAMPLE_RATE = "0.5S" # times at which to sample mid price series -def normalise_time(ts, base_date='2016-01-30'): - """ Force a pandas timestamp to be on a certain date. +def normalise_time(ts, base_date="2016-01-30"): + """Force a pandas timestamp to be on a certain date. - :param ts: timestamp - :param base_date: string in format YYYY-MM-DD + :param ts: timestamp + :param base_date: string in format YYYY-MM-DD - :type ts: pd.Timestamp - :type base_date: str + :type ts: pd.Timestamp + :type base_date: str """ pd_base_date = pd.Timestamp(base_date) time = ts.time() @@ -36,7 +34,7 @@ def normalise_time(ts, base_date='2016-01-30'): def extract_seed_date_from_path(p, yes_base, no_base): - """ Extracts random seed and date from path of ABIDES output file + """Extracts random seed and date from path of ABIDES output file :param p: path from which to extract seed and date :param yes_base: regex pattern for output files WITH execution agent. Must obey the following: @@ -50,37 +48,28 @@ def extract_seed_date_from_path(p, yes_base, no_base): If the above are not satisfied you will have to rename experimental output files. 
:return: """ - base = yes_base if 'yes' in p else no_base - yes_no = 'yes' if 'yes' in p else 'no' + base = yes_base if "yes" in p else no_base + yes_no = "yes" if "yes" in p else "no" m = re.search(base, p) - if yes_no == 'yes': + if yes_no == "yes": seed = m.group(1) pov = m.group(2) date = m.group(3) - out_dict = { - 'type': yes_no, - 'seed': seed, - 'pov': pov, - 'date': date - } + out_dict = {"type": yes_no, "seed": seed, "pov": pov, "date": date} return out_dict - elif yes_no == 'no': + elif yes_no == "no": seed = m.group(1) date = m.group(2) - out_dict = { - 'type': yes_no, - 'seed': seed, - 'date': date - } + out_dict = {"type": yes_no, "seed": seed, "date": date} return out_dict else: - raise ValueError(f'Path {p} not in required format') + raise ValueError(f"Path {p} not in required format") def generate_plot_data_cache_dicts(log_dirs_no_glob, log_dirs_yes_glob, yes_base, no_base): - """ Generates data structure containing information about ABIDES output paths and experimental parameters, for + """Generates data structure containing information about ABIDES output paths and experimental parameters, for downstream processing :param log_dirs_no_glob: Glob pattern listing directories corresponding to experiments WITHOUT execution agent. 
@@ -107,49 +96,42 @@ def generate_plot_data_cache_dicts(log_dirs_no_glob, log_dirs_yes_glob, yes_base for dn in log_dirs_no: info_dict_no = extract_seed_date_from_path(dn, yes_base, no_base) - no_seed = info_dict_no['seed'] - no_date = info_dict_no['date'] + no_seed = info_dict_no["seed"] + no_date = info_dict_no["date"] - no_yes_dict[dn] = { - 'seed': no_seed, - 'date': no_date, - 'execution_paths': [] - } + no_yes_dict[dn] = {"seed": no_seed, "date": no_date, "execution_paths": []} for dy in log_dirs_yes: info_dict_yes = extract_seed_date_from_path(dy, yes_base, no_base) - yes_seed = info_dict_yes['seed'] - yes_date = info_dict_yes['date'] - pov = info_dict_yes['pov'] + yes_seed = info_dict_yes["seed"] + yes_date = info_dict_yes["date"] + pov = info_dict_yes["pov"] if yes_seed == no_seed and yes_date == no_date: - no_yes_dict[dn]['execution_paths'].append({ - 'path': dy, - 'pov': pov - }) + no_yes_dict[dn]["execution_paths"].append({"path": dy, "pov": pov}) out_list = [] for dn, dn_info_dict in no_yes_dict.items(): - seed = dn_info_dict['seed'] - date = dn_info_dict['date'] + seed = dn_info_dict["seed"] + date = dn_info_dict["date"] PLOT_DATA = [] - for execution_d in dn_info_dict['execution_paths']: - pov = execution_d['pov'] - execution_path = execution_d['path'] + for execution_d in dn_info_dict["execution_paths"]: + pov = execution_d["pov"] + execution_path = execution_d["path"] plot_data_dict = { - 'participation_of_volume': pov, - 'no_execution_exchange_path': f'{dn}/{NO_EXECUTION_EXCHANGE_NAME}', - 'no_execution_orderbook_path': f'{dn}/{NO_EXECUTION_ORDERBOOK_NAME}', - 'yes_execution_exchange_path': f'{execution_path}/{YES_EXECUTION_EXCHANGE_NAME}', - 'yes_execution_orderbook_path': f'{execution_path}/{YES_EXECUTION_ORDERBOOK_NAME}', - 'seed': seed + "participation_of_volume": pov, + "no_execution_exchange_path": f"{dn}/{NO_EXECUTION_EXCHANGE_NAME}", + "no_execution_orderbook_path": f"{dn}/{NO_EXECUTION_ORDERBOOK_NAME}", + "yes_execution_exchange_path": 
f"{execution_path}/{YES_EXECUTION_EXCHANGE_NAME}", + "yes_execution_orderbook_path": f"{execution_path}/{YES_EXECUTION_ORDERBOOK_NAME}", + "seed": seed, } PLOT_DATA.append(plot_data_dict) - CACHE_DIR = f'cache/{CACHE_PREFIX}_{seed}_{date}' + CACHE_DIR = f"cache/{CACHE_PREFIX}_{seed}_{date}" out_tuple = (PLOT_DATA, CACHE_DIR) out_list.append(out_tuple) @@ -158,22 +140,29 @@ def generate_plot_data_cache_dicts(log_dirs_no_glob, log_dirs_yes_glob, yes_base def process_tuple(tup): - """ Helper method for __name__.make_cached. """ + """Helper method for __name__.make_cached.""" PLOT_DATA, CACHE_DIR = tup PLOT_PARAMS_DICT = { - 'shade_start_datetime': SHADE_START_TIME, - 'shade_end_datetime': SHADE_END_TIME, - 'spread_lookback': SPREAD_LOOKBACK, - 'experiment_name': EXPERIMENT_NAME, - 'log_dir': LOG_DIR, - 'execution_agent_name': EXECUTION_AGENT_NAME + "shade_start_datetime": SHADE_START_TIME, + "shade_end_datetime": SHADE_END_TIME, + "spread_lookback": SPREAD_LOOKBACK, + "experiment_name": EXPERIMENT_NAME, + "log_dir": LOG_DIR, + "execution_agent_name": EXECUTION_AGENT_NAME, } - prep_data(PLOT_DATA, CACHE_DIR, ONLY_EXECUTED, CLIPPED_START_TIME, CLIPPED_FINISH_TIME, PLOT_PARAMS_DICT, - compute_impact_stats=COMPUTE_IMPACT_STATS) + prep_data( + PLOT_DATA, + CACHE_DIR, + ONLY_EXECUTED, + CLIPPED_START_TIME, + CLIPPED_FINISH_TIME, + PLOT_PARAMS_DICT, + compute_impact_stats=COMPUTE_IMPACT_STATS, + ) def get_differences(tup): - """ Computes mid price differential for execution experiment. + """Computes mid price differential for execution experiment. :param tup: tuple with elements: - [0] dictionary holding data to be processed. 
@@ -181,53 +170,58 @@ def get_differences(tup): :return: """ dict_in, count = tup - orderbook_df = dict_in['no_execution_df'] - orderbook_with_execution_df = dict_in['yes_execution_df'] + orderbook_df = dict_in["no_execution_df"] + orderbook_with_execution_df = dict_in["yes_execution_df"] - impact_statistics = dict_in['impact_statistics'] if COMPUTE_IMPACT_STATS else None + impact_statistics = dict_in["impact_statistics"] if COMPUTE_IMPACT_STATS else None - pov = dict_in['pov'] + pov = dict_in["pov"] print(f"Processing df pair {count+1}") - mid_price_yes_execution, mid_price_no_execution = forward_fill_series(orderbook_with_execution_df["MID_PRICE"], - orderbook_df["MID_PRICE"]) + mid_price_yes_execution, mid_price_no_execution = forward_fill_series( + orderbook_with_execution_df["MID_PRICE"], orderbook_df["MID_PRICE"] + ) mid_price_diff = 10000 * (mid_price_yes_execution - mid_price_no_execution) / mid_price_no_execution out_df = mid_price_diff.to_frame() - out_df = out_df.loc[~out_df.index.duplicated(keep='last')] - out_df = out_df.rename(columns={ - 0: f'MID_PRICE_{count}', - }) + out_df = out_df.loc[~out_df.index.duplicated(keep="last")] + out_df = out_df.rename( + columns={ + 0: f"MID_PRICE_{count}", + } + ) # make date the same out_df.index = pd.DatetimeIndex(out_df.index.to_series().apply(normalise_time)) print(f"Finished processing df pair {count+1}") - out_dict = { - 'out_df': out_df, - 'impact_statistics': impact_statistics, - 'pov': pov - } + out_dict = {"out_df": out_df, "impact_statistics": impact_statistics, "pov": pov} return out_dict def concat_horizontal(df, s, resample_rate=RESAMPLE_RATE): - """ Concats a pd.Series object to a pd.DataFrame horizontally. They need to each have a DatetimeIndex""" - - df_out = pd.merge(df.resample(resample_rate).last(), s.resample(resample_rate).last(), how='outer', left_index=True, right_index=True) + """Concats a pd.Series object to a pd.DataFrame horizontally. 
They need to each have a DatetimeIndex""" + + df_out = pd.merge( + df.resample(resample_rate).last(), + s.resample(resample_rate).last(), + how="outer", + left_index=True, + right_index=True, + ) return df_out -def aggregate_orderbook_stats(saved_orderbooks, pov, cache_file_suffix='', num_workers=24): - """ Compute quantiles for mid-price and liquidity measures. +def aggregate_orderbook_stats(saved_orderbooks, pov, cache_file_suffix="", num_workers=24): + """Compute quantiles for mid-price and liquidity measures. - :param saved_orderbooks: output of __name__.process_orderbooks_for_liquidity_plots - :param num_workers: How many CPU cores to use for executing this function + :param saved_orderbooks: output of __name__.process_orderbooks_for_liquidity_plots + :param num_workers: How many CPU cores to use for executing this function - :type saved_orderbooks: list(tuple(pd.DataFrame)) - :type num_workers: int + :type saved_orderbooks: list(tuple(pd.DataFrame)) + :type num_workers: int """ @@ -236,16 +230,16 @@ def aggregate_orderbook_stats(saved_orderbooks, pov, cache_file_suffix='', num_w tuple_list = [(v, idx) for idx, v in enumerate(saved_orderbooks)] stat_dicts = p.map(get_differences, tuple_list) - stat_dfs = [s['out_df'] for s in stat_dicts] + stat_dfs = [s["out_df"] for s in stat_dicts] print("Merging DataFrames...") df_final = reduce(lambda left, right: concat_horizontal(left, right), stat_dfs) df_final = df_final.ffill() df_final = df_final.dropna() - df_final = df_final.loc[~df_final.index.duplicated(keep='last')] + df_final = df_final.loc[~df_final.index.duplicated(keep="last")] print("Computing summary statistics...") - mid_price_cols = [f'MID_PRICE_{x}' for x in range(len(saved_orderbooks))] + mid_price_cols = [f"MID_PRICE_{x}" for x in range(len(saved_orderbooks))] mid_price_median = df_final[mid_price_cols].median(axis=1) mid_price_05_q = df_final[mid_price_cols].quantile(q=0.05, axis=1) mid_price_25_q = df_final[mid_price_cols].quantile(q=0.25, axis=1) 
@@ -253,30 +247,30 @@ def aggregate_orderbook_stats(saved_orderbooks, pov, cache_file_suffix='', num_w mid_price_95_q = df_final[mid_price_cols].quantile(q=0.95, axis=1) df_data = { - 'mid_price_median': mid_price_median, - 'mid_price_05_q': mid_price_05_q, - 'mid_price_25_q': mid_price_25_q, - 'mid_price_75_q': mid_price_75_q, - 'mid_price_95_q': mid_price_95_q, + "mid_price_median": mid_price_median, + "mid_price_05_q": mid_price_05_q, + "mid_price_25_q": mid_price_25_q, + "mid_price_75_q": mid_price_75_q, + "mid_price_95_q": mid_price_95_q, } df_out = pd.DataFrame(data=df_data, index=df_final.index) - with open(f'cache/aggregated_execution_pov_{pov}_{cache_file_suffix}.pkl', 'wb') as f: + with open(f"cache/aggregated_execution_pov_{pov}_{cache_file_suffix}.pkl", "wb") as f: pickle.dump(df_out, f) - print('Done!') + print("Done!") return df_out def plot_aggregated(aggregated, plot_params_dict): - """ Make aggregated mid-price and liquidity plots. + """Make aggregated mid-price and liquidity plots. 
- :param aggregated: output of __name__.aggregate_orderbook_stats - :param plot_params_dict: dictionary containing some plotting parameters + :param aggregated: output of __name__.aggregate_orderbook_stats + :param plot_params_dict: dictionary containing some plotting parameters - :type aggregated: pd.DataFrame - :type plot_params_dict: dict + :type aggregated: pd.DataFrame + :type plot_params_dict: dict """ @@ -284,49 +278,77 @@ def plot_aggregated(aggregated, plot_params_dict): fig.set_size_inches(h=9, w=12) # mid_price - aggregated["mid_price_median"].plot(ax=axes, color='blue', label='Median') - axes.axhline(y=0, color='black', linestyle='--', linewidth=0.7, zorder=1) - axes.fill_between(x=aggregated.index, y1=aggregated["mid_price_median"], y2=aggregated['mid_price_95_q'], - color='blue', alpha=plot_params_dict['alpha_90'], label='90%') - axes.fill_between(x=aggregated.index, y2=aggregated["mid_price_median"], y1=aggregated['mid_price_05_q'], - color='blue', alpha=plot_params_dict['alpha_90']) - axes.fill_between(x=aggregated.index, y2=aggregated["mid_price_median"], y1=aggregated['mid_price_75_q'], - color='blue', alpha=plot_params_dict['alpha_50'], label='50%') - axes.fill_between(x=aggregated.index, y2=aggregated["mid_price_median"], y1=aggregated['mid_price_25_q'], - color='blue', alpha=plot_params_dict['alpha_50']) + aggregated["mid_price_median"].plot(ax=axes, color="blue", label="Median") + axes.axhline(y=0, color="black", linestyle="--", linewidth=0.7, zorder=1) + axes.fill_between( + x=aggregated.index, + y1=aggregated["mid_price_median"], + y2=aggregated["mid_price_95_q"], + color="blue", + alpha=plot_params_dict["alpha_90"], + label="90%", + ) + axes.fill_between( + x=aggregated.index, + y2=aggregated["mid_price_median"], + y1=aggregated["mid_price_05_q"], + color="blue", + alpha=plot_params_dict["alpha_90"], + ) + axes.fill_between( + x=aggregated.index, + y2=aggregated["mid_price_median"], + y1=aggregated["mid_price_75_q"], + color="blue", + 
alpha=plot_params_dict["alpha_50"], + label="50%", + ) + axes.fill_between( + x=aggregated.index, + y2=aggregated["mid_price_median"], + y1=aggregated["mid_price_25_q"], + color="blue", + alpha=plot_params_dict["alpha_50"], + ) date = aggregated.index[0].date() midnight = pd.Timestamp(date) - xmin = midnight + pd.to_timedelta(plot_params_dict['xmin']) - xmax = midnight + pd.to_timedelta(plot_params_dict['xmax']) + xmin = midnight + pd.to_timedelta(plot_params_dict["xmin"]) + xmax = midnight + pd.to_timedelta(plot_params_dict["xmax"]) - axes.set_ylim(plot_params_dict['ymin'], plot_params_dict['ymax']) + axes.set_ylim(plot_params_dict["ymin"], plot_params_dict["ymax"]) axes.set_xlim(xmin, xmax) - shade_start = midnight + pd.to_timedelta(plot_params_dict['shade_start_time']) - shade_end = midnight + pd.to_timedelta(plot_params_dict['shade_end_time']) - axes.axvspan(shade_start, shade_end, alpha=0.2, color='grey') + shade_start = midnight + pd.to_timedelta(plot_params_dict["shade_start_time"]) + shade_end = midnight + pd.to_timedelta(plot_params_dict["shade_end_time"]) + axes.axvspan(shade_start, shade_end, alpha=0.2, color="grey") - axes.set_xlabel('Time', size=18) + axes.set_xlabel("Time", size=18) - axes.set_ylabel('Mid-price normalized\n difference (bps)', size=18) + axes.set_ylabel("Mid-price normalized\n difference (bps)", size=18) # axes.xaxis.set_major_formatter(mdates.DateFormatter("%H:%M")) # axes.xaxis.set_minor_formatter(mdates.DateFormatter("%H:%M")) - axes.legend(loc='upper right', fontsize=18) - axes.tick_params(axis='both', which='major', labelsize=16) - axes.tick_params(axis='both', which='minor', labelsize=16) + axes.legend(loc="upper right", fontsize=18) + axes.tick_params(axis="both", which="major", labelsize=16) + axes.tick_params(axis="both", which="minor", labelsize=16) - fig.suptitle(plot_params_dict['execution_label'], fontsize=20, fontweight='bold') + fig.suptitle(plot_params_dict["execution_label"], fontsize=20, fontweight="bold") 
fig.tight_layout() fig.subplots_adjust(top=0.94) - fig.savefig(plot_params_dict['output_file_path'], format='png', dpi=300, transparent=False, bbox_inches='tight', - pad_inches=0.03) + fig.savefig( + plot_params_dict["output_file_path"], + format="png", + dpi=300, + transparent=False, + bbox_inches="tight", + pad_inches=0.03, + ) def make_cached(params, num_workers): - """ Process ABIDES output data into format suitable for aggregation and cache. + """Process ABIDES output data into format suitable for aggregation and cache. :param params: Data structure constructed by __name__.generate_plot_data_cache_dicts :param num_workers: Number of CPU cores to use @@ -337,14 +359,14 @@ def make_cached(params, num_workers): def load_cached(): - """ Loads cached execution experiment data from __name__.make_cached into memory + """Loads cached execution experiment data from __name__.make_cached into memory :return: """ data_to_process = [] - for path in glob.glob(f'cache/{CACHE_PREFIX}_*_[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]'): - with open(path, 'rb') as f: + for path in glob.glob(f"cache/{CACHE_PREFIX}_*_[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]"): + with open(path, "rb") as f: try: data = pickle.load(f) data_to_process.append(data) @@ -355,7 +377,7 @@ def load_cached(): def aggregate_data(data_to_process, num_workers): - """ Aggregates cached experimental data. + """Aggregates cached experimental data. 
:param data_to_process: output of __name__.load_cached :return: @@ -366,22 +388,20 @@ def aggregate_data(data_to_process, num_workers): data_pov = [] for data in data_to_process: for dd in data: - if dd['pov'] == str(pov): + if dd["pov"] == str(pov): data_pov.append(dd) aggregated = aggregate_orderbook_stats(data_pov, pov, cache_file_suffix=CACHE_PREFIX, num_workers=num_workers) - aggregated_data.update({ - pov: aggregated - }) + aggregated_data.update({pov: aggregated}) - with open(f'cache/{CACHE_PREFIX}_pov_multiday_agg.pkl', 'wb') as f: + with open(f"cache/{CACHE_PREFIX}_pov_multiday_agg.pkl", "wb") as f: pickle.dump(aggregated_data, f) return aggregated_data def plot_all_aggregated(aggregated_data, params): - """ Draw plots of aggregated data. + """Draw plots of aggregated data. :param aggregated_data: output of __name__.aggregate_data :param params: output of __name__.generate_plot_data_cache_dicts @@ -391,24 +411,24 @@ def plot_all_aggregated(aggregated_data, params): aggregated = aggregated_data[pov] PLOT_PARAMS_DICT = { - 'baseline_label': BASELINE_LABEL, - 'execution_label': f'POV {DIRECTION} order @ {100 * pov} %, freq. {FREQUENCY}, {len(params)} traces', - 'shade_start_time': SHADE_START_TIME, - 'shade_end_time': SHADE_END_TIME, - 'ymax': YMAX, - 'ymin': YMIN, - 'xmin': XMIN, - 'xmax': XMAX, - 'alpha_90': ALPHA_90, - 'alpha_50': ALPHA_50, - 'output_file_path': f'visualizations/{CACHE_PREFIX}_pov_{pov}_multiday.png' + "baseline_label": BASELINE_LABEL, + "execution_label": f"POV {DIRECTION} order @ {100 * pov} %, freq. 
{FREQUENCY}, {len(params)} traces", + "shade_start_time": SHADE_START_TIME, + "shade_end_time": SHADE_END_TIME, + "ymax": YMAX, + "ymin": YMIN, + "xmin": XMIN, + "xmax": XMAX, + "alpha_90": ALPHA_90, + "alpha_50": ALPHA_50, + "output_file_path": f"visualizations/{CACHE_PREFIX}_pov_{pov}_multiday.png", } plot_aggregated(aggregated, PLOT_PARAMS_DICT) def main(config_path, num_workers, recompute): - """ Load config file for multiday POV market impact experiment and draws plots, doing necessary data processing if necessary. + """Load config file for multiday POV market impact experiment and draws plots, doing necessary data processing if necessary. :param config_path: path to config file, see e.g. plot_configs/pov_plot_config.example.json :param num_workers: number of CPU cores to use during processing @@ -420,15 +440,11 @@ def main(config_path, num_workers, recompute): :return: """ - with open(config_path, 'r') as f: + with open(config_path, "r") as f: config_dict = json.load(f) # TODO: do this without global vars - global CLIPPED_START_TIME, CLIPPED_FINISH_TIME, ONLY_EXECUTED, NO_EXECUTION_EXCHANGE_NAME, \ - NO_EXECUTION_ORDERBOOK_NAME, YES_EXECUTION_EXCHANGE_NAME, YES_EXECUTION_ORDERBOOK_NAME, \ - CACHE_PREFIX, POVs, FREQUENCY, DIRECTION, BASELINE_LABEL, SHADE_START_TIME, SHADE_END_TIME, \ - YMAX, YMIN, XMIN, XMAX, ALPHA_90, ALPHA_50, yes_base, no_base, log_dirs_no_glob, log_dirs_yes_glob, \ - SPREAD_LOOKBACK, EXPERIMENT_NAME, LOG_DIR, EXECUTION_AGENT_NAME, COMPUTE_IMPACT_STATS + global CLIPPED_START_TIME, CLIPPED_FINISH_TIME, ONLY_EXECUTED, NO_EXECUTION_EXCHANGE_NAME, NO_EXECUTION_ORDERBOOK_NAME, YES_EXECUTION_EXCHANGE_NAME, YES_EXECUTION_ORDERBOOK_NAME, CACHE_PREFIX, POVs, FREQUENCY, DIRECTION, BASELINE_LABEL, SHADE_START_TIME, SHADE_END_TIME, YMAX, YMIN, XMIN, XMAX, ALPHA_90, ALPHA_50, yes_base, no_base, log_dirs_no_glob, log_dirs_yes_glob, SPREAD_LOOKBACK, EXPERIMENT_NAME, LOG_DIR, EXECUTION_AGENT_NAME, COMPUTE_IMPACT_STATS CLIPPED_START_TIME = 
config_dict["CLIPPED_START_TIME"] CLIPPED_FINISH_TIME = config_dict["CLIPPED_FINISH_TIME"] @@ -462,8 +478,10 @@ def main(config_path, num_workers, recompute): params = generate_plot_data_cache_dicts(log_dirs_no_glob, log_dirs_yes_glob, yes_base, no_base) if not params: - print("No files to process. Please check fields `yes_base`, `no_base`, `log_dirs_no_glob` and `log_dirs_yes_" - "glob` in config file") + print( + "No files to process. Please check fields `yes_base`, `no_base`, `log_dirs_no_glob` and `log_dirs_yes_" + "glob` in config file" + ) if recompute: print(f"Recomputing aggregate data for experiment {CACHE_PREFIX}") @@ -475,14 +493,15 @@ def main(config_path, num_workers, recompute): # check for cached aggregate data try: print("Searching for cached aggregate file.") - with open(f'cache/{CACHE_PREFIX}_pov_multiday_agg.pkl', 'rb') as f: + with open(f"cache/{CACHE_PREFIX}_pov_multiday_agg.pkl", "rb") as f: aggregated_data = pickle.load(f) print("Aggregate cached file found") except FileNotFoundError: print("Cached aggregate file not found. Searching for individual processed files.") try: data_to_process = load_cached() - if not data_to_process: raise FileNotFoundError("No individual cached files") + if not data_to_process: + raise FileNotFoundError("No individual cached files") print("Aggregating cached files...") aggregated_data = aggregate_data(data_to_process, num_workers) except (FileNotFoundError, TypeError) as e: @@ -497,25 +516,29 @@ def main(config_path, num_workers, recompute): print("Done!") -if __name__ == '__main__': - - parser = argparse.ArgumentParser(description='CLI utility for plotting results of multiday POV experiments.') - - parser.add_argument('plot_config', - help='Name of config file to execute. 
See plot_configs/multiday/pov_plot_config.example.json for an example.', - type=str) - parser.add_argument('--num_workers', - '-n', - help='Number of cores to use in computation', - required=False, - default=8, - type=int - ) - parser.add_argument('--recompute', - '-r', - help='Switch to reaggregate data.', - action='store_true', - ) +if __name__ == "__main__": + + parser = argparse.ArgumentParser(description="CLI utility for plotting results of multiday POV experiments.") + + parser.add_argument( + "plot_config", + help="Name of config file to execute. See plot_configs/multiday/pov_plot_config.example.json for an example.", + type=str, + ) + parser.add_argument( + "--num_workers", + "-n", + help="Number of cores to use in computation", + required=False, + default=8, + type=int, + ) + parser.add_argument( + "--recompute", + "-r", + help="Switch to reaggregate data.", + action="store_true", + ) args, remaining_args = parser.parse_known_args() @@ -526,4 +549,3 @@ def main(config_path, num_workers, recompute): recompute = args.recompute main(config_path, num_workers, recompute) - diff --git a/realism/impact_single_day_pov.py b/realism/impact_single_day_pov.py index 02c5ca05f..f4c906b5c 100755 --- a/realism/impact_single_day_pov.py +++ b/realism/impact_single_day_pov.py @@ -1,27 +1,33 @@ -import pandas as pd -import numpy as np -import matplotlib.pyplot as plt -from datetime import datetime, time import itertools -import pickle import os.path - +import pickle import sys -sys.path.extend(['../util/formatting']) -from convert_order_book import process_orderbook -from convert_order_stream import convert_stream_to_format -import itertools +from datetime import datetime, time -import matplotlib.dates as mdates -from realism_utils import clip_times, make_orderbook_for_analysis, compute_impact_statistics, get_plot_colors, get_plot_linestyles, forward_fill_series, make_cache_and_visualisation_dir +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd -from 
pprint import pprint -from bisect import bisect -import json +sys.path.extend(["../util/formatting"]) import argparse - +import itertools +import json import re +from bisect import bisect +from pprint import pprint +import matplotlib.dates as mdates +from convert_order_book import process_orderbook +from convert_order_stream import convert_stream_to_format +from realism_utils import ( + clip_times, + compute_impact_statistics, + forward_fill_series, + get_plot_colors, + get_plot_linestyles, + make_cache_and_visualisation_dir, + make_orderbook_for_analysis, +) """ Script plots the output for a single day market impact experiment. @@ -30,7 +36,7 @@ def make_plots(plot_data, plot_params_dict): - """ Draw and save plot from preprocessed data. + """Draw and save plot from preprocessed data. :param plot_data: Data structure processed by __name__.prep_data :param plot_params_dict: dict holding some plotting paramaters, see e.g. plot_configs/single_day/pov_single_day_config.example.json["PLOT_PARAMS_DICT"] @@ -39,61 +45,89 @@ def make_plots(plot_data, plot_params_dict): fig, axes = plt.subplots(nrows=2, ncols=1) fig.set_size_inches(h=14, w=9) - colors = list(get_plot_colors(plot_data + ['dummy']*2)) + colors = list(get_plot_colors(plot_data + ["dummy"] * 2)) color = colors.pop(0) linestyles = get_plot_linestyles(len(plot_data) + 2) linestyle = linestyles.pop(0) - orderbook_df = plot_data[0]['no_execution_df'] - orderbook_df["MID_PRICE"].plot(ax=axes[0], label=plot_params_dict['baseline_label'], color=color, linestyle=linestyle) + orderbook_df = plot_data[0]["no_execution_df"] + orderbook_df["MID_PRICE"].plot( + ax=axes[0], + label=plot_params_dict["baseline_label"], + color=color, + linestyle=linestyle, + ) for plot_data_dict in plot_data: color = colors.pop(0) linestyle = linestyles.pop(0) pov = f'{100 * plot_data_dict["pov"]} %' - orderbook_df = plot_data_dict['no_execution_df'] - orderbook_with_execution_df = plot_data_dict['yes_execution_df'] + orderbook_df = 
plot_data_dict["no_execution_df"] + orderbook_with_execution_df = plot_data_dict["yes_execution_df"] # mid_price - orderbook_with_execution_df["MID_PRICE"].plot(ax=axes[0], label= - f'{plot_params_dict["execution_label"]}{pov}', color=color, linestyle=linestyle) + orderbook_with_execution_df["MID_PRICE"].plot( + ax=axes[0], + label=f'{plot_params_dict["execution_label"]}{pov}', + color=color, + linestyle=linestyle, + ) # normalised difference - mid_price_yes_execution, mid_price_no_execution = forward_fill_series(orderbook_with_execution_df["MID_PRICE"], - orderbook_df["MID_PRICE"]) + mid_price_yes_execution, mid_price_no_execution = forward_fill_series( + orderbook_with_execution_df["MID_PRICE"], orderbook_df["MID_PRICE"] + ) diff = 10000 * (mid_price_yes_execution - mid_price_no_execution) / mid_price_no_execution diff = diff.to_frame() - diff = diff.loc[~diff.index.duplicated(keep='last')] + diff = diff.loc[~diff.index.duplicated(keep="last")] diff = diff[diff.columns[0]] # to series for plotting - diff.plot(ax=axes[1], label=f'{plot_params_dict["execution_label"]}{pov}', color=color, linestyle=linestyle) - - - axes[0].axvspan(pd.Timestamp(plot_params_dict['shade_start_datetime']), - pd.Timestamp(plot_params_dict['shade_end_datetime']), alpha=0.2, color='grey') - axes[1].axvspan(pd.Timestamp(plot_params_dict['shade_start_datetime']), - pd.Timestamp(plot_params_dict['shade_end_datetime']), alpha=0.2, color='grey') + diff.plot( + ax=axes[1], + label=f'{plot_params_dict["execution_label"]}{pov}', + color=color, + linestyle=linestyle, + ) + + axes[0].axvspan( + pd.Timestamp(plot_params_dict["shade_start_datetime"]), + pd.Timestamp(plot_params_dict["shade_end_datetime"]), + alpha=0.2, + color="grey", + ) + axes[1].axvspan( + pd.Timestamp(plot_params_dict["shade_start_datetime"]), + pd.Timestamp(plot_params_dict["shade_end_datetime"]), + alpha=0.2, + color="grey", + ) axes[-1].xaxis.set_major_formatter(mdates.DateFormatter("%H:%M")) 
axes[-1].xaxis.set_minor_formatter(mdates.DateFormatter("%H:%M")) axes[0].xaxis.set_visible(False) axes[0].legend() - axes[-1].set_xlabel('Time', size=15) - axes[0].set_ylabel('Mid-price ($)', size=15) - axes[1].set_ylabel('Normalized Difference (bps)', size=15) + axes[-1].set_xlabel("Time", size=15) + axes[0].set_ylabel("Mid-price ($)", size=15) + axes[1].set_ylabel("Normalized Difference (bps)", size=15) fig.tight_layout() fig.subplots_adjust(top=0.7) - fig.savefig(plot_params_dict["output_file_path"], format='png', dpi=300, transparent=False, bbox_inches='tight', - pad_inches=0.03) + fig.savefig( + plot_params_dict["output_file_path"], + format="png", + dpi=300, + transparent=False, + bbox_inches="tight", + pad_inches=0.03, + ) def check_date_in_string(s): - """ Check if date in format YYYY-MM-DD in a string. """ - m = re.search(r'(\d{4}-\d{2}-\d{2})', s) + """Check if date in format YYYY-MM-DD in a string.""" + m = re.search(r"(\d{4}-\d{2}-\d{2})", s) if m: return True else: @@ -101,25 +135,33 @@ def check_date_in_string(s): def prepare_shade_dates(start, end, historical_date): - """ Helper method for prep_data. Formats date string in correct for compute_impact_statistics method""" + """Helper method for prep_data. 
Formats date string in correct for compute_impact_statistics method""" if not check_date_in_string(start): shade_start_time = historical_date + pd.to_timedelta(start) - shade_start_time = shade_start_time.strftime('%Y-%m-%d %H:%M:%S') + shade_start_time = shade_start_time.strftime("%Y-%m-%d %H:%M:%S") else: shade_start_time = start if not check_date_in_string(end): shade_end_time = historical_date + pd.to_timedelta(end) - shade_end_time = shade_end_time.strftime('%Y-%m-%d %H:%M:%S') + shade_end_time = shade_end_time.strftime("%Y-%m-%d %H:%M:%S") else: shade_end_time = end return shade_start_time, shade_end_time -def prep_data(plot_data, cache_file, only_executed, clipped_start_time, clipped_end_time, plot_params_dict, compute_impact_stats=False): - """ Prepares and caches POV market impact experiment output files for further aggregation and processing. +def prep_data( + plot_data, + cache_file, + only_executed, + clipped_start_time, + clipped_end_time, + plot_params_dict, + compute_impact_stats=False, +): + """Prepares and caches POV market impact experiment output files for further aggregation and processing. :param plot_data: Data structure holding paths to relevant ABIDES output files, see e.g. plot_configs/single_day/pov_single_day_config.example.json["PLOT_DATA"] :param cache_file: Path to file where processed data will be cached. 
@@ -143,66 +185,83 @@ def prep_data(plot_data, cache_file, only_executed, clipped_start_time, clipped_ for data_dict in plot_data: print(f"Processing data for POV {data_dict['participation_of_volume']}") - abides_orderbook_df = make_orderbook_for_analysis(data_dict['no_execution_exchange_path'], data_dict['no_execution_orderbook_path'], num_levels=1) - abides_execution_orderbook_df = make_orderbook_for_analysis(data_dict['yes_execution_exchange_path'], data_dict['yes_execution_orderbook_path'], - num_levels=1) + abides_orderbook_df = make_orderbook_for_analysis( + data_dict["no_execution_exchange_path"], + data_dict["no_execution_orderbook_path"], + num_levels=1, + ) + abides_execution_orderbook_df = make_orderbook_for_analysis( + data_dict["yes_execution_exchange_path"], + data_dict["yes_execution_orderbook_path"], + num_levels=1, + ) if only_executed: abides_orderbook_df = abides_orderbook_df.loc[abides_orderbook_df["TYPE"] == "ORDER_EXECUTED"] abides_execution_orderbook_df = abides_execution_orderbook_df.loc[ - abides_execution_orderbook_df["TYPE"] == "ORDER_EXECUTED"] + abides_execution_orderbook_df["TYPE"] == "ORDER_EXECUTED" + ] historical_date = pd.Timestamp(abides_orderbook_df.index[0].date()) start = historical_date + pd.to_timedelta(clipped_start_time) end = historical_date + pd.to_timedelta(clipped_end_time) - shade_start_time, shade_end_time = prepare_shade_dates(plot_params_dict['shade_start_datetime'], - plot_params_dict['shade_end_datetime'], - historical_date) + shade_start_time, shade_end_time = prepare_shade_dates( + plot_params_dict["shade_start_datetime"], + plot_params_dict["shade_end_datetime"], + historical_date, + ) abides_orderbook_df = clip_times(abides_orderbook_df, start, end) abides_execution_orderbook_df = clip_times(abides_execution_orderbook_df, start, end) - date_str = historical_date.strftime('%Y%m%d') + date_str = historical_date.strftime("%Y%m%d") pov = data_dict["participation_of_volume"] seed = data_dict["seed"] if 
compute_impact_stats: - stats_dict = compute_impact_statistics(abides_orderbook_df, abides_execution_orderbook_df, - shade_start_time, shade_end_time, - date_str=date_str, - pov=pov, - seed=seed, - experiment_name=plot_params_dict['experiment_name'], - execution_agent_name=plot_params_dict['execution_agent_name'], - log_dir=plot_params_dict['log_dir'], - spread_lookback=plot_params_dict['spread_lookback'] - ) + stats_dict = compute_impact_statistics( + abides_orderbook_df, + abides_execution_orderbook_df, + shade_start_time, + shade_end_time, + date_str=date_str, + pov=pov, + seed=seed, + experiment_name=plot_params_dict["experiment_name"], + execution_agent_name=plot_params_dict["execution_agent_name"], + log_dir=plot_params_dict["log_dir"], + spread_lookback=plot_params_dict["spread_lookback"], + ) print(f"Statistics for participation of volume at level {100 * data_dict['participation_of_volume']}%") print("Statistics:") pprint(stats_dict) - out_data.append({ - "no_execution_df": abides_orderbook_df, - "yes_execution_df": abides_execution_orderbook_df, - "impact_statistics": stats_dict, - "pov": data_dict['participation_of_volume'] - }) + out_data.append( + { + "no_execution_df": abides_orderbook_df, + "yes_execution_df": abides_execution_orderbook_df, + "impact_statistics": stats_dict, + "pov": data_dict["participation_of_volume"], + } + ) else: - out_data.append({ - "no_execution_df": abides_orderbook_df, - "yes_execution_df": abides_execution_orderbook_df, - "pov": data_dict['participation_of_volume'] - }) - - with open(cache_file, 'wb') as f: + out_data.append( + { + "no_execution_df": abides_orderbook_df, + "yes_execution_df": abides_execution_orderbook_df, + "pov": data_dict["participation_of_volume"], + } + ) + + with open(cache_file, "wb") as f: pickle.dump(out_data, f) return out_data def main(config_path): - """ Loads a plot config file for a single day POV market impact experiment and plots the result. 
+ """Loads a plot config file for a single day POV market impact experiment and plots the result. :param config_path: 'Name of config file to execute. See plot_configs/single_day/pov_single_day_config.example.json for an example.' :type config_path: str @@ -210,12 +269,11 @@ def main(config_path): :return: """ - with open(config_path, 'r') as f: + with open(config_path, "r") as f: config_dict = json.load(f) # TODO: do this without global vars - global ONLY_EXECUTED, CLIPPED_START_TIME, CLIPPED_FINISH_TIME, PLOT_DATA, CACHE_FILE, USE_CACHE, PLOT_PARAMS_DICT, \ - SPREAD_PLOT_LOG_SCALE, ORDERBOOK_IMBALANCE_PLOT_LOG_SCALE, COMPUTE_IMPACT_STATS + global ONLY_EXECUTED, CLIPPED_START_TIME, CLIPPED_FINISH_TIME, PLOT_DATA, CACHE_FILE, USE_CACHE, PLOT_PARAMS_DICT, SPREAD_PLOT_LOG_SCALE, ORDERBOOK_IMBALANCE_PLOT_LOG_SCALE, COMPUTE_IMPACT_STATS ONLY_EXECUTED = config_dict["ONLY_EXECUTED"] CLIPPED_START_TIME = config_dict["CLIPPED_START_TIME"] @@ -229,26 +287,35 @@ def main(config_path): COMPUTE_IMPACT_STATS = config_dict["COMPUTE_IMPACT_STATS"] if USE_CACHE and os.path.exists(CACHE_FILE): - with open(CACHE_FILE, 'rb') as f: + with open(CACHE_FILE, "rb") as f: print("Using cache...") out_data = pickle.load(f) else: - out_data = prep_data(PLOT_DATA, CACHE_FILE, ONLY_EXECUTED, CLIPPED_START_TIME, CLIPPED_FINISH_TIME, - PLOT_PARAMS_DICT, compute_impact_stats=COMPUTE_IMPACT_STATS) + out_data = prep_data( + PLOT_DATA, + CACHE_FILE, + ONLY_EXECUTED, + CLIPPED_START_TIME, + CLIPPED_FINISH_TIME, + PLOT_PARAMS_DICT, + compute_impact_stats=COMPUTE_IMPACT_STATS, + ) print("Constructing plots...") make_plots(out_data, PLOT_PARAMS_DICT) - print('Plotting complete!') + print("Plotting complete!") -if __name__ == '__main__': +if __name__ == "__main__": - parser = argparse.ArgumentParser(description='CLI utility for plotting results of single day POV experiments.') + parser = argparse.ArgumentParser(description="CLI utility for plotting results of single day POV experiments.") - 
parser.add_argument('plot_config', - help='Name of config file to execute. See plot_configs/single_day/pov_single_day_config.example.json for an example.', - type=str) + parser.add_argument( + "plot_config", + help="Name of config file to execute. See plot_configs/single_day/pov_single_day_config.example.json for an example.", + type=str, + ) args, remaining_args = parser.parse_known_args() config_path = args.plot_config @@ -256,9 +323,3 @@ def main(config_path): make_cache_and_visualisation_dir() main(config_path) - - - - - - diff --git a/realism/market_impact/abm_market_impact.py b/realism/market_impact/abm_market_impact.py index 54a324464..ba9ce91ba 100644 --- a/realism/market_impact/abm_market_impact.py +++ b/realism/market_impact/abm_market_impact.py @@ -1,7 +1,9 @@ import argparse -import pandas as pd -import numpy as np import sys + +import numpy as np +import pandas as pd + p = str(Path(__file__).resolve().parents[2]) # directory two levels up from this file sys.path.append(p) @@ -10,15 +12,17 @@ def create_orderbooks(exchange_path, ob_path): MID_PRICE_CUTOFF = 10000 - processed_orderbook = make_orderbook_for_analysis(exchange_path, ob_path, num_levels=1, - hide_liquidity_collapse=False) - cleaned_orderbook = processed_orderbook[(processed_orderbook['MID_PRICE'] > - MID_PRICE_CUTOFF) & - (processed_orderbook['MID_PRICE'] < MID_PRICE_CUTOFF)] + processed_orderbook = make_orderbook_for_analysis( + exchange_path, ob_path, num_levels=1, hide_liquidity_collapse=False + ) + cleaned_orderbook = processed_orderbook[ + (processed_orderbook["MID_PRICE"] > -MID_PRICE_CUTOFF) & (processed_orderbook["MID_PRICE"] < MID_PRICE_CUTOFF) + ] transacted_orders = cleaned_orderbook.loc[cleaned_orderbook.TYPE == "ORDER_EXECUTED"] transacted_orders = transacted_orders.reset_index() - transacted_orders = transacted_orders.sort_values(by=['index', 'ORDER_ID']).iloc[1::2] - transacted_orders.set_index('index', inplace=True) + transacted_orders = 
transacted_orders.sort_values(by=["index", "ORDER_ID"]).iloc[1::2] + transacted_orders.set_index("index", inplace=True) return processed_orderbook, transacted_orders, cleaned_orderbook @@ -37,45 +41,55 @@ def calculate_mid_move(row): mid_t_start = mid_resampled.loc[mid_resampled.index == t_start].item() mid_t_end = mid_resampled.loc[mid_resampled.index == t_end].item() if row.ti < 0: - row.mi = -1 * ((mid_t_end - mid_t_start) / mid_t_start) * 10000 # bps + row.mi = -1 * ((mid_t_end - mid_t_start) / mid_t_start) * 10000 # bps else: - row.mi = (mid_t_end - mid_t_start) / mid_t_start * 10000 # bps + row.mi = (mid_t_end - mid_t_start) / mid_t_start * 10000 # bps return row.mi except: pass - ob_df = ob_df.reset_index().drop_duplicates(subset='index', keep='last').set_index('index') + ob_df = ob_df.reset_index().drop_duplicates(subset="index", keep="last").set_index("index") mid = ob_df.MID_PRICE - mid_resampled = mid.resample(f'{tao}s').ffill() - - binned_buy_volume = create_bins(tao=int(tao), start_time=start_time, end_time=end_time, orders_df=orders_df, - is_buy=True).fillna(0) - binned_sell_volume = create_bins(tao=int(tao), start_time=start_time, end_time=end_time, orders_df=orders_df, - is_buy=False).fillna(0) + mid_resampled = mid.resample(f"{tao}s").ffill() + + binned_buy_volume = create_bins( + tao=int(tao), + start_time=start_time, + end_time=end_time, + orders_df=orders_df, + is_buy=True, + ).fillna(0) + binned_sell_volume = create_bins( + tao=int(tao), + start_time=start_time, + end_time=end_time, + orders_df=orders_df, + is_buy=False, + ).fillna(0) midf = pd.DataFrame() - midf['buy_vol'] = binned_buy_volume - midf['sell_vol'] = binned_sell_volume - midf['ti'] = midf['buy_vol'] - midf['sell_vol'] # Trade Imbalance - midf['pov'] = abs(midf['ti']) / (midf['buy_vol'] + midf['sell_vol']) # Participation of Volume in tao - midf['mi'] = None + midf["buy_vol"] = binned_buy_volume + midf["sell_vol"] = binned_sell_volume + midf["ti"] = midf["buy_vol"] - 
midf["sell_vol"] # Trade Imbalance + midf["pov"] = abs(midf["ti"]) / (midf["buy_vol"] + midf["sell_vol"]) # Participation of Volume in tao + midf["mi"] = None midf.index = pd.interval_range(start=start_time, end=end_time, freq=pd.DateOffset(seconds=int(tao))) midf.mi = midf.apply(calculate_mid_move, axis=1) pov_bins = np.linspace(start=0, stop=1, num=1000, endpoint=False) - pov_binned = pd.cut(x=midf['pov'], bins=pov_bins) + pov_binned = pd.cut(x=midf["pov"], bins=pov_bins) - midf['pov_bins'] = pov_binned + midf["pov_bins"] = pov_binned - midf_gpd = midf.sort_values(by='pov_bins') + midf_gpd = midf.sort_values(by="pov_bins") midf_gpd.index = midf_gpd.pov_bins - del midf_gpd['pov_bins'] + del midf_gpd["pov_bins"] df = pd.DataFrame(index=midf_gpd.index) - df['mi'] = midf_gpd['mi'] - df['pov'] = midf_gpd['pov'] + df["mi"] = midf_gpd["mi"] + df["pov"] = midf_gpd["pov"] df = df.groupby(df.index).mean() return df @@ -83,28 +97,29 @@ def calculate_mid_move(row): if __name__ == "__main__": - parser = argparse.ArgumentParser(description='Market Impact Curve as described in AlmgrenChriss 05 paper') + parser = argparse.ArgumentParser(description="Market Impact Curve as described in AlmgrenChriss 05 paper") - parser.add_argument('--stock', default=None, required=True, help='stock (ABM)') - parser.add_argument('--date', default=None, required=True, help='date (20200101)') - parser.add_argument('--log', type=str, default=None, required=True, help='log folder') - parser.add_argument('--tao', type=int, required=True, help='Number of seconds in each bin') + parser.add_argument("--stock", default=None, required=True, help="stock (ABM)") + parser.add_argument("--date", default=None, required=True, help="date (20200101)") + parser.add_argument("--log", type=str, default=None, required=True, help="log folder") + parser.add_argument("--tao", type=int, required=True, help="Number of seconds in each bin") args, remaining_args = parser.parse_known_args() stock = args.stock date = args.date 
- start_time = pd.Timestamp(date) + pd.to_timedelta('09:30:00') - end_time = pd.Timestamp(date) + pd.to_timedelta('16:00:00') + start_time = pd.Timestamp(date) + pd.to_timedelta("09:30:00") + end_time = pd.Timestamp(date) + pd.to_timedelta("16:00:00") abides_log_folder = args.log - print('Processing market impact data for {}'.format(abides_log_folder)) + print("Processing market impact data for {}".format(abides_log_folder)) processed_orderbook, transacted_orders, cleaned_orderbook = create_orderbooks( - exchange_path=abides_log_folder + '/EXCHANGE_AGENT.bz2', - ob_path=abides_log_folder + '/ORDERBOOK_{}_FULL.bz2'.format(stock)) + exchange_path=abides_log_folder + "/EXCHANGE_AGENT.bz2", + ob_path=abides_log_folder + "/ORDERBOOK_{}_FULL.bz2".format(stock), + ) df = calculate_market_impact(transacted_orders, cleaned_orderbook, start_time, end_time, tao=args.tao) - df.to_pickle(abides_log_folder + f'/market_impact_df_tao_{args.tao}.bz2') + df.to_pickle(abides_log_folder + f"/market_impact_df_tao_{args.tao}.bz2") - print('Processed market impact data for {}'.format(abides_log_folder)) \ No newline at end of file + print("Processed market impact data for {}".format(abides_log_folder)) diff --git a/realism/market_impact/marketreplay_market_impact.py b/realism/market_impact/marketreplay_market_impact.py index 4f256268e..497f5460d 100644 --- a/realism/market_impact/marketreplay_market_impact.py +++ b/realism/market_impact/marketreplay_market_impact.py @@ -1,45 +1,60 @@ import argparse -import pandas as pd + import numpy as np +import pandas as pd num_levels = 50 -columns = [[f'ask_price_{level}', f'ask_size_{level}', f'bid_price_{level}', f'bid_size_{level}'] for level in range(1, num_levels+1)] +columns = [ + [ + f"ask_price_{level}", + f"ask_size_{level}", + f"bid_price_{level}", + f"bid_size_{level}", + ] + for level in range(1, num_levels + 1) +] columns = [x for b in columns for x in b] def process_data(abides_log_folder, stock, date): - csv_orderbooks_parent_folder = 
'/efs/data/get_real_data/lobsterized/orderbook/' + csv_orderbooks_parent_folder = "/efs/data/get_real_data/lobsterized/orderbook/" # Orderbook snapshots - ob_df = pd.read_csv(csv_orderbooks_parent_folder + f'orderbook_{stock}_{date}.csv') + ob_df = pd.read_csv(csv_orderbooks_parent_folder + f"orderbook_{stock}_{date}.csv") ob_df.columns = columns - ob_df.index = pd.read_pickle(abides_log_folder + f'ORDERBOOK_{stock}_FREQ_ALL_{date.replace("-", "")}.bz2').index[1:] + ob_df.index = pd.read_pickle(abides_log_folder + f'ORDERBOOK_{stock}_FREQ_ALL_{date.replace("-", "")}.bz2').index[ + 1: + ] - start_time = pd.Timestamp(date) + pd.to_timedelta('09:30:00') - end_time = pd.Timestamp(date) + pd.to_timedelta('16:00:00') + start_time = pd.Timestamp(date) + pd.to_timedelta("09:30:00") + end_time = pd.Timestamp(date) + pd.to_timedelta("16:00:00") ob_df = ob_df.loc[(ob_df.index >= start_time) & (ob_df.index <= end_time)] - # Transacted Orders - ea_df = pd.read_pickle(abides_log_folder + 'EXCHANGE_AGENT.bz2') - ea_df = ea_df.loc[ea_df.EventType == 'ORDER_EXECUTED'] + ea_df = pd.read_pickle(abides_log_folder + "EXCHANGE_AGENT.bz2") + ea_df = ea_df.loc[ea_df.EventType == "ORDER_EXECUTED"] - transacted_orders_df = pd.DataFrame(columns=['TIMESTAMP', 'ORDER_ID', 'PRICE', 'SIZE', 'BUY_SELL_FLAG']) + transacted_orders_df = pd.DataFrame(columns=["TIMESTAMP", "ORDER_ID", "PRICE", "SIZE", "BUY_SELL_FLAG"]) i = 0 for index, row in ea_df.iterrows(): - transacted_orders_df = transacted_orders_df.append(pd.Series(data={ - 'TIMESTAMP': index, - 'ORDER_ID': row.Event['order_id'], - 'PRICE': row.Event['fill_price'], - 'SIZE': row.Event['quantity'], - 'BUY_SELL_FLAG': row.Event['is_buy_order'] - }), ignore_index=True) + transacted_orders_df = transacted_orders_df.append( + pd.Series( + data={ + "TIMESTAMP": index, + "ORDER_ID": row.Event["order_id"], + "PRICE": row.Event["fill_price"], + "SIZE": row.Event["quantity"], + "BUY_SELL_FLAG": row.Event["is_buy_order"], + } + ), + ignore_index=True, + ) 
i += 1 - transacted_orders_df.set_index('TIMESTAMP', inplace=True) + transacted_orders_df.set_index("TIMESTAMP", inplace=True) - transacted_orders_df = transacted_orders_df.sort_values(by=['TIMESTAMP', 'ORDER_ID']).iloc[1::2] + transacted_orders_df = transacted_orders_df.sort_values(by=["TIMESTAMP", "ORDER_ID"]).iloc[1::2] return ob_df, transacted_orders_df, start_time, end_time @@ -59,43 +74,53 @@ def calculate_mid_move(row): mid_t_start = mid_resampled.loc[mid_resampled.index == t_start].item() mid_t_end = mid_resampled.loc[mid_resampled.index == t_end].item() if row.ti < 0: - row.mi = -1 * ((mid_t_end - mid_t_start) / mid_t_start) * 10000 # bps + row.mi = -1 * ((mid_t_end - mid_t_start) / mid_t_start) * 10000 # bps else: - row.mi = (mid_t_end - mid_t_start) / mid_t_start * 10000 # bps + row.mi = (mid_t_end - mid_t_start) / mid_t_start * 10000 # bps return row.mi except: pass mid = (ob_df.ask_price_1 + ob_df.bid_price_1) / 2 - mid_resampled = mid.resample(f'{tao}s').ffill() - - binned_buy_volume = create_bins(tao=int(tao), start_time=start_time, end_time=end_time, orders_df=orders_df, - is_buy=True).fillna(0) - binned_sell_volume = create_bins(tao=int(tao), start_time=start_time, end_time=end_time, orders_df=orders_df, - is_buy=False).fillna(0) + mid_resampled = mid.resample(f"{tao}s").ffill() + + binned_buy_volume = create_bins( + tao=int(tao), + start_time=start_time, + end_time=end_time, + orders_df=orders_df, + is_buy=True, + ).fillna(0) + binned_sell_volume = create_bins( + tao=int(tao), + start_time=start_time, + end_time=end_time, + orders_df=orders_df, + is_buy=False, + ).fillna(0) midf = pd.DataFrame() - midf['buy_vol'] = binned_buy_volume - midf['sell_vol'] = binned_sell_volume - midf['ti'] = midf['buy_vol'] - midf['sell_vol'] # Trade Imbalance - midf['pov'] = abs(midf['ti']) / (midf['buy_vol'] + midf['sell_vol']) # Participation of Volume in tao - midf['mi'] = None + midf["buy_vol"] = binned_buy_volume + midf["sell_vol"] = binned_sell_volume + 
midf["ti"] = midf["buy_vol"] - midf["sell_vol"] # Trade Imbalance + midf["pov"] = abs(midf["ti"]) / (midf["buy_vol"] + midf["sell_vol"]) # Participation of Volume in tao + midf["mi"] = None midf.index = pd.interval_range(start=start_time, end=end_time, freq=pd.DateOffset(seconds=int(tao))) midf.mi = midf.apply(calculate_mid_move, axis=1) pov_bins = np.linspace(start=0, stop=1, num=1000, endpoint=False) - pov_binned = pd.cut(x=midf['pov'], bins=pov_bins) + pov_binned = pd.cut(x=midf["pov"], bins=pov_bins) - midf['pov_bins'] = pov_binned + midf["pov_bins"] = pov_binned - midf_gpd = midf.sort_values(by='pov_bins') + midf_gpd = midf.sort_values(by="pov_bins") midf_gpd.index = midf_gpd.pov_bins - del midf_gpd['pov_bins'] + del midf_gpd["pov_bins"] df = pd.DataFrame(index=midf_gpd.index) - df['mi'] = midf_gpd['mi'] - df['pov'] = midf_gpd['pov'] + df["mi"] = midf_gpd["mi"] + df["pov"] = midf_gpd["pov"] df = df.groupby(df.index).mean() return df @@ -103,26 +128,24 @@ def calculate_mid_move(row): if __name__ == "__main__": - parser = argparse.ArgumentParser(description='Market Impact Curve as described in AlmgrenChriss 05 paper') + parser = argparse.ArgumentParser(description="Market Impact Curve as described in AlmgrenChriss 05 paper") - parser.add_argument('--tao', required=True, help='Number of seconds in each bin') - parser.add_argument('--ticker', required=True, help='Name of the stock/symbol') - parser.add_argument('--date', required=True, help='Historical date') + parser.add_argument("--tao", required=True, help="Number of seconds in each bin") + parser.add_argument("--ticker", required=True, help="Name of the stock/symbol") + parser.add_argument("--date", required=True, help="Historical date") args, remaining_args = parser.parse_known_args() stock = args.ticker date = args.date - abides_logs_parent_folder = '/efs/data/get_real_data/marketreplay-logs/log/' + abides_logs_parent_folder = "/efs/data/get_real_data/marketreplay-logs/log/" abides_log_folder = 
abides_logs_parent_folder + f'marketreplay_{stock}_{date.replace("-", "")}/' - ob_df, orders_df, start_time, end_time = process_data(abides_log_folder=abides_log_folder, - stock=stock, - date=date) - print(f'Processed order book data for {stock} {date}, calculating market impact ...') + ob_df, orders_df, start_time, end_time = process_data(abides_log_folder=abides_log_folder, stock=stock, date=date) + print(f"Processed order book data for {stock} {date}, calculating market impact ...") df = calculate_market_impact(orders_df, ob_df, start_time, end_time, tao=args.tao) - df.to_pickle(abides_log_folder + f'market_impact_df_tao_{args.tao}.bz2') + df.to_pickle(abides_log_folder + f"market_impact_df_tao_{args.tao}.bz2") - print(f'Processed market impact data for {stock} {date}') \ No newline at end of file + print(f"Processed market impact data for {stock} {date}") diff --git a/realism/metrics/aggregation_normality.py b/realism/metrics/aggregation_normality.py index 7f2ca3a13..17591df66 100644 --- a/realism/metrics/aggregation_normality.py +++ b/realism/metrics/aggregation_normality.py @@ -1,6 +1,7 @@ from metrics.metric import Metric from metrics.minutely_returns import MinutelyReturns + class AggregationNormality(Metric): def __init__(self): @@ -11,4 +12,10 @@ def compute(self, df): return self.mr.compute(df) def visualize(self, simulated): - self.hist(simulated, "Aggregation Normality (10 minutes)", "Log Returns", log=True, clip=.05) + self.hist( + simulated, + "Aggregation Normality (10 minutes)", + "Log Returns", + log=True, + clip=0.05, + ) diff --git a/realism/metrics/autocorrelation.py b/realism/metrics/autocorrelation.py index 7164addac..524cbd213 100644 --- a/realism/metrics/autocorrelation.py +++ b/realism/metrics/autocorrelation.py @@ -1,7 +1,8 @@ import random + +import pandas as pd from metrics.metric import Metric from metrics.minutely_returns import MinutelyReturns -import pandas as pd class Autocorrelation(Metric): @@ -22,5 +23,9 @@ def visualize(self, 
simulated): for k, v in simulated.items(): random.shuffle(v) simulated[k] = v[:min_sim] - self.hist(simulated, title="Autocorrelation (lag={}, window={})".format(self.lag, self.window), xlabel="Correlation coefficient", log=False) - + self.hist( + simulated, + title="Autocorrelation (lag={}, window={})".format(self.lag, self.window), + xlabel="Correlation coefficient", + log=False, + ) diff --git a/realism/metrics/kurtosis.py b/realism/metrics/kurtosis.py index 863a3f0af..072e83dfc 100644 --- a/realism/metrics/kurtosis.py +++ b/realism/metrics/kurtosis.py @@ -11,11 +11,17 @@ def __init__(self, intervals=4): def compute(self, df): ks = [] - for i in range(1,self.intervals+1): + for i in range(1, self.intervals + 1): temp = df[["close"]].resample("{}T".format(i)).last() rets = self.mr.compute(temp) ks.append(kurtosis(rets)) return [ks] def visualize(self, simulated): - self.line(simulated, title="Kurtosis", xlabel="Time scale (min)", ylabel="Average kurtosis", logy=True) + self.line( + simulated, + title="Kurtosis", + xlabel="Time scale (min)", + ylabel="Average kurtosis", + logy=True, + ) diff --git a/realism/metrics/metric.py b/realism/metrics/metric.py index 1cf284d97..47b3a5202 100644 --- a/realism/metrics/metric.py +++ b/realism/metrics/metric.py @@ -1,5 +1,5 @@ -import numpy as np import matplotlib.pyplot as plt +import numpy as np class Metric: @@ -13,7 +13,15 @@ def visualize(self, simulated): raise NotImplementedError # Create an overlapping histogram of the provided data. 
- def hist(self, simulated, title="Simulation data histogram", xlabel="Values", log=False, bins=75, clip=None): + def hist( + self, + simulated, + title="Simulation data histogram", + xlabel="Values", + log=False, + bins=75, + clip=None, + ): for k, v in simulated.items(): simulated[k] = np.array(v).reshape((len(v), 1)) first_sim = simulated[list(simulated.keys())[0]] @@ -24,7 +32,16 @@ def hist(self, simulated, title="Simulation data histogram", xlabel="Values", lo # Show histograms for k, v in simulated.items(): - plt.hist(v, bins=bins, color=k[1], log=log, alpha=1, label=k[0], histtype="step", linewidth=3) + plt.hist( + v, + bins=bins, + color=k[1], + log=log, + alpha=1, + label=k[0], + histtype="step", + linewidth=3, + ) plt.title(title + (" (log scale)" if log else "") + ("" if clip is None else " (clipped @ ±{})".format(clip))) plt.xlabel(xlabel) @@ -36,18 +53,18 @@ def line(self, simulated, title="Simulation data", xlabel="X", ylabel="Y", logy= for k, v in simulated.items(): simulated[k] = np.array(v) first_sim = simulated[list(simulated.keys())[0]] - x = np.arange(first_sim.shape[1])+1 + x = np.arange(first_sim.shape[1]) + 1 for k, v in simulated.items(): err_simulated = np.nanstd(v, axis=0) v = np.nanmean(v, axis=0) plt.plot(x, v, color=k[1], linewidth=4, label=k[0]) - #plt.fill_between(x, v-err_simulated, v+err_simulated, alpha=.1, color=k[1]).set_linestyle('dashed') + # plt.fill_between(x, v-err_simulated, v+err_simulated, alpha=.1, color=k[1]).set_linestyle('dashed') plt.xticks(x) plt.title(title + (" (log scale)" if logy else "")) plt.xlabel(xlabel) plt.ylabel(ylabel + (" (log)" if logy else "")) - if (logy): + if logy: plt.yscale("log") plt.legend() diff --git a/realism/metrics/minutely_returns.py b/realism/metrics/minutely_returns.py index db44af2f8..394ad4ffa 100644 --- a/realism/metrics/minutely_returns.py +++ b/realism/metrics/minutely_returns.py @@ -1,6 +1,6 @@ -from metrics.metric import Metric import matplotlib.pyplot as plt import numpy as np 
+from metrics.metric import Metric class MinutelyReturns(Metric): @@ -12,4 +12,10 @@ def compute(self, df): return df.tolist() def visualize(self, simulated): - self.hist(simulated, title="Minutely Log Returns", xlabel="Log Returns", log=True, clip=.05) + self.hist( + simulated, + title="Minutely Log Returns", + xlabel="Log Returns", + log=True, + clip=0.05, + ) diff --git a/realism/metrics/returns_volatility_correlation.py b/realism/metrics/returns_volatility_correlation.py index cbc8d0370..c7c272a60 100644 --- a/realism/metrics/returns_volatility_correlation.py +++ b/realism/metrics/returns_volatility_correlation.py @@ -1,7 +1,7 @@ +import numpy as np from metrics.metric import Metric from metrics.minutely_returns import MinutelyReturns from scipy.stats import kurtosis -import numpy as np class ReturnsVolatilityCorrelation(Metric): @@ -12,7 +12,12 @@ def __init__(self, intervals=4): def compute(self, df): returns = np.array(self.mr.compute(df)) volatility = abs(returns) - return [np.corrcoef(returns, volatility)[0,1]] + return [np.corrcoef(returns, volatility)[0, 1]] def visualize(self, simulated): - self.hist(simulated, title="Returns/Volatility Correlation", xlabel="Correlation coefficient", bins=50) + self.hist( + simulated, + title="Returns/Volatility Correlation", + xlabel="Correlation coefficient", + bins=50, + ) diff --git a/realism/metrics/volatility_clustering.py b/realism/metrics/volatility_clustering.py index aae606793..35d5c294b 100644 --- a/realism/metrics/volatility_clustering.py +++ b/realism/metrics/volatility_clustering.py @@ -1,8 +1,9 @@ import random + +import numpy as np +import pandas as pd from metrics.metric import Metric from metrics.minutely_returns import MinutelyReturns -import pandas as pd -import numpy as np class VolatilityClustering(Metric): @@ -21,8 +22,13 @@ def compute(self, df): if self.mode == "abs": df = abs(df) elif self.mode == "square": - df = df ** 2 - return [[df.autocorr(lag) for lag in range(1, self.lags+1)]] + df = 
df**2 + return [[df.autocorr(lag) for lag in range(1, self.lags + 1)]] def visualize(self, simulated): - self.line(simulated, "Volatility Clustering/Long Range Dependence", "Lag", "Correlation coefficient") + self.line( + simulated, + "Volatility Clustering/Long Range Dependence", + "Lag", + "Correlation coefficient", + ) diff --git a/realism/metrics/volume_volatility_correlation.py b/realism/metrics/volume_volatility_correlation.py index 29b254a6b..552ed37c3 100644 --- a/realism/metrics/volume_volatility_correlation.py +++ b/realism/metrics/volume_volatility_correlation.py @@ -1,7 +1,7 @@ +import numpy as np from metrics.metric import Metric from metrics.minutely_returns import MinutelyReturns from scipy.stats import kurtosis -import numpy as np class VolumeVolatilityCorrelation(Metric): @@ -12,7 +12,11 @@ def __init__(self, intervals=4): def compute(self, df): volatility = abs(np.array(self.mr.compute(df))) volume = df["volume"].iloc[1:].values - return [np.corrcoef(volume, volatility)[0,1]] + return [np.corrcoef(volume, volatility)[0, 1]] def visualize(self, simulated): - self.hist(simulated, title="Volume/Volatility Correlation", xlabel="Correlation coefficient") + self.hist( + simulated, + title="Volume/Volatility Correlation", + xlabel="Correlation coefficient", + ) diff --git a/realism/order_flow_stylized_facts.py b/realism/order_flow_stylized_facts.py index a01af46dc..a35a553c4 100644 --- a/realism/order_flow_stylized_facts.py +++ b/realism/order_flow_stylized_facts.py @@ -1,23 +1,26 @@ import argparse import sys + sys.path.append("..") -from util.formatting.convert_order_stream import dir_path import glob +import itertools +import os +import pickle import re -import pandas as pd + import matplotlib.pyplot as plt -from realism_utils import get_plot_colors import numpy as np -from scipy import stats +import pandas as pd from matplotlib.dates import DateFormatter from pandas.plotting import register_matplotlib_converters -import itertools -import os -import 
pickle +from realism_utils import get_plot_colors +from scipy import stats + +from util.formatting.convert_order_stream import dir_path class Constants: - """ Stores constants for use in plotting code. """ + """Stores constants for use in plotting code.""" # Plot params -- Generic fig_height = 10 @@ -26,7 +29,7 @@ class Constants: legend_font_size = 20 axes_label_font_size = 20 title_font_size = 22 - scatter_marker_styles_sizes = [('x', 60), ('+', 60), ('o', 14), (',', 60)] + scatter_marker_styles_sizes = [("x", 60), ("+", 60), ("o", 14), (",", 60)] # Plot params -- Interarrival_times interarrival_times_xlabel = "Quote interarrival time /s" @@ -51,12 +54,12 @@ class Constants: def unpickle_stream_dfs_to_stream_list(dir_containing_pickles): - """ Extracts pickled dataframes over a number of dates to a dict containing dataframes and their dates. + """Extracts pickled dataframes over a number of dates to a dict containing dataframes and their dates. - :param dir_containing_pickles: path of directory containing pickled data frames, in format `orders_SYMB_YYYYMMDD.pkl` - :type dir_containing_pickles: str + :param dir_containing_pickles: path of directory containing pickled data frames, in format `orders_SYMB_YYYYMMDD.pkl` + :type dir_containing_pickles: str - :return bundled_streams: list of dicts, where each dict has symbol, date and orders_df + :return bundled_streams: list of dicts, where each dict has symbol, date and orders_df """ bundled_streams = [] @@ -65,28 +68,24 @@ def unpickle_stream_dfs_to_stream_list(dir_containing_pickles): stream_file_list = glob.glob(f"{dir_containing_pickles}/orders*.pkl") for stream_pkl in stream_file_list: print(f"Processing {stream_pkl}") - match = re.search(symbol_regex, stream_pkl) + match = re.search(symbol_regex, stream_pkl) symbol = match.group(1) date_YYYYMMDD = match.group(2) - orders_df = pd.read_pickle(stream_pkl) - bundled_streams.append({ - "symbol": symbol, - "date": date_YYYYMMDD, - "orders_df": orders_df - }) + 
orders_df = pd.read_pickle(stream_pkl) + bundled_streams.append({"symbol": symbol, "date": date_YYYYMMDD, "orders_df": orders_df}) return bundled_streams def set_up_plotting(): - """ Sets matplotlib variables for plotting. """ - plt.rc('xtick', labelsize=Constants.tick_label_size) - plt.rc('ytick', labelsize=Constants.tick_label_size) - plt.rc('legend', fontsize=Constants.legend_font_size) - plt.rc('axes', labelsize=Constants.axes_label_font_size) + """Sets matplotlib variables for plotting.""" + plt.rc("xtick", labelsize=Constants.tick_label_size) + plt.rc("ytick", labelsize=Constants.tick_label_size) + plt.rc("legend", fontsize=Constants.legend_font_size) + plt.rc("axes", labelsize=Constants.axes_label_font_size) def bundled_stream_interarrival_times(bundled_streams): - """ From bundled streams return dict with interarrival times collated by symbol. """ + """From bundled streams return dict with interarrival times collated by symbol.""" interarrivals_dict = dict() year_offset = 0 @@ -94,7 +93,7 @@ def bundled_stream_interarrival_times(bundled_streams): for idx, elem in enumerate(bundled_streams): print(f"Processing elem {idx + 1} of {len(bundled_streams)}") - year_offset_td = pd.Timedelta(int(365 * (year_offset * YEAR_OFFSET)), unit='day') + year_offset_td = pd.Timedelta(int(365 * (year_offset * YEAR_OFFSET)), unit="day") orders_df = elem["orders_df"] symbol = elem["symbol"] arrival_times = orders_df.index.to_series() @@ -116,12 +115,12 @@ def bundled_stream_interarrival_times(bundled_streams): return interarrivals_dict -def plot_interarrival_times(interarrivals_dict, output_dir, scale='log'): - """ Plots histogram of the interarrival times for symbols. 
""" +def plot_interarrival_times(interarrivals_dict, output_dir, scale="log"): + """Plots histogram of the interarrival times for symbols.""" fig, ax = plt.subplots(figsize=(Constants.fig_width, Constants.fig_height)) - if scale == 'log': + if scale == "log": ax.set(xscale="symlog", yscale="log") ax.set_ylabel(Constants.interarrival_times_ylabel) @@ -138,8 +137,16 @@ def plot_interarrival_times(interarrivals_dict, output_dir, scale='log'): interarrival_times_series = interarrivals_dict[symbol] x = interarrival_times_series.sort_values() x_s.append(x) - plt.hist(x, bins="sqrt", density=True, label=symbol, color=color, alpha=alpha, histtype="step", - linewidth=Constants.interarrival_linewidth) + plt.hist( + x, + bins="sqrt", + density=True, + label=symbol, + color=color, + alpha=alpha, + histtype="step", + linewidth=Constants.interarrival_linewidth, + ) ylim = ax.get_ylim() xlim = ax.get_xlim() @@ -156,25 +163,40 @@ def plot_interarrival_times(interarrivals_dict, output_dir, scale='log'): x_right = xx[xx > x.max()] xxx = np.concatenate((x_left, x_mid, x_right)) - plt.plot(xxx, stats.weibull_min.pdf(xxx, *weibull_params), linestyle="--", color=color, - label=f"{symbol} Weibull fit", linewidth=Constants.interarrival_linewidth) + plt.plot( + xxx, + stats.weibull_min.pdf(xxx, *weibull_params), + linestyle="--", + color=color, + label=f"{symbol} Weibull fit", + linewidth=Constants.interarrival_linewidth, + ) plt.legend(fontsize=Constants.legend_font_size) ax.set_ylim(ylim) - fig.savefig(f'{output_dir}/{Constants.interarrival_times_filename}.png', format='png', dpi=300, - transparent=False, bbox_inches='tight', pad_inches=0.03) + fig.savefig( + f"{output_dir}/{Constants.interarrival_times_filename}.png", + format="png", + dpi=300, + transparent=False, + bbox_inches="tight", + pad_inches=0.03, + ) def count_trades_within_bins(interarrival_times_series, binwidth=1): - """ Bins trades into specified-width bins and counts the number. 
+ """Bins trades into specified-width bins and counts the number. - :param interarrival_times_series: pandas Series object corresponding to the interarrival times, indexed on timestamp - :param binwidth: width of time bin in seconds + :param interarrival_times_series: pandas Series object corresponding to the interarrival times, indexed on timestamp + :param binwidth: width of time bin in seconds """ - bins = pd.interval_range(start=interarrival_times_series.index[0].floor('min'), end=interarrival_times_series.index[-1].ceil('min'), - freq=pd.DateOffset(seconds=binwidth)) + bins = pd.interval_range( + start=interarrival_times_series.index[0].floor("min"), + end=interarrival_times_series.index[-1].ceil("min"), + freq=pd.DateOffset(seconds=binwidth), + ) binned = pd.cut(interarrival_times_series.index, bins=bins) counted = interarrival_times_series.groupby(binned).count() return counted @@ -189,21 +211,21 @@ def bundled_stream_binned_trade_counts(bundled_interarrivals_dict, binwidth): print(f"Processing series {idx + 1} of {len(series_list)} for symbol {symbol}") counted_trades = count_trades_within_bins(series, binwidth=binwidth) counted_trades_copy = counted_trades.copy(deep=True) - + if idx == 0: base_counted = counted_trades_copy hist_index = base_counted.index - #print(f'base_counted.index: {base_counted.index}') - #print(f'hist_index: {hist_index}') + # print(f'base_counted.index: {base_counted.index}') + # print(f'hist_index: {hist_index}') else: - #print(f'counted_trades_copy.index: {counted_trades_copy.index}') - #print(f'hist_index: {hist_index}') + # print(f'counted_trades_copy.index: {counted_trades_copy.index}') + # print(f'hist_index: {hist_index}') try: counted_trades_copy.index = hist_index base_counted = base_counted.add(counted_trades_copy) - except ValueError: # length mismatch of hist bins (currently ignore) + except ValueError: # length mismatch of hist bins (currently ignore) continue trades_within_bins_dict[symbol] = base_counted @@ -212,7 
+234,7 @@ def bundled_stream_binned_trade_counts(bundled_interarrivals_dict, binwidth): def plot_binned_trade_counts(trades_within_bins_dict, binwidth, output_dir): - """ Plot binned counts of trade volume. """ + """Plot binned counts of trade volume.""" fig, ax = plt.subplots(figsize=(Constants.fig_width, Constants.fig_height)) @@ -232,8 +254,16 @@ def plot_binned_trade_counts(trades_within_bins_dict, binwidth, output_dir): binned_trades_counts = binned_trades_counts / binned_trades_counts.sum() x = binned_trades_counts.sort_values() x_s.append(x) - plt.hist(x, bins="sqrt", density=True, label=symbol, color=color, alpha=alpha, histtype="step", - linewidth=Constants.binned_count_linewidth) + plt.hist( + x, + bins="sqrt", + density=True, + label=symbol, + color=color, + alpha=alpha, + histtype="step", + linewidth=Constants.binned_count_linewidth, + ) xlim = ax.get_xlim() ylim = ax.get_ylim() @@ -242,39 +272,55 @@ def plot_binned_trade_counts(trades_within_bins_dict, binwidth, output_dir): # Plot fitted curves for x, symbol, color in zip(x_s, symbols, colors): gamma_params = stats.gamma.fit(x.values[x.values > 0], floc=0) - plt.plot(xx, stats.gamma.pdf(xx, *gamma_params), linestyle="--", color=color, label=f"{symbol} gamma fit", - linewidth=Constants.binned_count_linewidth) + plt.plot( + xx, + stats.gamma.pdf(xx, *gamma_params), + linestyle="--", + color=color, + label=f"{symbol} gamma fit", + linewidth=Constants.binned_count_linewidth, + ) ax.set_ylim(ylim) - plt.title(f"Order volume within time window $\\tau = ${binwidth} seconds, normalized", - size=Constants.title_font_size) + plt.title( + f"Order volume within time window $\\tau = ${binwidth} seconds, normalized", + size=Constants.title_font_size, + ) plt.legend(fontsize=Constants.legend_font_size) - fig.savefig(f'{output_dir}/{Constants.binned_trade_counts_filename}_tau_{binwidth}.png', - format='png', dpi=300, transparent=False, bbox_inches='tight', pad_inches=0.03) + fig.savefig( + 
f"{output_dir}/{Constants.binned_trade_counts_filename}_tau_{binwidth}.png", + format="png", + dpi=300, + transparent=False, + bbox_inches="tight", + pad_inches=0.03, + ) def get_scatter_plot_params_dict(symbols): - """ Creates dictionary of parameters used by intraday seasonality plots. """ + """Creates dictionary of parameters used by intraday seasonality plots.""" colors = get_plot_colors(symbols) scatter_styles_sizes = itertools.cycle(Constants.scatter_marker_styles_sizes) scatter_plot_params_dict = dict() for symbol, color, style_and_size in zip(symbols, colors, scatter_styles_sizes): - scatter_plot_params_dict.update({ - symbol : { - 'color': color, - 'marker': style_and_size[0], - 'marker_size': style_and_size[1] + scatter_plot_params_dict.update( + { + symbol: { + "color": color, + "marker": style_and_size[0], + "marker_size": style_and_size[1], + } } - }) + ) return scatter_plot_params_dict def plot_intraday_seasonality(trades_within_bins_dict, binsize, output_dir): - """ Plots intraday order volume over a day for multiple symbols.""" + """Plots intraday order volume over a day for multiple symbols.""" fig, ax = plt.subplots(figsize=(Constants.fig_width, Constants.fig_height)) ax.set_ylabel("Normalized activity") @@ -312,35 +358,75 @@ def plot_intraday_seasonality(trades_within_bins_dict, binsize, output_dir): x.append(elem.right.time()) plt.scatter(x, y, marker=marker, color=color, label=symbol, s=marker_size) - plt.plot(x, intraday_quadratic_fitted_y, color=color, label=f"{symbol} quadratic fit", - linewidth=Constants.intraday_volume_linewidth) + plt.plot( + x, + intraday_quadratic_fitted_y, + color=color, + label=f"{symbol} quadratic fit", + linewidth=Constants.intraday_volume_linewidth, + ) plt.legend(fontsize=Constants.legend_font_size) - plt.title(f"Number of limit orders submitted in $\Delta t = {binsize}$ seconds, normalized by mean volume.", size=Constants.title_font_size, pad=20) + plt.title( + f"Number of limit orders submitted in $\Delta t = 
{binsize}$ seconds, normalized by mean volume.", + size=Constants.title_font_size, + pad=20, + ) ax.set_xticklabels(["", "09:45", "11:00", "12:30", "14:00", "15:15"]) ax.set_ylim(-0.1, 3) - fig.savefig(f'{output_dir}/{Constants.intraday_volume_filename}.png', format='png', dpi=300, - transparent=False, bbox_inches='tight', pad_inches=0.03) + fig.savefig( + f"{output_dir}/{Constants.intraday_volume_filename}.png", + format="png", + dpi=300, + transparent=False, + bbox_inches="tight", + pad_inches=0.03, + ) if __name__ == "__main__": # Create cache and visualizations folders if they do not exist - try: os.mkdir("cache") - except: pass - try: os.mkdir("visualizations") - except: pass - - parser = argparse.ArgumentParser(description='Process order stream files and produce plots of relevant metrics.') - parser.add_argument('targetdir', type=dir_path, help='Path of directory containing order stream files. Note that they must have been preprocessed' - ' by formatting scripts into format orders_{symbol}_{date_str}.pkl') - parser.add_argument('-o', '--output-dir', default='visualizations', help='Path to output directory', type=dir_path) - parser.add_argument('-f','--facts-to-plot', choices=['all'], type=str, default='all', - help="Decide which stylized facts should be plotted.") - - parser.add_argument('-z', '--recompute', action="store_true", help="Rerun computations without caching.") + try: + os.mkdir("cache") + except: + pass + try: + os.mkdir("visualizations") + except: + pass + + parser = argparse.ArgumentParser(description="Process order stream files and produce plots of relevant metrics.") + parser.add_argument( + "targetdir", + type=dir_path, + help="Path of directory containing order stream files. 
Note that they must have been preprocessed" + " by formatting scripts into format orders_{symbol}_{date_str}.pkl", + ) + parser.add_argument( + "-o", + "--output-dir", + default="visualizations", + help="Path to output directory", + type=dir_path, + ) + parser.add_argument( + "-f", + "--facts-to-plot", + choices=["all"], + type=str, + default="all", + help="Decide which stylized facts should be plotted.", + ) + + parser.add_argument( + "-z", + "--recompute", + action="store_true", + help="Rerun computations without caching.", + ) args, remaining_args = parser.parse_known_args() print("### Order stream stylized facts plots ###") @@ -387,4 +473,3 @@ def plot_intraday_seasonality(trades_within_bins_dict, binsize, output_dir): ## intraday seasonality print("Plotting intraday seasonality...") plot_intraday_seasonality(binned_30_days_5_minute, 300, args.output_dir) - diff --git a/realism/realism_utils.py b/realism/realism_utils.py index cc99d96d3..f8050ee43 100644 --- a/realism/realism_utils.py +++ b/realism/realism_utils.py @@ -1,78 +1,83 @@ import sys -import pandas as pd -import numpy as np from pathlib import Path + +import numpy as np +import pandas as pd + p = str(Path(__file__).resolve().parents[1]) # directory one level up from this file sys.path.append(p) -from util.formatting.convert_order_book import process_orderbook, is_wide_book -from util.formatting.convert_order_stream import convert_stream_to_format import itertools -from bisect import bisect -from matplotlib.cm import get_cmap import os import warnings -from util.util import get_value_from_timestamp +from bisect import bisect +from matplotlib.cm import get_cmap + +from util.formatting.convert_order_book import is_wide_book, process_orderbook +from util.formatting.convert_order_stream import convert_stream_to_format +from util.util import get_value_from_timestamp MID_PRICE_CUTOFF = 10000 # Price above which mid price is set as `NaN` and subsequently forgotten. 
WARNING: This - # effectively hides dropout of liquidity on ask side. Set to sys.max_size to reset. +# effectively hides dropout of liquidity on ask side. Set to sys.max_size to reset. LIQUIDITY_DROPOUT_WARNING_MSG = "No liquidity on one side of the order book during this experimental trace." def get_trades(sim_file): - - # Code taken from `read_simulated_trades` - try: - df = pd.read_pickle(sim_file, compression='bz2') - except (OSError, EOFError): - return None - - df = df[df['EventType'] == 'LAST_TRADE'] - if len(df) <= 0: - print("There appear to be no simulated trades.") - sys.exit() - df['PRICE'] = [y for x,y in df['Event'].str.split(',')] - df['SIZE'] = [x for x,y in df['Event'].str.split(',')] - df['PRICE'] = df['PRICE'].str.replace('$','').astype('float64') - df['SIZE'] = df['SIZE'].astype('float64') - - # New code for minutely resampling and renaming columns. - df = df[["PRICE","SIZE"]].resample("1T") - df_open = df["PRICE"].first().ffill() - df_close = df["PRICE"].last().ffill() - df_high = df["PRICE"].max().ffill() - df_low = df["PRICE"].min().ffill() - df_vol = df["SIZE"].sum() - ohlcv = pd.DataFrame({ - "open": df_open, - "high": df_high, - "low": df_low, - "close": df_close, - "volume": df_vol - }) - ohlcv = ohlcv.iloc[:390,:] - return ohlcv + + # Code taken from `read_simulated_trades` + try: + df = pd.read_pickle(sim_file, compression="bz2") + except (OSError, EOFError): + return None + + df = df[df["EventType"] == "LAST_TRADE"] + if len(df) <= 0: + print("There appear to be no simulated trades.") + sys.exit() + df["PRICE"] = [y for x, y in df["Event"].str.split(",")] + df["SIZE"] = [x for x, y in df["Event"].str.split(",")] + df["PRICE"] = df["PRICE"].str.replace("$", "").astype("float64") + df["SIZE"] = df["SIZE"].astype("float64") + + # New code for minutely resampling and renaming columns. 
+ df = df[["PRICE", "SIZE"]].resample("1T") + df_open = df["PRICE"].first().ffill() + df_close = df["PRICE"].last().ffill() + df_high = df["PRICE"].max().ffill() + df_low = df["PRICE"].min().ffill() + df_vol = df["SIZE"].sum() + ohlcv = pd.DataFrame( + { + "open": df_open, + "high": df_high, + "low": df_low, + "close": df_close, + "volume": df_vol, + } + ) + ohlcv = ohlcv.iloc[:390, :] + return ohlcv def clip_times(df, start, end): - """ Keep only rows within certain time bounds of dataframe. + """Keep only rows within certain time bounds of dataframe. - :param df: DataFrame with DatetimeIndex - :param start: lower bound - :param end: upper bound + :param df: DataFrame with DatetimeIndex + :param start: lower bound + :param end: upper bound - :type df: pd.DataFrame - :type start: pd.Timestamp - :type end: pd.Timestamp + :type df: pd.DataFrame + :type start: pd.Timestamp + :type end: pd.Timestamp """ return df.loc[(df.index > start) & (df.index < end)] def mid_price_cutoff(df): - """ Removes outliers for mid-price induced by no liquidity. """ + """Removes outliers for mid-price induced by no liquidity.""" - out = df.loc[(df["MID_PRICE"] < MID_PRICE_CUTOFF) & (df["MID_PRICE"] > - MID_PRICE_CUTOFF)] + out = df.loc[(df["MID_PRICE"] < MID_PRICE_CUTOFF) & (df["MID_PRICE"] > -MID_PRICE_CUTOFF)] if len(df.index) > len(out.index): warnings.warn(LIQUIDITY_DROPOUT_WARNING_MSG, UserWarning, stacklevel=1) @@ -81,46 +86,53 @@ def mid_price_cutoff(df): def augment_with_VWAP(merged): - """ Method augments orderbook with volume weighted average price. 
- """ + """Method augments orderbook with volume weighted average price.""" merged = merged.reset_index() executed_df = merged.loc[merged["TYPE"] == "ORDER_EXECUTED"] executed_df = executed_df.dropna() - vwap = (executed_df['PRICE'].multiply(executed_df['SIZE'])).cumsum() / executed_df['SIZE'].cumsum() - vwap = vwap.to_frame(name='VWAP') - merged = pd.merge(merged.reset_index(), vwap, how='left', left_index=True, right_index=True) - merged['VWAP'] = merged['VWAP'].fillna(method='ffill') - merged['VWAP'] = merged['VWAP'] - merged = merged.set_index('index') - merged = merged.drop(columns=['level_0']) - del merged.index.name + vwap = (executed_df["PRICE"].multiply(executed_df["SIZE"])).cumsum() / executed_df["SIZE"].cumsum() + vwap = vwap.to_frame(name="VWAP") + merged = pd.merge(merged.reset_index(), vwap, how="left", left_index=True, right_index=True) + merged["VWAP"] = merged["VWAP"].ffill() + merged["VWAP"] = merged["VWAP"] + merged = merged.set_index("index") + merged = merged.drop(columns=["level_0"]) + merged.index.name = None + + # del merged.index.name return merged -def make_orderbook_for_analysis(stream_path, orderbook_path, num_levels=5, ignore_cancellations=True, hide_liquidity_collapse=True): - """ Make orderbook amenable to mid-price + liquidity plots from ABIDES input. +def make_orderbook_for_analysis( + stream_path, + orderbook_path, + num_levels=5, + ignore_cancellations=True, + hide_liquidity_collapse=True, +): + """Make orderbook amenable to mid-price + liquidity plots from ABIDES input. - :param stream_path: path to ABIDES Exchange output, e.g. ExchangeAgent0.bz2. Note ABIDES must have been run with --log-orders=True - :param orderbook_path: path to ABIDES order book output, e.g. ORDERBOOK_TICKER_FULL.bz2. Note ABIDES must have been run with --book-freq not set to None - :param num_levels: number of levels of orderbook to keep in DataFrame. 
- :param ignore_cancellations: flag to only include executed trades - :param hide_liquidity_collapse: flag to remove times in order book with no liquidity on one side of book + :param stream_path: path to ABIDES Exchange output, e.g. ExchangeAgent0.bz2. Note ABIDES must have been run with --log-orders=True + :param orderbook_path: path to ABIDES order book output, e.g. ORDERBOOK_TICKER_FULL.bz2. Note ABIDES must have been run with --book-freq not set to None + :param num_levels: number of levels of orderbook to keep in DataFrame. + :param ignore_cancellations: flag to only include executed trades + :param hide_liquidity_collapse: flag to remove times in order book with no liquidity on one side of book - :type stream_path: str - :type orderbook_path: str - :type num_levels: int - :type ignore_cancellations: bool - :type hide_liquidity_collapse: bool + :type stream_path: str + :type orderbook_path: str + :type num_levels: int + :type ignore_cancellations: bool + :type hide_liquidity_collapse: bool """ stream_df = pd.read_pickle(stream_path) orderbook_df = pd.read_pickle(orderbook_path) - stream_processed = convert_stream_to_format(stream_df.reset_index(), fmt='plot-scripts') - stream_processed = stream_processed.set_index('TIMESTAMP') + stream_processed = convert_stream_to_format(stream_df.reset_index(), fmt="plot-scripts") + stream_processed = stream_processed.set_index("TIMESTAMP") ob_processed = process_orderbook(orderbook_df, num_levels) @@ -129,13 +141,23 @@ def make_orderbook_for_analysis(stream_path, orderbook_path, num_levels=5, ignor else: # orderbook in wide format ob_processed.index = orderbook_df.index - columns = list(itertools.chain( - *[[f'ask_price_{level}', f'ask_size_{level}', f'bid_price_{level}', f'bid_size_{level}'] for level in - range(1, num_levels + 1)])) - merged = pd.merge(stream_processed, ob_processed, left_index=True, right_index=True, how='left') - merge_cols = ['ORDER_ID', 'PRICE', 'SIZE', 'BUY_SELL_FLAG', 'TYPE'] + columns + columns = 
list( + itertools.chain( + *[ + [ + f"ask_price_{level}", + f"ask_size_{level}", + f"bid_price_{level}", + f"bid_size_{level}", + ] + for level in range(1, num_levels + 1) + ] + ) + ) + merged = pd.merge(stream_processed, ob_processed, left_index=True, right_index=True, how="left") + merge_cols = ["ORDER_ID", "PRICE", "SIZE", "BUY_SELL_FLAG", "TYPE"] + columns merged = merged[merge_cols] - merged['PRICE'] = merged['PRICE'] / 100 + merged["PRICE"] = merged["PRICE"] / 100 # clean # merged = merged.dropna() @@ -145,9 +167,9 @@ def make_orderbook_for_analysis(stream_path, orderbook_path, num_levels=5, ignor if ignore_cancellations: merged = merged[merged.SIZE != 0] - merged['MID_PRICE'] = (merged['ask_price_1'] + merged['bid_price_1']) / (2 * 100) - merged['SPREAD'] = (merged['ask_price_1'] - merged['bid_price_1']) / 100 - merged['ORDER_VOLUME_IMBALANCE'] = merged['ask_size_1'] / (merged['bid_size_1'] + merged['ask_size_1']) + merged["MID_PRICE"] = (merged["ask_price_1"] + merged["bid_price_1"]) / (2 * 100) + merged["SPREAD"] = (merged["ask_price_1"] - merged["bid_price_1"]) / 100 + merged["ORDER_VOLUME_IMBALANCE"] = merged["ask_size_1"] / (merged["bid_size_1"] + merged["ask_size_1"]) if hide_liquidity_collapse: merged = mid_price_cutoff(merged) @@ -159,16 +181,16 @@ def make_orderbook_for_analysis(stream_path, orderbook_path, num_levels=5, ignor return merged -def get_daily_spread(orderbook_df, start_time='09:30:00', end_time='16:00:00'): - """ Get mean spread for the day's trading. +def get_daily_spread(orderbook_df, start_time="09:30:00", end_time="16:00:00"): + """Get mean spread for the day's trading. - :param orderbook_df: preprocessed orderbook (see __name__.make_orderbook_for_analysis) for data without execution agent. 
- :param start_time: time to "start" trading day -- in format HH:MM:SS - :param end_time: time to "finish" trading day -- in format HH:MM:SS + :param orderbook_df: preprocessed orderbook (see __name__.make_orderbook_for_analysis) for data without execution agent. + :param start_time: time to "start" trading day -- in format HH:MM:SS + :param end_time: time to "finish" trading day -- in format HH:MM:SS - :type orderbook_df: pd.DataFrame - :type start_time: str - :type end_time: str + :type orderbook_df: pd.DataFrame + :type start_time: str + :type end_time: str """ historical_date = pd.Timestamp(orderbook_df.index[0].date()) start = historical_date + pd.to_timedelta(start_time) @@ -179,16 +201,21 @@ def get_daily_spread(orderbook_df, start_time='09:30:00', end_time='16:00:00'): def find_nearest_ts_idx(df, np_dt64): - """ https://stackoverflow.com/a/42266882 """ + """https://stackoverflow.com/a/42266882""" timestamps = np.array(df.index) upper_index = bisect(timestamps, np_dt64, hi=len(timestamps) - 1) # find the upper index of the closest time stamp - df_index = df.index.get_loc(min(timestamps[upper_index], timestamps[upper_index - 1], key=lambda x: abs( - x - np_dt64))) # find the closest between upper and lower timestamp + df_index = df.index.get_loc( + min( + timestamps[upper_index], + timestamps[upper_index - 1], + key=lambda x: abs(x - np_dt64), + ) + ) # find the closest between upper and lower timestamp return df_index def first_elem(s): - """ Extracts first element of pandas Series s, or returns s if not a series. 
""" + """Extracts first element of pandas Series s, or returns s if not a series.""" try: return s.iloc[0] except AttributeError: @@ -201,65 +228,79 @@ def get_relevant_prices(orderbook_df, orderbook_with_execution_df, start_ts, end end_idx_orig = find_nearest_ts_idx(orderbook_df, end_ts.to_datetime64()) end_idx_execution = find_nearest_ts_idx(orderbook_with_execution_df, end_ts.to_datetime64()) - start_mid_price_orig = first_elem(orderbook_df['MID_PRICE'].iloc[start_idx_orig]) - end_mid_price_orig = first_elem(orderbook_df['MID_PRICE'].iloc[end_idx_orig]) - end_mid_price_execution = first_elem(orderbook_with_execution_df['MID_PRICE'].iloc[end_idx_execution]) + start_mid_price_orig = first_elem(orderbook_df["MID_PRICE"].iloc[start_idx_orig]) + end_mid_price_orig = first_elem(orderbook_df["MID_PRICE"].iloc[end_idx_orig]) + end_mid_price_execution = first_elem(orderbook_with_execution_df["MID_PRICE"].iloc[end_idx_execution]) return start_mid_price_orig, end_mid_price_orig, end_mid_price_execution def get_execution_agent_vwap(experiment_name, agent_name, date, seed, pov, log_dir): - """ Function computes the VWAP for an execution agent's orders, when ran from the `execution_iabs_plots` config. - - :param experiment_name: name for experiment - :param agent_name: name of agent, e.g. POV_EXECUTION_AGENT - :param date: date of experiment in format YYYY-MM-DD - :param seed: seed used to run experiment - :param pov: Participation of volume for agent - :param log_dir: location of directory with all ABIDES logs - - :type experiment_name: str - :type agent_name: str - :type date: str - :type seed: int - :type pov: float - :type log_dir: str + """Function computes the VWAP for an execution agent's orders, when ran from the `execution_iabs_plots` config. + + :param experiment_name: name for experiment + :param agent_name: name of agent, e.g. 
POV_EXECUTION_AGENT + :param date: date of experiment in format YYYY-MM-DD + :param seed: seed used to run experiment + :param pov: Participation of volume for agent + :param log_dir: location of directory with all ABIDES logs + + :type experiment_name: str + :type agent_name: str + :type date: str + :type seed: int + :type pov: float + :type log_dir: str """ - file_path = f'{log_dir}/{experiment_name}_yes_{seed}_{pov}_{date}/{agent_name}.bz2' + file_path = f"{log_dir}/{experiment_name}_yes_{seed}_{pov}_{date}/{agent_name}.bz2" exec_df = pd.read_pickle(file_path) - executed_orders = exec_df.loc[exec_df['EventType'] == 'ORDER_EXECUTED'] - executed_orders['PRICE'] = executed_orders['Event'].apply(lambda x: x['fill_price']) - executed_orders['SIZE'] = executed_orders['Event'].apply(lambda x: x['quantity']) + executed_orders = exec_df.loc[exec_df["EventType"] == "ORDER_EXECUTED"] + executed_orders["PRICE"] = executed_orders["Event"].apply(lambda x: x["fill_price"]) + executed_orders["SIZE"] = executed_orders["Event"].apply(lambda x: x["quantity"]) - executed_orders["VWAP"] = (executed_orders['PRICE'].multiply(executed_orders['SIZE'])).cumsum() / executed_orders[ - 'SIZE'].cumsum() + executed_orders["VWAP"] = (executed_orders["PRICE"].multiply(executed_orders["SIZE"])).cumsum() / executed_orders[ + "SIZE" + ].cumsum() executed_orders["VWAP"] = executed_orders["VWAP"] / 100 final_vwap = executed_orders.iloc[-1].VWAP return final_vwap -def compute_impact_statistics(orderbook_df, orderbook_with_execution_df, start_time, end_time, date_str, pov, seed, experiment_name, - spread_lookback='1min', execution_agent_name='POV_EXECUTION_AGENT', log_dir='../log'): - """ Computes dictionary of run statistics for comparison. - - :param orderbook_df: preprocessed orderbook (see __name__.make_orderbook_for_analysis) for data without execution agent. - :param orderbook_with_execution_df: preprocessed orderbook (see __name__.make_orderbook_for_analysis) for data with execution agent. 
- - :type orderbook_df: pd.DataFrame - :type orderbook_with_execution_df: pd.DataFrame +def compute_impact_statistics( + orderbook_df, + orderbook_with_execution_df, + start_time, + end_time, + date_str, + pov, + seed, + experiment_name, + spread_lookback="1min", + execution_agent_name="POV_EXECUTION_AGENT", + log_dir="../log", +): + """Computes dictionary of run statistics for comparison. + + :param orderbook_df: preprocessed orderbook (see __name__.make_orderbook_for_analysis) for data without execution agent. + :param orderbook_with_execution_df: preprocessed orderbook (see __name__.make_orderbook_for_analysis) for data with execution agent. + + :type orderbook_df: pd.DataFrame + :type orderbook_with_execution_df: pd.DataFrame """ start_ts = pd.Timestamp(start_time) end_ts = pd.Timestamp(end_time) - start_mid_price_orig, end_mid_price_orig, end_mid_price_execution = get_relevant_prices(orderbook_df, orderbook_with_execution_df, start_ts, end_ts) + start_mid_price_orig, end_mid_price_orig, end_mid_price_execution = get_relevant_prices( + orderbook_df, orderbook_with_execution_df, start_ts, end_ts + ) end_mid_price_execution_bps = 10000 * end_mid_price_execution / end_mid_price_orig - end_shade_str = end_ts.strftime('%H:%M:%S') - end_shade_str_lookback = (end_ts - pd.to_timedelta(spread_lookback)).strftime('%H:%M:%S') + end_shade_str = end_ts.strftime("%H:%M:%S") + end_shade_str_lookback = (end_ts - pd.to_timedelta(spread_lookback)).strftime("%H:%M:%S") mean_daily_spread = get_daily_spread(orderbook_df, end_shade_str_lookback, end_shade_str) @@ -276,18 +317,18 @@ def compute_impact_statistics(orderbook_df, orderbook_with_execution_df, start_t "end_mid_price_orig ($)": end_mid_price_orig, "end_mid_price_execution ($)": end_mid_price_execution, "end_mid_price_execution (bps)": end_mid_price_execution_bps, - 'mid_price_difference ($)': (end_mid_price_execution - end_mid_price_orig), + "mid_price_difference ($)": (end_mid_price_execution - end_mid_price_orig), 
"mid_price_impact_bps": mid_price_shift_bps, "daily_VWAP_price ($)": daily_VWAP_price, "daily_VWAP_price (bps)": 10000 * daily_VWAP_price / end_mid_price_orig, "mean_daily_spread ($)": mean_daily_spread, "mean_daily_spread (bps)": 10000 * mean_daily_spread / end_mid_price_orig, - "VWAP + half spread ($)": vwap_plus_half_spread_dollars, + "VWAP + half spread ($)": vwap_plus_half_spread_dollars, "VWAP + half spread (bps)": vwap_plus_half_spread_bps, "execution_impact_from_VWAP_plus_half_spread ($)": end_mid_price_execution - vwap_plus_half_spread_dollars, "execution_impact_from_VWAP_plus_half_spread (bps)": end_mid_price_execution_bps - vwap_plus_half_spread_bps, "execution_agent_vwap ($)": execution_agent_vwap, - "execution_agent_vwap (bps)": 10000 * execution_agent_vwap / end_mid_price_orig + "execution_agent_vwap (bps)": 10000 * execution_agent_vwap / end_mid_price_orig, } return stats_dict @@ -297,37 +338,37 @@ def get_plot_colors(symbols, start_idx=0): name = "Set1" cmap = get_cmap(name) # type: matplotlib.colors.ListedColormap colors = cmap.colors # type: list - return colors[start_idx:(len(symbols) + start_idx)] + return colors[start_idx : (len(symbols) + start_idx)] def get_plot_linestyles(n): - """ https://matplotlib.org/3.1.1/gallery/lines_bars_and_markers/linestyles.html """ + """https://matplotlib.org/3.1.1/gallery/lines_bars_and_markers/linestyles.html""" linestyle_tuple = [ - ('solid', (0, ())), - ('dotted', (0, (1, 1))), # Same as (0, (1, 1)) or '.' - ('densely dotted', (0, (1, 1))), - ('dashed', (0, (5, 5))), - ('less densely dashed', (0, (3, 1))), - ('dashdotted', (0, (3, 5, 1, 5))), - ('densely dashdotted', (0, (3, 1, 1, 1))), - ('dashdotdotted', (0, (3, 5, 1, 5, 1, 5))), - ('densely dashdotdotted', (0, (3, 1, 1, 1, 1, 1))) - ] - out_list = [y for (x,y) in linestyle_tuple] + ("solid", (0, ())), + ("dotted", (0, (1, 1))), # Same as (0, (1, 1)) or '.' 
+ ("densely dotted", (0, (1, 1))), + ("dashed", (0, (5, 5))), + ("less densely dashed", (0, (3, 1))), + ("dashdotted", (0, (3, 5, 1, 5))), + ("densely dashdotted", (0, (3, 1, 1, 1))), + ("dashdotdotted", (0, (3, 5, 1, 5, 1, 5))), + ("densely dashdotdotted", (0, (3, 1, 1, 1, 1, 1))), + ] + out_list = [y for (x, y) in linestyle_tuple] return out_list[:n] def forward_fill_series(s1, s2): - """ For two pandas series with DateTimeIndex , return corresponding series with the same numer of entries, forward-filled. + """For two pandas series with DateTimeIndex , return corresponding series with the same numer of entries, forward-filled. - :type s1: pd.Series - :type s2: pd.Series + :type s1: pd.Series + :type s2: pd.Series """ def dedup_index(s): - """ Deduplicate index values of pd.Series""" + """Deduplicate index values of pd.Series""" df = s.to_frame() - df = df.loc[~df.index.duplicated(keep='last')] + df = df.loc[~df.index.duplicated(keep="last")] s_out = df[df.columns[0]] return s_out diff --git a/requirements.txt b/requirements.txt index f9f01f9e9..928ff571e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,16 +1,29 @@ -cycler==0.10.0 -joblib==0.13.2 -jsons==0.8.8 -kiwisolver==1.1.0 -matplotlib==3.0.3 -numpy==1.16.3 -pandas==0.25.1 -pprofile==2.0.2 -pyparsing==2.4.0 -python-dateutil==2.8.0 -pytz==2019.1 -scipy==1.2.1 -seaborn==0.9.0 -six==1.12.0 -tqdm==4.36.1 -psutil \ No newline at end of file +# === Core Scientific Stack === +numpy # Core numerical operations +scipy # Scientific computing +pandas>=2.0.0 # Data analysis and manipulation + +# === Plotting and Visualization === +matplotlib # Plotting library +seaborn # Statistical data visualization +cycler # Cycles through properties (used by matplotlib) +kiwisolver # Used by matplotlib for layout +pyparsing # Parsing expressions (used by matplotlib) +python-dateutil # Date handling (used by matplotlib/pandas) +pytz # Time zone handling + +# === Performance === +joblib # Lightweight pipelining, 
parallelization +tqdm # Progress bars +psutil # Process monitoring, system utilization + +# === JSON and Data Handling === +jsons # JSON (de)serialization for complex objects + +# === Developer Tools (Optional) === +pprofile # Profiling tool for performance testing +black # Code formatter +isort # Import sorter + +# === Compatibility === +six # Python 2/3 compatibility helpers (used by various libs) diff --git a/setup.py b/setup.py index 120120d8c..ca9d4e6dd 100644 --- a/setup.py +++ b/setup.py @@ -1,29 +1,30 @@ -from setuptools import setup, find_packages +from setuptools import find_packages, setup -setup(name='abides', - version='1.0.0', - description='Agent-Based Interactive Discrete Event Simulation', - url='https://github.com/abides-sim/abides', - author='davebyrd', - author_email='dave@imtc.gatech.edu', - license='BSD 3-Clause License', - packages=find_packages(), - install_requires=[ - 'cycler', - 'joblib', - 'jsons', - 'kiwisolver', - 'matplotlib', - 'numpy', - 'pandas', - 'pprofile', - 'pyparsing', - 'python-dateutil', - 'pytz', - 'scipy', - 'seaborn', - 'six', - 'tqdm', - 'psutil' - ] - ) +setup( + name="abides", + version="1.0.0", + description="Agent-Based Interactive Discrete Event Simulation", + url="https://github.com/abides-sim/abides", + author="davebyrd", + author_email="dave@imtc.gatech.edu", + license="BSD 3-Clause License", + packages=find_packages(), + install_requires=[ + "cycler", + "joblib", + "jsons", + "kiwisolver", + "matplotlib", + "numpy", + "pandas", + "pprofile", + "pyparsing", + "python-dateutil", + "pytz", + "scipy", + "seaborn", + "six", + "tqdm", + "psutil", + ], +) diff --git a/util/OrderBook.py b/util/OrderBook.py index 64d83d0aa..09311d95a 100644 --- a/util/OrderBook.py +++ b/util/OrderBook.py @@ -2,18 +2,18 @@ # List of bid prices (index zero is best bid), each with a list of LimitOrders. # List of ask prices (index zero is best ask), each with a list of LimitOrders. 
import sys - -from message.Message import Message -from util.order.LimitOrder import LimitOrder -from util.util import log_print, be_silent - from copy import deepcopy -import pandas as pd -from pandas.io.json import json_normalize from functools import reduce + +import pandas as pd +from pandas import json_normalize from scipy.sparse import dok_matrix from tqdm import tqdm +from message.Message import Message +from util.order.LimitOrder import LimitOrder +from util.util import be_silent, log_print + class OrderBook: @@ -40,7 +40,7 @@ def __init__(self, owner, symbol): # Internal variable used for computing transacted volumes self._transacted_volume = { "unrolled_transactions": None, - "self.history_previous_length": 0 + "self.history_previous_length": 0, } def handleLimitOrder(self, order): @@ -49,19 +49,31 @@ def handleLimitOrder(self, order): # order size "fit" or minimizing number of transactions. Sends one notification per # match. if order.symbol != self.symbol: - log_print("{} order discarded. Does not match OrderBook symbol: {}", order.symbol, self.symbol) + log_print( + "{} order discarded. Does not match OrderBook symbol: {}", + order.symbol, + self.symbol, + ) return if (order.quantity <= 0) or (int(order.quantity) != order.quantity): - log_print("{} order discarded. Quantity ({}) must be a positive integer.", order.symbol, order.quantity) + log_print( + "{} order discarded. Quantity ({}) must be a positive integer.", + order.symbol, + order.quantity, + ) return # Add the order under index 0 of history: orders since the most recent trade. 
- self.history[0][order.order_id] = {'entry_time': self.owner.currentTime, - 'quantity': order.quantity, 'is_buy_order': order.is_buy_order, - 'limit_price': order.limit_price, 'transactions': [], - 'modifications': [], - 'cancellations': []} + self.history[0][order.order_id] = { + "entry_time": self.owner.currentTime, + "quantity": order.quantity, + "is_buy_order": order.is_buy_order, + "limit_price": order.limit_price, + "transactions": [], + "modifications": [], + "cancellations": [], + } matching = True @@ -81,12 +93,22 @@ def handleLimitOrder(self, order): order.quantity -= filled_order.quantity log_print("MATCHED: new order {} vs old order {}", filled_order, matched_order) - log_print("SENT: notifications of order execution to agents {} and {} for orders {} and {}", - filled_order.agent_id, matched_order.agent_id, filled_order.order_id, matched_order.order_id) - - self.owner.sendMessage(order.agent_id, Message({"msg": "ORDER_EXECUTED", "order": filled_order})) - self.owner.sendMessage(matched_order.agent_id, - Message({"msg": "ORDER_EXECUTED", "order": matched_order})) + log_print( + "SENT: notifications of order execution to agents {} and {} for orders {} and {}", + filled_order.agent_id, + matched_order.agent_id, + filled_order.order_id, + matched_order.order_id, + ) + + self.owner.sendMessage( + order.agent_id, + Message({"msg": "ORDER_EXECUTED", "order": filled_order}), + ) + self.owner.sendMessage( + matched_order.agent_id, + Message({"msg": "ORDER_EXECUTED", "order": matched_order}), + ) # Accumulate the volume and average share price of the currently executing inbound trade. 
executed.append((filled_order.quantity, filled_order.fill_price)) @@ -99,8 +121,11 @@ def handleLimitOrder(self, order): self.enterOrder(deepcopy(order)) log_print("ACCEPTED: new order {}", order) - log_print("SENT: notifications of order acceptance to agent {} for order {}", - order.agent_id, order.order_id) + log_print( + "SENT: notifications of order acceptance to agent {} for order {}", + order.agent_id, + order.order_id, + ) self.owner.sendMessage(order.agent_id, Message({"msg": "ORDER_ACCEPTED", "order": order})) @@ -109,14 +134,24 @@ def handleLimitOrder(self, order): if not matching: # Now that we are done executing or accepting this order, log the new best bid and ask. if self.bids: - self.owner.logEvent('BEST_BID', "{},{},{}".format(self.symbol, - self.bids[0][0].limit_price, - sum([o.quantity for o in self.bids[0]]))) + self.owner.logEvent( + "BEST_BID", + "{},{},{}".format( + self.symbol, + self.bids[0][0].limit_price, + sum([o.quantity for o in self.bids[0]]), + ), + ) if self.asks: - self.owner.logEvent('BEST_ASK', "{},{},{}".format(self.symbol, - self.asks[0][0].limit_price, - sum([o.quantity for o in self.asks[0]]))) + self.owner.logEvent( + "BEST_ASK", + "{},{},{}".format( + self.symbol, + self.asks[0][0].limit_price, + sum([o.quantity for o in self.asks[0]]), + ), + ) # Also log the last trade (total share quantity, average share price). if executed: @@ -125,11 +160,11 @@ def handleLimitOrder(self, order): for q, p in executed: log_print("Executed: {} @ {}", q, p) trade_qty += q - trade_price += (p * q) + trade_price += p * q avg_price = int(round(trade_price / trade_qty)) log_print("Avg: {} @ ${:0.4f}", trade_qty, avg_price) - self.owner.logEvent('LAST_TRADE', "{},${:0.4f}".format(trade_qty, avg_price)) + self.owner.logEvent("LAST_TRADE", "{},${:0.4f}".format(trade_qty, avg_price)) self.last_trade = avg_price @@ -137,12 +172,12 @@ def handleLimitOrder(self, order): self.history.insert(0, {}) # Truncate history to required length. 
- self.history = self.history[:self.owner.stream_history + 1] + self.history = self.history[: self.owner.stream_history + 1] # Finally, log the full depth of the order book, ONLY if we have been requested to store the order book # for later visualization. (This is slow.) if self.owner.book_freq is not None: - row = {'QuoteTime': self.owner.currentTime} + row = {"QuoteTime": self.owner.currentTime} for quote, volume in self.getInsideBids(): row[quote] = -volume self.quotes_seen.add(quote) @@ -150,7 +185,8 @@ def handleLimitOrder(self, order): if quote in row: if row[quote] is not None: print( - "WARNING: THIS IS A REAL PROBLEM: an order book contains bids and asks at the same quote price!") + "WARNING: THIS IS A REAL PROBLEM: an order book contains bids and asks at the same quote price!" + ) row[quote] = volume self.quotes_seen.add(quote) self.book_log.append(row) @@ -160,31 +196,50 @@ def handleLimitOrder(self, order): def handleMarketOrder(self, order): if order.symbol != self.symbol: - log_print("{} order discarded. Does not match OrderBook symbol: {}", order.symbol, self.symbol) + log_print( + "{} order discarded. Does not match OrderBook symbol: {}", + order.symbol, + self.symbol, + ) return if (order.quantity <= 0) or (int(order.quantity) != order.quantity): - log_print("{} order discarded. Quantity ({}) must be a positive integer.", order.symbol, order.quantity) + log_print( + "{} order discarded. Quantity ({}) must be a positive integer.", + order.symbol, + order.quantity, + ) return orderbook_side = self.getInsideAsks() if order.is_buy_order else self.getInsideBids() - limit_orders = {} # limit orders to be placed (key=price, value=quantity) + limit_orders = {} # limit orders to be placed (key=price, value=quantity) order_quantity = order.quantity for price_level in orderbook_side: price, size = price_level[0], price_level[1] if order_quantity <= size: - limit_orders[price] = order_quantity #i.e. 
the top of the book has enough volume for the full order + limit_orders[price] = order_quantity # i.e. the top of the book has enough volume for the full order break else: - limit_orders[price] = size # i.e. not enough liquidity at the top of the book for the full order - # therefore walk through the book until all the quantities are matched + limit_orders[price] = size # i.e. not enough liquidity at the top of the book for the full order + # therefore walk through the book until all the quantities are matched order_quantity -= size continue - log_print("{} placing market order as multiple limit orders", order.symbol, order.quantity) + log_print( + "{} placing market order as multiple limit orders", + order.symbol, + order.quantity, + ) for lo in limit_orders.items(): p, q = lo[0], lo[1] - limit_order = LimitOrder(order.agent_id, order.time_placed, order.symbol, q, order.is_buy_order, p) + limit_order = LimitOrder( + order.agent_id, + order.time_placed, + order.symbol, + q, + order.is_buy_order, + p, + ) self.handleLimitOrder(limit_order) def executeOrder(self, order): @@ -242,15 +297,17 @@ def executeOrder(self, order): # out one, possibly truncating to the maximum history length. # The incoming order is guaranteed to exist under index 0. - self.history[0][order.order_id]['transactions'].append((self.owner.currentTime, order.quantity)) + self.history[0][order.order_id]["transactions"].append((self.owner.currentTime, order.quantity)) # The pre-existing order may or may not still be in the recent history. for idx, orders in enumerate(self.history): - if matched_order.order_id not in orders: continue + if matched_order.order_id not in orders: + continue # Found the matched order in history. Update it with this transaction. 
- self.history[idx][matched_order.order_id]['transactions'].append( - (self.owner.currentTime, matched_order.quantity)) + self.history[idx][matched_order.order_id]["transactions"].append( + (self.owner.currentTime, matched_order.quantity) + ) # Return (only the executed portion of) the matched order. return matched_order @@ -312,7 +369,8 @@ def cancelOrder(self, order): book = self.asks # If there are no orders on this side of the book, there is nothing to do. - if not book: return + if not book: + return # There are orders on this side. Find the price level of the order to cancel, # then find the exact order and cancel it. @@ -327,45 +385,61 @@ def cancelOrder(self, order): # Record cancellation of the order if it is still present in the recent history structure. for idx, orders in enumerate(self.history): - if cancelled_order.order_id not in orders: continue + if cancelled_order.order_id not in orders: + continue # Found the cancelled order in history. Update it with the cancelation. - self.history[idx][cancelled_order.order_id]['cancellations'].append( - (self.owner.currentTime, cancelled_order.quantity)) + self.history[idx][cancelled_order.order_id]["cancellations"].append( + (self.owner.currentTime, cancelled_order.quantity) + ) # If the cancelled price now has no orders, remove it completely. if not book[i]: del book[i] log_print("CANCELLED: order {}", order) - log_print("SENT: notifications of order cancellation to agent {} for order {}", - cancelled_order.agent_id, cancelled_order.order_id) - - self.owner.sendMessage(order.agent_id, - Message({"msg": "ORDER_CANCELLED", "order": cancelled_order})) + log_print( + "SENT: notifications of order cancellation to agent {} for order {}", + cancelled_order.agent_id, + cancelled_order.order_id, + ) + + self.owner.sendMessage( + order.agent_id, + Message({"msg": "ORDER_CANCELLED", "order": cancelled_order}), + ) # We found the order and cancelled it, so stop looking. 
self.last_update_ts = self.owner.currentTime return def modifyOrder(self, order, new_order): # Modifies the quantity of an existing limit order in the order book - if not self.isSameOrder(order, new_order): return + if not self.isSameOrder(order, new_order): + return book = self.bids if order.is_buy_order else self.asks - if not book: return + if not book: + return for i, o in enumerate(book): if self.isEqualPrice(order, o[0]): for mi, mo in enumerate(book[i]): if order.order_id == mo.order_id: book[i][0] = new_order for idx, orders in enumerate(self.history): - if new_order.order_id not in orders: continue - self.history[idx][new_order.order_id]['modifications'].append( - (self.owner.currentTime, new_order.quantity)) + if new_order.order_id not in orders: + continue + self.history[idx][new_order.order_id]["modifications"].append( + (self.owner.currentTime, new_order.quantity) + ) log_print("MODIFIED: order {}", order) - log_print("SENT: notifications of order modification to agent {} for order {}", - new_order.agent_id, new_order.order_id) - self.owner.sendMessage(order.agent_id, - Message({"msg": "ORDER_MODIFIED", "new_order": new_order})) + log_print( + "SENT: notifications of order modification to agent {} for order {}", + new_order.agent_id, + new_order.order_id, + ) + self.owner.sendMessage( + order.agent_id, + Message({"msg": "ORDER_MODIFIED", "new_order": new_order}), + ) if order.is_buy_order: self.bids = book else: @@ -399,7 +473,7 @@ def getInsideAsks(self, depth=sys.maxsize): return book def _get_recent_history(self): - """ Gets portion of self.history that has arrived since last call of self.get_transacted_volume. + """Gets portion of self.history that has arrived since last call of self.get_transacted_volume. 
Also updates self._transacted_volume[self.history_previous_length] :return: @@ -416,7 +490,7 @@ def _get_recent_history(self): return recent_history def _update_unrolled_transactions(self, recent_history): - """ Updates self._transacted_volume["unrolled_transactions"] with data from recent_history + """Updates self._transacted_volume["unrolled_transactions"] with data from recent_history :return: """ @@ -426,8 +500,8 @@ def _update_unrolled_transactions(self, recent_history): self._transacted_volume["unrolled_transactions"] = total_unrolled_txn def _unrolled_transactions_from_order_history(self, history): - """ Returns a DataFrame with columns ['execution_time', 'quantity'] from a dictionary with same format as - self.history, describing executed transactions. + """Returns a DataFrame with columns ['execution_time', 'quantity'] from a dictionary with same format as + self.history, describing executed transactions. """ # Load history into DataFrame unrolled_history = [] @@ -435,26 +509,37 @@ def _unrolled_transactions_from_order_history(self, history): for _, val in elem.items(): unrolled_history.append(val) - unrolled_history_df = pd.DataFrame(unrolled_history, columns=[ - 'entry_time', 'quantity', 'is_buy_order', 'limit_price', 'transactions', 'modifications', 'cancellations' - ]) + unrolled_history_df = pd.DataFrame( + unrolled_history, + columns=[ + "entry_time", + "quantity", + "is_buy_order", + "limit_price", + "transactions", + "modifications", + "cancellations", + ], + ) if unrolled_history_df.empty: - return pd.DataFrame(columns=['execution_time', 'quantity']) + return pd.DataFrame(columns=["execution_time", "quantity"]) - executed_transactions = unrolled_history_df[unrolled_history_df['transactions'].map(lambda d: len(d)) > 0] # remove cells that are an empty list + executed_transactions = unrolled_history_df[ + unrolled_history_df["transactions"].map(lambda d: len(d)) > 0 + ] # remove cells that are an empty list # Reshape into DataFrame with columns 
['execution_time', 'quantity'] - transaction_list = [element for list_ in executed_transactions['transactions'].values for element in list_] - unrolled_transactions = pd.DataFrame(transaction_list, columns=['execution_time', 'quantity']) - unrolled_transactions = unrolled_transactions.sort_values(by=['execution_time']) - unrolled_transactions = unrolled_transactions.drop_duplicates(keep='last') + transaction_list = [element for list_ in executed_transactions["transactions"].values for element in list_] + unrolled_transactions = pd.DataFrame(transaction_list, columns=["execution_time", "quantity"]) + unrolled_transactions = unrolled_transactions.sort_values(by=["execution_time"]) + unrolled_transactions = unrolled_transactions.drop_duplicates(keep="last") return unrolled_transactions - def get_transacted_volume(self, lookback_period='10min'): - """ Method retrieves the total transacted volume for a symbol over a lookback period finishing at the current - simulation time. + def get_transacted_volume(self, lookback_period="10min"): + """Method retrieves the total transacted volume for a symbol over a lookback period finishing at the current + simulation time. 
""" # Update unrolled transactions DataFrame @@ -465,8 +550,8 @@ def get_transacted_volume(self, lookback_period='10min'): # Get transacted volume in time window lookback_pd = pd.to_timedelta(lookback_period) window_start = self.owner.currentTime - lookback_pd - executed_within_lookback_period = unrolled_transactions[unrolled_transactions['execution_time'] >= window_start] - transacted_volume = executed_within_lookback_period['quantity'].sum() + executed_within_lookback_period = unrolled_transactions[unrolled_transactions["execution_time"] >= window_start] + transacted_volume = executed_within_lookback_period["quantity"].sum() return transacted_volume @@ -494,7 +579,7 @@ def isSameOrder(self, order, new_order): return order.order_id == new_order.order_id def book_log_to_df(self): - """ Returns a pandas DataFrame constructed from the order book log, to be consumed by + """Returns a pandas DataFrame constructed from the order book log, to be consumed by agent.ExchangeAgent.logOrderbookSnapshots. The first column of the DataFrame is `QuoteTime`. The succeeding columns are prices quoted during the @@ -513,12 +598,11 @@ def book_log_to_df(self): quote_idx_dict = {quote: idx for idx, quote in enumerate(quotes)} quotes_times = [] - # Construct sparse matrix, where rows are timesteps, columns are quotes and elements are volume. S = dok_matrix((log_len, len(quotes)), dtype=int) # Dictionary Of Keys based sparse matrix. for i, row in enumerate(tqdm(self.book_log, desc="Processing orderbook log")): - quotes_times.append(row['QuoteTime']) + quotes_times.append(row["QuoteTime"]) for quote, vol in row.items(): if quote == "QuoteTime": continue @@ -526,7 +610,7 @@ def book_log_to_df(self): S = S.tocsc() # Convert this matrix to Compressed Sparse Column format for pandas to consume. 
df = pd.DataFrame.sparse.from_spmatrix(S, columns=quotes) - df.insert(0, 'QuoteTime', quotes_times, allow_duplicates=True) + df.insert(0, "QuoteTime", quotes_times, allow_duplicates=True) return df # Print a nicely-formatted view of the current order book. @@ -535,17 +619,22 @@ def prettyPrint(self, silent=False): # Show the total volume at each price. If silent is True, return the accumulated string and print nothing. # If the global silent flag is set, skip prettyPrinting entirely, as it takes a LOT of time. - if be_silent: return '' + if be_silent: + return "" book = "{} order book as of {}\n".format(self.symbol, self.owner.currentTime) - book += "Last trades: simulated {:d}, historical {:d}\n".format(self.last_trade, - self.owner.oracle.observePrice(self.symbol, - self.owner.currentTime, - sigma_n=0, - random_state=self.owner.random_state)) - - book += "{:10s}{:10s}{:10s}\n".format('BID', 'PRICE', 'ASK') - book += "{:10s}{:10s}{:10s}\n".format('---', '-----', '---') + book += "Last trades: simulated {:d}, historical {:d}\n".format( + self.last_trade, + self.owner.oracle.observePrice( + self.symbol, + self.owner.currentTime, + sigma_n=0, + random_state=self.owner.random_state, + ), + ) + + book += "{:10s}{:10s}{:10s}\n".format("BID", "PRICE", "ASK") + book += "{:10s}{:10s}{:10s}\n".format("---", "-----", "---") for quote, volume in self.getInsideAsks()[-1::-1]: book += "{:10s}{:10s}{:10s}\n".format("", "{:d}".format(quote), "{:d}".format(volume)) @@ -553,7 +642,7 @@ def prettyPrint(self, silent=False): for quote, volume in self.getInsideBids(): book += "{:10s}{:10s}{:10s}\n".format("{:d}".format(volume), "{:d}".format(quote), "") - if silent: return book + if silent: + return book log_print(book) - diff --git a/util/crypto/diffieHellman.py b/util/crypto/diffieHellman.py index d7e2381b9..1fbf5a3a9 100755 --- a/util/crypto/diffieHellman.py +++ b/util/crypto/diffieHellman.py @@ -1,60 +1,66 @@ -import nacl.bindings as nb +import math import random -import pandas as 
pd + +import nacl.bindings as nb import numpy as np -import math +import pandas as pd def dict_keygeneration(peer_list): - # CDB: turned these into dictionaries to relax assumptions around agent IDs. - pkeys = {} - skeys = {} + # CDB: turned these into dictionaries to relax assumptions around agent IDs. + pkeys = {} + skeys = {} - for peer_id in peer_list: - pkeys[peer_id], skeys[peer_id] = nb.crypto_kx_keypair() + for peer_id in peer_list: + pkeys[peer_id], skeys[peer_id] = nb.crypto_kx_keypair() - return pkeys, skeys + return pkeys, skeys def dict_keyexchange(peer_list, self_id, my_pkeys, my_skeys, peer_pkeys): - # CDB: The last three parameters are now all dictionaries. Dictionary keys - # are peer ids to which we gave the key, or from which we received the key. - # comkeys is also now a dictionary keyed by peer id. - comkeys = {} + # CDB: The last three parameters are now all dictionaries. Dictionary keys + # are peer ids to which we gave the key, or from which we received the key. + # comkeys is also now a dictionary keyed by peer id. + comkeys = {} + + for peer_id in peer_list: + if peer_id > self_id: + common_key_raw, _ = nb.crypto_kx_client_session_keys( + my_pkeys[peer_id], my_skeys[peer_id], peer_pkeys[peer_id] + ) + else: + _, common_key_raw = nb.crypto_kx_server_session_keys( + my_pkeys[peer_id], my_skeys[peer_id], peer_pkeys[peer_id] + ) - for peer_id in peer_list: - if peer_id > self_id: - common_key_raw, _ = nb.crypto_kx_client_session_keys(my_pkeys[peer_id], my_skeys[peer_id], peer_pkeys[peer_id]) - else: - _, common_key_raw = nb.crypto_kx_server_session_keys(my_pkeys[peer_id], my_skeys[peer_id], peer_pkeys[peer_id]) + # Hash the common keys. + comkeys[peer_id] = int.from_bytes(nb.crypto_hash_sha256(common_key_raw), byteorder="big") - # Hash the common keys. 
- comkeys[peer_id] = int.from_bytes(nb.crypto_hash_sha256(common_key_raw), byteorder='big') + return comkeys - return comkeys +# PRG -#PRG -def randomize( r, modulo, clientsign): - # Call the double lenght pseudorsndom generator - random.seed(r) - rand = random.getrandbits(256*2) - rand_b_raw = bin(rand) - nr_zeros_append = 256 - (len(rand_b_raw) - 2) - rand_b = '0' * nr_zeros_append + rand_b_raw[2:] - # Use first half to mask the inputs and second half as the next seed to the pseudorsndom generator - R = int(rand_b[0:256], 2) - r = int(rand_b[256:] , 2) - return r, R +def randomize(r, modulo, clientsign): + # Call the double lenght pseudorsndom generator + random.seed(r) + rand = random.getrandbits(256 * 2) + rand_b_raw = bin(rand) + nr_zeros_append = 256 - (len(rand_b_raw) - 2) + rand_b = "0" * nr_zeros_append + rand_b_raw[2:] + # Use first half to mask the inputs and second half as the next seed to the pseudorsndom generator + R = int(rand_b[0:256], 2) + r = int(rand_b[256:], 2) + return r, R def randomize_all(party_i, common_key_list, modulo): - + for i in range(len(common_key_list)): if i == party_i: - continue + continue clientsign = 1 if i > party_i else -1 - common_key_list[i], client = randomize( common_key_list[i], modulo, clientsign) - + common_key_list[i], client = randomize(common_key_list[i], modulo, clientsign) + return common_key_list, client diff --git a/util/crypto/logReg.py b/util/crypto/logReg.py index ee0f3e413..6151cdde9 100644 --- a/util/crypto/logReg.py +++ b/util/crypto/logReg.py @@ -1,15 +1,16 @@ +import numpy as np +from scipy.special import expit from sklearn.metrics import accuracy_score as acc from sklearn.metrics import average_precision_score as auprc from sklearn.metrics import confusion_matrix from sklearn.metrics import f1_score as f1 from sklearn.metrics import matthews_corrcoef as mcc from sklearn.metrics import mean_squared_error as mse -from scipy.special import expit -import numpy as np np.set_printoptions(linewidth=120, 
precision=4, threshold=np.inf) -def getWeights(previous_weight = None, max_iter = 5, lr = 1.0, trainX = None, trainY = None, self_id = None): + +def getWeights(previous_weight=None, max_iter=5, lr=1.0, trainX=None, trainY=None, self_id=None): """ getWeights initializes a logistic regresssion model with the average weights from the previous iteration, then trains the model using mini-batch gradient descent for max_iter iterations with learning rate lr, @@ -22,16 +23,16 @@ def getWeights(previous_weight = None, max_iter = 5, lr = 1.0, trainX = None, t # If there was not a previous iteration of the protocol, initialize all weights to zero. # Otherwise, initialize to the average weights from the previous iteration. if previous_weight is None: - weight = np.zeros(trainX.shape[1]) + weight = np.zeros(trainX.shape[1]) else: - weight = previous_weight.copy() + weight = previous_weight.copy() # Train the model for max_iter iterations with learning rate lr. weight = np_train(weight, trainX, trainY, lr, max_iter) ### Uncomment next two lines for each client to print local training accuracy each iteration. Will be slower. - #pred = predict_all(trainX, weight) - #print (f"Client {self_id} local acc {acc(trainY, np.array(pred)):0.3f}.") + # pred = predict_all(trainX, weight) + # print (f"Client {self_id} local acc {acc(trainY, np.array(pred)):0.3f}.") # Return the local weights from this client training only on its own local data. return weight @@ -45,7 +46,7 @@ def reportStats(weight, current_iteration, X_train, y_train, X_test, y_test): ypred_is = predict_all(X_train, weight) ypred_oos = predict_all(X_test, weight) - np_err_handling = np.seterr(invalid = 'ignore') + np_err_handling = np.seterr(invalid="ignore") is_acc = acc(y_train, ypred_is) is_mcc = mcc(y_train, ypred_is) @@ -64,31 +65,46 @@ def reportStats(weight, current_iteration, X_train, y_train, X_test, y_test): np.seterr(**np_err_handling) - print (f"Consensus {current_iteration}: IS acc {is_acc:0.5f}. 
IS MCC {is_mcc:0.5f}. IS F1 {is_f1:0.5f}. IS MSE {is_mse:0.5f}. OOS acc {oos_acc:0.5f}. OOS MCC {oos_mcc:0.5f}. OOS F1 {oos_f1:0.5f}. OOS MSE {oos_mse:0.5f}.") - print (f"Confusion {current_iteration}: IS TP: {is_tp}, IS FP: {is_fp}, IS TN: {is_tn}, IS FN: {is_fn}, IS AUPRC: {is_auprc:0.5f}. OOS TP: {oos_tp}, OOS FP: {oos_fp}, OOS TN: {oos_tn}, OOS FN: {oos_fn}, OOS AUPRC: {oos_auprc:0.5f}.") - - return is_acc, is_mcc, is_f1, is_mse, is_auprc, oos_acc, oos_mcc, oos_f1, oos_mse, oos_auprc + print( + f"Consensus {current_iteration}: IS acc {is_acc:0.5f}. IS MCC {is_mcc:0.5f}. IS F1 {is_f1:0.5f}. IS MSE {is_mse:0.5f}. OOS acc {oos_acc:0.5f}. OOS MCC {oos_mcc:0.5f}. OOS F1 {oos_f1:0.5f}. OOS MSE {oos_mse:0.5f}." + ) + print( + f"Confusion {current_iteration}: IS TP: {is_tp}, IS FP: {is_fp}, IS TN: {is_tn}, IS FN: {is_fn}, IS AUPRC: {is_auprc:0.5f}. OOS TP: {oos_tp}, OOS FP: {oos_fp}, OOS TN: {oos_tn}, OOS FN: {oos_fn}, OOS AUPRC: {oos_auprc:0.5f}." + ) + + return ( + is_acc, + is_mcc, + is_f1, + is_mse, + is_auprc, + oos_acc, + oos_mcc, + oos_f1, + oos_mse, + oos_auprc, + ) def np_predict_all(X, weight): - w = np.tile(weight, (X.shape[0],1)) - pred = np.einsum('ij,ij->i', X, w) - return expit(pred) + w = np.tile(weight, (X.shape[0], 1)) + pred = np.einsum("ij,ij->i", X, w) + return expit(pred) def np_train(weight, trainX, trainY, lr, n): - for i in range(n): - sum_error = 0 - m = np_predict_all(trainX, weight) - e = trainY - m - g = (e * m * (1.0 - m)).reshape(-1,1) * trainX - ag = np.mean(g, axis=0) - weight = weight + lr * ag - se = np.sum(e ** 2) + for i in range(n): + sum_error = 0 + m = np_predict_all(trainX, weight) + e = trainY - m + g = (e * m * (1.0 - m)).reshape(-1, 1) * trainX + ag = np.mean(g, axis=0) + weight = weight + lr * ag + se = np.sum(e**2) - #print(f"new>epoch={i}, lr={lr:0.3f}, error={se:0.3f}") + # print(f"new>epoch={i}, lr={lr:0.3f}, error={se:0.3f}") - return weight + return weight def predict_all(trainX, weight): @@ -98,4 +114,3 @@ def 
predict_all(trainX, weight): pred[pred_raw > 0.5] = 1 return pred - diff --git a/util/formatting/clean_ohlc_price_series.py b/util/formatting/clean_ohlc_price_series.py index 61d7afa0e..238dd4c86 100644 --- a/util/formatting/clean_ohlc_price_series.py +++ b/util/formatting/clean_ohlc_price_series.py @@ -1,8 +1,9 @@ -import pandas as pd -import sys -import numpy as np import os +import sys from random import sample + +import numpy as np +import pandas as pd from dateutil.parser import parse """ Clean OHLC WRDS data series into historical fundamental format.""" @@ -17,10 +18,10 @@ symbol = sample(df.index.unique().tolist(), 1)[0] print(symbol, filename) -df = df[df.index==symbol] +df = df[df.index == symbol] df.set_index("level_1", inplace=True) -df = np.round((1000*df/df.iloc[0]).dropna()) -df = (df["open"]*100).astype("int") +df = np.round((1000 * df / df.iloc[0]).dropna()) +df = (df["open"] * 100).astype("int") df.index = df.index - pd.DateOffset(year=2000, month=1, day=1) pd.to_pickle(df, "clean.pkl".format(symbol, filename[:-4])) diff --git a/util/formatting/convert_order_book.py b/util/formatting/convert_order_book.py index 4891a790d..700a50155 100644 --- a/util/formatting/convert_order_book.py +++ b/util/formatting/convert_order_book.py @@ -1,29 +1,36 @@ import argparse import os -import pandas as pd -import numpy as np - import sys from pathlib import Path + +import numpy as np +import pandas as pd + p = str(Path(__file__).resolve().parents[2]) # directory two levels up from this file sys.path.append(p) -from util.formatting.convert_order_stream import get_year_month_day, get_start_end_time, dir_path, check_positive from tqdm import tqdm +from util.formatting.convert_order_stream import ( + check_positive, + dir_path, + get_start_end_time, + get_year_month_day, +) + def get_larger_int_and_gap(a, b): return (True, a - b) if a >= b else (False, b - a) def get_int_from_string(s): - int_list = [int(s) for s in s.split('_') if s.isdigit()] + int_list = [int(s) for 
s in s.split("_") if s.isdigit()] return int_list[0] def process_row(row, quote_levels): - """ Method takes row of unstacked orderbook log and processes into a dictionary representing a row of the LOBSTER- - ised DataFrame. + """Method takes row of unstacked orderbook log and processes into a dictionary representing a row of the LOBSTER- + ised DataFrame. """ row_arr = row[1].to_numpy() @@ -45,28 +52,53 @@ def process_row(row, quote_levels): more_bids_then_asks, difference = get_larger_int_and_gap(num_bids, num_asks) if more_bids_then_asks: - ask_values = np.pad(ask_values.astype(np.float32), (0, difference), 'constant', constant_values=np.nan) - ask_volumes = np.pad(ask_volumes.astype(np.float32), (0, difference), 'constant', constant_values=np.nan) + ask_values = np.pad( + ask_values.astype(np.float32), + (0, difference), + "constant", + constant_values=np.nan, + ) + ask_volumes = np.pad( + ask_volumes.astype(np.float32), + (0, difference), + "constant", + constant_values=np.nan, + ) else: - bid_values = np.pad(bid_values.astype(np.float32), (0, difference), 'constant', constant_values=np.nan) - bid_volumes = np.pad(bid_volumes.astype(np.float32), (0, difference), 'constant', constant_values=np.nan) + bid_values = np.pad( + bid_values.astype(np.float32), + (0, difference), + "constant", + constant_values=np.nan, + ) + bid_volumes = np.pad( + bid_volumes.astype(np.float32), + (0, difference), + "constant", + constant_values=np.nan, + ) ask_volumes_dict = {f"ask_size_{idx + 1}": ask_volumes[idx] for idx in range(len(ask_volumes))} ask_values_dict = {f"ask_price_{idx + 1}": ask_values[idx] for idx in range(len(ask_values))} bid_volumes_dict = {f"bid_size_{idx + 1}": bid_volumes[idx] for idx in range(len(bid_volumes))} bid_values_dict = {f"bid_price_{idx + 1}": bid_values[idx] for idx in range(len(bid_values))} - row_dict = {**ask_volumes_dict, **ask_values_dict, **bid_volumes_dict, **bid_values_dict} + row_dict = { + **ask_volumes_dict, + **ask_values_dict, + 
**bid_volumes_dict, + **bid_values_dict, + } return row_dict def reorder_columns(unordered_cols): - """ Reorders column list to coincide with columns of LOBSTER csv file format. """ + """Reorders column list to coincide with columns of LOBSTER csv file format.""" - ask_price_cols = [label for label in unordered_cols if 'ask_price' in label] - ask_size_cols = [label for label in unordered_cols if 'ask_size' in label] - bid_price_cols = [label for label in unordered_cols if 'bid_price' in label] - bid_size_cols = [label for label in unordered_cols if 'bid_size' in label] + ask_price_cols = [label for label in unordered_cols if "ask_price" in label] + ask_size_cols = [label for label in unordered_cols if "ask_size" in label] + bid_price_cols = [label for label in unordered_cols if "bid_price" in label] + bid_size_cols = [label for label in unordered_cols if "bid_size" in label] bid_price_cols.sort(key=get_int_from_string) bid_size_cols.sort(key=get_int_from_string) @@ -79,7 +111,7 @@ def reorder_columns(unordered_cols): ask_size_cols = np.array(ask_size_cols) new_col_list_size = ask_price_cols.size + ask_size_cols.size + bid_price_cols.size + bid_size_cols.size - new_col_list = np.empty((new_col_list_size,), dtype=' level]) - columns_to_drop.extend([f'ask_size_{idx}' for idx in range(1, num_levels + 1) if idx > level]) - columns_to_drop.extend([f'bid_price_{idx}' for idx in range(1, num_levels + 1) if idx > level]) - columns_to_drop.extend([f'bid_size_{idx}' for idx in range(1, num_levels + 1) if idx > level]) + columns_to_drop.extend([f"ask_price_{idx}" for idx in range(1, num_levels + 1) if idx > level]) + columns_to_drop.extend([f"ask_size_{idx}" for idx in range(1, num_levels + 1) if idx > level]) + columns_to_drop.extend([f"bid_price_{idx}" for idx in range(1, num_levels + 1) if idx > level]) + columns_to_drop.extend([f"bid_size_{idx}" for idx in range(1, num_levels + 1) if idx > level]) orderbook_df = orderbook_df.drop(columns=columns_to_drop) @@ -110,15 +142,15 
@@ def finalise_processing(orderbook_df, level): def is_wide_book(df): - """ Checks if orderbook dataframe is in wide or skinny format. """ - if isinstance(df.index, pd.core.index.MultiIndex): + """Checks if orderbook dataframe is in wide or skinny format.""" + if isinstance(df.index, pd.MultiIndex): return False else: return True def process_orderbook(df, level): - """ Method takes orderbook log and transforms into format amenable to "LOBSTER-ification" + """Method takes orderbook log and transforms into format amenable to "LOBSTER-ification" :param df: pd.DataFrame orderbook output by ABIDES :param level: Maximum displayed level in book @@ -151,35 +183,35 @@ def process_orderbook(df, level): return orderbook_df -def save_formatted_order_book(orderbook_bz2, ticker, level, out_dir='.'): - """ Saves orderbook data from ABIDES in LOBSTER format. +def save_formatted_order_book(orderbook_bz2, ticker, level, out_dir="."): + """Saves orderbook data from ABIDES in LOBSTER format. - :param orderbook_bz2: file path of order book bz2 output file. - :type orderbook_bz2: str - :param ticker: label of security - :type ticker: str - :param level: maximum level of order book to display - :type level: int - :param out_dir: path to output directory - :type out_dir: str + :param orderbook_bz2: file path of order book bz2 output file. + :type orderbook_bz2: str + :param ticker: label of security + :type ticker: str + :param level: maximum level of order book to display + :type level: int + :param out_dir: path to output directory + :type out_dir: str - :return: + :return: - ============ + ============ - Orderbook File: (Matrix of size: (Nx(4xNumberOfLevels))) - --------------- + Orderbook File: (Matrix of size: (Nx(4xNumberOfLevels))) + --------------- - Name: TICKER_Year-Month-Day_StartTime_EndTime_orderbook_LEVEL.csv + Name: TICKER_Year-Month-Day_StartTime_EndTime_orderbook_LEVEL.csv - Columns: + Columns: - 1.) Ask Price 1: Level 1 Ask Price (Best Ask) - 2.) 
Ask Size 1: Level 1 Ask Volume (Best Ask Volume) - 3.) Bid Price 1: Level 1 Bid Price (Best Bid) - 4.) Bid Size 1: Level 1 Bid Volume (Best Bid Volume) - 5.) Ask Price 2: Level 2 Ask Price (2nd Best Ask) - ... + 1.) Ask Price 1: Level 1 Ask Price (Best Ask) + 2.) Ask Size 1: Level 1 Ask Volume (Best Ask Volume) + 3.) Bid Price 1: Level 1 Bid Price (Best Bid) + 4.) Bid Size 1: Level 1 Bid Volume (Best Bid Volume) + 5.) Ask Price 2: Level 2 Ask Price (2nd Best Ask) + ... """ @@ -188,17 +220,17 @@ def save_formatted_order_book(orderbook_bz2, ticker, level, out_dir='.'): if not is_wide_book(orderbook_df): # skinny format trading_day = get_year_month_day(pd.Series(orderbook_df.index.levels[0])) - start_time, end_time = get_start_end_time(orderbook_df, 'orderbook_skinny') + start_time, end_time = get_start_end_time(orderbook_df, "orderbook_skinny") else: # wide format trading_day = get_year_month_day(pd.Series(orderbook_df.index)) - start_time, end_time = get_start_end_time(orderbook_df, 'orderbook_wide') + start_time, end_time = get_start_end_time(orderbook_df, "orderbook_wide") orderbook_df = process_orderbook(orderbook_df, level) # Save to file - #filename = f'{ticker}_{trading_day}_{start_time}_{end_time}_orderbook_{str(level)}.csv' - filename = f'orderbook.csv' + # filename = f'{ticker}_{trading_day}_{start_time}_{end_time}_orderbook_{str(level)}.csv' + filename = f"orderbook.csv" filename = os.path.join(out_dir, filename) orderbook_df.to_csv(filename, index=False, header=False) @@ -206,12 +238,21 @@ def save_formatted_order_book(orderbook_bz2, ticker, level, out_dir='.'): if __name__ == "__main__": - parser = argparse.ArgumentParser(description='Process ABIDES order book data into the LOBSTER format.') - parser.add_argument('book', type=str, help='ABIDES order book in bz2 format. 
' - 'Typical example is `orderbook_TICKER.bz2`') - parser.add_argument('-o', '--output-dir', default='.', help='Path to output directory', type=dir_path) - parser.add_argument('ticker', type=str, help="Ticker label") - parser.add_argument('level', type=check_positive, help="Maximum orderbook level.") + parser = argparse.ArgumentParser(description="Process ABIDES order book data into the LOBSTER format.") + parser.add_argument( + "book", + type=str, + help="ABIDES order book in bz2 format. " "Typical example is `orderbook_TICKER.bz2`", + ) + parser.add_argument( + "-o", + "--output-dir", + default=".", + help="Path to output directory", + type=dir_path, + ) + parser.add_argument("ticker", type=str, help="Ticker label") + parser.add_argument("level", type=check_positive, help="Maximum orderbook level.") args, remaining_args = parser.parse_known_args() diff --git a/util/formatting/convert_order_stream.py b/util/formatting/convert_order_stream.py index 475105c4e..736bb8d32 100644 --- a/util/formatting/convert_order_stream.py +++ b/util/formatting/convert_order_stream.py @@ -1,44 +1,55 @@ import argparse -import pandas as pd -from pandas.io.json import json_normalize import json import os +import pandas as pd +from pandas import json_normalize -def extract_events_from_stream(stream_df, event_type): - """ Extracts specific event from stream. 
- """ - events = stream_df.loc[stream_df.EventType == event_type][['EventTime', 'Event']] - events_json = events['Event'].to_json(orient="records") +def extract_events_from_stream(stream_df, event_type): + """Extracts specific event from stream.""" + events = stream_df.loc[stream_df.EventType == event_type][["EventTime", "Event"]] + events_json = events["Event"].to_json(orient="records") json_struct = json.loads(events_json) # TODO : get rid of structs containing all `int` types event_extracted = json_normalize(json_struct) - event_extracted = pd.merge(events['EventTime'].reset_index(), event_extracted, left_index=True, right_index=True) + event_extracted = pd.merge( + events["EventTime"].reset_index(), + event_extracted, + left_index=True, + right_index=True, + ) if not event_extracted.empty: - event_extracted = event_extracted[['EventTime', 'order_id', 'limit_price', 'quantity', 'is_buy_order']] - event_extracted.rename(columns={'EventTime': 'TIMESTAMP', - 'order_id': 'ORDER_ID', - 'limit_price': 'PRICE', - 'quantity': 'SIZE', - 'is_buy_order': 'BUY_SELL_FLAG'}, inplace=True) + event_extracted = event_extracted[["EventTime", "order_id", "limit_price", "quantity", "is_buy_order"]] + event_extracted.rename( + columns={ + "EventTime": "TIMESTAMP", + "order_id": "ORDER_ID", + "limit_price": "PRICE", + "quantity": "SIZE", + "is_buy_order": "BUY_SELL_FLAG", + }, + inplace=True, + ) else: - event_extracted = pd.DataFrame({ - 'TIMESTAMP': [], - 'ORDER_ID': [], - 'PRICE': [], - 'SIZE': [], - 'BUY_SELL_FLAG': [] - }) + event_extracted = pd.DataFrame( + { + "TIMESTAMP": [], + "ORDER_ID": [], + "PRICE": [], + "SIZE": [], + "BUY_SELL_FLAG": [], + } + ) return event_extracted def seconds_since_midnight(s): - """ Converts a pandas Series object of datetime64[ns] timestamps to Series of seconds from midnight on that day. + """Converts a pandas Series object of datetime64[ns] timestamps to Series of seconds from midnight on that day. 
- Inspired by https://stackoverflow.com/a/38050344 + Inspired by https://stackoverflow.com/a/38050344 """ d = pd.to_datetime(s.dt.date) delta_t = s - d @@ -46,14 +57,13 @@ def seconds_since_midnight(s): def convert_stream_to_format(stream_df, fmt="LOBSTER"): - """ Converts imported ABIDES DataFrame into LOBSTER FORMAT. - """ + """Converts imported ABIDES DataFrame into LOBSTER FORMAT.""" event_dfs = [] market_events = { "LIMIT_ORDER": 1, # "MODIFY_ORDER": 2, # causing errors in market replay "ORDER_CANCELLED": 3, - "ORDER_EXECUTED": 4 + "ORDER_EXECUTED": 4, } reversed_market_events = {val: key for key, val in market_events.items()} @@ -70,10 +80,10 @@ def convert_stream_to_format(stream_df, fmt="LOBSTER"): if fmt == "plot-scripts": - lobster_df["Type"].replace(reversed_market_events, inplace=True) - lobster_df.rename(columns={'Type': "TYPE"}, inplace=True) - lobster_df = lobster_df.sort_values(by=['TIMESTAMP']) - lobster_df = lobster_df[['TIMESTAMP', 'ORDER_ID', 'PRICE', 'SIZE', 'BUY_SELL_FLAG', 'TYPE']] + lobster_df["Type"] = lobster_df["Type"].replace(reversed_market_events) + lobster_df.rename(columns={"Type": "TYPE"}, inplace=True) + lobster_df = lobster_df.sort_values(by=["TIMESTAMP"]) + lobster_df = lobster_df[["TIMESTAMP", "ORDER_ID", "PRICE", "SIZE", "BUY_SELL_FLAG", "TYPE"]] return lobster_df elif fmt == "LOBSTER": @@ -83,7 +93,7 @@ def convert_stream_to_format(stream_df, fmt="LOBSTER"): lobster_df["Direction"] = (lobster_df["BUY_SELL_FLAG"] * 2) - 1 lobster_df = lobster_df[["Time", "Type", "Order ID", "Size", "Price", "Direction"]] - lobster_df = lobster_df.sort_values(by=['Time']) + lobster_df = lobster_df.sort_values(by=["Time"]) return lobster_df else: @@ -91,12 +101,12 @@ def convert_stream_to_format(stream_df, fmt="LOBSTER"): def get_year_month_day(s): - """ Returns date as string from pandas series of timestamps. + """Returns date as string from pandas series of timestamps. 
- :param s: - :type s: pandas.Series(datetime64[ns]) + :param s: + :type s: pandas.Series(datetime64[ns]) - :return s_date_str: str in format YYYY-MM-DD + :return s_date_str: str in format YYYY-MM-DD """ t = s.loc[s.first_valid_index()] @@ -104,13 +114,13 @@ def get_year_month_day(s): def get_start_end_time(df, fmt): - """ Returns first and last timestamp of pandas DataFrame in plot-scripts format or LOBSTER format. """ + """Returns first and last timestamp of pandas DataFrame in plot-scripts format or LOBSTER format.""" if fmt == "plot-scripts": - t = seconds_since_midnight(df['TIMESTAMP']) + t = seconds_since_midnight(df["TIMESTAMP"]) return int(round(t.iloc[0])), int(round(t.iloc[-1])) elif fmt == "LOBSTER": - return int(round(df['Time'].iloc[0])), int(round(df['Time'].iloc[-1])) + return int(round(df["Time"].iloc[0])), int(round(df["Time"].iloc[-1])) elif fmt == "orderbook_skinny": t = seconds_since_midnight(df.index.levels[0].to_series()) return int(round(t.iloc[0])), int(round(t.iloc[-1])) @@ -121,88 +131,88 @@ def get_start_end_time(df, fmt): raise ValueError('Format needs to be "plot-scripts" or "LOBSTER" or "orderbook_skinny" or "orderbook_wide"') -def save_formatted_order_stream(stream_bz2, ticker, level, fmt, suffix, out_dir='.'): - """ Saves ABIDES logged order stream into csv in requested format. +def save_formatted_order_stream(stream_bz2, ticker, level, fmt, suffix, out_dir="."): + """Saves ABIDES logged order stream into csv in requested format. - :param stream_bz2: file path of Exchange Agent bz2 output file. - :type stream_bz2: str - :param ticker: label of security - :type ticker: str - :param level: maximum level of order book to display - :type level: int - :param fmt: Specifies the output format, current options are "plot-scripts" and "LOBSTER". 
- :type fmt: str - :param suffix: suffix to add to file name before extension - :type suffix: str - :param out_dir: path to output directory - :type out_dir: str + :param stream_bz2: file path of Exchange Agent bz2 output file. + :type stream_bz2: str + :param ticker: label of security + :type ticker: str + :param level: maximum level of order book to display + :type level: int + :param fmt: Specifies the output format, current options are "plot-scripts" and "LOBSTER". + :type fmt: str + :param suffix: suffix to add to file name before extension + :type suffix: str + :param out_dir: path to output directory + :type out_dir: str - :return: + :return: - ============= + ============= - PLOT-SCRIPTS FORMAT (Matrix of size: (Nx5)) - -------------- + PLOT-SCRIPTS FORMAT (Matrix of size: (Nx5)) + -------------- - Name: TICKER_Year-Month-Day_StartTime_EndTime_message_LEVEL.csv + Name: TICKER_Year-Month-Day_StartTime_EndTime_message_LEVEL.csv - Columns: + Columns: - 1) TIMESTAMP + 1) TIMESTAMP - 2) ORDER_ID + 2) ORDER_ID - 3) PRICE + 3) PRICE - 4) SIZE + 4) SIZE - 5) BUY_SELL_FLAG + 5) BUY_SELL_FLAG - LOBSTER FORMAT (compliant with version 01 Sept 2013): (Matrix of size: (Nx6)) - -------------- + LOBSTER FORMAT (compliant with version 01 Sept 2013): (Matrix of size: (Nx6)) + -------------- - Name: TICKER_Year-Month-Day_StartTime_EndTime_message_LEVEL.csv + Name: TICKER_Year-Month-Day_StartTime_EndTime_message_LEVEL.csv - StartTime and EndTime give the theoretical beginning - and end time of the output file in milliseconds after - mid night. LEVEL refers to the number of levels of the - requested limit order book. + StartTime and EndTime give the theoretical beginning + and end time of the output file in milliseconds after + mid night. LEVEL refers to the number of levels of the + requested limit order book. - Columns: + Columns: - 1.) 
Time: - Seconds after midnight with decimal - precision of at least milliseconds - and up to nanoseconds depending on - the requested period - 2.) Type: - 1: Submission of a new limit order - 2: Cancellation (Partial deletion - of a limit order) - 3: Deletion (Total deletion of a limit order) - 4: Execution of a visible limit order - 5: Execution of a hidden limit order - 7: Trading halt indicator - (Detailed information below) - 3.) Order ID: - Unique order reference number - (Assigned in order flow) - 4.) Size: - Number of shares - 5.) Price: - Dollar price times 10000 - (i.e., A stock price of $91.14 is given - by 911400) - 6.) Direction: - -1: Sell limit order - 1: Buy limit order + 1.) Time: + Seconds after midnight with decimal + precision of at least milliseconds + and up to nanoseconds depending on + the requested period + 2.) Type: + 1: Submission of a new limit order + 2: Cancellation (Partial deletion + of a limit order) + 3: Deletion (Total deletion of a limit order) + 4: Execution of a visible limit order + 5: Execution of a hidden limit order + 7: Trading halt indicator + (Detailed information below) + 3.) Order ID: + Unique order reference number + (Assigned in order flow) + 4.) Size: + Number of shares + 5.) Price: + Dollar price times 10000 + (i.e., A stock price of $91.14 is given + by 911400) + 6.) Direction: + -1: Sell limit order + 1: Buy limit order - Note: - Execution of a sell (buy) limit - order corresponds to a buyer (seller) - initiated trade, i.e. Buy (Sell) trade. + Note: + Execution of a sell (buy) limit + order corresponds to a buyer (seller) + initiated trade, i.e. Buy (Sell) trade. 
""" @@ -210,27 +220,24 @@ def save_formatted_order_stream(stream_bz2, ticker, level, fmt, suffix, out_dir= write_df = convert_stream_to_format(stream_df, fmt=fmt) # Save to file - trading_day = get_year_month_day(stream_df['EventTime']) + trading_day = get_year_month_day(stream_df["EventTime"]) start_time, end_time = get_start_end_time(write_df, fmt) - if fmt == "plot-scripts": filename = f'orders_{ticker}_{trading_day.replace("-", "")}{suffix}.pkl' filename = os.path.join(out_dir, filename) - write_df = write_df.set_index('TIMESTAMP') + write_df = write_df.set_index("TIMESTAMP") write_df.to_pickle(filename) elif fmt == "LOBSTER": - filename = f'{ticker}_{trading_day}_{start_time}_{end_time}_message_{str(level)}{suffix}.csv' + filename = f"{ticker}_{trading_day}_{start_time}_{end_time}_message_{str(level)}{suffix}.csv" filename = os.path.join(out_dir, filename) write_df.to_csv(filename, index=False, header=False) else: raise ValueError('Format needs to be "plot-scripts" or "LOBSTER"') - - def dir_path(string): - """ https://stackoverflow.com/a/51212150 """ + """https://stackoverflow.com/a/51212150""" if os.path.isdir(string): return string else: @@ -238,7 +245,7 @@ def dir_path(string): def check_positive(value): - """ https://stackoverflow.com/a/14117511 """ + """https://stackoverflow.com/a/14117511""" ivalue = int(value) if ivalue <= 0: raise argparse.ArgumentTypeError(f"{value} is an invalid positive int value") @@ -247,17 +254,36 @@ def check_positive(value): if __name__ == "__main__": - parser = argparse.ArgumentParser(description='Process ABIDES stream data into either plotting or LOBSTER formats.') - parser.add_argument('stream', type=str, help='ABIDES order stream in bz2 format. 
' - 'Typical example is `ExchangeAgent.bz2`') - parser.add_argument('-o', '--output-dir', default='.', help='Path to output directory', type=dir_path) - parser.add_argument('ticker', type=str, help="Ticker label") - parser.add_argument('level', type=check_positive, help="Maximum orderbook level.") - parser.add_argument('format', choices=['plot-scripts', 'LOBSTER'], type=str, - help="Output format of stream") - parser.add_argument('--suffix', type=str, help="optional suffix to add to filename.", default="") - + parser = argparse.ArgumentParser(description="Process ABIDES stream data into either plotting or LOBSTER formats.") + parser.add_argument( + "stream", + type=str, + help="ABIDES order stream in bz2 format. " "Typical example is `ExchangeAgent.bz2`", + ) + parser.add_argument( + "-o", + "--output-dir", + default=".", + help="Path to output directory", + type=dir_path, + ) + parser.add_argument("ticker", type=str, help="Ticker label") + parser.add_argument("level", type=check_positive, help="Maximum orderbook level.") + parser.add_argument( + "format", + choices=["plot-scripts", "LOBSTER"], + type=str, + help="Output format of stream", + ) + parser.add_argument("--suffix", type=str, help="optional suffix to add to filename.", default="") args, remaining_args = parser.parse_known_args() - save_formatted_order_stream(args.stream, args.ticker, args.level, args.format, args.suffix, out_dir=args.output_dir) + save_formatted_order_stream( + args.stream, + args.ticker, + args.level, + args.format, + args.suffix, + out_dir=args.output_dir, + ) diff --git a/util/formatting/mid_price_from_orderbook.py b/util/formatting/mid_price_from_orderbook.py index e7381385b..b8e1c7e50 100644 --- a/util/formatting/mid_price_from_orderbook.py +++ b/util/formatting/mid_price_from_orderbook.py @@ -4,21 +4,23 @@ p = str(Path(__file__).resolve().parents[2]) # directory two levels up from this file sys.path.append(p) -from util.formatting.convert_order_book import process_orderbook, 
is_wide_book -from util.formatting.convert_order_stream import dir_path -import pandas as pd -import os import argparse +import os + +import pandas as pd + +from util.formatting.convert_order_book import is_wide_book, process_orderbook +from util.formatting.convert_order_stream import dir_path def save_mid_price(orderbook_file_path, output_dir): - """ Save order book mid price, computed from ABIDES orderbook log. """ + """Save order book mid price, computed from ABIDES orderbook log.""" orderbook_df = pd.read_pickle(orderbook_file_path) processed_df = process_orderbook(orderbook_df, 1) # Compute mid price and associate to timestamp - mid_price = (processed_df['ask_price_1'] + processed_df['bid_price_1']) / 2 + mid_price = (processed_df["ask_price_1"] + processed_df["bid_price_1"]) / 2 if not is_wide_book(orderbook_df): mid_price.index = orderbook_df.index.levels[0] else: @@ -32,10 +34,17 @@ def save_mid_price(orderbook_file_path, output_dir): if __name__ == "__main__": - parser = argparse.ArgumentParser(description='Processes ABIDES orderbook data into pickled pd.DataFrame containing ' - 'mid price at timestep.') - parser.add_argument('-o', '--output-dir', default='.', help='Path to output directory', type=dir_path) - parser.add_argument('book', type=str, help='ABIDES order book output in bz2 format. ') + parser = argparse.ArgumentParser( + description="Processes ABIDES orderbook data into pickled pd.DataFrame containing " "mid price at timestep." + ) + parser.add_argument( + "-o", + "--output-dir", + default=".", + help="Path to output directory", + type=dir_path, + ) + parser.add_argument("book", type=str, help="ABIDES order book output in bz2 format. 
") args, remaining_args = parser.parse_known_args() save_mid_price(args.book, args.output_dir) diff --git a/util/formatting/prepare_abides_data_for_plotting.py b/util/formatting/prepare_abides_data_for_plotting.py index 3898773b8..96fc29c67 100644 --- a/util/formatting/prepare_abides_data_for_plotting.py +++ b/util/formatting/prepare_abides_data_for_plotting.py @@ -1,29 +1,39 @@ -from util.formatting.convert_order_stream import convert_stream_to_format -import os import argparse -from dateutil.parser import parse -from util.formatting.convert_order_stream import dir_path +import os + import pandas as pd +from dateutil.parser import parse + +from util.formatting.convert_order_stream import convert_stream_to_format, dir_path def process_abides_order_stream(stream_bz2, symbol, out_dir, date): - """ Writes ABIDES stream data into pandas DataFrame required by plotting programs. """ - stream_df = pd.read_pickle(stream_bz2, compression='bz2').reset_index() + """Writes ABIDES stream data into pandas DataFrame required by plotting programs.""" + stream_df = pd.read_pickle(stream_bz2, compression="bz2").reset_index() write_df = convert_stream_to_format(stream_df, fmt="plot-scripts") - write_df = write_df.set_index('TIMESTAMP') - date_str = date.strftime('%Y%m%d') + write_df = write_df.set_index("TIMESTAMP") + date_str = date.strftime("%Y%m%d") file_name = f"{out_dir}/orders_{symbol}_{date_str}.pkl" write_df.to_pickle(file_name) if __name__ == "__main__": - parser = argparse.ArgumentParser(description='Process ABIDES stream data into plotting format and pickles it.') - parser.add_argument('stream', type=str, help='ABIDES order stream in bz2 format. 
' - 'Typical example is `ExchangeAgent.bz2`') - parser.add_argument('-o', '--output-dir', default='.', help='Path to output directory', type=dir_path) - parser.add_argument('ticker', type=str, help="Ticker label to give the ABIDES data.") - parser.add_argument('date', type=parse, help="Date of the trading day in format YYYYMMDD.") + parser = argparse.ArgumentParser(description="Process ABIDES stream data into plotting format and pickles it.") + parser.add_argument( + "stream", + type=str, + help="ABIDES order stream in bz2 format. " "Typical example is `ExchangeAgent.bz2`", + ) + parser.add_argument( + "-o", + "--output-dir", + default=".", + help="Path to output directory", + type=dir_path, + ) + parser.add_argument("ticker", type=str, help="Ticker label to give the ABIDES data.") + parser.add_argument("date", type=parse, help="Date of the trading day in format YYYYMMDD.") args, remaining_args = parser.parse_known_args() process_abides_order_stream(args.stream, args.ticker, args.output_dir, args.date) diff --git a/util/formatting/prepare_dow_data_for_plotting.py b/util/formatting/prepare_dow_data_for_plotting.py index 56216938e..d57e3c0bb 100644 --- a/util/formatting/prepare_dow_data_for_plotting.py +++ b/util/formatting/prepare_dow_data_for_plotting.py @@ -1,47 +1,49 @@ -import pandas as pd -from datetime import datetime -from util.formatting.convert_order_stream import dir_path import argparse -from dateutil.parser import parse -from datetime import timedelta import os +from datetime import datetime, timedelta + +import pandas as pd +from dateutil.parser import parse + +from util.formatting.convert_order_stream import dir_path class Oracle: - """ Class that creates a pandas DataFrame ready for processing by plotting framework. + """Class that creates a pandas DataFrame ready for processing by plotting framework. - Thanks @Mahmoud for the code! + Thanks @Mahmoud for the code! 
""" - COLUMNS = ['TIMESTAMP', 'ORDER_ID', 'PRICE', 'SIZE', 'BUY_SELL_FLAG'] - DIRECTION = {0: 'BUY', 1: 'SELL'} + + COLUMNS = ["TIMESTAMP", "ORDER_ID", "PRICE", "SIZE", "BUY_SELL_FLAG"] + DIRECTION = {0: "BUY", 1: "SELL"} def __init__(self, symbol, date, start_time, end_time, orders_file_path): - self.symbol = symbol - self.date = date - self.start_time = start_time - self.end_time = end_time - self.orders_file_path = orders_file_path - self.orders_df = self.processOrders() + self.symbol = symbol + self.date = date + self.start_time = start_time + self.end_time = end_time + self.orders_file_path = orders_file_path + self.orders_df = self.processOrders() def processOrders(self): def convertDate(date_str): try: - return datetime.strptime(date_str, '%Y%m%d%H%M%S.%f') + return datetime.strptime(date_str, "%Y%m%d%H%M%S.%f") except ValueError: return convertDate(date_str[:-1]) orders_df = pd.read_csv(self.orders_file_path).iloc[1:] - all_columns = orders_df.columns[0].split('|') - orders_df = orders_df[orders_df.columns[0]].str.split('|', 16, expand=True) + all_columns = orders_df.columns[0].split("|") + orders_df = orders_df[orders_df.columns[0]].str.split("|", 16, expand=True) orders_df.columns = all_columns orders_df = orders_df[Oracle.COLUMNS] - orders_df['BUY_SELL_FLAG'] = orders_df['BUY_SELL_FLAG'].astype(int).replace(Oracle.DIRECTION) - orders_df['TIMESTAMP'] = orders_df['TIMESTAMP'].astype(str).apply(convertDate) - orders_df['SIZE'] = orders_df['SIZE'].astype(int) - orders_df['PRICE'] = orders_df['PRICE'].astype(float) + orders_df["BUY_SELL_FLAG"] = orders_df["BUY_SELL_FLAG"].astype(int).replace(Oracle.DIRECTION) + orders_df["TIMESTAMP"] = orders_df["TIMESTAMP"].astype(str).apply(convertDate) + orders_df["SIZE"] = orders_df["SIZE"].astype(int) + orders_df["PRICE"] = orders_df["PRICE"].astype(float) orders_df = orders_df.loc[(orders_df.TIMESTAMP >= self.start_time) & (orders_df.TIMESTAMP < self.end_time)] - orders_df.set_index('TIMESTAMP', inplace=True) + 
orders_df.set_index("TIMESTAMP", inplace=True) return orders_df @@ -52,34 +54,58 @@ def check_dates_valid(start_date, end_date): def get_date_range(start_date, end_date): - """ https://stackoverflow.com/a/7274316 """ + """https://stackoverflow.com/a/7274316""" delta = end_date - start_date - date_range = [(start_date + timedelta(days=i)).strftime('%Y%m%d') for i in range(delta.days + 1)] + date_range = [(start_date + timedelta(days=i)).strftime("%Y%m%d") for i in range(delta.days + 1)] return date_range def dow_data_to_pickle(dow_data_dir, symbol, start_date, end_date, out_dir): - """ Saves files of the form orders_{symbol}_{date}.pkl """ + """Saves files of the form orders_{symbol}_{date}.pkl""" date_range = get_date_range(start_date, end_date) for date in date_range: print(f"Processing file for symbol {symbol} on date: {date}") - mkt_open = pd.to_datetime(date) + pd.to_timedelta('09:30:00') - mkt_close = pd.to_datetime(date) + pd.to_timedelta('16:00:00') - oracle = Oracle(symbol=symbol, date=date, start_time=mkt_open, end_time=mkt_close, - orders_file_path=f'{dow_data_dir}/{symbol}/{symbol}.{date}') - oracle.orders_df.to_pickle(f'{out_dir}/orders_{symbol}_{date}.pkl') + mkt_open = pd.to_datetime(date) + pd.to_timedelta("09:30:00") + mkt_close = pd.to_datetime(date) + pd.to_timedelta("16:00:00") + oracle = Oracle( + symbol=symbol, + date=date, + start_time=mkt_open, + end_time=mkt_close, + orders_file_path=f"{dow_data_dir}/{symbol}/{symbol}.{date}", + ) + oracle.orders_df.to_pickle(f"{out_dir}/orders_{symbol}_{date}.pkl") if __name__ == "__main__": - parser = argparse.ArgumentParser(description='Processes DOW30 data into pickled pd.DataFrame files required as ' - 'input for plotting.') - parser.add_argument('dow_data_dir', type=dir_path, help='Path to directory containing all of the DOW30 data in EFS.') - parser.add_argument('-o', '--output-dir', default='.', help='Path to output directory', type=dir_path) - parser.add_argument('ticker', type=str, help="Ticker 
label") - parser.add_argument('start_date', type=parse, help='First date of DOW30 data to use for a given symbol in format YYYYMMDD.') - parser.add_argument('end_date', type=parse, help='Final date of DOW30 data to use for a given symbol in format YYYYMMDD.') + parser = argparse.ArgumentParser( + description="Processes DOW30 data into pickled pd.DataFrame files required as " "input for plotting." + ) + parser.add_argument( + "dow_data_dir", + type=dir_path, + help="Path to directory containing all of the DOW30 data in EFS.", + ) + parser.add_argument( + "-o", + "--output-dir", + default=".", + help="Path to output directory", + type=dir_path, + ) + parser.add_argument("ticker", type=str, help="Ticker label") + parser.add_argument( + "start_date", + type=parse, + help="First date of DOW30 data to use for a given symbol in format YYYYMMDD.", + ) + parser.add_argument( + "end_date", + type=parse, + help="Final date of DOW30 data to use for a given symbol in format YYYYMMDD.", + ) args, remaining_args = parser.parse_known_args() diff --git a/util/grid_search.py b/util/grid_search.py index 2db3d3712..03d29c5ab 100755 --- a/util/grid_search.py +++ b/util/grid_search.py @@ -1,16 +1,24 @@ import argparse import itertools + from util import numeric def parse_cli(): - parser = argparse.ArgumentParser(description='Prints the Cartesian product of a group of lists.') - parser.add_argument('-l', '--list', nargs='+', action='append', - help='Start of list', required=True, type=numeric) + parser = argparse.ArgumentParser(description="Prints the Cartesian product of a group of lists.") + parser.add_argument( + "-l", + "--list", + nargs="+", + action="append", + help="Start of list", + required=True, + type=numeric, + ) args = parser.parse_args() prod = itertools.product(*args.list) for items in prod: - print(','.join(str(s) for s in items)) + print(",".join(str(s) for s in items)) if __name__ == "__main__": diff --git a/util/make_grid.py b/util/make_grid.py index 
ba1d72803..dffa35996 100755 --- a/util/make_grid.py +++ b/util/make_grid.py @@ -1,8 +1,10 @@ import argparse -import numpy as np import sys -SCALE_OPTIONS = ['log', 'linear'] +import numpy as np + +SCALE_OPTIONS = ["log", "linear"] + def check_both_int(a, b): if a.is_integer() and b.is_integer(): @@ -10,15 +12,21 @@ def check_both_int(a, b): else: return False + def process_args(args): - """ Prints grid to standard error. """ + """Prints grid to standard error.""" + + min_val, max_val, num_points, scale = ( + args.min, + args.max, + args.num_points, + args.scale, + ) - min_val, max_val, num_points, scale = args.min, args.max, args.num_points, args.scale - - if scale == 'linear': + if scale == "linear": dtype = int if check_both_int(min_val, max_val) else float grid = np.linspace(min_val, max_val, num=num_points, dtype=dtype) - elif scale == 'log': + elif scale == "log": grid = np.logspace(min_val, max_val, num=num_points) else: raise ValueError(f"Option not in {SCALE_OPTIONS}") @@ -26,20 +34,22 @@ def process_args(args): grid = np.unique(grid) for elem in grid: - sys.stdout.write(str(elem)+'\n') + sys.stdout.write(str(elem) + "\n") def parse_cli(): - parser = argparse.ArgumentParser(description='Generates a 1D grid of points from min to max') - parser.add_argument('--min', type=float, required=True, - help='Minimum value.') - parser.add_argument('--max', type=float, required=True, - help='Maximum value.') - parser.add_argument('--num-points', type=int, required=True, - help='Number of grid points.') - parser.add_argument('--scale', type=str, default='linear', choices=SCALE_OPTIONS, - help='Scaling of grid points. 
Note if "log" then min and max are interpreted ' - 'as 10 ** min and 10 ** max respectively.') + parser = argparse.ArgumentParser(description="Generates a 1D grid of points from min to max") + parser.add_argument("--min", type=float, required=True, help="Minimum value.") + parser.add_argument("--max", type=float, required=True, help="Maximum value.") + parser.add_argument("--num-points", type=int, required=True, help="Number of grid points.") + parser.add_argument( + "--scale", + type=str, + default="linear", + choices=SCALE_OPTIONS, + help='Scaling of grid points. Note if "log" then min and max are interpreted ' + "as 10 ** min and 10 ** max respectively.", + ) args = parser.parse_args() return args @@ -47,4 +57,4 @@ def parse_cli(): if __name__ == "__main__": args = parse_cli() - process_args(args) \ No newline at end of file + process_args(args) diff --git a/util/model/QTable.py b/util/model/QTable.py index 445ecb85b..5abc96d5b 100644 --- a/util/model/QTable.py +++ b/util/model/QTable.py @@ -5,21 +5,30 @@ # An agent's random_state attribute is required to ensure properly repeatable # experiments. 
-class QTable(): - def __init__ (self, dims = (100, 2), alpha = 0.99, alpha_decay = 0.999, - alpha_min = 0.3, epsilon = 0.99, epsilon_decay = 0.999, epsilon_min = 0.1, - gamma = 0.98, random_state = None) : +class QTable: - self.q = random_state.normal(loc = 0, scale = 1, size = dims) + def __init__( + self, + dims=(100, 2), + alpha=0.99, + alpha_decay=0.999, + alpha_min=0.3, + epsilon=0.99, + epsilon_decay=0.999, + epsilon_min=0.1, + gamma=0.98, + random_state=None, + ): - self.alpha = alpha - self.alpha_decay = alpha_decay - self.alpha_min = alpha_min + self.q = random_state.normal(loc=0, scale=1, size=dims) - self.epsilon = epsilon - self.epsilon_decay = epsilon_decay - self.epsilon_min = epsilon_min + self.alpha = alpha + self.alpha_decay = alpha_decay + self.alpha_min = alpha_min - self.gamma = gamma + self.epsilon = epsilon + self.epsilon_decay = epsilon_decay + self.epsilon_min = epsilon_min + self.gamma = gamma diff --git a/util/oracle/DataOracle.py b/util/oracle/DataOracle.py index 810a43996..ee6ff78f0 100644 --- a/util/oracle/DataOracle.py +++ b/util/oracle/DataOracle.py @@ -8,129 +8,134 @@ ### experiment we are running with more active agent types. import datetime as dt +import os +import sys +from math import sqrt + import numpy as np import pandas as pd -import os, sys +from joblib import Memory -from math import sqrt -from util.util import print, log_print +from util.util import log_print, print -from joblib import Memory -mem = Memory(cachedir='./cache', verbose=0) +mem = Memory(cachedir="./cache", verbose=0) -#@mem.cache +# @mem.cache def read_trades(trade_file, symbols): - log_print ("Data not cached. This will take a minute...") + log_print("Data not cached. This will take a minute...") - df = pd.read_pickle(trade_file, compression='bz2') + df = pd.read_pickle(trade_file, compression="bz2") - # Filter to requested symbols. - df = df.loc[symbols] + # Filter to requested symbols. 
+ df = df.loc[symbols] - # Filter duplicate indices (trades on two exchanges at the PRECISE same time). Rare. - df = df[~df.index.duplicated(keep='first')] + # Filter duplicate indices (trades on two exchanges at the PRECISE same time). Rare. + df = df[~df.index.duplicated(keep="first")] - # Ensure resulting index is sorted for best performance later on. - df = df.sort_index() + # Ensure resulting index is sorted for best performance later on. + df = df.sort_index() - return (df) + return df class DataOracle: - def __init__(self, historical_date = None, symbols = None, data_dir = None): - self.historical_date = historical_date - self.symbols = symbols - - self.mkt_open = None - - # Read historical trades here... - h = historical_date - pre = 'ct' if h.year < 2015 else 'ctm' - trade_file = os.path.join(data_dir, 'trades', 'trades_{}'.format(h.year), - '{}_{}{:02d}{:02d}.bgz'.format(pre, h.year, h.month, h.day)) - - bars_1m_file = os.path.join(data_dir, '1m_ohlc', '1m_ohlc_{}'.format(h.year), - '{}{:02d}{:02d}_ohlc_1m.bgz'.format(h.year, h.month, h.day)) - - log_print ("DataOracle initializing trades from file {}", trade_file) - log_print ("DataOracle initializing 1m bars from file {}", bars_1m_file) - - then = dt.datetime.now() - self.df_trades = read_trades(trade_file, symbols) - self.df_bars_1m = read_trades(bars_1m_file, symbols) - now = dt.datetime.now() - - log_print ("DataOracle initialized for {} with symbols {}", historical_date, symbols) - log_print ("DataOracle initialization took {}", now - then) - - - - # Return the daily open price for the symbol given. The processing to create the 1m OHLC - # files does propagate the earliest trade backwards, which helps. The exchange should - # pass its opening time. - def getDailyOpenPrice (self, symbol, mkt_open, cents=True): - # Remember market open time. 
- self.mkt_open = mkt_open - - log_print ("Oracle: client requested {} at market open: {}", symbol, mkt_open) - - # Find the opening historical price in the 1m OHLC bars for this symbol. - open = self.df_bars_1m.loc[(symbol,mkt_open.time()),'open'] - log_print ("Oracle: market open price was was {}", open) - - return int(round(open * 100)) if cents else open - - - # Return the latest trade price for the symbol at or prior to the given currentTime, - # which must be of type pd.Timestamp. - def getLatestTrade (self, symbol, currentTime): - - log_print ("Oracle: client requested {} as of {}", symbol, currentTime) - - # See when the last historical trade was, prior to simulated currentTime. - dt_last_trade = self.df_trades.loc[symbol].index.asof(currentTime) - if pd.notnull(dt_last_trade): - last_trade = self.df_trades.loc[(symbol,dt_last_trade)] - - price = last_trade['PRICE'] - time = dt_last_trade - - # If we know the market open time, and the last historical trade was before it, use - # the market open price instead. If there were no trades before the requested time, - # also use the market open price. - if pd.isnull(dt_last_trade) or (self.mkt_open and time < self.mkt_open): - price = self.getDailyOpenPrice(symbol, self.mkt_open, cents=False) - time = self.mkt_open - - log_print ("Oracle: latest historical trade was {} at {}", price, time) - - return price - - - # Return a noisy observed historical price for agents which have that ability. - # currentTime must be of type pd.Timestamp. Only the Exchange or other privileged - # agents should use noisy=False. - # - # NOTE: sigma_n is the observation variance, NOT STANDARD DEVIATION. - # - # Each agent must pass its own np.random.RandomState object to the oracle. - # This helps to preserve the consistency of multiple simulations with experimental - # changes (if the oracle used a global Random object, simply adding one new agent - # would change everyone's "noise" on all subsequent observations). 
- def observePrice(self, symbol, currentTime, sigma_n = 0.0001, random_state = None): - last_trade_price = self.getLatestTrade(symbol, currentTime) - - # Noisy belief is a normal distribution around 1% the last trade price with variance - # as requested by the agent. - if sigma_n == 0: - belief = float(last_trade_price) - else: - belief = random_state.normal(loc=last_trade_price, scale=last_trade_price * sqrt(sigma_n)) - - log_print ("Oracle: giving client value observation {:0.2f}", belief) - - # All simulator prices are specified in integer cents. - return int(round(belief * 100)) - + def __init__(self, historical_date=None, symbols=None, data_dir=None): + self.historical_date = historical_date + self.symbols = symbols + + self.mkt_open = None + + # Read historical trades here... + h = historical_date + pre = "ct" if h.year < 2015 else "ctm" + trade_file = os.path.join( + data_dir, + "trades", + "trades_{}".format(h.year), + "{}_{}{:02d}{:02d}.bgz".format(pre, h.year, h.month, h.day), + ) + + bars_1m_file = os.path.join( + data_dir, + "1m_ohlc", + "1m_ohlc_{}".format(h.year), + "{}{:02d}{:02d}_ohlc_1m.bgz".format(h.year, h.month, h.day), + ) + + log_print("DataOracle initializing trades from file {}", trade_file) + log_print("DataOracle initializing 1m bars from file {}", bars_1m_file) + + then = dt.datetime.now() + self.df_trades = read_trades(trade_file, symbols) + self.df_bars_1m = read_trades(bars_1m_file, symbols) + now = dt.datetime.now() + + log_print("DataOracle initialized for {} with symbols {}", historical_date, symbols) + log_print("DataOracle initialization took {}", now - then) + + # Return the daily open price for the symbol given. The processing to create the 1m OHLC + # files does propagate the earliest trade backwards, which helps. The exchange should + # pass its opening time. + def getDailyOpenPrice(self, symbol, mkt_open, cents=True): + # Remember market open time. 
+ self.mkt_open = mkt_open + + log_print("Oracle: client requested {} at market open: {}", symbol, mkt_open) + + # Find the opening historical price in the 1m OHLC bars for this symbol. + open = self.df_bars_1m.loc[(symbol, mkt_open.time()), "open"] + log_print("Oracle: market open price was was {}", open) + + return int(round(open * 100)) if cents else open + + # Return the latest trade price for the symbol at or prior to the given currentTime, + # which must be of type pd.Timestamp. + def getLatestTrade(self, symbol, currentTime): + + log_print("Oracle: client requested {} as of {}", symbol, currentTime) + + # See when the last historical trade was, prior to simulated currentTime. + dt_last_trade = self.df_trades.loc[symbol].index.asof(currentTime) + if pd.notnull(dt_last_trade): + last_trade = self.df_trades.loc[(symbol, dt_last_trade)] + + price = last_trade["PRICE"] + time = dt_last_trade + + # If we know the market open time, and the last historical trade was before it, use + # the market open price instead. If there were no trades before the requested time, + # also use the market open price. + if pd.isnull(dt_last_trade) or (self.mkt_open and time < self.mkt_open): + price = self.getDailyOpenPrice(symbol, self.mkt_open, cents=False) + time = self.mkt_open + + log_print("Oracle: latest historical trade was {} at {}", price, time) + + return price + + # Return a noisy observed historical price for agents which have that ability. + # currentTime must be of type pd.Timestamp. Only the Exchange or other privileged + # agents should use noisy=False. + # + # NOTE: sigma_n is the observation variance, NOT STANDARD DEVIATION. + # + # Each agent must pass its own np.random.RandomState object to the oracle. + # This helps to preserve the consistency of multiple simulations with experimental + # changes (if the oracle used a global Random object, simply adding one new agent + # would change everyone's "noise" on all subsequent observations). 
+ def observePrice(self, symbol, currentTime, sigma_n=0.0001, random_state=None): + last_trade_price = self.getLatestTrade(symbol, currentTime) + + # Noisy belief is a normal distribution around 1% the last trade price with variance + # as requested by the agent. + if sigma_n == 0: + belief = float(last_trade_price) + else: + belief = random_state.normal(loc=last_trade_price, scale=last_trade_price * sqrt(sigma_n)) + + log_print("Oracle: giving client value observation {:0.2f}", belief) + + # All simulator prices are specified in integer cents. + return int(round(belief * 100)) diff --git a/util/oracle/ExternalFileOracle.py b/util/oracle/ExternalFileOracle.py index 80b074868..c8d63a261 100644 --- a/util/oracle/ExternalFileOracle.py +++ b/util/oracle/ExternalFileOracle.py @@ -1,14 +1,17 @@ -import pandas as pd -from util.util import log_print from bisect import bisect_left from math import sqrt +import pandas as pd + +from util.util import log_print + class ExternalFileOracle: - """ Oracle using an external price series as the fundamental. The external series are specified files in the ABIDES - config. If an agent requests the fundamental value in between two timestamps the returned fundamental value is - linearly interpolated. + """Oracle using an external price series as the fundamental. The external series are specified files in the ABIDES + config. If an agent requests the fundamental value in between two timestamps the returned fundamental value is + linearly interpolated. """ + def __init__(self, symbols): self.mkt_open = None self.symbols = symbols @@ -16,13 +19,13 @@ def __init__(self, symbols): self.f_log = {symbol: [] for symbol in symbols} def load_fundamentals(self): - """ Method extracts fundamentals for each symbol into DataFrames. Note that input files must be of the form - generated by util/formatting/mid_price_from_orderbook.py. + """Method extracts fundamentals for each symbol into DataFrames. 
Note that input files must be of the form + generated by util/formatting/mid_price_from_orderbook.py. """ fundamentals = dict() log_print("Oracle: loading fundamental price series...") for symbol, params_dict in self.symbols.items(): - fundamental_file_path = params_dict['fundamental_file_path'] + fundamental_file_path = params_dict["fundamental_file_path"] log_print("Oracle: loading {}", fundamental_file_path) fundamental_df = pd.read_pickle(fundamental_file_path) fundamentals.update({symbol: fundamental_df}) @@ -44,11 +47,11 @@ def getDailyOpenPrice(self, symbol, mkt_open): return int(round(open_price)) def getPriceAtTime(self, symbol, query_time): - """ Get the true price of a symbol at the requested time. - :param symbol: which symbol to query - :type symbol: str - :param time: at this time - :type time: pd.Timestamp + """Get the true price of a symbol at the requested time. + :param symbol: which symbol to query + :type symbol: str + :param time: at this time + :type time: pd.Timestamp """ log_print("Oracle: client requested {} as of {}", symbol, query_time) @@ -73,19 +76,32 @@ def getPriceAtTime(self, symbol, query_time): lower_val = fundamental_series[lower_idx] upper_val = fundamental_series[upper_idx] - log_print(f"DEBUG: lower_idx: {lower_idx}, lower_val: {lower_val}, upper_idx: {upper_idx}, upper_val: {upper_val}") - - interpolated_price = self.getInterpolatedPrice(query_time, fundamental_series.index[lower_idx], - fundamental_series.index[upper_idx], lower_val, upper_val) - log_print("Oracle: latest historical trade was {} at {}. Next historical trade is {}. 
" - "Interpolated price is {}", lower_val, query_time, upper_val, interpolated_price) - - self.f_log[symbol].append({'FundamentalTime': query_time, 'FundamentalValue': interpolated_price}) + log_print( + f"DEBUG: lower_idx: {lower_idx}, lower_val: {lower_val}, upper_idx: {upper_idx}, upper_val: {upper_val}" + ) + + interpolated_price = self.getInterpolatedPrice( + query_time, + fundamental_series.index[lower_idx], + fundamental_series.index[upper_idx], + lower_val, + upper_val, + ) + log_print( + "Oracle: latest historical trade was {} at {}. Next historical trade is {}. " + "Interpolated price is {}", + lower_val, + query_time, + upper_val, + interpolated_price, + ) + + self.f_log[symbol].append({"FundamentalTime": query_time, "FundamentalValue": interpolated_price}) return interpolated_price def observePrice(self, symbol, currentTime, sigma_n=0.0001, random_state=None): - """ Make observation of price at a given time. + """Make observation of price at a given time. :param symbol: symbol for which to observe price :type symbol: str :param currentTime: time of observation @@ -105,21 +121,23 @@ def observePrice(self, symbol, currentTime, sigma_n=0.0001, random_state=None): return int(round(observed)) def getInterpolatedPrice(self, current_time, time_low, time_high, price_low, price_high): - """ Get the price at current_time, linearly interpolated between price_low and price_high measured at times - time_low and time_high - :param current_time: time for which price is to be interpolated - :type current_time: pd.Timestamp - :param time_low: time of first fundamental value - :type time_low: pd.Timestamp - :param time_high: time of first fundamental value - :type time_high: pd.Timestamp - :param price_low: first fundamental value - :type price_low: float - :param price_high: first fundamental value - :type price_high: float - :return float of interpolated price: + """Get the price at current_time, linearly interpolated between price_low and price_high measured at times 
+ time_low and time_high + :param current_time: time for which price is to be interpolated + :type current_time: pd.Timestamp + :param time_low: time of first fundamental value + :type time_low: pd.Timestamp + :param time_high: time of first fundamental value + :type time_high: pd.Timestamp + :param price_low: first fundamental value + :type price_low: float + :param price_high: first fundamental value + :type price_high: float + :return float of interpolated price: """ - log_print(f'DEBUG: current_time: {current_time} time_low {time_low} time_high: {time_high} price_low: {price_low} price_high: {price_high}' ) + log_print( + f"DEBUG: current_time: {current_time} time_low {time_low} time_high: {time_high} price_low: {price_low} price_high: {price_high}" + ) delta_y = price_high - price_low delta_x = (time_high - time_low).total_seconds() diff --git a/util/oracle/MeanRevertingOracle.py b/util/oracle/MeanRevertingOracle.py index 554a54d88..998db808c 100644 --- a/util/oracle/MeanRevertingOracle.py +++ b/util/oracle/MeanRevertingOracle.py @@ -14,116 +14,117 @@ ### as seconds or minutes. import datetime as dt +import os +import random +import sys +from math import sqrt + import numpy as np import pandas as pd -import os, random, sys -from math import sqrt from util.util import log_print class MeanRevertingOracle: - def __init__(self, mkt_open, mkt_close, symbols): - # Symbols must be a dictionary of dictionaries with outer keys as symbol names and - # inner keys: r_bar, kappa, sigma_s. - self.mkt_open = mkt_open - self.mkt_close = mkt_close - self.symbols = symbols - - # The dictionary r holds the fundamenal value series for each symbol. 
- self.r = {} - - then = dt.datetime.now() - - for symbol in symbols: - s = symbols[symbol] - log_print ("MeanRevertingOracle computing fundamental value series for {}", symbol) - self.r[symbol] = self.generate_fundamental_value_series(symbol=symbol, **s) - - now = dt.datetime.now() - - log_print ("MeanRevertingOracle initialized for symbols {}", symbols) - log_print ("MeanRevertingOracle initialization took {}", now - then) - - def generate_fundamental_value_series(self, symbol, r_bar, kappa, sigma_s): - # Generates the fundamental value series for a single stock symbol. r_bar is the - # mean fundamental value, kappa is the mean reversion coefficient, and sigma_s - # is the shock variance. (Note: NOT STANDARD DEVIATION.) - - # Because the oracle uses the global np.random PRNG to create the fundamental value - # series, it is important to create the oracle BEFORE the agents. In this way the - # addition of a new agent will not affect the sequence created. (Observations using - # the oracle will use an agent's PRNG and thus not cause a problem.) - - # Turn variance into std. - sigma_s = sqrt(sigma_s) - - # Create the time series into which values will be projected and initialize the first value. - date_range = pd.date_range(self.mkt_open, self.mkt_close, closed='left', freq='N') - - s = pd.Series(index=date_range) - r = np.zeros(len(s.index)) - r[0] = r_bar - - # Predetermine the random shocks for all time steps (at once, for computation speed). - shock = np.random.normal(scale=sigma_s, size=(r.shape[0])) - - # Compute the mean reverting fundamental value series. - for t in range(1, r.shape[0]): - r[t] = max(0, (kappa * r_bar) + ( (1 - kappa) * r[t-1] ) + shock[t]) - - # Replace the series values with the fundamental value series. Round and convert to - # integer cents. - s[:] = np.round(r) - s = s.astype(int) - - return (s) - - - # Return the daily open price for the symbol given. 
In the case of the MeanRevertingOracle, - # this will simply be the first fundamental value, which is also the fundamental mean. - # We will use the mkt_open time as given, however, even if it disagrees with this. - def getDailyOpenPrice (self, symbol, mkt_open=None): - - # If we did not already know mkt_open, we should remember it. - if (mkt_open is not None) and (self.mkt_open is None): - self.mkt_open = mkt_open - - log_print ("Oracle: client requested {} at market open: {}", symbol, self.mkt_open) - - open = self.r[symbol].loc[self.mkt_open] - log_print ("Oracle: market open price was was {}", open) - - return open - - - # Return a noisy observation of the current fundamental value. While the fundamental - # value for a given equity at a given time step does not change, multiple agents - # observing that value will receive different observations. - # - # Only the Exchange or other privileged agents should use noisy=False. - # - # sigma_n is experimental observation variance. NOTE: NOT STANDARD DEVIATION. - # - # Each agent must pass its RandomState object to observePrice. This ensures that - # each agent will receive the same answers across multiple same-seed simulations - # even if a new agent has been added to the experiment. - def observePrice(self, symbol, currentTime, sigma_n = 1000, random_state = None): - # If the request is made after market close, return the close price. - if currentTime >= self.mkt_close: - r_t = self.r[symbol].loc[self.mkt_close - pd.Timedelta('1ns')] - else: - r_t = self.r[symbol].loc[currentTime] - - # Generate a noisy observation of fundamental value at the current time. - if sigma_n == 0: - obs = r_t - else: - obs = int(round(random_state.normal(loc=r_t, scale=sqrt(sigma_n)))) - - log_print ("Oracle: current fundamental value is {} at {}", r_t, currentTime) - log_print ("Oracle: giving client value observation {}", obs) - - # Reminder: all simulator prices are specified in integer cents. 
- return obs \ No newline at end of file + def __init__(self, mkt_open, mkt_close, symbols): + # Symbols must be a dictionary of dictionaries with outer keys as symbol names and + # inner keys: r_bar, kappa, sigma_s. + self.mkt_open = mkt_open + self.mkt_close = mkt_close + self.symbols = symbols + + # The dictionary r holds the fundamenal value series for each symbol. + self.r = {} + + then = dt.datetime.now() + + for symbol in symbols: + s = symbols[symbol] + log_print("MeanRevertingOracle computing fundamental value series for {}", symbol) + self.r[symbol] = self.generate_fundamental_value_series(symbol=symbol, **s) + + now = dt.datetime.now() + + log_print("MeanRevertingOracle initialized for symbols {}", symbols) + log_print("MeanRevertingOracle initialization took {}", now - then) + + def generate_fundamental_value_series(self, symbol, r_bar, kappa, sigma_s): + # Generates the fundamental value series for a single stock symbol. r_bar is the + # mean fundamental value, kappa is the mean reversion coefficient, and sigma_s + # is the shock variance. (Note: NOT STANDARD DEVIATION.) + + # Because the oracle uses the global np.random PRNG to create the fundamental value + # series, it is important to create the oracle BEFORE the agents. In this way the + # addition of a new agent will not affect the sequence created. (Observations using + # the oracle will use an agent's PRNG and thus not cause a problem.) + + # Turn variance into std. + sigma_s = sqrt(sigma_s) + + # Create the time series into which values will be projected and initialize the first value. + date_range = pd.date_range(self.mkt_open, self.mkt_close, closed="left", freq="N") + + s = pd.Series(index=date_range) + r = np.zeros(len(s.index)) + r[0] = r_bar + + # Predetermine the random shocks for all time steps (at once, for computation speed). + shock = np.random.normal(scale=sigma_s, size=(r.shape[0])) + + # Compute the mean reverting fundamental value series. 
+ for t in range(1, r.shape[0]): + r[t] = max(0, (kappa * r_bar) + ((1 - kappa) * r[t - 1]) + shock[t]) + + # Replace the series values with the fundamental value series. Round and convert to + # integer cents. + s[:] = np.round(r) + s = s.astype(int) + + return s + + # Return the daily open price for the symbol given. In the case of the MeanRevertingOracle, + # this will simply be the first fundamental value, which is also the fundamental mean. + # We will use the mkt_open time as given, however, even if it disagrees with this. + def getDailyOpenPrice(self, symbol, mkt_open=None): + + # If we did not already know mkt_open, we should remember it. + if (mkt_open is not None) and (self.mkt_open is None): + self.mkt_open = mkt_open + + log_print("Oracle: client requested {} at market open: {}", symbol, self.mkt_open) + + open = self.r[symbol].loc[self.mkt_open] + log_print("Oracle: market open price was was {}", open) + + return open + + # Return a noisy observation of the current fundamental value. While the fundamental + # value for a given equity at a given time step does not change, multiple agents + # observing that value will receive different observations. + # + # Only the Exchange or other privileged agents should use noisy=False. + # + # sigma_n is experimental observation variance. NOTE: NOT STANDARD DEVIATION. + # + # Each agent must pass its RandomState object to observePrice. This ensures that + # each agent will receive the same answers across multiple same-seed simulations + # even if a new agent has been added to the experiment. + def observePrice(self, symbol, currentTime, sigma_n=1000, random_state=None): + # If the request is made after market close, return the close price. + if currentTime >= self.mkt_close: + r_t = self.r[symbol].loc[self.mkt_close - pd.Timedelta("1ns")] + else: + r_t = self.r[symbol].loc[currentTime] + + # Generate a noisy observation of fundamental value at the current time. 
+ if sigma_n == 0: + obs = r_t + else: + obs = int(round(random_state.normal(loc=r_t, scale=sqrt(sigma_n)))) + + log_print("Oracle: current fundamental value is {} at {}", r_t, currentTime) + log_print("Oracle: giving client value observation {}", obs) + + # Reminder: all simulator prices are specified in integer cents. + return obs diff --git a/util/oracle/SparseMeanRevertingOracle.py b/util/oracle/SparseMeanRevertingOracle.py index e81d4c60a..7a151c021 100644 --- a/util/oracle/SparseMeanRevertingOracle.py +++ b/util/oracle/SparseMeanRevertingOracle.py @@ -19,211 +19,213 @@ ### agents each acting at realistic "retail" intervals, on the order of seconds ### or minutes, spread out across the day. -from util.oracle.MeanRevertingOracle import MeanRevertingOracle - import datetime as dt +import os +import random +import sys +from math import exp, sqrt + import numpy as np import pandas as pd -import os, random, sys -from math import exp, sqrt +from util.oracle.MeanRevertingOracle import MeanRevertingOracle from util.util import log_print class SparseMeanRevertingOracle(MeanRevertingOracle): - def __init__(self, mkt_open, mkt_close, symbols): - # Symbols must be a dictionary of dictionaries with outer keys as symbol names and - # inner keys: r_bar, kappa, sigma_s. - self.mkt_open = mkt_open - self.mkt_close = mkt_close - self.symbols = symbols - self.f_log = {} + def __init__(self, mkt_open, mkt_close, symbols): + # Symbols must be a dictionary of dictionaries with outer keys as symbol names and + # inner keys: r_bar, kappa, sigma_s. + self.mkt_open = mkt_open + self.mkt_close = mkt_close + self.symbols = symbols + self.f_log = {} + + # The dictionary r holds the most recent fundamental values for each symbol. + self.r = {} + + # The dictionary megashocks holds the time series of megashocks for each symbol. + # The last one will always be in the future (relative to the current simulation time). 
+ # + # Without these, the OU process just makes a noisy return to the mean and then stays there + # with relatively minor noise. Here we want them to follow a Poisson process, so we sample + # from an exponential distribution for the separation intervals. + self.megashocks = {} + + then = dt.datetime.now() + + # Note that each value in the self.r dictionary is a 2-tuple of the timestamp at + # which the series was computed and the true fundamental value at that time. + for symbol in symbols: + s = symbols[symbol] + log_print( + "SparseMeanRevertingOracle computing initial fundamental value for {}", + symbol, + ) + self.r[symbol] = (mkt_open, s["r_bar"]) + self.f_log[symbol] = [{"FundamentalTime": mkt_open, "FundamentalValue": s["r_bar"]}] + + # Compute the time and value of the first megashock. Note that while the values are + # mean-zero, they are intentionally bimodal (i.e. we always want to push the stock + # some, but we will tend to cancel out via pushes in opposite directions). + ms_time_delta = np.random.exponential(scale=1.0 / s["megashock_lambda_a"]) + mst = self.mkt_open + pd.Timedelta(ms_time_delta, unit="ns") + msv = s["random_state"].normal(loc=s["megashock_mean"], scale=sqrt(s["megashock_var"])) + msv = msv if s["random_state"].randint(2) == 0 else -msv + + self.megashocks[symbol] = [{"MegashockTime": mst, "MegashockValue": msv}] - # The dictionary r holds the most recent fundamental values for each symbol. - self.r = {} + now = dt.datetime.now() - # The dictionary megashocks holds the time series of megashocks for each symbol. - # The last one will always be in the future (relative to the current simulation time). - # - # Without these, the OU process just makes a noisy return to the mean and then stays there - # with relatively minor noise. Here we want them to follow a Poisson process, so we sample - # from an exponential distribution for the separation intervals. 
- self.megashocks = {} + log_print("SparseMeanRevertingOracle initialized for symbols {}", symbols) + log_print("SparseMeanRevertingOracle initialization took {}", now - then) + + # This method takes a requested timestamp to which we should advance the fundamental, + # a value adjustment to apply after advancing time (must pass zero if none), + # a symbol for which to advance time, a previous timestamp, and a previous fundamental + # value. The last two parameters should relate to the most recent time this method + # was invoked. It returns the new value. As a side effect, it updates the log of + # computed fundamental values. + + def compute_fundamental_at_timestamp(self, ts, v_adj, symbol, pt, pv): + s = self.symbols[symbol] + + # This oracle uses the Ornstein-Uhlenbeck Process. It is quite close to being a + # continuous version of the discrete mean reverting process used in the regular + # (dense) MeanRevertingOracle. + + # Compute the time delta from the previous time to the requested time. + d = int((ts - pt) / np.timedelta64(1, "ns")) + + # Extract the parameters for the OU process update. + mu = s["r_bar"] + gamma = s["kappa"] + theta = s["fund_vol"] + + # The OU process is able to skip any amount of time and sample the next desired value + # from the appropriate distribution of possible values. + v = s["random_state"].normal( + loc=mu + (pv - mu) * (exp(-gamma * d)), + scale=((theta) / (2 * gamma)) * (1 - exp(-2 * gamma * d)), + ) + + # Apply the value adjustment that was passed in. + v += v_adj - then = dt.datetime.now() + # The process is not permitted to become negative. + v = max(0, v) - # Note that each value in the self.r dictionary is a 2-tuple of the timestamp at - # which the series was computed and the true fundamental value at that time. 
- for symbol in symbols: - s = symbols[symbol] - log_print ("SparseMeanRevertingOracle computing initial fundamental value for {}", symbol) - self.r[symbol] = (mkt_open, s['r_bar']) - self.f_log[symbol] = [{ 'FundamentalTime' : mkt_open, 'FundamentalValue' : s['r_bar'] }] + # For our purposes, the value must be rounded and converted to integer cents. + v = int(round(v)) - # Compute the time and value of the first megashock. Note that while the values are - # mean-zero, they are intentionally bimodal (i.e. we always want to push the stock - # some, but we will tend to cancel out via pushes in opposite directions). - ms_time_delta = np.random.exponential(scale=1.0 / s['megashock_lambda_a']) - mst = self.mkt_open + pd.Timedelta(ms_time_delta, unit='ns') - msv = s['random_state'].normal(loc = s['megashock_mean'], scale = sqrt(s['megashock_var'])) - msv = msv if s['random_state'].randint(2) == 0 else -msv + # Cache the new time and value as the "previous" fundamental values. + self.r[symbol] = (ts, v) - self.megashocks[symbol] = [{ 'MegashockTime' : mst, 'MegashockValue' : msv }] + # Append the change to the permanent log of fundamental values for this symbol. + self.f_log[symbol].append({"FundamentalTime": ts, "FundamentalValue": v}) + # Return the new value for the requested timestamp. + return v - now = dt.datetime.now() + # This method advances the fundamental value series for a single stock symbol, + # using the OU process. It may proceed in several steps due to our periodic + # application of "megashocks" to push the stock price around, simulating + # exogenous forces. + def advance_fundamental_value_series(self, currentTime, symbol): - log_print ("SparseMeanRevertingOracle initialized for symbols {}", symbols) - log_print ("SparseMeanRevertingOracle initialization took {}", now - then) + # Generation of the fundamental value series uses a separate random state object + # per symbol, which is part of the dictionary we maintain for each symbol. 
+ # Agent observations using the oracle will use an agent's random state object. + s = self.symbols[symbol] + # This is the previous fundamental time and value. + pt, pv = self.r[symbol] - # This method takes a requested timestamp to which we should advance the fundamental, - # a value adjustment to apply after advancing time (must pass zero if none), - # a symbol for which to advance time, a previous timestamp, and a previous fundamental - # value. The last two parameters should relate to the most recent time this method - # was invoked. It returns the new value. As a side effect, it updates the log of - # computed fundamental values. + # If time hasn't changed since the last advance, just use the current value. + if currentTime <= pt: + return pv - def compute_fundamental_at_timestamp(self, ts, v_adj, symbol, pt, pv): - s = self.symbols[symbol] + # Otherwise, we have some work to do, advancing time and computing the fundamental. - # This oracle uses the Ornstein-Uhlenbeck Process. It is quite close to being a - # continuous version of the discrete mean reverting process used in the regular - # (dense) MeanRevertingOracle. + # We may not jump straight to the requested time, because we periodically apply + # megashocks to push the series around (not always away from the mean) and we need + # to compute OU at each of those times, so the aftereffects of the megashocks + # properly affect the remaining OU interval. - # Compute the time delta from the previous time to the requested time. - d = int((ts - pt) / np.timedelta64(1, 'ns')) + mst = self.megashocks[symbol][-1]["MegashockTime"] + msv = self.megashocks[symbol][-1]["MegashockValue"] - # Extract the parameters for the OU process update. - mu = s['r_bar'] - gamma = s['kappa'] - theta = s['fund_vol'] + while mst < currentTime: + # A megashock is scheduled to occur before the new time to which we are advancing. Handle it. 
- # The OU process is able to skip any amount of time and sample the next desired value - # from the appropriate distribution of possible values. - v = s['random_state'].normal(loc = mu + (pv - mu) * (exp(-gamma * d)), - scale = ((theta) / (2*gamma)) * (1 - exp(-2 * gamma * d))) + # Advance time from the previous time to the time of the megashock using the OU process and + # then applying the next megashock value. + v = self.compute_fundamental_at_timestamp(mst, msv, symbol, pt, pv) - # Apply the value adjustment that was passed in. - v += v_adj + # Update our "previous" values for the next computation. + pt, pv = mst, v - # The process is not permitted to become negative. - v = max(0, v) + # Since we just surpassed the last megashock time, compute the next one, which we might or + # might not immediately consume. This works just like the first time (in __init__()). - # For our purposes, the value must be rounded and converted to integer cents. - v = int(round(v)) + mst = pt + pd.Timedelta("{}ns".format(np.random.exponential(scale=1.0 / s["megashock_lambda_a"]))) + msv = s["random_state"].normal(loc=s["megashock_mean"], scale=sqrt(s["megashock_var"])) + msv = msv if s["random_state"].randint(2) == 0 else -msv - # Cache the new time and value as the "previous" fundamental values. - self.r[symbol] = (ts, v) - - # Append the change to the permanent log of fundamental values for this symbol. - self.f_log[symbol].append({ 'FundamentalTime' : ts, 'FundamentalValue' : v }) - - # Return the new value for the requested timestamp. - return v - - - # This method advances the fundamental value series for a single stock symbol, - # using the OU process. It may proceed in several steps due to our periodic - # application of "megashocks" to push the stock price around, simulating - # exogenous forces. 
- def advance_fundamental_value_series(self, currentTime, symbol): - - # Generation of the fundamental value series uses a separate random state object - # per symbol, which is part of the dictionary we maintain for each symbol. - # Agent observations using the oracle will use an agent's random state object. - s = self.symbols[symbol] - - # This is the previous fundamental time and value. - pt, pv = self.r[symbol] - - # If time hasn't changed since the last advance, just use the current value. - if currentTime <= pt: return pv - - # Otherwise, we have some work to do, advancing time and computing the fundamental. - - # We may not jump straight to the requested time, because we periodically apply - # megashocks to push the series around (not always away from the mean) and we need - # to compute OU at each of those times, so the aftereffects of the megashocks - # properly affect the remaining OU interval. - - mst = self.megashocks[symbol][-1]['MegashockTime'] - msv = self.megashocks[symbol][-1]['MegashockValue'] - - while mst < currentTime: - # A megashock is scheduled to occur before the new time to which we are advancing. Handle it. - - # Advance time from the previous time to the time of the megashock using the OU process and - # then applying the next megashock value. - v = self.compute_fundamental_at_timestamp(mst, msv, symbol, pt, pv) - - # Update our "previous" values for the next computation. - pt, pv = mst, v - - # Since we just surpassed the last megashock time, compute the next one, which we might or - # might not immediately consume. This works just like the first time (in __init__()). 
- - mst = pt + pd.Timedelta('{}ns'.format(np.random.exponential(scale = 1.0 / s['megashock_lambda_a']))) - msv = s['random_state'].normal(loc = s['megashock_mean'], scale = sqrt(s['megashock_var'])) - msv = msv if s['random_state'].randint(2) == 0 else -msv - - self.megashocks[symbol].append({ 'MegashockTime' : mst, 'MegashockValue' : msv }) - - # The loop will continue until there are no more megashocks before the time requested - # by the calling method. - - - # Once there are no more megashocks to apply (i.e. the next megashock is in the future, after - # currentTime), then finally advance using the OU process to the requested time. - v = self.compute_fundamental_at_timestamp(currentTime, 0, symbol, pt, pv) - - return (v) - - - # Return the daily open price for the symbol given. In the case of the MeanRevertingOracle, - # this will simply be the first fundamental value, which is also the fundamental mean. - # We will use the mkt_open time as given, however, even if it disagrees with this. - def getDailyOpenPrice (self, symbol, mkt_open=None): - - # The sparse oracle doesn't maintain full fundamental value history, but rather - # advances on demand keeping only the most recent price, except for the opening - # price. Thus we cannot honor a mkt_open that isn't what we already expected. - - log_print ("Oracle: client requested {} at market open: {}", symbol, self.mkt_open) - - open = self.symbols[symbol]['r_bar'] - log_print ("Oracle: market open price was was {}", open) - - return open - - - # Return a noisy observation of the current fundamental value. While the fundamental - # value for a given equity at a given time step does not change, multiple agents - # observing that value will receive different observations. - # - # Only the Exchange or other privileged agents should use sigma_n==0. - # - # sigma_n is experimental observation variance. NOTE: NOT STANDARD DEVIATION. - # - # Each agent must pass its RandomState object to observePrice. 
This ensures that - # each agent will receive the same answers across multiple same-seed simulations - # even if a new agent has been added to the experiment. - def observePrice(self, symbol, currentTime, sigma_n = 1000, random_state = None): - # If the request is made after market close, return the close price. - if currentTime >= self.mkt_close: - r_t = self.advance_fundamental_value_series(self.mkt_close - pd.Timedelta('1ns'), symbol) - else: - r_t = self.advance_fundamental_value_series(currentTime, symbol) - - # Generate a noisy observation of fundamental value at the current time. - if sigma_n == 0: - obs = r_t - else: - obs = int(round(random_state.normal(loc=r_t, scale=sqrt(sigma_n)))) - - log_print ("Oracle: current fundamental value is {} at {}", r_t, currentTime) - log_print ("Oracle: giving client value observation {}", obs) - - # Reminder: all simulator prices are specified in integer cents. - return obs \ No newline at end of file + self.megashocks[symbol].append({"MegashockTime": mst, "MegashockValue": msv}) + + # The loop will continue until there are no more megashocks before the time requested + # by the calling method. + + # Once there are no more megashocks to apply (i.e. the next megashock is in the future, after + # currentTime), then finally advance using the OU process to the requested time. + v = self.compute_fundamental_at_timestamp(currentTime, 0, symbol, pt, pv) + + return v + + # Return the daily open price for the symbol given. In the case of the MeanRevertingOracle, + # this will simply be the first fundamental value, which is also the fundamental mean. + # We will use the mkt_open time as given, however, even if it disagrees with this. + def getDailyOpenPrice(self, symbol, mkt_open=None): + + # The sparse oracle doesn't maintain full fundamental value history, but rather + # advances on demand keeping only the most recent price, except for the opening + # price. Thus we cannot honor a mkt_open that isn't what we already expected. 
+ + log_print("Oracle: client requested {} at market open: {}", symbol, self.mkt_open) + + open = self.symbols[symbol]["r_bar"] + log_print("Oracle: market open price was was {}", open) + + return open + + # Return a noisy observation of the current fundamental value. While the fundamental + # value for a given equity at a given time step does not change, multiple agents + # observing that value will receive different observations. + # + # Only the Exchange or other privileged agents should use sigma_n==0. + # + # sigma_n is experimental observation variance. NOTE: NOT STANDARD DEVIATION. + # + # Each agent must pass its RandomState object to observePrice. This ensures that + # each agent will receive the same answers across multiple same-seed simulations + # even if a new agent has been added to the experiment. + def observePrice(self, symbol, currentTime, sigma_n=1000, random_state=None): + # If the request is made after market close, return the close price. + if currentTime >= self.mkt_close: + r_t = self.advance_fundamental_value_series(self.mkt_close - pd.Timedelta("1ns"), symbol) + else: + r_t = self.advance_fundamental_value_series(currentTime, symbol) + + # Generate a noisy observation of fundamental value at the current time. + if sigma_n == 0: + obs = r_t + else: + obs = int(round(random_state.normal(loc=r_t, scale=sqrt(sigma_n)))) + + log_print("Oracle: current fundamental value is {} at {}", r_t, currentTime) + log_print("Oracle: giving client value observation {}", obs) + + # Reminder: all simulator prices are specified in integer cents. + return obs diff --git a/util/order/LimitOrder.py b/util/order/LimitOrder.py index c16af38b4..8041b902b 100644 --- a/util/order/LimitOrder.py +++ b/util/order/LimitOrder.py @@ -1,12 +1,12 @@ # LimitOrder class, inherits from Order class, adds a limit price. These are the # Orders that typically go in an Exchange's OrderBook. 
-from util.order.Order import Order -from Kernel import Kernel -from agent.FinancialAgent import dollarize +import sys from copy import deepcopy -import sys +from agent.FinancialAgent import dollarize +from Kernel import Kernel +from util.order.Order import Order # Module level variable that can be changed by config files. silent_mode = False @@ -14,7 +14,17 @@ class LimitOrder(Order): - def __init__(self, agent_id, time_placed, symbol, quantity, is_buy_order, limit_price, order_id=None, tag=None): + def __init__( + self, + agent_id, + time_placed, + symbol, + quantity, + is_buy_order, + limit_price, + order_id=None, + tag=None, + ): super().__init__(agent_id, time_placed, symbol, quantity, is_buy_order, order_id, tag=tag) @@ -23,30 +33,43 @@ def __init__(self, agent_id, time_placed, symbol, quantity, is_buy_order, limit_ self.limit_price: int = limit_price def __str__(self): - if silent_mode: return '' + if silent_mode: + return "" - filled = '' - if self.fill_price: filled = " (filled @ {})".format(dollarize(self.fill_price)) + filled = "" + if self.fill_price: + filled = " (filled @ {})".format(dollarize(self.fill_price)) # Until we make explicit market orders, we make a few assumptions that EXTREME prices on limit # orders are trying to represent a market order. This only affects printing - they still hit # the order book like limit orders, which is wrong. 
- return "(Agent {} @ {}{}) : {} {} {} @ {}{}".format(self.agent_id, Kernel.fmtTime(self.time_placed), - f" [{self.tag}]" if self.tag is not None else "", - "BUY" if self.is_buy_order else "SELL", self.quantity, - self.symbol, - dollarize(self.limit_price) if abs( - self.limit_price) < sys.maxsize else 'MKT', filled) + return "(Agent {} @ {}{}) : {} {} {} @ {}{}".format( + self.agent_id, + Kernel.fmtTime(self.time_placed), + f" [{self.tag}]" if self.tag is not None else "", + "BUY" if self.is_buy_order else "SELL", + self.quantity, + self.symbol, + (dollarize(self.limit_price) if abs(self.limit_price) < sys.maxsize else "MKT"), + filled, + ) def __repr__(self): - if silent_mode: return '' + if silent_mode: + return "" return self.__str__() def __copy__(self): - order = LimitOrder(self.agent_id, self.time_placed, self.symbol, self.quantity, self.is_buy_order, - self.limit_price, - order_id=self.order_id, - tag=self.tag) + order = LimitOrder( + self.agent_id, + self.time_placed, + self.symbol, + self.quantity, + self.is_buy_order, + self.limit_price, + order_id=self.order_id, + tag=self.tag, + ) Order._order_ids.pop() # remove duplicate agent ID order.fill_price = self.fill_price return order @@ -64,8 +87,16 @@ def __deepcopy__(self, memodict={}): fill_price = deepcopy(self.fill_price, memodict) # Create new order object - order = LimitOrder(agent_id, time_placed, symbol, quantity, is_buy_order, limit_price, - order_id=order_id, tag=tag) + order = LimitOrder( + agent_id, + time_placed, + symbol, + quantity, + is_buy_order, + limit_price, + order_id=order_id, + tag=tag, + ) order.fill_price = fill_price return order diff --git a/util/order/MarketOrder.py b/util/order/MarketOrder.py index 852cccf8d..5c6877fbf 100644 --- a/util/order/MarketOrder.py +++ b/util/order/MarketOrder.py @@ -1,33 +1,62 @@ -from util.order.Order import Order -from Kernel import Kernel -from agent.FinancialAgent import dollarize +import sys from copy import deepcopy -import sys +from 
agent.FinancialAgent import dollarize +from Kernel import Kernel +from util.order.Order import Order silent_mode = False class MarketOrder(Order): - def __init__(self, agent_id, time_placed, symbol, quantity, is_buy_order, order_id=None, tag=None): - super().__init__(agent_id, time_placed, symbol, quantity, is_buy_order, order_id=order_id, tag=tag) + def __init__( + self, + agent_id, + time_placed, + symbol, + quantity, + is_buy_order, + order_id=None, + tag=None, + ): + super().__init__( + agent_id, + time_placed, + symbol, + quantity, + is_buy_order, + order_id=order_id, + tag=tag, + ) def __str__(self): - if silent_mode: return '' + if silent_mode: + return "" - return "(Agent {} @ {}) : MKT Order {} {} {}".format(self.agent_id, Kernel.fmtTime(self.time_placed), - "BUY" if self.is_buy_order else "SELL", - self.quantity, self.symbol) + return "(Agent {} @ {}) : MKT Order {} {} {}".format( + self.agent_id, + Kernel.fmtTime(self.time_placed), + "BUY" if self.is_buy_order else "SELL", + self.quantity, + self.symbol, + ) def __repr__(self): - if silent_mode: return '' + if silent_mode: + return "" return self.__str__() def __copy__(self): - order = MarketOrder(self.agent_id, self.time_placed, self.symbol, self.quantity, self.is_buy_order, - order_id=self.order_id, - tag=self.tag) + order = MarketOrder( + self.agent_id, + self.time_placed, + self.symbol, + self.quantity, + self.is_buy_order, + order_id=self.order_id, + tag=self.tag, + ) Order._order_ids.pop() # remove duplicate agent ID order.fill_price = self.fill_price return order @@ -44,7 +73,15 @@ def __deepcopy__(self, memodict={}): fill_price = deepcopy(self.fill_price, memodict) # Create new order object - order = MarketOrder(agent_id, time_placed, symbol, quantity, is_buy_order, order_id=order_id, tag=tag) + order = MarketOrder( + agent_id, + time_placed, + symbol, + quantity, + is_buy_order, + order_id=order_id, + tag=tag, + ) order.fill_price = fill_price return order diff --git a/util/order/Order.py 
b/util/order/Order.py index e76d92e35..9cd97d7b3 100644 --- a/util/order/Order.py +++ b/util/order/Order.py @@ -9,7 +9,16 @@ class Order: order_id = 0 _order_ids = set() - def __init__(self, agent_id, time_placed, symbol, quantity, is_buy_order, order_id=None, tag=None): + def __init__( + self, + agent_id, + time_placed, + symbol, + quantity, + is_buy_order, + order_id=None, + tag=None, + ): self.agent_id = agent_id @@ -52,7 +61,7 @@ def generateOrderId(self): def to_dict(self): as_dict = deepcopy(self).__dict__ - as_dict['time_placed'] = self.time_placed.isoformat() + as_dict["time_placed"] = self.time_placed.isoformat() return as_dict def __copy__(self): diff --git a/util/order/etf/BasketOrder.py b/util/order/etf/BasketOrder.py index fe4794e0f..3ab9c72db 100644 --- a/util/order/etf/BasketOrder.py +++ b/util/order/etf/BasketOrder.py @@ -3,39 +3,58 @@ # A buy order translates to a creation order for an ETF share # A sell order translates to a redemption order for shares of the underlying. -from util.order.Order import Order -from Kernel import Kernel -from agent.FinancialAgent import dollarize - import sys +from agent.FinancialAgent import dollarize +from Kernel import Kernel +from util.order.Order import Order + # Module level variable that can be changed by config files. silent_mode = False -class BasketOrder (Order): - - def __init__ (self, agent_id, time_placed, symbol, quantity, is_buy_order, dollar=True, order_id=None): - super().__init__(agent_id, time_placed, symbol, quantity, is_buy_order, order_id) - self.dollar = dollar - - def __str__ (self): - if silent_mode: return '' - - filled = '' - if self.dollar: - if self.fill_price: filled = " (filled @ {})".format(dollarize(self.fill_price)) - else: - if self.fill_price: filled = " (filled @ {})".format(self.fill_price) - - # Until we make explicit market orders, we make a few assumptions that EXTREME prices on limit - # orders are trying to represent a market order. 
This only affects printing - they still hit - # the order book like limit orders, which is wrong. - return "(Order_ID: {} Agent {} @ {}) : {} {} {} @ {}{}".format(self.order_id, self.agent_id, - Kernel.fmtTime(self.time_placed), - "CREATE" if self.is_buy_order else "REDEEM", - self.quantity, self.symbol, - filled, self.fill_price) - - def __repr__ (self): - if silent_mode: return '' - return self.__str__() + +class BasketOrder(Order): + + def __init__( + self, + agent_id, + time_placed, + symbol, + quantity, + is_buy_order, + dollar=True, + order_id=None, + ): + super().__init__(agent_id, time_placed, symbol, quantity, is_buy_order, order_id) + self.dollar = dollar + + def __str__(self): + if silent_mode: + return "" + + filled = "" + if self.dollar: + if self.fill_price: + filled = " (filled @ {})".format(dollarize(self.fill_price)) + else: + if self.fill_price: + filled = " (filled @ {})".format(self.fill_price) + + # Until we make explicit market orders, we make a few assumptions that EXTREME prices on limit + # orders are trying to represent a market order. This only affects printing - they still hit + # the order book like limit orders, which is wrong. 
+ return "(Order_ID: {} Agent {} @ {}) : {} {} {} @ {}{}".format( + self.order_id, + self.agent_id, + Kernel.fmtTime(self.time_placed), + "CREATE" if self.is_buy_order else "REDEEM", + self.quantity, + self.symbol, + filled, + self.fill_price, + ) + + def __repr__(self): + if silent_mode: + return "" + return self.__str__() diff --git a/util/plotting/chart_fundamental.py b/util/plotting/chart_fundamental.py index 7705f4f36..c202d5bb3 100644 --- a/util/plotting/chart_fundamental.py +++ b/util/plotting/chart_fundamental.py @@ -1,11 +1,13 @@ +import argparse +import sys + import pandas as pd +from dateutil.parser import parse from matplotlib import pyplot as plt from matplotlib.dates import DateFormatter -from dateutil.parser import parse from pandas.plotting import register_matplotlib_converters -import argparse -import sys -sys.path.append('..') + +sys.path.append("..") from formatting.convert_order_stream import dir_path @@ -16,16 +18,16 @@ class Constants: axes_label_font_size = 20 title_font_size = 22 legend_font_size = 20 - filename = 'fundamental' + filename = "fundamental" def set_up_plotting(): - """ Sets matplotlib variables for plotting. 
""" - plt.rc('xtick', labelsize=Constants.tick_label_size) - plt.rc('ytick', labelsize=Constants.tick_label_size) - plt.rc('axes', labelsize=Constants.axes_label_font_size) - plt.rc('axes', titlesize=Constants.title_font_size) - plt.rc('legend', fontsize=Constants.legend_font_size) + """Sets matplotlib variables for plotting.""" + plt.rc("xtick", labelsize=Constants.tick_label_size) + plt.rc("ytick", labelsize=Constants.tick_label_size) + plt.rc("axes", labelsize=Constants.axes_label_font_size) + plt.rc("axes", titlesize=Constants.title_font_size) + plt.rc("legend", fontsize=Constants.legend_font_size) def plot_fundamental(fundamentals_df_list, legend_labels, plot_title, output_dir): @@ -41,13 +43,19 @@ def plot_fundamental(fundamentals_df_list, legend_labels, plot_title, output_dir for df, label in zip(fundamentals_df_list, legend_labels): x = df.index - y = df['FundamentalValue'] + y = df["FundamentalValue"] plt.plot(x, y, label=label) plt.legend() - fig.savefig(f'{output_dir}/{Constants.filename}.png', format='png', dpi=300, transparent=False, bbox_inches='tight', - pad_inches=0.03) + fig.savefig( + f"{output_dir}/{Constants.filename}.png", + format="png", + dpi=300, + transparent=False, + bbox_inches="tight", + pad_inches=0.03, + ) plt.show() @@ -58,15 +66,33 @@ def validate_input(fundamentals, legend_labels): if __name__ == "__main__": - parser = argparse.ArgumentParser(description='Chart fundamental for ABIDES simulations.') - parser.add_argument('-f', '--fundamental-file', action='append', required=True, help='bz2 file containing the fundamental' - ' over time. Note can add multiple instances of this variable for' - ' and overlayed chart.') - parser.add_argument('-l', '--legend-label', action='append', required=False, help='label for the legend entry ' - 'corresponding to fundamental-file. 
Must have as many legen-label variables as' - ' fundamental-file variables.') - parser.add_argument('-t', '--title', action='store', default='', help='Chart title.') - parser.add_argument('-o', '--output-dir', default='.', help='Path to output directory', type=dir_path) + parser = argparse.ArgumentParser(description="Chart fundamental for ABIDES simulations.") + parser.add_argument( + "-f", + "--fundamental-file", + action="append", + required=True, + help="bz2 file containing the fundamental" + " over time. Note can add multiple instances of this variable for" + " and overlayed chart.", + ) + parser.add_argument( + "-l", + "--legend-label", + action="append", + required=False, + help="label for the legend entry " + "corresponding to fundamental-file. Must have as many legen-label variables as" + " fundamental-file variables.", + ) + parser.add_argument("-t", "--title", action="store", default="", help="Chart title.") + parser.add_argument( + "-o", + "--output-dir", + default=".", + help="Path to output directory", + type=dir_path, + ) args, remaining_args = parser.parse_known_args() @@ -80,4 +106,3 @@ def validate_input(fundamentals, legend_labels): output_dir = args.output_dir plot_fundamental(fundamentals_df_list, legend_labels, plot_title, output_dir) - diff --git a/util/plotting/liquidity_telemetry.py b/util/plotting/liquidity_telemetry.py index a78b3b2ca..961374949 100755 --- a/util/plotting/liquidity_telemetry.py +++ b/util/plotting/liquidity_telemetry.py @@ -1,18 +1,22 @@ -import pandas as pd -import sys import os +import sys -sys.path.append('../..') +import pandas as pd + +sys.path.append("../..") -from realism.realism_utils import make_orderbook_for_analysis, MID_PRICE_CUTOFF -from matplotlib import pyplot as plt -import matplotlib.dates as mdates -import numpy as np -from datetime import timedelta, datetime import argparse import json +from datetime import datetime, timedelta + import matplotlib -matplotlib.rcParams['agg.path.chunksize'] = 10000 +import 
matplotlib.dates as mdates +import numpy as np +from matplotlib import pyplot as plt + +from realism.realism_utils import MID_PRICE_CUTOFF, make_orderbook_for_analysis + +matplotlib.rcParams["agg.path.chunksize"] = 10000 # PLOT_PARAMS_DICT = { @@ -32,47 +36,52 @@ def create_orderbooks(exchange_path, ob_path): - """ Creates orderbook DataFrames from ABIDES exchange output file and orderbook output file. """ + """Creates orderbook DataFrames from ABIDES exchange output file and orderbook output file.""" print("Constructing orderbook...") - processed_orderbook = make_orderbook_for_analysis(exchange_path, ob_path, num_levels=1, - hide_liquidity_collapse=False) - cleaned_orderbook = processed_orderbook[(processed_orderbook['MID_PRICE'] > - MID_PRICE_CUTOFF) & - (processed_orderbook['MID_PRICE'] < MID_PRICE_CUTOFF)] + processed_orderbook = make_orderbook_for_analysis( + exchange_path, ob_path, num_levels=1, hide_liquidity_collapse=False + ) + cleaned_orderbook = processed_orderbook[ + (processed_orderbook["MID_PRICE"] > -MID_PRICE_CUTOFF) & (processed_orderbook["MID_PRICE"] < MID_PRICE_CUTOFF) + ] transacted_orders = cleaned_orderbook.loc[cleaned_orderbook.TYPE == "ORDER_EXECUTED"] - transacted_orders['SIZE'] = transacted_orders['SIZE'] / 2 + transacted_orders["SIZE"] = transacted_orders["SIZE"] / 2 return processed_orderbook, transacted_orders, cleaned_orderbook def bin_and_sum(s, binwidth): - """ Sums the values of a pandas Series indexed by Datetime according to specific binwidth. + """Sums the values of a pandas Series indexed by Datetime according to specific binwidth. 
- :param s: series of values to process - :type s: pd.Series with pd.DatetimeIndex index - :param binwidth: width of time bins in seconds - :type binwidth: float + :param s: series of values to process + :type s: pd.Series with pd.DatetimeIndex index + :param binwidth: width of time bins in seconds + :type binwidth: float """ - bins = pd.interval_range(start=s.index[0].floor('min'), end=s.index[-1].ceil('min'), - freq=pd.DateOffset(seconds=binwidth)) + bins = pd.interval_range( + start=s.index[0].floor("min"), + end=s.index[-1].ceil("min"), + freq=pd.DateOffset(seconds=binwidth), + ) binned = pd.cut(s.index, bins=bins) counted = s.groupby(binned).sum() return counted def np_bar_plot_hist_input(counted): - """ Constructs the required input for np.bar to produce a histogram plot of the output provided from - __name__.bin_and_sum + """Constructs the required input for np.bar to produce a histogram plot of the output provided from + __name__.bin_and_sum - :param counted: output from __name__.bin_and_sum - :type counted: pd.Series with CategoricalIndex, categories are intervals + :param counted: output from __name__.bin_and_sum + :type counted: pd.Series with CategoricalIndex, categories are intervals """ bins = list(counted.index.categories.left) + [counted.index.categories.right[-1]] bins = np.array([pd.Timestamp.to_pydatetime(x) for x in bins]) width = np.diff(bins) delta = bins[1:] - bins[:-1] half_delta = np.array([timedelta(seconds=0.5 * x.total_seconds()) for x in delta]) - center = (half_delta + bins[:-1]) + center = half_delta + bins[:-1] width = np.array([x.total_seconds() / 86400 for x in width]) # 86400 seconds in a day counts = counted.values @@ -80,37 +89,42 @@ def np_bar_plot_hist_input(counted): def make_liquidity_dropout_events(processed_orderbook): - """ Return index series corresponding to liquidity dropout point events for bids and asks. 
""" - no_bid_side = processed_orderbook.loc[processed_orderbook['MID_PRICE'] < - MID_PRICE_CUTOFF] - no_ask_side = processed_orderbook.loc[processed_orderbook['MID_PRICE'] > MID_PRICE_CUTOFF] - no_bid_idx = no_bid_side.index[~no_bid_side.index.duplicated(keep='last')] - no_ask_idx = no_ask_side.index[~no_ask_side.index.duplicated(keep='last')] + """Return index series corresponding to liquidity dropout point events for bids and asks.""" + no_bid_side = processed_orderbook.loc[processed_orderbook["MID_PRICE"] < -MID_PRICE_CUTOFF] + no_ask_side = processed_orderbook.loc[processed_orderbook["MID_PRICE"] > MID_PRICE_CUTOFF] + no_bid_idx = no_bid_side.index[~no_bid_side.index.duplicated(keep="last")] + no_ask_idx = no_ask_side.index[~no_ask_side.index.duplicated(keep="last")] return no_bid_idx, no_ask_idx -def print_liquidity_stats(transacted_orders, no_bid_idx, no_ask_idx, liquidity_dropout_buffer=LIQUIDITY_DROPOUT_BUFFER): - """ Print statistics about liquidity to STDERR. """ +def print_liquidity_stats( + transacted_orders, + no_bid_idx, + no_ask_idx, + liquidity_dropout_buffer=LIQUIDITY_DROPOUT_BUFFER, +): + """Print statistics about liquidity to STDERR.""" sys.stderr.write("Liquidity statistics:\n") # daily transacted volume - daily_transacted_volume = transacted_orders['SIZE'].sum() - sys.stderr.write(f'TOTAL_TRASACTED_VOLUME: {daily_transacted_volume}\n') + daily_transacted_volume = transacted_orders["SIZE"].sum() + sys.stderr.write(f"TOTAL_TRASACTED_VOLUME: {daily_transacted_volume}\n") # number of no-bid events total_num_no_bids = len(list(no_bid_idx)) sys.stderr.write(f"TOTAL_NO_BID_EVENTS: {total_num_no_bids}\n") - sys.stderr.write(str(no_bid_idx) + '\n') + sys.stderr.write(str(no_bid_idx) + "\n") # number of no-ask events total_num_no_asks = len(list(no_bid_idx)) sys.stderr.write(f"TOTAL_NO_ASK_EVENTS: {total_num_no_asks}\n") - sys.stderr.write(str(no_ask_idx) + '\n') + sys.stderr.write(str(no_ask_idx) + "\n") # total liquidity dropout events 
total_liquidity_dropouts = total_num_no_asks + total_num_no_bids - sys.stderr.write(f'TOTAL_LIQUIDITY_DROPOUTS: {total_liquidity_dropouts}\n') + sys.stderr.write(f"TOTAL_LIQUIDITY_DROPOUTS: {total_liquidity_dropouts}\n") # liquidity droput events within buffer start_buffer = transacted_orders.index[0] + pd.Timedelta(seconds=liquidity_dropout_buffer) @@ -121,59 +135,64 @@ def print_liquidity_stats(transacted_orders, no_bid_idx, no_ask_idx, liquidity_d buffered_total_dropouts = buffered_bid_dropouts + buffered_ask_dropouts buffer_window_length_mins = liquidity_dropout_buffer / 60 - sys.stderr.write(f'TOTAL_LIQUIDITY_DROPOUTS_INSIDE_WINDOW: {buffered_total_dropouts}, ({buffer_window_length_mins}' - f' mins)\n') - sys.stderr.write(f'TOTAL_NO_BID_EVENTS_INSIDE_WINDOW: {buffered_bid_dropouts}, ({buffer_window_length_mins}' - f' mins)\n') - sys.stderr.write(f'TOTAL_NO_ASK_EVENTS_INSIDE_WINDOW: {buffered_ask_dropouts}, ({buffer_window_length_mins}' - f' mins)\n') + sys.stderr.write( + f"TOTAL_LIQUIDITY_DROPOUTS_INSIDE_WINDOW: {buffered_total_dropouts}, ({buffer_window_length_mins}" f" mins)\n" + ) + sys.stderr.write( + f"TOTAL_NO_BID_EVENTS_INSIDE_WINDOW: {buffered_bid_dropouts}, ({buffer_window_length_mins}" f" mins)\n" + ) + sys.stderr.write( + f"TOTAL_NO_ASK_EVENTS_INSIDE_WINDOW: {buffered_ask_dropouts}, ({buffer_window_length_mins}" f" mins)\n" + ) def make_plots(plot_inputs, plot_params_dict, title=None, out_file="liquidity_telemetry.png"): - """ Produce a plot with three subplots: - 1. Mid-price over time. - 2. Liquidity dropout events over time - 3. Transacted volume over time + """Produce a plot with three subplots: + 1. Mid-price over time. + 2. Liquidity dropout events over time + 3. 
Transacted volume over time """ # preamble - fig, axes = plt.subplots(nrows=5, ncols=1, gridspec_kw={'height_ratios': [3, 3, 3, 1, 3]}) + fig, axes = plt.subplots(nrows=5, ncols=1, gridspec_kw={"height_ratios": [3, 3, 3, 1, 3]}) fig.set_size_inches(h=23, w=15) - date = plot_inputs['mid_price'].index[0].date() + date = plot_inputs["mid_price"].index[0].date() midnight = pd.Timestamp(date) - xmin = midnight + pd.to_timedelta(plot_params_dict['xmin']) - xmax = midnight + pd.to_timedelta(plot_params_dict['xmax']) - shade_start = midnight + pd.to_timedelta(plot_params_dict['shade_start_time']) - shade_end = midnight + pd.to_timedelta(plot_params_dict['shade_end_time']) + xmin = midnight + pd.to_timedelta(plot_params_dict["xmin"]) + xmax = midnight + pd.to_timedelta(plot_params_dict["xmax"]) + shade_start = midnight + pd.to_timedelta(plot_params_dict["shade_start_time"]) + shade_end = midnight + pd.to_timedelta(plot_params_dict["shade_end_time"]) # top plot -- mid price + fundamental - if plot_inputs['fundamental'] is not None: - plot_inputs['fundamental'].loc[xmin:xmax].plot(ax=axes[0], color='blue', label="Fundamental") - plot_inputs['mid_price'].loc[xmin:xmax].plot(ax=axes[0], color='black', label="Mid price") - axes[0].axvspan(shade_start, shade_end, alpha=0.2, color='grey') + if plot_inputs["fundamental"] is not None: + plot_inputs["fundamental"].loc[xmin:xmax].plot(ax=axes[0], color="blue", label="Fundamental") + plot_inputs["mid_price"].loc[xmin:xmax].plot(ax=axes[0], color="black", label="Mid price") + axes[0].axvspan(shade_start, shade_end, alpha=0.2, color="grey") axes[0].xaxis.set_visible(False) - axes[0].legend(fontsize='large') - axes[0].set_ylabel("Mid-price ($)", fontsize='large') + axes[0].legend(fontsize="large") + axes[0].set_ylabel("Mid-price ($)", fontsize="large") axes[0].set_xlim(xmin, xmax) # spread - plot_inputs['spread'][xmin:xmax].plot(ax=axes[1], color='black', label="Spread") - axes[1].axvspan(shade_start, shade_end, alpha=0.2, color='grey') + 
plot_inputs["spread"][xmin:xmax].plot(ax=axes[1], color="black", label="Spread") + axes[1].axvspan(shade_start, shade_end, alpha=0.2, color="grey") axes[1].xaxis.set_visible(False) # axes[0].legend(fontsize='large') - axes[1].set_ylabel("Spread ($)", fontsize='large') + axes[1].set_ylabel("Spread ($)", fontsize="large") axes[1].set_xlim(xmin, xmax) # order volume imbalance - plot_inputs['order_volume_imbalance'][xmin:xmax].plot(ax=axes[2], color='black', label="Order volume imbalance") - axes[2].axvspan(shade_start, shade_end, alpha=0.2, color='grey') + plot_inputs["order_volume_imbalance"][xmin:xmax].plot(ax=axes[2], color="black", label="Order volume imbalance") + axes[2].axvspan(shade_start, shade_end, alpha=0.2, color="grey") axes[2].xaxis.set_visible(False) # axes[0].legend(fontsize='large') - axes[2].set_ylabel("$\\frac{\\mathrm{best\ ask\ size}}{\\mathrm{best\ ask\ size} + \\mathrm{best\ bid\ size}}$", - fontsize='large') + axes[2].set_ylabel( + r"$\frac{\mathrm{best\ ask\ size}}{\mathrm{best\ ask\ size} + \mathrm{best\ bid\ size}}$", + fontsize="large", + ) axes[2].set_xlim(xmin, xmax) # middle plot -- liquidity events @@ -181,85 +200,96 @@ def make_plots(plot_inputs, plot_params_dict, title=None, out_file="liquidity_te axes[3].get_yaxis().set_visible(False) no_bid_kwargs = { - "color": plot_params_dict['no_bids_color'], - "linestyle": '--', - "linewidth": plot_params_dict['linewidth'], - "label": "No bids in book" + "color": plot_params_dict["no_bids_color"], + "linestyle": "--", + "linewidth": plot_params_dict["linewidth"], + "label": "No bids in book", } no_ask_kwargs = { - "color": plot_params_dict['no_asks_color'], - "linestyle": '--', - "linewidth": plot_params_dict['linewidth'], - "label": "No asks in book" + "color": plot_params_dict["no_asks_color"], + "linestyle": "--", + "linewidth": plot_params_dict["linewidth"], + "label": "No asks in book", } - for idx, dt in enumerate(plot_inputs['liquidity_events']['no_bid_idx']): - no_bid_kwargs['x'] = dt + 
for idx, dt in enumerate(plot_inputs["liquidity_events"]["no_bid_idx"]): + no_bid_kwargs["x"] = dt axes[3].axvline(**no_bid_kwargs) if idx == 0: - del no_bid_kwargs['label'] + del no_bid_kwargs["label"] - for idx, dt in enumerate(plot_inputs['liquidity_events']['no_ask_idx']): - no_ask_kwargs['x'] = dt + for idx, dt in enumerate(plot_inputs["liquidity_events"]["no_ask_idx"]): + no_ask_kwargs["x"] = dt axes[3].axvline(**no_ask_kwargs) if idx == 0: - del no_ask_kwargs['label'] + del no_ask_kwargs["label"] - axes[3].axvspan(shade_start, shade_end, alpha=0.2, color='grey') + axes[3].axvspan(shade_start, shade_end, alpha=0.2, color="grey") - axes[3].legend(fontsize='large') + axes[3].legend(fontsize="large") axes[3].xaxis.set_visible(False) # axes[3].set_title("Liquidity dropout events") # Bottom plot -- transacted volume - axes[4].bar(plot_inputs['transacted_volume']['center'], plot_inputs['transacted_volume']['counts'], align='center', - width=plot_inputs['transacted_volume']['width'], fill=False) + axes[4].bar( + plot_inputs["transacted_volume"]["center"], + plot_inputs["transacted_volume"]["counts"], + align="center", + width=plot_inputs["transacted_volume"]["width"], + fill=False, + ) - axes[4].axvspan(shade_start, shade_end, alpha=0.2, color='grey') + axes[4].axvspan(shade_start, shade_end, alpha=0.2, color="grey") axes[4].xaxis.set_major_formatter(mdates.DateFormatter("%H:%M")) axes[4].xaxis.set_minor_formatter(mdates.DateFormatter("%H:%M")) - axes[4].tick_params(axis='both', which='major', labelsize=14) - axes[4].set_ylabel("Transacted Volume", fontsize='large') + axes[4].tick_params(axis="both", which="major", labelsize=14) + axes[4].set_ylabel("Transacted Volume", fontsize="large") axes[4].set_xlim(xmin, xmax) if title: plt.suptitle(title, fontsize=18, y=0.905) plt.subplots_adjust(hspace=0.05) - fig.savefig(out_file, format='png', dpi=300, transparent=False, bbox_inches='tight', - pad_inches=0.03) + fig.savefig( + out_file, + format="png", + dpi=300, + 
transparent=False, + bbox_inches="tight", + pad_inches=0.03, + ) def load_fundamental(ob_path): - """ Retrives fundamental path from orderbook path. """ + """Retrives fundamental path from orderbook path.""" # get ticker name from ob path ORDERBOOK_TICKER_FULL.bz2 basename = os.path.basename(ob_path) - ticker = basename.split('_')[1] + ticker = basename.split("_")[1] # fundamental path from ticker fundamental_TICKER.bz2 - fundamental_path = f'{os.path.dirname(ob_path)}/fundamental_{ticker}.bz2' + fundamental_path = f"{os.path.dirname(ob_path)}/fundamental_{ticker}.bz2" # load fundamental as pandas series if os.path.exists(fundamental_path): fundamental_df = pd.read_pickle(fundamental_path) - fundamental_ts = fundamental_df['FundamentalValue'].sort_index() / 100 # convert to USD from cents - fundamental_ts = fundamental_ts.loc[~fundamental_ts.index.duplicated(keep='last')] + fundamental_ts = fundamental_df["FundamentalValue"].sort_index() / 100 # convert to USD from cents + fundamental_ts = fundamental_ts.loc[~fundamental_ts.index.duplicated(keep="last")] return fundamental_ts else: return None -def main(exchange_path, ob_path, title=None, outfile='liquidity_telemetry.png', verbose=False): - """ Processes orderbook from files, creates the liquidity telemetry plot and (optionally) prints statistics. 
""" +def main(exchange_path, ob_path, title=None, outfile="liquidity_telemetry.png", verbose=False): + """Processes orderbook from files, creates the liquidity telemetry plot and (optionally) prints statistics.""" processed_orderbook, transacted_orders, cleaned_orderbook = create_orderbooks(exchange_path, ob_path) fundamental_ts = load_fundamental(ob_path) - volume_hist = bin_and_sum(transacted_orders["SIZE"], PLOT_PARAMS_DICT['transacted_volume_binwidth']) + volume_hist = bin_and_sum(transacted_orders["SIZE"], PLOT_PARAMS_DICT["transacted_volume_binwidth"]) counts, center, width = np_bar_plot_hist_input(volume_hist) no_bid_idx, no_ask_idx = make_liquidity_dropout_events(processed_orderbook) @@ -268,15 +298,8 @@ def main(exchange_path, ob_path, title=None, outfile='liquidity_telemetry.png', "fundamental": fundamental_ts, "spread": cleaned_orderbook["SPREAD"], "order_volume_imbalance": cleaned_orderbook["ORDER_VOLUME_IMBALANCE"], - "liquidity_events": { - 'no_bid_idx': no_bid_idx, - 'no_ask_idx': no_ask_idx - }, - "transacted_volume": { - 'center': center, - 'width': width, - 'counts': counts - } + "liquidity_events": {"no_bid_idx": no_bid_idx, "no_ask_idx": no_ask_idx}, + "transacted_volume": {"center": center, "width": width, "counts": counts}, } print("Plotting...") @@ -289,39 +312,50 @@ def main(exchange_path, ob_path, title=None, outfile='liquidity_telemetry.png', def check_str_png(s): - """ Check if string has .png extension. """ + """Check if string has .png extension.""" if not isinstance(s, str): raise TypeError("Input must be of type str") - if not s.endswith('.png'): + if not s.endswith(".png"): raise ValueError("String must end with .png") return s -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='CLI utility for inspecting liquidity issues and transacted volumes ' - 'for a day of trading.') - - parser.add_argument('stream', type=str, help='ABIDES order stream in bz2 format. 
' - 'Typical example is `ExchangeAgent.bz2`') - parser.add_argument('book', type=str, help='ABIDES order book output in bz2 format. Typical example is ' - 'ORDERBOOK_TICKER_FULL.bz2') - parser.add_argument('-o', '--out_file', - help='Path to png output file. Must have .png file extension', - type=check_str_png, - default='liquidity_telemetry.png') - parser.add_argument('-t', '--plot-title', - help="Title for plot", - type=str, - default=None - ) - parser.add_argument('-v', '--verbose', - help="Print some summary statistics to stderr.", - action='store_true') - parser.add_argument('-c', '--plot-config', - help='Name of config file to execute. ' - 'See configs/telemetry_config.example.json for an example.', - default='configs/telemetry_config.example.json', - type=str) +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="CLI utility for inspecting liquidity issues and transacted volumes " "for a day of trading." + ) + + parser.add_argument( + "stream", + type=str, + help="ABIDES order stream in bz2 format. " "Typical example is `ExchangeAgent.bz2`", + ) + parser.add_argument( + "book", + type=str, + help="ABIDES order book output in bz2 format. Typical example is " "ORDERBOOK_TICKER_FULL.bz2", + ) + parser.add_argument( + "-o", + "--out_file", + help="Path to png output file. Must have .png file extension", + type=check_str_png, + default="liquidity_telemetry.png", + ) + parser.add_argument("-t", "--plot-title", help="Title for plot", type=str, default=None) + parser.add_argument( + "-v", + "--verbose", + help="Print some summary statistics to stderr.", + action="store_true", + ) + parser.add_argument( + "-c", + "--plot-config", + help="Name of config file to execute. 
" "See configs/telemetry_config.example.json for an example.", + default="configs/telemetry_config.example.json", + type=str, + ) args, remaining_args = parser.parse_known_args() @@ -330,7 +364,7 @@ def check_str_png(s): book = args.book title = args.plot_title verbose = args.verbose - with open(args.plot_config, 'r') as f: + with open(args.plot_config, "r") as f: PLOT_PARAMS_DICT = json.load(f) main(stream, book, title=title, outfile=out_filepath, verbose=verbose) diff --git a/util/random_search.py b/util/random_search.py index c5e9b2566..18da2bfb5 100644 --- a/util/random_search.py +++ b/util/random_search.py @@ -1,22 +1,38 @@ import argparse import itertools -from util import numeric import random +from util import numeric + def generate_random_tuples(list_of_lists, num_samples, seed): random.seed(a=seed) for n in range(num_samples): items = [random.choice(l) for l in list_of_lists] - print(','.join(str(s) for s in items)) + print(",".join(str(s) for s in items)) def parse_cli(): - parser = argparse.ArgumentParser(description='Prints a random selection of the Cartesian product of a group of lists.') - parser.add_argument('-l', '--list', nargs='+', action='append', - help='Start of list', required=True, type=numeric) - parser.add_argument('-n', '--num-samples', type=int, required=True, help='Number of tuples to print.') - parser.add_argument('-s', '--random-seed', type=int, default=12345, help='Random seed.') + parser = argparse.ArgumentParser( + description="Prints a random selection of the Cartesian product of a group of lists." 
+ ) + parser.add_argument( + "-l", + "--list", + nargs="+", + action="append", + help="Start of list", + required=True, + type=numeric, + ) + parser.add_argument( + "-n", + "--num-samples", + type=int, + required=True, + help="Number of tuples to print.", + ) + parser.add_argument("-s", "--random-seed", type=int, default=12345, help="Random seed.") args = parser.parse_args() generate_random_tuples(args.list, args.num_samples, args.random_seed) diff --git a/util/simulation_run_stats.py b/util/simulation_run_stats.py index 2e2c71ae1..6d7c63f21 100755 --- a/util/simulation_run_stats.py +++ b/util/simulation_run_stats.py @@ -1,11 +1,12 @@ import argparse -import pandas as pd -from pandas.io.json import json_normalize -from glob import glob -import re import os -from IPython.display import display, HTML +import re +from glob import glob + +import pandas as pd +from IPython.display import HTML, display from matplotlib import pyplot as plt +from pandas import json_normalize """ @@ -27,12 +28,12 @@ def get_run_statistics(lines): run_stats = dict() patterns = { - 'user_time (s)': r'User time \(seconds\): (\d+\.\d+)', - 'system_time (s)': r'System time \(seconds\): (\d+\.\d+)', - 'cpu_max_perc_usage': r'Percent of CPU this job got: (\d+)\%', - 'mem_max_usage (kB)': r'Maximum resident set size \(kbytes\): (\d+)', - 'messages_total': r'Event Queue elapsed: \d+ days \d{2}:\d{2}:\d{2}\.\d{6}, messages: (\d+), messages per second: \d+\.?\d*', - 'messages_per_second': r'Event Queue elapsed: \d+ days \d{2}:\d{2}:\d{2}\.\d{6}, messages: \d+, messages per second: (\d+\.?\d*)' + "user_time (s)": r"User time \(seconds\): (\d+\.\d+)", + "system_time (s)": r"System time \(seconds\): (\d+\.\d+)", + "cpu_max_perc_usage": r"Percent of CPU this job got: (\d+)\%", + "mem_max_usage (kB)": r"Maximum resident set size \(kbytes\): (\d+)", + "messages_total": r"Event Queue elapsed: \d+ days \d{2}:\d{2}:\d{2}\.\d{6}, messages: (\d+), messages per second: \d+\.?\d*", + "messages_per_second": r"Event 
Queue elapsed: \d+ days \d{2}:\d{2}:\d{2}\.\d{6}, messages: \d+, messages per second: (\d+\.?\d*)", } for line in lines: @@ -54,18 +55,18 @@ def get_experiment_statistics(expt_path): expt_stat = [] expt_name = os.path.basename(expt_path) - for path in glob(f'{expt_path}/*__*.err'): - pattern = r'.*__(\d+).err' + for path in glob(f"{expt_path}/*__*.err"): + pattern = r".*__(\d+).err" m = re.search(pattern, path) param_value = m.group(1) param_value = make_numeric(param_value) - with open(path, 'r') as f: + with open(path, "r") as f: run_output = f.readlines() run_stats = get_run_statistics(run_output) run_dict = { - 'run_path': path, - 'run_param_value': param_value, - 'run_stats': run_stats + "run_path": path, + "run_param_value": param_value, + "run_stats": run_stats, } expt_stat.append(run_dict) @@ -78,7 +79,7 @@ def dataframe_from_experiment_statistics(expt_name, expt_stat): expt_df.columns.name = expt_name # Clean expt_df = expt_df.dropna() - expt_df = expt_df.sort_values(by='run_param_value') + expt_df = expt_df.sort_values(by="run_param_value") expt_df = expt_df.reset_index(drop=True) # Reorder columns @@ -95,20 +96,22 @@ def dataframe_from_path(expt_path): return expt_df -if __name__ == '__main__': +if __name__ == "__main__": - log_files = glob('/home/ec2-user/efs/_abides/dev/dd/data_dump/*') + log_files = glob("/home/ec2-user/efs/_abides/dev/dd/data_dump/*") no_mm_expt = [] mm_expt = [] for path in log_files: - is_expt = True if 'mm' in path else False - is_full_expt = is_expt and (True if '__' not in path else False) - is_no_mm_expt = is_full_expt and (True if 'no_mm_dates' in path else False) - is_mm_expt = is_full_expt and (True if 'with_mm' in path else False) - if is_no_mm_expt: no_mm_expt.append(path) - if is_mm_expt: mm_expt.append(path) + is_expt = True if "mm" in path else False + is_full_expt = is_expt and (True if "__" not in path else False) + is_no_mm_expt = is_full_expt and (True if "no_mm_dates" in path else False) + is_mm_expt = is_full_expt 
and (True if "with_mm" in path else False) + if is_no_mm_expt: + no_mm_expt.append(path) + if is_mm_expt: + mm_expt.append(path) expt_dfs = [] @@ -121,15 +124,19 @@ def dataframe_from_path(expt_path): for col in cols[1:-1]: fig = plt.figure(figsize=(11, 8)) for df in expt_dfs: - x = df['run_param_value'] + x = df["run_param_value"] y = df[col] plt.plot(x, y, label=df.columns.name) plt.legend() plt.title(col) - plt.xlabel('num_agents') + plt.xlabel("num_agents") plt.ylabel(col) plt.show() - fig.savefig(f'timings-plots/{col}.png', format='png', dpi=300, transparent=False, bbox_inches='tight', - pad_inches=0.03) - - + fig.savefig( + f"timings-plots/{col}.png", + format="png", + dpi=300, + transparent=False, + bbox_inches="tight", + pad_inches=0.03, + ) diff --git a/util/util.py b/util/util.py index b7f359692..09d3739ba 100644 --- a/util/util.py +++ b/util/util.py @@ -1,10 +1,10 @@ +import warnings +from contextlib import contextmanager + import numpy as np import pandas as pd -from contextlib import contextmanager -import warnings from scipy.spatial.distance import pdist, squareform - # General purpose utility functions for the simulator, attached to no particular class. # Available to any agent or other module/utility. Should not require references to # any simulator object (kernel, agent, etc). @@ -18,28 +18,31 @@ # Use it for all permanent logging print statements to allow fastest possible # execution when verbose flag is not set. This is especially fast because # the arguments will not even be formatted when in silent mode. -def log_print (str, *args): - if not silent_mode: print (str.format(*args)) +def log_print(str, *args): + if not silent_mode: + print(str.format(*args)) # Accessor method for the global silent_mode variable. -def be_silent (): - return silent_mode +def be_silent(): + return silent_mode # Utility method to flatten nested lists. 
def delist(list_of_lists): return [x for b in list_of_lists for x in b] + # Utility function to get agent wake up times to follow a U-quadratic distribution. def get_wake_time(open_time, close_time, a=0, b=1): - """ Draw a time U-quadratically distributed between open_time and close_time. - For details on U-quadtratic distribution see https://en.wikipedia.org/wiki/U-quadratic_distribution + """Draw a time U-quadratically distributed between open_time and close_time. + For details on U-quadtratic distribution see https://en.wikipedia.org/wiki/U-quadratic_distribution """ + def cubic_pow(n): - """ Helper function: returns *real* cube root of a float""" + """Helper function: returns *real* cube root of a float""" if n < 0: - return -(-n) ** (1.0 / 3.0) + return -((-n) ** (1.0 / 3.0)) else: return n ** (1.0 / 3.0) @@ -47,7 +50,7 @@ def cubic_pow(n): def u_quadratic_inverse_cdf(y): alpha = 12 / ((b - a) ** 3) beta = (b + a) / 2 - result = cubic_pow((3 / alpha) * y - (beta - a)**3 ) + beta + result = cubic_pow((3 / alpha) * y - (beta - a) ** 3) + beta return result uniform_0_1 = np.random.rand() @@ -56,10 +59,11 @@ def u_quadratic_inverse_cdf(y): return wake_time + def numeric(s): - """ Returns numeric type from string, stripping commas from the right. - Adapted from https://stackoverflow.com/a/379966.""" - s = s.rstrip(',') + """Returns numeric type from string, stripping commas from the right. + Adapted from https://stackoverflow.com/a/379966.""" + s = s.rstrip(",") try: return int(s) except ValueError: @@ -68,32 +72,34 @@ def numeric(s): except ValueError: return s + def get_value_from_timestamp(s, ts): - """ Get the value of s corresponding to closest datetime to ts. + """Get the value of s corresponding to closest datetime to ts. 
- :param s: pandas Series with pd.DatetimeIndex - :type s: pd.Series - :param ts: timestamp at which to retrieve data - :type ts: pd.Timestamp + :param s: pandas Series with pd.DatetimeIndex + :type s: pd.Series + :param ts: timestamp at which to retrieve data + :type ts: pd.Timestamp """ - ts_str = ts.strftime('%Y-%m-%d %H:%M:%S') - s = s.loc[~s.index.duplicated(keep='last')] - locs = s.index.get_loc(ts_str, method='nearest') + ts_str = ts.strftime("%Y-%m-%d %H:%M:%S") + s = s.loc[~s.index.duplicated(keep="last")] + locs = s.index.get_loc(ts_str, method="nearest") out = s[locs][0] if (isinstance(s[locs], np.ndarray) or isinstance(s[locs], pd.Series)) else s[locs] return out + @contextmanager def ignored(warning_str, *exceptions): - """ Context manager that wraps the code block in a try except statement, catching specified exceptions and printing - warning supplied by user. + """Context manager that wraps the code block in a try except statement, catching specified exceptions and printing + warning supplied by user. - :param warning_str: Warning statement printed when exception encountered - :param exceptions: an exception type, e.g. ValueError + :param warning_str: Warning statement printed when exception encountered + :param exceptions: an exception type, e.g. ValueError - https://stackoverflow.com/a/15573313 + https://stackoverflow.com/a/15573313 """ try: yield @@ -104,7 +110,7 @@ def ignored(warning_str, *exceptions): def generate_uniform_random_pairwise_dist_on_line(left, right, num_points, random_state=None): - """ Uniformly generate points on an interval, and return numpy array of pairwise distances between points. + """Uniformly generate points on an interval, and return numpy array of pairwise distances between points. 
:param left: left endpoint of interval :param right: right endpoint of interval @@ -117,12 +123,12 @@ def generate_uniform_random_pairwise_dist_on_line(left, right, num_points, rando x_coords = random_state.uniform(low=left, high=right, size=num_points) x_coords = x_coords.reshape((x_coords.size, 1)) - out = pdist(x_coords, 'euclidean') + out = pdist(x_coords, "euclidean") return squareform(out) def meters_to_light_ns(x): - """ Converts x in units of meters to light nanoseconds + """Converts x in units of meters to light nanoseconds :param x: :return: @@ -133,25 +139,25 @@ def meters_to_light_ns(x): def validate_window_size(s): - """ Check if s is integer or string 'adaptive'. """ + """Check if s is integer or string 'adaptive'.""" try: return int(s) except ValueError: - if s.lower() == 'adaptive': + if s.lower() == "adaptive": return s.lower() else: raise ValueError(f'String {s} must be integer or string "adaptive".') def sigmoid(x, beta): - """ Numerically stable sigmoid function. + """Numerically stable sigmoid function. Adapted from https://timvieira.github.io/blog/post/2014/02/11/exp-normalize-trick/" """ if x >= 0: - z = np.exp(-beta*x) + z = np.exp(-beta * x) return 1 / (1 + z) else: # if x is less than zero then z will be small, denom can't be # zero because it's 1+z. - z = np.exp(beta*x) + z = np.exp(beta * x) return z / (1 + z)