Reinforcement Learning Coach
Intro
Usage
Usage - Distributed Coach
Features
Selecting an Algorithm
Coach Dashboard
Design
Control Flow
Network Design
Distributed Coach - Horizontal Scale-Out
Contributing
Adding a New Agent
Adding a New Environment
Components
Agents
Architectures
Data Stores
Environments
Exploration Policies
Filters
Memories
Memory Backends
Orchestrators
Core Types
Spaces
Additional Parameters
Reinforcement Learning Coach
Docs » Index
Index
A | B | C | D | E | F | G | H | I | K | L | M | N | O | P | Q | R | S | T | U | V | W
A
accumulate_gradients() (rl_coach.architectures.architecture.Architecture method)
ACERAlgorithmParameters (class in rl_coach.agents.acer_agent)
act() (rl_coach.agents.agent.Agent method)
(rl_coach.agents.dqn_agent.DQNAgent method)
action_space() (rl_coach.environments.environment.Environment property)
ActionInfo (class in rl_coach.core_types)
actions() (rl_coach.core_types.Batch method)
ActionSpace (class in rl_coach.spaces)
ActorCriticAlgorithmParameters (class in rl_coach.agents.actor_critic_agent)
AdditiveNoise (class in rl_coach.exploration_policies.additive_noise)
Agent (class in rl_coach.agents.agent)
AgentParameters (class in rl_coach.base_parameters)
apply_and_reset_gradients() (rl_coach.architectures.architecture.Architecture method)
apply_gradients() (rl_coach.architectures.architecture.Architecture method)
apply_gradients_and_sync_networks() (rl_coach.architectures.network_wrapper.NetworkWrapper method)
apply_gradients_to_global_network() (rl_coach.architectures.network_wrapper.NetworkWrapper method)
apply_gradients_to_online_network() (rl_coach.architectures.network_wrapper.NetworkWrapper method)
Architecture (class in rl_coach.architectures.architecture)
AttentionActionSpace (class in rl_coach.spaces)
AttentionDiscretization (class in rl_coach.filters.action)
B
BalancedExperienceReplay (class in rl_coach.memories.non_episodic)
Batch (class in rl_coach.core_types)
BCAlgorithmParameters (class in rl_coach.agents.bc_agent)
Boltzmann (class in rl_coach.exploration_policies.boltzmann)
Bootstrapped (class in rl_coach.exploration_policies.bootstrapped)
BoxActionSpace (class in rl_coach.spaces)
BoxDiscretization (class in rl_coach.filters.action)
BoxMasking (class in rl_coach.filters.action)
C
call_memory() (rl_coach.agents.agent.Agent method)
(rl_coach.agents.dqn_agent.DQNAgent method)
CarlaEnvironment (class in rl_coach.environments.carla_environment)
Categorical (class in rl_coach.exploration_policies.categorical)
CategoricalDQNAlgorithmParameters (class in rl_coach.agents.categorical_dqn_agent)
change_phase() (rl_coach.exploration_policies.exploration_policy.ExplorationPolicy method)
choose_action() (rl_coach.agents.agent.Agent method)
(rl_coach.agents.dqn_agent.DQNAgent method)
CILAlgorithmParameters (class in rl_coach.agents.cil_agent)
clip_action_to_space() (rl_coach.spaces.ActionSpace method)
(rl_coach.spaces.GoalsSpace method)
ClippedPPOAlgorithmParameters (class in rl_coach.agents.clipped_ppo_agent)
close() (rl_coach.environments.environment.Environment method)
collect_savers() (rl_coach.agents.agent.Agent method)
(rl_coach.agents.dqn_agent.DQNAgent method)
(rl_coach.architectures.architecture.Architecture method)
(rl_coach.architectures.network_wrapper.NetworkWrapper method)
CompoundActionSpace (class in rl_coach.spaces)
construct() (rl_coach.architectures.architecture.Architecture static method)
contains() (rl_coach.spaces.ActionSpace method)
(rl_coach.spaces.GoalsSpace method)
(rl_coach.spaces.ObservationSpace method)
(rl_coach.spaces.Space method)
ContinuousEntropy (class in rl_coach.exploration_policies.continuous_entropy)
ControlSuiteEnvironment (class in rl_coach.environments.control_suite_environment)
create_networks() (rl_coach.agents.agent.Agent method)
(rl_coach.agents.dqn_agent.DQNAgent method)
D
DDPGAlgorithmParameters (class in rl_coach.agents.ddpg_agent)
DFPAlgorithmParameters (class in rl_coach.agents.dfp_agent)
DiscreteActionSpace (class in rl_coach.spaces)
distance_from_goal() (rl_coach.spaces.GoalsSpace method)
DistributedTaskParameters (class in rl_coach.base_parameters)
DoomEnvironment (class in rl_coach.environments.doom_environment)
DQNAgent (class in rl_coach.agents.dqn_agent)
DQNAlgorithmParameters (class in rl_coach.agents.dqn_agent)
E
EGreedy (class in rl_coach.exploration_policies.e_greedy)
Environment (class in rl_coach.environments.environment)
EnvResponse (class in rl_coach.core_types)
Episode (class in rl_coach.core_types)
EpisodicExperienceReplay (class in rl_coach.memories.episodic)
EpisodicHindsightExperienceReplay (class in rl_coach.memories.episodic)
EpisodicHRLHindsightExperienceReplay (class in rl_coach.memories.episodic)
ExperienceReplay (class in rl_coach.memories.non_episodic)
ExplorationPolicy (class in rl_coach.exploration_policies.exploration_policy)
F
freeze_memory() (rl_coach.agents.agent.Agent method)
(rl_coach.agents.dqn_agent.DQNAgent method)
FullDiscreteActionSpaceMap (class in rl_coach.filters.action)
G
game_overs() (rl_coach.core_types.Batch method)
get_action() (rl_coach.exploration_policies.exploration_policy.ExplorationPolicy method)
get_action_from_user() (rl_coach.environments.environment.Environment method)
get_available_keys() (rl_coach.environments.environment.Environment method)
get_first_transition() (rl_coach.core_types.Episode method)
get_goal() (rl_coach.environments.environment.Environment method)
get_last_transition() (rl_coach.core_types.Episode method)
get_predictions() (rl_coach.agents.agent.Agent method)
(rl_coach.agents.dqn_agent.DQNAgent method)
get_random_action() (rl_coach.environments.environment.Environment method)
get_rendered_image() (rl_coach.environments.environment.Environment method)
get_reward_for_goal_and_state() (rl_coach.spaces.GoalsSpace method)
get_state_embedding() (rl_coach.agents.agent.Agent method)
(rl_coach.agents.dqn_agent.DQNAgent method)
get_transition() (rl_coach.core_types.Episode method)
get_transitions_attribute() (rl_coach.core_types.Episode method)
get_variable_value() (rl_coach.architectures.architecture.Architecture method)
get_weights() (rl_coach.architectures.architecture.Architecture method)
goal_from_state() (rl_coach.spaces.GoalsSpace method)
goal_space() (rl_coach.environments.environment.Environment property)
goals() (rl_coach.core_types.Batch method)
GoalsSpace (class in rl_coach.spaces)
GoalsSpace.DistanceMetric (class in rl_coach.spaces)
Greedy (class in rl_coach.exploration_policies.greedy)
GymEnvironment (class in rl_coach.environments.gym_environment)
H
handle_episode_ended() (rl_coach.agents.agent.Agent method)
(rl_coach.agents.dqn_agent.DQNAgent method)
(rl_coach.environments.environment.Environment method)
I
ImageObservationSpace (class in rl_coach.spaces)
improve_reward_model() (rl_coach.agents.dqn_agent.DQNAgent method)
info() (rl_coach.core_types.Batch method)
info_as_list() (rl_coach.core_types.Batch method)
init_environment_dependent_modules() (rl_coach.agents.agent.Agent method)
(rl_coach.agents.dqn_agent.DQNAgent method)
initialize_session_dependent_components() (rl_coach.agents.agent.Agent method)
(rl_coach.agents.dqn_agent.DQNAgent method)
insert() (rl_coach.core_types.Episode method)
is_empty() (rl_coach.core_types.Episode method)
is_valid_index() (rl_coach.spaces.ActionSpace method)
(rl_coach.spaces.GoalsSpace method)
(rl_coach.spaces.ObservationSpace method)
(rl_coach.spaces.Space method)
K
Kubernetes (class in rl_coach.orchestrators.kubernetes_orchestrator)
L
last_env_response() (rl_coach.environments.environment.Environment property)
learn_from_batch() (rl_coach.agents.agent.Agent method)
(rl_coach.agents.dqn_agent.DQNAgent method)
length() (rl_coach.core_types.Episode method)
LinearBoxToBoxMap (class in rl_coach.filters.action)
load_memory_from_file() (rl_coach.agents.agent.Agent method)
(rl_coach.agents.dqn_agent.DQNAgent method)
log_to_screen() (rl_coach.agents.agent.Agent method)
(rl_coach.agents.dqn_agent.DQNAgent method)
M
MixedMonteCarloAlgorithmParameters (class in rl_coach.agents.mmc_agent)
MultiSelectActionSpace (class in rl_coach.spaces)
N
n_step_discounted_rewards() (rl_coach.core_types.Batch method)
NAFAlgorithmParameters (class in rl_coach.agents.naf_agent)
NECAlgorithmParameters (class in rl_coach.agents.nec_agent)
NetworkParameters (class in rl_coach.base_parameters)
NetworkWrapper (class in rl_coach.architectures.network_wrapper)
next_states() (rl_coach.core_types.Batch method)
NFSDataStore (class in rl_coach.data_stores.nfs_data_store)
NStepQAlgorithmParameters (class in rl_coach.agents.n_step_q_agent)
O
ObservationClippingFilter (class in rl_coach.filters.observation)
ObservationCropFilter (class in rl_coach.filters.observation)
ObservationMoveAxisFilter (class in rl_coach.filters.observation)
ObservationNormalizationFilter (class in rl_coach.filters.observation)
ObservationReductionBySubPartsNameFilter (class in rl_coach.filters.observation)
ObservationRescaleSizeByFactorFilter (class in rl_coach.filters.observation)
ObservationRescaleToSizeFilter (class in rl_coach.filters.observation)
ObservationRGBToYFilter (class in rl_coach.filters.observation)
ObservationSpace (class in rl_coach.spaces)
ObservationSqueezeFilter (class in rl_coach.filters.observation)
ObservationStackingFilter (class in rl_coach.filters.observation)
ObservationToUInt8Filter (class in rl_coach.filters.observation)
observe() (rl_coach.agents.agent.Agent method)
(rl_coach.agents.dqn_agent.DQNAgent method)
OUProcess (class in rl_coach.exploration_policies.ou_process)
P
PALAlgorithmParameters (class in rl_coach.agents.pal_agent)
parallel_predict() (rl_coach.architectures.architecture.Architecture static method)
parallel_prediction() (rl_coach.architectures.network_wrapper.NetworkWrapper method)
ParameterNoise (class in rl_coach.exploration_policies.parameter_noise)
parent() (rl_coach.agents.agent.Agent property)
(rl_coach.agents.dqn_agent.DQNAgent property)
PartialDiscreteActionSpaceMap (class in rl_coach.filters.action)
phase() (rl_coach.agents.agent.Agent property)
(rl_coach.agents.dqn_agent.DQNAgent property)
(rl_coach.environments.environment.Environment property)
PlanarMapsObservationSpace (class in rl_coach.spaces)
PolicyGradientAlgorithmParameters (class in rl_coach.agents.policy_gradients_agent)
post_training_commands() (rl_coach.agents.agent.Agent method)
(rl_coach.agents.dqn_agent.DQNAgent method)
PPOAlgorithmParameters (class in rl_coach.agents.ppo_agent)
predict() (rl_coach.architectures.architecture.Architecture method)
prepare_batch_for_inference() (rl_coach.agents.agent.Agent method)
(rl_coach.agents.dqn_agent.DQNAgent method)
PresetValidationParameters (class in rl_coach.base_parameters)
PrioritizedExperienceReplay (class in rl_coach.memories.non_episodic)
Q
QDND (class in rl_coach.memories.non_episodic)
QuantileRegressionDQNAlgorithmParameters (class in rl_coach.agents.qr_dqn_agent)
R
RainbowDQNAlgorithmParameters (class in rl_coach.agents.rainbow_dqn_agent)
RedisPubSubBackend (class in rl_coach.memories.backend.redis)
register_signal() (rl_coach.agents.agent.Agent method)
(rl_coach.agents.dqn_agent.DQNAgent method)
render() (rl_coach.environments.environment.Environment method)
requires_action_values() (rl_coach.exploration_policies.exploration_policy.ExplorationPolicy method)
reset() (rl_coach.exploration_policies.exploration_policy.ExplorationPolicy method)
reset_accumulated_gradients() (rl_coach.architectures.architecture.Architecture method)
reset_evaluation_state() (rl_coach.agents.agent.Agent method)
(rl_coach.agents.dqn_agent.DQNAgent method)
reset_internal_state() (rl_coach.agents.agent.Agent method)
(rl_coach.agents.dqn_agent.DQNAgent method)
(rl_coach.environments.environment.Environment method)
restore_checkpoint() (rl_coach.agents.agent.Agent method)
(rl_coach.agents.dqn_agent.DQNAgent method)
RewardClippingFilter (class in rl_coach.filters.reward)
RewardNormalizationFilter (class in rl_coach.filters.reward)
RewardRescaleFilter (class in rl_coach.filters.reward)
rewards() (rl_coach.core_types.Batch method)
run_off_policy_evaluation() (rl_coach.agents.agent.Agent method)
(rl_coach.agents.dqn_agent.DQNAgent method)
run_pre_network_filter_for_inference() (rl_coach.agents.agent.Agent method)
(rl_coach.agents.dqn_agent.DQNAgent method)
S
S3DataStore (class in rl_coach.data_stores.s3_data_store)
sample() (rl_coach.spaces.ActionSpace method)
(rl_coach.spaces.GoalsSpace method)
(rl_coach.spaces.ObservationSpace method)
(rl_coach.spaces.Space method)
sample_with_info() (rl_coach.spaces.ActionSpace method)
(rl_coach.spaces.GoalsSpace method)
save_checkpoint() (rl_coach.agents.agent.Agent method)
(rl_coach.agents.dqn_agent.DQNAgent method)
set_environment_parameters() (rl_coach.agents.agent.Agent method)
(rl_coach.agents.dqn_agent.DQNAgent method)
set_goal() (rl_coach.environments.environment.Environment method)
set_incoming_directive() (rl_coach.agents.agent.Agent method)
(rl_coach.agents.dqn_agent.DQNAgent method)
set_is_training() (rl_coach.architectures.network_wrapper.NetworkWrapper method)
set_session() (rl_coach.agents.agent.Agent method)
(rl_coach.agents.dqn_agent.DQNAgent method)
set_variable_value() (rl_coach.architectures.architecture.Architecture method)
set_weights() (rl_coach.architectures.architecture.Architecture method)
setup_logger() (rl_coach.agents.agent.Agent method)
(rl_coach.agents.dqn_agent.DQNAgent method)
shuffle() (rl_coach.core_types.Batch method)
SingleEpisodeBuffer (class in rl_coach.memories.episodic)
size() (rl_coach.core_types.Batch property)
slice() (rl_coach.core_types.Batch method)
SoftActorCriticAlgorithmParameters (class in rl_coach.agents.soft_actor_critic_agent)
Space (class in rl_coach.spaces)
StarCraft2Environment (class in rl_coach.environments.starcraft2_environment)
state_space() (rl_coach.environments.environment.Environment property)
states() (rl_coach.core_types.Batch method)
step() (rl_coach.environments.environment.Environment method)
sync() (rl_coach.agents.agent.Agent method)
(rl_coach.agents.dqn_agent.DQNAgent method)
(rl_coach.architectures.network_wrapper.NetworkWrapper method)
T
TaskParameters (class in rl_coach.base_parameters)
TD3AlgorithmParameters (class in rl_coach.agents.td3_agent)
train() (rl_coach.agents.agent.Agent method)
(rl_coach.agents.dqn_agent.DQNAgent method)
train_and_sync_networks() (rl_coach.architectures.network_wrapper.NetworkWrapper method)
train_on_batch() (rl_coach.architectures.architecture.Architecture method)
Transition (class in rl_coach.core_types)
TransitionCollection (class in rl_coach.memories.non_episodic)
TruncatedNormal (class in rl_coach.exploration_policies.truncated_normal)
U
UCB (class in rl_coach.exploration_policies.ucb)
update_discounted_rewards() (rl_coach.core_types.Episode method)
update_log() (rl_coach.agents.agent.Agent method)
(rl_coach.agents.dqn_agent.DQNAgent method)
update_online_network() (rl_coach.architectures.network_wrapper.NetworkWrapper method)
update_step_in_episode_log() (rl_coach.agents.agent.Agent method)
(rl_coach.agents.dqn_agent.DQNAgent method)
update_target_network() (rl_coach.architectures.network_wrapper.NetworkWrapper method)
update_transition_before_adding_to_replay_buffer() (rl_coach.agents.agent.Agent method)
(rl_coach.agents.dqn_agent.DQNAgent method)
V
VectorObservationSpace (class in rl_coach.spaces)
VisualizationParameters (class in rl_coach.base_parameters)
W
WolpertingerAlgorithmParameters (class in rl_coach.agents.wolpertinger_agent)