# Connect-four-Esp32/rl/config.py

"""Training hyperparameters — edit these to tune your model."""

# ── Model architecture ──────────────────────────────────────────────
# Filters in each convolutional layer (keep small for ESP32).
CONV_FILTERS: int = 32
# How many convolutional blocks the network contains.
NUM_CONV_LAYERS: int = 3
# Width of the dense layer that sits before the output heads.
DENSE_UNITS: int = 64

# ── Training ────────────────────────────────────────────────────────
# Learning rate used with the Adam optimizer.
LEARNING_RATE: float = 0.001
# Number of samples in one training batch.
BATCH_SIZE: int = 256
# Epochs run during each training iteration.
EPOCHS_PER_ITERATION: int = 4
# Upper limit on the number of samples retained in the replay buffer.
REPLAY_BUFFER_SIZE: int = 50_000

# ── Self-play ───────────────────────────────────────────────────────
# Total number of training iterations (each one is a self-play → train cycle).
NUM_ITERATIONS: int = 50
# Self-play games generated in every iteration.
GAMES_PER_ITERATION: int = 100
# MCTS simulations run for each move.
MCTS_SIMULATIONS: int = 50
# Exploration constant (c_puct) for MCTS.
MCTS_C_PUCT: float = 1.4
# Temperature for move selection: 1 = proportional, →0 = greedy.
MCTS_TEMPERATURE: float = 1.0
# Move selection switches to greedy after this many moves.
TEMP_DROP_MOVE: int = 10

# ── Parallelism ────────────────────────────────────────────────────
# Worker count; the special value 0 means "use all available CPU cores".
NUM_WORKERS: int = 0

# ── Reward shaping ──────────────────────────────────────────────────
# Reward values for the three game outcomes.
# NOTE(review): presumably scored from the perspective of the player being
# evaluated — confirm against the self-play/training code.
WIN_REWARD: float = 1.0
DRAW_REWARD: float = 0.0
LOSS_REWARD: float = -1.0

# ── Checkpointing ──────────────────────────────────────────────────
# Directory for model checkpoints.
# NOTE(review): relative path — presumably resolved against the working
# directory of the process that uses it; confirm in the caller.
CHECKPOINT_DIR: str = "rl/checkpoints"
# The model is saved every N iterations, where N is this value.
CHECKPOINT_INTERVAL: int = 5
# Directory for exported artifacts (relative path, same caveat as above).
EXPORT_DIR: str = "rl/export"

# ── ESP32 export ────────────────────────────────────────────────────
# Enable int8 quantization for the TFLite export (recommended for ESP32).
QUANTIZE_INT8: bool = True