Files
2026-03-27 12:17:25 +01:00

37 lines
2.4 KiB
Python

"""Training hyperparameters — edit these to tune your model."""
# ── Model architecture ──────────────────────────────────────────────
# Filters in each convolutional layer; keep this small for the ESP32.
CONV_FILTERS: int = 32
# How many convolutional blocks the network has.
NUM_CONV_LAYERS: int = 3
# Units in the dense layer that sits just before the heads.
DENSE_UNITS: int = 64
# ── Training ────────────────────────────────────────────────────────
# Learning rate handed to Adam.
LEARNING_RATE: float = 0.001
# Number of samples in one training batch.
BATCH_SIZE: int = 256
# Epochs run during each training iteration.
EPOCHS_PER_ITERATION: int = 4
# Upper bound on the number of samples kept in the replay buffer.
REPLAY_BUFFER_SIZE: int = 50_000
# ── Self-play ───────────────────────────────────────────────────────
# Total number of self-play → train cycles to run.
NUM_ITERATIONS: int = 50
# Self-play games generated in every iteration.
GAMES_PER_ITERATION: int = 100
# MCTS simulations performed for each move.
MCTS_SIMULATIONS: int = 50
# Exploration constant used by MCTS.
MCTS_C_PUCT: float = 1.4
# Temperature for move selection: 1 = proportional, →0 = greedy.
MCTS_TEMPERATURE: float = 1.0
# After this many moves, move selection switches to greedy.
TEMP_DROP_MOVE: int = 10
# ── Parallelism ────────────────────────────────────────────────────
# Worker count; 0 means use every available CPU core.
NUM_WORKERS: int = 0
# ── Reward shaping ──────────────────────────────────────────────────
# Reward values for the three outcomes named below (win / draw / loss).
# NOTE(review): where these are applied is not visible in this file.
WIN_REWARD: float = 1.0
DRAW_REWARD: float = 0.0
LOSS_REWARD: float = -1.0
# ── Checkpointing ──────────────────────────────────────────────────
# Checkpoint location (a relative path string).
CHECKPOINT_DIR: str = "rl/checkpoints"
# The model is saved once every N iterations.
CHECKPOINT_INTERVAL: int = 5
# Export location (a relative path string).
EXPORT_DIR: str = "rl/export"
# ── ESP32 export ────────────────────────────────────────────────────
# Enable int8 quantization for TFLite (recommended for ESP32).
QUANTIZE_INT8: bool = True