"""Training hyperparameters — edit these to tune your model."""

# ── Model architecture ──────────────────────────────────────────────
# Network-size knobs. Keep these small: the model is meant to run on an ESP32.
CONV_FILTERS = 32  # filters per conv layer (keep small for ESP32)
NUM_CONV_LAYERS = 3  # number of convolutional blocks
DENSE_UNITS = 64  # units in the dense layer before heads

# ── Training ────────────────────────────────────────────────────────
# Optimiser and replay-buffer settings used for each training iteration.
LEARNING_RATE = 1e-3  # Adam learning rate
BATCH_SIZE = 256  # training batch size
EPOCHS_PER_ITERATION = 4  # epochs per training iteration
REPLAY_BUFFER_SIZE = 50000  # max samples kept in replay buffer

# ── Self-play ───────────────────────────────────────────────────────
# Controls how much self-play data is generated and how MCTS picks moves.
NUM_ITERATIONS = 50  # total train iterations (self-play → train cycles)
GAMES_PER_ITERATION = 100  # self-play games generated per iteration
MCTS_SIMULATIONS = 50  # MCTS simulations per move
MCTS_C_PUCT = 1.4  # exploration constant
MCTS_TEMPERATURE = 1.0  # move selection temperature (1 = proportional, →0 = greedy)
TEMP_DROP_MOVE = 10  # switch to greedy after this many moves

# ── Parallelism ────────────────────────────────────────────────────
# Worker count; 0 is a sentinel, not "no workers".
NUM_WORKERS = 0  # 0 = use all available CPU cores

# ── Reward shaping ──────────────────────────────────────────────────
# Scalar values per game outcome. NOTE(review): presumably consumed as the
# value-head training targets — confirm against the training code.
WIN_REWARD = 1.0  # value assigned to a win
DRAW_REWARD = 0.0  # value assigned to a draw
LOSS_REWARD = -1.0  # value assigned to a loss

# ── Checkpointing ──────────────────────────────────────────────────
# Both directories are relative paths. NOTE(review): presumably resolved
# against the working directory the trainer is launched from — confirm.
CHECKPOINT_DIR = "rl/checkpoints"  # where model checkpoints are written
CHECKPOINT_INTERVAL = 5  # save model every N iterations
EXPORT_DIR = "rl/export"  # where exported model artifacts are written

# ── ESP32 export ────────────────────────────────────────────────────
# Export-time option for the TFLite model deployed to the ESP32.
QUANTIZE_INT8 = True  # int8 quantization for TFLite (recommended for ESP32)