"""Training hyperparameters — edit these to tune your model.

Every setting is a plain module-level constant. Each assignment sits on its
own line: a ``#`` comment runs to the end of the physical line, so placing
several assignments after a comment on one line would silently disable them.
"""

# ── Model architecture ──────────────────────────────────────────────
CONV_FILTERS = 32            # filters per conv layer (keep small for ESP32)
NUM_CONV_LAYERS = 3          # number of convolutional blocks
DENSE_UNITS = 64             # units in the dense layer before heads

# ── Training ────────────────────────────────────────────────────────
LEARNING_RATE = 1e-3         # Adam learning rate
BATCH_SIZE = 256             # training batch size
EPOCHS_PER_ITERATION = 4     # epochs per training iteration
REPLAY_BUFFER_SIZE = 50000   # max samples kept in replay buffer

# ── Self-play ───────────────────────────────────────────────────────
NUM_ITERATIONS = 50          # total train iterations (self-play → train cycles)
GAMES_PER_ITERATION = 100    # self-play games generated per iteration
MCTS_SIMULATIONS = 50        # MCTS simulations per move
MCTS_C_PUCT = 1.4            # exploration constant
MCTS_TEMPERATURE = 1.0       # move selection temperature (1 = proportional, →0 = greedy)
TEMP_DROP_MOVE = 10          # switch to greedy after this many moves

# ── Parallelism ─────────────────────────────────────────────────────
NUM_WORKERS = 0              # 0 = use all available CPU cores

# ── Reward shaping ──────────────────────────────────────────────────
# Scalar values associated with each game outcome.
# NOTE(review): whose perspective these are scored from is decided by the
# consuming code, which is not visible here — confirm before changing signs.
WIN_REWARD = 1.0
DRAW_REWARD = 0.0
LOSS_REWARD = -1.0

# ── Checkpointing ───────────────────────────────────────────────────
# Both directories are relative paths; presumably resolved against the
# process working directory — verify against the code that opens them.
CHECKPOINT_DIR = "rl/checkpoints"
CHECKPOINT_INTERVAL = 5      # save model every N iterations
EXPORT_DIR = "rl/export"

# ── ESP32 export ────────────────────────────────────────────────────
QUANTIZE_INT8 = True         # int8 quantization for TFLite (recommended for ESP32)