minigo/tests/test_mcts.py at minigui · artasparks/minigo

194 lines (173 loc) · 7.18 KB
# Copyright 2018 Google LLC
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#      http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
import unittest

import numpy as np

import coords
import go
from go import Position
import mcts
from tests import test_utils
ALMOST_DONE_BOARD = test_utils.load_board('''
# Black-to-play fixture built on ALMOST_DONE_BOARD (komi 2.5, captures (1, 4)).
TEST_POSITION = go.Position(
    board=ALMOST_DONE_BOARD,
    komi=2.5,
    caps=(1, 4),
    ko=None,
    recent=(go.PlayerMove(go.BLACK, (0, 1)),
            go.PlayerMove(go.WHITE, (0, 8))),
    to_play=go.BLACK
)
# White-to-play fixture on the same board (komi 0.5, no captures); several
# tests below use it as the search root.
SEND_TWO_RETURN_ONE = go.Position(
    board=ALMOST_DONE_BOARD,
    komi=0.5,
    caps=(0, 0),
    ko=None,
    recent=(go.PlayerMove(go.BLACK, (0, 1)),
            go.PlayerMove(go.WHITE, (0, 8)),
            go.PlayerMove(go.BLACK, (1, 0))),
    to_play=go.WHITE
)
class TestMctsNodes(test_utils.MiniGoUnitTest):
    """Tests for mcts.MCTSNode: leaf selection, child creation and backup."""

    def test_action_flipping(self):
        """Black and white roots fed the same priors pick the same move."""
        num_moves = go.N * go.N + 1
        np.random.seed(1)
        # Near-uniform priors with a small seeded jitter on top.
        jitter = np.random.random([num_moves]) * 0.001
        probs = np.full(num_moves, .02) + jitter

        black_root = mcts.MCTSNode(go.Position())
        white_root = mcts.MCTSNode(go.Position(to_play=go.WHITE))
        black_root.select_leaf().incorporate_results(probs, 0, black_root)
        white_root.select_leaf().incorporate_results(probs, 0, white_root)

        # With nothing but priors to go on, the side to move must not change
        # which move gets selected.
        black_leaf = black_root.select_leaf()
        white_leaf = white_root.select_leaf()
        self.assertEqual(black_leaf.fmove, white_leaf.fmove)
        self.assertEqualNPArray(
            black_root.child_action_score, white_root.child_action_score)

    def test_select_leaf(self):
        """The move with the dominant prior is the leaf that gets selected."""
        d9 = coords.to_flat(coords.from_kgs('D9'))
        probs = np.full(go.N * go.N + 1, .02)
        probs[d9] = 0.4

        root = mcts.MCTSNode(SEND_TWO_RETURN_ONE)
        first_leaf = root.select_leaf()
        first_leaf.incorporate_results(probs, 0, root)

        self.assertEqual(root.position.to_play, go.WHITE)
        # Select first, then look the child up, so the child exists by then.
        selected = root.select_leaf()
        self.assertEqual(selected, root.children[d9])

    def test_backup_incorporate_results(self):
        """Values passed to incorporate_results back up into N and Q."""
        probs = np.full(go.N * go.N + 1, .02)
        root = mcts.MCTSNode(SEND_TWO_RETURN_ONE)
        root.select_leaf().incorporate_results(probs, 0, root)

        leaf = root.select_leaf()
        leaf.incorporate_results(probs, -1, root)  # white wins!

        # Two visits so far: one at the root itself, one through this child.
        self.assertEqual(root.N, 2)
        # Root: prior of 0 plus visits valued 0 and -1 -> mean of 0, 0, -1.
        self.assertAlmostEqual(root.Q, -1 / 3)
        # The leaf has been visited exactly once.
        self.assertEqual(root.child_N[leaf.fmove], 1)
        self.assertEqual(leaf.N, 1)
        # The leaf took its parent's Q (0) as a prior, so its Q is the mean
        # of 0 and -1.
        self.assertAlmostEqual(root.child_Q[leaf.fmove], -0.5)
        self.assertAlmostEqual(leaf.Q, -0.5)

        # The next selection is assumed to descend through the same leaf:
        #   root
        #     leaf
        #       leaf2
        # That holds here because root is W to play and leaf was a W win.
        self.assertEqual(root.position.to_play, go.WHITE)
        leaf2 = root.select_leaf()
        leaf2.incorporate_results(probs, -0.2, root)  # another white semi-win

        self.assertEqual(root.N, 3)
        # Root: mean of 0, 0, -1, -0.2.
        self.assertAlmostEqual(root.Q, -0.3)
        self.assertEqual(leaf.N, 2)
        self.assertEqual(leaf2.N, 1)
        # Leaf: mean of 0, -1, -0.2.
        self.assertAlmostEqual(leaf.Q, root.child_Q[leaf.fmove])
        self.assertAlmostEqual(leaf.Q, -0.4)
        # Leaf2: mean of -1, -0.2.
        self.assertAlmostEqual(leaf.child_Q[leaf2.fmove], -0.6)
        self.assertAlmostEqual(leaf2.Q, -0.6)

    def test_do_not_explore_past_finish(self):
        """After two consecutive passes the search stays at the final node."""
        probs = np.full(go.N * go.N + 1, 0.02, dtype=np.float32)
        root = mcts.MCTSNode(go.Position())
        root.select_leaf().incorporate_results(probs, 0, root)

        first_pass = root.maybe_add_child(coords.to_flat(None))
        first_pass.incorporate_results(probs, 0, root)
        second_pass = first_pass.maybe_add_child(coords.to_flat(None))

        self.assertRaises(
            AssertionError, second_pass.incorporate_results, probs, 0, root)

        # Exploration should simply stop at the end position.
        node_to_explore = second_pass.select_leaf()
        self.assertEqual(node_to_explore, second_pass)

    def test_add_child(self):
        """maybe_add_child registers the child and links it to its parent."""
        move = 17
        root = mcts.MCTSNode(go.Position())
        child = root.maybe_add_child(move)
        self.assertIn(move, root.children)
        self.assertEqual(child.parent, root)
        self.assertEqual(child.fmove, move)

    def test_add_child_idempotency(self):
        """Adding the same move twice yields the same child, same children."""
        move = 17
        root = mcts.MCTSNode(go.Position())
        first = root.maybe_add_child(move)
        children_before = copy.copy(root.children)
        second = root.maybe_add_child(move)
        self.assertEqual(first, second)
        self.assertEqual(children_before, root.children)

    def test_never_select_illegal_moves(self):
        """An illegal move is never selected, even with a huge prior."""
        illegal_move = 1
        probs = np.full(go.N * go.N + 1, 0.02)
        # Pretend the NN accidentally put a high weight on an illegal move.
        probs[illegal_move] = 0.99

        root = mcts.MCTSNode(SEND_TWO_RETURN_ONE)
        root.incorporate_results(probs, 0, root)
        # Pretend the root has been visited many times, which pumps up the
        # action score for unvisited moves...
        root.N = 100000
        root.child_N[root.position.all_legal_moves()] = 10000

        # Selecting must not raise, and must not return the illegal move.
        leaf = root.select_leaf()
        self.assertNotEqual(leaf.fmove, illegal_move)

        # Injected noise must not make the illegal move selectable either.
        for _ in range(10):
            root.inject_noise()
            leaf = root.select_leaf()
            self.assertNotEqual(leaf.fmove, illegal_move)

    def test_dont_pick_unexpanded_child(self):
        """A selected but unevaluated child is returned again, not expanded."""
        likely_move = 17
        probs = np.full(go.N * go.N + 1, 0.001)
        # Make one move overwhelmingly likely so the search goes down that
        # path twice even with a virtual loss applied.
        probs[likely_move] = 0.999

        root = mcts.MCTSNode(go.Position())
        root.incorporate_results(probs, 0, root)
        leaf1 = root.select_leaf()
        self.assertEqual(leaf1.fmove, likely_move)
        leaf1.add_virtual_loss(up_to=root)

        # The child has not yet gone to the neural net for eval + result
        # incorporation, so a second selection returns the very same node.
        leaf2 = root.select_leaf()
        self.assertIs(leaf1, leaf2)
Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

test_mcts.py

Latest commit

History

test_mcts.py

File metadata and controls