forked from tensorflow/minigo
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmcts_player.h
More file actions
252 lines (193 loc) · 7.9 KB
/
Copy pathmcts_player.h
File metadata and controls
252 lines (193 loc) · 7.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
// Copyright 2018 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef CC_MCTS_PLAYER_H_
#define CC_MCTS_PLAYER_H_
#include <array>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <ostream>
#include <string>
#include <utility>
#include <vector>

#include "absl/memory/memory.h"
#include "absl/time/time.h"
#include "absl/types/span.h"
#include "cc/algorithm.h"
#include "cc/constants.h"
#include "cc/dual_net/dual_net.h"
#include "cc/mcts_node.h"
#include "cc/position.h"
#include "cc/random.h"
#include "cc/symmetries.h"
namespace minigo {
// Exposed for testing.
//
// NOTE(review): the definition is not in this header. Judging by the parameter
// names and the identically named fields of MctsPlayer::Options
// (seconds_per_move, time_limit, decay_factor), this presumably returns the
// number of seconds to spend thinking about move number `move_num`, with the
// time decaying once the game would otherwise exceed `time_limit` — confirm
// against the implementation before relying on it.
float TimeRecommendation(int move_num, float seconds_per_move, float time_limit,
float decay_factor);
// A game-playing agent that owns a DualNet (used for inference), a game tree
// rooted at game_root(), and the per-game state that goes with it: the move
// history, the final result and a log of which model served which moves.
//
// The class is designed to be subclassed: NewGame, SuggestMove, PlayMove and
// ProcessLeaves are virtual, and the protected section exposes the search
// primitives (TreeSearch, SelectLeaves, PickMove, UndoMove, ResetRoot).
class MctsPlayer {
 public:
  // Tunable behavior of a player. Every field has a default, so a
  // default-constructed Options is usable as-is.
  struct Options {
    bool inject_noise = true;
    bool soft_pick = true;
    bool random_symmetry = true;
    float resign_threshold = -0.95f;

    // We use a separate resign_enabled flag instead of setting the
    // resign_threshold to -1 for games where resignation is disabled. This
    // enables us to report games where the eventual winner would have
    // incorrectly resigned early, had resignations been enabled.
    bool resign_enabled = true;

    // TODO(tommadams): rename batch_size to virtual_losses.
    int batch_size = 8;

    float komi = kDefaultKomi;
    std::string name = "minigo";

    // Seed used for random permutations.
    // If the default value of 0 is used, a time-based seed is chosen.
    uint64_t random_seed = 0;

    // Number of readouts to perform (ignored if seconds_per_move is non-zero).
    int num_readouts = 0;

    // If non-zero, the number of seconds to spend thinking about each move
    // instead of using a fixed number of readouts.
    float seconds_per_move = 0;

    // If non-zero, the maximum amount of time to spend thinking in a game:
    // we spend seconds_per_move thinking for each move for as many moves as
    // possible before exponentially decaying the amount of time.
    float time_limit = 0;

    // If time_limit is non-zero, the decay factor used to shorten the amount
    // of time spent thinking as the game progresses.
    float decay_factor = 0.98f;

    // If true, print debug info to stderr.
    bool verbose = true;

    // If true, children of the current root node are pruned when a move is
    // played. Under normal play, only the descendants of the move played ever
    // have a chance of being visited again during tree search. However, when
    // using Minigo to explore different variations and ponder about the best
    // moves, it makes sense to keep the full tree around.
    bool prune_orphaned_nodes = true;

    // Streams a text representation of `options` (defined out of line).
    friend std::ostream& operator<<(std::ostream& ios, const Options& options);
  };

  // One entry of the game history returned by history().
  // NOTE(review): the fields are presumably filled in by PushHistory() when a
  // move is played — confirm against the .cc file.
  struct History {
    // Not value-initialized here; contents are indeterminate until assigned.
    std::array<float, kNumMoves> search_pi;
    Coord c = Coord::kPass;
    std::string comment;
    // Non-owning pointer to a node in the game tree.
    const MctsNode* node = nullptr;
  };

  // State that tracks which model is used for each inference.
  struct InferenceInfo {
    // Starts a record for `model` whose first and last use are both
    // `first_move`; total_count starts at zero.
    InferenceInfo(std::string model, int first_move)
        : model(std::move(model)),
          first_move(first_move),
          last_move(first_move) {}

    // Model name returned from RunMany.
    std::string model;

    // Total number of times a model was used for inference.
    size_t total_count = 0;

    // The first move a model was used for inference.
    int first_move = 0;

    // The last move a model was used for inference.
    // This needs to be tracked separately from first_move because the common
    // case is that the model changes part-way through a tree search.
    int last_move = 0;
  };

  // Creates a player that takes ownership of `network`.
  // The constructor takes no starting position; use InitializeGame() to start
  // from a specific board state.
  // NOTE(review): presumably a freshly constructed player starts from an empty
  // board with black to play and keeps its own copy of `options` — confirm
  // against the .cc file.
  MctsPlayer(std::unique_ptr<DualNet> network, const Options& options);

  virtual ~MctsPlayer();

  // Not copyable. Copying was already implicitly disabled by the unique_ptr
  // member; it is spelled out because root_ refers to a node of this object's
  // own game tree, so a memberwise copy could never be correct.
  MctsPlayer(const MctsPlayer&) = delete;
  MctsPlayer& operator=(const MctsPlayer&) = delete;

  // NOTE(review): presumably resets the player so that play starts from
  // `position` — confirm against the .cc file.
  void InitializeGame(const Position& position);

  virtual void NewGame();

  virtual Coord SuggestMove();

  virtual bool PlayMove(Coord c);

  bool ShouldResign() const;

  // Writes the features for `node` into `*features`.
  void GetNodeFeatures(const MctsNode* node, DualNet::BoardFeatures* features);

  // Returns the root of the current search tree, i.e. the current board state.
  MctsNode* root() { return root_; }
  const MctsNode* root() const { return root_; }

  // Returns the result of the game:
  //   +1.0 if black won.
  //    0.0 if the game was drawn.
  //   -1.0 if white won.
  // Check fails if the game is not yet over.
  float result() const {
    MG_CHECK(root_->game_over() || root_->at_move_limit());
    return result_;
  }

  // Return a text description of the game result, e.g. "B+R", "W+1.5".
  // Check fails if the game is not yet over.
  const std::string& result_string() const {
    MG_CHECK(root_->game_over() || root_->at_move_limit());
    return result_string_;
  }

  const Options& options() const { return options_; }
  const std::vector<History>& history() const { return history_; }
  const std::string& name() const { return options_.name; }
  const std::vector<InferenceInfo>& inferences() const { return inferences_; }

  // Non-owning pointer to the network; the player keeps ownership.
  DualNet* network() { return network_.get(); }

 protected:
  // Path in the game tree from leaf to root.
  struct TreePath {
    TreePath(MctsNode* root, MctsNode* leaf) : root(root), leaf(leaf) {}
    MctsNode* root;
    MctsNode* leaf;
  };

  // Lets subclasses modify the player's options in place.
  Options* mutable_options() { return &options_; }

  Coord PickMove();

  // Resets the root_ node back to the game_root_, clearing the game history but
  // preserving the game tree.
  // This is used to rewind the game during review.
  void ResetRoot();

  // Moves the root_ node up to its parent, popping the last move off the game
  // history but preserving the game tree.
  bool UndoMove();

  void TreeSearch();

  // NOTE(review): presumably selects up to `num_leaves` leaves below `root`
  // and appends a TreePath for each to `*paths` — confirm against the .cc
  // file.
  void SelectLeaves(MctsNode* root, int num_leaves,
                    std::vector<MctsPlayer::TreePath>* paths);

  // Returns the root of the game tree.
  MctsNode* game_root() { return &game_root_; }
  const MctsNode* game_root() const { return &game_root_; }

  Random* rnd() { return &rnd_; }

  std::string FormatScore(float score) const;

  // Run inference for the given leaf nodes & incorporate the inference output.
  virtual void ProcessLeaves(absl::Span<TreePath> paths, bool random_symmetry);

 private:
  void PushHistory(Coord c);

  // NOTE: member declaration order is significant (it is the construction
  // order), so it is kept exactly as it was.
  std::unique_ptr<DualNet> network_;

  // Default-initialized so the value is never indeterminate, matching
  // result_ below; the constructor is expected to set the real value.
  int temperature_cutoff_ = 0;

  MctsNode::EdgeStats root_stats_;

  // Current search root (see root()). Default-initialized to nullptr so it is
  // never indeterminate; the constructor is expected to point it at a node.
  MctsNode* root_ = nullptr;

  MctsNode game_root_;

  BoardVisitor bv_;
  GroupVisitor gv_;

  Random rnd_;

  Options options_;

  float result_ = 0;
  std::string result_string_;

  std::vector<History> history_;

  std::string model_;
  std::vector<InferenceInfo> inferences_;

  // Vectors reused when running TreeSearch.
  std::vector<TreePath> tree_search_paths_;
  std::vector<DualNet::BoardFeatures> features_;
  std::vector<DualNet::Output> outputs_;
  std::vector<symmetry::Symmetry> symmetries_used_;
  std::vector<const Position::Stones*> recent_positions_;
};
// Get information on the bleakest move for a completed game, if the game has
// history and was played with resign disabled. (If resign was enabled,
// bleakest-move calculation is not relevant, since quitters don't know how bad
// it could have been.)
//
// Returns true if the bleakest move was found and returned; false otherwise.
// Q is returned from the winner's perspective, which means we don't have to
// reference the result to transform this into a sortable list of evaluations.
//
// `move` and `q` are output parameters.
// NOTE(review): presumably both must be non-null and are only written when the
// function returns true — confirm against the definition in the .cc file.
bool FindBleakestMove(const MctsPlayer& player, int* move, float* q);
} // namespace minigo
#endif // CC_MCTS_PLAYER_H_