diff --git a/DroidFishApp/src/main/assets/nn-ad9b42354671.nnue b/DroidFishApp/src/main/assets/nn-5af11540bbfe.nnue similarity index 71% rename from DroidFishApp/src/main/assets/nn-ad9b42354671.nnue rename to DroidFishApp/src/main/assets/nn-5af11540bbfe.nnue index 693fc1d..6690c9e 100644 Binary files a/DroidFishApp/src/main/assets/nn-ad9b42354671.nnue and b/DroidFishApp/src/main/assets/nn-5af11540bbfe.nnue differ diff --git a/DroidFishApp/src/main/cpp/stockfish/benchmark.cpp b/DroidFishApp/src/main/cpp/stockfish/benchmark.cpp index e1c025a..a1ad055 100644 --- a/DroidFishApp/src/main/cpp/stockfish/benchmark.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/benchmark.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,6 +16,8 @@ along with this program. If not, see . */ +#include "benchmark.h" + #include #include #include diff --git a/DroidFishApp/src/main/cpp/stockfish/benchmark.h b/DroidFishApp/src/main/cpp/stockfish/benchmark.h new file mode 100644 index 0000000..64acf83 --- /dev/null +++ b/DroidFishApp/src/main/cpp/stockfish/benchmark.h @@ -0,0 +1,34 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef BENCHMARK_H_INCLUDED +#define BENCHMARK_H_INCLUDED + +#include +#include +#include + +namespace Stockfish { + +class Position; + +std::vector setup_bench(const Position&, std::istream&); + +} // namespace Stockfish + +#endif // #ifndef BENCHMARK_H_INCLUDED diff --git a/DroidFishApp/src/main/cpp/stockfish/bitbase.cpp b/DroidFishApp/src/main/cpp/stockfish/bitbase.cpp index 84300ba..e21d1fe 100644 --- a/DroidFishApp/src/main/cpp/stockfish/bitbase.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/bitbase.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/DroidFishApp/src/main/cpp/stockfish/bitboard.cpp b/DroidFishApp/src/main/cpp/stockfish/bitboard.cpp index fd0ba23..fd5c3c2 100644 --- a/DroidFishApp/src/main/cpp/stockfish/bitboard.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/bitboard.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -27,7 +27,6 @@ namespace Stockfish { uint8_t PopCnt16[1 << 16]; uint8_t SquareDistance[SQUARE_NB][SQUARE_NB]; -Bitboard SquareBB[SQUARE_NB]; Bitboard LineBB[SQUARE_NB][SQUARE_NB]; Bitboard BetweenBB[SQUARE_NB][SQUARE_NB]; Bitboard PseudoAttacks[PIECE_TYPE_NB][SQUARE_NB]; @@ -82,9 +81,6 @@ void Bitboards::init() { for (unsigned i = 0; i < (1 << 16); ++i) PopCnt16[i] = uint8_t(std::bitset<16>(i).count()); - for (Square s = SQ_A1; s <= SQ_H8; ++s) - SquareBB[s] = (1ULL << s); - for (Square s1 = SQ_A1; s1 <= SQ_H8; ++s1) for (Square s2 = SQ_A1; s2 <= SQ_H8; ++s2) SquareDistance[s1][s2] = std::max(distance(s1, s2), distance(s1, s2)); diff --git a/DroidFishApp/src/main/cpp/stockfish/bitboard.h b/DroidFishApp/src/main/cpp/stockfish/bitboard.h index 2b6e2a6..42fd0e9 100644 --- a/DroidFishApp/src/main/cpp/stockfish/bitboard.h +++ b/DroidFishApp/src/main/cpp/stockfish/bitboard.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -74,7 +74,6 @@ constexpr Bitboard KingFlank[FILE_NB] = { extern uint8_t PopCnt16[1 << 16]; extern uint8_t SquareDistance[SQUARE_NB][SQUARE_NB]; -extern Bitboard SquareBB[SQUARE_NB]; extern Bitboard BetweenBB[SQUARE_NB][SQUARE_NB]; extern Bitboard LineBB[SQUARE_NB][SQUARE_NB]; extern Bitboard PseudoAttacks[PIECE_TYPE_NB][SQUARE_NB]; @@ -108,7 +107,7 @@ extern Magic BishopMagics[SQUARE_NB]; inline Bitboard square_bb(Square s) { assert(is_ok(s)); - return SquareBB[s]; + return (1ULL << s); } diff --git a/DroidFishApp/src/main/cpp/stockfish/endgame.cpp b/DroidFishApp/src/main/cpp/stockfish/endgame.cpp index e773e7a..9021f24 100644 --- a/DroidFishApp/src/main/cpp/stockfish/endgame.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/endgame.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/DroidFishApp/src/main/cpp/stockfish/endgame.h b/DroidFishApp/src/main/cpp/stockfish/endgame.h index e79f696..c184cb3 100644 --- a/DroidFishApp/src/main/cpp/stockfish/endgame.h +++ b/DroidFishApp/src/main/cpp/stockfish/endgame.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/DroidFishApp/src/main/cpp/stockfish/evaluate.cpp b/DroidFishApp/src/main/cpp/stockfish/evaluate.cpp index 1d3e310..35d0542 100644 --- a/DroidFishApp/src/main/cpp/stockfish/evaluate.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/evaluate.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -36,7 +36,7 @@ #include "timeman.h" #include "uci.h" #include "incbin/incbin.h" - +#include "nnue/evaluate_nnue.h" // Macro to embed the default efficiently updatable neural network (NNUE) file // data in the engine binary (using incbin.h, by Dale Weiler). @@ -82,20 +82,18 @@ namespace Eval { eval_file = EvalFileDefaultName; #if defined(DEFAULT_NNUE_DIRECTORY) - #define stringify2(x) #x - #define stringify(x) stringify2(x) vector dirs = { "" , "" , CommandLine::binaryDirectory , stringify(DEFAULT_NNUE_DIRECTORY) }; #else vector dirs = { "" , "" , CommandLine::binaryDirectory }; #endif - for (string directory : dirs) + for (const string& directory : dirs) if (currentEvalFileName != eval_file) { if (directory != "") { ifstream stream(directory + eval_file, ios::binary); - if (load_eval(eval_file, stream)) + if (NNUE::load_eval(eval_file, stream)) currentEvalFileName = eval_file; } @@ -111,12 +109,10 @@ namespace Eval { (void) gEmbeddedNNUEEnd; // Silence warning on unused variable istream stream(&buffer); - if (load_eval(eval_file, stream)) + if (NNUE::load_eval(eval_file, stream)) currentEvalFileName = eval_file; } } - if (currentEvalFileName != eval_file) - currentEvalFileName = ""; } /// NNUE::verify() verifies that the last net used was loaded successfully @@ -161,24 +157,24 @@ namespace Trace { Score scores[TERM_NB][COLOR_NB]; - double to_cp(Value v) { return double(v) / UCI::NormalizeToPawnValue; } + static double to_cp(Value v) { return double(v) / UCI::NormalizeToPawnValue; } - void add(int idx, Color c, Score s) { + static void add(int idx, Color c, Score s) { scores[idx][c] = s; } - void add(int idx, Score w, Score b = SCORE_ZERO) { + static void add(int idx, Score w, Score b = SCORE_ZERO) { scores[idx][WHITE] = w; scores[idx][BLACK] = b; } - std::ostream& operator<<(std::ostream& os, Score s) { + static std::ostream& operator<<(std::ostream& os, Score s) { os << std::setw(5) << to_cp(mg_value(s)) << " " << std::setw(5) << to_cp(eg_value(s)); return os; } - std::ostream& operator<<(std::ostream& os, Term t) { + static std::ostream& operator<<(std::ostream& os, Term t) { if (t == MATERIAL || t == IMBALANCE || t == WINNABLE || t == TOTAL) os << " ---- ----" << " | " << " ---- ----"; @@ -195,8 +191,8 @@ using namespace Trace; namespace { // Threshold for lazy and space evaluation - constexpr Value LazyThreshold1 = Value(3631); - constexpr Value LazyThreshold2 = Value(2084); + constexpr Value LazyThreshold1 = Value(3622); + constexpr Value LazyThreshold2 = Value(1962); constexpr Value SpaceThreshold = Value(11551); // KingAttackWeights[PieceType] contains king attack weights by piece type @@ -390,10 +386,10 @@ namespace { template template Score Evaluation::pieces() { - constexpr Color Them = ~Us; - constexpr Direction Down = -pawn_push(Us); - constexpr Bitboard OutpostRanks = (Us == WHITE ? Rank4BB | Rank5BB | Rank6BB - : Rank5BB | Rank4BB | Rank3BB); + constexpr Color Them = ~Us; + [[maybe_unused]] constexpr Direction Down = -pawn_push(Us); + [[maybe_unused]] constexpr Bitboard OutpostRanks = (Us == WHITE ? Rank4BB | Rank5BB | Rank6BB + : Rank5BB | Rank4BB | Rank3BB); Bitboard b1 = pos.pieces(Us, Pt); Bitboard b, bb; Score score = SCORE_ZERO; @@ -432,7 +428,7 @@ namespace { int mob = popcount(b & mobilityArea[Us]); mobility[Us] += MobilityBonus[Pt - 2][mob]; - if (Pt == BISHOP || Pt == KNIGHT) + if constexpr (Pt == BISHOP || Pt == KNIGHT) { // Bonus if the piece is on an outpost square or can reach one // Bonus for knights (UncontestedOutpost) if few relevant targets @@ -1050,52 +1046,41 @@ make_v: /// evaluate() is the evaluator for the outer world. It returns a static /// evaluation of the position from the point of view of the side to move. -Value Eval::evaluate(const Position& pos, int* complexity) { +Value Eval::evaluate(const Position& pos) { + + assert(!pos.checkers()); Value v; Value psq = pos.psq_eg_stm(); // We use the much less accurate but faster Classical eval when the NNUE // option is set to false. Otherwise we use the NNUE eval unless the - // PSQ advantage is decisive and several pieces remain. (~3 Elo) - bool useClassical = !useNNUE || (pos.count() > 7 && abs(psq) > 1760); + // PSQ advantage is decisive. (~4 Elo at STC, 1 Elo at LTC) + bool useClassical = !useNNUE || abs(psq) > 2048; if (useClassical) v = Evaluation(pos).value(); else { int nnueComplexity; - int scale = 1064 + 106 * pos.non_pawn_material() / 5120; + int npm = pos.non_pawn_material() / 64; Color stm = pos.side_to_move(); Value optimism = pos.this_thread()->optimism[stm]; Value nnue = NNUE::evaluate(pos, true, &nnueComplexity); - // Blend nnue complexity with (semi)classical complexity - nnueComplexity = ( 416 * nnueComplexity - + 424 * abs(psq - nnue) - + (optimism > 0 ? int(optimism) * int(psq - nnue) : 0) - ) / 1024; - - // Return hybrid NNUE complexity to caller - if (complexity) - *complexity = nnueComplexity; - - optimism = optimism * (269 + nnueComplexity) / 256; - v = (nnue * scale + optimism * (scale - 754)) / 1024; + // Blend optimism with nnue complexity and (semi)classical complexity + optimism += optimism * (nnueComplexity + abs(psq - nnue)) / 512; + v = (nnue * (945 + npm) + optimism * (150 + npm)) / 1024; } // Damp down the evaluation linearly when shuffling - v = v * (195 - pos.rule50_count()) / 211; + v = v * (200 - pos.rule50_count()) / 214; // Guarantee evaluation does not hit the tablebase range v = std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1); - // When not using NNUE, return classical complexity to caller - if (complexity && (!useNNUE || useClassical)) - *complexity = abs(v - psq); - return v; } diff --git a/DroidFishApp/src/main/cpp/stockfish/evaluate.h b/DroidFishApp/src/main/cpp/stockfish/evaluate.h index f5ac326..b9d7231 100644 --- a/DroidFishApp/src/main/cpp/stockfish/evaluate.h +++ b/DroidFishApp/src/main/cpp/stockfish/evaluate.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -31,7 +31,7 @@ class Position; namespace Eval { std::string trace(Position& pos); - Value evaluate(const Position& pos, int* complexity = nullptr); + Value evaluate(const Position& pos); extern bool useNNUE; extern std::string currentEvalFileName; @@ -39,20 +39,13 @@ namespace Eval { // The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue // for the build process (profile-build and fishtest) to work. Do not change the // name of the macro, as it is used in the Makefile. - #define EvalFileDefaultName "nn-ad9b42354671.nnue" + #define EvalFileDefaultName "nn-5af11540bbfe.nnue" namespace NNUE { - std::string trace(Position& pos); - Value evaluate(const Position& pos, bool adjusted = false, int* complexity = nullptr); - void init(); void verify(); - bool load_eval(std::string name, std::istream& stream); - bool save_eval(std::ostream& stream); - bool save_eval(const std::optional& filename); - } // namespace NNUE } // namespace Eval diff --git a/DroidFishApp/src/main/cpp/stockfish/main.cpp b/DroidFishApp/src/main/cpp/stockfish/main.cpp index fad0ef8..c40e0fa 100644 --- a/DroidFishApp/src/main/cpp/stockfish/main.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/main.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/DroidFishApp/src/main/cpp/stockfish/material.cpp b/DroidFishApp/src/main/cpp/stockfish/material.cpp index 1567358..7102f87 100644 --- a/DroidFishApp/src/main/cpp/stockfish/material.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/material.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/DroidFishApp/src/main/cpp/stockfish/material.h b/DroidFishApp/src/main/cpp/stockfish/material.h index 3ca169c..9acf78f 100644 --- a/DroidFishApp/src/main/cpp/stockfish/material.h +++ b/DroidFishApp/src/main/cpp/stockfish/material.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -28,7 +28,7 @@ namespace Stockfish::Material { /// Material::Entry contains various information about a material configuration. /// It contains a material imbalance evaluation, a function pointer to a special -/// endgame evaluation function (which in most cases is NULL, meaning that the +/// endgame evaluation function (which in most cases is nullptr, meaning that the /// standard evaluation function will be used), and scale factors. /// /// The scale factors are used to scale the evaluation score up or down. For @@ -62,7 +62,7 @@ struct Entry { uint8_t factor[COLOR_NB]; }; -typedef HashTable Table; +using Table = HashTable; Entry* probe(const Position& pos); diff --git a/DroidFishApp/src/main/cpp/stockfish/misc.cpp b/DroidFishApp/src/main/cpp/stockfish/misc.cpp index 2d86969..bbfa406 100644 --- a/DroidFishApp/src/main/cpp/stockfish/misc.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/misc.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -32,21 +32,26 @@ // the calls at compile time), try to load them at runtime. To do this we need // first to define the corresponding function pointers. extern "C" { -typedef bool(*fun1_t)(LOGICAL_PROCESSOR_RELATIONSHIP, - PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, PDWORD); -typedef bool(*fun2_t)(USHORT, PGROUP_AFFINITY); -typedef bool(*fun3_t)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY); -typedef bool(*fun4_t)(USHORT, PGROUP_AFFINITY, USHORT, PUSHORT); -typedef WORD(*fun5_t)(); +using fun1_t = bool(*)(LOGICAL_PROCESSOR_RELATIONSHIP, + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, PDWORD); +using fun2_t = bool(*)(USHORT, PGROUP_AFFINITY); +using fun3_t = bool(*)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY); +using fun4_t = bool(*)(USHORT, PGROUP_AFFINITY, USHORT, PUSHORT); +using fun5_t = WORD(*)(); +using fun6_t = bool(*)(HANDLE, DWORD, PHANDLE); +using fun7_t = bool(*)(LPCSTR, LPCSTR, PLUID); +using fun8_t = bool(*)(HANDLE, BOOL, PTOKEN_PRIVILEGES, DWORD, PTOKEN_PRIVILEGES, PDWORD); } #endif +#include +#include #include #include #include #include +#include #include -#include #if defined(__linux__) && !defined(__ANDROID__) #include @@ -68,7 +73,7 @@ namespace Stockfish { namespace { /// Version number or dev. -const string version = "15.1"; +constexpr string_view version = "16"; /// Our fancy logging facility. The trick here is to replace cin.rdbuf() and /// cout.rdbuf() with two Tie objects that tie cin and cout to a file stream. We @@ -151,13 +156,13 @@ string engine_info(bool to_uci) { stringstream ss; ss << "Stockfish " << version << setfill('0'); - if (version == "dev") + if constexpr (version == "dev") { ss << "-"; #ifdef GIT_DATE - ss << GIT_DATE; + ss << stringify(GIT_DATE); #else - const string months("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec"); + constexpr string_view months("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec"); string month, day, year; stringstream date(__DATE__); // From compiler, format is "Sep 21 2008" @@ -168,7 +173,7 @@ string engine_info(bool to_uci) { ss << "-"; #ifdef GIT_SHA - ss << GIT_SHA; + ss << stringify(GIT_SHA); #else ss << "nogit"; #endif @@ -185,8 +190,6 @@ string engine_info(bool to_uci) { std::string compiler_info() { - #define stringify2(x) #x - #define stringify(x) stringify2(x) #define make_version_string(major, minor, patch) stringify(major) "." stringify(minor) "." stringify(patch) /// Predefined macros hell: @@ -298,21 +301,94 @@ std::string compiler_info() { /// Debug functions used mainly to collect run-time statistics -static std::atomic hits[2], means[2]; +constexpr int MaxDebugSlots = 32; -void dbg_hit_on(bool b) { ++hits[0]; if (b) ++hits[1]; } -void dbg_hit_on(bool c, bool b) { if (c) dbg_hit_on(b); } -void dbg_mean_of(int v) { ++means[0]; means[1] += v; } +namespace { + +template +struct DebugInfo { + std::atomic data[N] = { 0 }; + + constexpr inline std::atomic& operator[](int index) { return data[index]; } +}; + +DebugInfo<2> hit[MaxDebugSlots]; +DebugInfo<2> mean[MaxDebugSlots]; +DebugInfo<3> stdev[MaxDebugSlots]; +DebugInfo<6> correl[MaxDebugSlots]; + +} // namespace + +void dbg_hit_on(bool cond, int slot) { + + ++hit[slot][0]; + if (cond) + ++hit[slot][1]; +} + +void dbg_mean_of(int64_t value, int slot) { + + ++mean[slot][0]; + mean[slot][1] += value; +} + +void dbg_stdev_of(int64_t value, int slot) { + + ++stdev[slot][0]; + stdev[slot][1] += value; + stdev[slot][2] += value * value; +} + +void dbg_correl_of(int64_t value1, int64_t value2, int slot) { + + ++correl[slot][0]; + correl[slot][1] += value1; + correl[slot][2] += value1 * value1; + correl[slot][3] += value2; + correl[slot][4] += value2 * value2; + correl[slot][5] += value1 * value2; +} void dbg_print() { - if (hits[0]) - cerr << "Total " << hits[0] << " Hits " << hits[1] - << " hit rate (%) " << 100 * hits[1] / hits[0] << endl; + int64_t n; + auto E = [&n](int64_t x) { return double(x) / n; }; + auto sqr = [](double x) { return x * x; }; - if (means[0]) - cerr << "Total " << means[0] << " Mean " - << (double)means[1] / means[0] << endl; + for (int i = 0; i < MaxDebugSlots; ++i) + if ((n = hit[i][0])) + std::cerr << "Hit #" << i + << ": Total " << n << " Hits " << hit[i][1] + << " Hit Rate (%) " << 100.0 * E(hit[i][1]) + << std::endl; + + for (int i = 0; i < MaxDebugSlots; ++i) + if ((n = mean[i][0])) + { + std::cerr << "Mean #" << i + << ": Total " << n << " Mean " << E(mean[i][1]) + << std::endl; + } + + for (int i = 0; i < MaxDebugSlots; ++i) + if ((n = stdev[i][0])) + { + double r = sqrtl(E(stdev[i][2]) - sqr(E(stdev[i][1]))); + std::cerr << "Stdev #" << i + << ": Total " << n << " Stdev " << r + << std::endl; + } + + for (int i = 0; i < MaxDebugSlots; ++i) + if ((n = correl[i][0])) + { + double r = (E(correl[i][5]) - E(correl[i][1]) * E(correl[i][3])) + / ( sqrtl(E(correl[i][2]) - sqr(E(correl[i][1]))) + * sqrtl(E(correl[i][4]) - sqr(E(correl[i][3])))); + std::cerr << "Correl. #" << i + << ": Total " << n << " Coefficient " << r + << std::endl; + } } @@ -373,8 +449,10 @@ void* std_aligned_alloc(size_t alignment, size_t size) { #if defined(POSIXALIGNEDALLOC) void *mem; return posix_memalign(&mem, alignment, size) ? nullptr : mem; -#elif defined(_WIN32) +#elif defined(_WIN32) && !defined(_M_ARM) && !defined(_M_ARM64) return _mm_malloc(size, alignment); +#elif defined(_WIN32) + return _aligned_malloc(size, alignment); #else return std::aligned_alloc(alignment, size); #endif @@ -384,8 +462,10 @@ void std_aligned_free(void* ptr) { #if defined(POSIXALIGNEDALLOC) free(ptr); -#elif defined(_WIN32) +#elif defined(_WIN32) && !defined(_M_ARM) && !defined(_M_ARM64) _mm_free(ptr); +#elif defined(_WIN32) + _aligned_free(ptr); #else free(ptr); #endif @@ -409,11 +489,30 @@ static void* aligned_large_pages_alloc_windows([[maybe_unused]] size_t allocSize if (!largePageSize) return nullptr; - // We need SeLockMemoryPrivilege, so try to enable it for the process - if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hProcessToken)) + // Dynamically link OpenProcessToken, LookupPrivilegeValue and AdjustTokenPrivileges + + HMODULE hAdvapi32 = GetModuleHandle(TEXT("advapi32.dll")); + + if (!hAdvapi32) + hAdvapi32 = LoadLibrary(TEXT("advapi32.dll")); + + auto fun6 = (fun6_t)(void(*)())GetProcAddress(hAdvapi32, "OpenProcessToken"); + if (!fun6) + return nullptr; + auto fun7 = (fun7_t)(void(*)())GetProcAddress(hAdvapi32, "LookupPrivilegeValueA"); + if (!fun7) + return nullptr; + auto fun8 = (fun8_t)(void(*)())GetProcAddress(hAdvapi32, "AdjustTokenPrivileges"); + if (!fun8) return nullptr; - if (LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, &luid)) + // We need SeLockMemoryPrivilege, so try to enable it for the process + if (!fun6( // OpenProcessToken() + GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hProcessToken)) + return nullptr; + + if (fun7( // LookupPrivilegeValue(nullptr, SE_LOCK_MEMORY_NAME, &luid) + nullptr, "SeLockMemoryPrivilege", &luid)) { TOKEN_PRIVILEGES tp { }; TOKEN_PRIVILEGES prevTp { }; @@ -425,17 +524,18 @@ static void* aligned_large_pages_alloc_windows([[maybe_unused]] size_t allocSize // Try to enable SeLockMemoryPrivilege. Note that even if AdjustTokenPrivileges() succeeds, // we still need to query GetLastError() to ensure that the privileges were actually obtained. - if (AdjustTokenPrivileges( + if (fun8( // AdjustTokenPrivileges() hProcessToken, FALSE, &tp, sizeof(TOKEN_PRIVILEGES), &prevTp, &prevTpLen) && GetLastError() == ERROR_SUCCESS) { // Round up size to full pages and allocate allocSize = (allocSize + largePageSize - 1) & ~size_t(largePageSize - 1); mem = VirtualAlloc( - NULL, allocSize, MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE); + nullptr, allocSize, MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE); // Privilege no longer needed, restore previous state - AdjustTokenPrivileges(hProcessToken, FALSE, &prevTp, 0, NULL, NULL); + fun8( // AdjustTokenPrivileges () + hProcessToken, FALSE, &prevTp, 0, nullptr, nullptr); } } @@ -453,7 +553,7 @@ void* aligned_large_pages_alloc(size_t allocSize) { // Fall back to regular, page aligned, allocation if necessary if (!mem) - mem = VirtualAlloc(NULL, allocSize, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE); + mem = VirtualAlloc(nullptr, allocSize, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE); return mem; } @@ -517,7 +617,7 @@ void bindThisThread(size_t) {} /// API and returns the best node id for the thread with index idx. Original /// code from Texel by Peter Ă–sterlund. -int best_node(size_t idx) { +static int best_node(size_t idx) { int threads = 0; int nodes = 0; @@ -526,7 +626,7 @@ int best_node(size_t idx) { DWORD byteOffset = 0; // Early exit if the needed API is not available at runtime - HMODULE k32 = GetModuleHandle("Kernel32.dll"); + HMODULE k32 = GetModuleHandle(TEXT("Kernel32.dll")); auto fun1 = (fun1_t)(void(*)())GetProcAddress(k32, "GetLogicalProcessorInformationEx"); if (!fun1) return -1; @@ -596,7 +696,7 @@ void bindThisThread(size_t idx) { return; // Early exit if the needed API are not available at runtime - HMODULE k32 = GetModuleHandle("Kernel32.dll"); + HMODULE k32 = GetModuleHandle(TEXT("Kernel32.dll")); auto fun2 = (fun2_t)(void(*)())GetProcAddress(k32, "GetNumaNodeProcessorMaskEx"); auto fun3 = (fun3_t)(void(*)())GetProcAddress(k32, "SetThreadGroupAffinity"); auto fun4 = (fun4_t)(void(*)())GetProcAddress(k32, "GetNumaNodeProcessorMask2"); diff --git a/DroidFishApp/src/main/cpp/stockfish/misc.h b/DroidFishApp/src/main/cpp/stockfish/misc.h index 77b81d5..69d470c 100644 --- a/DroidFishApp/src/main/cpp/stockfish/misc.h +++ b/DroidFishApp/src/main/cpp/stockfish/misc.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -28,6 +28,9 @@ #include "types.h" +#define stringify2(x) #x +#define stringify(x) stringify2(x) + namespace Stockfish { std::string engine_info(bool to_uci = false); @@ -39,12 +42,13 @@ void std_aligned_free(void* ptr); void* aligned_large_pages_alloc(size_t size); // memory aligned by page size, min alignment: 4096 bytes void aligned_large_pages_free(void* mem); // nop if mem == nullptr -void dbg_hit_on(bool b); -void dbg_hit_on(bool c, bool b); -void dbg_mean_of(int v); +void dbg_hit_on(bool cond, int slot = 0); +void dbg_mean_of(int64_t value, int slot = 0); +void dbg_stdev_of(int64_t value, int slot = 0); +void dbg_correl_of(int64_t value1, int64_t value2, int slot = 0); void dbg_print(); -typedef std::chrono::milliseconds::rep TimePoint; // A value in milliseconds +using TimePoint = std::chrono::milliseconds::rep; // A value in milliseconds static_assert(sizeof(TimePoint) == sizeof(int64_t), "TimePoint should be 64 bits"); inline TimePoint now() { return std::chrono::duration_cast @@ -85,32 +89,6 @@ static inline const union { uint32_t i; char c[4]; } Le = { 0x01020304 }; static inline const bool IsLittleEndian = (Le.c[0] == 4); -// RunningAverage : a class to calculate a running average of a series of values. -// For efficiency, all computations are done with integers. -class RunningAverage { - public: - - // Reset the running average to rational value p / q - void set(int64_t p, int64_t q) - { average = p * PERIOD * RESOLUTION / q; } - - // Update average with value v - void update(int64_t v) - { average = RESOLUTION * v + (PERIOD - 1) * average / PERIOD; } - - // Test if average is strictly greater than rational a / b - bool is_greater(int64_t a, int64_t b) const - { return b * average > a * (PERIOD * RESOLUTION); } - - int64_t value() const - { return average / (PERIOD * RESOLUTION); } - - private : - static constexpr int64_t PERIOD = 4096; - static constexpr int64_t RESOLUTION = 1024; - int64_t average; -}; - template class ValueList { @@ -164,7 +142,7 @@ public: inline uint64_t mul_hi64(uint64_t a, uint64_t b) { #if defined(__GNUC__) && defined(IS_64BIT) - __extension__ typedef unsigned __int128 uint128; + __extension__ using uint128 = unsigned __int128; return ((uint128)a * (uint128)b) >> 64; #else uint64_t aL = (uint32_t)a, aH = a >> 32; diff --git a/DroidFishApp/src/main/cpp/stockfish/movegen.cpp b/DroidFishApp/src/main/cpp/stockfish/movegen.cpp index c7a3c29..6b28a52 100644 --- a/DroidFishApp/src/main/cpp/stockfish/movegen.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/movegen.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -25,13 +25,21 @@ namespace Stockfish { namespace { - template - ExtMove* make_promotions(ExtMove* moveList, Square to) { + template + ExtMove* make_promotions(ExtMove* moveList, [[maybe_unused]] Square to) { - if (Type == CAPTURES || Type == EVASIONS || Type == NON_EVASIONS) + if constexpr (Type == CAPTURES || Type == EVASIONS || Type == NON_EVASIONS) + { *moveList++ = make(to - D, to, QUEEN); + if constexpr (Enemy && Type == CAPTURES) + { + *moveList++ = make(to - D, to, ROOK); + *moveList++ = make(to - D, to, BISHOP); + *moveList++ = make(to - D, to, KNIGHT); + } + } - if (Type == QUIETS || Type == EVASIONS || Type == NON_EVASIONS) + if constexpr ((Type == QUIETS && !Enemy) || Type == EVASIONS || Type == NON_EVASIONS) { *moveList++ = make(to - D, to, ROOK); *moveList++ = make(to - D, to, BISHOP); @@ -60,18 +68,18 @@ namespace { Bitboard pawnsNotOn7 = pos.pieces(Us, PAWN) & ~TRank7BB; // Single and double pawn pushes, no promotions - if (Type != CAPTURES) + if constexpr (Type != CAPTURES) { Bitboard b1 = shift(pawnsNotOn7) & emptySquares; Bitboard b2 = shift(b1 & TRank3BB) & emptySquares; - if (Type == EVASIONS) // Consider only blocking squares + if constexpr (Type == EVASIONS) // Consider only blocking squares { b1 &= target; b2 &= target; } - if (Type == QUIET_CHECKS) + if constexpr (Type == QUIET_CHECKS) { // To make a quiet check, you either make a direct check by pushing a pawn // or push a blocker pawn that is not on the same file as the enemy king. @@ -102,21 +110,21 @@ namespace { Bitboard b2 = shift(pawnsOn7) & enemies; Bitboard b3 = shift(pawnsOn7) & emptySquares; - if (Type == EVASIONS) + if constexpr (Type == EVASIONS) b3 &= target; while (b1) - moveList = make_promotions(moveList, pop_lsb(b1)); + moveList = make_promotions(moveList, pop_lsb(b1)); while (b2) - moveList = make_promotions(moveList, pop_lsb(b2)); + moveList = make_promotions(moveList, pop_lsb(b2)); while (b3) - moveList = make_promotions(moveList, pop_lsb(b3)); + moveList = make_promotions(moveList, pop_lsb(b3)); } // Standard and en passant captures - if (Type == CAPTURES || Type == EVASIONS || Type == NON_EVASIONS) + if constexpr (Type == CAPTURES || Type == EVASIONS || Type == NON_EVASIONS) { Bitboard b1 = shift(pawnsNotOn7) & enemies; Bitboard b2 = shift(pawnsNotOn7) & enemies; @@ -264,7 +272,7 @@ ExtMove* generate(const Position& pos, ExtMove* moveList) { moveList = pos.checkers() ? generate(pos, moveList) : generate(pos, moveList); while (cur != moveList) - if ( ((pinned && pinned & from_sq(*cur)) || from_sq(*cur) == ksq || type_of(*cur) == EN_PASSANT) + if ( ((pinned & from_sq(*cur)) || from_sq(*cur) == ksq || type_of(*cur) == EN_PASSANT) && !pos.legal(*cur)) *cur = (--moveList)->move; else diff --git a/DroidFishApp/src/main/cpp/stockfish/movegen.h b/DroidFishApp/src/main/cpp/stockfish/movegen.h index bbb35b3..b8df3e6 100644 --- a/DroidFishApp/src/main/cpp/stockfish/movegen.h +++ b/DroidFishApp/src/main/cpp/stockfish/movegen.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/DroidFishApp/src/main/cpp/stockfish/movepick.cpp b/DroidFishApp/src/main/cpp/stockfish/movepick.cpp index 188d6bd..6fbcb2c 100644 --- a/DroidFishApp/src/main/cpp/stockfish/movepick.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/movepick.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -69,7 +69,6 @@ MovePicker::MovePicker(const Position& p, Move ttm, Depth d, const ButterflyHist stage = (pos.checkers() ? EVASION_TT : MAIN_TT) + !(ttm && pos.pseudo_legal(ttm)); - threatenedPieces = 0; } /// MovePicker constructor for quiescence search @@ -93,20 +92,20 @@ MovePicker::MovePicker(const Position& p, Move ttm, Value th, const CapturePiece { assert(!pos.checkers()); - stage = PROBCUT_TT + !(ttm && pos.capture(ttm) + stage = PROBCUT_TT + !(ttm && pos.capture_stage(ttm) && pos.pseudo_legal(ttm) && pos.see_ge(ttm, threshold)); } /// MovePicker::score() assigns a numerical value to each move in a list, used /// for sorting. Captures are ordered by Most Valuable Victim (MVV), preferring -/// captures with a good history. Quiets moves are ordered using the histories. +/// captures with a good history. Quiets moves are ordered using the history tables. template void MovePicker::score() { static_assert(Type == CAPTURES || Type == QUIETS || Type == EVASIONS, "Wrong type"); - [[maybe_unused]] Bitboard threatenedByPawn, threatenedByMinor, threatenedByRook; + [[maybe_unused]] Bitboard threatenedByPawn, threatenedByMinor, threatenedByRook, threatenedPieces; if constexpr (Type == QUIETS) { Color us = pos.side_to_move(); @@ -123,8 +122,8 @@ void MovePicker::score() { for (auto& m : *this) if constexpr (Type == CAPTURES) - m.value = 6 * int(PieceValue[MG][pos.piece_on(to_sq(m))]) - + (*captureHistory)[pos.moved_piece(m)][to_sq(m)][type_of(pos.piece_on(to_sq(m)))]; + m.value = (7 * int(PieceValue[MG][pos.piece_on(to_sq(m))]) + + (*captureHistory)[pos.moved_piece(m)][to_sq(m)][type_of(pos.piece_on(to_sq(m)))]) / 16; else if constexpr (Type == QUIETS) m.value = 2 * (*mainHistory)[pos.side_to_move()][from_to(m)] @@ -141,7 +140,7 @@ void MovePicker::score() { + bool(pos.check_squares(type_of(pos.moved_piece(m))) & to_sq(m)) * 16384; else // Type == EVASIONS { - if (pos.capture(m)) + if (pos.capture_stage(m)) m.value = PieceValue[MG][pos.piece_on(to_sq(m))] - Value(type_of(pos.moved_piece(m))) + (1 << 28); @@ -158,7 +157,7 @@ Move MovePicker::select(Pred filter) { while (cur < endMoves) { - if (T == Best) + if constexpr (T == Best) std::swap(*cur, *std::max_element(cur, endMoves)); if (*cur != ttMove && filter()) @@ -197,7 +196,7 @@ top: case GOOD_CAPTURE: if (select([&](){ - return pos.see_ge(*cur, Value(-69 * cur->value / 1024)) ? + return pos.see_ge(*cur, Value(-cur->value)) ? // Move losing capture to endBadCaptures to be tried later true : (*endBadCaptures++ = *cur, false); })) return *(cur - 1); @@ -216,7 +215,7 @@ top: case REFUTATION: if (select([&](){ return *cur != MOVE_NONE - && !pos.capture(*cur) + && !pos.capture_stage(*cur) && pos.pseudo_legal(*cur); })) return *(cur - 1); ++stage; diff --git a/DroidFishApp/src/main/cpp/stockfish/movepick.h b/DroidFishApp/src/main/cpp/stockfish/movepick.h index e4c4a5b..0b44557 100644 --- a/DroidFishApp/src/main/cpp/stockfish/movepick.h +++ b/DroidFishApp/src/main/cpp/stockfish/movepick.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -62,14 +62,14 @@ public: template struct Stats : public std::array, Size> { - typedef Stats stats; + using stats = Stats; void fill(const T& v) { // For standard-layout 'this' points to first struct member assert(std::is_standard_layout::value); - typedef StatsEntry entry; + using entry = StatsEntry; entry* p = reinterpret_cast(this); std::fill(p, p + sizeof(*this) / sizeof(entry), v); } @@ -87,23 +87,23 @@ enum StatsType { NoCaptures, Captures }; /// ordering decisions. It uses 2 tables (one for each color) indexed by /// the move's from and to squares, see www.chessprogramming.org/Butterfly_Boards /// (~11 elo) -typedef Stats ButterflyHistory; +using ButterflyHistory = Stats; /// CounterMoveHistory stores counter moves indexed by [piece][to] of the previous /// move, see www.chessprogramming.org/Countermove_Heuristic -typedef Stats CounterMoveHistory; +using CounterMoveHistory = Stats; /// CapturePieceToHistory is addressed by a move's [piece][to][captured piece type] -typedef Stats CapturePieceToHistory; +using CapturePieceToHistory = Stats; /// PieceToHistory is like ButterflyHistory but is addressed by a move's [piece][to] -typedef Stats PieceToHistory; +using PieceToHistory = Stats; /// ContinuationHistory is the combined history of a given pair of moves, usually /// the current one given a previous one. The nested history table is based on /// PieceToHistory instead of ButterflyBoards. /// (~63 elo) -typedef Stats ContinuationHistory; +using ContinuationHistory = Stats; /// MovePicker class is used to pick one pseudo-legal move at a time from the @@ -131,8 +131,6 @@ public: MovePicker(const Position&, Move, Value, const CapturePieceToHistory*); Move next_move(bool skipQuiets = false); - Bitboard threatenedPieces; - private: template Move select(Pred); template void score(); diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/evaluate_nnue.cpp b/DroidFishApp/src/main/cpp/stockfish/nnue/evaluate_nnue.cpp index 4715fed..329adfd 100644 --- a/DroidFishApp/src/main/cpp/stockfish/nnue/evaluate_nnue.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/nnue/evaluate_nnue.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,15 +18,15 @@ // Code for calculating NNUE evaluation function +#include +#include #include #include #include -#include -#include +#include #include "../evaluate.h" #include "../position.h" -#include "../misc.h" #include "../uci.h" #include "../types.h" @@ -83,7 +83,7 @@ namespace Stockfish::Eval::NNUE { } // namespace Detail // Initialize the evaluation function parameters - void initialize() { + static void initialize() { Detail::initialize(featureTransformer); for (std::size_t i = 0; i < LayerStacks; ++i) @@ -91,7 +91,7 @@ namespace Stockfish::Eval::NNUE { } // Read network header - bool read_header(std::istream& stream, std::uint32_t* hashValue, std::string* desc) + static bool read_header(std::istream& stream, std::uint32_t* hashValue, std::string* desc) { std::uint32_t version, size; @@ -105,7 +105,7 @@ namespace Stockfish::Eval::NNUE { } // Write network header - bool write_header(std::ostream& stream, std::uint32_t hashValue, const std::string& desc) + static bool write_header(std::ostream& stream, std::uint32_t hashValue, const std::string& desc) { write_little_endian(stream, Version); write_little_endian(stream, hashValue); @@ -115,7 +115,7 @@ namespace Stockfish::Eval::NNUE { } // Read network parameters - bool read_parameters(std::istream& stream) { + static bool read_parameters(std::istream& stream) { std::uint32_t hashValue; if (!read_header(stream, &hashValue, &netDescription)) return false; @@ -127,7 +127,7 @@ namespace Stockfish::Eval::NNUE { } // Write network parameters - bool write_parameters(std::ostream& stream) { + static bool write_parameters(std::ostream& stream) { if (!write_header(stream, HashValue, netDescription)) return false; if (!Detail::write_parameters(stream, *featureTransformer)) return false; @@ -136,6 +136,11 @@ namespace Stockfish::Eval::NNUE { return (bool)stream; } + void hint_common_parent_position(const Position& pos) { + if (Eval::useNNUE) + featureTransformer->hint_common_access(pos); + } + // Evaluation function. Perform differential calculation. Value evaluate(const Position& pos, bool adjusted, int* complexity) { @@ -143,7 +148,7 @@ namespace Stockfish::Eval::NNUE { // overaligning stack variables with alignas() doesn't work correctly. constexpr uint64_t alignment = CacheLineSize; - int delta = 24 - pos.non_pawn_material() / 9560; + constexpr int delta = 24; #if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN) TransformedFeatureType transformedFeaturesUnaligned[ @@ -211,7 +216,7 @@ namespace Stockfish::Eval::NNUE { return t; } - static const std::string PieceToChar(" PNBRQK pnbrqk"); + constexpr std::string_view PieceToChar(" PNBRQK pnbrqk"); // format_cp_compact() converts a Value into (centi)pawns and writes it in a buffer. @@ -245,14 +250,15 @@ namespace Stockfish::Eval::NNUE { } - // format_cp_aligned_dot() converts a Value into (centi)pawns and writes it in a buffer, - // always keeping two decimals. The buffer must have capacity for at least 7 chars. - static void format_cp_aligned_dot(Value v, char* buffer) { + // format_cp_aligned_dot() converts a Value into (centi)pawns, always keeping two decimals. + static void format_cp_aligned_dot(Value v, std::stringstream &stream) { + const double cp = 1.0 * std::abs(int(v)) / UCI::NormalizeToPawnValue; - buffer[0] = (v < 0 ? '-' : v > 0 ? '+' : ' '); - - double cp = 1.0 * std::abs(int(v)) / UCI::NormalizeToPawnValue; - sprintf(&buffer[1], "%6.2f", cp); + stream << (v < 0 ? '-' : v > 0 ? '+' : ' ') + << std::setiosflags(std::ios::fixed) + << std::setw(6) + << std::setprecision(2) + << cp; } @@ -332,17 +338,10 @@ namespace Stockfish::Eval::NNUE { for (std::size_t bucket = 0; bucket < LayerStacks; ++bucket) { - char buffer[3][8]; - std::memset(buffer, '\0', sizeof(buffer)); - - format_cp_aligned_dot(t.psqt[bucket], buffer[0]); - format_cp_aligned_dot(t.positional[bucket], buffer[1]); - format_cp_aligned_dot(t.psqt[bucket] + t.positional[bucket], buffer[2]); - - ss << "| " << bucket << " " - << " | " << buffer[0] << " " - << " | " << buffer[1] << " " - << " | " << buffer[2] << " " + ss << "| " << bucket << " "; + ss << " | "; format_cp_aligned_dot(t.psqt[bucket], ss); ss << " " + << " | "; format_cp_aligned_dot(t.positional[bucket], ss); ss << " " + << " | "; format_cp_aligned_dot(t.psqt[bucket] + t.positional[bucket], ss); ss << " " << " |"; if (bucket == t.correctBucket) ss << " <-- this bucket is used"; diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/evaluate_nnue.h b/DroidFishApp/src/main/cpp/stockfish/nnue/evaluate_nnue.h index 2e4f1f5..b84bed8 100644 --- a/DroidFishApp/src/main/cpp/stockfish/nnue/evaluate_nnue.h +++ b/DroidFishApp/src/main/cpp/stockfish/nnue/evaluate_nnue.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -31,6 +31,7 @@ namespace Stockfish::Eval::NNUE { constexpr std::uint32_t HashValue = FeatureTransformer::get_hash_value() ^ Network::get_hash_value(); + // Deleter for automating release of memory area template struct AlignedDeleter { @@ -54,6 +55,14 @@ namespace Stockfish::Eval::NNUE { template using LargePagePtr = std::unique_ptr>; + std::string trace(Position& pos); + Value evaluate(const Position& pos, bool adjusted = false, int* complexity = nullptr); + void hint_common_parent_position(const Position& pos); + + bool load_eval(std::string name, std::istream& stream); + bool save_eval(std::ostream& stream); + bool save_eval(const std::optional& filename); + } // namespace Stockfish::Eval::NNUE #endif // #ifndef NNUE_EVALUATE_NNUE_H_INCLUDED diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2_hm.cpp b/DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2_hm.cpp index 7dbd341..19ebb15 100644 --- a/DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2_hm.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2_hm.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2_hm.h b/DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2_hm.h index a95d432..78063c3 100644 --- a/DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2_hm.h +++ b/DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2_hm.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/layers/affine_transform.h b/DroidFishApp/src/main/cpp/stockfish/nnue/layers/affine_transform.h index 461a7b8..9e2f2f9 100644 --- a/DroidFishApp/src/main/cpp/stockfish/nnue/layers/affine_transform.h +++ b/DroidFishApp/src/main/cpp/stockfish/nnue/layers/affine_transform.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -31,7 +31,7 @@ This file contains the definition for a fully connected layer (aka affine transform). Two approaches are employed, depending on the sizes of the transform. - Approach 1: + Approach 1 (a specialization for large inputs): - used when the PaddedInputDimensions >= 128 - uses AVX512 if possible - processes inputs in batches of 2*InputSimdWidth @@ -42,9 +42,8 @@ depends on the architecture (the amount of registers) - accumulate + hadd is used - Approach 2: + Approach 2 (a specialization for small inputs): - used when the PaddedInputDimensions < 128 - - does not use AVX512 - expected use-case is for when PaddedInputDimensions == 32 and InputDimensions <= 32. - that's why AVX512 is hard to implement - expected use-case is small layers @@ -72,6 +71,10 @@ namespace Stockfish::Eval::NNUE::Layers { const __m64 Zeros = _mm_setzero_si64(); const auto inputVector = reinterpret_cast(input); +# elif defined(USE_NEON_DOTPROD) + constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 16) / 16; + const auto inputVector = reinterpret_cast(input); + # elif defined(USE_NEON) constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 16) / 16; const auto inputVector = reinterpret_cast(input); @@ -123,6 +126,14 @@ namespace Stockfish::Eval::NNUE::Layers { sum = _mm_add_pi32(sum, _mm_unpackhi_pi32(sum, sum)); output[i] = _mm_cvtsi64_si32(sum); +# elif defined(USE_NEON_DOTPROD) + int32x4_t sum = {biases[i]}; + const auto row = reinterpret_cast(&weights[offset]); + for (IndexType j = 0; j < NumChunks; ++j) { + sum = vdotq_s32(sum, inputVector[j], row[j]); + } + output[i] = vaddvq_s32(sum); + # elif defined(USE_NEON) int32x4_t sum = {biases[i]}; const auto row = reinterpret_cast(&weights[offset]); @@ -157,7 +168,7 @@ namespace Stockfish::Eval::NNUE::Layers { constexpr IndexType LargeInputSize = std::numeric_limits::max(); #endif - // A specialization for large inputs. + // A specialization for large inputs template class AffineTransform(InDims, MaxSimdWidth) >= LargeInputSize)>> { public: @@ -176,36 +187,39 @@ namespace Stockfish::Eval::NNUE::Layers { using OutputBuffer = OutputType[PaddedOutputDimensions]; - static_assert(PaddedInputDimensions >= LargeInputSize, "Something went wrong. This specialization should not have been chosen."); + static_assert(PaddedInputDimensions >= LargeInputSize, "Something went wrong. This specialization (for large inputs) should not have been chosen."); #if defined (USE_AVX512) - static constexpr const IndexType InputSimdWidth = 64; - static constexpr const IndexType MaxNumOutputRegs = 16; + static constexpr IndexType InputSimdWidth = 64; + static constexpr IndexType MaxNumOutputRegs = 16; #elif defined (USE_AVX2) - static constexpr const IndexType InputSimdWidth = 32; - static constexpr const IndexType MaxNumOutputRegs = 8; + static constexpr IndexType InputSimdWidth = 32; + static constexpr IndexType MaxNumOutputRegs = 8; #elif defined (USE_SSSE3) - static constexpr const IndexType InputSimdWidth = 16; - static constexpr const IndexType MaxNumOutputRegs = 8; + static constexpr IndexType InputSimdWidth = 16; + static constexpr IndexType MaxNumOutputRegs = 8; +#elif defined (USE_NEON_DOTPROD) + static constexpr IndexType InputSimdWidth = 16; + static constexpr IndexType MaxNumOutputRegs = 8; #elif defined (USE_NEON) - static constexpr const IndexType InputSimdWidth = 8; - static constexpr const IndexType MaxNumOutputRegs = 8; + static constexpr IndexType InputSimdWidth = 8; + static constexpr IndexType MaxNumOutputRegs = 8; #else // The fallback implementation will not have permuted weights. // We define these to avoid a lot of ifdefs later. - static constexpr const IndexType InputSimdWidth = 1; - static constexpr const IndexType MaxNumOutputRegs = 1; + static constexpr IndexType InputSimdWidth = 1; + static constexpr IndexType MaxNumOutputRegs = 1; #endif // A big block is a region in the weight matrix of the size [PaddedInputDimensions, NumOutputRegs]. // A small block is a region of size [InputSimdWidth, 1] - static constexpr const IndexType NumOutputRegs = std::min(MaxNumOutputRegs, OutputDimensions); - static constexpr const IndexType SmallBlockSize = InputSimdWidth; - static constexpr const IndexType BigBlockSize = NumOutputRegs * PaddedInputDimensions; - static constexpr const IndexType NumSmallBlocksInBigBlock = BigBlockSize / SmallBlockSize; - static constexpr const IndexType NumSmallBlocksPerOutput = PaddedInputDimensions / SmallBlockSize; - static constexpr const IndexType NumBigBlocks = OutputDimensions / NumOutputRegs; + static constexpr IndexType NumOutputRegs = std::min(MaxNumOutputRegs, OutputDimensions); + static constexpr IndexType SmallBlockSize = InputSimdWidth; + static constexpr IndexType BigBlockSize = NumOutputRegs * PaddedInputDimensions; + static constexpr IndexType NumSmallBlocksInBigBlock = BigBlockSize / SmallBlockSize; + static constexpr IndexType NumSmallBlocksPerOutput = PaddedInputDimensions / SmallBlockSize; + static constexpr IndexType NumBigBlocks = OutputDimensions / NumOutputRegs; static_assert(OutputDimensions % NumOutputRegs == 0); @@ -241,8 +255,7 @@ namespace Stockfish::Eval::NNUE::Layers { // Read network parameters bool read_parameters(std::istream& stream) { - for (IndexType i = 0; i < OutputDimensions; ++i) - biases[i] = read_little_endian(stream); + read_little_endian(stream, biases, OutputDimensions); for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) weights[get_weight_index(i)] = read_little_endian(stream); @@ -252,8 +265,7 @@ namespace Stockfish::Eval::NNUE::Layers { // Write network parameters bool write_parameters(std::ostream& stream) const { - for (IndexType i = 0; i < OutputDimensions; ++i) - write_little_endian(stream, biases[i]); + write_little_endian(stream, biases, OutputDimensions); for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) write_little_endian(stream, weights[get_weight_index(i)]); @@ -292,6 +304,15 @@ namespace Stockfish::Eval::NNUE::Layers { #define vec_add_dpbusd_32x2 Simd::m128_add_dpbusd_epi32x2 #define vec_hadd Simd::m128_hadd #define vec_haddx4 Simd::m128_haddx4 +#elif defined (USE_NEON_DOTPROD) + using acc_vec_t = int32x4_t; + using bias_vec_t = int32x4_t; + using weight_vec_t = int8x16_t; + using in_vec_t = int8x16_t; + #define vec_zero {0} + #define vec_add_dpbusd_32x2 Simd::dotprod_m128_add_dpbusd_epi32x2 + #define vec_hadd Simd::neon_m128_hadd + #define vec_haddx4 Simd::neon_m128_haddx4 #elif defined (USE_NEON) using acc_vec_t = int32x4_t; using bias_vec_t = int32x4_t; @@ -374,6 +395,7 @@ namespace Stockfish::Eval::NNUE::Layers { alignas(CacheLineSize) WeightType weights[OutputDimensions * PaddedInputDimensions]; }; + // A specialization for small inputs template class AffineTransform(InDims, MaxSimdWidth) < LargeInputSize)>> { public: @@ -393,12 +415,7 @@ namespace Stockfish::Eval::NNUE::Layers { using OutputBuffer = OutputType[PaddedOutputDimensions]; - static_assert(PaddedInputDimensions < LargeInputSize, "Something went wrong. This specialization should not have been chosen."); - -#if defined (USE_SSSE3) - static constexpr const IndexType OutputSimdWidth = SimdWidth / 4; - static constexpr const IndexType InputSimdWidth = SimdWidth; -#endif + static_assert(PaddedInputDimensions < LargeInputSize, "Something went wrong. This specialization (for small inputs) should not have been chosen."); // Hash value embedded in the evaluation file static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) { @@ -428,8 +445,7 @@ namespace Stockfish::Eval::NNUE::Layers { // Read network parameters bool read_parameters(std::istream& stream) { - for (IndexType i = 0; i < OutputDimensions; ++i) - biases[i] = read_little_endian(stream); + read_little_endian(stream, biases, OutputDimensions); for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) weights[get_weight_index(i)] = read_little_endian(stream); @@ -438,8 +454,7 @@ namespace Stockfish::Eval::NNUE::Layers { // Write network parameters bool write_parameters(std::ostream& stream) const { - for (IndexType i = 0; i < OutputDimensions; ++i) - write_little_endian(stream, biases[i]); + write_little_endian(stream, biases, OutputDimensions); for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) write_little_endian(stream, weights[get_weight_index(i)]); @@ -450,29 +465,34 @@ namespace Stockfish::Eval::NNUE::Layers { const OutputType* propagate( const InputType* input, OutputType* output) const { -#if defined (USE_AVX2) +#if defined (USE_AVX512) + using vec_t = __m512i; + #define vec_setzero _mm512_setzero_si512 + #define vec_set_32 _mm512_set1_epi32 + #define vec_add_dpbusd_32 Simd::m512_add_dpbusd_epi32 + #define vec_add_dpbusd_32x2 Simd::m512_add_dpbusd_epi32x2 + #define vec_hadd Simd::m512_hadd +#elif defined (USE_AVX2) using vec_t = __m256i; #define vec_setzero _mm256_setzero_si256 #define vec_set_32 _mm256_set1_epi32 #define vec_add_dpbusd_32 Simd::m256_add_dpbusd_epi32 #define vec_add_dpbusd_32x2 Simd::m256_add_dpbusd_epi32x2 - #define vec_add_dpbusd_32x4 Simd::m256_add_dpbusd_epi32x4 #define vec_hadd Simd::m256_hadd - #define vec_haddx4 Simd::m256_haddx4 #elif defined (USE_SSSE3) using vec_t = __m128i; #define vec_setzero _mm_setzero_si128 #define vec_set_32 _mm_set1_epi32 #define vec_add_dpbusd_32 Simd::m128_add_dpbusd_epi32 #define vec_add_dpbusd_32x2 Simd::m128_add_dpbusd_epi32x2 - #define vec_add_dpbusd_32x4 Simd::m128_add_dpbusd_epi32x4 #define vec_hadd Simd::m128_hadd - #define vec_haddx4 Simd::m128_haddx4 #endif #if defined (USE_SSSE3) const auto inputVector = reinterpret_cast(input); + static constexpr IndexType OutputSimdWidth = sizeof(vec_t) / sizeof(OutputType); + static_assert(OutputDimensions % OutputSimdWidth == 0 || OutputDimensions == 1); if constexpr (OutputDimensions % OutputSimdWidth == 0) @@ -518,9 +538,7 @@ namespace Stockfish::Eval::NNUE::Layers { # undef vec_set_32 # undef vec_add_dpbusd_32 # undef vec_add_dpbusd_32x2 -# undef vec_add_dpbusd_32x4 # undef vec_hadd -# undef vec_haddx4 #else // Use old implementation for the other architectures. affine_transform_non_ssse3< diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/layers/affine_transform_sparse_input.h b/DroidFishApp/src/main/cpp/stockfish/nnue/layers/affine_transform_sparse_input.h new file mode 100644 index 0000000..e0c3a8a --- /dev/null +++ b/DroidFishApp/src/main/cpp/stockfish/nnue/layers/affine_transform_sparse_input.h @@ -0,0 +1,286 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +// Definition of layer AffineTransformSparseInput of NNUE evaluation function + +#ifndef NNUE_LAYERS_AFFINE_TRANSFORM_SPARSE_INPUT_H_INCLUDED +#define NNUE_LAYERS_AFFINE_TRANSFORM_SPARSE_INPUT_H_INCLUDED + +#include +#include +#include +#include +#include "../nnue_common.h" +#include "affine_transform.h" +#include "simd.h" + +/* + This file contains the definition for a fully connected layer (aka affine transform) with block sparse input. +*/ + +namespace Stockfish::Eval::NNUE::Layers { +#if defined(__GNUC__) // GCC, Clang, ICC + + static inline IndexType lsb_(std::uint32_t b) { + assert(b); + return IndexType(__builtin_ctzl(b)); + } + +#elif defined(_MSC_VER) // MSVC + + static inline IndexType lsb_(std::uint32_t b) { + assert(b); + unsigned long idx; + _BitScanForward(&idx, b); + return (IndexType) idx; + } + +#else // Compiler is neither GCC nor MSVC compatible + +#error "Compiler not supported." + +#endif + + +#if defined(USE_SSSE3) + alignas(CacheLineSize) static inline const std::array, 256> lookup_indices = [](){ + std::array, 256> v{}; + for (int i = 0; i < 256; ++i) + { + int j = i; + int k = 0; + while(j) + { + const IndexType lsbIndex = lsb_(std::uint32_t(j)); + j &= j - 1; + v[i][k] = lsbIndex; + ++k; + } + } + return v; + }(); + alignas(CacheLineSize) static inline const std::array lookup_count = [](){ + std::array v; + for (int i = 0; i < 256; ++i) + { + int j = i; + int k = 0; + while(j) + { + j &= j - 1; + ++k; + } + v[i] = k; + } + return v; + }(); + + // Find indices of nonzero numbers in an int32_t array + template + void find_nnz(const std::int32_t* input, std::uint16_t* out, IndexType& count_out) { +#if defined (USE_AVX512) + using vec_t = __m512i; + #define vec_nnz(a) _mm512_cmpgt_epi32_mask(a, _mm512_setzero_si512()) +#elif defined (USE_AVX2) + using vec_t = __m256i; + #define vec_nnz(a) _mm256_movemask_ps(_mm256_castsi256_ps(_mm256_cmpgt_epi32(a, _mm256_setzero_si256()))) +#elif defined (USE_SSSE3) + using vec_t = __m128i; + #define vec_nnz(a) _mm_movemask_ps(_mm_castsi128_ps(_mm_cmpgt_epi32(a, _mm_setzero_si128()))) +#endif + constexpr IndexType InputSimdWidth = sizeof(vec_t) / sizeof(std::int32_t); + // Inputs are processed InputSimdWidth at a time and outputs are processed 8 at a time so we process in chunks of max(InputSimdWidth, 8) + constexpr IndexType ChunkSize = std::max(InputSimdWidth, 8); + constexpr IndexType NumChunks = InputDimensions / ChunkSize; + constexpr IndexType InputsPerChunk = ChunkSize / InputSimdWidth; + constexpr IndexType OutputsPerChunk = ChunkSize / 8; + + const auto inputVector = reinterpret_cast(input); + IndexType count = 0; + __m128i base = _mm_set1_epi16(0); + __m128i increment = _mm_set1_epi16(8); + for (IndexType i = 0; i < NumChunks; ++i) + { + // bitmask of nonzero values in this chunk + unsigned nnz = 0; + for (IndexType j = 0; j < InputsPerChunk; ++j) + { + const vec_t inputChunk = inputVector[i * InputsPerChunk + j]; + nnz |= (unsigned)vec_nnz(inputChunk) << (j * InputSimdWidth); + } + for (IndexType j = 0; j < OutputsPerChunk; ++j) + { + const auto lookup = (nnz >> (j * 8)) & 0xFF; + const auto offsets = _mm_loadu_si128(reinterpret_cast(&lookup_indices[lookup])); + _mm_storeu_si128(reinterpret_cast<__m128i*>(out + count), _mm_add_epi16(base, offsets)); + count += lookup_count[lookup]; + base = _mm_add_epi16(base, increment); + } + } + count_out = count; + } +# undef vec_nnz +#endif + + // Sparse input implementation + template + class AffineTransformSparseInput { + public: + // Input/output type + // Input/output type + using InputType = std::uint8_t; + using OutputType = std::int32_t; + + // Number of input/output dimensions + static constexpr IndexType InputDimensions = InDims; + static constexpr IndexType OutputDimensions = OutDims; + + static_assert(OutputDimensions % 16 == 0, "Only implemented for OutputDimensions divisible by 16."); + + static constexpr IndexType PaddedInputDimensions = + ceil_to_multiple(InputDimensions, MaxSimdWidth); + static constexpr IndexType PaddedOutputDimensions = + ceil_to_multiple(OutputDimensions, MaxSimdWidth); + +#if defined (USE_SSSE3) + static constexpr IndexType ChunkSize = 4; +#else + static constexpr IndexType ChunkSize = 1; +#endif + + using OutputBuffer = OutputType[PaddedOutputDimensions]; + + // Hash value embedded in the evaluation file + static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) { + std::uint32_t hashValue = 0xCC03DAE4u; + hashValue += OutputDimensions; + hashValue ^= prevHash >> 1; + hashValue ^= prevHash << 31; + return hashValue; + } + + static IndexType get_weight_index_scrambled(IndexType i) + { + return + (i / ChunkSize) % (PaddedInputDimensions / ChunkSize) * OutputDimensions * ChunkSize + + i / PaddedInputDimensions * ChunkSize + + i % ChunkSize; + } + + static IndexType get_weight_index(IndexType i) + { +#if defined (USE_SSSE3) + return get_weight_index_scrambled(i); +#else + return i; +#endif + } + + // Read network parameters + bool read_parameters(std::istream& stream) { + read_little_endian(stream, biases, OutputDimensions); + for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) + weights[get_weight_index(i)] = read_little_endian(stream); + + return !stream.fail(); + } + + // Write network parameters + bool write_parameters(std::ostream& stream) const { + write_little_endian(stream, biases, OutputDimensions); + + for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) + write_little_endian(stream, weights[get_weight_index(i)]); + + return !stream.fail(); + } + // Forward propagation + const OutputType* propagate( + const InputType* input, OutputType* output) const { + +#if defined (USE_SSSE3) +#if defined (USE_AVX512) + using vec_t = __m512i; + #define vec_setzero _mm512_setzero_si512 + #define vec_set_32 _mm512_set1_epi32 + #define vec_add_dpbusd_32 Simd::m512_add_dpbusd_epi32 +#elif defined (USE_AVX2) + using vec_t = __m256i; + #define vec_setzero _mm256_setzero_si256 + #define vec_set_32 _mm256_set1_epi32 + #define vec_add_dpbusd_32 Simd::m256_add_dpbusd_epi32 +#elif defined (USE_SSSE3) + using vec_t = __m128i; + #define vec_setzero _mm_setzero_si128 + #define vec_set_32 _mm_set1_epi32 + #define vec_add_dpbusd_32 Simd::m128_add_dpbusd_epi32 +#endif + static constexpr IndexType OutputSimdWidth = sizeof(vec_t) / sizeof(OutputType); + + constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 8) / ChunkSize; + constexpr IndexType NumRegs = OutputDimensions / OutputSimdWidth; + std::uint16_t nnz[NumChunks]; + IndexType count; + + const auto input32 = reinterpret_cast(input); + + // Find indices of nonzero 32bit blocks + find_nnz(input32, nnz, count); + + const vec_t* biasvec = reinterpret_cast(biases); + vec_t acc[NumRegs]; + for (IndexType k = 0; k < NumRegs; ++k) + acc[k] = biasvec[k]; + + for (IndexType j = 0; j < count; ++j) + { + const auto i = nnz[j]; + const vec_t in = vec_set_32(input32[i]); + const auto col = reinterpret_cast(&weights[i * OutputDimensions * ChunkSize]); + for (IndexType k = 0; k < NumRegs; ++k) + vec_add_dpbusd_32(acc[k], in, col[k]); + } + + vec_t* outptr = reinterpret_cast(output); + for (IndexType k = 0; k < NumRegs; ++k) + outptr[k] = acc[k]; +# undef vec_setzero +# undef vec_set_32 +# undef vec_add_dpbusd_32 +#else + // Use dense implementation for the other architectures. + affine_transform_non_ssse3< + InputDimensions, + PaddedInputDimensions, + OutputDimensions>(output, weights, biases, input); +#endif + + return output; + } + + private: + using BiasType = OutputType; + using WeightType = std::int8_t; + + alignas(CacheLineSize) BiasType biases[OutputDimensions]; + alignas(CacheLineSize) WeightType weights[OutputDimensions * PaddedInputDimensions]; + }; + +} // namespace Stockfish::Eval::NNUE::Layers + +#endif // #ifndef NNUE_LAYERS_AFFINE_TRANSFORM_SPARSE_INPUT_H_INCLUDED diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/layers/clipped_relu.h b/DroidFishApp/src/main/cpp/stockfish/nnue/layers/clipped_relu.h index f94d308..51e562d 100644 --- a/DroidFishApp/src/main/cpp/stockfish/nnue/layers/clipped_relu.h +++ b/DroidFishApp/src/main/cpp/stockfish/nnue/layers/clipped_relu.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/layers/simd.h b/DroidFishApp/src/main/cpp/stockfish/nnue/layers/simd.h index 7b9e8fb..22c5198 100644 --- a/DroidFishApp/src/main/cpp/stockfish/nnue/layers/simd.h +++ b/DroidFishApp/src/main/cpp/stockfish/nnue/layers/simd.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -153,7 +153,7 @@ namespace Stockfish::Simd { asm( "vpdpbusd %[b0], %[a0], %[acc]\n\t" "vpdpbusd %[b1], %[a1], %[acc]\n\t" - : [acc]"+v"(acc) + : [acc]"+&v"(acc) : [a0]"v"(a0), [b0]"vm"(b0), [a1]"v"(a1), [b1]"vm"(b1) ); # else @@ -165,18 +165,19 @@ namespace Stockfish::Simd { __m512i tmp0 = _mm512_maddubs_epi16(a0, b0); __m512i tmp1 = _mm512_maddubs_epi16(a1, b1); asm( - "vpaddsw %[tmp0], %[tmp1], %[tmp0]\n\t" "vpmaddwd %[tmp0], %[ones], %[tmp0]\n\t" + "vpmaddwd %[tmp1], %[ones], %[tmp1]\n\t" + "vpaddd %[tmp0], %[tmp1], %[tmp0]\n\t" "vpaddd %[acc], %[tmp0], %[acc]\n\t" - : [acc]"+v"(acc), [tmp0]"+&v"(tmp0) - : [tmp1]"v"(tmp1), [ones]"v"(_mm512_set1_epi16(1)) + : [acc]"+v"(acc), [tmp0]"+&v"(tmp0), [tmp1]"+&v"(tmp1) + : [ones]"v"(_mm512_set1_epi16(1)) ); # else __m512i product0 = _mm512_maddubs_epi16(a0, b0); __m512i product1 = _mm512_maddubs_epi16(a1, b1); - product0 = _mm512_adds_epi16(product0, product1); product0 = _mm512_madd_epi16(product0, _mm512_set1_epi16(1)); - acc = _mm512_add_epi32(acc, product0); + product1 = _mm512_madd_epi16(product1, _mm512_set1_epi16(1)); + acc = _mm512_add_epi32(acc, _mm512_add_epi32(product0, product1)); # endif # endif } @@ -249,7 +250,7 @@ namespace Stockfish::Simd { asm( VNNI_PREFIX "vpdpbusd %[b0], %[a0], %[acc]\n\t" VNNI_PREFIX "vpdpbusd %[b1], %[a1], %[acc]\n\t" - : [acc]"+v"(acc) + : [acc]"+&v"(acc) : [a0]"v"(a0), [b0]"vm"(b0), [a1]"v"(a1), [b1]"vm"(b1) ); # else @@ -261,18 +262,19 @@ namespace Stockfish::Simd { __m256i tmp0 = _mm256_maddubs_epi16(a0, b0); __m256i tmp1 = _mm256_maddubs_epi16(a1, b1); asm( - "vpaddsw %[tmp0], %[tmp1], %[tmp0]\n\t" "vpmaddwd %[tmp0], %[ones], %[tmp0]\n\t" + "vpmaddwd %[tmp1], %[ones], %[tmp1]\n\t" + "vpaddd %[tmp0], %[tmp1], %[tmp0]\n\t" "vpaddd %[acc], %[tmp0], %[acc]\n\t" - : [acc]"+v"(acc), [tmp0]"+&v"(tmp0) - : [tmp1]"v"(tmp1), [ones]"v"(_mm256_set1_epi16(1)) + : [acc]"+v"(acc), [tmp0]"+&v"(tmp0), [tmp1]"+&v"(tmp1) + : [ones]"v"(_mm256_set1_epi16(1)) ); # else __m256i product0 = _mm256_maddubs_epi16(a0, b0); __m256i product1 = _mm256_maddubs_epi16(a1, b1); - product0 = _mm256_adds_epi16(product0, product1); product0 = _mm256_madd_epi16(product0, _mm256_set1_epi16(1)); - acc = _mm256_add_epi32(acc, product0); + product1 = _mm256_madd_epi16(product1, _mm256_set1_epi16(1)); + acc = _mm256_add_epi32(acc, _mm256_add_epi32(product0, product1)); # endif # endif } @@ -326,23 +328,37 @@ namespace Stockfish::Simd { __m128i tmp0 = _mm_maddubs_epi16(a0, b0); __m128i tmp1 = _mm_maddubs_epi16(a1, b1); asm( - "paddsw %[tmp1], %[tmp0]\n\t" "pmaddwd %[ones], %[tmp0]\n\t" + "pmaddwd %[ones], %[tmp1]\n\t" + "paddd %[tmp1], %[tmp0]\n\t" "paddd %[tmp0], %[acc]\n\t" - : [acc]"+v"(acc), [tmp0]"+&v"(tmp0) - : [tmp1]"v"(tmp1), [ones]"v"(_mm_set1_epi16(1)) + : [acc]"+v"(acc), [tmp0]"+&v"(tmp0), [tmp1]"+&v"(tmp1) + : [ones]"v"(_mm_set1_epi16(1)) ); # else __m128i product0 = _mm_maddubs_epi16(a0, b0); __m128i product1 = _mm_maddubs_epi16(a1, b1); - product0 = _mm_adds_epi16(product0, product1); product0 = _mm_madd_epi16(product0, _mm_set1_epi16(1)); - acc = _mm_add_epi32(acc, product0); + product1 = _mm_madd_epi16(product1, _mm_set1_epi16(1)); + acc = _mm_add_epi32(acc, _mm_add_epi32(product0, product1)); # endif } #endif +#if defined (USE_NEON_DOTPROD) + + [[maybe_unused]] static void dotprod_m128_add_dpbusd_epi32x2( + int32x4_t& acc, + int8x16_t a0, int8x16_t b0, + int8x16_t a1, int8x16_t b1) { + + acc = vdotq_s32(acc, a0, b0); + acc = vdotq_s32(acc, a1, b1); + } + +#endif + #if defined (USE_NEON) [[maybe_unused]] static int neon_m128_reduce_add_epi32(int32x4_t s) { diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/layers/sqr_clipped_relu.h b/DroidFishApp/src/main/cpp/stockfish/nnue/layers/sqr_clipped_relu.h index b603a27..3fbb243 100644 --- a/DroidFishApp/src/main/cpp/stockfish/nnue/layers/sqr_clipped_relu.h +++ b/DroidFishApp/src/main/cpp/stockfish/nnue/layers/sqr_clipped_relu.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -106,7 +106,7 @@ namespace Stockfish::Eval::NNUE::Layers { for (IndexType i = Start; i < InputDimensions; ++i) { output[i] = static_cast( - // realy should be /127 but we need to make it fast + // really should be /127 but we need to make it fast // needs to be accounted for in the trainer std::max(0ll, std::min(127ll, (((long long)input[i] * input[i]) >> (2 * WeightScaleBits)) / 128))); } diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_accumulator.h b/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_accumulator.h index 600483b..8eba449 100644 --- a/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_accumulator.h +++ b/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_accumulator.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_architecture.h b/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_architecture.h index cac8373..413dbb3 100644 --- a/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_architecture.h +++ b/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_architecture.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -27,6 +27,7 @@ #include "features/half_ka_v2_hm.h" +#include "layers/affine_transform_sparse_input.h" #include "layers/affine_transform.h" #include "layers/clipped_relu.h" #include "layers/sqr_clipped_relu.h" @@ -39,7 +40,7 @@ namespace Stockfish::Eval::NNUE { using FeatureSet = Features::HalfKAv2_hm; // Number of input feature dimensions after conversion -constexpr IndexType TransformedFeatureDimensions = 1024; +constexpr IndexType TransformedFeatureDimensions = 1536; constexpr IndexType PSQTBuckets = 8; constexpr IndexType LayerStacks = 8; @@ -48,7 +49,7 @@ struct Network static constexpr int FC_0_OUTPUTS = 15; static constexpr int FC_1_OUTPUTS = 32; - Layers::AffineTransform fc_0; + Layers::AffineTransformSparseInput fc_0; Layers::SqrClippedReLU ac_sqr_0; Layers::ClippedReLU ac_0; Layers::AffineTransform fc_1; @@ -72,22 +73,20 @@ struct Network // Read network parameters bool read_parameters(std::istream& stream) { - if (!fc_0.read_parameters(stream)) return false; - if (!ac_0.read_parameters(stream)) return false; - if (!fc_1.read_parameters(stream)) return false; - if (!ac_1.read_parameters(stream)) return false; - if (!fc_2.read_parameters(stream)) return false; - return true; + return fc_0.read_parameters(stream) + && ac_0.read_parameters(stream) + && fc_1.read_parameters(stream) + && ac_1.read_parameters(stream) + && fc_2.read_parameters(stream); } - // Read network parameters + // Write network parameters bool write_parameters(std::ostream& stream) const { - if (!fc_0.write_parameters(stream)) return false; - if (!ac_0.write_parameters(stream)) return false; - if (!fc_1.write_parameters(stream)) return false; - if (!ac_1.write_parameters(stream)) return false; - if (!fc_2.write_parameters(stream)) return false; - return true; + return fc_0.write_parameters(stream) + && ac_0.write_parameters(stream) + && fc_1.write_parameters(stream) + && ac_1.write_parameters(stream) + && fc_2.write_parameters(stream); } std::int32_t propagate(const TransformedFeatureType* transformedFeatures) diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_common.h b/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_common.h index 1795618..d338527 100644 --- a/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_common.h +++ b/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_common.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -57,6 +57,9 @@ namespace Stockfish::Eval::NNUE { // Size of cache line (in bytes) constexpr std::size_t CacheLineSize = 64; + constexpr const char Leb128MagicString[] = "COMPRESSED_LEB128"; + constexpr const std::size_t Leb128MagicStringSize = sizeof(Leb128MagicString) - 1; + // SIMD width (in bytes) #if defined(USE_AVX2) constexpr std::size_t SimdWidth = 32; @@ -159,6 +162,80 @@ namespace Stockfish::Eval::NNUE { write_little_endian(stream, values[i]); } + template + inline void read_leb_128(std::istream& stream, IntType* out, std::size_t count) { + static_assert(std::is_signed_v, "Not implemented for unsigned types"); + char leb128MagicString[Leb128MagicStringSize]; + stream.read(leb128MagicString, Leb128MagicStringSize); + assert(strncmp(Leb128MagicString, leb128MagicString, Leb128MagicStringSize) == 0); + const std::uint32_t BUF_SIZE = 4096; + std::uint8_t buf[BUF_SIZE]; + auto bytes_left = read_little_endian(stream); + std::uint32_t buf_pos = BUF_SIZE; + for (std::size_t i = 0; i < count; ++i) { + IntType result = 0; + size_t shift = 0; + do { + if (buf_pos == BUF_SIZE) { + stream.read(reinterpret_cast(buf), std::min(bytes_left, BUF_SIZE)); + buf_pos = 0; + } + std::uint8_t byte = buf[buf_pos++]; + --bytes_left; + result |= (byte & 0x7f) << shift; + shift += 7; + if ((byte & 0x80) == 0) { + out[i] = sizeof(IntType) * 8 <= shift || (byte & 0x40) == 0 ? result : result | ~((1 << shift) - 1); + break; + } + } while (shift < sizeof(IntType) * 8); + } + assert(bytes_left == 0); + } + + template + inline void write_leb_128(std::ostream& stream, const IntType* values, std::size_t count) { + static_assert(std::is_signed_v, "Not implemented for unsigned types"); + stream.write(Leb128MagicString, Leb128MagicStringSize); + std::uint32_t byte_count = 0; + for (std::size_t i = 0; i < count; ++i) { + IntType value = values[i]; + std::uint8_t byte; + do { + byte = value & 0x7f; + value >>= 7; + ++byte_count; + } while ((byte & 0x40) == 0 ? value != 0 : value != -1); + } + write_little_endian(stream, byte_count); + const std::uint32_t BUF_SIZE = 4096; + std::uint8_t buf[BUF_SIZE]; + std::uint32_t buf_pos = 0; + auto flush = [&]() { + if (buf_pos > 0) { + stream.write(reinterpret_cast(buf), buf_pos); + buf_pos = 0; + } + }; + auto write = [&](std::uint8_t byte) { + buf[buf_pos++] = byte; + if (buf_pos == BUF_SIZE) flush(); + }; + for (std::size_t i = 0; i < count; ++i) { + IntType value = values[i]; + while (true) { + std::uint8_t byte = value & 0x7f; + value >>= 7; + if ((byte & 0x40) == 0 ? value == 0 : value == -1) { + write(byte); + break; + } + write(byte | 0x80); + } + } + flush(); + } + } // namespace Stockfish::Eval::NNUE #endif // #ifndef NNUE_COMMON_H_INCLUDED diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_feature_transformer.h b/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_feature_transformer.h index b6dd54d..7571f39 100644 --- a/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_feature_transformer.h +++ b/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_feature_transformer.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -25,6 +25,7 @@ #include "nnue_architecture.h" #include // std::memset() +#include // std::pair namespace Stockfish::Eval::NNUE { @@ -41,8 +42,8 @@ namespace Stockfish::Eval::NNUE { "Per feature PSQT values cannot be processed at granularity lower than 8 at a time."); #ifdef USE_AVX512 - typedef __m512i vec_t; - typedef __m256i psqt_vec_t; + using vec_t = __m512i; + using psqt_vec_t = __m256i; #define vec_load(a) _mm512_load_si512(a) #define vec_store(a,b) _mm512_store_si512(a,b) #define vec_add_16(a,b) _mm512_add_epi16(a,b) @@ -65,8 +66,8 @@ namespace Stockfish::Eval::NNUE { #define MaxChunkSize 64 #elif USE_AVX2 - typedef __m256i vec_t; - typedef __m256i psqt_vec_t; + using vec_t = __m256i; + using psqt_vec_t = __m256i; #define vec_load(a) _mm256_load_si256(a) #define vec_store(a,b) _mm256_store_si256(a,b) #define vec_add_16(a,b) _mm256_add_epi16(a,b) @@ -89,8 +90,8 @@ namespace Stockfish::Eval::NNUE { #define MaxChunkSize 32 #elif USE_SSE2 - typedef __m128i vec_t; - typedef __m128i psqt_vec_t; + using vec_t = __m128i; + using psqt_vec_t = __m128i; #define vec_load(a) (*(a)) #define vec_store(a,b) *(a)=(b) #define vec_add_16(a,b) _mm_add_epi16(a,b) @@ -110,8 +111,8 @@ namespace Stockfish::Eval::NNUE { #define MaxChunkSize 16 #elif USE_MMX - typedef __m64 vec_t; - typedef __m64 psqt_vec_t; + using vec_t = __m64; + using psqt_vec_t = __m64; #define vec_load(a) (*(a)) #define vec_store(a,b) *(a)=(b) #define vec_add_16(a,b) _mm_add_pi16(a,b) @@ -138,8 +139,8 @@ namespace Stockfish::Eval::NNUE { #define MaxChunkSize 8 #elif USE_NEON - typedef int16x8_t vec_t; - typedef int32x4_t psqt_vec_t; + using vec_t = int16x8_t; + using psqt_vec_t = int32x4_t; #define vec_load(a) (*(a)) #define vec_store(a,b) *(a)=(b) #define vec_add_16(a,b) vaddq_s16(a,b) @@ -252,9 +253,9 @@ namespace Stockfish::Eval::NNUE { // Read network parameters bool read_parameters(std::istream& stream) { - read_little_endian(stream, biases , HalfDimensions ); - read_little_endian(stream, weights , HalfDimensions * InputDimensions); - read_little_endian(stream, psqtWeights, PSQTBuckets * InputDimensions); + read_leb_128(stream, biases , HalfDimensions ); + read_leb_128(stream, weights , HalfDimensions * InputDimensions); + read_leb_128(stream, psqtWeights, PSQTBuckets * InputDimensions); return !stream.fail(); } @@ -262,9 +263,9 @@ namespace Stockfish::Eval::NNUE { // Write network parameters bool write_parameters(std::ostream& stream) const { - write_little_endian(stream, biases , HalfDimensions ); - write_little_endian(stream, weights , HalfDimensions * InputDimensions); - write_little_endian(stream, psqtWeights, PSQTBuckets * InputDimensions); + write_leb_128(stream, biases , HalfDimensions ); + write_leb_128(stream, weights , HalfDimensions * InputDimensions); + write_leb_128(stream, psqtWeights, PSQTBuckets * InputDimensions); return !stream.fail(); } @@ -332,27 +333,16 @@ namespace Stockfish::Eval::NNUE { #endif return psqt; + } // end of function transform() - } // end of function transform() - - + void hint_common_access(const Position& pos) const { + hint_common_access_for_perspective(pos); + hint_common_access_for_perspective(pos); + } private: template - void update_accumulator(const Position& pos) const { - - // The size must be enough to contain the largest possible update. - // That might depend on the feature set and generally relies on the - // feature set's update cost calculation to be correct and never - // allow updates with more added/removed features than MaxActiveDimensions. - - #ifdef VECTOR - // Gcc-10.2 unnecessarily spills AVX2 registers if this array - // is defined in the VECTOR code below, once in each branch - vec_t acc[NumRegs]; - psqt_vec_t psqt[NumPsqtRegs]; - #endif - + [[nodiscard]] std::pair try_find_computed_accumulator(const Position& pos) const { // Look for a usable accumulator of an earlier position. We keep track // of the estimated gain in terms of features to be added/subtracted. StateInfo *st = pos.state(), *next = nullptr; @@ -367,218 +357,313 @@ namespace Stockfish::Eval::NNUE { next = st; st = st->previous; } + return { st, next }; + } - if (st->accumulator.computed[Perspective]) - { - if (next == nullptr) - return; + // NOTE: The parameter states_to_update is an array of position states, ending with nullptr. + // All states must be sequential, that is states_to_update[i] must either be reachable + // by repeatedly applying ->previous from states_to_update[i+1] or states_to_update[i] == nullptr. + // computed_st must be reachable by repeatedly applying ->previous on states_to_update[0], if not nullptr. + template + void update_accumulator_incremental(const Position& pos, StateInfo* computed_st, StateInfo* states_to_update[N]) const { + static_assert(N > 0); + assert(states_to_update[N-1] == nullptr); - // Update incrementally in two steps. First, we update the "next" - // accumulator. Then, we update the current accumulator (pos.state()). - - // Gather all features to be updated. - const Square ksq = pos.square(Perspective); - FeatureSet::IndexList removed[2], added[2]; - FeatureSet::append_changed_indices( - ksq, next->dirtyPiece, removed[0], added[0]); - for (StateInfo *st2 = pos.state(); st2 != next; st2 = st2->previous) - FeatureSet::append_changed_indices( - ksq, st2->dirtyPiece, removed[1], added[1]); - - // Mark the accumulators as computed. - next->accumulator.computed[Perspective] = true; - pos.state()->accumulator.computed[Perspective] = true; - - // Now update the accumulators listed in states_to_update[], where the last element is a sentinel. - StateInfo *states_to_update[3] = - { next, next == pos.state() ? nullptr : pos.state(), nullptr }; #ifdef VECTOR - for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j) + // Gcc-10.2 unnecessarily spills AVX2 registers if this array + // is defined in the VECTOR code below, once in each branch + vec_t acc[NumRegs]; + psqt_vec_t psqt[NumPsqtRegs]; + #endif + + if (states_to_update[0] == nullptr) + return; + + // Update incrementally going back through states_to_update. + + // Gather all features to be updated. + const Square ksq = pos.square(Perspective); + + // The size must be enough to contain the largest possible update. + // That might depend on the feature set and generally relies on the + // feature set's update cost calculation to be correct and never + // allow updates with more added/removed features than MaxActiveDimensions. + FeatureSet::IndexList removed[N-1], added[N-1]; + + { + int i = N-2; // last potential state to update. Skip last element because it must be nullptr. + while (states_to_update[i] == nullptr) + --i; + + StateInfo *st2 = states_to_update[i]; + + for (; i >= 0; --i) { - // Load accumulator - auto accTile = reinterpret_cast( - &st->accumulator.accumulation[Perspective][j * TileHeight]); - for (IndexType k = 0; k < NumRegs; ++k) - acc[k] = vec_load(&accTile[k]); + states_to_update[i]->accumulator.computed[Perspective] = true; - for (IndexType i = 0; states_to_update[i]; ++i) - { - // Difference calculation for the deactivated features - for (const auto index : removed[i]) - { - const IndexType offset = HalfDimensions * index + j * TileHeight; - auto column = reinterpret_cast(&weights[offset]); - for (IndexType k = 0; k < NumRegs; ++k) - acc[k] = vec_sub_16(acc[k], column[k]); - } + StateInfo* end_state = i == 0 ? computed_st : states_to_update[i - 1]; - // Difference calculation for the activated features - for (const auto index : added[i]) - { - const IndexType offset = HalfDimensions * index + j * TileHeight; - auto column = reinterpret_cast(&weights[offset]); - for (IndexType k = 0; k < NumRegs; ++k) - acc[k] = vec_add_16(acc[k], column[k]); - } - - // Store accumulator - accTile = reinterpret_cast( - &states_to_update[i]->accumulator.accumulation[Perspective][j * TileHeight]); - for (IndexType k = 0; k < NumRegs; ++k) - vec_store(&accTile[k], acc[k]); - } + for (; st2 != end_state; st2 = st2->previous) + FeatureSet::append_changed_indices( + ksq, st2->dirtyPiece, removed[i], added[i]); } + } - for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j) - { - // Load accumulator - auto accTilePsqt = reinterpret_cast( - &st->accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]); - for (std::size_t k = 0; k < NumPsqtRegs; ++k) - psqt[k] = vec_load_psqt(&accTilePsqt[k]); + StateInfo* st = computed_st; - for (IndexType i = 0; states_to_update[i]; ++i) - { - // Difference calculation for the deactivated features - for (const auto index : removed[i]) - { - const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight; - auto columnPsqt = reinterpret_cast(&psqtWeights[offset]); - for (std::size_t k = 0; k < NumPsqtRegs; ++k) - psqt[k] = vec_sub_psqt_32(psqt[k], columnPsqt[k]); - } + // Now update the accumulators listed in states_to_update[], where the last element is a sentinel. +#ifdef VECTOR + for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j) + { + // Load accumulator + auto accTile = reinterpret_cast( + &st->accumulator.accumulation[Perspective][j * TileHeight]); + for (IndexType k = 0; k < NumRegs; ++k) + acc[k] = vec_load(&accTile[k]); - // Difference calculation for the activated features - for (const auto index : added[i]) - { - const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight; - auto columnPsqt = reinterpret_cast(&psqtWeights[offset]); - for (std::size_t k = 0; k < NumPsqtRegs; ++k) - psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]); - } - - // Store accumulator - accTilePsqt = reinterpret_cast( - &states_to_update[i]->accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]); - for (std::size_t k = 0; k < NumPsqtRegs; ++k) - vec_store_psqt(&accTilePsqt[k], psqt[k]); - } - } - - #else for (IndexType i = 0; states_to_update[i]; ++i) { - std::memcpy(states_to_update[i]->accumulator.accumulation[Perspective], - st->accumulator.accumulation[Perspective], - HalfDimensions * sizeof(BiasType)); - - for (std::size_t k = 0; k < PSQTBuckets; ++k) - states_to_update[i]->accumulator.psqtAccumulation[Perspective][k] = st->accumulator.psqtAccumulation[Perspective][k]; - - st = states_to_update[i]; - // Difference calculation for the deactivated features for (const auto index : removed[i]) { - const IndexType offset = HalfDimensions * index; - - for (IndexType j = 0; j < HalfDimensions; ++j) - st->accumulator.accumulation[Perspective][j] -= weights[offset + j]; - - for (std::size_t k = 0; k < PSQTBuckets; ++k) - st->accumulator.psqtAccumulation[Perspective][k] -= psqtWeights[index * PSQTBuckets + k]; + const IndexType offset = HalfDimensions * index + j * TileHeight; + auto column = reinterpret_cast(&weights[offset]); + for (IndexType k = 0; k < NumRegs; ++k) + acc[k] = vec_sub_16(acc[k], column[k]); } // Difference calculation for the activated features for (const auto index : added[i]) - { - const IndexType offset = HalfDimensions * index; - - for (IndexType j = 0; j < HalfDimensions; ++j) - st->accumulator.accumulation[Perspective][j] += weights[offset + j]; - - for (std::size_t k = 0; k < PSQTBuckets; ++k) - st->accumulator.psqtAccumulation[Perspective][k] += psqtWeights[index * PSQTBuckets + k]; - } - } - #endif - } - else - { - // Refresh the accumulator - auto& accumulator = pos.state()->accumulator; - accumulator.computed[Perspective] = true; - FeatureSet::IndexList active; - FeatureSet::append_active_indices(pos, active); - - #ifdef VECTOR - for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j) - { - auto biasesTile = reinterpret_cast( - &biases[j * TileHeight]); - for (IndexType k = 0; k < NumRegs; ++k) - acc[k] = biasesTile[k]; - - for (const auto index : active) { const IndexType offset = HalfDimensions * index + j * TileHeight; auto column = reinterpret_cast(&weights[offset]); - - for (unsigned k = 0; k < NumRegs; ++k) + for (IndexType k = 0; k < NumRegs; ++k) acc[k] = vec_add_16(acc[k], column[k]); } - auto accTile = reinterpret_cast( - &accumulator.accumulation[Perspective][j * TileHeight]); - for (unsigned k = 0; k < NumRegs; k++) + // Store accumulator + accTile = reinterpret_cast( + &states_to_update[i]->accumulator.accumulation[Perspective][j * TileHeight]); + for (IndexType k = 0; k < NumRegs; ++k) vec_store(&accTile[k], acc[k]); } + } - for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j) + for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j) + { + // Load accumulator + auto accTilePsqt = reinterpret_cast( + &st->accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]); + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + psqt[k] = vec_load_psqt(&accTilePsqt[k]); + + for (IndexType i = 0; states_to_update[i]; ++i) { - for (std::size_t k = 0; k < NumPsqtRegs; ++k) - psqt[k] = vec_zero_psqt(); - - for (const auto index : active) + // Difference calculation for the deactivated features + for (const auto index : removed[i]) { const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight; auto columnPsqt = reinterpret_cast(&psqtWeights[offset]); + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + psqt[k] = vec_sub_psqt_32(psqt[k], columnPsqt[k]); + } + // Difference calculation for the activated features + for (const auto index : added[i]) + { + const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight; + auto columnPsqt = reinterpret_cast(&psqtWeights[offset]); for (std::size_t k = 0; k < NumPsqtRegs; ++k) psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]); } - auto accTilePsqt = reinterpret_cast( - &accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]); + // Store accumulator + accTilePsqt = reinterpret_cast( + &states_to_update[i]->accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]); for (std::size_t k = 0; k < NumPsqtRegs; ++k) vec_store_psqt(&accTilePsqt[k], psqt[k]); } + } - #else - std::memcpy(accumulator.accumulation[Perspective], biases, +#else + for (IndexType i = 0; states_to_update[i]; ++i) + { + std::memcpy(states_to_update[i]->accumulator.accumulation[Perspective], + st->accumulator.accumulation[Perspective], HalfDimensions * sizeof(BiasType)); for (std::size_t k = 0; k < PSQTBuckets; ++k) - accumulator.psqtAccumulation[Perspective][k] = 0; + states_to_update[i]->accumulator.psqtAccumulation[Perspective][k] = st->accumulator.psqtAccumulation[Perspective][k]; - for (const auto index : active) + st = states_to_update[i]; + + // Difference calculation for the deactivated features + for (const auto index : removed[i]) { const IndexType offset = HalfDimensions * index; for (IndexType j = 0; j < HalfDimensions; ++j) - accumulator.accumulation[Perspective][j] += weights[offset + j]; + st->accumulator.accumulation[Perspective][j] -= weights[offset + j]; for (std::size_t k = 0; k < PSQTBuckets; ++k) - accumulator.psqtAccumulation[Perspective][k] += psqtWeights[index * PSQTBuckets + k]; + st->accumulator.psqtAccumulation[Perspective][k] -= psqtWeights[index * PSQTBuckets + k]; + } + + // Difference calculation for the activated features + for (const auto index : added[i]) + { + const IndexType offset = HalfDimensions * index; + + for (IndexType j = 0; j < HalfDimensions; ++j) + st->accumulator.accumulation[Perspective][j] += weights[offset + j]; + + for (std::size_t k = 0; k < PSQTBuckets; ++k) + st->accumulator.psqtAccumulation[Perspective][k] += psqtWeights[index * PSQTBuckets + k]; } - #endif } +#endif #if defined(USE_MMX) _mm_empty(); #endif } + template + void update_accumulator_refresh(const Position& pos) const { + #ifdef VECTOR + // Gcc-10.2 unnecessarily spills AVX2 registers if this array + // is defined in the VECTOR code below, once in each branch + vec_t acc[NumRegs]; + psqt_vec_t psqt[NumPsqtRegs]; + #endif + + // Refresh the accumulator + // Could be extracted to a separate function because it's done in 2 places, + // but it's unclear if compilers would correctly handle register allocation. + auto& accumulator = pos.state()->accumulator; + accumulator.computed[Perspective] = true; + FeatureSet::IndexList active; + FeatureSet::append_active_indices(pos, active); + +#ifdef VECTOR + for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j) + { + auto biasesTile = reinterpret_cast( + &biases[j * TileHeight]); + for (IndexType k = 0; k < NumRegs; ++k) + acc[k] = biasesTile[k]; + + for (const auto index : active) + { + const IndexType offset = HalfDimensions * index + j * TileHeight; + auto column = reinterpret_cast(&weights[offset]); + + for (unsigned k = 0; k < NumRegs; ++k) + acc[k] = vec_add_16(acc[k], column[k]); + } + + auto accTile = reinterpret_cast( + &accumulator.accumulation[Perspective][j * TileHeight]); + for (unsigned k = 0; k < NumRegs; k++) + vec_store(&accTile[k], acc[k]); + } + + for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j) + { + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + psqt[k] = vec_zero_psqt(); + + for (const auto index : active) + { + const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight; + auto columnPsqt = reinterpret_cast(&psqtWeights[offset]); + + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]); + } + + auto accTilePsqt = reinterpret_cast( + &accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]); + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + vec_store_psqt(&accTilePsqt[k], psqt[k]); + } + +#else + std::memcpy(accumulator.accumulation[Perspective], biases, + HalfDimensions * sizeof(BiasType)); + + for (std::size_t k = 0; k < PSQTBuckets; ++k) + accumulator.psqtAccumulation[Perspective][k] = 0; + + for (const auto index : active) + { + const IndexType offset = HalfDimensions * index; + + for (IndexType j = 0; j < HalfDimensions; ++j) + accumulator.accumulation[Perspective][j] += weights[offset + j]; + + for (std::size_t k = 0; k < PSQTBuckets; ++k) + accumulator.psqtAccumulation[Perspective][k] += psqtWeights[index * PSQTBuckets + k]; + } +#endif + + #if defined(USE_MMX) + _mm_empty(); + #endif + } + + template + void hint_common_access_for_perspective(const Position& pos) const { + + // Works like update_accumulator, but performs less work. + // Updates ONLY the accumulator for pos. + + // Look for a usable accumulator of an earlier position. We keep track + // of the estimated gain in terms of features to be added/subtracted. + // Fast early exit. + if (pos.state()->accumulator.computed[Perspective]) + return; + + auto [oldest_st, _] = try_find_computed_accumulator(pos); + + if (oldest_st->accumulator.computed[Perspective]) + { + // Only update current position accumulator to minimize work. + StateInfo* states_to_update[2] = { pos.state(), nullptr }; + update_accumulator_incremental(pos, oldest_st, states_to_update); + } + else + { + update_accumulator_refresh(pos); + } + } + + template + void update_accumulator(const Position& pos) const { + + auto [oldest_st, next] = try_find_computed_accumulator(pos); + + if (oldest_st->accumulator.computed[Perspective]) + { + if (next == nullptr) + return; + + // Now update the accumulators listed in states_to_update[], where the last element is a sentinel. + // Currently we update 2 accumulators. + // 1. for the current position + // 2. the next accumulator after the computed one + // The heuristic may change in the future. + StateInfo *states_to_update[3] = + { next, next == pos.state() ? nullptr : pos.state(), nullptr }; + + update_accumulator_incremental(pos, oldest_st, states_to_update); + } + else + { + update_accumulator_refresh(pos); + } + } + alignas(CacheLineSize) BiasType biases[HalfDimensions]; alignas(CacheLineSize) WeightType weights[HalfDimensions * InputDimensions]; alignas(CacheLineSize) PSQTWeightType psqtWeights[InputDimensions * PSQTBuckets]; diff --git a/DroidFishApp/src/main/cpp/stockfish/pawns.cpp b/DroidFishApp/src/main/cpp/stockfish/pawns.cpp index fdcfa02..0ccafd9 100644 --- a/DroidFishApp/src/main/cpp/stockfish/pawns.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/pawns.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/DroidFishApp/src/main/cpp/stockfish/pawns.h b/DroidFishApp/src/main/cpp/stockfish/pawns.h index af0370f..d20e7c2 100644 --- a/DroidFishApp/src/main/cpp/stockfish/pawns.h +++ b/DroidFishApp/src/main/cpp/stockfish/pawns.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -61,7 +61,7 @@ struct Entry { int blockedCount; }; -typedef HashTable Table; +using Table = HashTable; Entry* probe(const Position& pos); diff --git a/DroidFishApp/src/main/cpp/stockfish/position.cpp b/DroidFishApp/src/main/cpp/stockfish/position.cpp index 5befcaf..2a9d798 100644 --- a/DroidFishApp/src/main/cpp/stockfish/position.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/position.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,6 +22,7 @@ #include // For std::memset, std::memcmp #include #include +#include #include "bitboard.h" #include "misc.h" @@ -46,7 +47,7 @@ namespace Zobrist { namespace { -const string PieceToChar(" PNBRQK pnbrqk"); +constexpr std::string_view PieceToChar(" PNBRQK pnbrqk"); constexpr Piece Pieces[] = { W_PAWN, W_KNIGHT, W_BISHOP, W_ROOK, W_QUEEN, W_KING, B_PAWN, B_KNIGHT, B_BISHOP, B_ROOK, B_QUEEN, B_KING }; @@ -96,7 +97,7 @@ std::ostream& operator<<(std::ostream& os, const Position& pos) { // Marcel van Kervinck's cuckoo algorithm for fast detection of "upcoming repetition" // situations. Description of the algorithm in the following paper: -// https://marcelk.net/2013-04-06/paper/upcoming-rep-v2.pdf +// http://web.archive.org/web/20201107002606/https://marcelk.net/2013-04-06/paper/upcoming-rep-v2.pdf // First and second hash functions for indexing the cuckoo tables inline int H1(Key h) { return h & 0x1fff; } @@ -281,7 +282,7 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th chess960 = isChess960; thisThread = th; - set_state(st); + set_state(); assert(pos_is_ok()); @@ -312,60 +313,59 @@ void Position::set_castling_right(Color c, Square rfrom) { /// Position::set_check_info() sets king attacks to detect if a move gives check -void Position::set_check_info(StateInfo* si) const { +void Position::set_check_info() const { - si->blockersForKing[WHITE] = slider_blockers(pieces(BLACK), square(WHITE), si->pinners[BLACK]); - si->blockersForKing[BLACK] = slider_blockers(pieces(WHITE), square(BLACK), si->pinners[WHITE]); + st->blockersForKing[WHITE] = slider_blockers(pieces(BLACK), square(WHITE), st->pinners[BLACK]); + st->blockersForKing[BLACK] = slider_blockers(pieces(WHITE), square(BLACK), st->pinners[WHITE]); Square ksq = square(~sideToMove); - si->checkSquares[PAWN] = pawn_attacks_bb(~sideToMove, ksq); - si->checkSquares[KNIGHT] = attacks_bb(ksq); - si->checkSquares[BISHOP] = attacks_bb(ksq, pieces()); - si->checkSquares[ROOK] = attacks_bb(ksq, pieces()); - si->checkSquares[QUEEN] = si->checkSquares[BISHOP] | si->checkSquares[ROOK]; - si->checkSquares[KING] = 0; + st->checkSquares[PAWN] = pawn_attacks_bb(~sideToMove, ksq); + st->checkSquares[KNIGHT] = attacks_bb(ksq); + st->checkSquares[BISHOP] = attacks_bb(ksq, pieces()); + st->checkSquares[ROOK] = attacks_bb(ksq, pieces()); + st->checkSquares[QUEEN] = st->checkSquares[BISHOP] | st->checkSquares[ROOK]; + st->checkSquares[KING] = 0; } /// Position::set_state() computes the hash keys of the position, and other /// data that once computed is updated incrementally as moves are made. -/// The function is only used when a new position is set up, and to verify -/// the correctness of the StateInfo data when running in debug mode. +/// The function is only used when a new position is set up -void Position::set_state(StateInfo* si) const { +void Position::set_state() const { - si->key = si->materialKey = 0; - si->pawnKey = Zobrist::noPawns; - si->nonPawnMaterial[WHITE] = si->nonPawnMaterial[BLACK] = VALUE_ZERO; - si->checkersBB = attackers_to(square(sideToMove)) & pieces(~sideToMove); + st->key = st->materialKey = 0; + st->pawnKey = Zobrist::noPawns; + st->nonPawnMaterial[WHITE] = st->nonPawnMaterial[BLACK] = VALUE_ZERO; + st->checkersBB = attackers_to(square(sideToMove)) & pieces(~sideToMove); - set_check_info(si); + set_check_info(); for (Bitboard b = pieces(); b; ) { Square s = pop_lsb(b); Piece pc = piece_on(s); - si->key ^= Zobrist::psq[pc][s]; + st->key ^= Zobrist::psq[pc][s]; if (type_of(pc) == PAWN) - si->pawnKey ^= Zobrist::psq[pc][s]; + st->pawnKey ^= Zobrist::psq[pc][s]; else if (type_of(pc) != KING) - si->nonPawnMaterial[color_of(pc)] += PieceValue[MG][pc]; + st->nonPawnMaterial[color_of(pc)] += PieceValue[MG][pc]; } - if (si->epSquare != SQ_NONE) - si->key ^= Zobrist::enpassant[file_of(si->epSquare)]; + if (st->epSquare != SQ_NONE) + st->key ^= Zobrist::enpassant[file_of(st->epSquare)]; if (sideToMove == BLACK) - si->key ^= Zobrist::side; + st->key ^= Zobrist::side; - si->key ^= Zobrist::castling[si->castlingRights]; + st->key ^= Zobrist::castling[st->castlingRights]; for (Piece pc : Pieces) for (int cnt = 0; cnt < pieceCount[pc]; ++cnt) - si->materialKey ^= Zobrist::psq[pc][cnt]; + st->materialKey ^= Zobrist::psq[pc][cnt]; } @@ -568,8 +568,7 @@ bool Position::pseudo_legal(const Move m) const { : MoveList(*this).contains(m); // Is not a promotion, so promotion piece must be empty - if (promotion_type(m) - KNIGHT != NO_PIECE_TYPE) - return false; + assert(promotion_type(m) - KNIGHT == NO_PIECE_TYPE); // If the 'from' square is not occupied by a piece belonging to the side to // move, the move is obviously not legal. @@ -765,9 +764,6 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { // Update board and piece lists remove_piece(capsq); - if (type_of(m) == EN_PASSANT) - board[capsq] = NO_PIECE; - // Update material hash key and prefetch access to materialTable k ^= Zobrist::psq[captured][capsq]; st->materialKey ^= Zobrist::psq[captured][pieceCount[captured]]; @@ -868,7 +864,7 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { sideToMove = ~sideToMove; // Update king attacks used for fast check detection - set_check_info(st); + set_check_info(); // Calculate the repetition info. It is the ply distance from the previous // occurrence of the same position, negative in the 3-fold case, or zero @@ -1020,7 +1016,7 @@ void Position::do_null_move(StateInfo& newSt) { sideToMove = ~sideToMove; - set_check_info(st); + set_check_info(); st->repetition = 0; @@ -1065,7 +1061,7 @@ Key Position::key_after(Move m) const { /// SEE value of move is greater or equal to the given threshold. We'll use an /// algorithm similar to alpha-beta pruning with a null window. -bool Position::see_ge(Move m, Value threshold) const { +bool Position::see_ge(Move m, Bitboard& occupied, Value threshold) const { assert(is_ok(m)); @@ -1084,7 +1080,7 @@ bool Position::see_ge(Move m, Value threshold) const { return true; assert(color_of(piece_on(from)) == sideToMove); - Bitboard occupied = pieces() ^ from ^ to; + occupied = pieces() ^ from ^ to; // xoring to is important for pinned piece logic Color stm = sideToMove; Bitboard attackers = attackers_to(to, occupied); Bitboard stmAttackers, bb; @@ -1115,45 +1111,44 @@ bool Position::see_ge(Move m, Value threshold) const { // the bitboard 'attackers' any X-ray attackers behind it. if ((bb = stmAttackers & pieces(PAWN))) { + occupied ^= least_significant_square_bb(bb); if ((swap = PawnValueMg - swap) < res) break; - occupied ^= least_significant_square_bb(bb); attackers |= attacks_bb(to, occupied) & pieces(BISHOP, QUEEN); } else if ((bb = stmAttackers & pieces(KNIGHT))) { + occupied ^= least_significant_square_bb(bb); if ((swap = KnightValueMg - swap) < res) break; - - occupied ^= least_significant_square_bb(bb); } else if ((bb = stmAttackers & pieces(BISHOP))) { + occupied ^= least_significant_square_bb(bb); if ((swap = BishopValueMg - swap) < res) break; - occupied ^= least_significant_square_bb(bb); attackers |= attacks_bb(to, occupied) & pieces(BISHOP, QUEEN); } else if ((bb = stmAttackers & pieces(ROOK))) { + occupied ^= least_significant_square_bb(bb); if ((swap = RookValueMg - swap) < res) break; - occupied ^= least_significant_square_bb(bb); attackers |= attacks_bb(to, occupied) & pieces(ROOK, QUEEN); } else if ((bb = stmAttackers & pieces(QUEEN))) { + occupied ^= least_significant_square_bb(bb); if ((swap = QueenValueMg - swap) < res) break; - occupied ^= least_significant_square_bb(bb); attackers |= (attacks_bb(to, occupied) & pieces(BISHOP, QUEEN)) | (attacks_bb(to, occupied) & pieces(ROOK , QUEEN)); } @@ -1167,6 +1162,11 @@ bool Position::see_ge(Move m, Value threshold) const { return bool(res); } +bool Position::see_ge(Move m, Value threshold) const { + Bitboard occupied; + return see_ge(m, occupied, threshold); +} + /// Position::is_draw() tests whether the position is drawn by 50-move rule /// or by repetition. It does not detect stalemates. @@ -1323,12 +1323,6 @@ bool Position::pos_is_ok() const { if (p1 != p2 && (pieces(p1) & pieces(p2))) assert(0 && "pos_is_ok: Bitboards"); - StateInfo si = *st; - ASSERT_ALIGNED(&si, Eval::NNUE::CacheLineSize); - - set_state(&si); - if (std::memcmp(&si, st, sizeof(StateInfo))) - assert(0 && "pos_is_ok: State"); for (Piece pc : Pieces) if ( pieceCount[pc] != popcount(pieces(color_of(pc), type_of(pc))) diff --git a/DroidFishApp/src/main/cpp/stockfish/position.h b/DroidFishApp/src/main/cpp/stockfish/position.h index 078ff5b..2e6014d 100644 --- a/DroidFishApp/src/main/cpp/stockfish/position.h +++ b/DroidFishApp/src/main/cpp/stockfish/position.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -68,7 +68,7 @@ struct StateInfo { /// start position to the position just before the search starts). Needed by /// 'draw by repetition' detection. Use a std::deque because pointers to /// elements are not invalidated upon list resizing. -typedef std::unique_ptr> StateListPtr; +using StateListPtr = std::unique_ptr>; /// Position class stores information regarding the board representation as @@ -92,10 +92,9 @@ public: // Position representation Bitboard pieces(PieceType pt) const; - Bitboard pieces(PieceType pt1, PieceType pt2) const; + template Bitboard pieces(PieceType pt, PieceTypes... pts) const; Bitboard pieces(Color c) const; - Bitboard pieces(Color c, PieceType pt) const; - Bitboard pieces(Color c, PieceType pt1, PieceType pt2) const; + template Bitboard pieces(Color c, PieceTypes... pts) const; Piece piece_on(Square s) const; Square ep_square() const; bool empty(Square s) const; @@ -126,6 +125,7 @@ public: bool legal(Move m) const; bool pseudo_legal(const Move m) const; bool capture(Move m) const; + bool capture_stage(Move m) const; bool gives_check(Move m) const; Piece moved_piece(Move m) const; Piece captured_piece() const; @@ -144,6 +144,7 @@ public: // Static Exchange Evaluation bool see_ge(Move m, Value threshold = VALUE_ZERO) const; + bool see_ge(Move m, Bitboard& occupied, Value threshold = VALUE_ZERO) const; // Accessing hash keys Key key() const; @@ -178,8 +179,8 @@ public: private: // Initialization helpers (used while setting up a position) void set_castling_right(Color c, Square rfrom); - void set_state(StateInfo* si) const; - void set_check_info(StateInfo* si) const; + void set_state() const; + void set_check_info() const; // Other helpers void move_piece(Square from, Square to); @@ -204,7 +205,7 @@ private: bool chess960; }; -extern std::ostream& operator<<(std::ostream& os, const Position& pos); +std::ostream& operator<<(std::ostream& os, const Position& pos); inline Color Position::side_to_move() const { return sideToMove; @@ -227,20 +228,18 @@ inline Bitboard Position::pieces(PieceType pt = ALL_PIECES) const { return byTypeBB[pt]; } -inline Bitboard Position::pieces(PieceType pt1, PieceType pt2) const { - return pieces(pt1) | pieces(pt2); +template +inline Bitboard Position::pieces(PieceType pt, PieceTypes... pts) const { + return pieces(pt) | pieces(pts...); } inline Bitboard Position::pieces(Color c) const { return byColorBB[c]; } -inline Bitboard Position::pieces(Color c, PieceType pt) const { - return pieces(c) & pieces(pt); -} - -inline Bitboard Position::pieces(Color c, PieceType pt1, PieceType pt2) const { - return pieces(c) & (pieces(pt1) | pieces(pt2)); +template +inline Bitboard Position::pieces(Color c, PieceTypes... pts) const { + return pieces(c) & pieces(pts...); } template inline int Position::count(Color c) const { @@ -383,8 +382,16 @@ inline bool Position::is_chess960() const { inline bool Position::capture(Move m) const { assert(is_ok(m)); - // Castling is encoded as "king captures rook" - return (!empty(to_sq(m)) && type_of(m) != CASTLING) || type_of(m) == EN_PASSANT; + return (!empty(to_sq(m)) && type_of(m) != CASTLING) + || type_of(m) == EN_PASSANT; +} + +// returns true if a move is generated from the capture stage +// having also queen promotions covered, i.e. consistency with the capture stage move generation +// is needed to avoid the generation of duplicate moves. +inline bool Position::capture_stage(Move m) const { + assert(is_ok(m)); + return capture(m) || promotion_type(m) == QUEEN; } inline Piece Position::captured_piece() const { diff --git a/DroidFishApp/src/main/cpp/stockfish/psqt.cpp b/DroidFishApp/src/main/cpp/stockfish/psqt.cpp index ca5664c..d3ebb20 100644 --- a/DroidFishApp/src/main/cpp/stockfish/psqt.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/psqt.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/DroidFishApp/src/main/cpp/stockfish/psqt.h b/DroidFishApp/src/main/cpp/stockfish/psqt.h index 4ee0e37..9630f44 100644 --- a/DroidFishApp/src/main/cpp/stockfish/psqt.h +++ b/DroidFishApp/src/main/cpp/stockfish/psqt.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -30,7 +30,7 @@ namespace Stockfish::PSQT extern Score psq[PIECE_NB][SQUARE_NB]; // Fill psqt array from a set of internally linked parameters -extern void init(); +void init(); } // namespace Stockfish::PSQT diff --git a/DroidFishApp/src/main/cpp/stockfish/search.cpp b/DroidFishApp/src/main/cpp/stockfish/search.cpp index c8163d1..740ad71 100644 --- a/DroidFishApp/src/main/cpp/stockfish/search.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/search.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -34,6 +34,7 @@ #include "tt.h" #include "uci.h" #include "syzygy/tbprobe.h" +#include "nnue/evaluate_nnue.h" namespace Stockfish { @@ -63,7 +64,7 @@ namespace { // Futility margin Value futility_margin(Depth d, bool improving) { - return Value(165 * (d - improving)); + return Value(140 * (d - improving)); } // Reductions lookup table, initialized at startup @@ -71,7 +72,7 @@ namespace { Depth reduction(bool i, Depth d, int mn, Value delta, Value rootDelta) { int r = Reductions[d] * Reductions[mn]; - return (r + 1642 - int(delta) * 1024 / int(rootDelta)) / 1024 + (!i && r > 916); + return (r + 1372 - int(delta) * 1073 / int(rootDelta)) / 1024 + (!i && r > 936); } constexpr int futility_move_count(bool improving, Depth depth) { @@ -81,7 +82,7 @@ namespace { // History and stats update bonus, based on depth int stat_bonus(Depth d) { - return std::min((12 * d + 282) * d - 349 , 1594); + return std::min(336 * d - 547, 1561); } // Add a small random component to draw evaluations to avoid 3-fold blindness @@ -96,7 +97,10 @@ namespace { struct Skill { Skill(int skill_level, int uci_elo) { if (uci_elo) - level = std::clamp(std::pow((uci_elo - 1346.6) / 143.4, 1 / 0.806), 0.0, 20.0); + { + double e = double(uci_elo - 1320) / (3190 - 1320); + level = std::clamp((((37.2473 * e - 40.8525) * e + 22.2943) * e - 0.311438), 0.0, 19.0); + } else level = double(skill_level); } @@ -158,7 +162,7 @@ namespace { void Search::init() { for (int i = 1; i < MAX_MOVES; ++i) - Reductions[i] = int((20.26 + std::log(Threads.size()) / 2) * std::log(i)); + Reductions[i] = int((20.57 + std::log(Threads.size()) / 2) * std::log(i)); } @@ -239,9 +243,6 @@ void MainThread::search() { bestPreviousScore = bestThread->rootMoves[0].score; bestPreviousAverageScore = bestThread->rootMoves[0].averageScore; - for (Thread* th : Threads) - th->previousDepth = bestThread->completedDepth; - // Send again PV info if we have a new best thread if (bestThread != this) sync_cout << UCI::pv(bestThread->rootPos, bestThread->completedDepth) << sync_endl; @@ -276,16 +277,18 @@ void Thread::search() { int iterIdx = 0; std::memset(ss-7, 0, 10 * sizeof(Stack)); - for (int i = 7; i > 0; i--) + for (int i = 7; i > 0; --i) + { (ss-i)->continuationHistory = &this->continuationHistory[0][0][NO_PIECE][0]; // Use as a sentinel + (ss-i)->staticEval = VALUE_NONE; + } for (int i = 0; i <= MAX_PLY + 2; ++i) (ss+i)->ply = i; ss->pv = pv; - bestValue = delta = alpha = -VALUE_INFINITE; - beta = VALUE_INFINITE; + bestValue = -VALUE_INFINITE; if (mainThread) { @@ -307,10 +310,6 @@ void Thread::search() { multiPV = std::min(multiPV, rootMoves.size()); - complexityAverage.set(155, 1); - - optimism[us] = optimism[~us] = VALUE_ZERO; - int searchAgainCounter = 0; // Iterative deepening loop until requested to stop or the target depth is reached @@ -331,7 +330,7 @@ void Thread::search() { pvLast = 0; if (!Threads.increaseDepth) - searchAgainCounter++; + searchAgainCounter++; // MultiPV loop. We perform a full root search for each PV line for (pvIdx = 0; pvIdx < multiPV && !Threads.stop; ++pvIdx) @@ -348,18 +347,15 @@ void Thread::search() { selDepth = 0; // Reset aspiration window starting size - if (rootDepth >= 4) - { - Value prev = rootMoves[pvIdx].averageScore; - delta = Value(10) + int(prev) * prev / 15620; - alpha = std::max(prev - delta,-VALUE_INFINITE); - beta = std::min(prev + delta, VALUE_INFINITE); + Value prev = rootMoves[pvIdx].averageScore; + delta = Value(10) + int(prev) * prev / 15799; + alpha = std::max(prev - delta,-VALUE_INFINITE); + beta = std::min(prev + delta, VALUE_INFINITE); - // Adjust optimism based on root move's previousScore - int opt = 118 * prev / (std::abs(prev) + 169); - optimism[ us] = Value(opt); - optimism[~us] = -optimism[us]; - } + // Adjust optimism based on root move's previousScore + int opt = 109 * prev / (std::abs(prev) + 141); + optimism[ us] = Value(opt); + optimism[~us] = -optimism[us]; // Start with a small aspiration window and, in the case of a fail // high/low, re-search with a bigger window until we don't fail @@ -413,7 +409,7 @@ void Thread::search() { else break; - delta += delta / 4 + 2; + delta += delta / 3; assert(alpha >= -VALUE_INFINITE && beta <= VALUE_INFINITE); } @@ -429,9 +425,10 @@ void Thread::search() { if (!Threads.stop) completedDepth = rootDepth; - if (rootMoves[0].pv[0] != lastBestMove) { - lastBestMove = rootMoves[0].pv[0]; - lastBestMoveDepth = rootDepth; + if (rootMoves[0].pv[0] != lastBestMove) + { + lastBestMove = rootMoves[0].pv[0]; + lastBestMoveDepth = rootDepth; } // Have we found a "mate in x"? @@ -459,18 +456,16 @@ void Thread::search() { && !Threads.stop && !mainThread->stopOnPonderhit) { - double fallingEval = (71 + 12 * (mainThread->bestPreviousAverageScore - bestValue) - + 6 * (mainThread->iterValue[iterIdx] - bestValue)) / 656.7; + double fallingEval = (69 + 13 * (mainThread->bestPreviousAverageScore - bestValue) + + 6 * (mainThread->iterValue[iterIdx] - bestValue)) / 619.6; fallingEval = std::clamp(fallingEval, 0.5, 1.5); // If the bestMove is stable over several iterations, reduce time accordingly - timeReduction = lastBestMoveDepth + 9 < completedDepth ? 1.37 : 0.65; - double reduction = (1.4 + mainThread->previousTimeReduction) / (2.15 * timeReduction); - double bestMoveInstability = 1 + 1.7 * totBestMoveChanges / Threads.size(); - int complexity = mainThread->complexityAverage.value(); - double complexPosition = std::min(1.0 + (complexity - 261) / 1738.7, 1.5); + timeReduction = lastBestMoveDepth + 8 < completedDepth ? 1.57 : 0.65; + double reduction = (1.4 + mainThread->previousTimeReduction) / (2.08 * timeReduction); + double bestMoveInstability = 1 + 1.8 * totBestMoveChanges / Threads.size(); - double totalTime = Time.optimum() * fallingEval * reduction * bestMoveInstability * complexPosition; + double totalTime = Time.optimum() * fallingEval * reduction * bestMoveInstability; // Cap used time in case of a single legal move for a better viewer experience in tournaments // yielding correct scores and sufficiently fast moves. @@ -487,12 +482,11 @@ void Thread::search() { else Threads.stop = true; } - else if ( Threads.increaseDepth - && !mainThread->ponder - && Time.elapsed() > totalTime * 0.53) - Threads.increaseDepth = false; + else if ( !mainThread->ponder + && Time.elapsed() > totalTime * 0.50) + Threads.increaseDepth = false; else - Threads.increaseDepth = true; + Threads.increaseDepth = true; } mainThread->iterValue[iterIdx] = bestValue; @@ -520,7 +514,6 @@ namespace { constexpr bool PvNode = nodeType != NonPV; constexpr bool rootNode = nodeType == Root; - const Depth maxNextDepth = rootNode ? depth : depth + 1; // Check if we have an upcoming move which draws by repetition, or // if the opponent had an alternative move earlier to this position. @@ -555,7 +548,7 @@ namespace { bool givesCheck, improving, priorCapture, singularQuietLMR; bool capture, moveCountPruning, ttCapture; Piece movedPiece; - int moveCount, captureCount, quietCount, improvement, complexity; + int moveCount, captureCount, quietCount, improvement; // Step 1. Initialize node Thread* thisThread = pos.this_thread(); @@ -599,52 +592,45 @@ namespace { assert(0 <= ss->ply && ss->ply < MAX_PLY); - (ss+1)->ttPv = false; (ss+1)->excludedMove = bestMove = MOVE_NONE; (ss+2)->killers[0] = (ss+2)->killers[1] = MOVE_NONE; (ss+2)->cutoffCnt = 0; ss->doubleExtensions = (ss-1)->doubleExtensions; - Square prevSq = to_sq((ss-1)->currentMove); + Square prevSq = is_ok((ss-1)->currentMove) ? to_sq((ss-1)->currentMove) : SQ_NONE; + ss->statScore = 0; - // Initialize statScore to zero for the grandchildren of the current position. - // So statScore is shared between all grandchildren and only the first grandchild - // starts with statScore = 0. Later grandchildren start with the last calculated - // statScore of the previous grandchild. This influences the reduction rules in - // LMR which are based on the statScore of parent position. - if (!rootNode) - (ss+2)->statScore = 0; - - // Step 4. Transposition table lookup. We don't want the score of a partial - // search to overwrite a previous full search TT value, so we use a different - // position key in case of an excluded move. + // Step 4. Transposition table lookup. excludedMove = ss->excludedMove; - posKey = excludedMove == MOVE_NONE ? pos.key() : pos.key() ^ make_key(excludedMove); + posKey = pos.key(); tte = TT.probe(posKey, ss->ttHit); ttValue = ss->ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE; ttMove = rootNode ? thisThread->rootMoves[thisThread->pvIdx].pv[0] : ss->ttHit ? tte->move() : MOVE_NONE; - ttCapture = ttMove && pos.capture(ttMove); + ttCapture = ttMove && pos.capture_stage(ttMove); + + // At this point, if excluded, skip straight to step 6, static eval. However, + // to save indentation, we list the condition in all code between here and there. if (!excludedMove) ss->ttPv = PvNode || (ss->ttHit && tte->is_pv()); // At non-PV nodes we check for an early TT cutoff if ( !PvNode - && ss->ttHit + && !excludedMove && tte->depth() > depth - (tte->bound() == BOUND_EXACT) - && ttValue != VALUE_NONE // Possible in case of TT access race + && ttValue != VALUE_NONE // Possible in case of TT access race or if !ttHit && (tte->bound() & (ttValue >= beta ? BOUND_LOWER : BOUND_UPPER))) { - // If ttMove is quiet, update move sorting heuristics on TT hit (~1 Elo) + // If ttMove is quiet, update move sorting heuristics on TT hit (~2 Elo) if (ttMove) { if (ttValue >= beta) { - // Bonus for a quiet ttMove that fails high (~3 Elo) + // Bonus for a quiet ttMove that fails high (~2 Elo) if (!ttCapture) update_quiet_stats(pos, ss, ttMove, stat_bonus(depth)); - // Extra penalty for early quiet moves of the previous ply (~0 Elo) - if ((ss-1)->moveCount <= 2 && !priorCapture) + // Extra penalty for early quiet moves of the previous ply (~0 Elo on STC, ~2 Elo on LTC) + if (prevSq != SQ_NONE && (ss-1)->moveCount <= 2 && !priorCapture) update_continuation_histories(ss-1, pos.piece_on(prevSq), prevSq, -stat_bonus(depth + 1)); } // Penalty for a quiet ttMove that fails low (~1 Elo) @@ -663,7 +649,7 @@ namespace { } // Step 5. Tablebases probe - if (!rootNode && TB::Cardinality) + if (!rootNode && !excludedMove && TB::Cardinality) { int piecesCount = pos.count(); @@ -723,38 +709,39 @@ namespace { ss->staticEval = eval = VALUE_NONE; improving = false; improvement = 0; - complexity = 0; goto moves_loop; } + else if (excludedMove) + { + // Providing the hint that this node's accumulator will be used often brings significant Elo gain (13 Elo) + Eval::NNUE::hint_common_parent_position(pos); + eval = ss->staticEval; + } else if (ss->ttHit) { // Never assume anything about values stored in TT ss->staticEval = eval = tte->eval(); if (eval == VALUE_NONE) - ss->staticEval = eval = evaluate(pos, &complexity); - else // Fall back to (semi)classical complexity for TT hits, the NNUE complexity is lost - complexity = abs(ss->staticEval - pos.psq_eg_stm()); + ss->staticEval = eval = evaluate(pos); + else if (PvNode) + Eval::NNUE::hint_common_parent_position(pos); - // ttValue can be used as a better position evaluation (~4 Elo) + // ttValue can be used as a better position evaluation (~7 Elo) if ( ttValue != VALUE_NONE && (tte->bound() & (ttValue > eval ? BOUND_LOWER : BOUND_UPPER))) eval = ttValue; } else { - ss->staticEval = eval = evaluate(pos, &complexity); - + ss->staticEval = eval = evaluate(pos); // Save static evaluation into transposition table - if (!excludedMove) - tte->save(posKey, VALUE_NONE, ss->ttPv, BOUND_NONE, DEPTH_NONE, MOVE_NONE, eval); + tte->save(posKey, VALUE_NONE, ss->ttPv, BOUND_NONE, DEPTH_NONE, MOVE_NONE, eval); } - thisThread->complexityAverage.update(complexity); - - // Use static evaluation difference to improve quiet move ordering (~3 Elo) + // Use static evaluation difference to improve quiet move ordering (~4 Elo) if (is_ok((ss-1)->currentMove) && !(ss-1)->inCheck && !priorCapture) { - int bonus = std::clamp(-19 * int((ss-1)->staticEval + ss->staticEval), -1914, 1914); + int bonus = std::clamp(-18 * int((ss-1)->staticEval + ss->staticEval), -1817, 1817); thisThread->mainHistory[~us][from_to((ss-1)->currentMove)] << bonus; } @@ -764,43 +751,43 @@ namespace { // margin and the improving flag are used in various pruning heuristics. improvement = (ss-2)->staticEval != VALUE_NONE ? ss->staticEval - (ss-2)->staticEval : (ss-4)->staticEval != VALUE_NONE ? ss->staticEval - (ss-4)->staticEval - : 168; + : 173; improving = improvement > 0; - // Step 7. Razoring. + // Step 7. Razoring (~1 Elo). // If eval is really low check with qsearch if it can exceed alpha, if it can't, // return a fail low. - if (eval < alpha - 369 - 254 * depth * depth) + if (eval < alpha - 456 - 252 * depth * depth) { value = qsearch(pos, ss, alpha - 1, alpha); if (value < alpha) return value; } - // Step 8. Futility pruning: child node (~25 Elo). + // Step 8. Futility pruning: child node (~40 Elo). // The depth condition is important for mate finding. if ( !ss->ttPv - && depth < 8 - && eval - futility_margin(depth, improving) - (ss-1)->statScore / 303 >= beta + && depth < 9 + && eval - futility_margin(depth, improving) - (ss-1)->statScore / 306 >= beta && eval >= beta - && eval < 28031) // larger than VALUE_KNOWN_WIN, but smaller than TB wins + && eval < 24923) // larger than VALUE_KNOWN_WIN, but smaller than TB wins return eval; - // Step 9. Null move search with verification search (~22 Elo) + // Step 9. Null move search with verification search (~35 Elo) if ( !PvNode && (ss-1)->currentMove != MOVE_NULL - && (ss-1)->statScore < 17139 + && (ss-1)->statScore < 17329 && eval >= beta && eval >= ss->staticEval - && ss->staticEval >= beta - 20 * depth - improvement / 13 + 233 + complexity / 25 + && ss->staticEval >= beta - 21 * depth - improvement / 13 + 258 && !excludedMove && pos.non_pawn_material(us) - && (ss->ply >= thisThread->nmpMinPly || us != thisThread->nmpColor)) + && (ss->ply >= thisThread->nmpMinPly)) { assert(eval - beta >= 0); - // Null move dynamic reduction based on depth, eval and complexity of position - Depth R = std::min(int(eval - beta) / 168, 7) + depth / 3 + 4 - (complexity > 861); + // Null move dynamic reduction based on depth and eval + Depth R = std::min(int(eval - beta) / 173, 6) + depth / 3 + 4; ss->currentMove = MOVE_NULL; ss->continuationHistory = &thisThread->continuationHistory[0][0][NO_PIECE][0]; @@ -814,18 +801,16 @@ namespace { if (nullValue >= beta) { // Do not return unproven mate or TB scores - if (nullValue >= VALUE_TB_WIN_IN_MAX_PLY) - nullValue = beta; + nullValue = std::min(nullValue, VALUE_TB_WIN_IN_MAX_PLY-1); - if (thisThread->nmpMinPly || (abs(beta) < VALUE_KNOWN_WIN && depth < 14)) + if (thisThread->nmpMinPly || depth < 14) return nullValue; assert(!thisThread->nmpMinPly); // Recursive verification is not allowed // Do verification search at high depths, with null move pruning disabled - // for us, until ply exceeds nmpMinPly. + // until ply exceeds nmpMinPly. thisThread->nmpMinPly = ss->ply + 3 * (depth-R) / 4; - thisThread->nmpColor = us; Value v = search(pos, ss, beta-1, beta, depth-R, false); @@ -836,20 +821,34 @@ namespace { } } - probCutBeta = beta + 191 - 54 * improving; + // Step 10. If the position doesn't have a ttMove, decrease depth by 2 + // (or by 4 if the TT entry for the current position was hit and the stored depth is greater than or equal to the current depth). + // Use qsearch if depth is equal or below zero (~9 Elo) + if ( PvNode + && !ttMove) + depth -= 2 + 2 * (ss->ttHit && tte->depth() >= depth); - // Step 10. ProbCut (~4 Elo) - // If we have a good enough capture and a reduced search returns a value + if (depth <= 0) + return qsearch(pos, ss, alpha, beta); + + if ( cutNode + && depth >= 8 + && !ttMove) + depth -= 2; + + probCutBeta = beta + 168 - 61 * improving; + + // Step 11. ProbCut (~10 Elo) + // If we have a good enough capture (or queen promotion) and a reduced search returns a value // much above beta, we can (almost) safely prune the previous move. if ( !PvNode - && depth > 4 + && depth > 3 && abs(beta) < VALUE_TB_WIN_IN_MAX_PLY // if value from transposition table is lower than probCutBeta, don't attempt probCut // there and in further interactions with transposition table cutoff depth is set to depth - 3 // because probCut search has depth set to depth - 4 but we also do a move before it // so effective depth is equal to depth - 3 - && !( ss->ttHit - && tte->depth() >= depth - 3 + && !( tte->depth() >= depth - 3 && ttValue != VALUE_NONE && ttValue < probCutBeta)) { @@ -860,7 +859,7 @@ namespace { while ((move = mp.next_move()) != MOVE_NONE) if (move != excludedMove && pos.legal(move)) { - assert(pos.capture(move) || promotion_type(move) == QUEEN); + assert(pos.capture_stage(move)); ss->currentMove = move; ss->continuationHistory = &thisThread->continuationHistory[ss->inCheck] @@ -886,44 +885,29 @@ namespace { return value; } } + + Eval::NNUE::hint_common_parent_position(pos); } - // Step 11. If the position is not in TT, decrease depth by 3. - // Use qsearch if depth is equal or below zero (~4 Elo) - if ( PvNode - && !ttMove) - depth -= 3; - - if (depth <= 0) - return qsearch(pos, ss, alpha, beta); - - if ( cutNode - && depth >= 9 - && !ttMove) - depth -= 2; - moves_loop: // When in check, search starts here - // Step 12. A small Probcut idea, when we are in check (~0 Elo) - probCutBeta = beta + 417; + // Step 12. A small Probcut idea, when we are in check (~4 Elo) + probCutBeta = beta + 413; if ( ss->inCheck && !PvNode - && depth >= 2 && ttCapture && (tte->bound() & BOUND_LOWER) - && tte->depth() >= depth - 3 + && tte->depth() >= depth - 4 && ttValue >= probCutBeta && abs(ttValue) <= VALUE_KNOWN_WIN - && abs(beta) <= VALUE_KNOWN_WIN - ) + && abs(beta) <= VALUE_KNOWN_WIN) return probCutBeta; - const PieceToHistory* contHist[] = { (ss-1)->continuationHistory, (ss-2)->continuationHistory, nullptr , (ss-4)->continuationHistory, nullptr , (ss-6)->continuationHistory }; - Move countermove = thisThread->counterMoves[pos.piece_on(prevSq)][prevSq]; + Move countermove = prevSq != SQ_NONE ? thisThread->counterMoves[pos.piece_on(prevSq)][prevSq] : MOVE_NONE; MovePicker mp(pos, ttMove, depth, &thisThread->mainHistory, &captureHistory, @@ -972,7 +956,7 @@ moves_loop: // When in check, search starts here (ss+1)->pv = nullptr; extension = 0; - capture = pos.capture(move); + capture = pos.capture_stage(move); movedPiece = pos.moved_piece(move); givesCheck = pos.gives_check(move); @@ -981,32 +965,52 @@ moves_loop: // When in check, search starts here Value delta = beta - alpha; - // Step 14. Pruning at shallow depth (~98 Elo). Depth conditions are important for mate finding. + Depth r = reduction(improving, depth, moveCount, delta, thisThread->rootDelta); + + // Step 14. Pruning at shallow depth (~120 Elo). Depth conditions are important for mate finding. if ( !rootNode && pos.non_pawn_material(us) && bestValue > VALUE_TB_LOSS_IN_MAX_PLY) { - // Skip quiet moves if movecount exceeds our FutilityMoveCount threshold (~7 Elo) + // Skip quiet moves if movecount exceeds our FutilityMoveCount threshold (~8 Elo) moveCountPruning = moveCount >= futility_move_count(improving, depth); // Reduced depth of the next LMR search - int lmrDepth = std::max(newDepth - reduction(improving, depth, moveCount, delta, thisThread->rootDelta), 0); + int lmrDepth = newDepth - r; if ( capture || givesCheck) { - // Futility pruning for captures (~0 Elo) + // Futility pruning for captures (~2 Elo) if ( !givesCheck - && !PvNode && lmrDepth < 7 && !ss->inCheck - && ss->staticEval + 180 + 201 * lmrDepth + PieceValue[EG][pos.piece_on(to_sq(move))] - + captureHistory[movedPiece][to_sq(move)][type_of(pos.piece_on(to_sq(move)))] / 6 < alpha) + && ss->staticEval + 197 + 248 * lmrDepth + PieceValue[EG][pos.piece_on(to_sq(move))] + + captureHistory[movedPiece][to_sq(move)][type_of(pos.piece_on(to_sq(move)))] / 7 < alpha) continue; - // SEE based pruning (~9 Elo) - if (!pos.see_ge(move, Value(-222) * depth)) - continue; + Bitboard occupied; + // SEE based pruning (~11 Elo) + if (!pos.see_ge(move, occupied, Value(-205) * depth)) + { + if (depth < 2 - capture) + continue; + // Don't prune the move if opponent Queen/Rook is under discovered attack after the exchanges + // Don't prune the move if opponent King is under discovered attack after or during the exchanges + Bitboard leftEnemies = (pos.pieces(~us, KING, QUEEN, ROOK)) & occupied; + Bitboard attacks = 0; + occupied |= to_sq(move); + while (leftEnemies && !attacks) + { + Square sq = pop_lsb(leftEnemies); + attacks |= pos.attackers_to(sq, occupied) & pos.pieces(us) & occupied; + // don't consider pieces which were already threatened/hanging before SEE exchanges + if (attacks && (sq != pos.square(~us) && (pos.attackers_to(sq, pos.pieces()) & pos.pieces(us)))) + attacks = 0; + } + if (!attacks) + continue; + } } else { @@ -1015,35 +1019,43 @@ moves_loop: // When in check, search starts here + (*contHist[3])[movedPiece][to_sq(move)]; // Continuation history based pruning (~2 Elo) - if ( lmrDepth < 5 - && history < -3875 * (depth - 1)) + if ( lmrDepth < 6 + && history < -3832 * depth) continue; history += 2 * thisThread->mainHistory[us][from_to(move)]; - // Futility pruning: parent node (~9 Elo) + lmrDepth += history / 7011; + lmrDepth = std::max(lmrDepth, -2); + + // Futility pruning: parent node (~13 Elo) if ( !ss->inCheck - && lmrDepth < 13 - && ss->staticEval + 106 + 145 * lmrDepth + history / 52 <= alpha) + && lmrDepth < 12 + && ss->staticEval + 112 + 138 * lmrDepth <= alpha) continue; - // Prune moves with negative SEE (~3 Elo) - if (!pos.see_ge(move, Value(-24 * lmrDepth * lmrDepth - 15 * lmrDepth))) + lmrDepth = std::max(lmrDepth, 0); + + // Prune moves with negative SEE (~4 Elo) + if (!pos.see_ge(move, Value(-27 * lmrDepth * lmrDepth - 16 * lmrDepth))) continue; } } - // Step 15. Extensions (~66 Elo) + // Step 15. Extensions (~100 Elo) // We take care to not overdo to avoid search getting stuck. if (ss->ply < thisThread->rootDepth * 2) { - // Singular extension search (~58 Elo). If all moves but one fail low on a + // Singular extension search (~94 Elo). If all moves but one fail low on a // search of (alpha-s, beta-s), and just one fails high on (alpha, beta), // then that move is singular and should be extended. To verify this we do // a reduced search on all the other moves but the ttMove and if the // result is lower than ttValue minus a margin, then we will extend the ttMove. + // Depth margin and singularBeta margin are known for having non-linear scaling. + // Their values are optimized to time controls of 180+1.8 and longer + // so changing them requires tests at this type of time controls. if ( !rootNode - && depth >= 4 - (thisThread->previousDepth > 24) + 2 * (PvNode && tte->is_pv()) + && depth >= 4 - (thisThread->completedDepth > 22) + 2 * (PvNode && tte->is_pv()) && move == ttMove && !excludedMove // Avoid recursive singular search /* && ttValue != VALUE_NONE Already implicit in the next condition */ @@ -1051,7 +1063,7 @@ moves_loop: // When in check, search starts here && (tte->bound() & BOUND_LOWER) && tte->depth() >= depth - 3) { - Value singularBeta = ttValue - (3 + (ss->ttPv && !PvNode)) * depth; + Value singularBeta = ttValue - (82 + 65 * (ss->ttPv && !PvNode)) * depth / 64; Depth singularDepth = (depth - 1) / 2; ss->excludedMove = move; @@ -1065,9 +1077,12 @@ moves_loop: // When in check, search starts here // Avoid search explosion by limiting the number of double extensions if ( !PvNode - && value < singularBeta - 25 - && ss->doubleExtensions <= 9) + && value < singularBeta - 21 + && ss->doubleExtensions <= 11) + { extension = 2; + depth += depth < 13; + } } // Multi-cut pruning @@ -1078,26 +1093,29 @@ moves_loop: // When in check, search starts here else if (singularBeta >= beta) return singularBeta; - // If the eval of ttMove is greater than beta, we reduce it (negative extension) + // If the eval of ttMove is greater than beta, we reduce it (negative extension) (~7 Elo) else if (ttValue >= beta) - extension = -2; + extension = -2 - !PvNode; - // If the eval of ttMove is less than alpha and value, we reduce it (negative extension) - else if (ttValue <= alpha && ttValue <= value) + // If the eval of ttMove is less than value, we reduce it (negative extension) (~1 Elo) + else if (ttValue <= value) + extension = -1; + + // If the eval of ttMove is less than alpha, we reduce it (negative extension) (~1 Elo) + else if (ttValue <= alpha) extension = -1; } // Check extensions (~1 Elo) else if ( givesCheck - && depth > 9 - && abs(ss->staticEval) > 82) + && depth > 9) extension = 1; - // Quiet ttMove extensions (~0 Elo) + // Quiet ttMove extensions (~1 Elo) else if ( PvNode && move == ttMove && move == ss->killers[0] - && (*contHist[0])[movedPiece][to_sq(move)] >= 5177) + && (*contHist[0])[movedPiece][to_sq(move)] >= 5168) extension = 1; } @@ -1118,7 +1136,50 @@ moves_loop: // When in check, search starts here // Step 16. Make the move pos.do_move(move, st, givesCheck); - // Step 17. Late moves reduction / extension (LMR, ~98 Elo) + // Decrease reduction if position is or has been on the PV + // and node is not likely to fail low. (~3 Elo) + // Decrease further on cutNodes. (~1 Elo) + if ( ss->ttPv + && !likelyFailLow) + r -= cutNode && tte->depth() >= depth + 3 ? 3 : 2; + + // Decrease reduction if opponent's move count is high (~1 Elo) + if ((ss-1)->moveCount > 8) + r--; + + // Increase reduction for cut nodes (~3 Elo) + if (cutNode) + r += 2; + + // Increase reduction if ttMove is a capture (~3 Elo) + if (ttCapture) + r++; + + // Decrease reduction for PvNodes based on depth (~2 Elo) + if (PvNode) + r -= 1 + 12 / (3 + depth); + + // Decrease reduction if ttMove has been singularly extended (~1 Elo) + if (singularQuietLMR) + r--; + + // Increase reduction if next ply has a lot of fail high (~5 Elo) + if ((ss+1)->cutoffCnt > 3) + r++; + + else if (move == ttMove) + r--; + + ss->statScore = 2 * thisThread->mainHistory[us][from_to(move)] + + (*contHist[0])[movedPiece][to_sq(move)] + + (*contHist[1])[movedPiece][to_sq(move)] + + (*contHist[3])[movedPiece][to_sq(move)] + - 4006; + + // Decrease/increase reduction for moves with a good/bad history (~25 Elo) + r -= ss->statScore / (11124 + 4740 * (depth > 5 && depth < 22)); + + // Step 17. Late moves reduction / extension (LMR, ~117 Elo) // We use various heuristics for the sons of a node after the first son has // been searched. In general we would like to reduce them, but there are many // cases where we extend a son if it has good chances to be "interesting". @@ -1128,52 +1189,6 @@ moves_loop: // When in check, search starts here || !capture || (cutNode && (ss-1)->moveCount > 1))) { - Depth r = reduction(improving, depth, moveCount, delta, thisThread->rootDelta); - - // Decrease reduction if position is or has been on the PV - // and node is not likely to fail low. (~3 Elo) - if ( ss->ttPv - && !likelyFailLow) - r -= 2; - - // Decrease reduction if opponent's move count is high (~1 Elo) - if ((ss-1)->moveCount > 7) - r--; - - // Increase reduction for cut nodes (~3 Elo) - if (cutNode) - r += 2; - - // Increase reduction if ttMove is a capture (~3 Elo) - if (ttCapture) - r++; - - // Decrease reduction for PvNodes based on depth - if (PvNode) - r -= 1 + 11 / (3 + depth); - - // Decrease reduction if ttMove has been singularly extended (~1 Elo) - if (singularQuietLMR) - r--; - - // Decrease reduction if we move a threatened piece (~1 Elo) - if ( depth > 9 - && (mp.threatenedPieces & from_sq(move))) - r--; - - // Increase reduction if next ply has a lot of fail high - if ((ss+1)->cutoffCnt > 3) - r++; - - ss->statScore = 2 * thisThread->mainHistory[us][from_to(move)] - + (*contHist[0])[movedPiece][to_sq(move)] - + (*contHist[1])[movedPiece][to_sq(move)] - + (*contHist[3])[movedPiece][to_sq(move)] - - 4433; - - // Decrease/increase reduction for moves with a good/bad history (~30 Elo) - r -= ss->statScore / (13628 + 4000 * (depth > 7 && depth < 19)); - // In general we want to cap the LMR depth search at newDepth, but when // reduction is negative, we allow this move a limited search extension // beyond the first move depth. This may lead to hidden double extensions. @@ -1186,28 +1201,33 @@ moves_loop: // When in check, search starts here { // Adjust full depth search based on LMR results - if result // was good enough search deeper, if it was bad enough search shallower - const bool doDeeperSearch = value > (alpha + 64 + 11 * (newDepth - d)); + const bool doDeeperSearch = value > (bestValue + 64 + 11 * (newDepth - d)); + const bool doEvenDeeperSearch = value > alpha + 711 && ss->doubleExtensions <= 6; const bool doShallowerSearch = value < bestValue + newDepth; - newDepth += doDeeperSearch - doShallowerSearch; + ss->doubleExtensions = ss->doubleExtensions + doEvenDeeperSearch; + + newDepth += doDeeperSearch - doShallowerSearch + doEvenDeeperSearch; if (newDepth > d) value = -search(pos, ss+1, -(alpha+1), -alpha, newDepth, !cutNode); - int bonus = value > alpha ? stat_bonus(newDepth) - : -stat_bonus(newDepth); - - if (capture) - bonus /= 6; + int bonus = value <= alpha ? -stat_bonus(newDepth) + : value >= beta ? stat_bonus(newDepth) + : 0; update_continuation_histories(ss, movedPiece, to_sq(move), bonus); } } - // Step 18. Full depth search when LMR is skipped + // Step 18. Full depth search when LMR is skipped. If expected reduction is high, reduce its depth by 1. else if (!PvNode || moveCount > 1) { - value = -search(pos, ss+1, -(alpha+1), -alpha, newDepth, !cutNode); + // Increase reduction for cut nodes and not ttMove (~1 Elo) + if (!ttMove && cutNode) + r += 2; + + value = -search(pos, ss+1, -(alpha+1), -alpha, newDepth - (r > 3), !cutNode); } // For PV nodes only, do a full PV search on the first move or after a fail @@ -1218,8 +1238,7 @@ moves_loop: // When in check, search starts here (ss+1)->pv = pv; (ss+1)->pv[0] = MOVE_NONE; - value = -search(pos, ss+1, -beta, -alpha, - std::min(maxNextDepth, newDepth), false); + value = -search(pos, ss+1, -beta, -alpha, newDepth, false); } // Step 19. Undo move @@ -1244,10 +1263,21 @@ moves_loop: // When in check, search starts here // PV move or new best move? if (moveCount == 1 || value > alpha) { - rm.score = value; + rm.score = rm.uciScore = value; rm.selDepth = thisThread->selDepth; - rm.scoreLowerbound = value >= beta; - rm.scoreUpperbound = value <= alpha; + rm.scoreLowerbound = rm.scoreUpperbound = false; + + if (value >= beta) + { + rm.scoreLowerbound = true; + rm.uciScore = beta; + } + else if (value <= alpha) + { + rm.scoreUpperbound = true; + rm.uciScore = alpha; + } + rm.pv.resize(1); assert((ss+1)->pv); @@ -1280,24 +1310,23 @@ moves_loop: // When in check, search starts here if (PvNode && !rootNode) // Update pv even in fail-high case update_pv(ss->pv, move, (ss+1)->pv); - if (PvNode && value < beta) // Update alpha! Always alpha < beta + if (value >= beta) { - alpha = value; - - // Reduce other moves if we have found at least one score improvement - if ( depth > 1 - && depth < 6 - && beta < VALUE_KNOWN_WIN - && alpha > -VALUE_KNOWN_WIN) - depth -= 1; - - assert(depth > 0); + ss->cutoffCnt += 1 + !ttMove; + assert(value >= beta); // Fail high + break; } else { - ss->cutoffCnt++; - assert(value >= beta); // Fail high - break; + // Reduce other moves if we have found at least one score improvement (~1 Elo) + // Reduce more for depth > 3 and depth < 12 (~1 Elo) + if ( depth > 1 + && beta < 14362 + && value > -12393) + depth -= depth > 3 && depth < 12 ? 2 : 1; + + assert(depth > 0); + alpha = value; // Update alpha! Always alpha < beta } } } @@ -1340,23 +1369,17 @@ moves_loop: // When in check, search starts here quietsSearched, quietCount, capturesSearched, captureCount, depth); // Bonus for prior countermove that caused the fail low - else if ( (depth >= 5 || PvNode) - && !priorCapture) + else if (!priorCapture && prevSq != SQ_NONE) { - //Assign extra bonus if current node is PvNode or cutNode - //or fail low was really bad - bool extraBonus = PvNode - || cutNode - || bestValue < alpha - 62 * depth; - - update_continuation_histories(ss-1, pos.piece_on(prevSq), prevSq, stat_bonus(depth) * (1 + extraBonus)); + int bonus = (depth > 5) + (PvNode || cutNode) + (bestValue < alpha - 113 * depth) + ((ss-1)->moveCount > 12); + update_continuation_histories(ss-1, pos.piece_on(prevSq), prevSq, stat_bonus(depth) * bonus); } if (PvNode) bestValue = std::min(bestValue, maxValue); // If no good move is found and the previous position was ttPv, then the previous - // opponent move is probably good and the new position is added to the search tree. + // opponent move is probably good and the new position is added to the search tree. (~7 Elo) if (bestValue <= alpha) ss->ttPv = ss->ttPv || ((ss-1)->ttPv && depth > 3); @@ -1375,7 +1398,7 @@ moves_loop: // When in check, search starts here // qsearch() is the quiescence search function, which is called by the main search // function with zero depth, or recursively with further decreasing depth per call. - // (~155 elo) + // (~155 Elo) template Value qsearch(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth) { @@ -1398,6 +1421,7 @@ moves_loop: // When in check, search starts here bool pvHit, givesCheck, capture; int moveCount; + // Step 1. Initialize node if (PvNode) { (ss+1)->pv = pv; @@ -1409,7 +1433,7 @@ moves_loop: // When in check, search starts here ss->inCheck = pos.checkers(); moveCount = 0; - // Check for an immediate draw or maximum ply reached + // Step 2. Check for an immediate draw or maximum ply reached if ( pos.is_draw(ss->ply) || ss->ply >= MAX_PLY) return (ss->ply >= MAX_PLY && !ss->inCheck) ? evaluate(pos) : VALUE_DRAW; @@ -1420,27 +1444,25 @@ moves_loop: // When in check, search starts here // TT entry depth that we are going to use. Note that in qsearch we use // only two types of depth in TT: DEPTH_QS_CHECKS or DEPTH_QS_NO_CHECKS. ttDepth = ss->inCheck || depth >= DEPTH_QS_CHECKS ? DEPTH_QS_CHECKS - : DEPTH_QS_NO_CHECKS; - // Transposition table lookup + : DEPTH_QS_NO_CHECKS; + + // Step 3. Transposition table lookup posKey = pos.key(); tte = TT.probe(posKey, ss->ttHit); ttValue = ss->ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE; ttMove = ss->ttHit ? tte->move() : MOVE_NONE; pvHit = ss->ttHit && tte->is_pv(); + // At non-PV nodes we check for an early TT cutoff if ( !PvNode - && ss->ttHit && tte->depth() >= ttDepth - && ttValue != VALUE_NONE // Only in case of TT access race + && ttValue != VALUE_NONE // Only in case of TT access race or if !ttHit && (tte->bound() & (ttValue >= beta ? BOUND_LOWER : BOUND_UPPER))) return ttValue; - // Evaluate the position statically + // Step 4. Static evaluation of the position if (ss->inCheck) - { - ss->staticEval = VALUE_NONE; bestValue = futilityBase = -VALUE_INFINITE; - } else { if (ss->ttHit) @@ -1449,16 +1471,15 @@ moves_loop: // When in check, search starts here if ((ss->staticEval = bestValue = tte->eval()) == VALUE_NONE) ss->staticEval = bestValue = evaluate(pos); - // ttValue can be used as a better position evaluation (~7 Elo) + // ttValue can be used as a better position evaluation (~13 Elo) if ( ttValue != VALUE_NONE && (tte->bound() & (ttValue > bestValue ? BOUND_LOWER : BOUND_UPPER))) bestValue = ttValue; } else // In case of null move search use previous static eval with a different sign - ss->staticEval = bestValue = - (ss-1)->currentMove != MOVE_NULL ? evaluate(pos) - : -(ss-1)->staticEval; + ss->staticEval = bestValue = (ss-1)->currentMove != MOVE_NULL ? evaluate(pos) + : -(ss-1)->staticEval; // Stand pat. Return immediately if static value is at least beta if (bestValue >= beta) @@ -1474,7 +1495,7 @@ moves_loop: // When in check, search starts here if (PvNode && bestValue > alpha) alpha = bestValue; - futilityBase = bestValue + 153; + futilityBase = bestValue + 200; } const PieceToHistory* contHist[] = { (ss-1)->continuationHistory, (ss-2)->continuationHistory, @@ -1485,7 +1506,7 @@ moves_loop: // When in check, search starts here // to search the moves. Because the depth is <= 0 here, only captures, // queen promotions, and other checks (only if depth >= DEPTH_QS_CHECKS) // will be generated. - Square prevSq = to_sq((ss-1)->currentMove); + Square prevSq = is_ok((ss-1)->currentMove) ? to_sq((ss-1)->currentMove) : SQ_NONE; MovePicker mp(pos, ttMove, depth, &thisThread->mainHistory, &thisThread->captureHistory, contHist, @@ -1493,102 +1514,105 @@ moves_loop: // When in check, search starts here int quietCheckEvasions = 0; - // Loop through the moves until no moves remain or a beta cutoff occurs + // Step 5. Loop through all pseudo-legal moves until no moves remain + // or a beta cutoff occurs. while ((move = mp.next_move()) != MOVE_NONE) { - assert(is_ok(move)); + assert(is_ok(move)); - // Check for legality - if (!pos.legal(move)) - continue; + // Check for legality + if (!pos.legal(move)) + continue; - givesCheck = pos.gives_check(move); - capture = pos.capture(move); + givesCheck = pos.gives_check(move); + capture = pos.capture_stage(move); - moveCount++; + moveCount++; - // Futility pruning and moveCount pruning (~5 Elo) - if ( bestValue > VALUE_TB_LOSS_IN_MAX_PLY - && !givesCheck - && to_sq(move) != prevSq - && futilityBase > -VALUE_KNOWN_WIN - && type_of(move) != PROMOTION) - { + // Step 6. Pruning. + if (bestValue > VALUE_TB_LOSS_IN_MAX_PLY) + { + // Futility pruning and moveCount pruning (~10 Elo) + if ( !givesCheck + && to_sq(move) != prevSq + && futilityBase > -VALUE_KNOWN_WIN + && type_of(move) != PROMOTION) + { + if (moveCount > 2) + continue; - if (moveCount > 2) - continue; + futilityValue = futilityBase + PieceValue[EG][pos.piece_on(to_sq(move))]; - futilityValue = futilityBase + PieceValue[EG][pos.piece_on(to_sq(move))]; + if (futilityValue <= alpha) + { + bestValue = std::max(bestValue, futilityValue); + continue; + } - if (futilityValue <= alpha) - { - bestValue = std::max(bestValue, futilityValue); - continue; - } + if (futilityBase <= alpha && !pos.see_ge(move, VALUE_ZERO + 1)) + { + bestValue = std::max(bestValue, futilityBase); + continue; + } + } - if (futilityBase <= alpha && !pos.see_ge(move, VALUE_ZERO + 1)) - { - bestValue = std::max(bestValue, futilityBase); - continue; - } - } + // We prune after the second quiet check evasion move, where being 'in check' is + // implicitly checked through the counter, and being a 'quiet move' apart from + // being a tt move is assumed after an increment because captures are pushed ahead. + if (quietCheckEvasions > 1) + break; - // Do not search moves with negative SEE values (~5 Elo) - if ( bestValue > VALUE_TB_LOSS_IN_MAX_PLY - && !pos.see_ge(move)) - continue; + // Continuation history based pruning (~3 Elo) + if ( !capture + && (*contHist[0])[pos.moved_piece(move)][to_sq(move)] < 0 + && (*contHist[1])[pos.moved_piece(move)][to_sq(move)] < 0) + continue; - // Speculative prefetch as early as possible - prefetch(TT.first_entry(pos.key_after(move))); + // Do not search moves with bad enough SEE values (~5 Elo) + if (!pos.see_ge(move, Value(-95))) + continue; + } - ss->currentMove = move; - ss->continuationHistory = &thisThread->continuationHistory[ss->inCheck] - [capture] - [pos.moved_piece(move)] - [to_sq(move)]; + // Speculative prefetch as early as possible + prefetch(TT.first_entry(pos.key_after(move))); - // Continuation history based pruning (~2 Elo) - if ( !capture - && bestValue > VALUE_TB_LOSS_IN_MAX_PLY - && (*contHist[0])[pos.moved_piece(move)][to_sq(move)] < 0 - && (*contHist[1])[pos.moved_piece(move)][to_sq(move)] < 0) - continue; + // Update the current move + ss->currentMove = move; + ss->continuationHistory = &thisThread->continuationHistory[ss->inCheck] + [capture] + [pos.moved_piece(move)] + [to_sq(move)]; - // We prune after 2nd quiet check evasion where being 'in check' is implicitly checked through the counter - // and being a 'quiet' apart from being a tt move is assumed after an increment because captures are pushed ahead. - if ( bestValue > VALUE_TB_LOSS_IN_MAX_PLY - && quietCheckEvasions > 1) - break; + quietCheckEvasions += !capture && ss->inCheck; - quietCheckEvasions += !capture && ss->inCheck; + // Step 7. Make and search the move + pos.do_move(move, st, givesCheck); + value = -qsearch(pos, ss+1, -beta, -alpha, depth - 1); + pos.undo_move(move); - // Make and search the move - pos.do_move(move, st, givesCheck); - value = -qsearch(pos, ss+1, -beta, -alpha, depth - 1); - pos.undo_move(move); + assert(value > -VALUE_INFINITE && value < VALUE_INFINITE); - assert(value > -VALUE_INFINITE && value < VALUE_INFINITE); + // Step 8. Check for a new best move + if (value > bestValue) + { + bestValue = value; - // Check for a new best move - if (value > bestValue) - { - bestValue = value; + if (value > alpha) + { + bestMove = move; - if (value > alpha) - { - bestMove = move; + if (PvNode) // Update pv even in fail-high case + update_pv(ss->pv, move, (ss+1)->pv); - if (PvNode) // Update pv even in fail-high case - update_pv(ss->pv, move, (ss+1)->pv); - - if (PvNode && value < beta) // Update alpha here! - alpha = value; - else - break; // Fail high - } - } + if (PvNode && value < beta) // Update alpha here! + alpha = value; + else + break; // Fail high + } + } } + // Step 9. Check for mate // All legal moves have been searched. A special case: if we're in check // and no legal moves were found, it is checkmate. if (ss->inCheck && bestValue == -VALUE_INFINITE) @@ -1672,12 +1696,13 @@ moves_loop: // When in check, search starts here Thread* thisThread = pos.this_thread(); CapturePieceToHistory& captureHistory = thisThread->captureHistory; Piece moved_piece = pos.moved_piece(bestMove); - PieceType captured = type_of(pos.piece_on(to_sq(bestMove))); + PieceType captured; + int bonus1 = stat_bonus(depth + 1); - if (!pos.capture(bestMove)) + if (!pos.capture_stage(bestMove)) { - int bonus2 = bestValue > beta + 137 ? bonus1 // larger bonus + int bonus2 = bestValue > beta + 145 ? bonus1 // larger bonus : stat_bonus(depth); // smaller bonus // Increase stats for the best move in case it was a quiet move @@ -1691,12 +1716,16 @@ moves_loop: // When in check, search starts here } } else + { // Increase stats for the best move in case it was a capture move + captured = type_of(pos.piece_on(to_sq(bestMove))); captureHistory[moved_piece][to_sq(bestMove)][captured] << bonus1; + } // Extra penalty for a quiet early move that was not a TT move or // main killer move in previous ply when it gets refuted. - if ( ((ss-1)->moveCount == 1 + (ss-1)->ttHit || ((ss-1)->currentMove == (ss-1)->killers[0])) + if ( prevSq != SQ_NONE + && ((ss-1)->moveCount == 1 + (ss-1)->ttHit || ((ss-1)->currentMove == (ss-1)->killers[0])) && !pos.captured_piece()) update_continuation_histories(ss-1, pos.piece_on(prevSq), prevSq, -bonus1); @@ -1840,7 +1869,7 @@ string UCI::pv(const Position& pos, Depth depth) { continue; Depth d = updated ? depth : std::max(1, depth - 1); - Value v = updated ? rootMoves[i].score : rootMoves[i].previousScore; + Value v = updated ? rootMoves[i].uciScore : rootMoves[i].previousScore; if (v == -VALUE_INFINITE) v = VALUE_ZERO; diff --git a/DroidFishApp/src/main/cpp/stockfish/search.h b/DroidFishApp/src/main/cpp/stockfish/search.h index 60f2762..806e4be 100644 --- a/DroidFishApp/src/main/cpp/stockfish/search.h +++ b/DroidFishApp/src/main/cpp/stockfish/search.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -71,6 +71,7 @@ struct RootMove { Value score = -VALUE_INFINITE; Value previousScore = -VALUE_INFINITE; Value averageScore = -VALUE_INFINITE; + Value uciScore = -VALUE_INFINITE; bool scoreLowerbound = false; bool scoreUpperbound = false; int selDepth = 0; @@ -79,7 +80,7 @@ struct RootMove { std::vector pv; }; -typedef std::vector RootMoves; +using RootMoves = std::vector; /// LimitsType struct stores information sent by GUI about available time to diff --git a/DroidFishApp/src/main/cpp/stockfish/syzygy/tbprobe.cpp b/DroidFishApp/src/main/cpp/stockfish/syzygy/tbprobe.cpp index e478b78..9cb0bfd 100644 --- a/DroidFishApp/src/main/cpp/stockfish/syzygy/tbprobe.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/syzygy/tbprobe.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -24,9 +24,10 @@ #include #include #include -#include -#include #include +#include +#include +#include #include "../bitboard.h" #include "../movegen.h" @@ -70,7 +71,7 @@ enum TBFlag { STM = 1, Mapped = 2, WinPlies = 4, LossPlies = 8, Wide = 16, Singl inline WDLScore operator-(WDLScore d) { return WDLScore(-int(d)); } inline Square operator^(Square s, int i) { return Square(int(s) ^ i); } -const std::string PieceToChar = " PNBRQK pnbrqk"; +constexpr std::string_view PieceToChar = " PNBRQK pnbrqk"; int MapPawns[SQUARE_NB]; int MapB1H1H7[SQUARE_NB]; @@ -141,7 +142,7 @@ struct SparseEntry { static_assert(sizeof(SparseEntry) == 6, "SparseEntry must be 6 bytes"); -typedef uint16_t Sym; // Huffman symbol +using Sym = uint16_t; // Huffman symbol struct LR { enum Side { Left, Right }; @@ -199,13 +200,10 @@ public: } } - // Memory map the file and check it. File should be already open and will be - // closed after mapping. + // Memory map the file and check it. uint8_t* map(void** baseAddress, uint64_t* mapping, TBType type) { - - assert(is_open()); - - close(); // Need to re-open to get native file descriptor + if (is_open()) + close(); // Need to re-open to get native file descriptor #ifndef _WIN32 struct stat statbuf; @@ -236,7 +234,7 @@ public: } #else // Note FILE_FLAG_RANDOM_ACCESS is only a hint to Windows and as such may get ignored. - HANDLE fd = CreateFile(fname.c_str(), GENERIC_READ, FILE_SHARE_READ, nullptr, + HANDLE fd = CreateFileA(fname.c_str(), GENERIC_READ, FILE_SHARE_READ, nullptr, OPEN_EXISTING, FILE_FLAG_RANDOM_ACCESS, nullptr); if (fd == INVALID_HANDLE_VALUE) @@ -329,7 +327,7 @@ struct PairsData { // first access, when the corresponding file is memory mapped. template struct TBTable { - typedef typename std::conditional::type Ret; + using Ret = typename std::conditional::type; static constexpr int Sides = Type == WDL ? 2 : 1; @@ -473,8 +471,6 @@ TBTables TBTables; // If the corresponding file exists two new objects TBTable and TBTable // are created and added to the lists and hash table. Called at init time. void TBTables::add(const std::vector& pieces) { - if (sizeof(char*) < 8 && pieces.size() >= 6) - return; // Not enough address space to support 6-men TB on 32-bit OS std::string code; @@ -1516,7 +1512,7 @@ int Tablebases::probe_dtz(Position& pos, ProbeState* result) { // A return value false indicates that not all probes were successful. bool Tablebases::root_probe(Position& pos, Search::RootMoves& rootMoves) { - ProbeState result; + ProbeState result = OK; StateInfo st; // Obtain 50-move counter for the root position @@ -1595,7 +1591,7 @@ bool Tablebases::root_probe_wdl(Position& pos, Search::RootMoves& rootMoves) { static const int WDL_to_rank[] = { -MAX_DTZ, -MAX_DTZ + 101, 0, MAX_DTZ - 101, MAX_DTZ }; - ProbeState result; + ProbeState result = OK; StateInfo st; WDLScore wdl; diff --git a/DroidFishApp/src/main/cpp/stockfish/syzygy/tbprobe.h b/DroidFishApp/src/main/cpp/stockfish/syzygy/tbprobe.h index 179c757..159c686 100644 --- a/DroidFishApp/src/main/cpp/stockfish/syzygy/tbprobe.h +++ b/DroidFishApp/src/main/cpp/stockfish/syzygy/tbprobe.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/DroidFishApp/src/main/cpp/stockfish/thread.cpp b/DroidFishApp/src/main/cpp/stockfish/thread.cpp index b7471f6..c680393 100644 --- a/DroidFishApp/src/main/cpp/stockfish/thread.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/thread.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -60,7 +60,6 @@ void Thread::clear() { counterMoves.fill(MOVE_NONE); mainHistory.fill(0); captureHistory.fill(0); - previousDepth = 0; for (bool inCheck : { false, true }) for (StatsType c : { NoCaptures, Captures }) @@ -73,9 +72,9 @@ void Thread::clear() { /// Thread::start_searching() wakes up the thread that will start the search void Thread::start_searching() { - - std::lock_guard lk(mutex); + mutex.lock(); searching = true; + mutex.unlock(); // Unlock before notifying saves a few CPU-cycles cv.notify_one(); // Wake up the thread in idle_loop() } @@ -125,20 +124,20 @@ void Thread::idle_loop() { void ThreadPool::set(size_t requested) { - if (size() > 0) // destroy any existing thread(s) + if (threads.size() > 0) // destroy any existing thread(s) { main()->wait_for_search_finished(); - while (size() > 0) - delete back(), pop_back(); + while (threads.size() > 0) + delete threads.back(), threads.pop_back(); } if (requested > 0) // create new thread(s) { - push_back(new MainThread(0)); + threads.push_back(new MainThread(0)); - while (size() < requested) - push_back(new Thread(size())); + while (threads.size() < requested) + threads.push_back(new Thread(threads.size())); clear(); // Reallocate the hash with the new threadpool size @@ -154,7 +153,7 @@ void ThreadPool::set(size_t requested) { void ThreadPool::clear() { - for (Thread* th : *this) + for (Thread* th : threads) th->clear(); main()->callsCnt = 0; @@ -187,7 +186,7 @@ void ThreadPool::start_thinking(Position& pos, StateListPtr& states, Tablebases::rank_root_moves(pos, rootMoves); // After ownership transfer 'states' becomes empty, so if we stop the search - // and call 'go' again without setting a new position states.get() == NULL. + // and call 'go' again without setting a new position states.get() == nullptr. assert(states.get() || setupStates.get()); if (states.get()) @@ -198,7 +197,7 @@ void ThreadPool::start_thinking(Position& pos, StateListPtr& states, // be deduced from a fen string, so set() clears them and they are set from // setupStates->back() later. The rootState is per thread, earlier states are shared // since they are read-only. - for (Thread* th : *this) + for (Thread* th : threads) { th->nodes = th->tbHits = th->nmpMinPly = th->bestMoveChanges = 0; th->rootDepth = th->completedDepth = 0; @@ -212,12 +211,12 @@ void ThreadPool::start_thinking(Position& pos, StateListPtr& states, Thread* ThreadPool::get_best_thread() const { - Thread* bestThread = front(); + Thread* bestThread = threads.front(); std::map votes; Value minScore = VALUE_NONE; // Find minimum score of all threads - for (Thread* th: *this) + for (Thread* th: threads) minScore = std::min(minScore, th->rootMoves[0].score); // Vote according to score and depth, and select the best thread @@ -225,10 +224,10 @@ Thread* ThreadPool::get_best_thread() const { return (th->rootMoves[0].score - minScore + 14) * int(th->completedDepth); }; - for (Thread* th : *this) + for (Thread* th : threads) votes[th->rootMoves[0].pv[0]] += thread_value(th); - for (Thread* th : *this) + for (Thread* th : threads) if (abs(bestThread->rootMoves[0].score) >= VALUE_TB_WIN_IN_MAX_PLY) { // Make sure we pick the shortest mate / TB conversion or stave off mate the longest @@ -239,7 +238,8 @@ Thread* ThreadPool::get_best_thread() const { || ( th->rootMoves[0].score > VALUE_TB_LOSS_IN_MAX_PLY && ( votes[th->rootMoves[0].pv[0]] > votes[bestThread->rootMoves[0].pv[0]] || ( votes[th->rootMoves[0].pv[0]] == votes[bestThread->rootMoves[0].pv[0]] - && thread_value(th) > thread_value(bestThread))))) + && thread_value(th) * int(th->rootMoves[0].pv.size() > 2) + > thread_value(bestThread) * int(bestThread->rootMoves[0].pv.size() > 2))))) bestThread = th; return bestThread; @@ -250,8 +250,8 @@ Thread* ThreadPool::get_best_thread() const { void ThreadPool::start_searching() { - for (Thread* th : *this) - if (th != front()) + for (Thread* th : threads) + if (th != threads.front()) th->start_searching(); } @@ -260,8 +260,8 @@ void ThreadPool::start_searching() { void ThreadPool::wait_for_search_finished() const { - for (Thread* th : *this) - if (th != front()) + for (Thread* th : threads) + if (th != threads.front()) th->wait_for_search_finished(); } diff --git a/DroidFishApp/src/main/cpp/stockfish/thread.h b/DroidFishApp/src/main/cpp/stockfish/thread.h index 5f0b2c3..09bdb47 100644 --- a/DroidFishApp/src/main/cpp/stockfish/thread.h +++ b/DroidFishApp/src/main/cpp/stockfish/thread.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -60,16 +60,14 @@ public: Pawns::Table pawnsTable; Material::Table materialTable; size_t pvIdx, pvLast; - RunningAverage complexityAverage; std::atomic nodes, tbHits, bestMoveChanges; int selDepth, nmpMinPly; - Color nmpColor; Value bestValue, optimism[COLOR_NB]; Position rootPos; StateInfo rootState; Search::RootMoves rootMoves; - Depth rootDepth, completedDepth, previousDepth; + Depth rootDepth, completedDepth; Value rootDelta; CounterMoveHistory counterMoves; ButterflyHistory mainHistory; @@ -101,13 +99,13 @@ struct MainThread : public Thread { /// parking and, most importantly, launching a thread. All the access to threads /// is done through this class. -struct ThreadPool : public std::vector { +struct ThreadPool { void start_thinking(Position&, StateListPtr&, const Search::LimitsType&, bool = false); void clear(); void set(size_t); - MainThread* main() const { return static_cast(front()); } + MainThread* main() const { return static_cast(threads.front()); } uint64_t nodes_searched() const { return accumulate(&Thread::nodes); } uint64_t tb_hits() const { return accumulate(&Thread::tbHits); } Thread* get_best_thread() const; @@ -116,13 +114,21 @@ struct ThreadPool : public std::vector { std::atomic_bool stop, increaseDepth; + auto cbegin() const noexcept { return threads.cbegin(); } + auto begin() noexcept { return threads.begin(); } + auto end() noexcept { return threads.end(); } + auto cend() const noexcept { return threads.cend(); } + auto size() const noexcept { return threads.size(); } + auto empty() const noexcept { return threads.empty(); } + private: StateListPtr setupStates; + std::vector threads; uint64_t accumulate(std::atomic Thread::* member) const { uint64_t sum = 0; - for (Thread* th : *this) + for (Thread* th : threads) sum += (th->*member).load(std::memory_order_relaxed); return sum; } diff --git a/DroidFishApp/src/main/cpp/stockfish/thread_win32_osx.h b/DroidFishApp/src/main/cpp/stockfish/thread_win32_osx.h index 77d1c3c..330a834 100644 --- a/DroidFishApp/src/main/cpp/stockfish/thread_win32_osx.h +++ b/DroidFishApp/src/main/cpp/stockfish/thread_win32_osx.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -41,7 +41,7 @@ void* start_routine(void* ptr) P* p = reinterpret_cast(ptr); (p->first->*(p->second))(); // Call member function pointer delete p; - return NULL; + return nullptr; } class NativeThread { @@ -56,7 +56,7 @@ public: pthread_attr_setstacksize(attr, TH_STACK_SIZE); pthread_create(&thread, attr, start_routine, new P(obj, fun)); } - void join() { pthread_join(thread, NULL); } + void join() { pthread_join(thread, nullptr); } }; } // namespace Stockfish @@ -65,7 +65,7 @@ public: namespace Stockfish { -typedef std::thread NativeThread; +using NativeThread = std::thread; } // namespace Stockfish diff --git a/DroidFishApp/src/main/cpp/stockfish/timeman.cpp b/DroidFishApp/src/main/cpp/stockfish/timeman.cpp index cab0d76..061de01 100644 --- a/DroidFishApp/src/main/cpp/stockfish/timeman.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/timeman.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -36,6 +36,12 @@ TimeManagement Time; // Our global time management object void TimeManagement::init(Search::LimitsType& limits, Color us, int ply) { + // if we have no time, no need to initialize TM, except for the start time, + // which is used by movetime. + startTime = limits.startTime; + if (limits.time[us] == 0) + return; + TimePoint moveOverhead = TimePoint(Options["Move Overhead"]); TimePoint slowMover = TimePoint(Options["Slow Mover"]); TimePoint npmsec = TimePoint(Options["nodestime"]); @@ -59,8 +65,6 @@ void TimeManagement::init(Search::LimitsType& limits, Color us, int ply) { limits.npmsec = npmsec; } - startTime = limits.startTime; - // Maximum move horizon of 50 moves int mtg = limits.movestogo ? std::min(limits.movestogo, 50) : 50; diff --git a/DroidFishApp/src/main/cpp/stockfish/timeman.h b/DroidFishApp/src/main/cpp/stockfish/timeman.h index a86f076..3462b82 100644 --- a/DroidFishApp/src/main/cpp/stockfish/timeman.h +++ b/DroidFishApp/src/main/cpp/stockfish/timeman.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/DroidFishApp/src/main/cpp/stockfish/tt.cpp b/DroidFishApp/src/main/cpp/stockfish/tt.cpp index c7118ae..3339c99 100644 --- a/DroidFishApp/src/main/cpp/stockfish/tt.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/tt.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -87,7 +87,7 @@ void TranspositionTable::clear() { std::vector threads; - for (size_t idx = 0; idx < Options["Threads"]; ++idx) + for (size_t idx = 0; idx < size_t(Options["Threads"]); ++idx) { threads.emplace_back([this, idx]() { @@ -98,7 +98,7 @@ void TranspositionTable::clear() { // Each thread will zero its part of the hash table const size_t stride = size_t(clusterCount / Options["Threads"]), start = size_t(stride * idx), - len = idx != Options["Threads"] - 1 ? + len = idx != size_t(Options["Threads"]) - 1 ? stride : clusterCount - start; std::memset(&table[start], 0, len * sizeof(Cluster)); diff --git a/DroidFishApp/src/main/cpp/stockfish/tt.h b/DroidFishApp/src/main/cpp/stockfish/tt.h index 03fe3e1..3e335b4 100644 --- a/DroidFishApp/src/main/cpp/stockfish/tt.h +++ b/DroidFishApp/src/main/cpp/stockfish/tt.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/DroidFishApp/src/main/cpp/stockfish/tune.cpp b/DroidFishApp/src/main/cpp/stockfish/tune.cpp index a885845..41f6664 100644 --- a/DroidFishApp/src/main/cpp/stockfish/tune.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/tune.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/DroidFishApp/src/main/cpp/stockfish/tune.h b/DroidFishApp/src/main/cpp/stockfish/tune.h index 75ab484..440d950 100644 --- a/DroidFishApp/src/main/cpp/stockfish/tune.h +++ b/DroidFishApp/src/main/cpp/stockfish/tune.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -26,8 +26,8 @@ namespace Stockfish { -typedef std::pair Range; // Option's min-max values -typedef Range (RangeFun) (int); +using Range = std::pair; // Option's min-max values +using RangeFun = Range (int); // Default Range function, to calculate Option's min-max values inline Range default_range(int v) { @@ -75,7 +75,7 @@ struct SetRange { class Tune { - typedef void (PostUpdate) (); // Post-update function + using PostUpdate = void (); // Post-update function Tune() { read_results(); } Tune(const Tune&) = delete; diff --git a/DroidFishApp/src/main/cpp/stockfish/types.h b/DroidFishApp/src/main/cpp/stockfish/types.h index c2087c6..06b0a05 100644 --- a/DroidFishApp/src/main/cpp/stockfish/types.h +++ b/DroidFishApp/src/main/cpp/stockfish/types.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -103,8 +103,8 @@ constexpr bool Is64Bit = true; constexpr bool Is64Bit = false; #endif -typedef uint64_t Key; -typedef uint64_t Bitboard; +using Key = uint64_t; +using Bitboard = uint64_t; constexpr int MAX_MOVES = 256; constexpr int MAX_PLY = 246; @@ -186,6 +186,9 @@ enum Value : int { VALUE_MATE_IN_MAX_PLY = VALUE_MATE - MAX_PLY, VALUE_MATED_IN_MAX_PLY = -VALUE_MATE_IN_MAX_PLY, + // In the code, we make the assumption that these values + // are such that non_pawn_material() can be used to uniquely + // identify the material on the board. PawnValueMg = 126, PawnValueEg = 208, KnightValueMg = 781, KnightValueEg = 854, BishopValueMg = 825, BishopValueEg = 915, @@ -215,7 +218,7 @@ constexpr Value PieceValue[PHASE_NB][PIECE_NB] = { VALUE_ZERO, PawnValueEg, KnightValueEg, BishopValueEg, RookValueEg, QueenValueEg, VALUE_ZERO, VALUE_ZERO } }; -typedef int Depth; +using Depth = int; enum : int { DEPTH_QS_CHECKS = 0, @@ -413,6 +416,10 @@ inline Color color_of(Piece pc) { return Color(pc >> 3); } +constexpr bool is_ok(Move m) { + return m != MOVE_NONE && m != MOVE_NULL; +} + constexpr bool is_ok(Square s) { return s >= SQ_A1 && s <= SQ_H8; } @@ -442,10 +449,12 @@ constexpr Direction pawn_push(Color c) { } constexpr Square from_sq(Move m) { + assert(is_ok(m)); return Square((m >> 6) & 0x3F); } constexpr Square to_sq(Move m) { + assert(is_ok(m)); return Square(m & 0x3F); } @@ -470,10 +479,6 @@ constexpr Move make(Square from, Square to, PieceType pt = KNIGHT) { return Move(T + ((pt - KNIGHT) << 12) + (from << 6) + to); } -constexpr bool is_ok(Move m) { - return from_sq(m) != to_sq(m); // Catch MOVE_NULL and MOVE_NONE -} - /// Based on a congruential pseudo random number generator constexpr Key make_key(uint64_t seed) { return seed * 6364136223846793005ULL + 1442695040888963407ULL; diff --git a/DroidFishApp/src/main/cpp/stockfish/uci.cpp b/DroidFishApp/src/main/cpp/stockfish/uci.cpp index 5d842d2..ed16f24 100644 --- a/DroidFishApp/src/main/cpp/stockfish/uci.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/uci.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,6 +22,7 @@ #include #include +#include "benchmark.h" #include "evaluate.h" #include "movegen.h" #include "position.h" @@ -31,13 +32,12 @@ #include "tt.h" #include "uci.h" #include "syzygy/tbprobe.h" +#include "nnue/evaluate_nnue.h" using namespace std; namespace Stockfish { -extern vector setup_bench(const Position&, istream&); - namespace { // FEN string for the initial position in standard chess @@ -161,7 +161,7 @@ namespace { uint64_t num, nodes = 0, cnt = 1; vector list = setup_bench(pos, args); - num = count_if(list.begin(), list.end(), [](string s) { return s.find("go ") == 0 || s.find("eval") == 0; }); + num = count_if(list.begin(), list.end(), [](const string& s) { return s.find("go ") == 0 || s.find("eval") == 0; }); TimePoint elapsed = now(); @@ -207,8 +207,8 @@ namespace { // The coefficients of a third-order polynomial fit is based on the fishtest data // for two parameters that need to transform eval to the argument of a logistic // function. - constexpr double as[] = { -0.58270499, 2.68512549, 15.24638015, 344.49745382}; - constexpr double bs[] = { -2.65734562, 15.96509799, -20.69040836, 73.61029937 }; + constexpr double as[] = { 0.38036525, -2.82015070, 23.17882135, 307.36768407}; + constexpr double bs[] = { -2.29434733, 13.27689788, -14.26828904, 63.45318330 }; // Enforce that NormalizeToPawnValue corresponds to a 50% win rate at ply 64 static_assert(UCI::NormalizeToPawnValue == int(as[0] + as[1] + as[2] + as[3])); @@ -315,8 +315,13 @@ string UCI::value(Value v) { stringstream ss; - if (abs(v) < VALUE_MATE_IN_MAX_PLY) + if (abs(v) < VALUE_TB_WIN_IN_MAX_PLY) ss << "cp " << v * 100 / NormalizeToPawnValue; + else if (abs(v) < VALUE_MATE_IN_MAX_PLY) + { + const int ply = VALUE_MATE_IN_MAX_PLY - 1 - std::abs(v); // recompute ss->ply + ss << "cp " << (v > 0 ? 20000 - ply : -20000 + ply); + } else ss << "mate " << (v > 0 ? VALUE_MATE - v + 1 : -VALUE_MATE - v) / 2; @@ -354,15 +359,15 @@ std::string UCI::square(Square s) { string UCI::move(Move m, bool chess960) { - Square from = from_sq(m); - Square to = to_sq(m); - if (m == MOVE_NONE) return "(none)"; if (m == MOVE_NULL) return "0000"; + Square from = from_sq(m); + Square to = to_sq(m); + if (type_of(m) == CASTLING && !chess960) to = make_square(to > from ? FILE_G : FILE_C, rank_of(from)); diff --git a/DroidFishApp/src/main/cpp/stockfish/uci.h b/DroidFishApp/src/main/cpp/stockfish/uci.h index 3b5a676..8f1be00 100644 --- a/DroidFishApp/src/main/cpp/stockfish/uci.h +++ b/DroidFishApp/src/main/cpp/stockfish/uci.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -35,7 +35,7 @@ namespace UCI { // the win_rate_model() such that Stockfish outputs an advantage of // "100 centipawns" for a position if the engine has a 50% probability to win // from this position in selfplay at fishtest LTC time control. -const int NormalizeToPawnValue = 361; +const int NormalizeToPawnValue = 328; class Option; @@ -45,12 +45,12 @@ struct CaseInsensitiveLess { }; /// The options container is defined as a std::map -typedef std::map OptionsMap; +using OptionsMap = std::map; /// The Option class implements each option as specified by the UCI protocol class Option { - typedef void (*OnChange)(const Option&); + using OnChange = void (*)(const Option&); public: Option(OnChange = nullptr); @@ -61,7 +61,7 @@ public: Option& operator=(const std::string&); void operator<<(const Option&); - operator double() const; + operator int() const; operator std::string() const; bool operator==(const char*) const; diff --git a/DroidFishApp/src/main/cpp/stockfish/ucioption.cpp b/DroidFishApp/src/main/cpp/stockfish/ucioption.cpp index 9fb4834..f6342e5 100644 --- a/DroidFishApp/src/main/cpp/stockfish/ucioption.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/ucioption.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -38,13 +38,13 @@ UCI::OptionsMap Options; // Global object namespace UCI { /// 'On change' actions, triggered by an option's value change -void on_clear_hash(const Option&) { Search::clear(); } -void on_hash_size(const Option& o) { TT.resize(size_t(o)); } -void on_logger(const Option& o) { start_logger(o); } -void on_threads(const Option& o) { Threads.set(size_t(o)); } -void on_tb_path(const Option& o) { Tablebases::init(o); } -void on_use_NNUE(const Option& ) { Eval::NNUE::init(); } -void on_eval_file(const Option& ) { Eval::NNUE::init(); } +static void on_clear_hash(const Option&) { Search::clear(); } +static void on_hash_size(const Option& o) { TT.resize(size_t(o)); } +static void on_logger(const Option& o) { start_logger(o); } +static void on_threads(const Option& o) { Threads.set(size_t(o)); } +static void on_tb_path(const Option& o) { Tablebases::init(o); } +static void on_use_NNUE(const Option&) { Eval::NNUE::init(); } +static void on_eval_file(const Option&) { Eval::NNUE::init(); } /// Our case insensitive less() function as required by UCI protocol bool CaseInsensitiveLess::operator() (const string& s1, const string& s2) const { @@ -73,7 +73,7 @@ void init(OptionsMap& o) { o["UCI_Chess960"] << Option(false); o["UCI_AnalyseMode"] << Option(false); o["UCI_LimitStrength"] << Option(false); - o["UCI_Elo"] << Option(1350, 1350, 2850); + o["UCI_Elo"] << Option(1320, 1320, 3190); o["UCI_ShowWDL"] << Option(false); o["SyzygyPath"] << Option("", on_tb_path); o["SyzygyProbeDepth"] << Option(1, 1, 100); @@ -128,9 +128,9 @@ Option::Option(double v, int minv, int maxv, OnChange f) : type("spin"), min(min Option::Option(const char* v, const char* cur, OnChange f) : type("combo"), min(0), max(0), on_change(f) { defaultValue = v; currentValue = cur; } -Option::operator double() const { +Option::operator int() const { assert(type == "check" || type == "spin"); - return (type == "spin" ? stof(currentValue) : currentValue == "true"); + return (type == "spin" ? std::stoi(currentValue) : currentValue == "true"); } Option::operator std::string() const { diff --git a/DroidFishApp/src/main/java/org/petero/droidfish/engine/InternalStockFish.java b/DroidFishApp/src/main/java/org/petero/droidfish/engine/InternalStockFish.java index 64477a6..006ad9d 100644 --- a/DroidFishApp/src/main/java/org/petero/droidfish/engine/InternalStockFish.java +++ b/DroidFishApp/src/main/java/org/petero/droidfish/engine/InternalStockFish.java @@ -36,7 +36,7 @@ import org.petero.droidfish.EngineOptions; /** Stockfish engine running as process, started from assets resource. */ public class InternalStockFish extends ExternalEngine { - private static final String defaultNet = "nn-ad9b42354671.nnue"; + private static final String defaultNet = "nn-5af11540bbfe.nnue"; private static final String netOption = "evalfile"; private File defaultNetFile; // To get the full path of the copied default network file