diff --git a/DroidFishApp/src/main/assets/nn-3475407dc199.nnue b/DroidFishApp/src/main/assets/nn-6877cd24400e.nnue similarity index 78% rename from DroidFishApp/src/main/assets/nn-3475407dc199.nnue rename to DroidFishApp/src/main/assets/nn-6877cd24400e.nnue index d1fdcf2..0bf5b62 100644 Binary files a/DroidFishApp/src/main/assets/nn-3475407dc199.nnue and b/DroidFishApp/src/main/assets/nn-6877cd24400e.nnue differ diff --git a/DroidFishApp/src/main/cpp/stockfish/Android.mk b/DroidFishApp/src/main/cpp/stockfish/Android.mk index 35969fa..abe90e4 100644 --- a/DroidFishApp/src/main/cpp/stockfish/Android.mk +++ b/DroidFishApp/src/main/cpp/stockfish/Android.mk @@ -5,7 +5,7 @@ SF_SRC_FILES := \ bitbase.cpp endgame.cpp material.cpp movepick.cpp position.cpp timeman.cpp \ tune.cpp ucioption.cpp \ bitboard.cpp evaluate.cpp misc.cpp search.cpp tt.cpp syzygy/tbprobe.cpp \ - nnue/evaluate_nnue.cpp nnue/features/half_ka_v2.cpp + nnue/evaluate_nnue.cpp nnue/features/half_ka_v2_hm.cpp MY_ARCH_DEF := ifeq ($(TARGET_ARCH_ABI),armeabi-v7a) diff --git a/DroidFishApp/src/main/cpp/stockfish/benchmark.cpp b/DroidFishApp/src/main/cpp/stockfish/benchmark.cpp index 7945a45..e1c025a 100644 --- a/DroidFishApp/src/main/cpp/stockfish/benchmark.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/benchmark.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -87,6 +87,7 @@ const vector Defaults = { // Chess 960 "setoption name UCI_Chess960 value true", "bbqnnrkr/pppppppp/8/8/8/8/PPPPPPPP/BBQNNRKR w HFhf - 0 1 moves g2g3 d7d5 d2d4 c8h3 c1g5 e8d6 g5e7 f7f6", + "nqbnrkrb/pppppppp/8/8/8/8/PPPPPPPP/NQBNRKRB w KQkq - 0 1", "setoption name UCI_Chess960 value false" }; diff --git a/DroidFishApp/src/main/cpp/stockfish/bitbase.cpp b/DroidFishApp/src/main/cpp/stockfish/bitbase.cpp index 27bf409..84300ba 100644 --- a/DroidFishApp/src/main/cpp/stockfish/bitbase.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/bitbase.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/DroidFishApp/src/main/cpp/stockfish/bitboard.cpp b/DroidFishApp/src/main/cpp/stockfish/bitboard.cpp index 6b84b51..fd0ba23 100644 --- a/DroidFishApp/src/main/cpp/stockfish/bitboard.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/bitboard.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/DroidFishApp/src/main/cpp/stockfish/bitboard.h b/DroidFishApp/src/main/cpp/stockfish/bitboard.h index b29f3e2..2b6e2a6 100644 --- a/DroidFishApp/src/main/cpp/stockfish/bitboard.h +++ b/DroidFishApp/src/main/cpp/stockfish/bitboard.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/DroidFishApp/src/main/cpp/stockfish/endgame.cpp b/DroidFishApp/src/main/cpp/stockfish/endgame.cpp index a44d3a1..e773e7a 100644 --- a/DroidFishApp/src/main/cpp/stockfish/endgame.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/endgame.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/DroidFishApp/src/main/cpp/stockfish/endgame.h b/DroidFishApp/src/main/cpp/stockfish/endgame.h index 146111b..e79f696 100644 --- a/DroidFishApp/src/main/cpp/stockfish/endgame.h +++ b/DroidFishApp/src/main/cpp/stockfish/endgame.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/DroidFishApp/src/main/cpp/stockfish/evaluate.cpp b/DroidFishApp/src/main/cpp/stockfish/evaluate.cpp index c83f0de..8bb42ce 100644 --- a/DroidFishApp/src/main/cpp/stockfish/evaluate.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/evaluate.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -61,7 +61,7 @@ namespace Stockfish { namespace Eval { bool useNNUE; - string eval_file_loaded = "None"; + string currentEvalFileName = "None"; /// NNUE::init() tries to load a NNUE network at startup time, or when the engine /// receives a UCI command "setoption name EvalFile value nn-[a-z0-9]{12}.nnue" @@ -78,6 +78,8 @@ namespace Eval { return; string eval_file = string(Options["EvalFile"]); + if (eval_file.empty()) + eval_file = EvalFileDefaultName; #if defined(DEFAULT_NNUE_DIRECTORY) #define stringify2(x) #x @@ -88,13 +90,13 @@ namespace Eval { #endif for (string directory : dirs) - if (eval_file_loaded != eval_file) + if (currentEvalFileName != eval_file) { if (directory != "") { ifstream stream(directory + eval_file, ios::binary); if (load_eval(eval_file, stream)) - eval_file_loaded = eval_file; + currentEvalFileName = eval_file; } if (directory == "" && eval_file == EvalFileDefaultName) @@ -106,30 +108,29 @@ namespace Eval { MemoryBuffer buffer(const_cast(reinterpret_cast(gEmbeddedNNUEData)), size_t(gEmbeddedNNUESize)); + (void) gEmbeddedNNUEEnd; // Silence warning on unused variable istream stream(&buffer); if (load_eval(eval_file, stream)) - eval_file_loaded = eval_file; + currentEvalFileName = eval_file; } } - if (eval_file_loaded != eval_file) - eval_file_loaded = ""; } /// NNUE::verify() verifies that the last net used was loaded successfully void NNUE::verify() { string eval_file = string(Options["EvalFile"]); + if (eval_file.empty()) + eval_file = EvalFileDefaultName; - if (useNNUE && eval_file_loaded != eval_file) + if (useNNUE && currentEvalFileName != eval_file) { - UCI::OptionsMap defaults; - UCI::init(defaults); string msg1 = "If the UCI option \"Use NNUE\" is set to true, network evaluation parameters compatible with the engine must be available."; string msg2 = "The option is set to true, but the network file " + eval_file + " was not loaded successfully."; string msg3 = "The UCI option EvalFile might need to specify the full path, including the directory name, to the network file."; - string msg4 = "The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/" + string(defaults["EvalFile"]); + string msg4 = "The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/" + std::string(EvalFileDefaultName); string msg5 = "The engine will be terminated now."; sync_cout << "info string ERROR: " << msg1 << sync_endl; @@ -192,17 +193,17 @@ using namespace Trace; namespace { // Threshold for lazy and space evaluation - constexpr Value LazyThreshold1 = Value(1565); - constexpr Value LazyThreshold2 = Value(1102); + constexpr Value LazyThreshold1 = Value(3631); + constexpr Value LazyThreshold2 = Value(2084); constexpr Value SpaceThreshold = Value(11551); // KingAttackWeights[PieceType] contains king attack weights by piece type - constexpr int KingAttackWeights[PIECE_TYPE_NB] = { 0, 0, 81, 52, 44, 10 }; + constexpr int KingAttackWeights[PIECE_TYPE_NB] = { 0, 0, 76, 46, 45, 14 }; // SafeCheck[PieceType][single/multiple] contains safe check bonus by piece type, // higher if multiple safe checks are possible for that piece type. constexpr int SafeCheck[][2] = { - {}, {}, {803, 1292}, {639, 974}, {1087, 1878}, {759, 1132} + {}, {}, {805, 1292}, {650, 984}, {1071, 1886}, {730, 1128} }; #define S(mg, eg) make_score(mg, eg) @@ -228,58 +229,58 @@ namespace { // BishopPawns[distance from edge] contains a file-dependent penalty for pawns on // squares of the same color as our bishop. constexpr Score BishopPawns[int(FILE_NB) / 2] = { - S(3, 8), S(3, 9), S(2, 8), S(3, 8) + S(3, 8), S(3, 9), S(2, 7), S(3, 7) }; // KingProtector[knight/bishop] contains penalty for each distance unit to own king - constexpr Score KingProtector[] = { S(8, 9), S(6, 9) }; + constexpr Score KingProtector[] = { S(9, 9), S(7, 9) }; // Outpost[knight/bishop] contains bonuses for each knight or bishop occupying a // pawn protected square on rank 4 to 6 which is also safe from a pawn attack. - constexpr Score Outpost[] = { S(57, 38), S(31, 24) }; + constexpr Score Outpost[] = { S(54, 34), S(31, 25) }; // PassedRank[Rank] contains a bonus according to the rank of a passed pawn constexpr Score PassedRank[RANK_NB] = { - S(0, 0), S(7, 27), S(16, 32), S(17, 40), S(64, 71), S(170, 174), S(278, 262) + S(0, 0), S(2, 38), S(15, 36), S(22, 50), S(64, 81), S(166, 184), S(284, 269) }; constexpr Score RookOnClosedFile = S(10, 5); - constexpr Score RookOnOpenFile[] = { S(19, 6), S(47, 26) }; + constexpr Score RookOnOpenFile[] = { S(18, 8), S(49, 26) }; // ThreatByMinor/ByRook[attacked PieceType] contains bonuses according to // which piece type attacks which one. Attacks on lesser pieces which are // pawn-defended are not considered. constexpr Score ThreatByMinor[PIECE_TYPE_NB] = { - S(0, 0), S(5, 32), S(55, 41), S(77, 56), S(89, 119), S(79, 162) + S(0, 0), S(6, 37), S(64, 50), S(82, 57), S(103, 130), S(81, 163) }; constexpr Score ThreatByRook[PIECE_TYPE_NB] = { - S(0, 0), S(3, 44), S(37, 68), S(42, 60), S(0, 39), S(58, 43) + S(0, 0), S(3, 44), S(36, 71), S(44, 59), S(0, 39), S(60, 39) }; constexpr Value CorneredBishop = Value(50); // Assorted bonuses and penalties - constexpr Score UncontestedOutpost = S( 1, 10); + constexpr Score UncontestedOutpost = S( 0, 10); constexpr Score BishopOnKingRing = S( 24, 0); constexpr Score BishopXRayPawns = S( 4, 5); constexpr Score FlankAttacks = S( 8, 0); - constexpr Score Hanging = S( 69, 36); + constexpr Score Hanging = S( 72, 40); constexpr Score KnightOnQueen = S( 16, 11); constexpr Score LongDiagonalBishop = S( 45, 0); constexpr Score MinorBehindPawn = S( 18, 3); - constexpr Score PassedFile = S( 11, 8); - constexpr Score PawnlessFlank = S( 17, 95); - constexpr Score ReachableOutpost = S( 31, 22); - constexpr Score RestrictedPiece = S( 7, 7); + constexpr Score PassedFile = S( 13, 8); + constexpr Score PawnlessFlank = S( 19, 97); + constexpr Score ReachableOutpost = S( 33, 19); + constexpr Score RestrictedPiece = S( 6, 7); constexpr Score RookOnKingRing = S( 16, 0); - constexpr Score SliderOnQueen = S( 60, 18); - constexpr Score ThreatByKing = S( 24, 89); + constexpr Score SliderOnQueen = S( 62, 21); + constexpr Score ThreatByKing = S( 24, 87); constexpr Score ThreatByPawnPush = S( 48, 39); - constexpr Score ThreatBySafePawn = S(173, 94); + constexpr Score ThreatBySafePawn = S(167, 99); constexpr Score TrappedRook = S( 55, 13); constexpr Score WeakQueenProtection = S( 14, 0); - constexpr Score WeakQueen = S( 56, 15); + constexpr Score WeakQueen = S( 57, 19); #undef S @@ -988,7 +989,9 @@ namespace { // Early exit if score is high auto lazy_skip = [&](Value lazyThreshold) { - return abs(mg_value(score) + eg_value(score)) / 2 > lazyThreshold + pos.non_pawn_material() / 64; + return abs(mg_value(score) + eg_value(score)) > lazyThreshold + + std::abs(pos.this_thread()->bestValue) * 5 / 4 + + pos.non_pawn_material() / 32; }; if (lazy_skip(LazyThreshold1)) @@ -1053,26 +1056,22 @@ make_v: if ( pos.piece_on(SQ_A1) == W_BISHOP && pos.piece_on(SQ_B2) == W_PAWN) - correction += !pos.empty(SQ_B3) ? -CorneredBishop * 4 - : -CorneredBishop * 3; + correction -= CorneredBishop; if ( pos.piece_on(SQ_H1) == W_BISHOP && pos.piece_on(SQ_G2) == W_PAWN) - correction += !pos.empty(SQ_G3) ? -CorneredBishop * 4 - : -CorneredBishop * 3; + correction -= CorneredBishop; if ( pos.piece_on(SQ_A8) == B_BISHOP && pos.piece_on(SQ_B7) == B_PAWN) - correction += !pos.empty(SQ_B6) ? CorneredBishop * 4 - : CorneredBishop * 3; + correction += CorneredBishop; if ( pos.piece_on(SQ_H8) == B_BISHOP && pos.piece_on(SQ_G7) == B_PAWN) - correction += !pos.empty(SQ_G6) ? CorneredBishop * 4 - : CorneredBishop * 3; + correction += CorneredBishop; - return pos.side_to_move() == WHITE ? Value(correction) - : -Value(correction); + return pos.side_to_move() == WHITE ? Value(3 * correction) + : -Value(3 * correction); } } // namespace Eval @@ -1084,38 +1083,37 @@ make_v: Value Eval::evaluate(const Position& pos) { Value v; + bool useClassical = false; - if (!Eval::useNNUE) - v = Evaluation(pos).value(); - else + // Deciding between classical and NNUE eval (~10 Elo): for high PSQ imbalance we use classical, + // but we switch to NNUE during long shuffling or with high material on the board. + if ( !useNNUE + || ((pos.this_thread()->depth > 9 || pos.count() > 7) && + abs(eg_value(pos.psq_score())) * 5 > (856 + pos.non_pawn_material() / 64) * (10 + pos.rule50_count()))) { - // Scale and shift NNUE for compatibility with search and classical evaluation - auto adjusted_NNUE = [&]() - { - int scale = 903 - + 32 * pos.count() - + 32 * pos.non_pawn_material() / 1024; + v = Evaluation(pos).value(); // classical + useClassical = abs(v) >= 297; + } - Value nnue = NNUE::evaluate(pos, true) * scale / 1024; + // If result of a classical evaluation is much lower than threshold fall back to NNUE + if (useNNUE && !useClassical) + { + Value nnue = NNUE::evaluate(pos, true); // NNUE + int scale = 1036 + 22 * pos.non_pawn_material() / 1024; + Color stm = pos.side_to_move(); + Value optimism = pos.this_thread()->optimism[stm]; + Value psq = (stm == WHITE ? 1 : -1) * eg_value(pos.psq_score()); + int complexity = 35 * abs(nnue - psq) / 256; - if (pos.is_chess960()) - nnue += fix_FRC(pos); + optimism = optimism * (44 + complexity) / 31; + v = (nnue + optimism) * scale / 1024 - optimism; - return nnue; - }; - - // If there is PSQ imbalance we use the classical eval, but we switch to - // NNUE eval faster when shuffling or if the material on the board is high. - int r50 = pos.rule50_count(); - Value psq = Value(abs(eg_value(pos.psq_score()))); - bool classical = psq * 5 > (750 + pos.non_pawn_material() / 64) * (5 + r50); - - v = classical ? Evaluation(pos).value() // classical - : adjusted_NNUE(); // NNUE + if (pos.is_chess960()) + v += fix_FRC(pos); } // Damp down the evaluation linearly when shuffling - v = v * (100 - pos.rule50_count()) / 100; + v = v * (195 - pos.rule50_count()) / 211; // Guarantee evaluation does not hit the tablebase range v = std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1); @@ -1140,7 +1138,12 @@ std::string Eval::trace(Position& pos) { std::memset(scores, 0, sizeof(scores)); - pos.this_thread()->trend = SCORE_ZERO; // Reset any dynamic contempt + // Reset any global variable used in eval + pos.this_thread()->depth = 0; + pos.this_thread()->trend = SCORE_ZERO; + pos.this_thread()->bestValue = VALUE_ZERO; + pos.this_thread()->optimism[WHITE] = VALUE_ZERO; + pos.this_thread()->optimism[BLACK] = VALUE_ZERO; v = Evaluation(pos).value(); diff --git a/DroidFishApp/src/main/cpp/stockfish/evaluate.h b/DroidFishApp/src/main/cpp/stockfish/evaluate.h index 91da01d..1934c9b 100644 --- a/DroidFishApp/src/main/cpp/stockfish/evaluate.h +++ b/DroidFishApp/src/main/cpp/stockfish/evaluate.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -34,12 +34,12 @@ namespace Eval { Value evaluate(const Position& pos); extern bool useNNUE; - extern std::string eval_file_loaded; + extern std::string currentEvalFileName; // The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue // for the build process (profile-build and fishtest) to work. Do not change the // name of the macro, as it is used in the Makefile. - #define EvalFileDefaultName "nn-3475407dc199.nnue" + #define EvalFileDefaultName "nn-6877cd24400e.nnue" namespace NNUE { diff --git a/DroidFishApp/src/main/cpp/stockfish/main.cpp b/DroidFishApp/src/main/cpp/stockfish/main.cpp index 62e0ed5..fad0ef8 100644 --- a/DroidFishApp/src/main/cpp/stockfish/main.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/main.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/DroidFishApp/src/main/cpp/stockfish/material.cpp b/DroidFishApp/src/main/cpp/stockfish/material.cpp index 9d17af2..1567358 100644 --- a/DroidFishApp/src/main/cpp/stockfish/material.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/material.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/DroidFishApp/src/main/cpp/stockfish/material.h b/DroidFishApp/src/main/cpp/stockfish/material.h index 26535a5..3ca169c 100644 --- a/DroidFishApp/src/main/cpp/stockfish/material.h +++ b/DroidFishApp/src/main/cpp/stockfish/material.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/DroidFishApp/src/main/cpp/stockfish/misc.cpp b/DroidFishApp/src/main/cpp/stockfish/misc.cpp index 78227ee..178465c 100644 --- a/DroidFishApp/src/main/cpp/stockfish/misc.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/misc.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -36,6 +36,8 @@ typedef bool(*fun1_t)(LOGICAL_PROCESSOR_RELATIONSHIP, PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, PDWORD); typedef bool(*fun2_t)(USHORT, PGROUP_AFFINITY); typedef bool(*fun3_t)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY); +typedef bool(*fun4_t)(USHORT, PGROUP_AFFINITY, USHORT, PUSHORT); +typedef WORD(*fun5_t)(); } #endif @@ -67,7 +69,7 @@ namespace { /// Version number. If Version is left empty, then compile date in the format /// DD-MM-YY and show in engine_info. -const string Version = "14"; +const string Version = "15"; /// Our fancy logging facility. The trick here is to replace cin.rdbuf() and /// cout.rdbuf() with two Tie objects that tie cin and cout to a file stream. We @@ -110,7 +112,14 @@ public: static Logger l; - if (!fname.empty() && !l.file.is_open()) + if (l.file.is_open()) + { + cout.rdbuf(l.out.buf); + cin.rdbuf(l.in.buf); + l.file.close(); + } + + if (!fname.empty()) { l.file.open(fname, ifstream::out); @@ -123,12 +132,6 @@ public: cin.rdbuf(&l.in); cout.rdbuf(&l.out); } - else if (fname.empty() && l.file.is_open()) - { - cout.rdbuf(l.out.buf); - cin.rdbuf(l.in.buf); - l.file.close(); - } } }; @@ -378,6 +381,7 @@ void std_aligned_free(void* ptr) { static void* aligned_large_pages_alloc_windows(size_t allocSize) { #if !defined(_WIN64) + (void)allocSize; // suppress unused-parameter compiler warning return nullptr; #else @@ -493,11 +497,11 @@ void bindThisThread(size_t) {} #else -/// best_group() retrieves logical processor information using Windows specific -/// API and returns the best group id for the thread with index idx. Original +/// best_node() retrieves logical processor information using Windows specific +/// API and returns the best node id for the thread with index idx. Original /// code from Texel by Peter Ă–sterlund. -int best_group(size_t idx) { +int best_node(size_t idx) { int threads = 0; int nodes = 0; @@ -511,7 +515,8 @@ int best_group(size_t idx) { if (!fun1) return -1; - // First call to get returnLength. We expect it to fail due to null buffer + // First call to GetLogicalProcessorInformationEx() to get returnLength. + // We expect the call to fail due to null buffer. if (fun1(RelationAll, nullptr, &returnLength)) return -1; @@ -519,7 +524,7 @@ int best_group(size_t idx) { SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *buffer, *ptr; ptr = buffer = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)malloc(returnLength); - // Second call, now we expect to succeed + // Second call to GetLogicalProcessorInformationEx(), now we expect to succeed if (!fun1(RelationAll, buffer, &returnLength)) { free(buffer); @@ -569,22 +574,38 @@ int best_group(size_t idx) { void bindThisThread(size_t idx) { // Use only local variables to be thread-safe - int group = best_group(idx); + int node = best_node(idx); - if (group == -1) + if (node == -1) return; // Early exit if the needed API are not available at runtime HMODULE k32 = GetModuleHandle("Kernel32.dll"); auto fun2 = (fun2_t)(void(*)())GetProcAddress(k32, "GetNumaNodeProcessorMaskEx"); auto fun3 = (fun3_t)(void(*)())GetProcAddress(k32, "SetThreadGroupAffinity"); + auto fun4 = (fun4_t)(void(*)())GetProcAddress(k32, "GetNumaNodeProcessorMask2"); + auto fun5 = (fun5_t)(void(*)())GetProcAddress(k32, "GetMaximumProcessorGroupCount"); if (!fun2 || !fun3) return; - GROUP_AFFINITY affinity; - if (fun2(group, &affinity)) - fun3(GetCurrentThread(), &affinity, nullptr); + if (!fun4 || !fun5) + { + GROUP_AFFINITY affinity; + if (fun2(node, &affinity)) // GetNumaNodeProcessorMaskEx + fun3(GetCurrentThread(), &affinity, nullptr); // SetThreadGroupAffinity + } + else + { + // If a numa node has more than one processor group, we assume they are + // sized equal and we spread threads evenly across the groups. + USHORT elements, returnedElements; + elements = fun5(); // GetMaximumProcessorGroupCount + GROUP_AFFINITY *affinity = (GROUP_AFFINITY*)malloc(elements * sizeof(GROUP_AFFINITY)); + if (fun4(node, affinity, elements, &returnedElements)) // GetNumaNodeProcessorMask2 + fun3(GetCurrentThread(), &affinity[idx % returnedElements], nullptr); // SetThreadGroupAffinity + free(affinity); + } } #endif diff --git a/DroidFishApp/src/main/cpp/stockfish/misc.h b/DroidFishApp/src/main/cpp/stockfish/misc.h index dae37cd..2fd2b40 100644 --- a/DroidFishApp/src/main/cpp/stockfish/misc.h +++ b/DroidFishApp/src/main/cpp/stockfish/misc.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -85,19 +85,30 @@ static inline const union { uint32_t i; char c[4]; } Le = { 0x01020304 }; static inline const bool IsLittleEndian = (Le.c[0] == 4); -template -class ValueListInserter { -public: - ValueListInserter(T* v, std::size_t& s) : - values(v), - size(&s) - { - } +// RunningAverage : a class to calculate a running average of a series of values. +// For efficiency, all computations are done with integers. +class RunningAverage { + public: - void push_back(const T& value) { values[(*size)++] = value; } -private: - T* values; - std::size_t* size; + // Reset the running average to rational value p / q + void set(int64_t p, int64_t q) + { average = p * PERIOD * RESOLUTION / q; } + + // Update average with value v + void update(int64_t v) + { average = RESOLUTION * v + (PERIOD - 1) * average / PERIOD; } + + // Test if average is strictly greater than rational a / b + bool is_greater(int64_t a, int64_t b) const + { return b * average > a * (PERIOD * RESOLUTION); } + + int64_t value() const + { return average / (PERIOD * RESOLUTION); } + + private : + static constexpr int64_t PERIOD = 4096; + static constexpr int64_t RESOLUTION = 1024; + int64_t average; }; template @@ -113,7 +124,6 @@ public: const T& operator[](std::size_t index) const { return values_[index]; } const T* begin() const { return values_; } const T* end() const { return values_ + size_; } - operator ValueListInserter() { return ValueListInserter(values_, size_); } void swap(ValueList& other) { const std::size_t maxSize = std::max(size_, other.size_); @@ -128,6 +138,34 @@ private: std::size_t size_ = 0; }; + +/// sigmoid(t, x0, y0, C, P, Q) implements a sigmoid-like function using only integers, +/// with the following properties: +/// +/// - sigmoid is centered in (x0, y0) +/// - sigmoid has amplitude [-P/Q , P/Q] instead of [-1 , +1] +/// - limit is (y0 - P/Q) when t tends to -infinity +/// - limit is (y0 + P/Q) when t tends to +infinity +/// - the slope can be adjusted using C > 0, smaller C giving a steeper sigmoid +/// - the slope of the sigmoid when t = x0 is P/(Q*C) +/// - sigmoid is increasing with t when P > 0 and Q > 0 +/// - to get a decreasing sigmoid, change sign of P +/// - mean value of the sigmoid is y0 +/// +/// Use to draw the sigmoid + +inline int64_t sigmoid(int64_t t, int64_t x0, + int64_t y0, + int64_t C, + int64_t P, + int64_t Q) +{ + assert(C > 0); + assert(Q != 0); + return y0 + P * (t-x0) / (Q * (std::abs(t-x0) + C)) ; +} + + /// xorshift64star Pseudo-Random Number Generator /// This class is based on original code written and dedicated /// to the public domain by Sebastiano Vigna (2014). diff --git a/DroidFishApp/src/main/cpp/stockfish/movegen.cpp b/DroidFishApp/src/main/cpp/stockfish/movegen.cpp index 5f3ba90..c7a3c29 100644 --- a/DroidFishApp/src/main/cpp/stockfish/movegen.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/movegen.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -52,9 +52,9 @@ namespace { constexpr Direction UpRight = (Us == WHITE ? NORTH_EAST : SOUTH_WEST); constexpr Direction UpLeft = (Us == WHITE ? NORTH_WEST : SOUTH_EAST); - const Bitboard emptySquares = Type == QUIETS || Type == QUIET_CHECKS ? target : ~pos.pieces(); - const Bitboard enemies = Type == EVASIONS ? pos.checkers() - : Type == CAPTURES ? target : pos.pieces(Them); + const Bitboard emptySquares = ~pos.pieces(); + const Bitboard enemies = Type == EVASIONS ? pos.checkers() + : pos.pieces(Them); Bitboard pawnsOn7 = pos.pieces(Us, PAWN) & TRank7BB; Bitboard pawnsNotOn7 = pos.pieces(Us, PAWN) & ~TRank7BB; diff --git a/DroidFishApp/src/main/cpp/stockfish/movegen.h b/DroidFishApp/src/main/cpp/stockfish/movegen.h index 3f895f0..bbb35b3 100644 --- a/DroidFishApp/src/main/cpp/stockfish/movegen.h +++ b/DroidFishApp/src/main/cpp/stockfish/movegen.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/DroidFishApp/src/main/cpp/stockfish/movepick.cpp b/DroidFishApp/src/main/cpp/stockfish/movepick.cpp index 4ff4cff..b0166c6 100644 --- a/DroidFishApp/src/main/cpp/stockfish/movepick.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/movepick.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,6 +18,7 @@ #include +#include "bitboard.h" #include "movepick.h" namespace Stockfish { @@ -56,11 +57,14 @@ namespace { /// ordering is at the current node. /// MovePicker constructor for the main search -MovePicker::MovePicker(const Position& p, Move ttm, Depth d, const ButterflyHistory* mh, const LowPlyHistory* lp, - const CapturePieceToHistory* cph, const PieceToHistory** ch, Move cm, const Move* killers, int pl) - : pos(p), mainHistory(mh), lowPlyHistory(lp), captureHistory(cph), continuationHistory(ch), - ttMove(ttm), refutations{{killers[0], 0}, {killers[1], 0}, {cm, 0}}, depth(d), ply(pl) { - +MovePicker::MovePicker(const Position& p, Move ttm, Depth d, const ButterflyHistory* mh, + const CapturePieceToHistory* cph, + const PieceToHistory** ch, + Move cm, + const Move* killers) + : pos(p), mainHistory(mh), captureHistory(cph), continuationHistory(ch), + ttMove(ttm), refutations{{killers[0], 0}, {killers[1], 0}, {cm, 0}}, depth(d) +{ assert(d > 0); stage = (pos.checkers() ? EVASION_TT : MAIN_TT) + @@ -69,9 +73,11 @@ MovePicker::MovePicker(const Position& p, Move ttm, Depth d, const ButterflyHist /// MovePicker constructor for quiescence search MovePicker::MovePicker(const Position& p, Move ttm, Depth d, const ButterflyHistory* mh, - const CapturePieceToHistory* cph, const PieceToHistory** ch, Square rs) - : pos(p), mainHistory(mh), captureHistory(cph), continuationHistory(ch), ttMove(ttm), recaptureSquare(rs), depth(d) { - + const CapturePieceToHistory* cph, + const PieceToHistory** ch, + Square rs) + : pos(p), mainHistory(mh), captureHistory(cph), continuationHistory(ch), ttMove(ttm), recaptureSquare(rs), depth(d) +{ assert(d <= 0); stage = (pos.checkers() ? EVASION_TT : QSEARCH_TT) + @@ -82,9 +88,9 @@ MovePicker::MovePicker(const Position& p, Move ttm, Depth d, const ButterflyHist /// MovePicker constructor for ProbCut: we generate captures with SEE greater /// than or equal to the given threshold. -MovePicker::MovePicker(const Position& p, Move ttm, Value th, const CapturePieceToHistory* cph) - : pos(p), captureHistory(cph), ttMove(ttm), threshold(th) { - +MovePicker::MovePicker(const Position& p, Move ttm, Value th, Depth d, const CapturePieceToHistory* cph) + : pos(p), captureHistory(cph), ttMove(ttm), threshold(th), depth(d) +{ assert(!pos.checkers()); stage = PROBCUT_TT + !(ttm && pos.capture(ttm) @@ -100,10 +106,35 @@ void MovePicker::score() { static_assert(Type == CAPTURES || Type == QUIETS || Type == EVASIONS, "Wrong type"); + Bitboard threatened, threatenedByPawn, threatenedByMinor, threatenedByRook; + if constexpr (Type == QUIETS) + { + Color us = pos.side_to_move(); + // squares threatened by pawns + threatenedByPawn = pos.attacks_by(~us); + // squares threatened by minors or pawns + threatenedByMinor = pos.attacks_by(~us) | pos.attacks_by(~us) | threatenedByPawn; + // squares threatened by rooks, minors or pawns + threatenedByRook = pos.attacks_by(~us) | threatenedByMinor; + + // pieces threatened by pieces of lesser material value + threatened = (pos.pieces(us, QUEEN) & threatenedByRook) + | (pos.pieces(us, ROOK) & threatenedByMinor) + | (pos.pieces(us, KNIGHT, BISHOP) & threatenedByPawn); + } + else + { + // Silence unused variable warnings + (void) threatened; + (void) threatenedByPawn; + (void) threatenedByMinor; + (void) threatenedByRook; + } + for (auto& m : *this) if constexpr (Type == CAPTURES) - m.value = int(PieceValue[MG][pos.piece_on(to_sq(m))]) * 6 - + (*captureHistory)[pos.moved_piece(m)][to_sq(m)][type_of(pos.piece_on(to_sq(m)))]; + m.value = 6 * int(PieceValue[MG][pos.piece_on(to_sq(m))]) + + (*captureHistory)[pos.moved_piece(m)][to_sq(m)][type_of(pos.piece_on(to_sq(m)))]; else if constexpr (Type == QUIETS) m.value = (*mainHistory)[pos.side_to_move()][from_to(m)] @@ -111,7 +142,12 @@ void MovePicker::score() { + (*continuationHistory[1])[pos.moved_piece(m)][to_sq(m)] + (*continuationHistory[3])[pos.moved_piece(m)][to_sq(m)] + (*continuationHistory[5])[pos.moved_piece(m)][to_sq(m)] - + (ply < MAX_LPH ? std::min(4, depth / 3) * (*lowPlyHistory)[ply][from_to(m)] : 0); + + (threatened & from_sq(m) ? + (type_of(pos.moved_piece(m)) == QUEEN && !(to_sq(m) & threatenedByRook) ? 50000 + : type_of(pos.moved_piece(m)) == ROOK && !(to_sq(m) & threatenedByMinor) ? 25000 + : !(to_sq(m) & threatenedByPawn) ? 15000 + : 0) + : 0); else // Type == EVASIONS { @@ -165,11 +201,12 @@ top: endMoves = generate(pos, cur); score(); + partial_insertion_sort(cur, endMoves, -3000 * depth); ++stage; goto top; case GOOD_CAPTURE: - if (select([&](){ + if (select([&](){ return pos.see_ge(*cur, Value(-69 * cur->value / 1024)) ? // Move losing capture to endBadCaptures to be tried later true : (*endBadCaptures++ = *cur, false); })) @@ -237,10 +274,10 @@ top: return select([](){ return true; }); case PROBCUT: - return select([&](){ return pos.see_ge(*cur, threshold); }); + return select([&](){ return pos.see_ge(*cur, threshold); }); case QCAPTURE: - if (select([&](){ return depth > DEPTH_QS_RECAPTURES + if (select([&](){ return depth > DEPTH_QS_RECAPTURES || to_sq(*cur) == recaptureSquare; })) return *(cur - 1); diff --git a/DroidFishApp/src/main/cpp/stockfish/movepick.h b/DroidFishApp/src/main/cpp/stockfish/movepick.h index c76d495..9a3c279 100644 --- a/DroidFishApp/src/main/cpp/stockfish/movepick.h +++ b/DroidFishApp/src/main/cpp/stockfish/movepick.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -86,13 +86,7 @@ enum StatsType { NoCaptures, Captures }; /// unsuccessful during the current search, and is used for reduction and move /// ordering decisions. It uses 2 tables (one for each color) indexed by /// the move's from and to squares, see www.chessprogramming.org/Butterfly_Boards -typedef Stats ButterflyHistory; - -/// At higher depths LowPlyHistory records successful quiet moves near the root -/// and quiet moves which are/were in the PV (ttPv). It is cleared with each new -/// search and filled during iterative deepening. -constexpr int MAX_LPH = 4; -typedef Stats LowPlyHistory; +typedef Stats ButterflyHistory; /// CounterMoveHistory stores counter moves indexed by [piece][to] of the previous /// move, see www.chessprogramming.org/Countermove_Heuristic @@ -123,18 +117,16 @@ class MovePicker { public: MovePicker(const MovePicker&) = delete; MovePicker& operator=(const MovePicker&) = delete; - MovePicker(const Position&, Move, Value, const CapturePieceToHistory*); + MovePicker(const Position&, Move, Depth, const ButterflyHistory*, + const CapturePieceToHistory*, + const PieceToHistory**, + Move, + const Move*); MovePicker(const Position&, Move, Depth, const ButterflyHistory*, const CapturePieceToHistory*, const PieceToHistory**, Square); - MovePicker(const Position&, Move, Depth, const ButterflyHistory*, - const LowPlyHistory*, - const CapturePieceToHistory*, - const PieceToHistory**, - Move, - const Move*, - int); + MovePicker(const Position&, Move, Value, Depth, const CapturePieceToHistory*); Move next_move(bool skipQuiets = false); private: @@ -145,7 +137,6 @@ private: const Position& pos; const ButterflyHistory* mainHistory; - const LowPlyHistory* lowPlyHistory; const CapturePieceToHistory* captureHistory; const PieceToHistory** continuationHistory; Move ttMove; @@ -154,7 +145,6 @@ private: Square recaptureSquare; Value threshold; Depth depth; - int ply; ExtMove moves[MAX_MOVES]; }; diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/evaluate_nnue.cpp b/DroidFishApp/src/main/cpp/stockfish/nnue/evaluate_nnue.cpp index 8828ae5..9ee599f 100644 --- a/DroidFishApp/src/main/cpp/stockfish/nnue/evaluate_nnue.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/nnue/evaluate_nnue.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -109,7 +109,7 @@ namespace Stockfish::Eval::NNUE { { write_little_endian(stream, Version); write_little_endian(stream, hashValue); - write_little_endian(stream, desc.size()); + write_little_endian(stream, (std::uint32_t)desc.size()); stream.write(&desc[0], desc.size()); return !stream.fail(); } @@ -143,39 +143,29 @@ namespace Stockfish::Eval::NNUE { // overaligning stack variables with alignas() doesn't work correctly. constexpr uint64_t alignment = CacheLineSize; + int delta = 10 - pos.non_pawn_material() / 1515; #if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN) TransformedFeatureType transformedFeaturesUnaligned[ FeatureTransformer::BufferSize + alignment / sizeof(TransformedFeatureType)]; - char bufferUnaligned[Network::BufferSize + alignment]; auto* transformedFeatures = align_ptr_up(&transformedFeaturesUnaligned[0]); - auto* buffer = align_ptr_up(&bufferUnaligned[0]); #else alignas(alignment) TransformedFeatureType transformedFeatures[FeatureTransformer::BufferSize]; - alignas(alignment) char buffer[Network::BufferSize]; #endif ASSERT_ALIGNED(transformedFeatures, alignment); - ASSERT_ALIGNED(buffer, alignment); - const std::size_t bucket = (pos.count() - 1) / 4; + const int bucket = (pos.count() - 1) / 4; const auto psqt = featureTransformer->transform(pos, transformedFeatures, bucket); - const auto output = network[bucket]->propagate(transformedFeatures, buffer); + const auto positional = network[bucket]->propagate(transformedFeatures); - int materialist = psqt; - int positional = output[0]; - - int delta_npm = abs(pos.non_pawn_material(WHITE) - pos.non_pawn_material(BLACK)); - int entertainment = (adjusted && delta_npm <= BishopValueMg - KnightValueMg ? 7 : 0); - - int A = 128 - entertainment; - int B = 128 + entertainment; - - int sum = (A * materialist + B * positional) / 128; - - return static_cast( sum / OutputScale ); + // Give more value to positional evaluation when adjusted flag is set + if (adjusted) + return static_cast(((128 - delta) * psqt + (128 + delta) * positional) / 128 / OutputScale); + else + return static_cast((psqt + positional) / OutputScale); } struct NnueEvalTrace { @@ -196,27 +186,20 @@ namespace Stockfish::Eval::NNUE { #if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN) TransformedFeatureType transformedFeaturesUnaligned[ FeatureTransformer::BufferSize + alignment / sizeof(TransformedFeatureType)]; - char bufferUnaligned[Network::BufferSize + alignment]; auto* transformedFeatures = align_ptr_up(&transformedFeaturesUnaligned[0]); - auto* buffer = align_ptr_up(&bufferUnaligned[0]); #else alignas(alignment) TransformedFeatureType transformedFeatures[FeatureTransformer::BufferSize]; - alignas(alignment) char buffer[Network::BufferSize]; #endif ASSERT_ALIGNED(transformedFeatures, alignment); - ASSERT_ALIGNED(buffer, alignment); NnueEvalTrace t{}; t.correctBucket = (pos.count() - 1) / 4; - for (std::size_t bucket = 0; bucket < LayerStacks; ++bucket) { - const auto psqt = featureTransformer->transform(pos, transformedFeatures, bucket); - const auto output = network[bucket]->propagate(transformedFeatures, buffer); - - int materialist = psqt; - int positional = output[0]; + for (IndexType bucket = 0; bucket < LayerStacks; ++bucket) { + const auto materialist = featureTransformer->transform(pos, transformedFeatures, bucket); + const auto positional = network[bucket]->propagate(transformedFeatures); t.psqt[bucket] = static_cast( materialist / OutputScale ); t.positional[bucket] = static_cast( positional / OutputScale ); @@ -227,69 +210,46 @@ namespace Stockfish::Eval::NNUE { static const std::string PieceToChar(" PNBRQK pnbrqk"); - // Requires the buffer to have capacity for at least 5 values + + // format_cp_compact() converts a Value into (centi)pawns and writes it in a buffer. + // The buffer must have capacity for at least 5 chars. static void format_cp_compact(Value v, char* buffer) { buffer[0] = (v < 0 ? '-' : v > 0 ? '+' : ' '); int cp = std::abs(100 * v / PawnValueEg); - if (cp >= 10000) { - buffer[1] = '0' + cp / 10000; cp %= 10000; - buffer[2] = '0' + cp / 1000; cp %= 1000; - buffer[3] = '0' + cp / 100; cp %= 100; - buffer[4] = ' '; + buffer[1] = '0' + cp / 10000; cp %= 10000; + buffer[2] = '0' + cp / 1000; cp %= 1000; + buffer[3] = '0' + cp / 100; + buffer[4] = ' '; } else if (cp >= 1000) { - buffer[1] = '0' + cp / 1000; cp %= 1000; - buffer[2] = '0' + cp / 100; cp %= 100; - buffer[3] = '.'; - buffer[4] = '0' + cp / 10; + buffer[1] = '0' + cp / 1000; cp %= 1000; + buffer[2] = '0' + cp / 100; cp %= 100; + buffer[3] = '.'; + buffer[4] = '0' + cp / 10; } else { - buffer[1] = '0' + cp / 100; cp %= 100; - buffer[2] = '.'; - buffer[3] = '0' + cp / 10; cp %= 10; - buffer[4] = '0' + cp / 1; + buffer[1] = '0' + cp / 100; cp %= 100; + buffer[2] = '.'; + buffer[3] = '0' + cp / 10; cp %= 10; + buffer[4] = '0' + cp / 1; } } - // Requires the buffer to have capacity for at least 7 values + + // format_cp_aligned_dot() converts a Value into (centi)pawns and writes it in a buffer, + // always keeping two decimals. The buffer must have capacity for at least 7 chars. static void format_cp_aligned_dot(Value v, char* buffer) { + buffer[0] = (v < 0 ? '-' : v > 0 ? '+' : ' '); - int cp = std::abs(100 * v / PawnValueEg); - - if (cp >= 10000) - { - buffer[1] = '0' + cp / 10000; cp %= 10000; - buffer[2] = '0' + cp / 1000; cp %= 1000; - buffer[3] = '0' + cp / 100; cp %= 100; - buffer[4] = '.'; - buffer[5] = '0' + cp / 10; cp %= 10; - buffer[6] = '0' + cp; - } - else if (cp >= 1000) - { - buffer[1] = ' '; - buffer[2] = '0' + cp / 1000; cp %= 1000; - buffer[3] = '0' + cp / 100; cp %= 100; - buffer[4] = '.'; - buffer[5] = '0' + cp / 10; cp %= 10; - buffer[6] = '0' + cp; - } - else - { - buffer[1] = ' '; - buffer[2] = ' '; - buffer[3] = '0' + cp / 100; cp %= 100; - buffer[4] = '.'; - buffer[5] = '0' + cp / 10; cp %= 10; - buffer[6] = '0' + cp / 1; - } + double cp = 1.0 * std::abs(int(v)) / PawnValueEg; + sprintf(&buffer[1], "%6.2f", cp); } @@ -419,7 +379,7 @@ namespace Stockfish::Eval::NNUE { actualFilename = filename.value(); else { - if (eval_file_loaded != EvalFileDefaultName) + if (currentEvalFileName != EvalFileDefaultName) { msg = "Failed to export a net. A non-embedded net can only be saved if the filename is specified"; diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/evaluate_nnue.h b/DroidFishApp/src/main/cpp/stockfish/nnue/evaluate_nnue.h index c7fa4a9..2e4f1f5 100644 --- a/DroidFishApp/src/main/cpp/stockfish/nnue/evaluate_nnue.h +++ b/DroidFishApp/src/main/cpp/stockfish/nnue/evaluate_nnue.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2.cpp b/DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2_hm.cpp similarity index 58% rename from DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2.cpp rename to DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2_hm.cpp index 57f43e5..07a1d7a 100644 --- a/DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2_hm.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,31 +16,32 @@ along with this program. If not, see . */ -//Definition of input features HalfKAv2 of NNUE evaluation function +//Definition of input features HalfKAv2_hm of NNUE evaluation function -#include "half_ka_v2.h" +#include "half_ka_v2_hm.h" #include "../../position.h" namespace Stockfish::Eval::NNUE::Features { // Orient a square according to perspective (rotates by 180 for black) - inline Square HalfKAv2::orient(Color perspective, Square s) { - return Square(int(s) ^ (bool(perspective) * 56)); + inline Square HalfKAv2_hm::orient(Color perspective, Square s, Square ksq) { + return Square(int(s) ^ (bool(perspective) * SQ_A8) ^ ((file_of(ksq) < FILE_E) * SQ_H1)); } // Index of a feature for a given king position and another piece on some square - inline IndexType HalfKAv2::make_index(Color perspective, Square s, Piece pc, Square ksq) { - return IndexType(orient(perspective, s) + PieceSquareIndex[perspective][pc] + PS_NB * ksq); + inline IndexType HalfKAv2_hm::make_index(Color perspective, Square s, Piece pc, Square ksq) { + Square o_ksq = orient(perspective, ksq, ksq); + return IndexType(orient(perspective, s, ksq) + PieceSquareIndex[perspective][pc] + PS_NB * KingBuckets[o_ksq]); } // Get a list of indices for active features - void HalfKAv2::append_active_indices( + void HalfKAv2_hm::append_active_indices( const Position& pos, Color perspective, - ValueListInserter active + IndexList& active ) { - Square ksq = orient(perspective, pos.square(perspective)); + Square ksq = pos.square(perspective); Bitboard bb = pos.pieces(); while (bb) { @@ -52,33 +53,30 @@ namespace Stockfish::Eval::NNUE::Features { // append_changed_indices() : get a list of indices for recently changed features - void HalfKAv2::append_changed_indices( + void HalfKAv2_hm::append_changed_indices( Square ksq, - StateInfo* st, + const DirtyPiece& dp, Color perspective, - ValueListInserter removed, - ValueListInserter added + IndexList& removed, + IndexList& added ) { - const auto& dp = st->dirtyPiece; - Square oriented_ksq = orient(perspective, ksq); for (int i = 0; i < dp.dirty_num; ++i) { - Piece pc = dp.piece[i]; if (dp.from[i] != SQ_NONE) - removed.push_back(make_index(perspective, dp.from[i], pc, oriented_ksq)); + removed.push_back(make_index(perspective, dp.from[i], dp.piece[i], ksq)); if (dp.to[i] != SQ_NONE) - added.push_back(make_index(perspective, dp.to[i], pc, oriented_ksq)); + added.push_back(make_index(perspective, dp.to[i], dp.piece[i], ksq)); } } - int HalfKAv2::update_cost(StateInfo* st) { + int HalfKAv2_hm::update_cost(const StateInfo* st) { return st->dirtyPiece.dirty_num; } - int HalfKAv2::refresh_cost(const Position& pos) { + int HalfKAv2_hm::refresh_cost(const Position& pos) { return pos.count(); } - bool HalfKAv2::requires_refresh(StateInfo* st, Color perspective) { + bool HalfKAv2_hm::requires_refresh(const StateInfo* st, Color perspective) { return st->dirtyPiece.piece[0] == make_piece(perspective, KING); } diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2.h b/DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2_hm.h similarity index 71% rename from DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2.h rename to DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2_hm.h index e4b2edd..1e6da0b 100644 --- a/DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2.h +++ b/DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2_hm.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,8 +18,8 @@ //Definition of input features HalfKP of NNUE evaluation function -#ifndef NNUE_FEATURES_HALF_KA_V2_H_INCLUDED -#define NNUE_FEATURES_HALF_KA_V2_H_INCLUDED +#ifndef NNUE_FEATURES_HALF_KA_V2_HM_H_INCLUDED +#define NNUE_FEATURES_HALF_KA_V2_HM_H_INCLUDED #include "../nnue_common.h" @@ -32,9 +32,9 @@ namespace Stockfish { namespace Stockfish::Eval::NNUE::Features { - // Feature HalfKAv2: Combination of the position of own king - // and the position of pieces - class HalfKAv2 { + // Feature HalfKAv2_hm: Combination of the position of own king + // and the position of pieces. Position mirrored such that king always on e..h files. + class HalfKAv2_hm { // unique number for each piece type on each square enum { @@ -50,7 +50,7 @@ namespace Stockfish::Eval::NNUE::Features { PS_W_QUEEN = 8 * SQUARE_NB, PS_B_QUEEN = 9 * SQUARE_NB, PS_KING = 10 * SQUARE_NB, - PS_NB = 11 * SQUARE_NB + PS_NB = 11 * SQUARE_NB }; static constexpr IndexType PieceSquareIndex[COLOR_NB][PIECE_NB] = { @@ -63,49 +63,62 @@ namespace Stockfish::Eval::NNUE::Features { }; // Orient a square according to perspective (rotates by 180 for black) - static Square orient(Color perspective, Square s); + static Square orient(Color perspective, Square s, Square ksq); // Index of a feature for a given king position and another piece on some square static IndexType make_index(Color perspective, Square s, Piece pc, Square ksq); public: // Feature name - static constexpr const char* Name = "HalfKAv2(Friend)"; + static constexpr const char* Name = "HalfKAv2_hm(Friend)"; // Hash value embedded in the evaluation file - static constexpr std::uint32_t HashValue = 0x5f234cb8u; + static constexpr std::uint32_t HashValue = 0x7f234cb8u; // Number of feature dimensions static constexpr IndexType Dimensions = - static_cast(SQUARE_NB) * static_cast(PS_NB); + static_cast(SQUARE_NB) * static_cast(PS_NB) / 2; + + static constexpr int KingBuckets[64] = { + -1, -1, -1, -1, 31, 30, 29, 28, + -1, -1, -1, -1, 27, 26, 25, 24, + -1, -1, -1, -1, 23, 22, 21, 20, + -1, -1, -1, -1, 19, 18, 17, 16, + -1, -1, -1, -1, 15, 14, 13, 12, + -1, -1, -1, -1, 11, 10, 9, 8, + -1, -1, -1, -1, 7, 6, 5, 4, + -1, -1, -1, -1, 3, 2, 1, 0 + }; // Maximum number of simultaneously active features. static constexpr IndexType MaxActiveDimensions = 32; + using IndexList = ValueList; // Get a list of indices for active features static void append_active_indices( const Position& pos, Color perspective, - ValueListInserter active); + IndexList& active); // Get a list of indices for recently changed features static void append_changed_indices( Square ksq, - StateInfo* st, + const DirtyPiece& dp, Color perspective, - ValueListInserter removed, - ValueListInserter added); + IndexList& removed, + IndexList& added + ); // Returns the cost of updating one perspective, the most costly one. // Assumes no refresh needed. - static int update_cost(StateInfo* st); + static int update_cost(const StateInfo* st); static int refresh_cost(const Position& pos); // Returns whether the change stored in this StateInfo means that // a full accumulator refresh is required. - static bool requires_refresh(StateInfo* st, Color perspective); + static bool requires_refresh(const StateInfo* st, Color perspective); }; } // namespace Stockfish::Eval::NNUE::Features -#endif // #ifndef NNUE_FEATURES_HALF_KA_V2_H_INCLUDED +#endif // #ifndef NNUE_FEATURES_HALF_KA_V2_HM_H_INCLUDED diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/layers/affine_transform.h b/DroidFishApp/src/main/cpp/stockfish/nnue/layers/affine_transform.h index 9a3b778..9a99260 100644 --- a/DroidFishApp/src/main/cpp/stockfish/nnue/layers/affine_transform.h +++ b/DroidFishApp/src/main/cpp/stockfish/nnue/layers/affine_transform.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,398 +22,338 @@ #define NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED #include +#include +#include #include "../nnue_common.h" +#include "../../simd.h" + +/* + This file contains the definition for a fully connected layer (aka affine transform). + Two approaches are employed, depending on the sizes of the transform. + + Approach 1: + - used when the PaddedInputDimensions >= 128 + - uses AVX512 if possible + - processes inputs in batches of 2*InputSimdWidth + - so in batches of 128 for AVX512 + - the weight blocks of size InputSimdWidth are transposed such that + access is sequential + - N columns of the weight matrix are processed a time, where N + depends on the architecture (the amount of registers) + - accumulate + hadd is used + + Approach 2: + - used when the PaddedInputDimensions < 128 + - does not use AVX512 + - expected use-case is for when PaddedInputDimensions == 32 and InputDimensions <= 32. + - that's why AVX512 is hard to implement + - expected use-case is small layers + - not optimized as well as the approach 1 + - inputs are processed in chunks of 4, weights are respectively transposed + - accumulation happens directly to int32s +*/ namespace Stockfish::Eval::NNUE::Layers { - // Affine transformation layer - template - class AffineTransform { - public: - // Input/output type - using InputType = typename PreviousLayer::OutputType; - using OutputType = std::int32_t; - static_assert(std::is_same::value, ""); +// Fallback implementation for older/other architectures. +// Identical for both approaches. Requires the input to be padded to at least 16 values. +#if !defined(USE_SSSE3) + template + static void affine_transform_non_ssse3(std::int32_t* output, const std::int8_t* weights, const std::int32_t* biases, const std::uint8_t* input) + { +# if defined(USE_SSE2) + // At least a multiple of 16, with SSE2. + constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 16) / 16; + const __m128i Zeros = _mm_setzero_si128(); + const auto inputVector = reinterpret_cast(input); - // Number of input/output dimensions - static constexpr IndexType InputDimensions = - PreviousLayer::OutputDimensions; - static constexpr IndexType OutputDimensions = OutDims; - static constexpr IndexType PaddedInputDimensions = - ceil_to_multiple(InputDimensions, MaxSimdWidth); -#if defined (USE_AVX512) - static constexpr const IndexType OutputSimdWidth = SimdWidth / 2; -#elif defined (USE_SSSE3) - static constexpr const IndexType OutputSimdWidth = SimdWidth / 4; +# elif defined(USE_MMX) + constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 8) / 8; + const __m64 Zeros = _mm_setzero_si64(); + const auto inputVector = reinterpret_cast(input); + +# elif defined(USE_NEON) + constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 16) / 16; + const auto inputVector = reinterpret_cast(input); +# endif + + for (IndexType i = 0; i < OutputDimensions; ++i) { + const IndexType offset = i * PaddedInputDimensions; + +# if defined(USE_SSE2) + __m128i sumLo = _mm_cvtsi32_si128(biases[i]); + __m128i sumHi = Zeros; + const auto row = reinterpret_cast(&weights[offset]); + for (IndexType j = 0; j < NumChunks; ++j) { + __m128i row_j = _mm_load_si128(&row[j]); + __m128i input_j = _mm_load_si128(&inputVector[j]); + __m128i extendedRowLo = _mm_srai_epi16(_mm_unpacklo_epi8(row_j, row_j), 8); + __m128i extendedRowHi = _mm_srai_epi16(_mm_unpackhi_epi8(row_j, row_j), 8); + __m128i extendedInputLo = _mm_unpacklo_epi8(input_j, Zeros); + __m128i extendedInputHi = _mm_unpackhi_epi8(input_j, Zeros); + __m128i productLo = _mm_madd_epi16(extendedRowLo, extendedInputLo); + __m128i productHi = _mm_madd_epi16(extendedRowHi, extendedInputHi); + sumLo = _mm_add_epi32(sumLo, productLo); + sumHi = _mm_add_epi32(sumHi, productHi); + } + __m128i sum = _mm_add_epi32(sumLo, sumHi); + __m128i sumHigh_64 = _mm_shuffle_epi32(sum, _MM_SHUFFLE(1, 0, 3, 2)); + sum = _mm_add_epi32(sum, sumHigh_64); + __m128i sum_second_32 = _mm_shufflelo_epi16(sum, _MM_SHUFFLE(1, 0, 3, 2)); + sum = _mm_add_epi32(sum, sum_second_32); + output[i] = _mm_cvtsi128_si32(sum); + +# elif defined(USE_MMX) + __m64 sumLo = _mm_cvtsi32_si64(biases[i]); + __m64 sumHi = Zeros; + const auto row = reinterpret_cast(&weights[offset]); + for (IndexType j = 0; j < NumChunks; ++j) { + __m64 row_j = row[j]; + __m64 input_j = inputVector[j]; + __m64 extendedRowLo = _mm_srai_pi16(_mm_unpacklo_pi8(row_j, row_j), 8); + __m64 extendedRowHi = _mm_srai_pi16(_mm_unpackhi_pi8(row_j, row_j), 8); + __m64 extendedInputLo = _mm_unpacklo_pi8(input_j, Zeros); + __m64 extendedInputHi = _mm_unpackhi_pi8(input_j, Zeros); + __m64 productLo = _mm_madd_pi16(extendedRowLo, extendedInputLo); + __m64 productHi = _mm_madd_pi16(extendedRowHi, extendedInputHi); + sumLo = _mm_add_pi32(sumLo, productLo); + sumHi = _mm_add_pi32(sumHi, productHi); + } + __m64 sum = _mm_add_pi32(sumLo, sumHi); + sum = _mm_add_pi32(sum, _mm_unpackhi_pi32(sum, sum)); + output[i] = _mm_cvtsi64_si32(sum); + +# elif defined(USE_NEON) + int32x4_t sum = {biases[i]}; + const auto row = reinterpret_cast(&weights[offset]); + for (IndexType j = 0; j < NumChunks; ++j) { + int16x8_t product = vmull_s8(inputVector[j * 2], row[j * 2]); + product = vmlal_s8(product, inputVector[j * 2 + 1], row[j * 2 + 1]); + sum = vpadalq_s16(sum, product); + } + output[i] = sum[0] + sum[1] + sum[2] + sum[3]; + +# else + std::int32_t sum = biases[i]; + for (IndexType j = 0; j < InputDimensions; ++j) { + sum += weights[offset + j] * input[j]; + } + output[i] = sum; +# endif + } + +# if defined(USE_MMX) + _mm_empty(); +# endif + } #endif - // Size of forward propagation buffer used in this layer - static constexpr std::size_t SelfBufferSize = - ceil_to_multiple(OutputDimensions * sizeof(OutputType), CacheLineSize); + template + class AffineTransform; - // Size of the forward propagation buffer used from the input layer to this layer - static constexpr std::size_t BufferSize = - PreviousLayer::BufferSize + SelfBufferSize; + // A specialization for large inputs. + template + class AffineTransform(InDims, MaxSimdWidth) >= 2*64)>> { + public: + // Input/output type + using InputType = std::uint8_t; + using OutputType = std::int32_t; + + // Number of input/output dimensions + static constexpr IndexType InputDimensions = InDims; + static constexpr IndexType OutputDimensions = OutDims; + + static constexpr IndexType PaddedInputDimensions = + ceil_to_multiple(InputDimensions, MaxSimdWidth); + static constexpr IndexType PaddedOutputDimensions = + ceil_to_multiple(OutputDimensions, MaxSimdWidth); + + using OutputBuffer = OutputType[PaddedOutputDimensions]; + + static_assert(PaddedInputDimensions >= 128, "Something went wrong. This specialization should not have been chosen."); + +#if defined (USE_AVX512) + static constexpr const IndexType InputSimdWidth = 64; + static constexpr const IndexType MaxNumOutputRegs = 16; +#elif defined (USE_AVX2) + static constexpr const IndexType InputSimdWidth = 32; + static constexpr const IndexType MaxNumOutputRegs = 8; +#elif defined (USE_SSSE3) + static constexpr const IndexType InputSimdWidth = 16; + static constexpr const IndexType MaxNumOutputRegs = 8; +#elif defined (USE_NEON) + static constexpr const IndexType InputSimdWidth = 8; + static constexpr const IndexType MaxNumOutputRegs = 8; +#else + // The fallback implementation will not have permuted weights. + // We define these to avoid a lot of ifdefs later. + static constexpr const IndexType InputSimdWidth = 1; + static constexpr const IndexType MaxNumOutputRegs = 1; +#endif + + // A big block is a region in the weight matrix of the size [PaddedInputDimensions, NumOutputRegs]. + // A small block is a region of size [InputSimdWidth, 1] + + static constexpr const IndexType NumOutputRegs = std::min(MaxNumOutputRegs, OutputDimensions); + static constexpr const IndexType SmallBlockSize = InputSimdWidth; + static constexpr const IndexType BigBlockSize = NumOutputRegs * PaddedInputDimensions; + static constexpr const IndexType NumSmallBlocksInBigBlock = BigBlockSize / SmallBlockSize; + static constexpr const IndexType NumSmallBlocksPerOutput = PaddedInputDimensions / SmallBlockSize; + static constexpr const IndexType NumBigBlocks = OutputDimensions / NumOutputRegs; + + static_assert(OutputDimensions % NumOutputRegs == 0); // Hash value embedded in the evaluation file - static constexpr std::uint32_t get_hash_value() { + static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) { std::uint32_t hashValue = 0xCC03DAE4u; hashValue += OutputDimensions; - hashValue ^= PreviousLayer::get_hash_value() >> 1; - hashValue ^= PreviousLayer::get_hash_value() << 31; + hashValue ^= prevHash >> 1; + hashValue ^= prevHash << 31; return hashValue; } + /* + Transposes the small blocks within a block. + Effectively means that weights can be traversed sequentially during inference. + */ + static IndexType get_weight_index(IndexType i) + { + const IndexType smallBlock = (i / SmallBlockSize) % NumSmallBlocksInBigBlock; + const IndexType smallBlockCol = smallBlock / NumSmallBlocksPerOutput; + const IndexType smallBlockRow = smallBlock % NumSmallBlocksPerOutput; + const IndexType bigBlock = i / BigBlockSize; + const IndexType rest = i % SmallBlockSize; + + const IndexType idx = + bigBlock * BigBlockSize + + smallBlockRow * SmallBlockSize * NumOutputRegs + + smallBlockCol * SmallBlockSize + + rest; + + return idx; + } + // Read network parameters bool read_parameters(std::istream& stream) { - if (!previousLayer.read_parameters(stream)) return false; - for (std::size_t i = 0; i < OutputDimensions; ++i) + for (IndexType i = 0; i < OutputDimensions; ++i) biases[i] = read_little_endian(stream); - for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) -#if !defined (USE_SSSE3) - weights[i] = read_little_endian(stream); -#else - weights[ - (i / 4) % (PaddedInputDimensions / 4) * OutputDimensions * 4 + - i / PaddedInputDimensions * 4 + - i % 4 - ] = read_little_endian(stream); -#endif + + for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) + weights[get_weight_index(i)] = read_little_endian(stream); return !stream.fail(); } // Write network parameters bool write_parameters(std::ostream& stream) const { - if (!previousLayer.write_parameters(stream)) return false; - for (std::size_t i = 0; i < OutputDimensions; ++i) + for (IndexType i = 0; i < OutputDimensions; ++i) write_little_endian(stream, biases[i]); -#if !defined (USE_SSSE3) - for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) - write_little_endian(stream, weights[i]); -#else - std::unique_ptr unscrambledWeights = std::make_unique(OutputDimensions * PaddedInputDimensions); - for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) { - unscrambledWeights[i] = - weights[ - (i / 4) % (PaddedInputDimensions / 4) * OutputDimensions * 4 + - i / PaddedInputDimensions * 4 + - i % 4 - ]; - } - for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) - write_little_endian(stream, unscrambledWeights[i]); -#endif + for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) + write_little_endian(stream, weights[get_weight_index(i)]); return !stream.fail(); } // Forward propagation const OutputType* propagate( - const TransformedFeatureType* transformedFeatures, char* buffer) const { - const auto input = previousLayer.propagate( - transformedFeatures, buffer + SelfBufferSize); + const InputType* input, OutputType* output) const { #if defined (USE_AVX512) - - [[maybe_unused]] const __m512i Ones512 = _mm512_set1_epi16(1); - - [[maybe_unused]] auto m512_hadd = [](__m512i sum, int bias) -> int { - return _mm512_reduce_add_epi32(sum) + bias; - }; - - [[maybe_unused]] auto m512_add_dpbusd_epi32 = [=](__m512i& acc, __m512i a, __m512i b) { -#if defined (USE_VNNI) - acc = _mm512_dpbusd_epi32(acc, a, b); -#else - __m512i product0 = _mm512_maddubs_epi16(a, b); - product0 = _mm512_madd_epi16(product0, Ones512); - acc = _mm512_add_epi32(acc, product0); -#endif - }; - - [[maybe_unused]] auto m512_add_dpbusd_epi32x4 = [=](__m512i& acc, __m512i a0, __m512i b0, __m512i a1, __m512i b1, - __m512i a2, __m512i b2, __m512i a3, __m512i b3) { -#if defined (USE_VNNI) - acc = _mm512_dpbusd_epi32(acc, a0, b0); - acc = _mm512_dpbusd_epi32(acc, a1, b1); - acc = _mm512_dpbusd_epi32(acc, a2, b2); - acc = _mm512_dpbusd_epi32(acc, a3, b3); -#else - __m512i product0 = _mm512_maddubs_epi16(a0, b0); - __m512i product1 = _mm512_maddubs_epi16(a1, b1); - __m512i product2 = _mm512_maddubs_epi16(a2, b2); - __m512i product3 = _mm512_maddubs_epi16(a3, b3); - product0 = _mm512_adds_epi16(product0, product1); - product0 = _mm512_madd_epi16(product0, Ones512); - product2 = _mm512_adds_epi16(product2, product3); - product2 = _mm512_madd_epi16(product2, Ones512); - acc = _mm512_add_epi32(acc, _mm512_add_epi32(product0, product2)); -#endif - }; - -#endif -#if defined (USE_AVX2) - - [[maybe_unused]] const __m256i Ones256 = _mm256_set1_epi16(1); - - [[maybe_unused]] auto m256_hadd = [](__m256i sum, int bias) -> int { - __m128i sum128 = _mm_add_epi32(_mm256_castsi256_si128(sum), _mm256_extracti128_si256(sum, 1)); - sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_BADC)); - sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_CDAB)); - return _mm_cvtsi128_si32(sum128) + bias; - }; - - [[maybe_unused]] auto m256_add_dpbusd_epi32 = [=](__m256i& acc, __m256i a, __m256i b) { -#if defined (USE_VNNI) - acc = _mm256_dpbusd_epi32(acc, a, b); -#else - __m256i product0 = _mm256_maddubs_epi16(a, b); - product0 = _mm256_madd_epi16(product0, Ones256); - acc = _mm256_add_epi32(acc, product0); -#endif - }; - - [[maybe_unused]] auto m256_add_dpbusd_epi32x4 = [=](__m256i& acc, __m256i a0, __m256i b0, __m256i a1, __m256i b1, - __m256i a2, __m256i b2, __m256i a3, __m256i b3) { -#if defined (USE_VNNI) - acc = _mm256_dpbusd_epi32(acc, a0, b0); - acc = _mm256_dpbusd_epi32(acc, a1, b1); - acc = _mm256_dpbusd_epi32(acc, a2, b2); - acc = _mm256_dpbusd_epi32(acc, a3, b3); -#else - __m256i product0 = _mm256_maddubs_epi16(a0, b0); - __m256i product1 = _mm256_maddubs_epi16(a1, b1); - __m256i product2 = _mm256_maddubs_epi16(a2, b2); - __m256i product3 = _mm256_maddubs_epi16(a3, b3); - product0 = _mm256_adds_epi16(product0, product1); - product0 = _mm256_madd_epi16(product0, Ones256); - product2 = _mm256_adds_epi16(product2, product3); - product2 = _mm256_madd_epi16(product2, Ones256); - acc = _mm256_add_epi32(acc, _mm256_add_epi32(product0, product2)); -#endif - }; - -#endif -#if defined (USE_SSSE3) - - [[maybe_unused]] const __m128i Ones128 = _mm_set1_epi16(1); - - [[maybe_unused]] auto m128_hadd = [](__m128i sum, int bias) -> int { - sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0x4E)); //_MM_PERM_BADC - sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0xB1)); //_MM_PERM_CDAB - return _mm_cvtsi128_si32(sum) + bias; - }; - - [[maybe_unused]] auto m128_add_dpbusd_epi32 = [=](__m128i& acc, __m128i a, __m128i b) { - __m128i product0 = _mm_maddubs_epi16(a, b); - product0 = _mm_madd_epi16(product0, Ones128); - acc = _mm_add_epi32(acc, product0); - }; - - [[maybe_unused]] auto m128_add_dpbusd_epi32x4 = [=](__m128i& acc, __m128i a0, __m128i b0, __m128i a1, __m128i b1, - __m128i a2, __m128i b2, __m128i a3, __m128i b3) { - __m128i product0 = _mm_maddubs_epi16(a0, b0); - __m128i product1 = _mm_maddubs_epi16(a1, b1); - __m128i product2 = _mm_maddubs_epi16(a2, b2); - __m128i product3 = _mm_maddubs_epi16(a3, b3); - product0 = _mm_adds_epi16(product0, product1); - product0 = _mm_madd_epi16(product0, Ones128); - product2 = _mm_adds_epi16(product2, product3); - product2 = _mm_madd_epi16(product2, Ones128); - acc = _mm_add_epi32(acc, _mm_add_epi32(product0, product2)); - }; - -#endif - -#if defined (USE_AVX512) - using vec_t = __m512i; - #define vec_setzero _mm512_setzero_si512 - #define vec_set_32 _mm512_set1_epi32 - auto& vec_add_dpbusd_32 = m512_add_dpbusd_epi32; - auto& vec_add_dpbusd_32x4 = m512_add_dpbusd_epi32x4; - auto& vec_hadd = m512_hadd; + using acc_vec_t = __m512i; + using bias_vec_t = __m128i; + using weight_vec_t = __m512i; + using in_vec_t = __m512i; + #define vec_zero _mm512_setzero_si512() + #define vec_add_dpbusd_32x2 Simd::m512_add_dpbusd_epi32x2 + #define vec_hadd Simd::m512_hadd + #define vec_haddx4 Simd::m512_haddx4 #elif defined (USE_AVX2) - using vec_t = __m256i; - #define vec_setzero _mm256_setzero_si256 - #define vec_set_32 _mm256_set1_epi32 - auto& vec_add_dpbusd_32 = m256_add_dpbusd_epi32; - auto& vec_add_dpbusd_32x4 = m256_add_dpbusd_epi32x4; - auto& vec_hadd = m256_hadd; + using acc_vec_t = __m256i; + using bias_vec_t = __m128i; + using weight_vec_t = __m256i; + using in_vec_t = __m256i; + #define vec_zero _mm256_setzero_si256() + #define vec_add_dpbusd_32x2 Simd::m256_add_dpbusd_epi32x2 + #define vec_hadd Simd::m256_hadd + #define vec_haddx4 Simd::m256_haddx4 #elif defined (USE_SSSE3) - using vec_t = __m128i; - #define vec_setzero _mm_setzero_si128 - #define vec_set_32 _mm_set1_epi32 - auto& vec_add_dpbusd_32 = m128_add_dpbusd_epi32; - auto& vec_add_dpbusd_32x4 = m128_add_dpbusd_epi32x4; - auto& vec_hadd = m128_hadd; + using acc_vec_t = __m128i; + using bias_vec_t = __m128i; + using weight_vec_t = __m128i; + using in_vec_t = __m128i; + #define vec_zero _mm_setzero_si128() + #define vec_add_dpbusd_32x2 Simd::m128_add_dpbusd_epi32x2 + #define vec_hadd Simd::m128_hadd + #define vec_haddx4 Simd::m128_haddx4 +#elif defined (USE_NEON) + using acc_vec_t = int32x4_t; + using bias_vec_t = int32x4_t; + using weight_vec_t = int8x8_t; + using in_vec_t = int8x8_t; + #define vec_zero {0} + #define vec_add_dpbusd_32x2 Simd::neon_m128_add_dpbusd_epi32x2 + #define vec_hadd Simd::neon_m128_hadd + #define vec_haddx4 Simd::neon_m128_haddx4 #endif -#if defined (USE_SSSE3) - // Different layout, we process 4 inputs at a time, always. - static_assert(InputDimensions % 4 == 0); +#if defined (USE_SSSE3) || defined (USE_NEON) + const in_vec_t* invec = reinterpret_cast(input); - const auto output = reinterpret_cast(buffer); - const auto inputVector = reinterpret_cast(input); - - static_assert(OutputDimensions % OutputSimdWidth == 0 || OutputDimensions == 1); - - // OutputDimensions is either 1 or a multiple of SimdWidth - // because then it is also an input dimension. - if constexpr (OutputDimensions % OutputSimdWidth == 0) + // Perform accumulation to registers for each big block + for (IndexType bigBlock = 0; bigBlock < NumBigBlocks; ++bigBlock) { - constexpr IndexType NumChunks = InputDimensions / 4; + acc_vec_t acc[NumOutputRegs] = { vec_zero }; - const auto input32 = reinterpret_cast(input); - vec_t* outptr = reinterpret_cast(output); - std::memcpy(output, biases, OutputDimensions * sizeof(OutputType)); + // Each big block has NumOutputRegs small blocks in each "row", one per register. + // We process two small blocks at a time to save on one addition without VNNI. + for (IndexType smallBlock = 0; smallBlock < NumSmallBlocksPerOutput; smallBlock += 2) + { + const weight_vec_t* weightvec = + reinterpret_cast( + weights + + bigBlock * BigBlockSize + + smallBlock * SmallBlockSize * NumOutputRegs); - for (int i = 0; i < (int)NumChunks - 3; i += 4) + const in_vec_t in0 = invec[smallBlock + 0]; + const in_vec_t in1 = invec[smallBlock + 1]; + + for (IndexType k = 0; k < NumOutputRegs; ++k) + vec_add_dpbusd_32x2(acc[k], in0, weightvec[k], in1, weightvec[k + NumOutputRegs]); + } + + // Horizontally add all accumulators. + if constexpr (NumOutputRegs % 4 == 0) + { + bias_vec_t* outputvec = reinterpret_cast(output); + const bias_vec_t* biasvec = reinterpret_cast(biases); + + for (IndexType k = 0; k < NumOutputRegs; k += 4) { - const vec_t in0 = vec_set_32(input32[i + 0]); - const vec_t in1 = vec_set_32(input32[i + 1]); - const vec_t in2 = vec_set_32(input32[i + 2]); - const vec_t in3 = vec_set_32(input32[i + 3]); - const auto col0 = reinterpret_cast(&weights[(i + 0) * OutputDimensions * 4]); - const auto col1 = reinterpret_cast(&weights[(i + 1) * OutputDimensions * 4]); - const auto col2 = reinterpret_cast(&weights[(i + 2) * OutputDimensions * 4]); - const auto col3 = reinterpret_cast(&weights[(i + 3) * OutputDimensions * 4]); - for (int j = 0; j * OutputSimdWidth < OutputDimensions; ++j) - vec_add_dpbusd_32x4(outptr[j], in0, col0[j], in1, col1[j], in2, col2[j], in3, col3[j]); + const IndexType idx = (bigBlock * NumOutputRegs + k) / 4; + outputvec[idx] = vec_haddx4(acc[k+0], acc[k+1], acc[k+2], acc[k+3], biasvec[idx]); } - } - else if constexpr (OutputDimensions == 1) - { -#if defined (USE_AVX512) - if constexpr (PaddedInputDimensions % (SimdWidth * 2) != 0) + } + else + { + for (IndexType k = 0; k < NumOutputRegs; ++k) { - constexpr IndexType NumChunks = PaddedInputDimensions / SimdWidth; - const auto inputVector256 = reinterpret_cast(input); - - __m256i sum0 = _mm256_setzero_si256(); - const auto row0 = reinterpret_cast(&weights[0]); - - for (int j = 0; j < (int)NumChunks; ++j) - { - const __m256i in = inputVector256[j]; - m256_add_dpbusd_epi32(sum0, in, row0[j]); - } - output[0] = m256_hadd(sum0, biases[0]); - } - else -#endif - { -#if defined (USE_AVX512) - constexpr IndexType NumChunks = PaddedInputDimensions / (SimdWidth * 2); -#else - constexpr IndexType NumChunks = PaddedInputDimensions / SimdWidth; -#endif - vec_t sum0 = vec_setzero(); - const auto row0 = reinterpret_cast(&weights[0]); - - for (int j = 0; j < (int)NumChunks; ++j) - { - const vec_t in = inputVector[j]; - vec_add_dpbusd_32(sum0, in, row0[j]); - } - output[0] = vec_hadd(sum0, biases[0]); + const IndexType idx = (bigBlock * NumOutputRegs + k); + output[idx] = vec_hadd(acc[k], biases[idx]); } + } } +# undef vec_zero +# undef vec_add_dpbusd_32x2 +# undef vec_hadd +# undef vec_haddx4 #else - -// Use old implementation for the other architectures. - - auto output = reinterpret_cast(buffer); - -#if defined(USE_SSE2) - // At least a multiple of 16, with SSE2. - static_assert(InputDimensions % SimdWidth == 0); - constexpr IndexType NumChunks = InputDimensions / SimdWidth; - const __m128i Zeros = _mm_setzero_si128(); - const auto inputVector = reinterpret_cast(input); - -#elif defined(USE_MMX) - static_assert(InputDimensions % SimdWidth == 0); - constexpr IndexType NumChunks = InputDimensions / SimdWidth; - const __m64 Zeros = _mm_setzero_si64(); - const auto inputVector = reinterpret_cast(input); - -#elif defined(USE_NEON) - static_assert(InputDimensions % SimdWidth == 0); - constexpr IndexType NumChunks = InputDimensions / SimdWidth; - const auto inputVector = reinterpret_cast(input); -#endif - - for (IndexType i = 0; i < OutputDimensions; ++i) { - const IndexType offset = i * PaddedInputDimensions; - -#if defined(USE_SSE2) - __m128i sumLo = _mm_cvtsi32_si128(biases[i]); - __m128i sumHi = Zeros; - const auto row = reinterpret_cast(&weights[offset]); - for (IndexType j = 0; j < NumChunks; ++j) { - __m128i row_j = _mm_load_si128(&row[j]); - __m128i input_j = _mm_load_si128(&inputVector[j]); - __m128i extendedRowLo = _mm_srai_epi16(_mm_unpacklo_epi8(row_j, row_j), 8); - __m128i extendedRowHi = _mm_srai_epi16(_mm_unpackhi_epi8(row_j, row_j), 8); - __m128i extendedInputLo = _mm_unpacklo_epi8(input_j, Zeros); - __m128i extendedInputHi = _mm_unpackhi_epi8(input_j, Zeros); - __m128i productLo = _mm_madd_epi16(extendedRowLo, extendedInputLo); - __m128i productHi = _mm_madd_epi16(extendedRowHi, extendedInputHi); - sumLo = _mm_add_epi32(sumLo, productLo); - sumHi = _mm_add_epi32(sumHi, productHi); - } - __m128i sum = _mm_add_epi32(sumLo, sumHi); - __m128i sumHigh_64 = _mm_shuffle_epi32(sum, _MM_SHUFFLE(1, 0, 3, 2)); - sum = _mm_add_epi32(sum, sumHigh_64); - __m128i sum_second_32 = _mm_shufflelo_epi16(sum, _MM_SHUFFLE(1, 0, 3, 2)); - sum = _mm_add_epi32(sum, sum_second_32); - output[i] = _mm_cvtsi128_si32(sum); - -#elif defined(USE_MMX) - __m64 sumLo = _mm_cvtsi32_si64(biases[i]); - __m64 sumHi = Zeros; - const auto row = reinterpret_cast(&weights[offset]); - for (IndexType j = 0; j < NumChunks; ++j) { - __m64 row_j = row[j]; - __m64 input_j = inputVector[j]; - __m64 extendedRowLo = _mm_srai_pi16(_mm_unpacklo_pi8(row_j, row_j), 8); - __m64 extendedRowHi = _mm_srai_pi16(_mm_unpackhi_pi8(row_j, row_j), 8); - __m64 extendedInputLo = _mm_unpacklo_pi8(input_j, Zeros); - __m64 extendedInputHi = _mm_unpackhi_pi8(input_j, Zeros); - __m64 productLo = _mm_madd_pi16(extendedRowLo, extendedInputLo); - __m64 productHi = _mm_madd_pi16(extendedRowHi, extendedInputHi); - sumLo = _mm_add_pi32(sumLo, productLo); - sumHi = _mm_add_pi32(sumHi, productHi); - } - __m64 sum = _mm_add_pi32(sumLo, sumHi); - sum = _mm_add_pi32(sum, _mm_unpackhi_pi32(sum, sum)); - output[i] = _mm_cvtsi64_si32(sum); - -#elif defined(USE_NEON) - int32x4_t sum = {biases[i]}; - const auto row = reinterpret_cast(&weights[offset]); - for (IndexType j = 0; j < NumChunks; ++j) { - int16x8_t product = vmull_s8(inputVector[j * 2], row[j * 2]); - product = vmlal_s8(product, inputVector[j * 2 + 1], row[j * 2 + 1]); - sum = vpadalq_s16(sum, product); - } - output[i] = sum[0] + sum[1] + sum[2] + sum[3]; - -#else - OutputType sum = biases[i]; - for (IndexType j = 0; j < InputDimensions; ++j) { - sum += weights[offset + j] * input[j]; - } - output[i] = sum; -#endif - - } -#if defined(USE_MMX) - _mm_empty(); -#endif + // Use old implementation for the other architectures. + affine_transform_non_ssse3< + InputDimensions, + PaddedInputDimensions, + OutputDimensions>(output, weights, biases, input); #endif @@ -424,7 +364,171 @@ namespace Stockfish::Eval::NNUE::Layers { using BiasType = OutputType; using WeightType = std::int8_t; - PreviousLayer previousLayer; + alignas(CacheLineSize) BiasType biases[OutputDimensions]; + alignas(CacheLineSize) WeightType weights[OutputDimensions * PaddedInputDimensions]; + }; + + template + class AffineTransform(InDims, MaxSimdWidth) < 2*64)>> { + public: + // Input/output type + // Input/output type + using InputType = std::uint8_t; + using OutputType = std::int32_t; + + // Number of input/output dimensions + static constexpr IndexType InputDimensions = InDims; + static constexpr IndexType OutputDimensions = OutDims; + + static constexpr IndexType PaddedInputDimensions = + ceil_to_multiple(InputDimensions, MaxSimdWidth); + static constexpr IndexType PaddedOutputDimensions = + ceil_to_multiple(OutputDimensions, MaxSimdWidth); + + using OutputBuffer = OutputType[PaddedOutputDimensions]; + + static_assert(PaddedInputDimensions < 128, "Something went wrong. This specialization should not have been chosen."); + +#if defined (USE_SSSE3) + static constexpr const IndexType OutputSimdWidth = SimdWidth / 4; + static constexpr const IndexType InputSimdWidth = SimdWidth; +#endif + + // Hash value embedded in the evaluation file + static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) { + std::uint32_t hashValue = 0xCC03DAE4u; + hashValue += OutputDimensions; + hashValue ^= prevHash >> 1; + hashValue ^= prevHash << 31; + return hashValue; + } + + static IndexType get_weight_index_scrambled(IndexType i) + { + return + (i / 4) % (PaddedInputDimensions / 4) * OutputDimensions * 4 + + i / PaddedInputDimensions * 4 + + i % 4; + } + + static IndexType get_weight_index(IndexType i) + { +#if defined (USE_SSSE3) + return get_weight_index_scrambled(i); +#else + return i; +#endif + } + + // Read network parameters + bool read_parameters(std::istream& stream) { + for (IndexType i = 0; i < OutputDimensions; ++i) + biases[i] = read_little_endian(stream); + for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) + weights[get_weight_index(i)] = read_little_endian(stream); + + return !stream.fail(); + } + + // Write network parameters + bool write_parameters(std::ostream& stream) const { + for (IndexType i = 0; i < OutputDimensions; ++i) + write_little_endian(stream, biases[i]); + + for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) + write_little_endian(stream, weights[get_weight_index(i)]); + + return !stream.fail(); + } + // Forward propagation + const OutputType* propagate( + const InputType* input, OutputType* output) const { + +#if defined (USE_AVX2) + using vec_t = __m256i; + #define vec_setzero _mm256_setzero_si256 + #define vec_set_32 _mm256_set1_epi32 + #define vec_add_dpbusd_32 Simd::m256_add_dpbusd_epi32 + #define vec_add_dpbusd_32x2 Simd::m256_add_dpbusd_epi32x2 + #define vec_add_dpbusd_32x4 Simd::m256_add_dpbusd_epi32x4 + #define vec_hadd Simd::m256_hadd + #define vec_haddx4 Simd::m256_haddx4 +#elif defined (USE_SSSE3) + using vec_t = __m128i; + #define vec_setzero _mm_setzero_si128 + #define vec_set_32 _mm_set1_epi32 + #define vec_add_dpbusd_32 Simd::m128_add_dpbusd_epi32 + #define vec_add_dpbusd_32x2 Simd::m128_add_dpbusd_epi32x2 + #define vec_add_dpbusd_32x4 Simd::m128_add_dpbusd_epi32x4 + #define vec_hadd Simd::m128_hadd + #define vec_haddx4 Simd::m128_haddx4 +#endif + +#if defined (USE_SSSE3) + const auto inputVector = reinterpret_cast(input); + + static_assert(OutputDimensions % OutputSimdWidth == 0 || OutputDimensions == 1); + + if constexpr (OutputDimensions % OutputSimdWidth == 0) + { + constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 8) / 4; + constexpr IndexType NumRegs = OutputDimensions / OutputSimdWidth; + + const auto input32 = reinterpret_cast(input); + const vec_t* biasvec = reinterpret_cast(biases); + vec_t acc[NumRegs]; + for (IndexType k = 0; k < NumRegs; ++k) + acc[k] = biasvec[k]; + + for (IndexType i = 0; i < NumChunks; i += 2) + { + const vec_t in0 = vec_set_32(input32[i + 0]); + const vec_t in1 = vec_set_32(input32[i + 1]); + const auto col0 = reinterpret_cast(&weights[(i + 0) * OutputDimensions * 4]); + const auto col1 = reinterpret_cast(&weights[(i + 1) * OutputDimensions * 4]); + for (IndexType k = 0; k < NumRegs; ++k) + vec_add_dpbusd_32x2(acc[k], in0, col0[k], in1, col1[k]); + } + + vec_t* outptr = reinterpret_cast(output); + for (IndexType k = 0; k < NumRegs; ++k) + outptr[k] = acc[k]; + } + else if constexpr (OutputDimensions == 1) + { + constexpr IndexType NumChunks = PaddedInputDimensions / SimdWidth; + vec_t sum0 = vec_setzero(); + const auto row0 = reinterpret_cast(&weights[0]); + + for (int j = 0; j < (int)NumChunks; ++j) + { + const vec_t in = inputVector[j]; + vec_add_dpbusd_32(sum0, in, row0[j]); + } + output[0] = vec_hadd(sum0, biases[0]); + } + +# undef vec_setzero +# undef vec_set_32 +# undef vec_add_dpbusd_32 +# undef vec_add_dpbusd_32x2 +# undef vec_add_dpbusd_32x4 +# undef vec_hadd +# undef vec_haddx4 +#else + // Use old implementation for the other architectures. + affine_transform_non_ssse3< + InputDimensions, + PaddedInputDimensions, + OutputDimensions>(output, weights, biases, input); +#endif + + return output; + } + + private: + using BiasType = OutputType; + using WeightType = std::int8_t; alignas(CacheLineSize) BiasType biases[OutputDimensions]; alignas(CacheLineSize) WeightType weights[OutputDimensions * PaddedInputDimensions]; diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/layers/clipped_relu.h b/DroidFishApp/src/main/cpp/stockfish/nnue/layers/clipped_relu.h index 65455df..f94d308 100644 --- a/DroidFishApp/src/main/cpp/stockfish/nnue/layers/clipped_relu.h +++ b/DroidFishApp/src/main/cpp/stockfish/nnue/layers/clipped_relu.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -26,50 +26,41 @@ namespace Stockfish::Eval::NNUE::Layers { // Clipped ReLU - template + template class ClippedReLU { public: // Input/output type - using InputType = typename PreviousLayer::OutputType; + using InputType = std::int32_t; using OutputType = std::uint8_t; - static_assert(std::is_same::value, ""); // Number of input/output dimensions - static constexpr IndexType InputDimensions = - PreviousLayer::OutputDimensions; + static constexpr IndexType InputDimensions = InDims; static constexpr IndexType OutputDimensions = InputDimensions; + static constexpr IndexType PaddedOutputDimensions = + ceil_to_multiple(OutputDimensions, 32); - // Size of forward propagation buffer used in this layer - static constexpr std::size_t SelfBufferSize = - ceil_to_multiple(OutputDimensions * sizeof(OutputType), CacheLineSize); - - // Size of the forward propagation buffer used from the input layer to this layer - static constexpr std::size_t BufferSize = - PreviousLayer::BufferSize + SelfBufferSize; + using OutputBuffer = OutputType[PaddedOutputDimensions]; // Hash value embedded in the evaluation file - static constexpr std::uint32_t get_hash_value() { + static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) { std::uint32_t hashValue = 0x538D24C7u; - hashValue += PreviousLayer::get_hash_value(); + hashValue += prevHash; return hashValue; } // Read network parameters - bool read_parameters(std::istream& stream) { - return previousLayer.read_parameters(stream); + bool read_parameters(std::istream&) { + return true; } // Write network parameters - bool write_parameters(std::ostream& stream) const { - return previousLayer.write_parameters(stream); + bool write_parameters(std::ostream&) const { + return true; } // Forward propagation const OutputType* propagate( - const TransformedFeatureType* transformedFeatures, char* buffer) const { - const auto input = previousLayer.propagate( - transformedFeatures, buffer + SelfBufferSize); - const auto output = reinterpret_cast(buffer); + const InputType* input, OutputType* output) const { #if defined(USE_AVX2) if constexpr (InputDimensions % SimdWidth == 0) { @@ -179,11 +170,9 @@ namespace Stockfish::Eval::NNUE::Layers { output[i] = static_cast( std::max(0, std::min(127, input[i] >> WeightScaleBits))); } + return output; } - - private: - PreviousLayer previousLayer; }; } // namespace Stockfish::Eval::NNUE::Layers diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/layers/input_slice.h b/DroidFishApp/src/main/cpp/stockfish/nnue/layers/input_slice.h deleted file mode 100644 index b6bf172..0000000 --- a/DroidFishApp/src/main/cpp/stockfish/nnue/layers/input_slice.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -// NNUE evaluation function layer InputSlice definition - -#ifndef NNUE_LAYERS_INPUT_SLICE_H_INCLUDED -#define NNUE_LAYERS_INPUT_SLICE_H_INCLUDED - -#include "../nnue_common.h" - -namespace Stockfish::Eval::NNUE::Layers { - -// Input layer -template -class InputSlice { - public: - // Need to maintain alignment - static_assert(Offset % MaxSimdWidth == 0, ""); - - // Output type - using OutputType = TransformedFeatureType; - - // Output dimensionality - static constexpr IndexType OutputDimensions = OutDims; - - // Size of forward propagation buffer used from the input layer to this layer - static constexpr std::size_t BufferSize = 0; - - // Hash value embedded in the evaluation file - static constexpr std::uint32_t get_hash_value() { - std::uint32_t hashValue = 0xEC42E90Du; - hashValue ^= OutputDimensions ^ (Offset << 10); - return hashValue; - } - - // Read network parameters - bool read_parameters(std::istream& /*stream*/) { - return true; - } - - // Write network parameters - bool write_parameters(std::ostream& /*stream*/) const { - return true; - } - - // Forward propagation - const OutputType* propagate( - const TransformedFeatureType* transformedFeatures, - char* /*buffer*/) const { - return transformedFeatures + Offset; - } - - private: -}; - -} // namespace Stockfish::Eval::NNUE::Layers - -#endif // #ifndef NNUE_LAYERS_INPUT_SLICE_H_INCLUDED diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_accumulator.h b/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_accumulator.h index d41ecf9..600483b 100644 --- a/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_accumulator.h +++ b/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_accumulator.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_architecture.h b/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_architecture.h index 879a39c..4f9596a 100644 --- a/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_architecture.h +++ b/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_architecture.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,39 +21,112 @@ #ifndef NNUE_ARCHITECTURE_H_INCLUDED #define NNUE_ARCHITECTURE_H_INCLUDED +#include + #include "nnue_common.h" -#include "features/half_ka_v2.h" +#include "features/half_ka_v2_hm.h" -#include "layers/input_slice.h" #include "layers/affine_transform.h" #include "layers/clipped_relu.h" +#include "../misc.h" + namespace Stockfish::Eval::NNUE { - // Input features used in evaluation function - using FeatureSet = Features::HalfKAv2; +// Input features used in evaluation function +using FeatureSet = Features::HalfKAv2_hm; - // Number of input feature dimensions after conversion - constexpr IndexType TransformedFeatureDimensions = 512; - constexpr IndexType PSQTBuckets = 8; - constexpr IndexType LayerStacks = 8; +// Number of input feature dimensions after conversion +constexpr IndexType TransformedFeatureDimensions = 1024; +constexpr IndexType PSQTBuckets = 8; +constexpr IndexType LayerStacks = 8; - namespace Layers { +struct Network +{ + static constexpr int FC_0_OUTPUTS = 15; + static constexpr int FC_1_OUTPUTS = 32; - // Define network structure - using InputLayer = InputSlice; - using HiddenLayer1 = ClippedReLU>; - using HiddenLayer2 = ClippedReLU>; - using OutputLayer = AffineTransform; + Layers::AffineTransform fc_0; + Layers::ClippedReLU ac_0; + Layers::AffineTransform fc_1; + Layers::ClippedReLU ac_1; + Layers::AffineTransform fc_2; - } // namespace Layers + // Hash value embedded in the evaluation file + static constexpr std::uint32_t get_hash_value() { + // input slice hash + std::uint32_t hashValue = 0xEC42E90Du; + hashValue ^= TransformedFeatureDimensions * 2; - using Network = Layers::OutputLayer; + hashValue = decltype(fc_0)::get_hash_value(hashValue); + hashValue = decltype(ac_0)::get_hash_value(hashValue); + hashValue = decltype(fc_1)::get_hash_value(hashValue); + hashValue = decltype(ac_1)::get_hash_value(hashValue); + hashValue = decltype(fc_2)::get_hash_value(hashValue); - static_assert(TransformedFeatureDimensions % MaxSimdWidth == 0, ""); - static_assert(Network::OutputDimensions == 1, ""); - static_assert(std::is_same::value, ""); + return hashValue; + } + + // Read network parameters + bool read_parameters(std::istream& stream) { + if (!fc_0.read_parameters(stream)) return false; + if (!ac_0.read_parameters(stream)) return false; + if (!fc_1.read_parameters(stream)) return false; + if (!ac_1.read_parameters(stream)) return false; + if (!fc_2.read_parameters(stream)) return false; + return true; + } + + // Read network parameters + bool write_parameters(std::ostream& stream) const { + if (!fc_0.write_parameters(stream)) return false; + if (!ac_0.write_parameters(stream)) return false; + if (!fc_1.write_parameters(stream)) return false; + if (!ac_1.write_parameters(stream)) return false; + if (!fc_2.write_parameters(stream)) return false; + return true; + } + + std::int32_t propagate(const TransformedFeatureType* transformedFeatures) + { + struct alignas(CacheLineSize) Buffer + { + alignas(CacheLineSize) decltype(fc_0)::OutputBuffer fc_0_out; + alignas(CacheLineSize) decltype(ac_0)::OutputBuffer ac_0_out; + alignas(CacheLineSize) decltype(fc_1)::OutputBuffer fc_1_out; + alignas(CacheLineSize) decltype(ac_1)::OutputBuffer ac_1_out; + alignas(CacheLineSize) decltype(fc_2)::OutputBuffer fc_2_out; + + Buffer() + { + std::memset(this, 0, sizeof(*this)); + } + }; + +#if defined(__clang__) && (__APPLE__) + // workaround for a bug reported with xcode 12 + static thread_local auto tlsBuffer = std::make_unique(); + // Access TLS only once, cache result. + Buffer& buffer = *tlsBuffer; +#else + alignas(CacheLineSize) static thread_local Buffer buffer; +#endif + + fc_0.propagate(transformedFeatures, buffer.fc_0_out); + ac_0.propagate(buffer.fc_0_out, buffer.ac_0_out); + fc_1.propagate(buffer.ac_0_out, buffer.fc_1_out); + ac_1.propagate(buffer.fc_1_out, buffer.ac_1_out); + fc_2.propagate(buffer.ac_1_out, buffer.fc_2_out); + + // buffer.fc_0_out[FC_0_OUTPUTS] is such that 1.0 is equal to 127*(1< inline void write_little_endian(std::ostream& stream, IntType value) { @@ -127,11 +127,11 @@ namespace Stockfish::Eval::NNUE { { for (; i + 1 < sizeof(IntType); ++i) { - u[i] = v; + u[i] = (std::uint8_t)v; v >>= 8; } } - u[i] = v; + u[i] = (std::uint8_t)v; stream.write(reinterpret_cast(u), sizeof(IntType)); } diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_feature_transformer.h b/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_feature_transformer.h index 59a965a..c969ac6 100644 --- a/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_feature_transformer.h +++ b/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_feature_transformer.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -47,12 +47,22 @@ namespace Stockfish::Eval::NNUE { #define vec_store(a,b) _mm512_store_si512(a,b) #define vec_add_16(a,b) _mm512_add_epi16(a,b) #define vec_sub_16(a,b) _mm512_sub_epi16(a,b) + #define vec_mul_16(a,b) _mm512_mullo_epi16(a,b) + #define vec_zero() _mm512_setzero_epi32() + #define vec_set_16(a) _mm512_set1_epi16(a) + #define vec_max_16(a,b) _mm512_max_epi16(a,b) + #define vec_min_16(a,b) _mm512_min_epi16(a,b) + inline vec_t vec_msb_pack_16(vec_t a, vec_t b){ + vec_t compacted = _mm512_packs_epi16(_mm512_srli_epi16(a,7),_mm512_srli_epi16(b,7)); + return _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 2, 4, 6, 1, 3, 5, 7), compacted); + } #define vec_load_psqt(a) _mm256_load_si256(a) #define vec_store_psqt(a,b) _mm256_store_si256(a,b) #define vec_add_psqt_32(a,b) _mm256_add_epi32(a,b) #define vec_sub_psqt_32(a,b) _mm256_sub_epi32(a,b) #define vec_zero_psqt() _mm256_setzero_si256() #define NumRegistersSIMD 32 + #define MaxChunkSize 64 #elif USE_AVX2 typedef __m256i vec_t; @@ -61,12 +71,22 @@ namespace Stockfish::Eval::NNUE { #define vec_store(a,b) _mm256_store_si256(a,b) #define vec_add_16(a,b) _mm256_add_epi16(a,b) #define vec_sub_16(a,b) _mm256_sub_epi16(a,b) + #define vec_mul_16(a,b) _mm256_mullo_epi16(a,b) + #define vec_zero() _mm256_setzero_si256() + #define vec_set_16(a) _mm256_set1_epi16(a) + #define vec_max_16(a,b) _mm256_max_epi16(a,b) + #define vec_min_16(a,b) _mm256_min_epi16(a,b) + inline vec_t vec_msb_pack_16(vec_t a, vec_t b){ + vec_t compacted = _mm256_packs_epi16(_mm256_srli_epi16(a,7), _mm256_srli_epi16(b,7)); + return _mm256_permute4x64_epi64(compacted, 0b11011000); + } #define vec_load_psqt(a) _mm256_load_si256(a) #define vec_store_psqt(a,b) _mm256_store_si256(a,b) #define vec_add_psqt_32(a,b) _mm256_add_epi32(a,b) #define vec_sub_psqt_32(a,b) _mm256_sub_epi32(a,b) #define vec_zero_psqt() _mm256_setzero_si256() #define NumRegistersSIMD 16 + #define MaxChunkSize 32 #elif USE_SSE2 typedef __m128i vec_t; @@ -75,12 +95,19 @@ namespace Stockfish::Eval::NNUE { #define vec_store(a,b) *(a)=(b) #define vec_add_16(a,b) _mm_add_epi16(a,b) #define vec_sub_16(a,b) _mm_sub_epi16(a,b) + #define vec_mul_16(a,b) _mm_mullo_epi16(a,b) + #define vec_zero() _mm_setzero_si128() + #define vec_set_16(a) _mm_set1_epi16(a) + #define vec_max_16(a,b) _mm_max_epi16(a,b) + #define vec_min_16(a,b) _mm_min_epi16(a,b) + #define vec_msb_pack_16(a,b) _mm_packs_epi16(_mm_srli_epi16(a,7),_mm_srli_epi16(b,7)) #define vec_load_psqt(a) (*(a)) #define vec_store_psqt(a,b) *(a)=(b) #define vec_add_psqt_32(a,b) _mm_add_epi32(a,b) #define vec_sub_psqt_32(a,b) _mm_sub_epi32(a,b) #define vec_zero_psqt() _mm_setzero_si128() #define NumRegistersSIMD (Is64Bit ? 16 : 8) + #define MaxChunkSize 16 #elif USE_MMX typedef __m64 vec_t; @@ -89,12 +116,26 @@ namespace Stockfish::Eval::NNUE { #define vec_store(a,b) *(a)=(b) #define vec_add_16(a,b) _mm_add_pi16(a,b) #define vec_sub_16(a,b) _mm_sub_pi16(a,b) + #define vec_mul_16(a,b) _mm_mullo_pi16(a,b) + #define vec_zero() _mm_setzero_si64() + #define vec_set_16(a) _mm_set1_pi16(a) + inline vec_t vec_max_16(vec_t a,vec_t b){ + vec_t comparison = _mm_cmpgt_pi16(a,b); + return _mm_or_si64(_mm_and_si64(comparison, a), _mm_andnot_si64(comparison, b)); + } + inline vec_t vec_min_16(vec_t a,vec_t b){ + vec_t comparison = _mm_cmpgt_pi16(a,b); + return _mm_or_si64(_mm_and_si64(comparison, b), _mm_andnot_si64(comparison, a)); + } + #define vec_msb_pack_16(a,b) _mm_packs_pi16(_mm_srli_pi16(a,7),_mm_srli_pi16(b,7)) #define vec_load_psqt(a) (*(a)) #define vec_store_psqt(a,b) *(a)=(b) #define vec_add_psqt_32(a,b) _mm_add_pi32(a,b) #define vec_sub_psqt_32(a,b) _mm_sub_pi32(a,b) #define vec_zero_psqt() _mm_setzero_si64() + #define vec_cleanup() _mm_empty() #define NumRegistersSIMD 8 + #define MaxChunkSize 8 #elif USE_NEON typedef int16x8_t vec_t; @@ -103,12 +144,24 @@ namespace Stockfish::Eval::NNUE { #define vec_store(a,b) *(a)=(b) #define vec_add_16(a,b) vaddq_s16(a,b) #define vec_sub_16(a,b) vsubq_s16(a,b) + #define vec_mul_16(a,b) vmulq_s16(a,b) + #define vec_zero() vec_t{0} + #define vec_set_16(a) vdupq_n_s16(a) + #define vec_max_16(a,b) vmaxq_s16(a,b) + #define vec_min_16(a,b) vminq_s16(a,b) + inline vec_t vec_msb_pack_16(vec_t a, vec_t b){ + const int8x8_t shifta = vshrn_n_s16(a, 7); + const int8x8_t shiftb = vshrn_n_s16(b, 7); + const int8x16_t compacted = vcombine_s8(shifta,shiftb); + return *reinterpret_cast (&compacted); + } #define vec_load_psqt(a) (*(a)) #define vec_store_psqt(a,b) *(a)=(b) #define vec_add_psqt_32(a,b) vaddq_s32(a,b) #define vec_sub_psqt_32(a,b) vsubq_s32(a,b) #define vec_zero_psqt() psqt_vec_t{0} #define NumRegistersSIMD 16 + #define MaxChunkSize 16 #else #undef VECTOR @@ -123,8 +176,10 @@ namespace Stockfish::Eval::NNUE { // We use __m* types as template arguments, which causes GCC to emit warnings // about losing some attribute information. This is irrelevant to us as we // only take their size, so the following pragma are harmless. + #if defined(__GNUC__) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wignored-attributes" + #endif template (); static constexpr int NumPsqtRegs = BestRegisterCount(); - + #if defined(__GNUC__) #pragma GCC diagnostic pop - + #endif #endif @@ -183,7 +238,7 @@ namespace Stockfish::Eval::NNUE { // Number of input/output dimensions static constexpr IndexType InputDimensions = FeatureSet::Dimensions; - static constexpr IndexType OutputDimensions = HalfDimensions * 2; + static constexpr IndexType OutputDimensions = HalfDimensions; // Size of forward propagation buffer static constexpr std::size_t BufferSize = @@ -191,7 +246,7 @@ namespace Stockfish::Eval::NNUE { // Hash value embedded in the evaluation file static constexpr std::uint32_t get_hash_value() { - return FeatureSet::HashValue ^ OutputDimensions; + return FeatureSet::HashValue ^ (OutputDimensions * 2); } // Read network parameters @@ -229,136 +284,55 @@ namespace Stockfish::Eval::NNUE { ) / 2; - #if defined(USE_AVX512) - - constexpr IndexType NumChunks = HalfDimensions / (SimdWidth * 2); - static_assert(HalfDimensions % (SimdWidth * 2) == 0); - const __m512i Control = _mm512_setr_epi64(0, 2, 4, 6, 1, 3, 5, 7); - const __m512i Zero = _mm512_setzero_si512(); - for (IndexType p = 0; p < 2; ++p) { - const IndexType offset = HalfDimensions * p; - auto out = reinterpret_cast<__m512i*>(&output[offset]); - for (IndexType j = 0; j < NumChunks; ++j) + const IndexType offset = (HalfDimensions / 2) * p; + +#if defined(VECTOR) + + constexpr IndexType OutputChunkSize = MaxChunkSize; + static_assert((HalfDimensions / 2) % OutputChunkSize == 0); + constexpr IndexType NumOutputChunks = HalfDimensions / 2 / OutputChunkSize; + + vec_t Zero = vec_zero(); + vec_t One = vec_set_16(127); + + const vec_t* in0 = reinterpret_cast(&(accumulation[perspectives[p]][0])); + const vec_t* in1 = reinterpret_cast(&(accumulation[perspectives[p]][HalfDimensions / 2])); + vec_t* out = reinterpret_cast< vec_t*>(output + offset); + + for (IndexType j = 0; j < NumOutputChunks; j += 1) { - __m512i sum0 = _mm512_load_si512(&reinterpret_cast - (accumulation[perspectives[p]])[j * 2 + 0]); - __m512i sum1 = _mm512_load_si512(&reinterpret_cast - (accumulation[perspectives[p]])[j * 2 + 1]); + const vec_t sum0a = vec_max_16(vec_min_16(in0[j * 2 + 0], One), Zero); + const vec_t sum0b = vec_max_16(vec_min_16(in0[j * 2 + 1], One), Zero); + const vec_t sum1a = vec_max_16(vec_min_16(in1[j * 2 + 0], One), Zero); + const vec_t sum1b = vec_max_16(vec_min_16(in1[j * 2 + 1], One), Zero); - _mm512_store_si512(&out[j], _mm512_permutexvar_epi64(Control, - _mm512_max_epi8(_mm512_packs_epi16(sum0, sum1), Zero))); + const vec_t pa = vec_mul_16(sum0a, sum1a); + const vec_t pb = vec_mul_16(sum0b, sum1b); + + out[j] = vec_msb_pack_16(pa, pb); } - } - return psqt; - #elif defined(USE_AVX2) +#else - constexpr IndexType NumChunks = HalfDimensions / SimdWidth; - constexpr int Control = 0b11011000; - const __m256i Zero = _mm256_setzero_si256(); - - for (IndexType p = 0; p < 2; ++p) - { - const IndexType offset = HalfDimensions * p; - auto out = reinterpret_cast<__m256i*>(&output[offset]); - for (IndexType j = 0; j < NumChunks; ++j) - { - __m256i sum0 = _mm256_load_si256(&reinterpret_cast - (accumulation[perspectives[p]])[j * 2 + 0]); - __m256i sum1 = _mm256_load_si256(&reinterpret_cast - (accumulation[perspectives[p]])[j * 2 + 1]); - - _mm256_store_si256(&out[j], _mm256_permute4x64_epi64( - _mm256_max_epi8(_mm256_packs_epi16(sum0, sum1), Zero), Control)); + for (IndexType j = 0; j < HalfDimensions / 2; ++j) { + BiasType sum0 = accumulation[static_cast(perspectives[p])][j + 0]; + BiasType sum1 = accumulation[static_cast(perspectives[p])][j + HalfDimensions / 2]; + sum0 = std::max(0, std::min(127, sum0)); + sum1 = std::max(0, std::min(127, sum1)); + output[offset + j] = static_cast(sum0 * sum1 / 128); } + +#endif } + +#if defined(vec_cleanup) + vec_cleanup(); +#endif + return psqt; - #elif defined(USE_SSE2) - - #ifdef USE_SSE41 - constexpr IndexType NumChunks = HalfDimensions / SimdWidth; - const __m128i Zero = _mm_setzero_si128(); - #else - constexpr IndexType NumChunks = HalfDimensions / SimdWidth; - const __m128i k0x80s = _mm_set1_epi8(-128); - #endif - - for (IndexType p = 0; p < 2; ++p) - { - const IndexType offset = HalfDimensions * p; - auto out = reinterpret_cast<__m128i*>(&output[offset]); - for (IndexType j = 0; j < NumChunks; ++j) - { - __m128i sum0 = _mm_load_si128(&reinterpret_cast - (accumulation[perspectives[p]])[j * 2 + 0]); - __m128i sum1 = _mm_load_si128(&reinterpret_cast - (accumulation[perspectives[p]])[j * 2 + 1]); - const __m128i packedbytes = _mm_packs_epi16(sum0, sum1); - - #ifdef USE_SSE41 - _mm_store_si128(&out[j], _mm_max_epi8(packedbytes, Zero)); - #else - _mm_store_si128(&out[j], _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)); - #endif - } - } - return psqt; - - #elif defined(USE_MMX) - - constexpr IndexType NumChunks = HalfDimensions / SimdWidth; - const __m64 k0x80s = _mm_set1_pi8(-128); - - for (IndexType p = 0; p < 2; ++p) - { - const IndexType offset = HalfDimensions * p; - auto out = reinterpret_cast<__m64*>(&output[offset]); - for (IndexType j = 0; j < NumChunks; ++j) - { - __m64 sum0 = *(&reinterpret_cast(accumulation[perspectives[p]])[j * 2 + 0]); - __m64 sum1 = *(&reinterpret_cast(accumulation[perspectives[p]])[j * 2 + 1]); - const __m64 packedbytes = _mm_packs_pi16(sum0, sum1); - out[j] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s); - } - } - _mm_empty(); - return psqt; - - #elif defined(USE_NEON) - - constexpr IndexType NumChunks = HalfDimensions / (SimdWidth / 2); - const int8x8_t Zero = {0}; - - for (IndexType p = 0; p < 2; ++p) - { - const IndexType offset = HalfDimensions * p; - const auto out = reinterpret_cast(&output[offset]); - for (IndexType j = 0; j < NumChunks; ++j) - { - int16x8_t sum = reinterpret_cast(accumulation[perspectives[p]])[j]; - out[j] = vmax_s8(vqmovn_s16(sum), Zero); - } - } - return psqt; - - #else - - for (IndexType p = 0; p < 2; ++p) - { - const IndexType offset = HalfDimensions * p; - for (IndexType j = 0; j < HalfDimensions; ++j) - { - BiasType sum = accumulation[perspectives[p]][j]; - output[offset + j] = static_cast(std::max(0, std::min(127, sum))); - } - } - return psqt; - - #endif - } // end of function transform() @@ -370,7 +344,6 @@ namespace Stockfish::Eval::NNUE { // That might depend on the feature set and generally relies on the // feature set's update cost calculation to be correct and never // allow updates with more added/removed features than MaxActiveDimensions. - using IndexList = ValueList; #ifdef VECTOR // Gcc-10.2 unnecessarily spills AVX2 registers if this array @@ -404,12 +377,12 @@ namespace Stockfish::Eval::NNUE { // Gather all features to be updated. const Square ksq = pos.square(perspective); - IndexList removed[2], added[2]; + FeatureSet::IndexList removed[2], added[2]; FeatureSet::append_changed_indices( - ksq, next, perspective, removed[0], added[0]); + ksq, next->dirtyPiece, perspective, removed[0], added[0]); for (StateInfo *st2 = pos.state(); st2 != next; st2 = st2->previous) FeatureSet::append_changed_indices( - ksq, st2, perspective, removed[1], added[1]); + ksq, st2->dirtyPiece, perspective, removed[1], added[1]); // Mark the accumulators as computed. next->accumulator.computed[perspective] = true; @@ -534,7 +507,7 @@ namespace Stockfish::Eval::NNUE { // Refresh the accumulator auto& accumulator = pos.state()->accumulator; accumulator.computed[perspective] = true; - IndexList active; + FeatureSet::IndexList active; FeatureSet::append_active_indices(pos, perspective, active); #ifdef VECTOR diff --git a/DroidFishApp/src/main/cpp/stockfish/pawns.cpp b/DroidFishApp/src/main/cpp/stockfish/pawns.cpp index 70fb6f2..fdcfa02 100644 --- a/DroidFishApp/src/main/cpp/stockfish/pawns.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/pawns.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -32,30 +32,30 @@ namespace { #define S(mg, eg) make_score(mg, eg) // Pawn penalties - constexpr Score Backward = S( 9, 22); - constexpr Score Doubled = S(13, 51); - constexpr Score DoubledEarly = S(20, 7); - constexpr Score Isolated = S( 3, 15); - constexpr Score WeakLever = S( 4, 58); - constexpr Score WeakUnopposed = S(13, 24); + constexpr Score Backward = S( 6, 19); + constexpr Score Doubled = S(11, 51); + constexpr Score DoubledEarly = S(17, 7); + constexpr Score Isolated = S( 1, 20); + constexpr Score WeakLever = S( 2, 57); + constexpr Score WeakUnopposed = S(15, 18); // Bonus for blocked pawns at 5th or 6th rank - constexpr Score BlockedPawn[2] = { S(-17, -6), S(-9, 2) }; + constexpr Score BlockedPawn[2] = { S(-19, -8), S(-7, 3) }; constexpr Score BlockedStorm[RANK_NB] = { - S(0, 0), S(0, 0), S(75, 78), S(-8, 16), S(-6, 10), S(-6, 6), S(0, 2) + S(0, 0), S(0, 0), S(64, 75), S(-3, 14), S(-12, 19), S(-7, 4), S(-10, 5) }; // Connected pawn bonus - constexpr int Connected[RANK_NB] = { 0, 5, 7, 11, 23, 48, 87 }; + constexpr int Connected[RANK_NB] = { 0, 3, 7, 7, 15, 54, 86 }; // Strength of pawn shelter for our king by [distance from edge][rank]. // RANK_1 = 0 is used for files where we have no pawn, or pawn is behind our king. constexpr Value ShelterStrength[int(FILE_NB) / 2][RANK_NB] = { - { V( -5), V( 82), V( 92), V( 54), V( 36), V( 22), V( 28) }, - { V(-44), V( 63), V( 33), V(-50), V(-30), V(-12), V( -62) }, - { V(-11), V( 77), V( 22), V( -6), V( 31), V( 8), V( -45) }, - { V(-39), V(-12), V(-29), V(-50), V(-43), V(-68), V(-164) } + { V(-2), V(85), V(95), V(53), V(39), V(23), V(25) }, + { V(-55), V(64), V(32), V(-55), V(-30), V(-11), V(-61) }, + { V(-11), V(75), V(19), V(-6), V(26), V(9), V(-47) }, + { V(-41), V(-11), V(-27), V(-58), V(-42), V(-66), V(-163) } }; // Danger of enemy pawns moving toward our king by [distance from edge][rank]. @@ -63,17 +63,17 @@ namespace { // is behind our king. Note that UnblockedStorm[0][1-2] accommodate opponent pawn // on edge, likely blocked by our king. constexpr Value UnblockedStorm[int(FILE_NB) / 2][RANK_NB] = { - { V( 87), V(-288), V(-168), V( 96), V( 47), V( 44), V( 46) }, - { V( 42), V( -25), V( 120), V( 45), V( 34), V( -9), V( 24) }, - { V( -8), V( 51), V( 167), V( 35), V( -4), V(-16), V(-12) }, - { V(-17), V( -13), V( 100), V( 4), V( 9), V(-16), V(-31) } + { V(94), V(-280), V(-170), V(90), V(59), V(47), V(53) }, + { V(43), V(-17), V(128), V(39), V(26), V(-17), V(15) }, + { V(-9), V(62), V(170), V(34), V(-5), V(-20), V(-11) }, + { V(-27), V(-19), V(106), V(10), V(2), V(-13), V(-24) } }; // KingOnFile[semi-open Us][semi-open Them] contains bonuses/penalties // for king when the king is on a semi-open or open file. - constexpr Score KingOnFile[2][2] = {{ S(-21,10), S(-7, 1) }, - { S( 0,-3), S( 9,-4) }}; + constexpr Score KingOnFile[2][2] = {{ S(-18,11), S(-6,-3) }, + { S( 0, 0), S( 5,-4) }}; #undef S #undef V diff --git a/DroidFishApp/src/main/cpp/stockfish/pawns.h b/DroidFishApp/src/main/cpp/stockfish/pawns.h index 124619d..af0370f 100644 --- a/DroidFishApp/src/main/cpp/stockfish/pawns.h +++ b/DroidFishApp/src/main/cpp/stockfish/pawns.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/DroidFishApp/src/main/cpp/stockfish/position.cpp b/DroidFishApp/src/main/cpp/stockfish/position.cpp index ba015d3..ec9229e 100644 --- a/DroidFishApp/src/main/cpp/stockfish/position.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/position.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -1013,9 +1013,9 @@ void Position::do_null_move(StateInfo& newSt) { } st->key ^= Zobrist::side; + ++st->rule50; prefetch(TT.first_entry(key())); - ++st->rule50; st->pliesFromNull = 0; sideToMove = ~sideToMove; @@ -1080,8 +1080,9 @@ bool Position::see_ge(Move m, Value threshold) const { if (swap <= 0) return true; + assert(color_of(piece_on(from)) == sideToMove); Bitboard occupied = pieces() ^ from ^ to; - Color stm = color_of(piece_on(from)); + Color stm = sideToMove; Bitboard attackers = attackers_to(to, occupied); Bitboard stmAttackers, bb; int res = 1; diff --git a/DroidFishApp/src/main/cpp/stockfish/position.h b/DroidFishApp/src/main/cpp/stockfish/position.h index 9f694a7..e558581 100644 --- a/DroidFishApp/src/main/cpp/stockfish/position.h +++ b/DroidFishApp/src/main/cpp/stockfish/position.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -120,12 +120,12 @@ public: Bitboard attackers_to(Square s) const; Bitboard attackers_to(Square s, Bitboard occupied) const; Bitboard slider_blockers(Bitboard sliders, Square s, Bitboard& pinners) const; + template Bitboard attacks_by(Color c) const; // Properties of moves bool legal(Move m) const; bool pseudo_legal(const Move m) const; bool capture(Move m) const; - bool capture_or_promotion(Move m) const; bool gives_check(Move m) const; Piece moved_piece(Move m) const; Piece captured_piece() const; @@ -285,6 +285,22 @@ inline Bitboard Position::attackers_to(Square s) const { return attackers_to(s, pieces()); } +template +inline Bitboard Position::attacks_by(Color c) const { + + if constexpr (Pt == PAWN) + return c == WHITE ? pawn_attacks_bb(pieces(WHITE, PAWN)) + : pawn_attacks_bb(pieces(BLACK, PAWN)); + else + { + Bitboard threats = 0; + Bitboard attackers = pieces(c, Pt); + while (attackers) + threats |= attacks_bb(pop_lsb(attackers), pieces()); + return threats; + } +} + inline Bitboard Position::checkers() const { return st->checkersBB; } @@ -352,11 +368,6 @@ inline bool Position::is_chess960() const { return chess960; } -inline bool Position::capture_or_promotion(Move m) const { - assert(is_ok(m)); - return type_of(m) != NORMAL ? type_of(m) != CASTLING : !empty(to_sq(m)); -} - inline bool Position::capture(Move m) const { assert(is_ok(m)); // Castling is encoded as "king captures rook" diff --git a/DroidFishApp/src/main/cpp/stockfish/psqt.cpp b/DroidFishApp/src/main/cpp/stockfish/psqt.cpp index 33a3e00..ca5664c 100644 --- a/DroidFishApp/src/main/cpp/stockfish/psqt.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/psqt.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/DroidFishApp/src/main/cpp/stockfish/psqt.h b/DroidFishApp/src/main/cpp/stockfish/psqt.h index 7abb148..4ee0e37 100644 --- a/DroidFishApp/src/main/cpp/stockfish/psqt.h +++ b/DroidFishApp/src/main/cpp/stockfish/psqt.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/DroidFishApp/src/main/cpp/stockfish/search.cpp b/DroidFishApp/src/main/cpp/stockfish/search.cpp index a413bd3..49d7c5c 100644 --- a/DroidFishApp/src/main/cpp/stockfish/search.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/search.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -61,20 +61,17 @@ namespace { // Different node types, used as a template parameter enum NodeType { NonPV, PV, Root }; - constexpr uint64_t TtHitAverageWindow = 4096; - constexpr uint64_t TtHitAverageResolution = 1024; - // Futility margin Value futility_margin(Depth d, bool improving) { - return Value(214 * (d - improving)); + return Value(168 * (d - improving)); } // Reductions lookup table, initialized at startup int Reductions[MAX_MOVES]; // [depth or moveNumber] - Depth reduction(bool i, Depth d, int mn) { + Depth reduction(bool i, Depth d, int mn, Value delta, Value rootDelta) { int r = Reductions[d] * Reductions[mn]; - return (r + 534) / 1024 + (!i && r > 904); + return (r + 1463 - int(delta) * 1024 / int(rootDelta)) / 1024 + (!i && r > 1010); } constexpr int futility_move_count(bool improving, Depth depth) { @@ -83,7 +80,7 @@ namespace { // History and stats update bonus, based on depth int stat_bonus(Depth d) { - return d > 14 ? 73 : 6 * d * d + 229 * d - 215; + return std::min((9 * d + 270) * d - 311 , 2145); } // Add a small random component to draw evaluations to avoid 3-fold blindness @@ -91,14 +88,22 @@ namespace { return VALUE_DRAW + Value(2 * (thisThread->nodes & 1) - 1); } - // Skill structure is used to implement strength limit + // Skill structure is used to implement strength limit. If we have an uci_elo then + // we convert it to a suitable fractional skill level using anchoring to CCRL Elo + // (goldfish 1.13 = 2000) and a fit through Ordo derived Elo for match (TC 60+0.6) + // results spanning a wide range of k values. struct Skill { - explicit Skill(int l) : level(l) {} - bool enabled() const { return level < 20; } - bool time_to_pick(Depth depth) const { return depth == 1 + level; } + Skill(int skill_level, int uci_elo) { + if (uci_elo) + level = std::clamp(std::pow((uci_elo - 1346.6) / 143.4, 1 / 0.806), 0.0, 20.0); + else + level = double(skill_level); + } + bool enabled() const { return level < 20.0; } + bool time_to_pick(Depth depth) const { return depth == 1 + int(level); } Move pick_best(size_t multiPV); - int level; + double level; Move best = MOVE_NONE; }; @@ -112,7 +117,7 @@ namespace { Value value_from_tt(Value v, int ply, int r50c); void update_pv(Move* pv, Move move, Move* childPv); void update_continuation_histories(Stack* ss, Piece pc, Square to, int bonus); - void update_quiet_stats(const Position& pos, Stack* ss, Move move, int bonus, int depth); + void update_quiet_stats(const Position& pos, Stack* ss, Move move, int bonus); void update_all_stats(const Position& pos, Stack* ss, Move bestMove, Value bestValue, Value beta, Square prevSq, Move* quietsSearched, int quietCount, Move* capturesSearched, int captureCount, Depth depth); @@ -152,7 +157,7 @@ namespace { void Search::init() { for (int i = 1; i < MAX_MOVES; ++i) - Reductions[i] = int(21.9 * std::log(i)); + Reductions[i] = int((20.81 + std::log(Threads.size()) / 2) * std::log(i)); } @@ -222,14 +227,16 @@ void MainThread::search() { Time.availableNodes += Limits.inc[us] - Threads.nodes_searched(); Thread* bestThread = this; + Skill skill = Skill(Options["Skill Level"], Options["UCI_LimitStrength"] ? int(Options["UCI_Elo"]) : 0); if ( int(Options["MultiPV"]) == 1 && !Limits.depth - && !(Skill(Options["Skill Level"]).enabled() || int(Options["UCI_LimitStrength"])) + && !skill.enabled() && rootMoves[0].pv[0] != MOVE_NONE) bestThread = Threads.get_best_thread(); bestPreviousScore = bestThread->rootMoves[0].score; + bestPreviousAverageScore = bestThread->rootMoves[0].averageScore; // Send again PV info if we have a new best thread if (bestThread != this) @@ -256,7 +263,7 @@ void Thread::search() { // The latter is needed for statScore and killer initialization. Stack stack[MAX_PLY+10], *ss = stack+7; Move pv[MAX_PLY+1]; - Value bestValue, alpha, beta, delta; + Value alpha, beta, delta; Move lastBestMove = MOVE_NONE; Depth lastBestMoveDepth = 0; MainThread* mainThread = (this == Threads.main() ? Threads.main() : nullptr); @@ -286,23 +293,8 @@ void Thread::search() { mainThread->iterValue[i] = mainThread->bestPreviousScore; } - std::copy(&lowPlyHistory[2][0], &lowPlyHistory.back().back() + 1, &lowPlyHistory[0][0]); - std::fill(&lowPlyHistory[MAX_LPH - 2][0], &lowPlyHistory.back().back() + 1, 0); - size_t multiPV = size_t(Options["MultiPV"]); - - // Pick integer skill levels, but non-deterministically round up or down - // such that the average integer skill corresponds to the input floating point one. - // UCI_Elo is converted to a suitable fractional skill level, using anchoring - // to CCRL Elo (goldfish 1.13 = 2000) and a fit through Ordo derived Elo - // for match (TC 60+0.6) results spanning a wide range of k values. - PRNG rng(now()); - double floatLevel = Options["UCI_LimitStrength"] ? - std::clamp(std::pow((Options["UCI_Elo"] - 1346.6) / 143.4, 1 / 0.806), 0.0, 20.0) : - double(Options["Skill Level"]); - int intLevel = int(floatLevel) + - ((floatLevel - int(floatLevel)) * 1024 > rng.rand() % 1024 ? 1 : 0); - Skill skill(intLevel); + Skill skill(Options["Skill Level"], Options["UCI_LimitStrength"] ? int(Options["UCI_Elo"]) : 0); // When playing with strength handicap enable MultiPV search that we will // use behind the scenes to retrieve a set of possible moves. @@ -310,9 +302,12 @@ void Thread::search() { multiPV = std::max(multiPV, (size_t)4); multiPV = std::min(multiPV, rootMoves.size()); - ttHitAverage = TtHitAverageWindow * TtHitAverageResolution / 2; - trend = SCORE_ZERO; + complexityAverage.set(202, 1); + + trend = SCORE_ZERO; + optimism[ us] = Value(39); + optimism[~us] = -optimism[us]; int searchAgainCounter = 0; @@ -353,16 +348,19 @@ void Thread::search() { // Reset aspiration window starting size if (rootDepth >= 4) { - Value prev = rootMoves[pvIdx].previousScore; - delta = Value(17); + Value prev = rootMoves[pvIdx].averageScore; + delta = Value(16) + int(prev) * prev / 19178; alpha = std::max(prev - delta,-VALUE_INFINITE); beta = std::min(prev + delta, VALUE_INFINITE); - // Adjust trend based on root move's previousScore (dynamic contempt) - int tr = 113 * prev / (abs(prev) + 147); - + // Adjust trend and optimism based on root move's previousScore + int tr = sigmoid(prev, 3, 8, 90, 125, 1); trend = (us == WHITE ? make_score(tr, tr / 2) : -make_score(tr, tr / 2)); + + int opt = sigmoid(prev, 8, 17, 144, 13966, 183); + optimism[ us] = Value(opt); + optimism[~us] = -optimism[us]; } // Start with a small aspiration window and, in the case of a fail @@ -415,7 +413,7 @@ void Thread::search() { else break; - delta += delta / 4 + 5; + delta += delta / 4 + 2; assert(alpha >= -VALUE_INFINITE && beta <= VALUE_INFINITE); } @@ -449,28 +447,31 @@ void Thread::search() { if (skill.enabled() && skill.time_to_pick(rootDepth)) skill.pick_best(multiPV); + // Use part of the gained time from a previous stable move for the current move + for (Thread* th : Threads) + { + totBestMoveChanges += th->bestMoveChanges; + th->bestMoveChanges = 0; + } + // Do we have time for the next iteration? Can we stop searching now? if ( Limits.use_time_management() && !Threads.stop && !mainThread->stopOnPonderhit) { - double fallingEval = (318 + 6 * (mainThread->bestPreviousScore - bestValue) - + 6 * (mainThread->iterValue[iterIdx] - bestValue)) / 825.0; + double fallingEval = (69 + 12 * (mainThread->bestPreviousAverageScore - bestValue) + + 6 * (mainThread->iterValue[iterIdx] - bestValue)) / 781.4; fallingEval = std::clamp(fallingEval, 0.5, 1.5); // If the bestMove is stable over several iterations, reduce time accordingly - timeReduction = lastBestMoveDepth + 9 < completedDepth ? 1.92 : 0.95; - double reduction = (1.47 + mainThread->previousTimeReduction) / (2.32 * timeReduction); - - // Use part of the gained time from a previous stable move for the current move - for (Thread* th : Threads) - { - totBestMoveChanges += th->bestMoveChanges; - th->bestMoveChanges = 0; - } + timeReduction = lastBestMoveDepth + 10 < completedDepth ? 1.63 : 0.73; + double reduction = (1.56 + mainThread->previousTimeReduction) / (2.20 * timeReduction); double bestMoveInstability = 1.073 + std::max(1.0, 2.25 - 9.9 / rootDepth) * totBestMoveChanges / Threads.size(); - double totalTime = Time.optimum() * fallingEval * reduction * bestMoveInstability; + int complexity = mainThread->complexityAverage.value(); + double complexPosition = std::clamp(1.0 + (complexity - 326) / 1618.1, 0.5, 1.5); + + double totalTime = Time.optimum() * fallingEval * reduction * bestMoveInstability * complexPosition; // Cap used time in case of a single legal move for a better viewer experience in tournaments // yielding correct scores and sufficiently fast moves. @@ -489,7 +490,7 @@ void Thread::search() { } else if ( Threads.increaseDepth && !mainThread->ponder - && Time.elapsed() > totalTime * 0.58) + && Time.elapsed() > totalTime * 0.43) Threads.increaseDepth = false; else Threads.increaseDepth = true; @@ -553,17 +554,17 @@ namespace { Depth extension, newDepth; Value bestValue, value, ttValue, eval, maxValue, probCutBeta; bool givesCheck, improving, didLMR, priorCapture; - bool captureOrPromotion, doFullDepthSearch, moveCountPruning, - ttCapture, singularQuietLMR; + bool capture, doFullDepthSearch, moveCountPruning, ttCapture; Piece movedPiece; - int moveCount, captureCount, quietCount; + int moveCount, captureCount, quietCount, bestMoveCount, improvement, complexity; // Step 1. Initialize node Thread* thisThread = pos.this_thread(); + thisThread->depth = depth; ss->inCheck = pos.checkers(); priorCapture = pos.captured_piece(); Color us = pos.side_to_move(); - moveCount = captureCount = quietCount = ss->moveCount = 0; + moveCount = bestMoveCount = captureCount = quietCount = ss->moveCount = 0; bestValue = -VALUE_INFINITE; maxValue = VALUE_INFINITE; @@ -595,6 +596,8 @@ namespace { if (alpha >= beta) return alpha; } + else + thisThread->rootDelta = beta - alpha; assert(0 <= ss->ply && ss->ply < MAX_PLY); @@ -602,6 +605,7 @@ namespace { (ss+1)->excludedMove = bestMove = MOVE_NONE; (ss+2)->killers[0] = (ss+2)->killers[1] = MOVE_NONE; ss->doubleExtensions = (ss-1)->doubleExtensions; + ss->depth = depth; Square prevSq = to_sq((ss-1)->currentMove); // Initialize statScore to zero for the grandchildren of the current position. @@ -621,44 +625,33 @@ namespace { ttValue = ss->ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE; ttMove = rootNode ? thisThread->rootMoves[thisThread->pvIdx].pv[0] : ss->ttHit ? tte->move() : MOVE_NONE; + ttCapture = ttMove && pos.capture(ttMove); if (!excludedMove) ss->ttPv = PvNode || (ss->ttHit && tte->is_pv()); - // Update low ply history for previous move if we are near root and position is or has been in PV - if ( ss->ttPv - && depth > 12 - && ss->ply - 1 < MAX_LPH - && !priorCapture - && is_ok((ss-1)->currentMove)) - thisThread->lowPlyHistory[ss->ply - 1][from_to((ss-1)->currentMove)] << stat_bonus(depth - 5); - - // thisThread->ttHitAverage can be used to approximate the running average of ttHit - thisThread->ttHitAverage = (TtHitAverageWindow - 1) * thisThread->ttHitAverage / TtHitAverageWindow - + TtHitAverageResolution * ss->ttHit; - // At non-PV nodes we check for an early TT cutoff if ( !PvNode && ss->ttHit - && tte->depth() >= depth + && tte->depth() > depth - (thisThread->id() % 2 == 1) && ttValue != VALUE_NONE // Possible in case of TT access race && (ttValue >= beta ? (tte->bound() & BOUND_LOWER) : (tte->bound() & BOUND_UPPER))) { - // If ttMove is quiet, update move sorting heuristics on TT hit + // If ttMove is quiet, update move sorting heuristics on TT hit (~1 Elo) if (ttMove) { if (ttValue >= beta) { - // Bonus for a quiet ttMove that fails high - if (!pos.capture_or_promotion(ttMove)) - update_quiet_stats(pos, ss, ttMove, stat_bonus(depth), depth); + // Bonus for a quiet ttMove that fails high (~3 Elo) + if (!ttCapture) + update_quiet_stats(pos, ss, ttMove, stat_bonus(depth)); - // Extra penalty for early quiet moves of the previous ply + // Extra penalty for early quiet moves of the previous ply (~0 Elo) if ((ss-1)->moveCount <= 2 && !priorCapture) update_continuation_histories(ss-1, pos.piece_on(prevSq), prevSq, -stat_bonus(depth + 1)); } - // Penalty for a quiet ttMove that fails low - else if (!pos.capture_or_promotion(ttMove)) + // Penalty for a quiet ttMove that fails low (~1 Elo) + else if (!ttCapture) { int penalty = -stat_bonus(depth); thisThread->mainHistory[us][from_to(ttMove)] << penalty; @@ -732,6 +725,8 @@ namespace { // Skip early pruning when in check ss->staticEval = eval = VALUE_NONE; improving = false; + improvement = 0; + complexity = 0; goto moves_loop; } else if (ss->ttHit) @@ -745,61 +740,76 @@ namespace { if (eval == VALUE_DRAW) eval = value_draw(thisThread); - // Can ttValue be used as a better position evaluation? + // ttValue can be used as a better position evaluation (~4 Elo) if ( ttValue != VALUE_NONE && (tte->bound() & (ttValue > eval ? BOUND_LOWER : BOUND_UPPER))) eval = ttValue; } else { - // In case of null move search use previous static eval with a different sign - // and addition of two tempos - if ((ss-1)->currentMove != MOVE_NULL) - ss->staticEval = eval = evaluate(pos); - else - ss->staticEval = eval = -(ss-1)->staticEval; + ss->staticEval = eval = evaluate(pos); // Save static evaluation into transposition table - tte->save(posKey, VALUE_NONE, ss->ttPv, BOUND_NONE, DEPTH_NONE, MOVE_NONE, eval); + if (!excludedMove) + tte->save(posKey, VALUE_NONE, ss->ttPv, BOUND_NONE, DEPTH_NONE, MOVE_NONE, eval); } - // Use static evaluation difference to improve quiet move ordering + // Use static evaluation difference to improve quiet move ordering (~3 Elo) if (is_ok((ss-1)->currentMove) && !(ss-1)->inCheck && !priorCapture) { - int bonus = std::clamp(-depth * 4 * int((ss-1)->staticEval + ss->staticEval), -1000, 1000); + int bonus = std::clamp(-16 * int((ss-1)->staticEval + ss->staticEval), -2000, 2000); thisThread->mainHistory[~us][from_to((ss-1)->currentMove)] << bonus; } - // Set up improving flag that is used in various pruning heuristics - // We define position as improving if static evaluation of position is better - // Than the previous static evaluation at our turn - // In case of us being in check at our previous move we look at move prior to it - improving = (ss-2)->staticEval == VALUE_NONE - ? ss->staticEval > (ss-4)->staticEval || (ss-4)->staticEval == VALUE_NONE - : ss->staticEval > (ss-2)->staticEval; + // Set up the improvement variable, which is the difference between the current + // static evaluation and the previous static evaluation at our turn (if we were + // in check at our previous move we look at the move prior to it). The improvement + // margin and the improving flag are used in various pruning heuristics. + improvement = (ss-2)->staticEval != VALUE_NONE ? ss->staticEval - (ss-2)->staticEval + : (ss-4)->staticEval != VALUE_NONE ? ss->staticEval - (ss-4)->staticEval + : 175; - // Step 7. Futility pruning: child node (~50 Elo) + improving = improvement > 0; + complexity = abs(ss->staticEval - (us == WHITE ? eg_value(pos.psq_score()) : -eg_value(pos.psq_score()))); + + thisThread->complexityAverage.update(complexity); + + // Step 7. Razoring. + // If eval is really low check with qsearch if it can exceed alpha, if it can't, + // return a fail low. if ( !PvNode - && depth < 9 - && eval - futility_margin(depth, improving) >= beta - && eval < VALUE_KNOWN_WIN) // Do not return unproven wins + && depth <= 7 + && eval < alpha - 348 - 258 * depth * depth) + { + value = qsearch(pos, ss, alpha - 1, alpha); + if (value < alpha) + return value; + } + + // Step 8. Futility pruning: child node (~25 Elo). + // The depth condition is important for mate finding. + if ( !ss->ttPv + && depth < 8 + && eval - futility_margin(depth, improving) - (ss-1)->statScore / 256 >= beta + && eval >= beta + && eval < 26305) // larger than VALUE_KNOWN_WIN, but smaller than TB wins. return eval; - // Step 8. Null move search with verification search (~40 Elo) + // Step 9. Null move search with verification search (~22 Elo) if ( !PvNode && (ss-1)->currentMove != MOVE_NULL - && (ss-1)->statScore < 23767 + && (ss-1)->statScore < 14695 && eval >= beta && eval >= ss->staticEval - && ss->staticEval >= beta - 20 * depth - 22 * improving + 168 * ss->ttPv + 159 + && ss->staticEval >= beta - 15 * depth - improvement / 15 + 198 + complexity / 28 && !excludedMove && pos.non_pawn_material(us) && (ss->ply >= thisThread->nmpMinPly || us != thisThread->nmpColor)) { assert(eval - beta >= 0); - // Null move dynamic reduction based on depth and value - Depth R = (1090 + 81 * depth) / 256 + std::min(int(eval - beta) / 205, 3); + // Null move dynamic reduction based on depth, eval and complexity of position + Depth R = std::min(int(eval - beta) / 147, 5) + depth / 3 + 4 - (complexity > 753); ss->currentMove = MOVE_NULL; ss->continuationHistory = &thisThread->continuationHistory[0][0][NO_PIECE][0]; @@ -835,9 +845,9 @@ namespace { } } - probCutBeta = beta + 209 - 44 * improving; + probCutBeta = beta + 179 - 46 * improving; - // Step 9. ProbCut (~4 Elo) + // Step 10. ProbCut (~4 Elo) // If we have a good enough capture and a reduced search returns a value // much above beta, we can (almost) safely prune the previous move. if ( !PvNode @@ -854,20 +864,17 @@ namespace { { assert(probCutBeta < VALUE_INFINITE); - MovePicker mp(pos, ttMove, probCutBeta - ss->staticEval, &captureHistory); - int probCutCount = 0; + MovePicker mp(pos, ttMove, probCutBeta - ss->staticEval, depth - 3, &captureHistory); bool ttPv = ss->ttPv; + bool captureOrPromotion; ss->ttPv = false; - while ( (move = mp.next_move()) != MOVE_NONE - && probCutCount < 2 + 2 * cutNode) + while ((move = mp.next_move()) != MOVE_NONE) if (move != excludedMove && pos.legal(move)) { - assert(pos.capture_or_promotion(move)); - assert(depth >= 5); + assert(pos.capture(move) || promotion_type(move) == QUEEN); captureOrPromotion = true; - probCutCount++; ss->currentMove = move; ss->continuationHistory = &thisThread->continuationHistory[ss->inCheck] @@ -901,21 +908,24 @@ namespace { ss->ttPv = ttPv; } - // Step 10. If the position is not in TT, decrease depth by 2 + // Step 11. If the position is not in TT, decrease depth by 2 or 1 depending on node type (~3 Elo) if ( PvNode - && depth >= 6 + && depth >= 3 && !ttMove) depth -= 2; -moves_loop: // When in check, search starts from here + if ( cutNode + && depth >= 8 + && !ttMove) + depth--; - ttCapture = ttMove && pos.capture_or_promotion(ttMove); +moves_loop: // When in check, search starts here - // Step 11. A small Probcut idea, when we are in check - probCutBeta = beta + 409; + // Step 12. A small Probcut idea, when we are in check (~0 Elo) + probCutBeta = beta + 481; if ( ss->inCheck && !PvNode - && depth >= 4 + && depth >= 2 && ttCapture && (tte->bound() & BOUND_LOWER) && tte->depth() >= depth - 3 @@ -933,16 +943,13 @@ moves_loop: // When in check, search starts from here Move countermove = thisThread->counterMoves[pos.piece_on(prevSq)][prevSq]; MovePicker mp(pos, ttMove, depth, &thisThread->mainHistory, - &thisThread->lowPlyHistory, &captureHistory, contHist, countermove, - ss->killers, - ss->ply); + ss->killers); value = bestValue; - singularQuietLMR = moveCountPruning = false; - bool doubleExtension = false; + moveCountPruning = false; // Indicate PvNodes that will probably fail low if the node was searched // at a depth equal or greater than the current depth, and the result of this search was a fail low. @@ -951,7 +958,7 @@ moves_loop: // When in check, search starts from here && (tte->bound() & BOUND_UPPER) && tte->depth() >= depth; - // Step 12. Loop through all pseudo-legal moves until no moves remain + // Step 13. Loop through all pseudo-legal moves until no moves remain // or a beta cutoff occurs. while ((move = mp.next_move(moveCountPruning)) != MOVE_NONE) { @@ -982,123 +989,130 @@ moves_loop: // When in check, search starts from here (ss+1)->pv = nullptr; extension = 0; - captureOrPromotion = pos.capture_or_promotion(move); + capture = pos.capture(move); movedPiece = pos.moved_piece(move); givesCheck = pos.gives_check(move); // Calculate new depth for this move newDepth = depth - 1; - // Step 13. Pruning at shallow depth (~200 Elo) + Value delta = beta - alpha; + + // Step 14. Pruning at shallow depth (~98 Elo). Depth conditions are important for mate finding. if ( !rootNode && pos.non_pawn_material(us) && bestValue > VALUE_TB_LOSS_IN_MAX_PLY) { - // Skip quiet moves if movecount exceeds our FutilityMoveCount threshold + // Skip quiet moves if movecount exceeds our FutilityMoveCount threshold (~7 Elo) moveCountPruning = moveCount >= futility_move_count(improving, depth); // Reduced depth of the next LMR search - int lmrDepth = std::max(newDepth - reduction(improving, depth, moveCount), 0); + int lmrDepth = std::max(newDepth - reduction(improving, depth, moveCount, delta, thisThread->rootDelta), 0); - if ( captureOrPromotion + if ( capture || givesCheck) { - // Capture history based pruning when the move doesn't give check - if ( !givesCheck - && lmrDepth < 1 - && captureHistory[movedPiece][to_sq(move)][type_of(pos.piece_on(to_sq(move)))] < 0) + // Futility pruning for captures (~0 Elo) + if ( !pos.empty(to_sq(move)) + && !givesCheck + && !PvNode + && lmrDepth < 6 + && !ss->inCheck + && ss->staticEval + 281 + 179 * lmrDepth + PieceValue[EG][pos.piece_on(to_sq(move))] + + captureHistory[movedPiece][to_sq(move)][type_of(pos.piece_on(to_sq(move)))] / 6 < alpha) continue; - // SEE based pruning - if (!pos.see_ge(move, Value(-218) * depth)) // (~25 Elo) + // SEE based pruning (~9 Elo) + if (!pos.see_ge(move, Value(-203) * depth)) continue; } else { - // Continuation history based pruning (~20 Elo) + int history = (*contHist[0])[movedPiece][to_sq(move)] + + (*contHist[1])[movedPiece][to_sq(move)] + + (*contHist[3])[movedPiece][to_sq(move)]; + + // Continuation history based pruning (~2 Elo) if ( lmrDepth < 5 - && (*contHist[0])[movedPiece][to_sq(move)] < CounterMovePruneThreshold - && (*contHist[1])[movedPiece][to_sq(move)] < CounterMovePruneThreshold) + && history < -3875 * (depth - 1)) continue; - // Futility pruning: parent node (~5 Elo) - if ( lmrDepth < 7 - && !ss->inCheck - && ss->staticEval + 174 + 157 * lmrDepth <= alpha - && (*contHist[0])[movedPiece][to_sq(move)] - + (*contHist[1])[movedPiece][to_sq(move)] - + (*contHist[3])[movedPiece][to_sq(move)] - + (*contHist[5])[movedPiece][to_sq(move)] / 3 < 28255) + history += thisThread->mainHistory[us][from_to(move)]; + + // Futility pruning: parent node (~9 Elo) + if ( !ss->inCheck + && lmrDepth < 11 + && ss->staticEval + 122 + 138 * lmrDepth + history / 60 <= alpha) continue; - // Prune moves with negative SEE (~20 Elo) - if (!pos.see_ge(move, Value(-(30 - std::min(lmrDepth, 18)) * lmrDepth * lmrDepth))) + // Prune moves with negative SEE (~3 Elo) + if (!pos.see_ge(move, Value(-25 * lmrDepth * lmrDepth - 20 * lmrDepth))) continue; } } - // Step 14. Extensions (~75 Elo) - - // Singular extension search (~70 Elo). If all moves but one fail low on a - // search of (alpha-s, beta-s), and just one fails high on (alpha, beta), - // then that move is singular and should be extended. To verify this we do - // a reduced search on all the other moves but the ttMove and if the - // result is lower than ttValue minus a margin, then we will extend the ttMove. - if ( !rootNode - && depth >= 7 - && move == ttMove - && !excludedMove // Avoid recursive singular search - /* && ttValue != VALUE_NONE Already implicit in the next condition */ - && abs(ttValue) < VALUE_KNOWN_WIN - && (tte->bound() & BOUND_LOWER) - && tte->depth() >= depth - 3) + // Step 15. Extensions (~66 Elo) + // We take care to not overdo to avoid search getting stuck. + if (ss->ply < thisThread->rootDepth * 2) { - Value singularBeta = ttValue - 2 * depth; - Depth singularDepth = (depth - 1) / 2; - - ss->excludedMove = move; - value = search(pos, ss, singularBeta - 1, singularBeta, singularDepth, cutNode); - ss->excludedMove = MOVE_NONE; - - if (value < singularBeta) + // Singular extension search (~58 Elo). If all moves but one fail low on a + // search of (alpha-s, beta-s), and just one fails high on (alpha, beta), + // then that move is singular and should be extended. To verify this we do + // a reduced search on all the other moves but the ttMove and if the + // result is lower than ttValue minus a margin, then we will extend the ttMove. + if ( !rootNode + && depth >= 4 + 2 * (PvNode && tte->is_pv()) + && move == ttMove + && !excludedMove // Avoid recursive singular search + /* && ttValue != VALUE_NONE Already implicit in the next condition */ + && abs(ttValue) < VALUE_KNOWN_WIN + && (tte->bound() & BOUND_LOWER) + && tte->depth() >= depth - 3) { - extension = 1; - singularQuietLMR = !ttCapture; + Value singularBeta = ttValue - 3 * depth; + Depth singularDepth = (depth - 1) / 2; - // Avoid search explosion by limiting the number of double extensions to at most 3 - if ( !PvNode - && value < singularBeta - 93 - && ss->doubleExtensions < 3) - { - extension = 2; - doubleExtension = true; - } - } - - // Multi-cut pruning - // Our ttMove is assumed to fail high, and now we failed high also on a reduced - // search without the ttMove. So we assume this expected Cut-node is not singular, - // that multiple moves fail high, and we can prune the whole subtree by returning - // a soft bound. - else if (singularBeta >= beta) - return singularBeta; - - // If the eval of ttMove is greater than beta we try also if there is another - // move that pushes it over beta, if so also produce a cutoff. - else if (ttValue >= beta) - { ss->excludedMove = move; - value = search(pos, ss, beta - 1, beta, (depth + 3) / 2, cutNode); + value = search(pos, ss, singularBeta - 1, singularBeta, singularDepth, cutNode); ss->excludedMove = MOVE_NONE; - if (value >= beta) - return beta; + if (value < singularBeta) + { + extension = 1; + + // Avoid search explosion by limiting the number of double extensions + if ( !PvNode + && value < singularBeta - 26 + && ss->doubleExtensions <= 8) + extension = 2; + } + + // Multi-cut pruning + // Our ttMove is assumed to fail high, and now we failed high also on a reduced + // search without the ttMove. So we assume this expected Cut-node is not singular, + // that multiple moves fail high, and we can prune the whole subtree by returning + // a soft bound. + else if (singularBeta >= beta) + return singularBeta; + + // If the eval of ttMove is greater than beta, we reduce it (negative extension) + else if (ttValue >= beta) + extension = -2; } + + // Check extensions (~1 Elo) + else if ( givesCheck + && depth > 9 + && abs(ss->staticEval) > 71) + extension = 1; + + // Quiet ttMove extensions (~0 Elo) + else if ( PvNode + && move == ttMove + && move == ss->killers[0] + && (*contHist[0])[movedPiece][to_sq(move)] >= 5491) + extension = 1; } - else if ( givesCheck - && depth > 6 - && abs(ss->staticEval) > Value(100)) - extension = 1; // Add extension to new depth newDepth += extension; @@ -1110,31 +1124,30 @@ moves_loop: // When in check, search starts from here // Update the current move (this must be done after singular extension search) ss->currentMove = move; ss->continuationHistory = &thisThread->continuationHistory[ss->inCheck] - [captureOrPromotion] + [capture] [movedPiece] [to_sq(move)]; - // Step 15. Make the move + // Step 16. Make the move pos.do_move(move, st, givesCheck); - // Step 16. Late moves reduction / extension (LMR, ~200 Elo) + bool doDeeperSearch = false; + + // Step 17. Late moves reduction / extension (LMR, ~98 Elo) // We use various heuristics for the sons of a node after the first son has // been searched. In general we would like to reduce them, but there are many // cases where we extend a son if it has good chances to be "interesting". - if ( depth >= 3 - && moveCount > 1 + 2 * rootNode - && ( !captureOrPromotion - || (cutNode && (ss-1)->moveCount > 1) - || !ss->ttPv) - && (!PvNode || ss->ply > 1 || thisThread->id() % 4 != 3)) + if ( depth >= 2 + && moveCount > 1 + (PvNode && ss->ply <= 1) + && ( !ss->ttPv + || !capture + || (cutNode && (ss-1)->moveCount > 1))) { - Depth r = reduction(improving, depth, moveCount); + Depth r = reduction(improving, depth, moveCount, delta, thisThread->rootDelta); - if (PvNode) - r--; - - // Decrease reduction if the ttHit running average is large (~0 Elo) - if (thisThread->ttHitAverage > 537 * TtHitAverageResolution * TtHitAverageWindow / 1024) + // Decrease reduction at some PvNodes (~2 Elo) + if ( PvNode + && bestMoveCount <= 3) r--; // Decrease reduction if position is or has been on the PV @@ -1143,49 +1156,52 @@ moves_loop: // When in check, search starts from here && !likelyFailLow) r -= 2; - // Increase reduction at root and non-PV nodes when the best move does not change frequently - if ( (rootNode || !PvNode) - && thisThread->bestMoveChanges <= 2) - r++; - // Decrease reduction if opponent's move count is high (~1 Elo) - if ((ss-1)->moveCount > 13) - r--; - - // Decrease reduction if ttMove has been singularly extended (~1 Elo) - if (singularQuietLMR) + if ((ss-1)->moveCount > 7) r--; // Increase reduction for cut nodes (~3 Elo) - if (cutNode) - r += 1 + !captureOrPromotion; + if (cutNode && move != ss->killers[0]) + r += 2; - if (!captureOrPromotion) - { - // Increase reduction if ttMove is a capture (~3 Elo) - if (ttCapture) - r++; + // Increase reduction if ttMove is a capture (~3 Elo) + if (ttCapture) + r++; - ss->statScore = thisThread->mainHistory[us][from_to(move)] - + (*contHist[0])[movedPiece][to_sq(move)] - + (*contHist[1])[movedPiece][to_sq(move)] - + (*contHist[3])[movedPiece][to_sq(move)] - - 4923; + // Decrease reduction at PvNodes if bestvalue + // is vastly different from static evaluation + if (PvNode && !ss->inCheck && abs(ss->staticEval - bestValue) > 250) + r--; - // Decrease/increase reduction for moves with a good/bad history (~30 Elo) - if (!ss->inCheck) - r -= ss->statScore / 14721; - } + // Increase depth based reduction if PvNode + if (PvNode) + r -= 15 / ( 3 + depth ); - // In general we want to cap the LMR depth search at newDepth. But if - // reductions are really negative and movecount is low, we allow this move - // to be searched deeper than the first move, unless ttMove was extended by 2. - Depth d = std::clamp(newDepth - r, 1, newDepth + (r < -1 && moveCount <= 5 && !doubleExtension)); + ss->statScore = thisThread->mainHistory[us][from_to(move)] + + (*contHist[0])[movedPiece][to_sq(move)] + + (*contHist[1])[movedPiece][to_sq(move)] + + (*contHist[3])[movedPiece][to_sq(move)] + - 4334; + + // Decrease/increase reduction for moves with a good/bad history (~30 Elo) + r -= ss->statScore / 15914; + + // In general we want to cap the LMR depth search at newDepth. But if reductions + // are really negative and movecount is low, we allow this move to be searched + // deeper than the first move (this may lead to hidden double extensions). + int deeper = r >= -1 ? 0 + : moveCount <= 4 ? 2 + : PvNode && depth > 4 ? 1 + : cutNode && moveCount <= 8 ? 1 + : 0; + + Depth d = std::clamp(newDepth - r, 1, newDepth + deeper); value = -search(pos, ss+1, -(alpha+1), -alpha, d, true); // If the son is reduced and fails high it will be re-searched at full depth doFullDepthSearch = value > alpha && d < newDepth; + doDeeperSearch = value > (alpha + 78 + 11 * (newDepth - d)); didLMR = true; } else @@ -1194,17 +1210,20 @@ moves_loop: // When in check, search starts from here didLMR = false; } - // Step 17. Full depth search when LMR is skipped or fails high + // Step 18. Full depth search when LMR is skipped or fails high if (doFullDepthSearch) { - value = -search(pos, ss+1, -(alpha+1), -alpha, newDepth, !cutNode); + value = -search(pos, ss+1, -(alpha+1), -alpha, newDepth + doDeeperSearch, !cutNode); // If the move passed LMR update its stats - if (didLMR && !captureOrPromotion) + if (didLMR) { int bonus = value > alpha ? stat_bonus(newDepth) : -stat_bonus(newDepth); + if (capture) + bonus /= 6; + update_continuation_histories(ss, movedPiece, to_sq(move), bonus); } } @@ -1221,12 +1240,12 @@ moves_loop: // When in check, search starts from here std::min(maxNextDepth, newDepth), false); } - // Step 18. Undo move + // Step 19. Undo move pos.undo_move(move); assert(value > -VALUE_INFINITE && value < VALUE_INFINITE); - // Step 19. Check for a new best move + // Step 20. Check for a new best move // Finished searching the move. If a stop occurred, the return value of // the search cannot be trusted, and we return immediately without // updating best move, PV and TT. @@ -1238,6 +1257,8 @@ moves_loop: // When in check, search starts from here RootMove& rm = *std::find(thisThread->rootMoves.begin(), thisThread->rootMoves.end(), move); + rm.averageScore = rm.averageScore != -VALUE_INFINITE ? (2 * value + rm.averageScore) / 3 : value; + // PV move or new best move? if (moveCount == 1 || value > alpha) { @@ -1250,9 +1271,11 @@ moves_loop: // When in check, search starts from here for (Move* m = (ss+1)->pv; *m != MOVE_NONE; ++m) rm.pv.push_back(*m); - // We record how often the best move has been changed in each - // iteration. This information is used for time management and LMR - if (moveCount > 1) + // We record how often the best move has been changed in each iteration. + // This information is used for time management. In MultiPV mode, + // we must take care to only do this for the first PV line. + if ( moveCount > 1 + && !thisThread->pvIdx) ++thisThread->bestMoveChanges; } else @@ -1274,7 +1297,10 @@ moves_loop: // When in check, search starts from here update_pv(ss->pv, move, (ss+1)->pv); if (PvNode && value < beta) // Update alpha! Always alpha < beta + { alpha = value; + bestMoveCount++; + } else { assert(value >= beta); // Fail high @@ -1286,10 +1312,10 @@ moves_loop: // When in check, search starts from here // If the move is worse than some previously searched move, remember it to update its stats later if (move != bestMove) { - if (captureOrPromotion && captureCount < 32) + if (capture && captureCount < 32) capturesSearched[captureCount++] = move; - else if (!captureOrPromotion && quietCount < 64) + else if (!capture && quietCount < 64) quietsSearched[quietCount++] = move; } } @@ -1302,7 +1328,7 @@ moves_loop: // When in check, search starts from here return VALUE_DRAW; */ - // Step 20. Check for mate and stalemate + // Step 21. Check for mate and stalemate // All legal moves have been searched and if there are no legal moves, it // must be a mate or a stalemate. If we are in a singular extension search then // return a fail low score. @@ -1320,9 +1346,17 @@ moves_loop: // When in check, search starts from here quietsSearched, quietCount, capturesSearched, captureCount, depth); // Bonus for prior countermove that caused the fail low - else if ( (depth >= 3 || PvNode) + else if ( (depth >= 4 || PvNode) && !priorCapture) - update_continuation_histories(ss-1, pos.piece_on(prevSq), prevSq, stat_bonus(depth)); + { + //Assign extra bonus if current node is PvNode or cutNode + //or fail low was really bad + bool extraBonus = PvNode + || cutNode + || bestValue < alpha - 70 * depth; + + update_continuation_histories(ss-1, pos.piece_on(prevSq), prevSq, stat_bonus(depth) * (1 + extraBonus)); + } if (PvNode) bestValue = std::min(bestValue, maxValue); @@ -1331,10 +1365,6 @@ moves_loop: // When in check, search starts from here // opponent move is probably good and the new position is added to the search tree. if (bestValue <= alpha) ss->ttPv = ss->ttPv || ((ss-1)->ttPv && depth > 3); - // Otherwise, a counter move has been found and if the position is the last leaf - // in the search tree, remove the position from the search tree. - else if (depth > 3) - ss->ttPv = ss->ttPv && (ss+1)->ttPv; // Write gathered information in transposition table if (!excludedMove && !(rootNode && thisThread->pvIdx)) @@ -1369,13 +1399,12 @@ moves_loop: // When in check, search starts from here Key posKey; Move ttMove, move, bestMove; Depth ttDepth; - Value bestValue, value, ttValue, futilityValue, futilityBase, oldAlpha; - bool pvHit, givesCheck, captureOrPromotion; + Value bestValue, value, ttValue, futilityValue, futilityBase; + bool pvHit, givesCheck, capture; int moveCount; if (PvNode) { - oldAlpha = alpha; // To flag BOUND_EXACT when eval above alpha and no available moves (ss+1)->pv = pv; ss->pv[0] = MOVE_NONE; } @@ -1426,14 +1455,13 @@ moves_loop: // When in check, search starts from here if ((ss->staticEval = bestValue = tte->eval()) == VALUE_NONE) ss->staticEval = bestValue = evaluate(pos); - // Can ttValue be used as a better position evaluation? + // ttValue can be used as a better position evaluation (~7 Elo) if ( ttValue != VALUE_NONE && (tte->bound() & (ttValue > bestValue ? BOUND_LOWER : BOUND_UPPER))) bestValue = ttValue; } else // In case of null move search use previous static eval with a different sign - // and addition of two tempos ss->staticEval = bestValue = (ss-1)->currentMove != MOVE_NULL ? evaluate(pos) : -(ss-1)->staticEval; @@ -1452,7 +1480,7 @@ moves_loop: // When in check, search starts from here if (PvNode && bestValue > alpha) alpha = bestValue; - futilityBase = bestValue + 155; + futilityBase = bestValue + 118; } const PieceToHistory* contHist[] = { (ss-1)->continuationHistory, (ss-2)->continuationHistory, @@ -1463,24 +1491,32 @@ moves_loop: // When in check, search starts from here // to search the moves. Because the depth is <= 0 here, only captures, // queen promotions, and other checks (only if depth >= DEPTH_QS_CHECKS) // will be generated. + Square prevSq = to_sq((ss-1)->currentMove); MovePicker mp(pos, ttMove, depth, &thisThread->mainHistory, &thisThread->captureHistory, contHist, - to_sq((ss-1)->currentMove)); + prevSq); + + int quietCheckEvasions = 0; // Loop through the moves until no moves remain or a beta cutoff occurs while ((move = mp.next_move()) != MOVE_NONE) { assert(is_ok(move)); + // Check for legality + if (!pos.legal(move)) + continue; + givesCheck = pos.gives_check(move); - captureOrPromotion = pos.capture_or_promotion(move); + capture = pos.capture(move); moveCount++; - // Futility pruning and moveCount pruning + // Futility pruning and moveCount pruning (~5 Elo) if ( bestValue > VALUE_TB_LOSS_IN_MAX_PLY && !givesCheck + && to_sq(move) != prevSq && futilityBase > -VALUE_KNOWN_WIN && type_of(move) != PROMOTION) { @@ -1503,7 +1539,7 @@ moves_loop: // When in check, search starts from here } } - // Do not search moves with negative SEE values + // Do not search moves with negative SEE values (~5 Elo) if ( bestValue > VALUE_TB_LOSS_IN_MAX_PLY && !pos.see_ge(move)) continue; @@ -1511,26 +1547,28 @@ moves_loop: // When in check, search starts from here // Speculative prefetch as early as possible prefetch(TT.first_entry(pos.key_after(move))); - // Check for legality just before making the move - if (!pos.legal(move)) - { - moveCount--; - continue; - } - ss->currentMove = move; ss->continuationHistory = &thisThread->continuationHistory[ss->inCheck] - [captureOrPromotion] + [capture] [pos.moved_piece(move)] [to_sq(move)]; - // Continuation history based pruning - if ( !captureOrPromotion + // Continuation history based pruning (~2 Elo) + if ( !capture && bestValue > VALUE_TB_LOSS_IN_MAX_PLY && (*contHist[0])[pos.moved_piece(move)][to_sq(move)] < CounterMovePruneThreshold && (*contHist[1])[pos.moved_piece(move)][to_sq(move)] < CounterMovePruneThreshold) continue; + // movecount pruning for quiet check evasions + if ( bestValue > VALUE_TB_LOSS_IN_MAX_PLY + && quietCheckEvasions > 1 + && !capture + && ss->inCheck) + continue; + + quietCheckEvasions += !capture && ss->inCheck; + // Make and search the move pos.do_move(move, st, givesCheck); value = -qsearch(pos, ss+1, -beta, -alpha, depth - 1); @@ -1569,8 +1607,7 @@ moves_loop: // When in check, search starts from here // Save gathered info in transposition table tte->save(posKey, value_to_tt(bestValue, ss->ply), pvHit, - bestValue >= beta ? BOUND_LOWER : - PvNode && bestValue > oldAlpha ? BOUND_EXACT : BOUND_UPPER, + bestValue >= beta ? BOUND_LOWER : BOUND_UPPER, ttDepth, bestMove, ss->staticEval); assert(bestValue > -VALUE_INFINITE && bestValue < VALUE_INFINITE); @@ -1646,13 +1683,13 @@ moves_loop: // When in check, search starts from here PieceType captured = type_of(pos.piece_on(to_sq(bestMove))); bonus1 = stat_bonus(depth + 1); - bonus2 = bestValue > beta + PawnValueMg ? bonus1 // larger bonus - : std::min(bonus1, stat_bonus(depth)); // smaller bonus + bonus2 = bestValue > beta + PawnValueMg ? bonus1 // larger bonus + : stat_bonus(depth); // smaller bonus - if (!pos.capture_or_promotion(bestMove)) + if (!pos.capture(bestMove)) { // Increase stats for the best move in case it was a quiet move - update_quiet_stats(pos, ss, bestMove, bonus2, depth); + update_quiet_stats(pos, ss, bestMove, bonus2); // Decrease stats for all non-best quiet moves for (int i = 0; i < quietCount; ++i) @@ -1699,7 +1736,7 @@ moves_loop: // When in check, search starts from here // update_quiet_stats() updates move sorting heuristics - void update_quiet_stats(const Position& pos, Stack* ss, Move move, int bonus, int depth) { + void update_quiet_stats(const Position& pos, Stack* ss, Move move, int bonus) { // Update killers if (ss->killers[0] != move) @@ -1713,20 +1750,12 @@ moves_loop: // When in check, search starts from here thisThread->mainHistory[us][from_to(move)] << bonus; update_continuation_histories(ss, pos.moved_piece(move), to_sq(move), bonus); - // Penalty for reversed move in case of moved piece not being a pawn - if (type_of(pos.moved_piece(move)) != PAWN) - thisThread->mainHistory[us][from_to(reverse_move(move))] << -bonus; - // Update countermove history if (is_ok((ss-1)->currentMove)) { Square prevSq = to_sq((ss-1)->currentMove); thisThread->counterMoves[pos.piece_on(prevSq)][prevSq] = move; } - - // Update low ply history - if (depth > 11 && ss->ply < MAX_LPH) - thisThread->lowPlyHistory[ss->ply][from_to(move)] << stat_bonus(depth - 7); } // When playing with strength handicap, choose best move among a set of RootMoves @@ -1740,8 +1769,8 @@ moves_loop: // When in check, search starts from here // RootMoves are already sorted by score in descending order Value topScore = rootMoves[0].score; int delta = std::min(topScore - rootMoves[multiPV - 1].score, PawnValueMg); - int weakness = 120 - 2 * level; int maxScore = -VALUE_INFINITE; + double weakness = 120 - 2 * level; // Choose best move. For each move score we add two terms, both dependent on // weakness. One is deterministic and bigger for weaker levels, and one is @@ -1749,8 +1778,8 @@ moves_loop: // When in check, search starts from here for (size_t i = 0; i < multiPV; ++i) { // This is our magic formula - int push = ( weakness * int(topScore - rootMoves[i].score) - + delta * (rng.rand() % weakness)) / 128; + int push = int(( weakness * int(topScore - rootMoves[i].score) + + delta * (rng.rand() % int(weakness))) / 128); if (rootMoves[i].score + push >= maxScore) { diff --git a/DroidFishApp/src/main/cpp/stockfish/search.h b/DroidFishApp/src/main/cpp/stockfish/search.h index 801baac..806295a 100644 --- a/DroidFishApp/src/main/cpp/stockfish/search.h +++ b/DroidFishApp/src/main/cpp/stockfish/search.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -47,6 +47,7 @@ struct Stack { Move excludedMove; Move killers[2]; Value staticEval; + Depth depth; int statScore; int moveCount; bool inCheck; @@ -72,6 +73,7 @@ struct RootMove { Value score = -VALUE_INFINITE; Value previousScore = -VALUE_INFINITE; + Value averageScore = -VALUE_INFINITE; int selDepth = 0; int tbRank = 0; Value tbScore; diff --git a/DroidFishApp/src/main/cpp/stockfish/simd.h b/DroidFishApp/src/main/cpp/stockfish/simd.h new file mode 100644 index 0000000..7b9e8fb --- /dev/null +++ b/DroidFishApp/src/main/cpp/stockfish/simd.h @@ -0,0 +1,387 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef STOCKFISH_SIMD_H_INCLUDED +#define STOCKFISH_SIMD_H_INCLUDED + +#if defined(USE_AVX2) +# include + +#elif defined(USE_SSE41) +# include + +#elif defined(USE_SSSE3) +# include + +#elif defined(USE_SSE2) +# include + +#elif defined(USE_MMX) +# include + +#elif defined(USE_NEON) +# include +#endif + +// The inline asm is only safe for GCC, where it is necessary to get good codegen. +// See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101693 +// Clang does fine without it. +// Play around here: https://godbolt.org/z/7EWqrYq51 +#if (defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)) +#define USE_INLINE_ASM +#endif + +// Use either the AVX512 or AVX-VNNI version of the VNNI instructions. +#if defined(USE_AVXVNNI) +#define VNNI_PREFIX "%{vex%} " +#else +#define VNNI_PREFIX "" +#endif + +namespace Stockfish::Simd { + +#if defined (USE_AVX512) + + [[maybe_unused]] static int m512_hadd(__m512i sum, int bias) { + return _mm512_reduce_add_epi32(sum) + bias; + } + + /* + Parameters: + sum0 = [zmm0.i128[0], zmm0.i128[1], zmm0.i128[2], zmm0.i128[3]] + sum1 = [zmm1.i128[0], zmm1.i128[1], zmm1.i128[2], zmm1.i128[3]] + sum2 = [zmm2.i128[0], zmm2.i128[1], zmm2.i128[2], zmm2.i128[3]] + sum3 = [zmm3.i128[0], zmm3.i128[1], zmm3.i128[2], zmm3.i128[3]] + + Returns: + ret = [ + reduce_add_epi32(zmm0.i128[0]), reduce_add_epi32(zmm1.i128[0]), reduce_add_epi32(zmm2.i128[0]), reduce_add_epi32(zmm3.i128[0]), + reduce_add_epi32(zmm0.i128[1]), reduce_add_epi32(zmm1.i128[1]), reduce_add_epi32(zmm2.i128[1]), reduce_add_epi32(zmm3.i128[1]), + reduce_add_epi32(zmm0.i128[2]), reduce_add_epi32(zmm1.i128[2]), reduce_add_epi32(zmm2.i128[2]), reduce_add_epi32(zmm3.i128[2]), + reduce_add_epi32(zmm0.i128[3]), reduce_add_epi32(zmm1.i128[3]), reduce_add_epi32(zmm2.i128[3]), reduce_add_epi32(zmm3.i128[3]) + ] + */ + [[maybe_unused]] static __m512i m512_hadd128x16_interleave( + __m512i sum0, __m512i sum1, __m512i sum2, __m512i sum3) { + + __m512i sum01a = _mm512_unpacklo_epi32(sum0, sum1); + __m512i sum01b = _mm512_unpackhi_epi32(sum0, sum1); + + __m512i sum23a = _mm512_unpacklo_epi32(sum2, sum3); + __m512i sum23b = _mm512_unpackhi_epi32(sum2, sum3); + + __m512i sum01 = _mm512_add_epi32(sum01a, sum01b); + __m512i sum23 = _mm512_add_epi32(sum23a, sum23b); + + __m512i sum0123a = _mm512_unpacklo_epi64(sum01, sum23); + __m512i sum0123b = _mm512_unpackhi_epi64(sum01, sum23); + + return _mm512_add_epi32(sum0123a, sum0123b); + } + + [[maybe_unused]] static __m128i m512_haddx4( + __m512i sum0, __m512i sum1, __m512i sum2, __m512i sum3, + __m128i bias) { + + __m512i sum = m512_hadd128x16_interleave(sum0, sum1, sum2, sum3); + + __m256i sum256lo = _mm512_castsi512_si256(sum); + __m256i sum256hi = _mm512_extracti64x4_epi64(sum, 1); + + sum256lo = _mm256_add_epi32(sum256lo, sum256hi); + + __m128i sum128lo = _mm256_castsi256_si128(sum256lo); + __m128i sum128hi = _mm256_extracti128_si256(sum256lo, 1); + + return _mm_add_epi32(_mm_add_epi32(sum128lo, sum128hi), bias); + } + + [[maybe_unused]] static void m512_add_dpbusd_epi32( + __m512i& acc, + __m512i a, + __m512i b) { + +# if defined (USE_VNNI) +# if defined (USE_INLINE_ASM) + asm( + "vpdpbusd %[b], %[a], %[acc]\n\t" + : [acc]"+v"(acc) + : [a]"v"(a), [b]"vm"(b) + ); +# else + acc = _mm512_dpbusd_epi32(acc, a, b); +# endif +# else +# if defined (USE_INLINE_ASM) + __m512i tmp = _mm512_maddubs_epi16(a, b); + asm( + "vpmaddwd %[tmp], %[ones], %[tmp]\n\t" + "vpaddd %[acc], %[tmp], %[acc]\n\t" + : [acc]"+v"(acc), [tmp]"+&v"(tmp) + : [ones]"v"(_mm512_set1_epi16(1)) + ); +# else + __m512i product0 = _mm512_maddubs_epi16(a, b); + product0 = _mm512_madd_epi16(product0, _mm512_set1_epi16(1)); + acc = _mm512_add_epi32(acc, product0); +# endif +# endif + } + + [[maybe_unused]] static void m512_add_dpbusd_epi32x2( + __m512i& acc, + __m512i a0, __m512i b0, + __m512i a1, __m512i b1) { + +# if defined (USE_VNNI) +# if defined (USE_INLINE_ASM) + asm( + "vpdpbusd %[b0], %[a0], %[acc]\n\t" + "vpdpbusd %[b1], %[a1], %[acc]\n\t" + : [acc]"+v"(acc) + : [a0]"v"(a0), [b0]"vm"(b0), [a1]"v"(a1), [b1]"vm"(b1) + ); +# else + acc = _mm512_dpbusd_epi32(acc, a0, b0); + acc = _mm512_dpbusd_epi32(acc, a1, b1); +# endif +# else +# if defined (USE_INLINE_ASM) + __m512i tmp0 = _mm512_maddubs_epi16(a0, b0); + __m512i tmp1 = _mm512_maddubs_epi16(a1, b1); + asm( + "vpaddsw %[tmp0], %[tmp1], %[tmp0]\n\t" + "vpmaddwd %[tmp0], %[ones], %[tmp0]\n\t" + "vpaddd %[acc], %[tmp0], %[acc]\n\t" + : [acc]"+v"(acc), [tmp0]"+&v"(tmp0) + : [tmp1]"v"(tmp1), [ones]"v"(_mm512_set1_epi16(1)) + ); +# else + __m512i product0 = _mm512_maddubs_epi16(a0, b0); + __m512i product1 = _mm512_maddubs_epi16(a1, b1); + product0 = _mm512_adds_epi16(product0, product1); + product0 = _mm512_madd_epi16(product0, _mm512_set1_epi16(1)); + acc = _mm512_add_epi32(acc, product0); +# endif +# endif + } + +#endif + +#if defined (USE_AVX2) + + [[maybe_unused]] static int m256_hadd(__m256i sum, int bias) { + __m128i sum128 = _mm_add_epi32(_mm256_castsi256_si128(sum), _mm256_extracti128_si256(sum, 1)); + sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_BADC)); + sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_CDAB)); + return _mm_cvtsi128_si32(sum128) + bias; + } + + [[maybe_unused]] static __m128i m256_haddx4( + __m256i sum0, __m256i sum1, __m256i sum2, __m256i sum3, + __m128i bias) { + + sum0 = _mm256_hadd_epi32(sum0, sum1); + sum2 = _mm256_hadd_epi32(sum2, sum3); + + sum0 = _mm256_hadd_epi32(sum0, sum2); + + __m128i sum128lo = _mm256_castsi256_si128(sum0); + __m128i sum128hi = _mm256_extracti128_si256(sum0, 1); + + return _mm_add_epi32(_mm_add_epi32(sum128lo, sum128hi), bias); + } + + [[maybe_unused]] static void m256_add_dpbusd_epi32( + __m256i& acc, + __m256i a, + __m256i b) { + +# if defined (USE_VNNI) +# if defined (USE_INLINE_ASM) + asm( + VNNI_PREFIX "vpdpbusd %[b], %[a], %[acc]\n\t" + : [acc]"+v"(acc) + : [a]"v"(a), [b]"vm"(b) + ); +# else + acc = _mm256_dpbusd_epi32(acc, a, b); +# endif +# else +# if defined (USE_INLINE_ASM) + __m256i tmp = _mm256_maddubs_epi16(a, b); + asm( + "vpmaddwd %[tmp], %[ones], %[tmp]\n\t" + "vpaddd %[acc], %[tmp], %[acc]\n\t" + : [acc]"+v"(acc), [tmp]"+&v"(tmp) + : [ones]"v"(_mm256_set1_epi16(1)) + ); +# else + __m256i product0 = _mm256_maddubs_epi16(a, b); + product0 = _mm256_madd_epi16(product0, _mm256_set1_epi16(1)); + acc = _mm256_add_epi32(acc, product0); +# endif +# endif + } + + [[maybe_unused]] static void m256_add_dpbusd_epi32x2( + __m256i& acc, + __m256i a0, __m256i b0, + __m256i a1, __m256i b1) { + +# if defined (USE_VNNI) +# if defined (USE_INLINE_ASM) + asm( + VNNI_PREFIX "vpdpbusd %[b0], %[a0], %[acc]\n\t" + VNNI_PREFIX "vpdpbusd %[b1], %[a1], %[acc]\n\t" + : [acc]"+v"(acc) + : [a0]"v"(a0), [b0]"vm"(b0), [a1]"v"(a1), [b1]"vm"(b1) + ); +# else + acc = _mm256_dpbusd_epi32(acc, a0, b0); + acc = _mm256_dpbusd_epi32(acc, a1, b1); +# endif +# else +# if defined (USE_INLINE_ASM) + __m256i tmp0 = _mm256_maddubs_epi16(a0, b0); + __m256i tmp1 = _mm256_maddubs_epi16(a1, b1); + asm( + "vpaddsw %[tmp0], %[tmp1], %[tmp0]\n\t" + "vpmaddwd %[tmp0], %[ones], %[tmp0]\n\t" + "vpaddd %[acc], %[tmp0], %[acc]\n\t" + : [acc]"+v"(acc), [tmp0]"+&v"(tmp0) + : [tmp1]"v"(tmp1), [ones]"v"(_mm256_set1_epi16(1)) + ); +# else + __m256i product0 = _mm256_maddubs_epi16(a0, b0); + __m256i product1 = _mm256_maddubs_epi16(a1, b1); + product0 = _mm256_adds_epi16(product0, product1); + product0 = _mm256_madd_epi16(product0, _mm256_set1_epi16(1)); + acc = _mm256_add_epi32(acc, product0); +# endif +# endif + } + +#endif + +#if defined (USE_SSSE3) + + [[maybe_unused]] static int m128_hadd(__m128i sum, int bias) { + sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0x4E)); //_MM_PERM_BADC + sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0xB1)); //_MM_PERM_CDAB + return _mm_cvtsi128_si32(sum) + bias; + } + + [[maybe_unused]] static __m128i m128_haddx4( + __m128i sum0, __m128i sum1, __m128i sum2, __m128i sum3, + __m128i bias) { + + sum0 = _mm_hadd_epi32(sum0, sum1); + sum2 = _mm_hadd_epi32(sum2, sum3); + sum0 = _mm_hadd_epi32(sum0, sum2); + return _mm_add_epi32(sum0, bias); + } + + [[maybe_unused]] static void m128_add_dpbusd_epi32( + __m128i& acc, + __m128i a, + __m128i b) { + +# if defined (USE_INLINE_ASM) + __m128i tmp = _mm_maddubs_epi16(a, b); + asm( + "pmaddwd %[ones], %[tmp]\n\t" + "paddd %[tmp], %[acc]\n\t" + : [acc]"+v"(acc), [tmp]"+&v"(tmp) + : [ones]"v"(_mm_set1_epi16(1)) + ); +# else + __m128i product0 = _mm_maddubs_epi16(a, b); + product0 = _mm_madd_epi16(product0, _mm_set1_epi16(1)); + acc = _mm_add_epi32(acc, product0); +# endif + } + + [[maybe_unused]] static void m128_add_dpbusd_epi32x2( + __m128i& acc, + __m128i a0, __m128i b0, + __m128i a1, __m128i b1) { + +# if defined (USE_INLINE_ASM) + __m128i tmp0 = _mm_maddubs_epi16(a0, b0); + __m128i tmp1 = _mm_maddubs_epi16(a1, b1); + asm( + "paddsw %[tmp1], %[tmp0]\n\t" + "pmaddwd %[ones], %[tmp0]\n\t" + "paddd %[tmp0], %[acc]\n\t" + : [acc]"+v"(acc), [tmp0]"+&v"(tmp0) + : [tmp1]"v"(tmp1), [ones]"v"(_mm_set1_epi16(1)) + ); +# else + __m128i product0 = _mm_maddubs_epi16(a0, b0); + __m128i product1 = _mm_maddubs_epi16(a1, b1); + product0 = _mm_adds_epi16(product0, product1); + product0 = _mm_madd_epi16(product0, _mm_set1_epi16(1)); + acc = _mm_add_epi32(acc, product0); +# endif + } + +#endif + +#if defined (USE_NEON) + + [[maybe_unused]] static int neon_m128_reduce_add_epi32(int32x4_t s) { +# if USE_NEON >= 8 + return vaddvq_s32(s); +# else + return s[0] + s[1] + s[2] + s[3]; +# endif + } + + [[maybe_unused]] static int neon_m128_hadd(int32x4_t sum, int bias) { + return neon_m128_reduce_add_epi32(sum) + bias; + } + + [[maybe_unused]] static int32x4_t neon_m128_haddx4( + int32x4_t sum0, int32x4_t sum1, int32x4_t sum2, int32x4_t sum3, + int32x4_t bias) { + + int32x4_t hsums { + neon_m128_reduce_add_epi32(sum0), + neon_m128_reduce_add_epi32(sum1), + neon_m128_reduce_add_epi32(sum2), + neon_m128_reduce_add_epi32(sum3) + }; + return vaddq_s32(hsums, bias); + } + + [[maybe_unused]] static void neon_m128_add_dpbusd_epi32x2( + int32x4_t& acc, + int8x8_t a0, int8x8_t b0, + int8x8_t a1, int8x8_t b1) { + + int16x8_t product = vmull_s8(a0, b0); + product = vmlal_s8(product, a1, b1); + acc = vpadalq_s16(acc, product); + } + +#endif + +} + +#endif // STOCKFISH_SIMD_H_INCLUDED diff --git a/DroidFishApp/src/main/cpp/stockfish/syzygy/tbprobe.cpp b/DroidFishApp/src/main/cpp/stockfish/syzygy/tbprobe.cpp index ff05a0c..a131524 100644 --- a/DroidFishApp/src/main/cpp/stockfish/syzygy/tbprobe.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/syzygy/tbprobe.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -472,8 +472,6 @@ TBTables TBTables; // If the corresponding file exists two new objects TBTable and TBTable // are created and added to the lists and hash table. Called at init time. void TBTables::add(const std::vector& pieces) { - if (sizeof(char*) < 8 && pieces.size() >= 6) - return; // Not enough address space to support 6-men TB on 32-bit OS std::string code; @@ -771,7 +769,7 @@ Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* resu goto encode_remaining; // With pawns we have finished special treatments } - // In positions withouth pawns, we further flip the squares to ensure leading + // In positions without pawns, we further flip the squares to ensure leading // piece is below RANK_5. if (rank_of(squares[0]) > RANK_4) for (int i = 0; i < size; ++i) @@ -814,7 +812,7 @@ Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* resu // Rs "together" in 62 * 61 / 2 ways (we divide by 2 because rooks can be // swapped and still get the same position.) // - // In case we have at least 3 unique pieces (inlcuded kings) we encode them + // In case we have at least 3 unique pieces (included kings) we encode them // together. if (entry->hasUniquePieces) { @@ -829,7 +827,7 @@ Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* resu + (squares[1] - adjust1)) * 62 + squares[2] - adjust2; - // First piece is on a1-h8 diagonal, second below: map this occurence to + // First piece is on a1-h8 diagonal, second below: map this occurrence to // 6 to differentiate from the above case, rank_of() maps a1-d4 diagonal // to 0...3 and finally MapB1H1H7[] maps the b1-h1-h7 triangle to 0..27. else if (off_A1H8(squares[1])) @@ -859,7 +857,7 @@ encode_remaining: idx *= d->groupIdx[0]; Square* groupSq = squares + d->groupLen[0]; - // Encode remainig pawns then pieces according to square, in ascending order + // Encode remaining pawns then pieces according to square, in ascending order bool remainingPawns = entry->hasPawns && entry->pawnCount[1]; while (d->groupLen[++next]) @@ -887,7 +885,7 @@ encode_remaining: // Group together pieces that will be encoded together. The general rule is that // a group contains pieces of same type and color. The exception is the leading -// group that, in case of positions withouth pawns, can be formed by 3 different +// group that, in case of positions without pawns, can be formed by 3 different // pieces (default) or by the king pair when there is not a unique piece apart // from the kings. When there are pawns, pawns are always first in pieces[]. // @@ -919,7 +917,7 @@ void set_groups(T& e, PairsData* d, int order[], File f) { // // This ensures unique encoding for the whole position. The order of the // groups is a per-table parameter and could not follow the canonical leading - // pawns/pieces -> remainig pawns -> remaining pieces. In particular the + // pawns/pieces -> remaining pawns -> remaining pieces. In particular the // first group is at order[0] position and the remaining pawns, when present, // are at order[1] position. bool pp = e.hasPawns && e.pawnCount[1]; // Pawns on both sides @@ -939,7 +937,7 @@ void set_groups(T& e, PairsData* d, int order[], File f) { d->groupIdx[1] = idx; idx *= Binomial[d->groupLen[1]][48 - d->groupLen[0]]; } - else // Remainig pieces + else // Remaining pieces { d->groupIdx[next] = idx; idx *= Binomial[d->groupLen[next]][freeSquares]; @@ -949,7 +947,7 @@ void set_groups(T& e, PairsData* d, int order[], File f) { d->groupIdx[n] = idx; } -// In Recursive Pairing each symbol represents a pair of childern symbols. So +// In Recursive Pairing each symbol represents a pair of children symbols. So // read d->btree[] symbols data and expand each one in his left and right child // symbol until reaching the leafs that represent the symbol value. uint8_t set_symlen(PairsData* d, Sym s, std::vector& visited) { @@ -1319,7 +1317,7 @@ void Tablebases::init(const std::string& paths) { for (auto p : bothOnDiagonal) MapKK[p.first][p.second] = code++; - // Binomial[] stores the Binomial Coefficents using Pascal rule. There + // Binomial[] stores the Binomial Coefficients using Pascal rule. There // are Binomial[k][n] ways to choose k elements from a set of n elements. Binomial[0][0] = 1; @@ -1339,7 +1337,7 @@ void Tablebases::init(const std::string& paths) { for (int leadPawnsCnt = 1; leadPawnsCnt <= 5; ++leadPawnsCnt) for (File f = FILE_A; f <= FILE_D; ++f) { - // Restart the index at every file because TB table is splitted + // Restart the index at every file because TB table is split // by file, so we can reuse the same index for different files. int idx = 0; diff --git a/DroidFishApp/src/main/cpp/stockfish/syzygy/tbprobe.h b/DroidFishApp/src/main/cpp/stockfish/syzygy/tbprobe.h index 56734af..c2917fe 100644 --- a/DroidFishApp/src/main/cpp/stockfish/syzygy/tbprobe.h +++ b/DroidFishApp/src/main/cpp/stockfish/syzygy/tbprobe.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -38,7 +38,7 @@ enum WDLScore { // Possible states after a probing operation enum ProbeState { FAIL = 0, // Probe failed (missing file table) - OK = 1, // Probe succesful + OK = 1, // Probe successful CHANGE_STM = -1, // DTZ should check the other side ZEROING_BEST_MOVE = 2 // Best move zeroes DTZ (capture or pawn move) }; diff --git a/DroidFishApp/src/main/cpp/stockfish/thread.cpp b/DroidFishApp/src/main/cpp/stockfish/thread.cpp index da8e1d0..30177a3 100644 --- a/DroidFishApp/src/main/cpp/stockfish/thread.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/thread.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -59,7 +59,6 @@ void Thread::clear() { counterMoves.fill(MOVE_NONE); mainHistory.fill(0); - lowPlyHistory.fill(0); captureHistory.fill(0); for (bool inCheck : { false, true }) @@ -67,7 +66,7 @@ void Thread::clear() { { for (auto& to : continuationHistory[inCheck][c]) for (auto& h : to) - h->fill(0); + h->fill(-71); continuationHistory[inCheck][c][NO_PIECE][0]->fill(Search::CounterMovePruneThreshold - 1); } } @@ -162,6 +161,7 @@ void ThreadPool::clear() { main()->callsCnt = 0; main()->bestPreviousScore = VALUE_INFINITE; + main()->bestPreviousAverageScore = VALUE_INFINITE; main()->previousTimeReduction = 1.0; } diff --git a/DroidFishApp/src/main/cpp/stockfish/thread.h b/DroidFishApp/src/main/cpp/stockfish/thread.h index 5bfa235..8027855 100644 --- a/DroidFishApp/src/main/cpp/stockfish/thread.h +++ b/DroidFishApp/src/main/cpp/stockfish/thread.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -60,18 +60,19 @@ public: Pawns::Table pawnsTable; Material::Table materialTable; size_t pvIdx, pvLast; - uint64_t ttHitAverage; + RunningAverage complexityAverage; + std::atomic nodes, tbHits, bestMoveChanges; int selDepth, nmpMinPly; Color nmpColor; - std::atomic nodes, tbHits, bestMoveChanges; + Value bestValue, optimism[COLOR_NB]; Position rootPos; StateInfo rootState; Search::RootMoves rootMoves; - Depth rootDepth, completedDepth; + Depth rootDepth, completedDepth, depth; + Value rootDelta; CounterMoveHistory counterMoves; ButterflyHistory mainHistory; - LowPlyHistory lowPlyHistory; CapturePieceToHistory captureHistory; ContinuationHistory continuationHistory[2][2]; Score trend; @@ -89,6 +90,7 @@ struct MainThread : public Thread { double previousTimeReduction; Value bestPreviousScore; + Value bestPreviousAverageScore; Value iterValue[4]; int callsCnt; bool stopOnPonderhit; diff --git a/DroidFishApp/src/main/cpp/stockfish/thread_win32_osx.h b/DroidFishApp/src/main/cpp/stockfish/thread_win32_osx.h index a21674c..77d1c3c 100644 --- a/DroidFishApp/src/main/cpp/stockfish/thread_win32_osx.h +++ b/DroidFishApp/src/main/cpp/stockfish/thread_win32_osx.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/DroidFishApp/src/main/cpp/stockfish/timeman.cpp b/DroidFishApp/src/main/cpp/stockfish/timeman.cpp index f742d1e..0400401 100644 --- a/DroidFishApp/src/main/cpp/stockfish/timeman.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/timeman.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -68,6 +68,9 @@ void TimeManagement::init(Search::LimitsType& limits, Color us, int ply) { TimePoint timeLeft = std::max(TimePoint(1), limits.time[us] + limits.inc[us] * (mtg - 1) - moveOverhead * (2 + mtg)); + // Use extra time with larger increments + double optExtra = std::clamp(1.0 + 12.0 * limits.inc[us] / limits.time[us], 1.0, 1.12); + // A user may scale time usage by setting UCI option "Slow Mover" // Default is 100 and changing this value will probably lose elo. timeLeft = slowMover * timeLeft / 100; @@ -78,15 +81,16 @@ void TimeManagement::init(Search::LimitsType& limits, Color us, int ply) { if (limits.movestogo == 0) { optScale = std::min(0.0084 + std::pow(ply + 3.0, 0.5) * 0.0042, - 0.2 * limits.time[us] / double(timeLeft)); + 0.2 * limits.time[us] / double(timeLeft)) + * optExtra; maxScale = std::min(7.0, 4.0 + ply / 12.0); } // x moves in y seconds (+ z increment) else { - optScale = std::min((0.8 + ply / 128.0) / mtg, - 0.8 * limits.time[us] / double(timeLeft)); + optScale = std::min((0.88 + ply / 116.4) / mtg, + 0.88 * limits.time[us] / double(timeLeft)); maxScale = std::min(6.3, 1.5 + 0.11 * mtg); } diff --git a/DroidFishApp/src/main/cpp/stockfish/timeman.h b/DroidFishApp/src/main/cpp/stockfish/timeman.h index b1878d6..a86f076 100644 --- a/DroidFishApp/src/main/cpp/stockfish/timeman.h +++ b/DroidFishApp/src/main/cpp/stockfish/timeman.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/DroidFishApp/src/main/cpp/stockfish/tt.cpp b/DroidFishApp/src/main/cpp/stockfish/tt.cpp index 1f495ca..c7118ae 100644 --- a/DroidFishApp/src/main/cpp/stockfish/tt.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/tt.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -40,9 +40,9 @@ void TTEntry::save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev) move16 = (uint16_t)m; // Overwrite less valuable entries (cheapest checks first) - if (b == BOUND_EXACT + if ( b == BOUND_EXACT || (uint16_t)k != key16 - || d - DEPTH_OFFSET > depth8 - 4) + || d - DEPTH_OFFSET + 2 * pv > depth8 - 4) { assert(d > DEPTH_OFFSET); assert(d < 256 + DEPTH_OFFSET); diff --git a/DroidFishApp/src/main/cpp/stockfish/tt.h b/DroidFishApp/src/main/cpp/stockfish/tt.h index d915d92..03fe3e1 100644 --- a/DroidFishApp/src/main/cpp/stockfish/tt.h +++ b/DroidFishApp/src/main/cpp/stockfish/tt.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/DroidFishApp/src/main/cpp/stockfish/tune.cpp b/DroidFishApp/src/main/cpp/stockfish/tune.cpp index ac91b60..a885845 100644 --- a/DroidFishApp/src/main/cpp/stockfish/tune.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/tune.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/DroidFishApp/src/main/cpp/stockfish/tune.h b/DroidFishApp/src/main/cpp/stockfish/tune.h index b5c715b..75ab484 100644 --- a/DroidFishApp/src/main/cpp/stockfish/tune.h +++ b/DroidFishApp/src/main/cpp/stockfish/tune.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -84,7 +84,7 @@ class Tune { static Tune& instance() { static Tune t; return t; } // Singleton - // Use polymorphism to accomodate Entry of different types in the same vector + // Use polymorphism to accommodate Entry of different types in the same vector struct EntryBase { virtual ~EntryBase() = default; virtual void init_option() = 0; diff --git a/DroidFishApp/src/main/cpp/stockfish/types.h b/DroidFishApp/src/main/cpp/stockfish/types.h index 0bd4a1c..cf42bc9 100644 --- a/DroidFishApp/src/main/cpp/stockfish/types.h +++ b/DroidFishApp/src/main/cpp/stockfish/types.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -465,10 +465,6 @@ constexpr Move make_move(Square from, Square to) { return Move((from << 6) + to); } -constexpr Move reverse_move(Move m) { - return make_move(to_sq(m), from_sq(m)); -} - template constexpr Move make(Square from, Square to, PieceType pt = KNIGHT) { return Move(T + ((pt - KNIGHT) << 12) + (from << 6) + to); diff --git a/DroidFishApp/src/main/cpp/stockfish/uci.cpp b/DroidFishApp/src/main/cpp/stockfish/uci.cpp index b3738a4..7b30cc0 100644 --- a/DroidFishApp/src/main/cpp/stockfish/uci.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/uci.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -207,8 +207,8 @@ namespace { // Coefficients of a 3rd order polynomial fit based on fishtest data // for two parameters needed to transform eval to the argument of a // logistic function. - double as[] = {-3.68389304, 30.07065921, -60.52878723, 149.53378557}; - double bs[] = {-2.0181857, 15.85685038, -29.83452023, 47.59078827}; + double as[] = {-1.17202460e-01, 5.94729104e-01, 1.12065546e+01, 1.22606222e+02}; + double bs[] = {-1.79066759, 11.30759193, -17.43677612, 36.47147479}; double a = (((as[0] * m + as[1]) * m + as[2]) * m) + as[3]; double b = (((bs[0] * m + bs[1]) * m + bs[2]) * m) + bs[3]; diff --git a/DroidFishApp/src/main/cpp/stockfish/uci.h b/DroidFishApp/src/main/cpp/stockfish/uci.h index d316010..5bb24a4 100644 --- a/DroidFishApp/src/main/cpp/stockfish/uci.h +++ b/DroidFishApp/src/main/cpp/stockfish/uci.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/DroidFishApp/src/main/cpp/stockfish/ucioption.cpp b/DroidFishApp/src/main/cpp/stockfish/ucioption.cpp index 07b3027..922fa34 100644 --- a/DroidFishApp/src/main/cpp/stockfish/ucioption.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/ucioption.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -164,7 +164,7 @@ Option& Option::operator=(const string& v) { assert(!type.empty()); - if ( (type != "button" && v.empty()) + if ( (type != "button" && type != "string" && v.empty()) || (type == "check" && v != "true" && v != "false") || (type == "spin" && (stof(v) < min || stof(v) > max))) return *this; diff --git a/DroidFishApp/src/main/java/org/petero/droidfish/engine/InternalStockFish.java b/DroidFishApp/src/main/java/org/petero/droidfish/engine/InternalStockFish.java index ba9ed99..ad98dda 100644 --- a/DroidFishApp/src/main/java/org/petero/droidfish/engine/InternalStockFish.java +++ b/DroidFishApp/src/main/java/org/petero/droidfish/engine/InternalStockFish.java @@ -36,7 +36,7 @@ import org.petero.droidfish.EngineOptions; /** Stockfish engine running as process, started from assets resource. */ public class InternalStockFish extends ExternalEngine { - private static final String defaultNet = "nn-3475407dc199.nnue"; + private static final String defaultNet = "nn-6877cd24400e.nnue"; private static final String netOption = "evalfile"; private File defaultNetFile; // To get the full path of the copied default network file