diff --git a/DroidFishApp/src/main/assets/nn-ad9b42354671.nnue b/DroidFishApp/src/main/assets/nn-5af11540bbfe.nnue
similarity index 71%
rename from DroidFishApp/src/main/assets/nn-ad9b42354671.nnue
rename to DroidFishApp/src/main/assets/nn-5af11540bbfe.nnue
index 693fc1d..6690c9e 100644
Binary files a/DroidFishApp/src/main/assets/nn-ad9b42354671.nnue and b/DroidFishApp/src/main/assets/nn-5af11540bbfe.nnue differ
diff --git a/DroidFishApp/src/main/cpp/stockfish/benchmark.cpp b/DroidFishApp/src/main/cpp/stockfish/benchmark.cpp
index e1c025a..a1ad055 100644
--- a/DroidFishApp/src/main/cpp/stockfish/benchmark.cpp
+++ b/DroidFishApp/src/main/cpp/stockfish/benchmark.cpp
@@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
- Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
+ Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -16,6 +16,8 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include "benchmark.h"
+
#include <fstream>
#include <iostream>
#include <istream>
diff --git a/DroidFishApp/src/main/cpp/stockfish/benchmark.h b/DroidFishApp/src/main/cpp/stockfish/benchmark.h
new file mode 100644
index 0000000..64acf83
--- /dev/null
+++ b/DroidFishApp/src/main/cpp/stockfish/benchmark.h
@@ -0,0 +1,34 @@
+/*
+ Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+ Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file)
+
+ Stockfish is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ Stockfish is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef BENCHMARK_H_INCLUDED
+#define BENCHMARK_H_INCLUDED
+
+#include <iosfwd>
+#include <string>
+#include <vector>
+
+namespace Stockfish {
+
+class Position;
+
+std::vector<std::string> setup_bench(const Position&, std::istream&);
+
+} // namespace Stockfish
+
+#endif // #ifndef BENCHMARK_H_INCLUDED
diff --git a/DroidFishApp/src/main/cpp/stockfish/bitbase.cpp b/DroidFishApp/src/main/cpp/stockfish/bitbase.cpp
index 84300ba..e21d1fe 100644
--- a/DroidFishApp/src/main/cpp/stockfish/bitbase.cpp
+++ b/DroidFishApp/src/main/cpp/stockfish/bitbase.cpp
@@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
- Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
+ Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
diff --git a/DroidFishApp/src/main/cpp/stockfish/bitboard.cpp b/DroidFishApp/src/main/cpp/stockfish/bitboard.cpp
index fd0ba23..fd5c3c2 100644
--- a/DroidFishApp/src/main/cpp/stockfish/bitboard.cpp
+++ b/DroidFishApp/src/main/cpp/stockfish/bitboard.cpp
@@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
- Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
+ Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -27,7 +27,6 @@ namespace Stockfish {
uint8_t PopCnt16[1 << 16];
uint8_t SquareDistance[SQUARE_NB][SQUARE_NB];
-Bitboard SquareBB[SQUARE_NB];
Bitboard LineBB[SQUARE_NB][SQUARE_NB];
Bitboard BetweenBB[SQUARE_NB][SQUARE_NB];
Bitboard PseudoAttacks[PIECE_TYPE_NB][SQUARE_NB];
@@ -82,9 +81,6 @@ void Bitboards::init() {
for (unsigned i = 0; i < (1 << 16); ++i)
PopCnt16[i] = uint8_t(std::bitset<16>(i).count());
- for (Square s = SQ_A1; s <= SQ_H8; ++s)
- SquareBB[s] = (1ULL << s);
-
for (Square s1 = SQ_A1; s1 <= SQ_H8; ++s1)
for (Square s2 = SQ_A1; s2 <= SQ_H8; ++s2)
SquareDistance[s1][s2] = std::max(distance<File>(s1, s2), distance<Rank>(s1, s2));
diff --git a/DroidFishApp/src/main/cpp/stockfish/bitboard.h b/DroidFishApp/src/main/cpp/stockfish/bitboard.h
index 2b6e2a6..42fd0e9 100644
--- a/DroidFishApp/src/main/cpp/stockfish/bitboard.h
+++ b/DroidFishApp/src/main/cpp/stockfish/bitboard.h
@@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
- Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
+ Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -74,7 +74,6 @@ constexpr Bitboard KingFlank[FILE_NB] = {
extern uint8_t PopCnt16[1 << 16];
extern uint8_t SquareDistance[SQUARE_NB][SQUARE_NB];
-extern Bitboard SquareBB[SQUARE_NB];
extern Bitboard BetweenBB[SQUARE_NB][SQUARE_NB];
extern Bitboard LineBB[SQUARE_NB][SQUARE_NB];
extern Bitboard PseudoAttacks[PIECE_TYPE_NB][SQUARE_NB];
@@ -108,7 +107,7 @@ extern Magic BishopMagics[SQUARE_NB];
inline Bitboard square_bb(Square s) {
assert(is_ok(s));
- return SquareBB[s];
+ return (1ULL << s);
}
diff --git a/DroidFishApp/src/main/cpp/stockfish/endgame.cpp b/DroidFishApp/src/main/cpp/stockfish/endgame.cpp
index e773e7a..9021f24 100644
--- a/DroidFishApp/src/main/cpp/stockfish/endgame.cpp
+++ b/DroidFishApp/src/main/cpp/stockfish/endgame.cpp
@@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
- Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
+ Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
diff --git a/DroidFishApp/src/main/cpp/stockfish/endgame.h b/DroidFishApp/src/main/cpp/stockfish/endgame.h
index e79f696..c184cb3 100644
--- a/DroidFishApp/src/main/cpp/stockfish/endgame.h
+++ b/DroidFishApp/src/main/cpp/stockfish/endgame.h
@@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
- Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
+ Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
diff --git a/DroidFishApp/src/main/cpp/stockfish/evaluate.cpp b/DroidFishApp/src/main/cpp/stockfish/evaluate.cpp
index 1d3e310..35d0542 100644
--- a/DroidFishApp/src/main/cpp/stockfish/evaluate.cpp
+++ b/DroidFishApp/src/main/cpp/stockfish/evaluate.cpp
@@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
- Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
+ Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -36,7 +36,7 @@
#include "timeman.h"
#include "uci.h"
#include "incbin/incbin.h"
-
+#include "nnue/evaluate_nnue.h"
// Macro to embed the default efficiently updatable neural network (NNUE) file
// data in the engine binary (using incbin.h, by Dale Weiler).
@@ -82,20 +82,18 @@ namespace Eval {
eval_file = EvalFileDefaultName;
#if defined(DEFAULT_NNUE_DIRECTORY)
- #define stringify2(x) #x
- #define stringify(x) stringify2(x)
vector<string> dirs = { "<internal>" , "" , CommandLine::binaryDirectory , stringify(DEFAULT_NNUE_DIRECTORY) };
#else
vector<string> dirs = { "<internal>" , "" , CommandLine::binaryDirectory };
#endif
- for (string directory : dirs)
+ for (const string& directory : dirs)
if (currentEvalFileName != eval_file)
{
if (directory != "<internal>")
{
ifstream stream(directory + eval_file, ios::binary);
- if (load_eval(eval_file, stream))
+ if (NNUE::load_eval(eval_file, stream))
currentEvalFileName = eval_file;
}
@@ -111,12 +109,10 @@ namespace Eval {
(void) gEmbeddedNNUEEnd; // Silence warning on unused variable
istream stream(&buffer);
- if (load_eval(eval_file, stream))
+ if (NNUE::load_eval(eval_file, stream))
currentEvalFileName = eval_file;
}
}
- if (currentEvalFileName != eval_file)
- currentEvalFileName = "";
}
/// NNUE::verify() verifies that the last net used was loaded successfully
@@ -161,24 +157,24 @@ namespace Trace {
Score scores[TERM_NB][COLOR_NB];
- double to_cp(Value v) { return double(v) / UCI::NormalizeToPawnValue; }
+ static double to_cp(Value v) { return double(v) / UCI::NormalizeToPawnValue; }
- void add(int idx, Color c, Score s) {
+ static void add(int idx, Color c, Score s) {
scores[idx][c] = s;
}
- void add(int idx, Score w, Score b = SCORE_ZERO) {
+ static void add(int idx, Score w, Score b = SCORE_ZERO) {
scores[idx][WHITE] = w;
scores[idx][BLACK] = b;
}
- std::ostream& operator<<(std::ostream& os, Score s) {
+ static std::ostream& operator<<(std::ostream& os, Score s) {
os << std::setw(5) << to_cp(mg_value(s)) << " "
<< std::setw(5) << to_cp(eg_value(s));
return os;
}
- std::ostream& operator<<(std::ostream& os, Term t) {
+ static std::ostream& operator<<(std::ostream& os, Term t) {
if (t == MATERIAL || t == IMBALANCE || t == WINNABLE || t == TOTAL)
os << " ---- ----" << " | " << " ---- ----";
@@ -195,8 +191,8 @@ using namespace Trace;
namespace {
// Threshold for lazy and space evaluation
- constexpr Value LazyThreshold1 = Value(3631);
- constexpr Value LazyThreshold2 = Value(2084);
+ constexpr Value LazyThreshold1 = Value(3622);
+ constexpr Value LazyThreshold2 = Value(1962);
constexpr Value SpaceThreshold = Value(11551);
// KingAttackWeights[PieceType] contains king attack weights by piece type
@@ -390,10 +386,10 @@ namespace {
template<Tracing T> template<Color Us, PieceType Pt>
Score Evaluation<T>::pieces() {
- constexpr Color Them = ~Us;
- constexpr Direction Down = -pawn_push(Us);
- constexpr Bitboard OutpostRanks = (Us == WHITE ? Rank4BB | Rank5BB | Rank6BB
- : Rank5BB | Rank4BB | Rank3BB);
+ constexpr Color Them = ~Us;
+ [[maybe_unused]] constexpr Direction Down = -pawn_push(Us);
+ [[maybe_unused]] constexpr Bitboard OutpostRanks = (Us == WHITE ? Rank4BB | Rank5BB | Rank6BB
+ : Rank5BB | Rank4BB | Rank3BB);
Bitboard b1 = pos.pieces(Us, Pt);
Bitboard b, bb;
Score score = SCORE_ZERO;
@@ -432,7 +428,7 @@ namespace {
int mob = popcount(b & mobilityArea[Us]);
mobility[Us] += MobilityBonus[Pt - 2][mob];
- if (Pt == BISHOP || Pt == KNIGHT)
+ if constexpr (Pt == BISHOP || Pt == KNIGHT)
{
// Bonus if the piece is on an outpost square or can reach one
// Bonus for knights (UncontestedOutpost) if few relevant targets
@@ -1050,52 +1046,41 @@ make_v:
/// evaluate() is the evaluator for the outer world. It returns a static
/// evaluation of the position from the point of view of the side to move.
-Value Eval::evaluate(const Position& pos, int* complexity) {
+Value Eval::evaluate(const Position& pos) {
+
+ assert(!pos.checkers());
Value v;
Value psq = pos.psq_eg_stm();
// We use the much less accurate but faster Classical eval when the NNUE
// option is set to false. Otherwise we use the NNUE eval unless the
- // PSQ advantage is decisive and several pieces remain. (~3 Elo)
- bool useClassical = !useNNUE || (pos.count<ALL_PIECES>() > 7 && abs(psq) > 1760);
+ // PSQ advantage is decisive. (~4 Elo at STC, 1 Elo at LTC)
+ bool useClassical = !useNNUE || abs(psq) > 2048;
if (useClassical)
v = Evaluation<NO_TRACE>(pos).value();
else
{
int nnueComplexity;
- int scale = 1064 + 106 * pos.non_pawn_material() / 5120;
+ int npm = pos.non_pawn_material() / 64;
Color stm = pos.side_to_move();
Value optimism = pos.this_thread()->optimism[stm];
Value nnue = NNUE::evaluate(pos, true, &nnueComplexity);
- // Blend nnue complexity with (semi)classical complexity
- nnueComplexity = ( 416 * nnueComplexity
- + 424 * abs(psq - nnue)
- + (optimism > 0 ? int(optimism) * int(psq - nnue) : 0)
- ) / 1024;
-
- // Return hybrid NNUE complexity to caller
- if (complexity)
- *complexity = nnueComplexity;
-
- optimism = optimism * (269 + nnueComplexity) / 256;
- v = (nnue * scale + optimism * (scale - 754)) / 1024;
+ // Blend optimism with nnue complexity and (semi)classical complexity
+ optimism += optimism * (nnueComplexity + abs(psq - nnue)) / 512;
+ v = (nnue * (945 + npm) + optimism * (150 + npm)) / 1024;
}
// Damp down the evaluation linearly when shuffling
- v = v * (195 - pos.rule50_count()) / 211;
+ v = v * (200 - pos.rule50_count()) / 214;
// Guarantee evaluation does not hit the tablebase range
v = std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1);
- // When not using NNUE, return classical complexity to caller
- if (complexity && (!useNNUE || useClassical))
- *complexity = abs(v - psq);
-
return v;
}
diff --git a/DroidFishApp/src/main/cpp/stockfish/evaluate.h b/DroidFishApp/src/main/cpp/stockfish/evaluate.h
index f5ac326..b9d7231 100644
--- a/DroidFishApp/src/main/cpp/stockfish/evaluate.h
+++ b/DroidFishApp/src/main/cpp/stockfish/evaluate.h
@@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
- Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
+ Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -31,7 +31,7 @@ class Position;
namespace Eval {
std::string trace(Position& pos);
- Value evaluate(const Position& pos, int* complexity = nullptr);
+ Value evaluate(const Position& pos);
extern bool useNNUE;
extern std::string currentEvalFileName;
@@ -39,20 +39,13 @@ namespace Eval {
// The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue
// for the build process (profile-build and fishtest) to work. Do not change the
// name of the macro, as it is used in the Makefile.
- #define EvalFileDefaultName "nn-ad9b42354671.nnue"
+ #define EvalFileDefaultName "nn-5af11540bbfe.nnue"
namespace NNUE {
- std::string trace(Position& pos);
- Value evaluate(const Position& pos, bool adjusted = false, int* complexity = nullptr);
-
void init();
void verify();
- bool load_eval(std::string name, std::istream& stream);
- bool save_eval(std::ostream& stream);
- bool save_eval(const std::optional<std::string>& filename);
-
} // namespace NNUE
} // namespace Eval
diff --git a/DroidFishApp/src/main/cpp/stockfish/main.cpp b/DroidFishApp/src/main/cpp/stockfish/main.cpp
index fad0ef8..c40e0fa 100644
--- a/DroidFishApp/src/main/cpp/stockfish/main.cpp
+++ b/DroidFishApp/src/main/cpp/stockfish/main.cpp
@@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
- Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
+ Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
diff --git a/DroidFishApp/src/main/cpp/stockfish/material.cpp b/DroidFishApp/src/main/cpp/stockfish/material.cpp
index 1567358..7102f87 100644
--- a/DroidFishApp/src/main/cpp/stockfish/material.cpp
+++ b/DroidFishApp/src/main/cpp/stockfish/material.cpp
@@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
- Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
+ Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
diff --git a/DroidFishApp/src/main/cpp/stockfish/material.h b/DroidFishApp/src/main/cpp/stockfish/material.h
index 3ca169c..9acf78f 100644
--- a/DroidFishApp/src/main/cpp/stockfish/material.h
+++ b/DroidFishApp/src/main/cpp/stockfish/material.h
@@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
- Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
+ Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -28,7 +28,7 @@ namespace Stockfish::Material {
/// Material::Entry contains various information about a material configuration.
/// It contains a material imbalance evaluation, a function pointer to a special
-/// endgame evaluation function (which in most cases is NULL, meaning that the
+/// endgame evaluation function (which in most cases is nullptr, meaning that the
/// standard evaluation function will be used), and scale factors.
///
/// The scale factors are used to scale the evaluation score up or down. For
@@ -62,7 +62,7 @@ struct Entry {
uint8_t factor[COLOR_NB];
};
-typedef HashTable<Entry, 8192> Table;
+using Table = HashTable<Entry, 8192>;
Entry* probe(const Position& pos);
diff --git a/DroidFishApp/src/main/cpp/stockfish/misc.cpp b/DroidFishApp/src/main/cpp/stockfish/misc.cpp
index 2d86969..bbfa406 100644
--- a/DroidFishApp/src/main/cpp/stockfish/misc.cpp
+++ b/DroidFishApp/src/main/cpp/stockfish/misc.cpp
@@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
- Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
+ Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -32,21 +32,26 @@
// the calls at compile time), try to load them at runtime. To do this we need
// first to define the corresponding function pointers.
extern "C" {
-typedef bool(*fun1_t)(LOGICAL_PROCESSOR_RELATIONSHIP,
- PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, PDWORD);
-typedef bool(*fun2_t)(USHORT, PGROUP_AFFINITY);
-typedef bool(*fun3_t)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY);
-typedef bool(*fun4_t)(USHORT, PGROUP_AFFINITY, USHORT, PUSHORT);
-typedef WORD(*fun5_t)();
+using fun1_t = bool(*)(LOGICAL_PROCESSOR_RELATIONSHIP,
+ PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, PDWORD);
+using fun2_t = bool(*)(USHORT, PGROUP_AFFINITY);
+using fun3_t = bool(*)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY);
+using fun4_t = bool(*)(USHORT, PGROUP_AFFINITY, USHORT, PUSHORT);
+using fun5_t = WORD(*)();
+using fun6_t = bool(*)(HANDLE, DWORD, PHANDLE);
+using fun7_t = bool(*)(LPCSTR, LPCSTR, PLUID);
+using fun8_t = bool(*)(HANDLE, BOOL, PTOKEN_PRIVILEGES, DWORD, PTOKEN_PRIVILEGES, PDWORD);
}
#endif
+#include <cmath>
+#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <sstream>
+#include <string_view>
#include <vector>
-#include <cstdlib>
#if defined(__linux__) && !defined(__ANDROID__)
#include <stdlib.h>
@@ -68,7 +73,7 @@ namespace Stockfish {
namespace {
/// Version number or dev.
-const string version = "15.1";
+constexpr string_view version = "16";
/// Our fancy logging facility. The trick here is to replace cin.rdbuf() and
/// cout.rdbuf() with two Tie objects that tie cin and cout to a file stream. We
@@ -151,13 +156,13 @@ string engine_info(bool to_uci) {
stringstream ss;
ss << "Stockfish " << version << setfill('0');
- if (version == "dev")
+ if constexpr (version == "dev")
{
ss << "-";
#ifdef GIT_DATE
- ss << GIT_DATE;
+ ss << stringify(GIT_DATE);
#else
- const string months("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec");
+ constexpr string_view months("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec");
string month, day, year;
stringstream date(__DATE__); // From compiler, format is "Sep 21 2008"
@@ -168,7 +173,7 @@ string engine_info(bool to_uci) {
ss << "-";
#ifdef GIT_SHA
- ss << GIT_SHA;
+ ss << stringify(GIT_SHA);
#else
ss << "nogit";
#endif
@@ -185,8 +190,6 @@ string engine_info(bool to_uci) {
std::string compiler_info() {
- #define stringify2(x) #x
- #define stringify(x) stringify2(x)
#define make_version_string(major, minor, patch) stringify(major) "." stringify(minor) "." stringify(patch)
/// Predefined macros hell:
@@ -298,21 +301,94 @@ std::string compiler_info() {
/// Debug functions used mainly to collect run-time statistics
-static std::atomic<int64_t> hits[2], means[2];
+constexpr int MaxDebugSlots = 32;
-void dbg_hit_on(bool b) { ++hits[0]; if (b) ++hits[1]; }
-void dbg_hit_on(bool c, bool b) { if (c) dbg_hit_on(b); }
-void dbg_mean_of(int v) { ++means[0]; means[1] += v; }
+namespace {
+
+template<size_t N>
+struct DebugInfo {
+ std::atomic<int64_t> data[N] = { 0 };
+
+ constexpr inline std::atomic<int64_t>& operator[](int index) { return data[index]; }
+};
+
+DebugInfo<2> hit[MaxDebugSlots];
+DebugInfo<2> mean[MaxDebugSlots];
+DebugInfo<3> stdev[MaxDebugSlots];
+DebugInfo<6> correl[MaxDebugSlots];
+
+} // namespace
+
+void dbg_hit_on(bool cond, int slot) {
+
+ ++hit[slot][0];
+ if (cond)
+ ++hit[slot][1];
+}
+
+void dbg_mean_of(int64_t value, int slot) {
+
+ ++mean[slot][0];
+ mean[slot][1] += value;
+}
+
+void dbg_stdev_of(int64_t value, int slot) {
+
+ ++stdev[slot][0];
+ stdev[slot][1] += value;
+ stdev[slot][2] += value * value;
+}
+
+void dbg_correl_of(int64_t value1, int64_t value2, int slot) {
+
+ ++correl[slot][0];
+ correl[slot][1] += value1;
+ correl[slot][2] += value1 * value1;
+ correl[slot][3] += value2;
+ correl[slot][4] += value2 * value2;
+ correl[slot][5] += value1 * value2;
+}
void dbg_print() {
- if (hits[0])
- cerr << "Total " << hits[0] << " Hits " << hits[1]
- << " hit rate (%) " << 100 * hits[1] / hits[0] << endl;
+ int64_t n;
+ auto E = [&n](int64_t x) { return double(x) / n; };
+ auto sqr = [](double x) { return x * x; };
- if (means[0])
- cerr << "Total " << means[0] << " Mean "
- << (double)means[1] / means[0] << endl;
+ for (int i = 0; i < MaxDebugSlots; ++i)
+ if ((n = hit[i][0]))
+ std::cerr << "Hit #" << i
+ << ": Total " << n << " Hits " << hit[i][1]
+ << " Hit Rate (%) " << 100.0 * E(hit[i][1])
+ << std::endl;
+
+ for (int i = 0; i < MaxDebugSlots; ++i)
+ if ((n = mean[i][0]))
+ {
+ std::cerr << "Mean #" << i
+ << ": Total " << n << " Mean " << E(mean[i][1])
+ << std::endl;
+ }
+
+ for (int i = 0; i < MaxDebugSlots; ++i)
+ if ((n = stdev[i][0]))
+ {
+ double r = sqrtl(E(stdev[i][2]) - sqr(E(stdev[i][1])));
+ std::cerr << "Stdev #" << i
+ << ": Total " << n << " Stdev " << r
+ << std::endl;
+ }
+
+ for (int i = 0; i < MaxDebugSlots; ++i)
+ if ((n = correl[i][0]))
+ {
+ double r = (E(correl[i][5]) - E(correl[i][1]) * E(correl[i][3]))
+ / ( sqrtl(E(correl[i][2]) - sqr(E(correl[i][1])))
+ * sqrtl(E(correl[i][4]) - sqr(E(correl[i][3]))));
+ std::cerr << "Correl. #" << i
+ << ": Total " << n << " Coefficient " << r
+ << std::endl;
+ }
}
@@ -373,8 +449,10 @@ void* std_aligned_alloc(size_t alignment, size_t size) {
#if defined(POSIXALIGNEDALLOC)
void *mem;
return posix_memalign(&mem, alignment, size) ? nullptr : mem;
-#elif defined(_WIN32)
+#elif defined(_WIN32) && !defined(_M_ARM) && !defined(_M_ARM64)
return _mm_malloc(size, alignment);
+#elif defined(_WIN32)
+ return _aligned_malloc(size, alignment);
#else
return std::aligned_alloc(alignment, size);
#endif
@@ -384,8 +462,10 @@ void std_aligned_free(void* ptr) {
#if defined(POSIXALIGNEDALLOC)
free(ptr);
-#elif defined(_WIN32)
+#elif defined(_WIN32) && !defined(_M_ARM) && !defined(_M_ARM64)
_mm_free(ptr);
+#elif defined(_WIN32)
+ _aligned_free(ptr);
#else
free(ptr);
#endif
@@ -409,11 +489,30 @@ static void* aligned_large_pages_alloc_windows([[maybe_unused]] size_t allocSize
if (!largePageSize)
return nullptr;
- // We need SeLockMemoryPrivilege, so try to enable it for the process
- if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hProcessToken))
+ // Dynamically link OpenProcessToken, LookupPrivilegeValue and AdjustTokenPrivileges
+
+ HMODULE hAdvapi32 = GetModuleHandle(TEXT("advapi32.dll"));
+
+ if (!hAdvapi32)
+ hAdvapi32 = LoadLibrary(TEXT("advapi32.dll"));
+
+ auto fun6 = (fun6_t)(void(*)())GetProcAddress(hAdvapi32, "OpenProcessToken");
+ if (!fun6)
+ return nullptr;
+ auto fun7 = (fun7_t)(void(*)())GetProcAddress(hAdvapi32, "LookupPrivilegeValueA");
+ if (!fun7)
+ return nullptr;
+ auto fun8 = (fun8_t)(void(*)())GetProcAddress(hAdvapi32, "AdjustTokenPrivileges");
+ if (!fun8)
return nullptr;
- if (LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, &luid))
+ // We need SeLockMemoryPrivilege, so try to enable it for the process
+ if (!fun6( // OpenProcessToken()
+ GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hProcessToken))
+ return nullptr;
+
+ if (fun7( // LookupPrivilegeValue(nullptr, SE_LOCK_MEMORY_NAME, &luid)
+ nullptr, "SeLockMemoryPrivilege", &luid))
{
TOKEN_PRIVILEGES tp { };
TOKEN_PRIVILEGES prevTp { };
@@ -425,17 +524,18 @@ static void* aligned_large_pages_alloc_windows([[maybe_unused]] size_t allocSize
// Try to enable SeLockMemoryPrivilege. Note that even if AdjustTokenPrivileges() succeeds,
// we still need to query GetLastError() to ensure that the privileges were actually obtained.
- if (AdjustTokenPrivileges(
+ if (fun8( // AdjustTokenPrivileges()
hProcessToken, FALSE, &tp, sizeof(TOKEN_PRIVILEGES), &prevTp, &prevTpLen) &&
GetLastError() == ERROR_SUCCESS)
{
// Round up size to full pages and allocate
allocSize = (allocSize + largePageSize - 1) & ~size_t(largePageSize - 1);
mem = VirtualAlloc(
- NULL, allocSize, MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE);
+ nullptr, allocSize, MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE);
// Privilege no longer needed, restore previous state
- AdjustTokenPrivileges(hProcessToken, FALSE, &prevTp, 0, NULL, NULL);
+ fun8( // AdjustTokenPrivileges ()
+ hProcessToken, FALSE, &prevTp, 0, nullptr, nullptr);
}
}
@@ -453,7 +553,7 @@ void* aligned_large_pages_alloc(size_t allocSize) {
// Fall back to regular, page aligned, allocation if necessary
if (!mem)
- mem = VirtualAlloc(NULL, allocSize, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
+ mem = VirtualAlloc(nullptr, allocSize, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
return mem;
}
@@ -517,7 +617,7 @@ void bindThisThread(size_t) {}
/// API and returns the best node id for the thread with index idx. Original
/// code from Texel by Peter Österlund.
-int best_node(size_t idx) {
+static int best_node(size_t idx) {
int threads = 0;
int nodes = 0;
@@ -526,7 +626,7 @@ int best_node(size_t idx) {
DWORD byteOffset = 0;
// Early exit if the needed API is not available at runtime
- HMODULE k32 = GetModuleHandle("Kernel32.dll");
+ HMODULE k32 = GetModuleHandle(TEXT("Kernel32.dll"));
auto fun1 = (fun1_t)(void(*)())GetProcAddress(k32, "GetLogicalProcessorInformationEx");
if (!fun1)
return -1;
@@ -596,7 +696,7 @@ void bindThisThread(size_t idx) {
return;
// Early exit if the needed API are not available at runtime
- HMODULE k32 = GetModuleHandle("Kernel32.dll");
+ HMODULE k32 = GetModuleHandle(TEXT("Kernel32.dll"));
auto fun2 = (fun2_t)(void(*)())GetProcAddress(k32, "GetNumaNodeProcessorMaskEx");
auto fun3 = (fun3_t)(void(*)())GetProcAddress(k32, "SetThreadGroupAffinity");
auto fun4 = (fun4_t)(void(*)())GetProcAddress(k32, "GetNumaNodeProcessorMask2");
diff --git a/DroidFishApp/src/main/cpp/stockfish/misc.h b/DroidFishApp/src/main/cpp/stockfish/misc.h
index 77b81d5..69d470c 100644
--- a/DroidFishApp/src/main/cpp/stockfish/misc.h
+++ b/DroidFishApp/src/main/cpp/stockfish/misc.h
@@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
- Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
+ Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -28,6 +28,9 @@
#include "types.h"
+#define stringify2(x) #x
+#define stringify(x) stringify2(x)
+
namespace Stockfish {
std::string engine_info(bool to_uci = false);
@@ -39,12 +42,13 @@ void std_aligned_free(void* ptr);
void* aligned_large_pages_alloc(size_t size); // memory aligned by page size, min alignment: 4096 bytes
void aligned_large_pages_free(void* mem); // nop if mem == nullptr
-void dbg_hit_on(bool b);
-void dbg_hit_on(bool c, bool b);
-void dbg_mean_of(int v);
+void dbg_hit_on(bool cond, int slot = 0);
+void dbg_mean_of(int64_t value, int slot = 0);
+void dbg_stdev_of(int64_t value, int slot = 0);
+void dbg_correl_of(int64_t value1, int64_t value2, int slot = 0);
void dbg_print();
-typedef std::chrono::milliseconds::rep TimePoint; // A value in milliseconds
+using TimePoint = std::chrono::milliseconds::rep; // A value in milliseconds
static_assert(sizeof(TimePoint) == sizeof(int64_t), "TimePoint should be 64 bits");
inline TimePoint now() {
return std::chrono::duration_cast<std::chrono::milliseconds>
@@ -85,32 +89,6 @@ static inline const union { uint32_t i; char c[4]; } Le = { 0x01020304 };
static inline const bool IsLittleEndian = (Le.c[0] == 4);
-// RunningAverage : a class to calculate a running average of a series of values.
-// For efficiency, all computations are done with integers.
-class RunningAverage {
- public:
-
- // Reset the running average to rational value p / q
- void set(int64_t p, int64_t q)
- { average = p * PERIOD * RESOLUTION / q; }
-
- // Update average with value v
- void update(int64_t v)
- { average = RESOLUTION * v + (PERIOD - 1) * average / PERIOD; }
-
- // Test if average is strictly greater than rational a / b
- bool is_greater(int64_t a, int64_t b) const
- { return b * average > a * (PERIOD * RESOLUTION); }
-
- int64_t value() const
- { return average / (PERIOD * RESOLUTION); }
-
- private :
- static constexpr int64_t PERIOD = 4096;
- static constexpr int64_t RESOLUTION = 1024;
- int64_t average;
-};
-
template <typename T, std::size_t MaxSize>
class ValueList {
@@ -164,7 +142,7 @@ public:
inline uint64_t mul_hi64(uint64_t a, uint64_t b) {
#if defined(__GNUC__) && defined(IS_64BIT)
- __extension__ typedef unsigned __int128 uint128;
+ __extension__ using uint128 = unsigned __int128;
return ((uint128)a * (uint128)b) >> 64;
#else
uint64_t aL = (uint32_t)a, aH = a >> 32;
diff --git a/DroidFishApp/src/main/cpp/stockfish/movegen.cpp b/DroidFishApp/src/main/cpp/stockfish/movegen.cpp
index c7a3c29..6b28a52 100644
--- a/DroidFishApp/src/main/cpp/stockfish/movegen.cpp
+++ b/DroidFishApp/src/main/cpp/stockfish/movegen.cpp
@@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
- Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
+ Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -25,13 +25,21 @@ namespace Stockfish {
namespace {
- template<GenType Type, Direction D>
- ExtMove* make_promotions(ExtMove* moveList, Square to) {
+ template<GenType Type, Direction D, bool Enemy>
+ ExtMove* make_promotions(ExtMove* moveList, [[maybe_unused]] Square to) {
- if (Type == CAPTURES || Type == EVASIONS || Type == NON_EVASIONS)
+ if constexpr (Type == CAPTURES || Type == EVASIONS || Type == NON_EVASIONS)
+ {
*moveList++ = make<PROMOTION>(to - D, to, QUEEN);
+ if constexpr (Enemy && Type == CAPTURES)
+ {
+ *moveList++ = make<PROMOTION>(to - D, to, ROOK);
+ *moveList++ = make<PROMOTION>(to - D, to, BISHOP);
+ *moveList++ = make<PROMOTION>(to - D, to, KNIGHT);
+ }
+ }
- if (Type == QUIETS || Type == EVASIONS || Type == NON_EVASIONS)
+ if constexpr ((Type == QUIETS && !Enemy) || Type == EVASIONS || Type == NON_EVASIONS)
{
*moveList++ = make<PROMOTION>(to - D, to, ROOK);
*moveList++ = make<PROMOTION>(to - D, to, BISHOP);
@@ -60,18 +68,18 @@ namespace {
Bitboard pawnsNotOn7 = pos.pieces(Us, PAWN) & ~TRank7BB;
// Single and double pawn pushes, no promotions
- if (Type != CAPTURES)
+ if constexpr (Type != CAPTURES)
{
Bitboard b1 = shift<Up>(pawnsNotOn7) & emptySquares;
Bitboard b2 = shift<Up>(b1 & TRank3BB) & emptySquares;
- if (Type == EVASIONS) // Consider only blocking squares
+ if constexpr (Type == EVASIONS) // Consider only blocking squares
{
b1 &= target;
b2 &= target;
}
- if (Type == QUIET_CHECKS)
+ if constexpr (Type == QUIET_CHECKS)
{
// To make a quiet check, you either make a direct check by pushing a pawn
// or push a blocker pawn that is not on the same file as the enemy king.
@@ -102,21 +110,21 @@ namespace {
Bitboard b2 = shift(pawnsOn7) & enemies;
Bitboard b3 = shift(pawnsOn7) & emptySquares;
- if (Type == EVASIONS)
+ if constexpr (Type == EVASIONS)
b3 &= target;
while (b1)
- moveList = make_promotions(moveList, pop_lsb(b1));
+ moveList = make_promotions(moveList, pop_lsb(b1));
while (b2)
- moveList = make_promotions(moveList, pop_lsb(b2));
+ moveList = make_promotions(moveList, pop_lsb(b2));
while (b3)
- moveList = make_promotions(moveList, pop_lsb(b3));
+ moveList = make_promotions(moveList, pop_lsb(b3));
}
// Standard and en passant captures
- if (Type == CAPTURES || Type == EVASIONS || Type == NON_EVASIONS)
+ if constexpr (Type == CAPTURES || Type == EVASIONS || Type == NON_EVASIONS)
{
Bitboard b1 = shift(pawnsNotOn7) & enemies;
Bitboard b2 = shift(pawnsNotOn7) & enemies;
@@ -264,7 +272,7 @@ ExtMove* generate(const Position& pos, ExtMove* moveList) {
moveList = pos.checkers() ? generate(pos, moveList)
: generate(pos, moveList);
while (cur != moveList)
- if ( ((pinned && pinned & from_sq(*cur)) || from_sq(*cur) == ksq || type_of(*cur) == EN_PASSANT)
+ if ( ((pinned & from_sq(*cur)) || from_sq(*cur) == ksq || type_of(*cur) == EN_PASSANT)
&& !pos.legal(*cur))
*cur = (--moveList)->move;
else
diff --git a/DroidFishApp/src/main/cpp/stockfish/movegen.h b/DroidFishApp/src/main/cpp/stockfish/movegen.h
index bbb35b3..b8df3e6 100644
--- a/DroidFishApp/src/main/cpp/stockfish/movegen.h
+++ b/DroidFishApp/src/main/cpp/stockfish/movegen.h
@@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
- Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
+ Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
diff --git a/DroidFishApp/src/main/cpp/stockfish/movepick.cpp b/DroidFishApp/src/main/cpp/stockfish/movepick.cpp
index 188d6bd..6fbcb2c 100644
--- a/DroidFishApp/src/main/cpp/stockfish/movepick.cpp
+++ b/DroidFishApp/src/main/cpp/stockfish/movepick.cpp
@@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
- Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
+ Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -69,7 +69,6 @@ MovePicker::MovePicker(const Position& p, Move ttm, Depth d, const ButterflyHist
stage = (pos.checkers() ? EVASION_TT : MAIN_TT) +
!(ttm && pos.pseudo_legal(ttm));
- threatenedPieces = 0;
}
/// MovePicker constructor for quiescence search
@@ -93,20 +92,20 @@ MovePicker::MovePicker(const Position& p, Move ttm, Value th, const CapturePiece
{
assert(!pos.checkers());
- stage = PROBCUT_TT + !(ttm && pos.capture(ttm)
+ stage = PROBCUT_TT + !(ttm && pos.capture_stage(ttm)
&& pos.pseudo_legal(ttm)
&& pos.see_ge(ttm, threshold));
}
/// MovePicker::score() assigns a numerical value to each move in a list, used
/// for sorting. Captures are ordered by Most Valuable Victim (MVV), preferring
-/// captures with a good history. Quiets moves are ordered using the histories.
+/// captures with a good history. Quiet moves are ordered using the history tables.
template
void MovePicker::score() {
static_assert(Type == CAPTURES || Type == QUIETS || Type == EVASIONS, "Wrong type");
- [[maybe_unused]] Bitboard threatenedByPawn, threatenedByMinor, threatenedByRook;
+ [[maybe_unused]] Bitboard threatenedByPawn, threatenedByMinor, threatenedByRook, threatenedPieces;
if constexpr (Type == QUIETS)
{
Color us = pos.side_to_move();
@@ -123,8 +122,8 @@ void MovePicker::score() {
for (auto& m : *this)
if constexpr (Type == CAPTURES)
- m.value = 6 * int(PieceValue[MG][pos.piece_on(to_sq(m))])
- + (*captureHistory)[pos.moved_piece(m)][to_sq(m)][type_of(pos.piece_on(to_sq(m)))];
+ m.value = (7 * int(PieceValue[MG][pos.piece_on(to_sq(m))])
+ + (*captureHistory)[pos.moved_piece(m)][to_sq(m)][type_of(pos.piece_on(to_sq(m)))]) / 16;
else if constexpr (Type == QUIETS)
m.value = 2 * (*mainHistory)[pos.side_to_move()][from_to(m)]
@@ -141,7 +140,7 @@ void MovePicker::score() {
+ bool(pos.check_squares(type_of(pos.moved_piece(m))) & to_sq(m)) * 16384;
else // Type == EVASIONS
{
- if (pos.capture(m))
+ if (pos.capture_stage(m))
m.value = PieceValue[MG][pos.piece_on(to_sq(m))]
- Value(type_of(pos.moved_piece(m)))
+ (1 << 28);
@@ -158,7 +157,7 @@ Move MovePicker::select(Pred filter) {
while (cur < endMoves)
{
- if (T == Best)
+ if constexpr (T == Best)
std::swap(*cur, *std::max_element(cur, endMoves));
if (*cur != ttMove && filter())
@@ -197,7 +196,7 @@ top:
case GOOD_CAPTURE:
if (select([&](){
- return pos.see_ge(*cur, Value(-69 * cur->value / 1024)) ?
+ return pos.see_ge(*cur, Value(-cur->value)) ?
// Move losing capture to endBadCaptures to be tried later
true : (*endBadCaptures++ = *cur, false); }))
return *(cur - 1);
@@ -216,7 +215,7 @@ top:
case REFUTATION:
if (select([&](){ return *cur != MOVE_NONE
- && !pos.capture(*cur)
+ && !pos.capture_stage(*cur)
&& pos.pseudo_legal(*cur); }))
return *(cur - 1);
++stage;
diff --git a/DroidFishApp/src/main/cpp/stockfish/movepick.h b/DroidFishApp/src/main/cpp/stockfish/movepick.h
index e4c4a5b..0b44557 100644
--- a/DroidFishApp/src/main/cpp/stockfish/movepick.h
+++ b/DroidFishApp/src/main/cpp/stockfish/movepick.h
@@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
- Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
+ Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -62,14 +62,14 @@ public:
template
struct Stats : public std::array, Size>
{
- typedef Stats stats;
+ using stats = Stats;
void fill(const T& v) {
// For standard-layout 'this' points to first struct member
assert(std::is_standard_layout::value);
- typedef StatsEntry entry;
+ using entry = StatsEntry;
entry* p = reinterpret_cast(this);
std::fill(p, p + sizeof(*this) / sizeof(entry), v);
}
@@ -87,23 +87,23 @@ enum StatsType { NoCaptures, Captures };
/// ordering decisions. It uses 2 tables (one for each color) indexed by
/// the move's from and to squares, see www.chessprogramming.org/Butterfly_Boards
/// (~11 elo)
-typedef Stats ButterflyHistory;
+using ButterflyHistory = Stats;
/// CounterMoveHistory stores counter moves indexed by [piece][to] of the previous
/// move, see www.chessprogramming.org/Countermove_Heuristic
-typedef Stats CounterMoveHistory;
+using CounterMoveHistory = Stats;
/// CapturePieceToHistory is addressed by a move's [piece][to][captured piece type]
-typedef Stats CapturePieceToHistory;
+using CapturePieceToHistory = Stats;
/// PieceToHistory is like ButterflyHistory but is addressed by a move's [piece][to]
-typedef Stats PieceToHistory;
+using PieceToHistory = Stats;
/// ContinuationHistory is the combined history of a given pair of moves, usually
/// the current one given a previous one. The nested history table is based on
/// PieceToHistory instead of ButterflyBoards.
/// (~63 elo)
-typedef Stats ContinuationHistory;
+using ContinuationHistory = Stats;
/// MovePicker class is used to pick one pseudo-legal move at a time from the
@@ -131,8 +131,6 @@ public:
MovePicker(const Position&, Move, Value, const CapturePieceToHistory*);
Move next_move(bool skipQuiets = false);
- Bitboard threatenedPieces;
-
private:
template Move select(Pred);
template void score();
diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/evaluate_nnue.cpp b/DroidFishApp/src/main/cpp/stockfish/nnue/evaluate_nnue.cpp
index 4715fed..329adfd 100644
--- a/DroidFishApp/src/main/cpp/stockfish/nnue/evaluate_nnue.cpp
+++ b/DroidFishApp/src/main/cpp/stockfish/nnue/evaluate_nnue.cpp
@@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
- Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
+ Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -18,15 +18,15 @@
// Code for calculating NNUE evaluation function
+#include
+#include
#include
#include
#include
-#include
-#include
+#include
#include "../evaluate.h"
#include "../position.h"
-#include "../misc.h"
#include "../uci.h"
#include "../types.h"
@@ -83,7 +83,7 @@ namespace Stockfish::Eval::NNUE {
} // namespace Detail
// Initialize the evaluation function parameters
- void initialize() {
+ static void initialize() {
Detail::initialize(featureTransformer);
for (std::size_t i = 0; i < LayerStacks; ++i)
@@ -91,7 +91,7 @@ namespace Stockfish::Eval::NNUE {
}
// Read network header
- bool read_header(std::istream& stream, std::uint32_t* hashValue, std::string* desc)
+ static bool read_header(std::istream& stream, std::uint32_t* hashValue, std::string* desc)
{
std::uint32_t version, size;
@@ -105,7 +105,7 @@ namespace Stockfish::Eval::NNUE {
}
// Write network header
- bool write_header(std::ostream& stream, std::uint32_t hashValue, const std::string& desc)
+ static bool write_header(std::ostream& stream, std::uint32_t hashValue, const std::string& desc)
{
write_little_endian(stream, Version);
write_little_endian(stream, hashValue);
@@ -115,7 +115,7 @@ namespace Stockfish::Eval::NNUE {
}
// Read network parameters
- bool read_parameters(std::istream& stream) {
+ static bool read_parameters(std::istream& stream) {
std::uint32_t hashValue;
if (!read_header(stream, &hashValue, &netDescription)) return false;
@@ -127,7 +127,7 @@ namespace Stockfish::Eval::NNUE {
}
// Write network parameters
- bool write_parameters(std::ostream& stream) {
+ static bool write_parameters(std::ostream& stream) {
if (!write_header(stream, HashValue, netDescription)) return false;
if (!Detail::write_parameters(stream, *featureTransformer)) return false;
@@ -136,6 +136,11 @@ namespace Stockfish::Eval::NNUE {
return (bool)stream;
}
+ void hint_common_parent_position(const Position& pos) { // forwards pos to the feature transformer's hint_common_access()
+ if (Eval::useNNUE) // does nothing when NNUE evaluation is disabled
+ featureTransformer->hint_common_access(pos);
+ }
+
// Evaluation function. Perform differential calculation.
Value evaluate(const Position& pos, bool adjusted, int* complexity) {
@@ -143,7 +148,7 @@ namespace Stockfish::Eval::NNUE {
// overaligning stack variables with alignas() doesn't work correctly.
constexpr uint64_t alignment = CacheLineSize;
- int delta = 24 - pos.non_pawn_material() / 9560;
+ constexpr int delta = 24;
#if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN)
TransformedFeatureType transformedFeaturesUnaligned[
@@ -211,7 +216,7 @@ namespace Stockfish::Eval::NNUE {
return t;
}
- static const std::string PieceToChar(" PNBRQK pnbrqk");
+ constexpr std::string_view PieceToChar(" PNBRQK pnbrqk");
// format_cp_compact() converts a Value into (centi)pawns and writes it in a buffer.
@@ -245,14 +250,15 @@ namespace Stockfish::Eval::NNUE {
}
- // format_cp_aligned_dot() converts a Value into (centi)pawns and writes it in a buffer,
- // always keeping two decimals. The buffer must have capacity for at least 7 chars.
- static void format_cp_aligned_dot(Value v, char* buffer) {
+ // format_cp_aligned_dot() converts a Value into (centi)pawns and appends it to the stream as a sign character plus a width-6 number with two decimals.
+ static void format_cp_aligned_dot(Value v, std::stringstream &stream) {
+ const double cp = 1.0 * std::abs(int(v)) / UCI::NormalizeToPawnValue; // magnitude only; the sign is printed separately below
- buffer[0] = (v < 0 ? '-' : v > 0 ? '+' : ' ');
-
- double cp = 1.0 * std::abs(int(v)) / UCI::NormalizeToPawnValue;
- sprintf(&buffer[1], "%6.2f", cp);
+ stream << (v < 0 ? '-' : v > 0 ? '+' : ' ') // '-' / '+' / blank for negative / positive / zero
+ << std::setiosflags(std::ios::fixed) // NOTE: fixed notation stays set on the stream after this call
+ << std::setw(6) // field width applies to the next insertion only (cp)
+ << std::setprecision(2) // NOTE: precision 2 also stays set on the stream after this call
+ << cp;
}
@@ -332,17 +338,10 @@ namespace Stockfish::Eval::NNUE {
for (std::size_t bucket = 0; bucket < LayerStacks; ++bucket)
{
- char buffer[3][8];
- std::memset(buffer, '\0', sizeof(buffer));
-
- format_cp_aligned_dot(t.psqt[bucket], buffer[0]);
- format_cp_aligned_dot(t.positional[bucket], buffer[1]);
- format_cp_aligned_dot(t.psqt[bucket] + t.positional[bucket], buffer[2]);
-
- ss << "| " << bucket << " "
- << " | " << buffer[0] << " "
- << " | " << buffer[1] << " "
- << " | " << buffer[2] << " "
+ ss << "| " << bucket << " ";
+ ss << " | "; format_cp_aligned_dot(t.psqt[bucket], ss); ss << " "
+ << " | "; format_cp_aligned_dot(t.positional[bucket], ss); ss << " "
+ << " | "; format_cp_aligned_dot(t.psqt[bucket] + t.positional[bucket], ss); ss << " "
<< " |";
if (bucket == t.correctBucket)
ss << " <-- this bucket is used";
diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/evaluate_nnue.h b/DroidFishApp/src/main/cpp/stockfish/nnue/evaluate_nnue.h
index 2e4f1f5..b84bed8 100644
--- a/DroidFishApp/src/main/cpp/stockfish/nnue/evaluate_nnue.h
+++ b/DroidFishApp/src/main/cpp/stockfish/nnue/evaluate_nnue.h
@@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
- Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
+ Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -31,6 +31,7 @@ namespace Stockfish::Eval::NNUE {
constexpr std::uint32_t HashValue =
FeatureTransformer::get_hash_value() ^ Network::get_hash_value();
+
// Deleter for automating release of memory area
template
struct AlignedDeleter {
@@ -54,6 +55,14 @@ namespace Stockfish::Eval::NNUE {
template
using LargePagePtr = std::unique_ptr>;
+ std::string trace(Position& pos);
+ Value evaluate(const Position& pos, bool adjusted = false, int* complexity = nullptr);
+ void hint_common_parent_position(const Position& pos);
+
+ bool load_eval(std::string name, std::istream& stream);
+ bool save_eval(std::ostream& stream);
+ bool save_eval(const std::optional& filename);
+
} // namespace Stockfish::Eval::NNUE
#endif // #ifndef NNUE_EVALUATE_NNUE_H_INCLUDED
diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2_hm.cpp b/DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2_hm.cpp
index 7dbd341..19ebb15 100644
--- a/DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2_hm.cpp
+++ b/DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2_hm.cpp
@@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
- Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
+ Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2_hm.h b/DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2_hm.h
index a95d432..78063c3 100644
--- a/DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2_hm.h
+++ b/DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2_hm.h
@@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
- Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
+ Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/layers/affine_transform.h b/DroidFishApp/src/main/cpp/stockfish/nnue/layers/affine_transform.h
index 461a7b8..9e2f2f9 100644
--- a/DroidFishApp/src/main/cpp/stockfish/nnue/layers/affine_transform.h
+++ b/DroidFishApp/src/main/cpp/stockfish/nnue/layers/affine_transform.h
@@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
- Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
+ Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -31,7 +31,7 @@
This file contains the definition for a fully connected layer (aka affine transform).
Two approaches are employed, depending on the sizes of the transform.
- Approach 1:
+ Approach 1 (a specialization for large inputs):
- used when the PaddedInputDimensions >= 128
- uses AVX512 if possible
- processes inputs in batches of 2*InputSimdWidth
@@ -42,9 +42,8 @@
depends on the architecture (the amount of registers)
- accumulate + hadd is used
- Approach 2:
+ Approach 2 (a specialization for small inputs):
- used when the PaddedInputDimensions < 128
- - does not use AVX512
- expected use-case is for when PaddedInputDimensions == 32 and InputDimensions <= 32.
- that's why AVX512 is hard to implement
- expected use-case is small layers
@@ -72,6 +71,10 @@ namespace Stockfish::Eval::NNUE::Layers {
const __m64 Zeros = _mm_setzero_si64();
const auto inputVector = reinterpret_cast(input);
+# elif defined(USE_NEON_DOTPROD)
+ constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 16) / 16;
+ const auto inputVector = reinterpret_cast(input);
+
# elif defined(USE_NEON)
constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 16) / 16;
const auto inputVector = reinterpret_cast(input);
@@ -123,6 +126,14 @@ namespace Stockfish::Eval::NNUE::Layers {
sum = _mm_add_pi32(sum, _mm_unpackhi_pi32(sum, sum));
output[i] = _mm_cvtsi64_si32(sum);
+# elif defined(USE_NEON_DOTPROD)
+ int32x4_t sum = {biases[i]};
+ const auto row = reinterpret_cast(&weights[offset]);
+ for (IndexType j = 0; j < NumChunks; ++j) {
+ sum = vdotq_s32(sum, inputVector[j], row[j]);
+ }
+ output[i] = vaddvq_s32(sum);
+
# elif defined(USE_NEON)
int32x4_t sum = {biases[i]};
const auto row = reinterpret_cast(&weights[offset]);
@@ -157,7 +168,7 @@ namespace Stockfish::Eval::NNUE::Layers {
constexpr IndexType LargeInputSize = std::numeric_limits::max();
#endif
- // A specialization for large inputs.
+ // A specialization for large inputs
template
class AffineTransform(InDims, MaxSimdWidth) >= LargeInputSize)>> {
public:
@@ -176,36 +187,39 @@ namespace Stockfish::Eval::NNUE::Layers {
using OutputBuffer = OutputType[PaddedOutputDimensions];
- static_assert(PaddedInputDimensions >= LargeInputSize, "Something went wrong. This specialization should not have been chosen.");
+ static_assert(PaddedInputDimensions >= LargeInputSize, "Something went wrong. This specialization (for large inputs) should not have been chosen.");
#if defined (USE_AVX512)
- static constexpr const IndexType InputSimdWidth = 64;
- static constexpr const IndexType MaxNumOutputRegs = 16;
+ static constexpr IndexType InputSimdWidth = 64;
+ static constexpr IndexType MaxNumOutputRegs = 16;
#elif defined (USE_AVX2)
- static constexpr const IndexType InputSimdWidth = 32;
- static constexpr const IndexType MaxNumOutputRegs = 8;
+ static constexpr IndexType InputSimdWidth = 32;
+ static constexpr IndexType MaxNumOutputRegs = 8;
#elif defined (USE_SSSE3)
- static constexpr const IndexType InputSimdWidth = 16;
- static constexpr const IndexType MaxNumOutputRegs = 8;
+ static constexpr IndexType InputSimdWidth = 16;
+ static constexpr IndexType MaxNumOutputRegs = 8;
+#elif defined (USE_NEON_DOTPROD)
+ static constexpr IndexType InputSimdWidth = 16;
+ static constexpr IndexType MaxNumOutputRegs = 8;
#elif defined (USE_NEON)
- static constexpr const IndexType InputSimdWidth = 8;
- static constexpr const IndexType MaxNumOutputRegs = 8;
+ static constexpr IndexType InputSimdWidth = 8;
+ static constexpr IndexType MaxNumOutputRegs = 8;
#else
// The fallback implementation will not have permuted weights.
// We define these to avoid a lot of ifdefs later.
- static constexpr const IndexType InputSimdWidth = 1;
- static constexpr const IndexType MaxNumOutputRegs = 1;
+ static constexpr IndexType InputSimdWidth = 1;
+ static constexpr IndexType MaxNumOutputRegs = 1;
#endif
// A big block is a region in the weight matrix of the size [PaddedInputDimensions, NumOutputRegs].
// A small block is a region of size [InputSimdWidth, 1]
- static constexpr const IndexType NumOutputRegs = std::min(MaxNumOutputRegs, OutputDimensions);
- static constexpr const IndexType SmallBlockSize = InputSimdWidth;
- static constexpr const IndexType BigBlockSize = NumOutputRegs * PaddedInputDimensions;
- static constexpr const IndexType NumSmallBlocksInBigBlock = BigBlockSize / SmallBlockSize;
- static constexpr const IndexType NumSmallBlocksPerOutput = PaddedInputDimensions / SmallBlockSize;
- static constexpr const IndexType NumBigBlocks = OutputDimensions / NumOutputRegs;
+ static constexpr IndexType NumOutputRegs = std::min(MaxNumOutputRegs, OutputDimensions);
+ static constexpr IndexType SmallBlockSize = InputSimdWidth;
+ static constexpr IndexType BigBlockSize = NumOutputRegs * PaddedInputDimensions;
+ static constexpr IndexType NumSmallBlocksInBigBlock = BigBlockSize / SmallBlockSize;
+ static constexpr IndexType NumSmallBlocksPerOutput = PaddedInputDimensions / SmallBlockSize;
+ static constexpr IndexType NumBigBlocks = OutputDimensions / NumOutputRegs;
static_assert(OutputDimensions % NumOutputRegs == 0);
@@ -241,8 +255,7 @@ namespace Stockfish::Eval::NNUE::Layers {
// Read network parameters
bool read_parameters(std::istream& stream) {
- for (IndexType i = 0; i < OutputDimensions; ++i)
- biases[i] = read_little_endian(stream);
+ read_little_endian(stream, biases, OutputDimensions);
for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
weights[get_weight_index(i)] = read_little_endian(stream);
@@ -252,8 +265,7 @@ namespace Stockfish::Eval::NNUE::Layers {
// Write network parameters
bool write_parameters(std::ostream& stream) const {
- for (IndexType i = 0; i < OutputDimensions; ++i)
- write_little_endian(stream, biases[i]);
+ write_little_endian(stream, biases, OutputDimensions);
for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
write_little_endian(stream, weights[get_weight_index(i)]);
@@ -292,6 +304,15 @@ namespace Stockfish::Eval::NNUE::Layers {
#define vec_add_dpbusd_32x2 Simd::m128_add_dpbusd_epi32x2
#define vec_hadd Simd::m128_hadd
#define vec_haddx4 Simd::m128_haddx4
+#elif defined (USE_NEON_DOTPROD)
+ using acc_vec_t = int32x4_t;
+ using bias_vec_t = int32x4_t;
+ using weight_vec_t = int8x16_t;
+ using in_vec_t = int8x16_t;
+ #define vec_zero {0}
+ #define vec_add_dpbusd_32x2 Simd::dotprod_m128_add_dpbusd_epi32x2
+ #define vec_hadd Simd::neon_m128_hadd
+ #define vec_haddx4 Simd::neon_m128_haddx4
#elif defined (USE_NEON)
using acc_vec_t = int32x4_t;
using bias_vec_t = int32x4_t;
@@ -374,6 +395,7 @@ namespace Stockfish::Eval::NNUE::Layers {
alignas(CacheLineSize) WeightType weights[OutputDimensions * PaddedInputDimensions];
};
+ // A specialization for small inputs
template
class AffineTransform(InDims, MaxSimdWidth) < LargeInputSize)>> {
public:
@@ -393,12 +415,7 @@ namespace Stockfish::Eval::NNUE::Layers {
using OutputBuffer = OutputType[PaddedOutputDimensions];
- static_assert(PaddedInputDimensions < LargeInputSize, "Something went wrong. This specialization should not have been chosen.");
-
-#if defined (USE_SSSE3)
- static constexpr const IndexType OutputSimdWidth = SimdWidth / 4;
- static constexpr const IndexType InputSimdWidth = SimdWidth;
-#endif
+ static_assert(PaddedInputDimensions < LargeInputSize, "Something went wrong. This specialization (for small inputs) should not have been chosen.");
// Hash value embedded in the evaluation file
static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) {
@@ -428,8 +445,7 @@ namespace Stockfish::Eval::NNUE::Layers {
// Read network parameters
bool read_parameters(std::istream& stream) {
- for (IndexType i = 0; i < OutputDimensions; ++i)
- biases[i] = read_little_endian(stream);
+ read_little_endian(stream, biases, OutputDimensions);
for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
weights[get_weight_index(i)] = read_little_endian(stream);
@@ -438,8 +454,7 @@ namespace Stockfish::Eval::NNUE::Layers {
// Write network parameters
bool write_parameters(std::ostream& stream) const {
- for (IndexType i = 0; i < OutputDimensions; ++i)
- write_little_endian(stream, biases[i]);
+ write_little_endian(stream, biases, OutputDimensions);
for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
write_little_endian(stream, weights[get_weight_index(i)]);
@@ -450,29 +465,34 @@ namespace Stockfish::Eval::NNUE::Layers {
const OutputType* propagate(
const InputType* input, OutputType* output) const {
-#if defined (USE_AVX2)
+#if defined (USE_AVX512)
+ using vec_t = __m512i;
+ #define vec_setzero _mm512_setzero_si512
+ #define vec_set_32 _mm512_set1_epi32
+ #define vec_add_dpbusd_32 Simd::m512_add_dpbusd_epi32
+ #define vec_add_dpbusd_32x2 Simd::m512_add_dpbusd_epi32x2
+ #define vec_hadd Simd::m512_hadd
+#elif defined (USE_AVX2)
using vec_t = __m256i;
#define vec_setzero _mm256_setzero_si256
#define vec_set_32 _mm256_set1_epi32
#define vec_add_dpbusd_32 Simd::m256_add_dpbusd_epi32
#define vec_add_dpbusd_32x2 Simd::m256_add_dpbusd_epi32x2
- #define vec_add_dpbusd_32x4 Simd::m256_add_dpbusd_epi32x4
#define vec_hadd Simd::m256_hadd
- #define vec_haddx4 Simd::m256_haddx4
#elif defined (USE_SSSE3)
using vec_t = __m128i;
#define vec_setzero _mm_setzero_si128
#define vec_set_32 _mm_set1_epi32
#define vec_add_dpbusd_32 Simd::m128_add_dpbusd_epi32
#define vec_add_dpbusd_32x2 Simd::m128_add_dpbusd_epi32x2
- #define vec_add_dpbusd_32x4 Simd::m128_add_dpbusd_epi32x4
#define vec_hadd Simd::m128_hadd
- #define vec_haddx4 Simd::m128_haddx4
#endif
#if defined (USE_SSSE3)
const auto inputVector = reinterpret_cast(input);
+ static constexpr IndexType OutputSimdWidth = sizeof(vec_t) / sizeof(OutputType);
+
static_assert(OutputDimensions % OutputSimdWidth == 0 || OutputDimensions == 1);
if constexpr (OutputDimensions % OutputSimdWidth == 0)
@@ -518,9 +538,7 @@ namespace Stockfish::Eval::NNUE::Layers {
# undef vec_set_32
# undef vec_add_dpbusd_32
# undef vec_add_dpbusd_32x2
-# undef vec_add_dpbusd_32x4
# undef vec_hadd
-# undef vec_haddx4
#else
// Use old implementation for the other architectures.
affine_transform_non_ssse3<
diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/layers/affine_transform_sparse_input.h b/DroidFishApp/src/main/cpp/stockfish/nnue/layers/affine_transform_sparse_input.h
new file mode 100644
index 0000000..e0c3a8a
--- /dev/null
+++ b/DroidFishApp/src/main/cpp/stockfish/nnue/layers/affine_transform_sparse_input.h
@@ -0,0 +1,286 @@
+/*
+ Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+ Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file)
+
+ Stockfish is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ Stockfish is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see .
+*/
+
+// Definition of layer AffineTransformSparseInput of NNUE evaluation function
+
+#ifndef NNUE_LAYERS_AFFINE_TRANSFORM_SPARSE_INPUT_H_INCLUDED
+#define NNUE_LAYERS_AFFINE_TRANSFORM_SPARSE_INPUT_H_INCLUDED
+
+#include
+#include
+#include
+#include
+#include "../nnue_common.h"
+#include "affine_transform.h"
+#include "simd.h"
+
+/*
+ This file contains the definition for a fully connected layer (aka affine transform) with block sparse input.
+*/
+
+namespace Stockfish::Eval::NNUE::Layers {
+#if defined(__GNUC__) // GCC, Clang, ICC
+
+ static inline IndexType lsb_(std::uint32_t b) { // index of the least significant set bit of b (GCC/Clang/ICC variant)
+ assert(b); // the builtin's result is undefined for 0, so b must be nonzero
+ return IndexType(__builtin_ctzl(b)); // number of trailing zero bits == index of the lowest 1 bit
+ }
+
+#elif defined(_MSC_VER) // MSVC
+
+ static inline IndexType lsb_(std::uint32_t b) { // index of the least significant set bit of b (MSVC variant)
+ assert(b); // _BitScanForward leaves idx unspecified when no bit is set, so b must be nonzero
+ unsigned long idx; // written by _BitScanForward below
+ _BitScanForward(&idx, b); // scans from bit 0 upwards for the first set bit
+ return (IndexType) idx;
+ }
+
+#else // Compiler is neither GCC nor MSVC compatible
+
+#error "Compiler not supported."
+
+#endif
+
+
+#if defined(USE_SSSE3)
+ alignas(CacheLineSize) static inline const std::array, 256> lookup_indices = [](){ // table indexed by an 8-bit mask: row i lists the bit positions set in i
+ std::array, 256> v{}; // value-initialized, so slots past the last set bit stay 0
+ for (int i = 0; i < 256; ++i) // one row per possible 8-bit mask
+ {
+ int j = i; // remaining bits of the mask still to be listed
+ int k = 0; // next free slot in row i
+ while(j)
+ {
+ const IndexType lsbIndex = lsb_(std::uint32_t(j)); // position of the lowest remaining set bit
+ j &= j - 1; // clear that lowest set bit
+ v[i][k] = lsbIndex; // positions therefore end up in ascending order
+ ++k;
+ }
+ }
+ return v;
+ }(); // the lambda runs once, when the table is initialized
+ alignas(CacheLineSize) static inline const std::array lookup_count = [](){ // table indexed by an 8-bit mask: entry i is the number of set bits in i
+ std::array v; // not value-initialized; every entry 0..255 is assigned in the loop below
+ for (int i = 0; i < 256; ++i) // one entry per possible 8-bit mask
+ {
+ int j = i; // remaining bits of the mask still to be counted
+ int k = 0; // set bits seen so far
+ while(j)
+ {
+ j &= j - 1; // clear the lowest set bit; one iteration per set bit
+ ++k;
+ }
+ v[i] = k;
+ }
+ return v;
+ }(); // the lambda runs once, when the table is initialized
+
+ // Find indices of nonzero numbers in an int32_t array: the indices are written to out[] in ascending order and their number to count_out
+ template
+ void find_nnz(const std::int32_t* input, std::uint16_t* out, IndexType& count_out) {
+#if defined (USE_AVX512)
+ using vec_t = __m512i;
+ #define vec_nnz(a) _mm512_cmpgt_epi32_mask(a, _mm512_setzero_si512())
+#elif defined (USE_AVX2)
+ using vec_t = __m256i;
+ #define vec_nnz(a) _mm256_movemask_ps(_mm256_castsi256_ps(_mm256_cmpgt_epi32(a, _mm256_setzero_si256())))
+#elif defined (USE_SSSE3)
+ using vec_t = __m128i;
+ #define vec_nnz(a) _mm_movemask_ps(_mm_castsi128_ps(_mm_cmpgt_epi32(a, _mm_setzero_si128())))
+#endif
+ constexpr IndexType InputSimdWidth = sizeof(vec_t) / sizeof(std::int32_t); // number of 32-bit lanes per vector
+ // Inputs are processed InputSimdWidth at a time and outputs are processed 8 at a time so we process in chunks of max(InputSimdWidth, 8)
+ constexpr IndexType ChunkSize = std::max(InputSimdWidth, 8);
+ constexpr IndexType NumChunks = InputDimensions / ChunkSize; // integer division: a tail shorter than ChunkSize would not be scanned
+ constexpr IndexType InputsPerChunk = ChunkSize / InputSimdWidth; // vector loads needed to cover one chunk
+ constexpr IndexType OutputsPerChunk = ChunkSize / 8; // 8-bit mask groups (table lookups) per chunk
+
+ const auto inputVector = reinterpret_cast(input); // NOTE(review): dereferenced as vec_t below, so input presumably has to be vector-aligned - confirm
+ IndexType count = 0; // number of indices written to out so far
+ __m128i base = _mm_set1_epi16(0); // index of the first element of the current group of 8, replicated in every 16-bit lane
+ __m128i increment = _mm_set1_epi16(8); // step applied to base after each group of 8
+ for (IndexType i = 0; i < NumChunks; ++i)
+ {
+ // bitmask of nonzero values in this chunk (vec_nnz tests "> 0" on signed 32-bit lanes; NOTE(review): presumably lanes are never negative - confirm)
+ unsigned nnz = 0;
+ for (IndexType j = 0; j < InputsPerChunk; ++j)
+ {
+ const vec_t inputChunk = inputVector[i * InputsPerChunk + j];
+ nnz |= (unsigned)vec_nnz(inputChunk) << (j * InputSimdWidth); // one mask bit per lane, placed at that lane's position within the chunk
+ }
+ for (IndexType j = 0; j < OutputsPerChunk; ++j)
+ {
+ const auto lookup = (nnz >> (j * 8)) & 0xFF; // mask bits of the j-th group of 8 lanes
+ const auto offsets = _mm_loadu_si128(reinterpret_cast(&lookup_indices[lookup])); // 128-bit load of the table row listing the set-bit positions of lookup
+ _mm_storeu_si128(reinterpret_cast<__m128i*>(out + count), _mm_add_epi16(base, offsets)); // always stores 8 entries, so out needs 8 writable slots at out + count
+ count += lookup_count[lookup]; // only the entries that correspond to set bits are kept
+ base = _mm_add_epi16(base, increment); // advance to the next group of 8 elements
+ }
+ }
+ count_out = count;
+ }
+# undef vec_nnz
+#endif
+
+ // Sparse input implementation: an affine layer whose SSSE3+ path only processes the 4-byte input blocks that find_nnz reports
+ template
+ class AffineTransformSparseInput {
+ public:
+ // Input/output type
+ // (unsigned 8-bit inputs, signed 32-bit outputs)
+ using InputType = std::uint8_t;
+ using OutputType = std::int32_t;
+
+ // Number of input/output dimensions
+ static constexpr IndexType InputDimensions = InDims;
+ static constexpr IndexType OutputDimensions = OutDims;
+
+ static_assert(OutputDimensions % 16 == 0, "Only implemented for OutputDimensions divisible by 16.");
+
+ static constexpr IndexType PaddedInputDimensions =
+ ceil_to_multiple(InputDimensions, MaxSimdWidth);
+ static constexpr IndexType PaddedOutputDimensions =
+ ceil_to_multiple(OutputDimensions, MaxSimdWidth);
+
+#if defined (USE_SSSE3)
+ static constexpr IndexType ChunkSize = 4; // four 8-bit inputs are handled together as one 32-bit block
+#else
+ static constexpr IndexType ChunkSize = 1;
+#endif
+
+ using OutputBuffer = OutputType[PaddedOutputDimensions];
+
+ // Hash value embedded in the evaluation file
+ static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) {
+ std::uint32_t hashValue = 0xCC03DAE4u; // fixed starting constant for this layer type
+ hashValue += OutputDimensions;
+ hashValue ^= prevHash >> 1; // these two lines XOR in prevHash rotated right by one bit
+ hashValue ^= prevHash << 31;
+ return hashValue;
+ }
+
+ static IndexType get_weight_index_scrambled(IndexType i) // maps index i = output * PaddedInputDimensions + input to the block-grouped layout read by propagate()
+ {
+ return
+ (i / ChunkSize) % (PaddedInputDimensions / ChunkSize) * OutputDimensions * ChunkSize + // start of the region holding this input block for all outputs
+ i / PaddedInputDimensions * ChunkSize + // offset of this output's ChunkSize weights within that region
+ i % ChunkSize; // position within the input block
+ }
+
+ static IndexType get_weight_index(IndexType i) // in-memory position of the i-th weight in file order
+ {
+#if defined (USE_SSSE3)
+ return get_weight_index_scrambled(i);
+#else
+ return i;
+#endif
+ }
+
+ // Read network parameters: biases first, then every weight, stored at its get_weight_index() position
+ bool read_parameters(std::istream& stream) {
+ read_little_endian(stream, biases, OutputDimensions);
+ for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
+ weights[get_weight_index(i)] = read_little_endian(stream);
+
+ return !stream.fail(); // false if any read left the stream in a failed state
+ }
+
+ // Write network parameters in the same order read_parameters() consumes them
+ bool write_parameters(std::ostream& stream) const {
+ write_little_endian(stream, biases, OutputDimensions);
+
+ for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
+ write_little_endian(stream, weights[get_weight_index(i)]);
+
+ return !stream.fail(); // false if any write left the stream in a failed state
+ }
+ // Forward propagation: writes the layer result to output and returns that same pointer
+ const OutputType* propagate(
+ const InputType* input, OutputType* output) const {
+
+#if defined (USE_SSSE3)
+#if defined (USE_AVX512)
+ using vec_t = __m512i;
+ #define vec_setzero _mm512_setzero_si512
+ #define vec_set_32 _mm512_set1_epi32
+ #define vec_add_dpbusd_32 Simd::m512_add_dpbusd_epi32
+#elif defined (USE_AVX2)
+ using vec_t = __m256i;
+ #define vec_setzero _mm256_setzero_si256
+ #define vec_set_32 _mm256_set1_epi32
+ #define vec_add_dpbusd_32 Simd::m256_add_dpbusd_epi32
+#elif defined (USE_SSSE3)
+ using vec_t = __m128i;
+ #define vec_setzero _mm_setzero_si128
+ #define vec_set_32 _mm_set1_epi32
+ #define vec_add_dpbusd_32 Simd::m128_add_dpbusd_epi32
+#endif
+ static constexpr IndexType OutputSimdWidth = sizeof(vec_t) / sizeof(OutputType); // 32-bit outputs per vector register
+
+ constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 8) / ChunkSize; // ceil_to_multiple(InputDimensions, 8) input bytes split into ChunkSize-byte blocks
+ constexpr IndexType NumRegs = OutputDimensions / OutputSimdWidth; // vector accumulators needed to hold all outputs
+ std::uint16_t nnz[NumChunks]; // block indices filled in by find_nnz
+ IndexType count; // number of valid entries in nnz, set by find_nnz
+
+ const auto input32 = reinterpret_cast(input); // the input viewed as 32-bit blocks (find_nnz takes a const std::int32_t*)
+
+ // Find indices of nonzero 32bit blocks
+ find_nnz(input32, nnz, count);
+
+ const vec_t* biasvec = reinterpret_cast(biases);
+ vec_t acc[NumRegs];
+ for (IndexType k = 0; k < NumRegs; ++k) // every accumulator starts from the biases
+ acc[k] = biasvec[k];
+
+ for (IndexType j = 0; j < count; ++j) // visit only the blocks listed in nnz
+ {
+ const auto i = nnz[j]; // index of a 32-bit input block
+ const vec_t in = vec_set_32(input32[i]); // broadcast that block to every 32-bit lane
+ const auto col = reinterpret_cast(&weights[i * OutputDimensions * ChunkSize]); // weights of input block i for all outputs (layout from get_weight_index_scrambled)
+ for (IndexType k = 0; k < NumRegs; ++k)
+ vec_add_dpbusd_32(acc[k], in, col[k]); // presumably acc[k] += per-lane dot product of in and col[k] - see the Simd::*_add_dpbusd_epi32 helpers
+ }
+
+ vec_t* outptr = reinterpret_cast(output);
+ for (IndexType k = 0; k < NumRegs; ++k) // copy the accumulators to the output buffer
+ outptr[k] = acc[k];
+# undef vec_setzero
+# undef vec_set_32
+# undef vec_add_dpbusd_32
+#else
+ // Use dense implementation for the other architectures.
+ affine_transform_non_ssse3<
+ InputDimensions,
+ PaddedInputDimensions,
+ OutputDimensions>(output, weights, biases, input);
+#endif
+
+ return output;
+ }
+
+ private:
+ using BiasType = OutputType;
+ using WeightType = std::int8_t;
+
+ alignas(CacheLineSize) BiasType biases[OutputDimensions];
+ alignas(CacheLineSize) WeightType weights[OutputDimensions * PaddedInputDimensions]; // stored at get_weight_index() positions (block-grouped on SSSE3+ builds)
+ };
+
+} // namespace Stockfish::Eval::NNUE::Layers
+
+#endif // #ifndef NNUE_LAYERS_AFFINE_TRANSFORM_SPARSE_INPUT_H_INCLUDED
diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/layers/clipped_relu.h b/DroidFishApp/src/main/cpp/stockfish/nnue/layers/clipped_relu.h
index f94d308..51e562d 100644
--- a/DroidFishApp/src/main/cpp/stockfish/nnue/layers/clipped_relu.h
+++ b/DroidFishApp/src/main/cpp/stockfish/nnue/layers/clipped_relu.h
@@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
- Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
+ Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/layers/simd.h b/DroidFishApp/src/main/cpp/stockfish/nnue/layers/simd.h
index 7b9e8fb..22c5198 100644
--- a/DroidFishApp/src/main/cpp/stockfish/nnue/layers/simd.h
+++ b/DroidFishApp/src/main/cpp/stockfish/nnue/layers/simd.h
@@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
- Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
+ Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -153,7 +153,7 @@ namespace Stockfish::Simd {
asm(
"vpdpbusd %[b0], %[a0], %[acc]\n\t"
"vpdpbusd %[b1], %[a1], %[acc]\n\t"
- : [acc]"+v"(acc)
+ : [acc]"+&v"(acc)
: [a0]"v"(a0), [b0]"vm"(b0), [a1]"v"(a1), [b1]"vm"(b1)
);
# else
@@ -165,18 +165,19 @@ namespace Stockfish::Simd {
__m512i tmp0 = _mm512_maddubs_epi16(a0, b0);
__m512i tmp1 = _mm512_maddubs_epi16(a1, b1);
asm(
- "vpaddsw %[tmp0], %[tmp1], %[tmp0]\n\t"
"vpmaddwd %[tmp0], %[ones], %[tmp0]\n\t"
+ "vpmaddwd %[tmp1], %[ones], %[tmp1]\n\t"
+ "vpaddd %[tmp0], %[tmp1], %[tmp0]\n\t"
"vpaddd %[acc], %[tmp0], %[acc]\n\t"
- : [acc]"+v"(acc), [tmp0]"+&v"(tmp0)
- : [tmp1]"v"(tmp1), [ones]"v"(_mm512_set1_epi16(1))
+ : [acc]"+v"(acc), [tmp0]"+&v"(tmp0), [tmp1]"+&v"(tmp1)
+ : [ones]"v"(_mm512_set1_epi16(1))
);
# else
__m512i product0 = _mm512_maddubs_epi16(a0, b0);
__m512i product1 = _mm512_maddubs_epi16(a1, b1);
- product0 = _mm512_adds_epi16(product0, product1);
product0 = _mm512_madd_epi16(product0, _mm512_set1_epi16(1));
- acc = _mm512_add_epi32(acc, product0);
+ product1 = _mm512_madd_epi16(product1, _mm512_set1_epi16(1));
+ acc = _mm512_add_epi32(acc, _mm512_add_epi32(product0, product1));
# endif
# endif
}
@@ -249,7 +250,7 @@ namespace Stockfish::Simd {
asm(
VNNI_PREFIX "vpdpbusd %[b0], %[a0], %[acc]\n\t"
VNNI_PREFIX "vpdpbusd %[b1], %[a1], %[acc]\n\t"
- : [acc]"+v"(acc)
+ : [acc]"+&v"(acc)
: [a0]"v"(a0), [b0]"vm"(b0), [a1]"v"(a1), [b1]"vm"(b1)
);
# else
@@ -261,18 +262,19 @@ namespace Stockfish::Simd {
__m256i tmp0 = _mm256_maddubs_epi16(a0, b0);
__m256i tmp1 = _mm256_maddubs_epi16(a1, b1);
asm(
- "vpaddsw %[tmp0], %[tmp1], %[tmp0]\n\t"
"vpmaddwd %[tmp0], %[ones], %[tmp0]\n\t"
+ "vpmaddwd %[tmp1], %[ones], %[tmp1]\n\t"
+ "vpaddd %[tmp0], %[tmp1], %[tmp0]\n\t"
"vpaddd %[acc], %[tmp0], %[acc]\n\t"
- : [acc]"+v"(acc), [tmp0]"+&v"(tmp0)
- : [tmp1]"v"(tmp1), [ones]"v"(_mm256_set1_epi16(1))
+ : [acc]"+v"(acc), [tmp0]"+&v"(tmp0), [tmp1]"+&v"(tmp1)
+ : [ones]"v"(_mm256_set1_epi16(1))
);
# else
__m256i product0 = _mm256_maddubs_epi16(a0, b0);
__m256i product1 = _mm256_maddubs_epi16(a1, b1);
- product0 = _mm256_adds_epi16(product0, product1);
product0 = _mm256_madd_epi16(product0, _mm256_set1_epi16(1));
- acc = _mm256_add_epi32(acc, product0);
+ product1 = _mm256_madd_epi16(product1, _mm256_set1_epi16(1));
+ acc = _mm256_add_epi32(acc, _mm256_add_epi32(product0, product1));
# endif
# endif
}
@@ -326,23 +328,37 @@ namespace Stockfish::Simd {
__m128i tmp0 = _mm_maddubs_epi16(a0, b0);
__m128i tmp1 = _mm_maddubs_epi16(a1, b1);
asm(
- "paddsw %[tmp1], %[tmp0]\n\t"
"pmaddwd %[ones], %[tmp0]\n\t"
+ "pmaddwd %[ones], %[tmp1]\n\t"
+ "paddd %[tmp1], %[tmp0]\n\t"
"paddd %[tmp0], %[acc]\n\t"
- : [acc]"+v"(acc), [tmp0]"+&v"(tmp0)
- : [tmp1]"v"(tmp1), [ones]"v"(_mm_set1_epi16(1))
+ : [acc]"+v"(acc), [tmp0]"+&v"(tmp0), [tmp1]"+&v"(tmp1)
+ : [ones]"v"(_mm_set1_epi16(1))
);
# else
__m128i product0 = _mm_maddubs_epi16(a0, b0);
__m128i product1 = _mm_maddubs_epi16(a1, b1);
- product0 = _mm_adds_epi16(product0, product1);
product0 = _mm_madd_epi16(product0, _mm_set1_epi16(1));
- acc = _mm_add_epi32(acc, product0);
+ product1 = _mm_madd_epi16(product1, _mm_set1_epi16(1));
+ acc = _mm_add_epi32(acc, _mm_add_epi32(product0, product1));
# endif
}
#endif
+#if defined (USE_NEON_DOTPROD)
+
+    // Adds the dot products of (a0, b0) and of (a1, b1) to the 32-bit lanes of acc.
+    [[maybe_unused]] static void dotprod_m128_add_dpbusd_epi32x2(
+        int32x4_t& acc,
+        int8x16_t a0, int8x16_t b0,
+        int8x16_t a1, int8x16_t b1) {
+
+      const int32x4_t afterFirstPair = vdotq_s32(acc, a0, b0);
+      acc = vdotq_s32(afterFirstPair, a1, b1);
+    }
+
+#endif
+
#if defined (USE_NEON)
[[maybe_unused]] static int neon_m128_reduce_add_epi32(int32x4_t s) {
diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/layers/sqr_clipped_relu.h b/DroidFishApp/src/main/cpp/stockfish/nnue/layers/sqr_clipped_relu.h
index b603a27..3fbb243 100644
--- a/DroidFishApp/src/main/cpp/stockfish/nnue/layers/sqr_clipped_relu.h
+++ b/DroidFishApp/src/main/cpp/stockfish/nnue/layers/sqr_clipped_relu.h
@@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
- Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
+ Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -106,7 +106,7 @@ namespace Stockfish::Eval::NNUE::Layers {
for (IndexType i = Start; i < InputDimensions; ++i) {
output[i] = static_cast(
- // realy should be /127 but we need to make it fast
+ // really should be /127 but we need to make it fast
// needs to be accounted for in the trainer
std::max(0ll, std::min(127ll, (((long long)input[i] * input[i]) >> (2 * WeightScaleBits)) / 128)));
}
diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_accumulator.h b/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_accumulator.h
index 600483b..8eba449 100644
--- a/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_accumulator.h
+++ b/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_accumulator.h
@@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
- Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
+ Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_architecture.h b/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_architecture.h
index cac8373..413dbb3 100644
--- a/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_architecture.h
+++ b/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_architecture.h
@@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
- Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
+ Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -27,6 +27,7 @@
#include "features/half_ka_v2_hm.h"
+#include "layers/affine_transform_sparse_input.h"
#include "layers/affine_transform.h"
#include "layers/clipped_relu.h"
#include "layers/sqr_clipped_relu.h"
@@ -39,7 +40,7 @@ namespace Stockfish::Eval::NNUE {
using FeatureSet = Features::HalfKAv2_hm;
// Number of input feature dimensions after conversion
-constexpr IndexType TransformedFeatureDimensions = 1024;
+constexpr IndexType TransformedFeatureDimensions = 1536;
constexpr IndexType PSQTBuckets = 8;
constexpr IndexType LayerStacks = 8;
@@ -48,7 +49,7 @@ struct Network
static constexpr int FC_0_OUTPUTS = 15;
static constexpr int FC_1_OUTPUTS = 32;
- Layers::AffineTransform fc_0;
+ Layers::AffineTransformSparseInput fc_0;
Layers::SqrClippedReLU ac_sqr_0;
Layers::ClippedReLU ac_0;
Layers::AffineTransform fc_1;
@@ -72,22 +73,20 @@ struct Network
// Read network parameters
bool read_parameters(std::istream& stream) {
- if (!fc_0.read_parameters(stream)) return false;
- if (!ac_0.read_parameters(stream)) return false;
- if (!fc_1.read_parameters(stream)) return false;
- if (!ac_1.read_parameters(stream)) return false;
- if (!fc_2.read_parameters(stream)) return false;
- return true;
+ return fc_0.read_parameters(stream)
+ && ac_0.read_parameters(stream)
+ && fc_1.read_parameters(stream)
+ && ac_1.read_parameters(stream)
+ && fc_2.read_parameters(stream);
}
- // Read network parameters
+ // Write network parameters
bool write_parameters(std::ostream& stream) const {
- if (!fc_0.write_parameters(stream)) return false;
- if (!ac_0.write_parameters(stream)) return false;
- if (!fc_1.write_parameters(stream)) return false;
- if (!ac_1.write_parameters(stream)) return false;
- if (!fc_2.write_parameters(stream)) return false;
- return true;
+ return fc_0.write_parameters(stream)
+ && ac_0.write_parameters(stream)
+ && fc_1.write_parameters(stream)
+ && ac_1.write_parameters(stream)
+ && fc_2.write_parameters(stream);
}
std::int32_t propagate(const TransformedFeatureType* transformedFeatures)
diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_common.h b/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_common.h
index 1795618..d338527 100644
--- a/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_common.h
+++ b/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_common.h
@@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
- Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
+ Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -57,6 +57,9 @@ namespace Stockfish::Eval::NNUE {
// Size of cache line (in bytes)
constexpr std::size_t CacheLineSize = 64;
+ constexpr const char Leb128MagicString[] = "COMPRESSED_LEB128"; // marker written in front of every LEB128-compressed block
+ constexpr const std::size_t Leb128MagicStringSize = sizeof(Leb128MagicString) - 1; // length without the terminating NUL
+
// SIMD width (in bytes)
#if defined(USE_AVX2)
constexpr std::size_t SimdWidth = 32;
@@ -159,6 +162,80 @@ namespace Stockfish::Eval::NNUE {
write_little_endian(stream, values[i]);
}
+    // Reads `count` signed integers from a LEB128-compressed block into `out`.
+    // Block layout: magic string, payload size in bytes (little-endian uint32),
+    // then the payload. A missing magic string or a payload that ends early is
+    // reported by setting failbit on `stream`.
+    template <typename IntType>
+    inline void read_leb_128(std::istream& stream, IntType* out, std::size_t count) {
+        static_assert(std::is_signed_v<IntType>, "Not implemented for unsigned types");
+        static_assert(sizeof(IntType) <= sizeof(std::uint64_t), "Accumulator is 64 bits wide");
+
+        char leb128MagicString[Leb128MagicStringSize] = {};
+        stream.read(leb128MagicString, Leb128MagicStringSize);
+        const bool magicOk =
+            strncmp(Leb128MagicString, leb128MagicString, Leb128MagicStringSize) == 0;
+        assert(magicOk);
+        if (!magicOk) {
+            // assert() vanishes under NDEBUG: fail the stream instead of decoding
+            // bytes that are not a LEB128 block.
+            stream.setstate(std::ios_base::failbit);
+            return;
+        }
+
+        const std::uint32_t BUF_SIZE = 4096;
+        std::uint8_t buf[BUF_SIZE];
+        auto bytes_left = read_little_endian<std::uint32_t>(stream);
+        std::uint32_t buf_pos = BUF_SIZE; // forces a refill before the first byte
+        for (std::size_t i = 0; i < count; ++i) {
+            // Accumulate in a wide unsigned type so that no shift can overflow.
+            std::uint64_t result = 0;
+            std::size_t shift = 0;
+            do {
+                if (bytes_left == 0) {
+                    // Payload ended in the middle of the data: do not read stale
+                    // buffer contents and do not let bytes_left wrap around.
+                    stream.setstate(std::ios_base::failbit);
+                    return;
+                }
+                if (buf_pos == BUF_SIZE) {
+                    stream.read(reinterpret_cast<char*>(buf), std::min(bytes_left, BUF_SIZE));
+                    buf_pos = 0;
+                }
+                const std::uint8_t byte = buf[buf_pos++];
+                --bytes_left;
+                result |= static_cast<std::uint64_t>(byte & 0x7f) << shift;
+                shift += 7;
+                if ((byte & 0x80) == 0) {
+                    // Last group of this value: bit 6 is the sign. Extend it unless
+                    // the groups read so far already cover the whole type.
+                    if (shift < sizeof(IntType) * 8 && (byte & 0x40) != 0)
+                        result |= ~std::uint64_t{0} << shift;
+                    out[i] = static_cast<IntType>(result);
+                    break;
+                }
+            } while (shift < sizeof(IntType) * 8);
+        }
+        assert(bytes_left == 0);
+    }
+
+    // Writes `count` signed integers as one LEB128-compressed block: the magic
+    // string, the size of the encoded payload in bytes (little-endian uint32),
+    // then the payload itself.
+    template <typename IntType>
+    inline void write_leb_128(std::ostream& stream, const IntType* values, std::size_t count) {
+        static_assert(std::is_signed_v<IntType>, "Not implemented for unsigned types");
+        stream.write(Leb128MagicString, Leb128MagicStringSize);
+
+        // First pass: only count the encoded bytes, because the size is stored
+        // in front of the payload.
+        std::uint32_t byte_count = 0;
+        for (std::size_t i = 0; i < count; ++i) {
+            IntType value = values[i];
+            std::uint8_t byte;
+            do {
+                byte = value & 0x7f;
+                value >>= 7;
+                ++byte_count;
+                // The value is complete once what is left is only the sign
+                // extension of bit 6 of the group just taken.
+            } while ((byte & 0x40) == 0 ? value != 0 : value != -1);
+        }
+        write_little_endian(stream, byte_count);
+
+        // Second pass: emit the groups through a small staging buffer.
+        const std::uint32_t BUF_SIZE = 4096;
+        std::uint8_t buf[BUF_SIZE];
+        std::uint32_t buf_pos = 0;
+        auto flush = [&]() {
+            if (buf_pos > 0) {
+                stream.write(reinterpret_cast<char*>(buf), buf_pos);
+                buf_pos = 0;
+            }
+        };
+        auto write = [&](std::uint8_t byte) {
+            buf[buf_pos++] = byte;
+            if (buf_pos == BUF_SIZE) flush();
+        };
+        for (std::size_t i = 0; i < count; ++i) {
+            IntType value = values[i];
+            while (true) {
+                std::uint8_t byte = value & 0x7f;
+                value >>= 7;
+                if ((byte & 0x40) == 0 ? value == 0 : value == -1) {
+                    write(byte); // final group: continuation bit clear
+                    break;
+                }
+                write(byte | 0x80); // more groups follow
+            }
+        }
+        flush();
+    }
+
} // namespace Stockfish::Eval::NNUE
#endif // #ifndef NNUE_COMMON_H_INCLUDED
diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_feature_transformer.h b/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_feature_transformer.h
index b6dd54d..7571f39 100644
--- a/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_feature_transformer.h
+++ b/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_feature_transformer.h
@@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
- Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
+ Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -25,6 +25,7 @@
#include "nnue_architecture.h"
#include // std::memset()
+#include // std::pair
namespace Stockfish::Eval::NNUE {
@@ -41,8 +42,8 @@ namespace Stockfish::Eval::NNUE {
"Per feature PSQT values cannot be processed at granularity lower than 8 at a time.");
#ifdef USE_AVX512
- typedef __m512i vec_t;
- typedef __m256i psqt_vec_t;
+ using vec_t = __m512i;
+ using psqt_vec_t = __m256i;
#define vec_load(a) _mm512_load_si512(a)
#define vec_store(a,b) _mm512_store_si512(a,b)
#define vec_add_16(a,b) _mm512_add_epi16(a,b)
@@ -65,8 +66,8 @@ namespace Stockfish::Eval::NNUE {
#define MaxChunkSize 64
#elif USE_AVX2
- typedef __m256i vec_t;
- typedef __m256i psqt_vec_t;
+ using vec_t = __m256i;
+ using psqt_vec_t = __m256i;
#define vec_load(a) _mm256_load_si256(a)
#define vec_store(a,b) _mm256_store_si256(a,b)
#define vec_add_16(a,b) _mm256_add_epi16(a,b)
@@ -89,8 +90,8 @@ namespace Stockfish::Eval::NNUE {
#define MaxChunkSize 32
#elif USE_SSE2
- typedef __m128i vec_t;
- typedef __m128i psqt_vec_t;
+ using vec_t = __m128i;
+ using psqt_vec_t = __m128i;
#define vec_load(a) (*(a))
#define vec_store(a,b) *(a)=(b)
#define vec_add_16(a,b) _mm_add_epi16(a,b)
@@ -110,8 +111,8 @@ namespace Stockfish::Eval::NNUE {
#define MaxChunkSize 16
#elif USE_MMX
- typedef __m64 vec_t;
- typedef __m64 psqt_vec_t;
+ using vec_t = __m64;
+ using psqt_vec_t = __m64;
#define vec_load(a) (*(a))
#define vec_store(a,b) *(a)=(b)
#define vec_add_16(a,b) _mm_add_pi16(a,b)
@@ -138,8 +139,8 @@ namespace Stockfish::Eval::NNUE {
#define MaxChunkSize 8
#elif USE_NEON
- typedef int16x8_t vec_t;
- typedef int32x4_t psqt_vec_t;
+ using vec_t = int16x8_t;
+ using psqt_vec_t = int32x4_t;
#define vec_load(a) (*(a))
#define vec_store(a,b) *(a)=(b)
#define vec_add_16(a,b) vaddq_s16(a,b)
@@ -252,9 +253,9 @@ namespace Stockfish::Eval::NNUE {
// Read network parameters
bool read_parameters(std::istream& stream) {
- read_little_endian(stream, biases , HalfDimensions );
- read_little_endian(stream, weights , HalfDimensions * InputDimensions);
- read_little_endian(stream, psqtWeights, PSQTBuckets * InputDimensions);
+ read_leb_128(stream, biases , HalfDimensions );
+ read_leb_128(stream, weights , HalfDimensions * InputDimensions);
+ read_leb_128(stream, psqtWeights, PSQTBuckets * InputDimensions);
return !stream.fail();
}
@@ -262,9 +263,9 @@ namespace Stockfish::Eval::NNUE {
// Write network parameters
bool write_parameters(std::ostream& stream) const {
- write_little_endian(stream, biases , HalfDimensions );
- write_little_endian(stream, weights , HalfDimensions * InputDimensions);
- write_little_endian(stream, psqtWeights, PSQTBuckets * InputDimensions);
+ write_leb_128(stream, biases , HalfDimensions );
+ write_leb_128(stream, weights , HalfDimensions * InputDimensions);
+ write_leb_128(stream, psqtWeights, PSQTBuckets * InputDimensions);
return !stream.fail();
}
@@ -332,27 +333,16 @@ namespace Stockfish::Eval::NNUE {
#endif
return psqt;
+ } // end of function transform()
- } // end of function transform()
-
-
+    // Makes sure the accumulator of pos is available for both perspectives,
+    // delegating to hint_common_access_for_perspective for each colour.
+    void hint_common_access(const Position& pos) const {
+      hint_common_access_for_perspective<WHITE>(pos);
+      hint_common_access_for_perspective<BLACK>(pos);
+    }
private:
template
- void update_accumulator(const Position& pos) const {
-
- // The size must be enough to contain the largest possible update.
- // That might depend on the feature set and generally relies on the
- // feature set's update cost calculation to be correct and never
- // allow updates with more added/removed features than MaxActiveDimensions.
-
- #ifdef VECTOR
- // Gcc-10.2 unnecessarily spills AVX2 registers if this array
- // is defined in the VECTOR code below, once in each branch
- vec_t acc[NumRegs];
- psqt_vec_t psqt[NumPsqtRegs];
- #endif
-
+ [[nodiscard]] std::pair try_find_computed_accumulator(const Position& pos) const {
// Look for a usable accumulator of an earlier position. We keep track
// of the estimated gain in terms of features to be added/subtracted.
StateInfo *st = pos.state(), *next = nullptr;
@@ -367,218 +357,313 @@ namespace Stockfish::Eval::NNUE {
next = st;
st = st->previous;
}
+ return { st, next };
+ }
- if (st->accumulator.computed[Perspective])
- {
- if (next == nullptr)
- return;
+ // NOTE: The parameter states_to_update is an array of position states, ending with nullptr.
+ // All states must be sequential, that is states_to_update[i] must either be reachable
+ // by repeatedly applying ->previous from states_to_update[i+1] or states_to_update[i] == nullptr.
+ // computed_st must be reachable by repeatedly applying ->previous on states_to_update[0], if not nullptr.
+ template
+ void update_accumulator_incremental(const Position& pos, StateInfo* computed_st, StateInfo* states_to_update[N]) const {
+ static_assert(N > 0);
+ assert(states_to_update[N-1] == nullptr);
- // Update incrementally in two steps. First, we update the "next"
- // accumulator. Then, we update the current accumulator (pos.state()).
-
- // Gather all features to be updated.
- const Square ksq = pos.square(Perspective);
- FeatureSet::IndexList removed[2], added[2];
- FeatureSet::append_changed_indices(
- ksq, next->dirtyPiece, removed[0], added[0]);
- for (StateInfo *st2 = pos.state(); st2 != next; st2 = st2->previous)
- FeatureSet::append_changed_indices(
- ksq, st2->dirtyPiece, removed[1], added[1]);
-
- // Mark the accumulators as computed.
- next->accumulator.computed[Perspective] = true;
- pos.state()->accumulator.computed[Perspective] = true;
-
- // Now update the accumulators listed in states_to_update[], where the last element is a sentinel.
- StateInfo *states_to_update[3] =
- { next, next == pos.state() ? nullptr : pos.state(), nullptr };
#ifdef VECTOR
- for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j)
+ // Gcc-10.2 unnecessarily spills AVX2 registers if this array
+ // is defined in the VECTOR code below, once in each branch
+ vec_t acc[NumRegs];
+ psqt_vec_t psqt[NumPsqtRegs];
+ #endif
+
+ if (states_to_update[0] == nullptr)
+ return;
+
+ // Update incrementally going back through states_to_update.
+
+ // Gather all features to be updated.
+ const Square ksq = pos.square(Perspective);
+
+ // The size must be enough to contain the largest possible update.
+ // That might depend on the feature set and generally relies on the
+ // feature set's update cost calculation to be correct and never
+ // allow updates with more added/removed features than MaxActiveDimensions.
+ FeatureSet::IndexList removed[N-1], added[N-1];
+
+ {
+ int i = N-2; // last potential state to update. Skip last element because it must be nullptr.
+ while (states_to_update[i] == nullptr)
+ --i;
+
+ StateInfo *st2 = states_to_update[i];
+
+ for (; i >= 0; --i)
{
- // Load accumulator
- auto accTile = reinterpret_cast(
- &st->accumulator.accumulation[Perspective][j * TileHeight]);
- for (IndexType k = 0; k < NumRegs; ++k)
- acc[k] = vec_load(&accTile[k]);
+ states_to_update[i]->accumulator.computed[Perspective] = true;
- for (IndexType i = 0; states_to_update[i]; ++i)
- {
- // Difference calculation for the deactivated features
- for (const auto index : removed[i])
- {
- const IndexType offset = HalfDimensions * index + j * TileHeight;
- auto column = reinterpret_cast(&weights[offset]);
- for (IndexType k = 0; k < NumRegs; ++k)
- acc[k] = vec_sub_16(acc[k], column[k]);
- }
+ StateInfo* end_state = i == 0 ? computed_st : states_to_update[i - 1];
- // Difference calculation for the activated features
- for (const auto index : added[i])
- {
- const IndexType offset = HalfDimensions * index + j * TileHeight;
- auto column = reinterpret_cast(&weights[offset]);
- for (IndexType k = 0; k < NumRegs; ++k)
- acc[k] = vec_add_16(acc[k], column[k]);
- }
-
- // Store accumulator
- accTile = reinterpret_cast(
- &states_to_update[i]->accumulator.accumulation[Perspective][j * TileHeight]);
- for (IndexType k = 0; k < NumRegs; ++k)
- vec_store(&accTile[k], acc[k]);
- }
+ for (; st2 != end_state; st2 = st2->previous)
+ FeatureSet::append_changed_indices(
+ ksq, st2->dirtyPiece, removed[i], added[i]);
}
+ }
- for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j)
- {
- // Load accumulator
- auto accTilePsqt = reinterpret_cast(
- &st->accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]);
- for (std::size_t k = 0; k < NumPsqtRegs; ++k)
- psqt[k] = vec_load_psqt(&accTilePsqt[k]);
+ StateInfo* st = computed_st;
- for (IndexType i = 0; states_to_update[i]; ++i)
- {
- // Difference calculation for the deactivated features
- for (const auto index : removed[i])
- {
- const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight;
- auto columnPsqt = reinterpret_cast(&psqtWeights[offset]);
- for (std::size_t k = 0; k < NumPsqtRegs; ++k)
- psqt[k] = vec_sub_psqt_32(psqt[k], columnPsqt[k]);
- }
+ // Now update the accumulators listed in states_to_update[], where the last element is a sentinel.
+#ifdef VECTOR
+ for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j)
+ {
+ // Load accumulator
+ auto accTile = reinterpret_cast(
+ &st->accumulator.accumulation[Perspective][j * TileHeight]);
+ for (IndexType k = 0; k < NumRegs; ++k)
+ acc[k] = vec_load(&accTile[k]);
- // Difference calculation for the activated features
- for (const auto index : added[i])
- {
- const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight;
- auto columnPsqt = reinterpret_cast(&psqtWeights[offset]);
- for (std::size_t k = 0; k < NumPsqtRegs; ++k)
- psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]);
- }
-
- // Store accumulator
- accTilePsqt = reinterpret_cast(
- &states_to_update[i]->accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]);
- for (std::size_t k = 0; k < NumPsqtRegs; ++k)
- vec_store_psqt(&accTilePsqt[k], psqt[k]);
- }
- }
-
- #else
for (IndexType i = 0; states_to_update[i]; ++i)
{
- std::memcpy(states_to_update[i]->accumulator.accumulation[Perspective],
- st->accumulator.accumulation[Perspective],
- HalfDimensions * sizeof(BiasType));
-
- for (std::size_t k = 0; k < PSQTBuckets; ++k)
- states_to_update[i]->accumulator.psqtAccumulation[Perspective][k] = st->accumulator.psqtAccumulation[Perspective][k];
-
- st = states_to_update[i];
-
// Difference calculation for the deactivated features
for (const auto index : removed[i])
{
- const IndexType offset = HalfDimensions * index;
-
- for (IndexType j = 0; j < HalfDimensions; ++j)
- st->accumulator.accumulation[Perspective][j] -= weights[offset + j];
-
- for (std::size_t k = 0; k < PSQTBuckets; ++k)
- st->accumulator.psqtAccumulation[Perspective][k] -= psqtWeights[index * PSQTBuckets + k];
+ const IndexType offset = HalfDimensions * index + j * TileHeight;
+ auto column = reinterpret_cast(&weights[offset]);
+ for (IndexType k = 0; k < NumRegs; ++k)
+ acc[k] = vec_sub_16(acc[k], column[k]);
}
// Difference calculation for the activated features
for (const auto index : added[i])
- {
- const IndexType offset = HalfDimensions * index;
-
- for (IndexType j = 0; j < HalfDimensions; ++j)
- st->accumulator.accumulation[Perspective][j] += weights[offset + j];
-
- for (std::size_t k = 0; k < PSQTBuckets; ++k)
- st->accumulator.psqtAccumulation[Perspective][k] += psqtWeights[index * PSQTBuckets + k];
- }
- }
- #endif
- }
- else
- {
- // Refresh the accumulator
- auto& accumulator = pos.state()->accumulator;
- accumulator.computed[Perspective] = true;
- FeatureSet::IndexList active;
- FeatureSet::append_active_indices(pos, active);
-
- #ifdef VECTOR
- for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j)
- {
- auto biasesTile = reinterpret_cast(
- &biases[j * TileHeight]);
- for (IndexType k = 0; k < NumRegs; ++k)
- acc[k] = biasesTile[k];
-
- for (const auto index : active)
{
const IndexType offset = HalfDimensions * index + j * TileHeight;
auto column = reinterpret_cast(&weights[offset]);
-
- for (unsigned k = 0; k < NumRegs; ++k)
+ for (IndexType k = 0; k < NumRegs; ++k)
acc[k] = vec_add_16(acc[k], column[k]);
}
- auto accTile = reinterpret_cast(
- &accumulator.accumulation[Perspective][j * TileHeight]);
- for (unsigned k = 0; k < NumRegs; k++)
+ // Store accumulator
+ accTile = reinterpret_cast(
+ &states_to_update[i]->accumulator.accumulation[Perspective][j * TileHeight]);
+ for (IndexType k = 0; k < NumRegs; ++k)
vec_store(&accTile[k], acc[k]);
}
+ }
- for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j)
+ for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j)
+ {
+ // Load accumulator
+ auto accTilePsqt = reinterpret_cast(
+ &st->accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]);
+ for (std::size_t k = 0; k < NumPsqtRegs; ++k)
+ psqt[k] = vec_load_psqt(&accTilePsqt[k]);
+
+ for (IndexType i = 0; states_to_update[i]; ++i)
{
- for (std::size_t k = 0; k < NumPsqtRegs; ++k)
- psqt[k] = vec_zero_psqt();
-
- for (const auto index : active)
+ // Difference calculation for the deactivated features
+ for (const auto index : removed[i])
{
const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight;
auto columnPsqt = reinterpret_cast(&psqtWeights[offset]);
+ for (std::size_t k = 0; k < NumPsqtRegs; ++k)
+ psqt[k] = vec_sub_psqt_32(psqt[k], columnPsqt[k]);
+ }
+ // Difference calculation for the activated features
+ for (const auto index : added[i])
+ {
+ const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight;
+ auto columnPsqt = reinterpret_cast(&psqtWeights[offset]);
for (std::size_t k = 0; k < NumPsqtRegs; ++k)
psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]);
}
- auto accTilePsqt = reinterpret_cast(
- &accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]);
+ // Store accumulator
+ accTilePsqt = reinterpret_cast(
+ &states_to_update[i]->accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]);
for (std::size_t k = 0; k < NumPsqtRegs; ++k)
vec_store_psqt(&accTilePsqt[k], psqt[k]);
}
+ }
- #else
- std::memcpy(accumulator.accumulation[Perspective], biases,
+#else
+ for (IndexType i = 0; states_to_update[i]; ++i)
+ {
+ std::memcpy(states_to_update[i]->accumulator.accumulation[Perspective],
+ st->accumulator.accumulation[Perspective],
HalfDimensions * sizeof(BiasType));
for (std::size_t k = 0; k < PSQTBuckets; ++k)
- accumulator.psqtAccumulation[Perspective][k] = 0;
+ states_to_update[i]->accumulator.psqtAccumulation[Perspective][k] = st->accumulator.psqtAccumulation[Perspective][k];
- for (const auto index : active)
+ st = states_to_update[i];
+
+ // Difference calculation for the deactivated features
+ for (const auto index : removed[i])
{
const IndexType offset = HalfDimensions * index;
for (IndexType j = 0; j < HalfDimensions; ++j)
- accumulator.accumulation[Perspective][j] += weights[offset + j];
+ st->accumulator.accumulation[Perspective][j] -= weights[offset + j];
for (std::size_t k = 0; k < PSQTBuckets; ++k)
- accumulator.psqtAccumulation[Perspective][k] += psqtWeights[index * PSQTBuckets + k];
+ st->accumulator.psqtAccumulation[Perspective][k] -= psqtWeights[index * PSQTBuckets + k];
+ }
+
+ // Difference calculation for the activated features
+ for (const auto index : added[i])
+ {
+ const IndexType offset = HalfDimensions * index;
+
+ for (IndexType j = 0; j < HalfDimensions; ++j)
+ st->accumulator.accumulation[Perspective][j] += weights[offset + j];
+
+ for (std::size_t k = 0; k < PSQTBuckets; ++k)
+ st->accumulator.psqtAccumulation[Perspective][k] += psqtWeights[index * PSQTBuckets + k];
}
- #endif
}
+#endif
#if defined(USE_MMX)
_mm_empty();
#endif
}
+    // Recomputes the accumulator of pos.state() for one perspective from
+    // scratch: the biases plus the weight columns of every active feature,
+    // and the PSQT sums starting from zero.
+    template<Color Perspective>
+    void update_accumulator_refresh(const Position& pos) const {
+      #ifdef VECTOR
+      // Gcc-10.2 unnecessarily spills AVX2 registers if this array
+      // is defined in the VECTOR code below, once in each branch
+      vec_t acc[NumRegs];
+      psqt_vec_t psqt[NumPsqtRegs];
+      #endif
+
+      // Mark the accumulator as computed and collect the active features
+      auto& accumulator = pos.state()->accumulator;
+      accumulator.computed[Perspective] = true;
+      FeatureSet::IndexList active;
+      FeatureSet::append_active_indices<Perspective>(pos, active);
+
+#ifdef VECTOR
+      for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j)
+      {
+        // Start each tile from the biases
+        auto biasesTile = reinterpret_cast<const vec_t*>(
+          &biases[j * TileHeight]);
+        for (IndexType k = 0; k < NumRegs; ++k)
+          acc[k] = biasesTile[k];
+
+        for (const auto index : active)
+        {
+          const IndexType offset = HalfDimensions * index + j * TileHeight;
+          auto column = reinterpret_cast<const vec_t*>(&weights[offset]);
+
+          for (unsigned k = 0; k < NumRegs; ++k)
+            acc[k] = vec_add_16(acc[k], column[k]);
+        }
+
+        // Store the finished tile
+        auto accTile = reinterpret_cast<vec_t*>(
+          &accumulator.accumulation[Perspective][j * TileHeight]);
+        for (unsigned k = 0; k < NumRegs; k++)
+          vec_store(&accTile[k], acc[k]);
+      }
+
+      for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j)
+      {
+        // PSQT values have no bias term: start from zero
+        for (std::size_t k = 0; k < NumPsqtRegs; ++k)
+          psqt[k] = vec_zero_psqt();
+
+        for (const auto index : active)
+        {
+          const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight;
+          auto columnPsqt = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset]);
+
+          for (std::size_t k = 0; k < NumPsqtRegs; ++k)
+            psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]);
+        }
+
+        auto accTilePsqt = reinterpret_cast<psqt_vec_t*>(
+          &accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]);
+        for (std::size_t k = 0; k < NumPsqtRegs; ++k)
+          vec_store_psqt(&accTilePsqt[k], psqt[k]);
+      }
+
+#else
+      // Scalar fallback: same computation, one element at a time
+      std::memcpy(accumulator.accumulation[Perspective], biases,
+          HalfDimensions * sizeof(BiasType));
+
+      for (std::size_t k = 0; k < PSQTBuckets; ++k)
+        accumulator.psqtAccumulation[Perspective][k] = 0;
+
+      for (const auto index : active)
+      {
+        const IndexType offset = HalfDimensions * index;
+
+        for (IndexType j = 0; j < HalfDimensions; ++j)
+          accumulator.accumulation[Perspective][j] += weights[offset + j];
+
+        for (std::size_t k = 0; k < PSQTBuckets; ++k)
+          accumulator.psqtAccumulation[Perspective][k] += psqtWeights[index * PSQTBuckets + k];
+      }
+#endif
+
+      #if defined(USE_MMX)
+      _mm_empty();
+      #endif
+    }
+
+
+    // Works like update_accumulator, but performs less work:
+    // it updates ONLY the accumulator of pos.state() for the given perspective.
+    template<Color Perspective>
+    void hint_common_access_for_perspective(const Position& pos) const {
+
+      // Fast early exit: nothing to do if the accumulator is already computed.
+      if (pos.state()->accumulator.computed[Perspective])
+        return;
+
+      // Look for a usable accumulator of an earlier position.
+      auto [oldest_st, _] = try_find_computed_accumulator<Perspective>(pos);
+
+      if (oldest_st->accumulator.computed[Perspective])
+      {
+        // Only update current position accumulator to minimize work.
+        StateInfo* states_to_update[2] = { pos.state(), nullptr };
+        update_accumulator_incremental<Perspective, 2>(pos, oldest_st, states_to_update);
+      }
+      else
+      {
+        // No computed accumulator was found: rebuild from scratch.
+        update_accumulator_refresh<Perspective>(pos);
+      }
+    }
+
+
+    // Brings the accumulator of pos.state() up to date for one perspective,
+    // incrementally from an earlier computed accumulator when one is found,
+    // otherwise by a full refresh.
+    template<Color Perspective>
+    void update_accumulator(const Position& pos) const {
+
+      auto [oldest_st, next] = try_find_computed_accumulator<Perspective>(pos);
+
+      if (oldest_st->accumulator.computed[Perspective])
+      {
+        // No state follows the computed one, so there is nothing to update.
+        if (next == nullptr)
+          return;
+
+        // Now update the accumulators listed in states_to_update[], where the last element is a sentinel.
+        // Currently we update 2 accumulators.
+        //     1. for the current position
+        //     2. the next accumulator after the computed one
+        // The heuristic may change in the future.
+        StateInfo *states_to_update[3] =
+          { next, next == pos.state() ? nullptr : pos.state(), nullptr };
+
+        update_accumulator_incremental<Perspective, 3>(pos, oldest_st, states_to_update);
+      }
+      else
+      {
+        update_accumulator_refresh<Perspective>(pos);
+      }
+    }
+
+
alignas(CacheLineSize) BiasType biases[HalfDimensions];
alignas(CacheLineSize) WeightType weights[HalfDimensions * InputDimensions];
alignas(CacheLineSize) PSQTWeightType psqtWeights[InputDimensions * PSQTBuckets];
diff --git a/DroidFishApp/src/main/cpp/stockfish/pawns.cpp b/DroidFishApp/src/main/cpp/stockfish/pawns.cpp
index fdcfa02..0ccafd9 100644
--- a/DroidFishApp/src/main/cpp/stockfish/pawns.cpp
+++ b/DroidFishApp/src/main/cpp/stockfish/pawns.cpp
@@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
- Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
+ Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
diff --git a/DroidFishApp/src/main/cpp/stockfish/pawns.h b/DroidFishApp/src/main/cpp/stockfish/pawns.h
index af0370f..d20e7c2 100644
--- a/DroidFishApp/src/main/cpp/stockfish/pawns.h
+++ b/DroidFishApp/src/main/cpp/stockfish/pawns.h
@@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
- Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
+ Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -61,7 +61,7 @@ struct Entry {
int blockedCount;
};
-typedef HashTable<Entry, 131072> Table;
+using Table = HashTable<Entry, 131072>;
Entry* probe(const Position& pos);
diff --git a/DroidFishApp/src/main/cpp/stockfish/position.cpp b/DroidFishApp/src/main/cpp/stockfish/position.cpp
index 5befcaf..2a9d798 100644
--- a/DroidFishApp/src/main/cpp/stockfish/position.cpp
+++ b/DroidFishApp/src/main/cpp/stockfish/position.cpp
@@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
- Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
+ Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -22,6 +22,7 @@
#include <cstring> // For std::memset, std::memcmp
#include <iomanip>
#include <sstream>
+#include <string_view>
#include "bitboard.h"
#include "misc.h"
@@ -46,7 +47,7 @@ namespace Zobrist {
namespace {
-const string PieceToChar(" PNBRQK  pnbrqk");
+constexpr std::string_view PieceToChar(" PNBRQK  pnbrqk");
constexpr Piece Pieces[] = { W_PAWN, W_KNIGHT, W_BISHOP, W_ROOK, W_QUEEN, W_KING,
B_PAWN, B_KNIGHT, B_BISHOP, B_ROOK, B_QUEEN, B_KING };
@@ -96,7 +97,7 @@ std::ostream& operator<<(std::ostream& os, const Position& pos) {
// Marcel van Kervinck's cuckoo algorithm for fast detection of "upcoming repetition"
// situations. Description of the algorithm in the following paper:
-// https://marcelk.net/2013-04-06/paper/upcoming-rep-v2.pdf
+// http://web.archive.org/web/20201107002606/https://marcelk.net/2013-04-06/paper/upcoming-rep-v2.pdf
// First and second hash functions for indexing the cuckoo tables
inline int H1(Key h) { return h & 0x1fff; }
@@ -281,7 +282,7 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th
chess960 = isChess960;
thisThread = th;
- set_state(st);
+ set_state();
assert(pos_is_ok());
@@ -312,60 +313,59 @@ void Position::set_castling_right(Color c, Square rfrom) {
/// Position::set_check_info() sets king attacks to detect if a move gives check
-void Position::set_check_info(StateInfo* si) const {
+void Position::set_check_info() const {
- si->blockersForKing[WHITE] = slider_blockers(pieces(BLACK), square<KING>(WHITE), si->pinners[BLACK]);
- si->blockersForKing[BLACK] = slider_blockers(pieces(WHITE), square<KING>(BLACK), si->pinners[WHITE]);
+ st->blockersForKing[WHITE] = slider_blockers(pieces(BLACK), square<KING>(WHITE), st->pinners[BLACK]);
+ st->blockersForKing[BLACK] = slider_blockers(pieces(WHITE), square<KING>(BLACK), st->pinners[WHITE]);
Square ksq = square<KING>(~sideToMove);
- si->checkSquares[PAWN] = pawn_attacks_bb(~sideToMove, ksq);
- si->checkSquares[KNIGHT] = attacks_bb<KNIGHT>(ksq);
- si->checkSquares[BISHOP] = attacks_bb<BISHOP>(ksq, pieces());
- si->checkSquares[ROOK] = attacks_bb<ROOK>(ksq, pieces());
- si->checkSquares[QUEEN] = si->checkSquares[BISHOP] | si->checkSquares[ROOK];
- si->checkSquares[KING] = 0;
+ st->checkSquares[PAWN] = pawn_attacks_bb(~sideToMove, ksq);
+ st->checkSquares[KNIGHT] = attacks_bb<KNIGHT>(ksq);
+ st->checkSquares[BISHOP] = attacks_bb<BISHOP>(ksq, pieces());
+ st->checkSquares[ROOK] = attacks_bb<ROOK>(ksq, pieces());
+ st->checkSquares[QUEEN] = st->checkSquares[BISHOP] | st->checkSquares[ROOK];
+ st->checkSquares[KING] = 0;
}
/// Position::set_state() computes the hash keys of the position, and other
/// data that once computed is updated incrementally as moves are made.
-/// The function is only used when a new position is set up, and to verify
-/// the correctness of the StateInfo data when running in debug mode.
+/// The function is only used when a new position is set up.
-void Position::set_state(StateInfo* si) const {
+void Position::set_state() const {
- si->key = si->materialKey = 0;
- si->pawnKey = Zobrist::noPawns;
- si->nonPawnMaterial[WHITE] = si->nonPawnMaterial[BLACK] = VALUE_ZERO;
- si->checkersBB = attackers_to(square<KING>(sideToMove)) & pieces(~sideToMove);
+ st->key = st->materialKey = 0;
+ st->pawnKey = Zobrist::noPawns;
+ st->nonPawnMaterial[WHITE] = st->nonPawnMaterial[BLACK] = VALUE_ZERO;
+ st->checkersBB = attackers_to(square<KING>(sideToMove)) & pieces(~sideToMove);
- set_check_info(si);
+ set_check_info();
for (Bitboard b = pieces(); b; )
{
Square s = pop_lsb(b);
Piece pc = piece_on(s);
- si->key ^= Zobrist::psq[pc][s];
+ st->key ^= Zobrist::psq[pc][s];
if (type_of(pc) == PAWN)
- si->pawnKey ^= Zobrist::psq[pc][s];
+ st->pawnKey ^= Zobrist::psq[pc][s];
else if (type_of(pc) != KING)
- si->nonPawnMaterial[color_of(pc)] += PieceValue[MG][pc];
+ st->nonPawnMaterial[color_of(pc)] += PieceValue[MG][pc];
}
- if (si->epSquare != SQ_NONE)
- si->key ^= Zobrist::enpassant[file_of(si->epSquare)];
+ if (st->epSquare != SQ_NONE)
+ st->key ^= Zobrist::enpassant[file_of(st->epSquare)];
if (sideToMove == BLACK)
- si->key ^= Zobrist::side;
+ st->key ^= Zobrist::side;
- si->key ^= Zobrist::castling[si->castlingRights];
+ st->key ^= Zobrist::castling[st->castlingRights];
for (Piece pc : Pieces)
for (int cnt = 0; cnt < pieceCount[pc]; ++cnt)
- si->materialKey ^= Zobrist::psq[pc][cnt];
+ st->materialKey ^= Zobrist::psq[pc][cnt];
}
@@ -568,8 +568,7 @@ bool Position::pseudo_legal(const Move m) const {
: MoveList<NON_EVASIONS>(*this).contains(m);
// Is not a promotion, so promotion piece must be empty
- if (promotion_type(m) - KNIGHT != NO_PIECE_TYPE)
- return false;
+ assert(promotion_type(m) - KNIGHT == NO_PIECE_TYPE);
// If the 'from' square is not occupied by a piece belonging to the side to
// move, the move is obviously not legal.
@@ -765,9 +764,6 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
// Update board and piece lists
remove_piece(capsq);
- if (type_of(m) == EN_PASSANT)
- board[capsq] = NO_PIECE;
-
// Update material hash key and prefetch access to materialTable
k ^= Zobrist::psq[captured][capsq];
st->materialKey ^= Zobrist::psq[captured][pieceCount[captured]];
@@ -868,7 +864,7 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
sideToMove = ~sideToMove;
// Update king attacks used for fast check detection
- set_check_info(st);
+ set_check_info();
// Calculate the repetition info. It is the ply distance from the previous
// occurrence of the same position, negative in the 3-fold case, or zero
@@ -1020,7 +1016,7 @@ void Position::do_null_move(StateInfo& newSt) {
sideToMove = ~sideToMove;
- set_check_info(st);
+ set_check_info();
st->repetition = 0;
@@ -1065,7 +1061,7 @@ Key Position::key_after(Move m) const {
/// SEE value of move is greater or equal to the given threshold. We'll use an
/// algorithm similar to alpha-beta pruning with a null window.
-bool Position::see_ge(Move m, Value threshold) const {
+bool Position::see_ge(Move m, Bitboard& occupied, Value threshold) const {
assert(is_ok(m));
@@ -1084,7 +1080,7 @@ bool Position::see_ge(Move m, Value threshold) const {
return true;
assert(color_of(piece_on(from)) == sideToMove);
- Bitboard occupied = pieces() ^ from ^ to;
+ occupied = pieces() ^ from ^ to; // xoring to is important for pinned piece logic
Color stm = sideToMove;
Bitboard attackers = attackers_to(to, occupied);
Bitboard stmAttackers, bb;
@@ -1115,45 +1111,44 @@ bool Position::see_ge(Move m, Value threshold) const {
// the bitboard 'attackers' any X-ray attackers behind it.
if ((bb = stmAttackers & pieces(PAWN)))
{
+ occupied ^= least_significant_square_bb(bb);
if ((swap = PawnValueMg - swap) < res)
break;
- occupied ^= least_significant_square_bb(bb);
attackers |= attacks_bb<BISHOP>(to, occupied) & pieces(BISHOP, QUEEN);
}
else if ((bb = stmAttackers & pieces(KNIGHT)))
{
+ occupied ^= least_significant_square_bb(bb);
if ((swap = KnightValueMg - swap) < res)
break;
-
- occupied ^= least_significant_square_bb(bb);
}
else if ((bb = stmAttackers & pieces(BISHOP)))
{
+ occupied ^= least_significant_square_bb(bb);
if ((swap = BishopValueMg - swap) < res)
break;
- occupied ^= least_significant_square_bb(bb);
attackers |= attacks_bb<BISHOP>(to, occupied) & pieces(BISHOP, QUEEN);
}
else if ((bb = stmAttackers & pieces(ROOK)))
{
+ occupied ^= least_significant_square_bb(bb);
if ((swap = RookValueMg - swap) < res)
break;
- occupied ^= least_significant_square_bb(bb);
attackers |= attacks_bb<ROOK>(to, occupied) & pieces(ROOK, QUEEN);
}
else if ((bb = stmAttackers & pieces(QUEEN)))
{
+ occupied ^= least_significant_square_bb(bb);
if ((swap = QueenValueMg - swap) < res)
break;
- occupied ^= least_significant_square_bb(bb);
attackers |= (attacks_bb<BISHOP>(to, occupied) & pieces(BISHOP, QUEEN))
| (attacks_bb<ROOK>(to, occupied) & pieces(ROOK , QUEEN));
}
@@ -1167,6 +1162,11 @@ bool Position::see_ge(Move m, Value threshold) const {
return bool(res);
}
+bool Position::see_ge(Move m, Value threshold) const {
+ Bitboard occupied;
+ return see_ge(m, occupied, threshold);
+}
+
/// Position::is_draw() tests whether the position is drawn by 50-move rule
/// or by repetition. It does not detect stalemates.
@@ -1323,12 +1323,6 @@ bool Position::pos_is_ok() const {
if (p1 != p2 && (pieces(p1) & pieces(p2)))
assert(0 && "pos_is_ok: Bitboards");
- StateInfo si = *st;
- ASSERT_ALIGNED(&si, Eval::NNUE::CacheLineSize);
-
- set_state(&si);
- if (std::memcmp(&si, st, sizeof(StateInfo)))
- assert(0 && "pos_is_ok: State");
for (Piece pc : Pieces)
if ( pieceCount[pc] != popcount(pieces(color_of(pc), type_of(pc)))
diff --git a/DroidFishApp/src/main/cpp/stockfish/position.h b/DroidFishApp/src/main/cpp/stockfish/position.h
index 078ff5b..2e6014d 100644
--- a/DroidFishApp/src/main/cpp/stockfish/position.h
+++ b/DroidFishApp/src/main/cpp/stockfish/position.h
@@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
- Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
+ Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -68,7 +68,7 @@ struct StateInfo {
/// start position to the position just before the search starts). Needed by
/// 'draw by repetition' detection. Use a std::deque because pointers to
/// elements are not invalidated upon list resizing.
-typedef std::unique_ptr<std::deque<StateInfo>> StateListPtr;
+using StateListPtr = std::unique_ptr<std::deque<StateInfo>>;
/// Position class stores information regarding the board representation as
@@ -92,10 +92,9 @@ public:
// Position representation
Bitboard pieces(PieceType pt) const;
- Bitboard pieces(PieceType pt1, PieceType pt2) const;
+ template<typename ...PieceTypes> Bitboard pieces(PieceType pt, PieceTypes... pts) const;
Bitboard pieces(Color c) const;
- Bitboard pieces(Color c, PieceType pt) const;
- Bitboard pieces(Color c, PieceType pt1, PieceType pt2) const;
+ template<typename ...PieceTypes> Bitboard pieces(Color c, PieceTypes... pts) const;
Piece piece_on(Square s) const;
Square ep_square() const;
bool empty(Square s) const;
@@ -126,6 +125,7 @@ public:
bool legal(Move m) const;
bool pseudo_legal(const Move m) const;
bool capture(Move m) const;
+ bool capture_stage(Move m) const;
bool gives_check(Move m) const;
Piece moved_piece(Move m) const;
Piece captured_piece() const;
@@ -144,6 +144,7 @@ public:
// Static Exchange Evaluation
bool see_ge(Move m, Value threshold = VALUE_ZERO) const;
+ bool see_ge(Move m, Bitboard& occupied, Value threshold = VALUE_ZERO) const;
// Accessing hash keys
Key key() const;
@@ -178,8 +179,8 @@ public:
private:
// Initialization helpers (used while setting up a position)
void set_castling_right(Color c, Square rfrom);
- void set_state(StateInfo* si) const;
- void set_check_info(StateInfo* si) const;
+ void set_state() const;
+ void set_check_info() const;
// Other helpers
void move_piece(Square from, Square to);
@@ -204,7 +205,7 @@ private:
bool chess960;
};
-extern std::ostream& operator<<(std::ostream& os, const Position& pos);
+std::ostream& operator<<(std::ostream& os, const Position& pos);
inline Color Position::side_to_move() const {
return sideToMove;
@@ -227,20 +228,18 @@ inline Bitboard Position::pieces(PieceType pt = ALL_PIECES) const {
return byTypeBB[pt];
}
-inline Bitboard Position::pieces(PieceType pt1, PieceType pt2) const {
- return pieces(pt1) | pieces(pt2);
+template<typename ...PieceTypes>
+inline Bitboard Position::pieces(PieceType pt, PieceTypes... pts) const {
+ return pieces(pt) | pieces(pts...);
}
inline Bitboard Position::pieces(Color c) const {
return byColorBB[c];
}
-inline Bitboard Position::pieces(Color c, PieceType pt) const {
- return pieces(c) & pieces(pt);
-}
-
-inline Bitboard Position::pieces(Color c, PieceType pt1, PieceType pt2) const {
- return pieces(c) & (pieces(pt1) | pieces(pt2));
+template<typename ...PieceTypes>
+inline Bitboard Position::pieces(Color c, PieceTypes... pts) const {
+ return pieces(c) & pieces(pts...);
}
template<PieceType Pt> inline int Position::count(Color c) const {
@@ -383,8 +382,16 @@ inline bool Position::is_chess960() const {
inline bool Position::capture(Move m) const {
assert(is_ok(m));
- // Castling is encoded as "king captures rook"
- return (!empty(to_sq(m)) && type_of(m) != CASTLING) || type_of(m) == EN_PASSANT;
+ return (!empty(to_sq(m)) && type_of(m) != CASTLING)
+ || type_of(m) == EN_PASSANT;
+}
+
+// Returns true if a move is generated from the capture stage, which also covers
+// queen promotions, i.e. consistency with the capture stage move generation
+// is needed to avoid the generation of duplicate moves.
+inline bool Position::capture_stage(Move m) const {
+ assert(is_ok(m));
+ return capture(m) || promotion_type(m) == QUEEN;
}
inline Piece Position::captured_piece() const {
diff --git a/DroidFishApp/src/main/cpp/stockfish/psqt.cpp b/DroidFishApp/src/main/cpp/stockfish/psqt.cpp
index ca5664c..d3ebb20 100644
--- a/DroidFishApp/src/main/cpp/stockfish/psqt.cpp
+++ b/DroidFishApp/src/main/cpp/stockfish/psqt.cpp
@@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
- Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
+ Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
diff --git a/DroidFishApp/src/main/cpp/stockfish/psqt.h b/DroidFishApp/src/main/cpp/stockfish/psqt.h
index 4ee0e37..9630f44 100644
--- a/DroidFishApp/src/main/cpp/stockfish/psqt.h
+++ b/DroidFishApp/src/main/cpp/stockfish/psqt.h
@@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
- Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
+ Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -30,7 +30,7 @@ namespace Stockfish::PSQT
extern Score psq[PIECE_NB][SQUARE_NB];
// Fill psqt array from a set of internally linked parameters
-extern void init();
+void init();
} // namespace Stockfish::PSQT
diff --git a/DroidFishApp/src/main/cpp/stockfish/search.cpp b/DroidFishApp/src/main/cpp/stockfish/search.cpp
index c8163d1..740ad71 100644
--- a/DroidFishApp/src/main/cpp/stockfish/search.cpp
+++ b/DroidFishApp/src/main/cpp/stockfish/search.cpp
@@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
- Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
+ Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -34,6 +34,7 @@
#include "tt.h"
#include "uci.h"
#include "syzygy/tbprobe.h"
+#include "nnue/evaluate_nnue.h"
namespace Stockfish {
@@ -63,7 +64,7 @@ namespace {
// Futility margin
Value futility_margin(Depth d, bool improving) {
- return Value(165 * (d - improving));
+ return Value(140 * (d - improving));
}
// Reductions lookup table, initialized at startup
@@ -71,7 +72,7 @@ namespace {
Depth reduction(bool i, Depth d, int mn, Value delta, Value rootDelta) {
int r = Reductions[d] * Reductions[mn];
- return (r + 1642 - int(delta) * 1024 / int(rootDelta)) / 1024 + (!i && r > 916);
+ return (r + 1372 - int(delta) * 1073 / int(rootDelta)) / 1024 + (!i && r > 936);
}
constexpr int futility_move_count(bool improving, Depth depth) {
@@ -81,7 +82,7 @@ namespace {
// History and stats update bonus, based on depth
int stat_bonus(Depth d) {
- return std::min((12 * d + 282) * d - 349 , 1594);
+ return std::min(336 * d - 547, 1561);
}
// Add a small random component to draw evaluations to avoid 3-fold blindness
@@ -96,7 +97,10 @@ namespace {
struct Skill {
Skill(int skill_level, int uci_elo) {
if (uci_elo)
- level = std::clamp(std::pow((uci_elo - 1346.6) / 143.4, 1 / 0.806), 0.0, 20.0);
+ {
+ double e = double(uci_elo - 1320) / (3190 - 1320);
+ level = std::clamp((((37.2473 * e - 40.8525) * e + 22.2943) * e - 0.311438), 0.0, 19.0);
+ }
else
level = double(skill_level);
}
@@ -158,7 +162,7 @@ namespace {
void Search::init() {
for (int i = 1; i < MAX_MOVES; ++i)
- Reductions[i] = int((20.26 + std::log(Threads.size()) / 2) * std::log(i));
+ Reductions[i] = int((20.57 + std::log(Threads.size()) / 2) * std::log(i));
}
@@ -239,9 +243,6 @@ void MainThread::search() {
bestPreviousScore = bestThread->rootMoves[0].score;
bestPreviousAverageScore = bestThread->rootMoves[0].averageScore;
- for (Thread* th : Threads)
- th->previousDepth = bestThread->completedDepth;
-
// Send again PV info if we have a new best thread
if (bestThread != this)
sync_cout << UCI::pv(bestThread->rootPos, bestThread->completedDepth) << sync_endl;
@@ -276,16 +277,18 @@ void Thread::search() {
int iterIdx = 0;
std::memset(ss-7, 0, 10 * sizeof(Stack));
- for (int i = 7; i > 0; i--)
+ for (int i = 7; i > 0; --i)
+ {
(ss-i)->continuationHistory = &this->continuationHistory[0][0][NO_PIECE][0]; // Use as a sentinel
+ (ss-i)->staticEval = VALUE_NONE;
+ }
for (int i = 0; i <= MAX_PLY + 2; ++i)
(ss+i)->ply = i;
ss->pv = pv;
- bestValue = delta = alpha = -VALUE_INFINITE;
- beta = VALUE_INFINITE;
+ bestValue = -VALUE_INFINITE;
if (mainThread)
{
@@ -307,10 +310,6 @@ void Thread::search() {
multiPV = std::min(multiPV, rootMoves.size());
- complexityAverage.set(155, 1);
-
- optimism[us] = optimism[~us] = VALUE_ZERO;
-
int searchAgainCounter = 0;
// Iterative deepening loop until requested to stop or the target depth is reached
@@ -331,7 +330,7 @@ void Thread::search() {
pvLast = 0;
if (!Threads.increaseDepth)
- searchAgainCounter++;
+ searchAgainCounter++;
// MultiPV loop. We perform a full root search for each PV line
for (pvIdx = 0; pvIdx < multiPV && !Threads.stop; ++pvIdx)
@@ -348,18 +347,15 @@ void Thread::search() {
selDepth = 0;
// Reset aspiration window starting size
- if (rootDepth >= 4)
- {
- Value prev = rootMoves[pvIdx].averageScore;
- delta = Value(10) + int(prev) * prev / 15620;
- alpha = std::max(prev - delta,-VALUE_INFINITE);
- beta = std::min(prev + delta, VALUE_INFINITE);
+ Value prev = rootMoves[pvIdx].averageScore;
+ delta = Value(10) + int(prev) * prev / 15799;
+ alpha = std::max(prev - delta,-VALUE_INFINITE);
+ beta = std::min(prev + delta, VALUE_INFINITE);
- // Adjust optimism based on root move's previousScore
- int opt = 118 * prev / (std::abs(prev) + 169);
- optimism[ us] = Value(opt);
- optimism[~us] = -optimism[us];
- }
+ // Adjust optimism based on root move's previousScore
+ int opt = 109 * prev / (std::abs(prev) + 141);
+ optimism[ us] = Value(opt);
+ optimism[~us] = -optimism[us];
// Start with a small aspiration window and, in the case of a fail
// high/low, re-search with a bigger window until we don't fail
@@ -413,7 +409,7 @@ void Thread::search() {
else
break;
- delta += delta / 4 + 2;
+ delta += delta / 3;
assert(alpha >= -VALUE_INFINITE && beta <= VALUE_INFINITE);
}
@@ -429,9 +425,10 @@ void Thread::search() {
if (!Threads.stop)
completedDepth = rootDepth;
- if (rootMoves[0].pv[0] != lastBestMove) {
- lastBestMove = rootMoves[0].pv[0];
- lastBestMoveDepth = rootDepth;
+ if (rootMoves[0].pv[0] != lastBestMove)
+ {
+ lastBestMove = rootMoves[0].pv[0];
+ lastBestMoveDepth = rootDepth;
}
// Have we found a "mate in x"?
@@ -459,18 +456,16 @@ void Thread::search() {
&& !Threads.stop
&& !mainThread->stopOnPonderhit)
{
- double fallingEval = (71 + 12 * (mainThread->bestPreviousAverageScore - bestValue)
- + 6 * (mainThread->iterValue[iterIdx] - bestValue)) / 656.7;
+ double fallingEval = (69 + 13 * (mainThread->bestPreviousAverageScore - bestValue)
+ + 6 * (mainThread->iterValue[iterIdx] - bestValue)) / 619.6;
fallingEval = std::clamp(fallingEval, 0.5, 1.5);
// If the bestMove is stable over several iterations, reduce time accordingly
- timeReduction = lastBestMoveDepth + 9 < completedDepth ? 1.37 : 0.65;
- double reduction = (1.4 + mainThread->previousTimeReduction) / (2.15 * timeReduction);
- double bestMoveInstability = 1 + 1.7 * totBestMoveChanges / Threads.size();
- int complexity = mainThread->complexityAverage.value();
- double complexPosition = std::min(1.0 + (complexity - 261) / 1738.7, 1.5);
+ timeReduction = lastBestMoveDepth + 8 < completedDepth ? 1.57 : 0.65;
+ double reduction = (1.4 + mainThread->previousTimeReduction) / (2.08 * timeReduction);
+ double bestMoveInstability = 1 + 1.8 * totBestMoveChanges / Threads.size();
- double totalTime = Time.optimum() * fallingEval * reduction * bestMoveInstability * complexPosition;
+ double totalTime = Time.optimum() * fallingEval * reduction * bestMoveInstability;
// Cap used time in case of a single legal move for a better viewer experience in tournaments
// yielding correct scores and sufficiently fast moves.
@@ -487,12 +482,11 @@ void Thread::search() {
else
Threads.stop = true;
}
- else if ( Threads.increaseDepth
- && !mainThread->ponder
- && Time.elapsed() > totalTime * 0.53)
- Threads.increaseDepth = false;
+ else if ( !mainThread->ponder
+ && Time.elapsed() > totalTime * 0.50)
+ Threads.increaseDepth = false;
else
- Threads.increaseDepth = true;
+ Threads.increaseDepth = true;
}
mainThread->iterValue[iterIdx] = bestValue;
@@ -520,7 +514,6 @@ namespace {
constexpr bool PvNode = nodeType != NonPV;
constexpr bool rootNode = nodeType == Root;
- const Depth maxNextDepth = rootNode ? depth : depth + 1;
// Check if we have an upcoming move which draws by repetition, or
// if the opponent had an alternative move earlier to this position.
@@ -555,7 +548,7 @@ namespace {
bool givesCheck, improving, priorCapture, singularQuietLMR;
bool capture, moveCountPruning, ttCapture;
Piece movedPiece;
- int moveCount, captureCount, quietCount, improvement, complexity;
+ int moveCount, captureCount, quietCount, improvement;
// Step 1. Initialize node
Thread* thisThread = pos.this_thread();
@@ -599,52 +592,45 @@ namespace {
assert(0 <= ss->ply && ss->ply < MAX_PLY);
- (ss+1)->ttPv = false;
(ss+1)->excludedMove = bestMove = MOVE_NONE;
(ss+2)->killers[0] = (ss+2)->killers[1] = MOVE_NONE;
(ss+2)->cutoffCnt = 0;
ss->doubleExtensions = (ss-1)->doubleExtensions;
- Square prevSq = to_sq((ss-1)->currentMove);
+ Square prevSq = is_ok((ss-1)->currentMove) ? to_sq((ss-1)->currentMove) : SQ_NONE;
+ ss->statScore = 0;
- // Initialize statScore to zero for the grandchildren of the current position.
- // So statScore is shared between all grandchildren and only the first grandchild
- // starts with statScore = 0. Later grandchildren start with the last calculated
- // statScore of the previous grandchild. This influences the reduction rules in
- // LMR which are based on the statScore of parent position.
- if (!rootNode)
- (ss+2)->statScore = 0;
-
- // Step 4. Transposition table lookup. We don't want the score of a partial
- // search to overwrite a previous full search TT value, so we use a different
- // position key in case of an excluded move.
+ // Step 4. Transposition table lookup.
excludedMove = ss->excludedMove;
- posKey = excludedMove == MOVE_NONE ? pos.key() : pos.key() ^ make_key(excludedMove);
+ posKey = pos.key();
tte = TT.probe(posKey, ss->ttHit);
ttValue = ss->ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE;
ttMove = rootNode ? thisThread->rootMoves[thisThread->pvIdx].pv[0]
: ss->ttHit ? tte->move() : MOVE_NONE;
- ttCapture = ttMove && pos.capture(ttMove);
+ ttCapture = ttMove && pos.capture_stage(ttMove);
+
+ // At this point, if excluded, skip straight to step 6, static eval. However,
+ // to save indentation, we list the condition in all code between here and there.
if (!excludedMove)
ss->ttPv = PvNode || (ss->ttHit && tte->is_pv());
// At non-PV nodes we check for an early TT cutoff
if ( !PvNode
- && ss->ttHit
+ && !excludedMove
&& tte->depth() > depth - (tte->bound() == BOUND_EXACT)
- && ttValue != VALUE_NONE // Possible in case of TT access race
+ && ttValue != VALUE_NONE // Possible in case of TT access race or if !ttHit
&& (tte->bound() & (ttValue >= beta ? BOUND_LOWER : BOUND_UPPER)))
{
- // If ttMove is quiet, update move sorting heuristics on TT hit (~1 Elo)
+ // If ttMove is quiet, update move sorting heuristics on TT hit (~2 Elo)
if (ttMove)
{
if (ttValue >= beta)
{
- // Bonus for a quiet ttMove that fails high (~3 Elo)
+ // Bonus for a quiet ttMove that fails high (~2 Elo)
if (!ttCapture)
update_quiet_stats(pos, ss, ttMove, stat_bonus(depth));
- // Extra penalty for early quiet moves of the previous ply (~0 Elo)
- if ((ss-1)->moveCount <= 2 && !priorCapture)
+ // Extra penalty for early quiet moves of the previous ply (~0 Elo on STC, ~2 Elo on LTC)
+ if (prevSq != SQ_NONE && (ss-1)->moveCount <= 2 && !priorCapture)
update_continuation_histories(ss-1, pos.piece_on(prevSq), prevSq, -stat_bonus(depth + 1));
}
// Penalty for a quiet ttMove that fails low (~1 Elo)
@@ -663,7 +649,7 @@ namespace {
}
// Step 5. Tablebases probe
- if (!rootNode && TB::Cardinality)
+ if (!rootNode && !excludedMove && TB::Cardinality)
{
int piecesCount = pos.count<ALL_PIECES>();
@@ -723,38 +709,39 @@ namespace {
ss->staticEval = eval = VALUE_NONE;
improving = false;
improvement = 0;
- complexity = 0;
goto moves_loop;
}
+ else if (excludedMove)
+ {
+ // Providing the hint that this node's accumulator will be used often brings significant Elo gain (13 Elo)
+ Eval::NNUE::hint_common_parent_position(pos);
+ eval = ss->staticEval;
+ }
else if (ss->ttHit)
{
// Never assume anything about values stored in TT
ss->staticEval = eval = tte->eval();
if (eval == VALUE_NONE)
- ss->staticEval = eval = evaluate(pos, &complexity);
- else // Fall back to (semi)classical complexity for TT hits, the NNUE complexity is lost
- complexity = abs(ss->staticEval - pos.psq_eg_stm());
+ ss->staticEval = eval = evaluate(pos);
+ else if (PvNode)
+ Eval::NNUE::hint_common_parent_position(pos);
- // ttValue can be used as a better position evaluation (~4 Elo)
+ // ttValue can be used as a better position evaluation (~7 Elo)
if ( ttValue != VALUE_NONE
&& (tte->bound() & (ttValue > eval ? BOUND_LOWER : BOUND_UPPER)))
eval = ttValue;
}
else
{
- ss->staticEval = eval = evaluate(pos, &complexity);
-
+ ss->staticEval = eval = evaluate(pos);
// Save static evaluation into transposition table
- if (!excludedMove)
- tte->save(posKey, VALUE_NONE, ss->ttPv, BOUND_NONE, DEPTH_NONE, MOVE_NONE, eval);
+ tte->save(posKey, VALUE_NONE, ss->ttPv, BOUND_NONE, DEPTH_NONE, MOVE_NONE, eval);
}
- thisThread->complexityAverage.update(complexity);
-
- // Use static evaluation difference to improve quiet move ordering (~3 Elo)
+ // Use static evaluation difference to improve quiet move ordering (~4 Elo)
if (is_ok((ss-1)->currentMove) && !(ss-1)->inCheck && !priorCapture)
{
- int bonus = std::clamp(-19 * int((ss-1)->staticEval + ss->staticEval), -1914, 1914);
+ int bonus = std::clamp(-18 * int((ss-1)->staticEval + ss->staticEval), -1817, 1817);
thisThread->mainHistory[~us][from_to((ss-1)->currentMove)] << bonus;
}
@@ -764,43 +751,43 @@ namespace {
// margin and the improving flag are used in various pruning heuristics.
improvement = (ss-2)->staticEval != VALUE_NONE ? ss->staticEval - (ss-2)->staticEval
: (ss-4)->staticEval != VALUE_NONE ? ss->staticEval - (ss-4)->staticEval
- : 168;
+ : 173;
improving = improvement > 0;
- // Step 7. Razoring.
+ // Step 7. Razoring (~1 Elo).
// If eval is really low check with qsearch if it can exceed alpha, if it can't,
// return a fail low.
- if (eval < alpha - 369 - 254 * depth * depth)
+ if (eval < alpha - 456 - 252 * depth * depth)
{
value = qsearch