Update to Stockfish 15

Peter Osterlund 2022-04-23 14:00:50 +02:00
parent bc1c8a2c29
commit 07931e96a5
55 changed files with 1812 additions and 1255 deletions

View File

@ -5,7 +5,7 @@ SF_SRC_FILES := \
bitbase.cpp endgame.cpp material.cpp movepick.cpp position.cpp timeman.cpp \
tune.cpp ucioption.cpp \
bitboard.cpp evaluate.cpp misc.cpp search.cpp tt.cpp syzygy/tbprobe.cpp \
nnue/evaluate_nnue.cpp nnue/features/half_ka_v2.cpp
nnue/evaluate_nnue.cpp nnue/features/half_ka_v2_hm.cpp
MY_ARCH_DEF :=
ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -87,6 +87,7 @@ const vector<string> Defaults = {
// Chess 960
"setoption name UCI_Chess960 value true",
"bbqnnrkr/pppppppp/8/8/8/8/PPPPPPPP/BBQNNRKR w HFhf - 0 1 moves g2g3 d7d5 d2d4 c8h3 c1g5 e8d6 g5e7 f7f6",
"nqbnrkrb/pppppppp/8/8/8/8/PPPPPPPP/NQBNRKRB w KQkq - 0 1",
"setoption name UCI_Chess960 value false"
};

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -61,7 +61,7 @@ namespace Stockfish {
namespace Eval {
bool useNNUE;
string eval_file_loaded = "None";
string currentEvalFileName = "None";
/// NNUE::init() tries to load a NNUE network at startup time, or when the engine
/// receives a UCI command "setoption name EvalFile value nn-[a-z0-9]{12}.nnue"
@ -78,6 +78,8 @@ namespace Eval {
return;
string eval_file = string(Options["EvalFile"]);
if (eval_file.empty())
eval_file = EvalFileDefaultName;
#if defined(DEFAULT_NNUE_DIRECTORY)
#define stringify2(x) #x
@ -88,13 +90,13 @@ namespace Eval {
#endif
for (string directory : dirs)
if (eval_file_loaded != eval_file)
if (currentEvalFileName != eval_file)
{
if (directory != "<internal>")
{
ifstream stream(directory + eval_file, ios::binary);
if (load_eval(eval_file, stream))
eval_file_loaded = eval_file;
currentEvalFileName = eval_file;
}
if (directory == "<internal>" && eval_file == EvalFileDefaultName)
@ -106,30 +108,29 @@ namespace Eval {
MemoryBuffer buffer(const_cast<char*>(reinterpret_cast<const char*>(gEmbeddedNNUEData)),
size_t(gEmbeddedNNUESize));
(void) gEmbeddedNNUEEnd; // Silence warning on unused variable
istream stream(&buffer);
if (load_eval(eval_file, stream))
eval_file_loaded = eval_file;
currentEvalFileName = eval_file;
}
}
if (eval_file_loaded != eval_file)
eval_file_loaded = "";
}
/// NNUE::verify() verifies that the last net used was loaded successfully
void NNUE::verify() {
string eval_file = string(Options["EvalFile"]);
if (eval_file.empty())
eval_file = EvalFileDefaultName;
if (useNNUE && eval_file_loaded != eval_file)
if (useNNUE && currentEvalFileName != eval_file)
{
UCI::OptionsMap defaults;
UCI::init(defaults);
string msg1 = "If the UCI option \"Use NNUE\" is set to true, network evaluation parameters compatible with the engine must be available.";
string msg2 = "The option is set to true, but the network file " + eval_file + " was not loaded successfully.";
string msg3 = "The UCI option EvalFile might need to specify the full path, including the directory name, to the network file.";
string msg4 = "The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/" + string(defaults["EvalFile"]);
string msg4 = "The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/" + std::string(EvalFileDefaultName);
string msg5 = "The engine will be terminated now.";
sync_cout << "info string ERROR: " << msg1 << sync_endl;
@ -192,17 +193,17 @@ using namespace Trace;
namespace {
// Threshold for lazy and space evaluation
constexpr Value LazyThreshold1 = Value(1565);
constexpr Value LazyThreshold2 = Value(1102);
constexpr Value LazyThreshold1 = Value(3631);
constexpr Value LazyThreshold2 = Value(2084);
constexpr Value SpaceThreshold = Value(11551);
// KingAttackWeights[PieceType] contains king attack weights by piece type
constexpr int KingAttackWeights[PIECE_TYPE_NB] = { 0, 0, 81, 52, 44, 10 };
constexpr int KingAttackWeights[PIECE_TYPE_NB] = { 0, 0, 76, 46, 45, 14 };
// SafeCheck[PieceType][single/multiple] contains safe check bonus by piece type,
// higher if multiple safe checks are possible for that piece type.
constexpr int SafeCheck[][2] = {
{}, {}, {803, 1292}, {639, 974}, {1087, 1878}, {759, 1132}
{}, {}, {805, 1292}, {650, 984}, {1071, 1886}, {730, 1128}
};
#define S(mg, eg) make_score(mg, eg)
@ -228,58 +229,58 @@ namespace {
// BishopPawns[distance from edge] contains a file-dependent penalty for pawns on
// squares of the same color as our bishop.
constexpr Score BishopPawns[int(FILE_NB) / 2] = {
S(3, 8), S(3, 9), S(2, 8), S(3, 8)
S(3, 8), S(3, 9), S(2, 7), S(3, 7)
};
// KingProtector[knight/bishop] contains penalty for each distance unit to own king
constexpr Score KingProtector[] = { S(8, 9), S(6, 9) };
constexpr Score KingProtector[] = { S(9, 9), S(7, 9) };
// Outpost[knight/bishop] contains bonuses for each knight or bishop occupying a
// pawn protected square on rank 4 to 6 which is also safe from a pawn attack.
constexpr Score Outpost[] = { S(57, 38), S(31, 24) };
constexpr Score Outpost[] = { S(54, 34), S(31, 25) };
// PassedRank[Rank] contains a bonus according to the rank of a passed pawn
constexpr Score PassedRank[RANK_NB] = {
S(0, 0), S(7, 27), S(16, 32), S(17, 40), S(64, 71), S(170, 174), S(278, 262)
S(0, 0), S(2, 38), S(15, 36), S(22, 50), S(64, 81), S(166, 184), S(284, 269)
};
constexpr Score RookOnClosedFile = S(10, 5);
constexpr Score RookOnOpenFile[] = { S(19, 6), S(47, 26) };
constexpr Score RookOnOpenFile[] = { S(18, 8), S(49, 26) };
// ThreatByMinor/ByRook[attacked PieceType] contains bonuses according to
// which piece type attacks which one. Attacks on lesser pieces which are
// pawn-defended are not considered.
constexpr Score ThreatByMinor[PIECE_TYPE_NB] = {
S(0, 0), S(5, 32), S(55, 41), S(77, 56), S(89, 119), S(79, 162)
S(0, 0), S(6, 37), S(64, 50), S(82, 57), S(103, 130), S(81, 163)
};
constexpr Score ThreatByRook[PIECE_TYPE_NB] = {
S(0, 0), S(3, 44), S(37, 68), S(42, 60), S(0, 39), S(58, 43)
S(0, 0), S(3, 44), S(36, 71), S(44, 59), S(0, 39), S(60, 39)
};
constexpr Value CorneredBishop = Value(50);
// Assorted bonuses and penalties
constexpr Score UncontestedOutpost = S( 1, 10);
constexpr Score UncontestedOutpost = S( 0, 10);
constexpr Score BishopOnKingRing = S( 24, 0);
constexpr Score BishopXRayPawns = S( 4, 5);
constexpr Score FlankAttacks = S( 8, 0);
constexpr Score Hanging = S( 69, 36);
constexpr Score Hanging = S( 72, 40);
constexpr Score KnightOnQueen = S( 16, 11);
constexpr Score LongDiagonalBishop = S( 45, 0);
constexpr Score MinorBehindPawn = S( 18, 3);
constexpr Score PassedFile = S( 11, 8);
constexpr Score PawnlessFlank = S( 17, 95);
constexpr Score ReachableOutpost = S( 31, 22);
constexpr Score RestrictedPiece = S( 7, 7);
constexpr Score PassedFile = S( 13, 8);
constexpr Score PawnlessFlank = S( 19, 97);
constexpr Score ReachableOutpost = S( 33, 19);
constexpr Score RestrictedPiece = S( 6, 7);
constexpr Score RookOnKingRing = S( 16, 0);
constexpr Score SliderOnQueen = S( 60, 18);
constexpr Score ThreatByKing = S( 24, 89);
constexpr Score SliderOnQueen = S( 62, 21);
constexpr Score ThreatByKing = S( 24, 87);
constexpr Score ThreatByPawnPush = S( 48, 39);
constexpr Score ThreatBySafePawn = S(173, 94);
constexpr Score ThreatBySafePawn = S(167, 99);
constexpr Score TrappedRook = S( 55, 13);
constexpr Score WeakQueenProtection = S( 14, 0);
constexpr Score WeakQueen = S( 56, 15);
constexpr Score WeakQueen = S( 57, 19);
#undef S
@ -988,7 +989,9 @@ namespace {
// Early exit if score is high
auto lazy_skip = [&](Value lazyThreshold) {
return abs(mg_value(score) + eg_value(score)) / 2 > lazyThreshold + pos.non_pawn_material() / 64;
return abs(mg_value(score) + eg_value(score)) > lazyThreshold
+ std::abs(pos.this_thread()->bestValue) * 5 / 4
+ pos.non_pawn_material() / 32;
};
if (lazy_skip(LazyThreshold1))
@ -1053,26 +1056,22 @@ make_v:
if ( pos.piece_on(SQ_A1) == W_BISHOP
&& pos.piece_on(SQ_B2) == W_PAWN)
correction += !pos.empty(SQ_B3) ? -CorneredBishop * 4
: -CorneredBishop * 3;
correction -= CorneredBishop;
if ( pos.piece_on(SQ_H1) == W_BISHOP
&& pos.piece_on(SQ_G2) == W_PAWN)
correction += !pos.empty(SQ_G3) ? -CorneredBishop * 4
: -CorneredBishop * 3;
correction -= CorneredBishop;
if ( pos.piece_on(SQ_A8) == B_BISHOP
&& pos.piece_on(SQ_B7) == B_PAWN)
correction += !pos.empty(SQ_B6) ? CorneredBishop * 4
: CorneredBishop * 3;
correction += CorneredBishop;
if ( pos.piece_on(SQ_H8) == B_BISHOP
&& pos.piece_on(SQ_G7) == B_PAWN)
correction += !pos.empty(SQ_G6) ? CorneredBishop * 4
: CorneredBishop * 3;
correction += CorneredBishop;
return pos.side_to_move() == WHITE ? Value(correction)
: -Value(correction);
return pos.side_to_move() == WHITE ? Value(3 * correction)
: -Value(3 * correction);
}
} // namespace Eval
@ -1084,38 +1083,37 @@ make_v:
Value Eval::evaluate(const Position& pos) {
Value v;
bool useClassical = false;
if (!Eval::useNNUE)
v = Evaluation<NO_TRACE>(pos).value();
else
// Deciding between classical and NNUE eval (~10 Elo): for high PSQ imbalance we use classical,
// but we switch to NNUE during long shuffling or with high material on the board.
if ( !useNNUE
|| ((pos.this_thread()->depth > 9 || pos.count<ALL_PIECES>() > 7) &&
abs(eg_value(pos.psq_score())) * 5 > (856 + pos.non_pawn_material() / 64) * (10 + pos.rule50_count())))
{
// Scale and shift NNUE for compatibility with search and classical evaluation
auto adjusted_NNUE = [&]()
{
int scale = 903
+ 32 * pos.count<PAWN>()
+ 32 * pos.non_pawn_material() / 1024;
v = Evaluation<NO_TRACE>(pos).value(); // classical
useClassical = abs(v) >= 297;
}
Value nnue = NNUE::evaluate(pos, true) * scale / 1024;
// If result of a classical evaluation is much lower than threshold fall back to NNUE
if (useNNUE && !useClassical)
{
Value nnue = NNUE::evaluate(pos, true); // NNUE
int scale = 1036 + 22 * pos.non_pawn_material() / 1024;
Color stm = pos.side_to_move();
Value optimism = pos.this_thread()->optimism[stm];
Value psq = (stm == WHITE ? 1 : -1) * eg_value(pos.psq_score());
int complexity = 35 * abs(nnue - psq) / 256;
if (pos.is_chess960())
nnue += fix_FRC(pos);
optimism = optimism * (44 + complexity) / 31;
v = (nnue + optimism) * scale / 1024 - optimism;
return nnue;
};
// If there is PSQ imbalance we use the classical eval, but we switch to
// NNUE eval faster when shuffling or if the material on the board is high.
int r50 = pos.rule50_count();
Value psq = Value(abs(eg_value(pos.psq_score())));
bool classical = psq * 5 > (750 + pos.non_pawn_material() / 64) * (5 + r50);
v = classical ? Evaluation<NO_TRACE>(pos).value() // classical
: adjusted_NNUE(); // NNUE
if (pos.is_chess960())
v += fix_FRC(pos);
}
// Damp down the evaluation linearly when shuffling
v = v * (100 - pos.rule50_count()) / 100;
v = v * (195 - pos.rule50_count()) / 211;
// Guarantee evaluation does not hit the tablebase range
v = std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1);
@ -1140,7 +1138,12 @@ std::string Eval::trace(Position& pos) {
std::memset(scores, 0, sizeof(scores));
pos.this_thread()->trend = SCORE_ZERO; // Reset any dynamic contempt
// Reset any global variable used in eval
pos.this_thread()->depth = 0;
pos.this_thread()->trend = SCORE_ZERO;
pos.this_thread()->bestValue = VALUE_ZERO;
pos.this_thread()->optimism[WHITE] = VALUE_ZERO;
pos.this_thread()->optimism[BLACK] = VALUE_ZERO;
v = Evaluation<TRACE>(pos).value();

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -34,12 +34,12 @@ namespace Eval {
Value evaluate(const Position& pos);
extern bool useNNUE;
extern std::string eval_file_loaded;
extern std::string currentEvalFileName;
// The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue
// for the build process (profile-build and fishtest) to work. Do not change the
// name of the macro, as it is used in the Makefile.
#define EvalFileDefaultName "nn-3475407dc199.nnue"
#define EvalFileDefaultName "nn-6877cd24400e.nnue"
namespace NNUE {

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -36,6 +36,8 @@ typedef bool(*fun1_t)(LOGICAL_PROCESSOR_RELATIONSHIP,
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, PDWORD);
typedef bool(*fun2_t)(USHORT, PGROUP_AFFINITY);
typedef bool(*fun3_t)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY);
typedef bool(*fun4_t)(USHORT, PGROUP_AFFINITY, USHORT, PUSHORT);
typedef WORD(*fun5_t)();
}
#endif
@ -67,7 +69,7 @@ namespace {
/// Version number. If Version is left empty, then the compile date in the format
/// DD-MM-YY is shown in engine_info.
const string Version = "14";
const string Version = "15";
/// Our fancy logging facility. The trick here is to replace cin.rdbuf() and
/// cout.rdbuf() with two Tie objects that tie cin and cout to a file stream. We
@ -110,7 +112,14 @@ public:
static Logger l;
if (!fname.empty() && !l.file.is_open())
if (l.file.is_open())
{
cout.rdbuf(l.out.buf);
cin.rdbuf(l.in.buf);
l.file.close();
}
if (!fname.empty())
{
l.file.open(fname, ifstream::out);
@ -123,12 +132,6 @@ public:
cin.rdbuf(&l.in);
cout.rdbuf(&l.out);
}
else if (fname.empty() && l.file.is_open())
{
cout.rdbuf(l.out.buf);
cin.rdbuf(l.in.buf);
l.file.close();
}
}
};
@ -378,6 +381,7 @@ void std_aligned_free(void* ptr) {
static void* aligned_large_pages_alloc_windows(size_t allocSize) {
#if !defined(_WIN64)
(void)allocSize; // suppress unused-parameter compiler warning
return nullptr;
#else
@ -493,11 +497,11 @@ void bindThisThread(size_t) {}
#else
/// best_group() retrieves logical processor information using Windows specific
/// API and returns the best group id for the thread with index idx. Original
/// best_node() retrieves logical processor information using Windows specific
/// API and returns the best node id for the thread with index idx. Original
/// code from Texel by Peter Österlund.
int best_group(size_t idx) {
int best_node(size_t idx) {
int threads = 0;
int nodes = 0;
@ -511,7 +515,8 @@ int best_group(size_t idx) {
if (!fun1)
return -1;
// First call to get returnLength. We expect it to fail due to null buffer
// First call to GetLogicalProcessorInformationEx() to get returnLength.
// We expect the call to fail due to null buffer.
if (fun1(RelationAll, nullptr, &returnLength))
return -1;
@ -519,7 +524,7 @@ int best_group(size_t idx) {
SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *buffer, *ptr;
ptr = buffer = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)malloc(returnLength);
// Second call, now we expect to succeed
// Second call to GetLogicalProcessorInformationEx(), now we expect to succeed
if (!fun1(RelationAll, buffer, &returnLength))
{
free(buffer);
@ -569,22 +574,38 @@ int best_group(size_t idx) {
void bindThisThread(size_t idx) {
// Use only local variables to be thread-safe
int group = best_group(idx);
int node = best_node(idx);
if (group == -1)
if (node == -1)
return;
// Early exit if the needed API are not available at runtime
HMODULE k32 = GetModuleHandle("Kernel32.dll");
auto fun2 = (fun2_t)(void(*)())GetProcAddress(k32, "GetNumaNodeProcessorMaskEx");
auto fun3 = (fun3_t)(void(*)())GetProcAddress(k32, "SetThreadGroupAffinity");
auto fun4 = (fun4_t)(void(*)())GetProcAddress(k32, "GetNumaNodeProcessorMask2");
auto fun5 = (fun5_t)(void(*)())GetProcAddress(k32, "GetMaximumProcessorGroupCount");
if (!fun2 || !fun3)
return;
GROUP_AFFINITY affinity;
if (fun2(group, &affinity))
fun3(GetCurrentThread(), &affinity, nullptr);
if (!fun4 || !fun5)
{
GROUP_AFFINITY affinity;
if (fun2(node, &affinity)) // GetNumaNodeProcessorMaskEx
fun3(GetCurrentThread(), &affinity, nullptr); // SetThreadGroupAffinity
}
else
{
// If a numa node has more than one processor group, we assume they are
// equally sized and we spread threads evenly across the groups.
USHORT elements, returnedElements;
elements = fun5(); // GetMaximumProcessorGroupCount
GROUP_AFFINITY *affinity = (GROUP_AFFINITY*)malloc(elements * sizeof(GROUP_AFFINITY));
if (fun4(node, affinity, elements, &returnedElements)) // GetNumaNodeProcessorMask2
fun3(GetCurrentThread(), &affinity[idx % returnedElements], nullptr); // SetThreadGroupAffinity
free(affinity);
}
}
#endif

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -85,19 +85,30 @@ static inline const union { uint32_t i; char c[4]; } Le = { 0x01020304 };
static inline const bool IsLittleEndian = (Le.c[0] == 4);
template <typename T>
class ValueListInserter {
public:
ValueListInserter(T* v, std::size_t& s) :
values(v),
size(&s)
{
}
// RunningAverage : a class to calculate a running average of a series of values.
// For efficiency, all computations are done with integers.
class RunningAverage {
public:
void push_back(const T& value) { values[(*size)++] = value; }
private:
T* values;
std::size_t* size;
// Reset the running average to rational value p / q
void set(int64_t p, int64_t q)
{ average = p * PERIOD * RESOLUTION / q; }
// Update average with value v
void update(int64_t v)
{ average = RESOLUTION * v + (PERIOD - 1) * average / PERIOD; }
// Test if average is strictly greater than rational a / b
bool is_greater(int64_t a, int64_t b) const
{ return b * average > a * (PERIOD * RESOLUTION); }
int64_t value() const
{ return average / (PERIOD * RESOLUTION); }
private :
static constexpr int64_t PERIOD = 4096;
static constexpr int64_t RESOLUTION = 1024;
int64_t average;
};
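For illustration, a minimal standalone sketch of how this integer running average behaves; the class body below is a simplified copy of the hunk above (with average initialised to zero) and the sample values are invented for the example:

#include <cstdint>
#include <iostream>

class RunningAverage {
public:
    // Reset the running average to the rational value p / q
    void set(int64_t p, int64_t q) { average = p * PERIOD * RESOLUTION / q; }
    // Fold a new observation v into the average
    void update(int64_t v) { average = RESOLUTION * v + (PERIOD - 1) * average / PERIOD; }
    // Current value of the average, truncated to an integer
    int64_t value() const { return average / (PERIOD * RESOLUTION); }
private:
    static constexpr int64_t PERIOD = 4096;
    static constexpr int64_t RESOLUTION = 1024;
    int64_t average = 0;
};

int main() {
    RunningAverage avg;
    avg.set(1, 2);                 // start at 0.5
    for (int i = 0; i < 100000; ++i)
        avg.update(100);           // feed a constant stream of 100s
    std::cout << avg.value() << std::endl;  // prints a value close to 100 (the period is 4096,
                                            // so convergence takes many thousands of updates)
    return 0;
}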
template <typename T, std::size_t MaxSize>
@ -113,7 +124,6 @@ public:
const T& operator[](std::size_t index) const { return values_[index]; }
const T* begin() const { return values_; }
const T* end() const { return values_ + size_; }
operator ValueListInserter<T>() { return ValueListInserter(values_, size_); }
void swap(ValueList& other) {
const std::size_t maxSize = std::max(size_, other.size_);
@ -128,6 +138,34 @@ private:
std::size_t size_ = 0;
};
/// sigmoid(t, x0, y0, C, P, Q) implements a sigmoid-like function using only integers,
/// with the following properties:
///
/// - sigmoid is centered in (x0, y0)
/// - sigmoid has amplitude [-P/Q , P/Q] instead of [-1 , +1]
/// - limit is (y0 - P/Q) when t tends to -infinity
/// - limit is (y0 + P/Q) when t tends to +infinity
/// - the slope can be adjusted using C > 0, smaller C giving a steeper sigmoid
/// - the slope of the sigmoid when t = x0 is P/(Q*C)
/// - sigmoid is increasing with t when P > 0 and Q > 0
/// - to get a decreasing sigmoid, change sign of P
/// - mean value of the sigmoid is y0
///
/// Use <https://www.desmos.com/calculator/jhh83sqq92> to draw the sigmoid
inline int64_t sigmoid(int64_t t, int64_t x0,
int64_t y0,
int64_t C,
int64_t P,
int64_t Q)
{
assert(C > 0);
assert(Q != 0);
return y0 + P * (t-x0) / (Q * (std::abs(t-x0) + C)) ;
}
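As a quick check of the properties listed above, here is a minimal standalone sketch; the sigmoid body is copied from this hunk and the parameter values (x0 = 0, y0 = 0, C = 100, P = 1000, Q = 1) are chosen only for illustration:

#include <cassert>
#include <cstdint>
#include <cstdlib>

// Simplified copy of the integer sigmoid defined above
int64_t sigmoid(int64_t t, int64_t x0, int64_t y0, int64_t C, int64_t P, int64_t Q) {
    return y0 + P * (t - x0) / (Q * (std::abs(t - x0) + C));
}

int main() {
    assert(sigmoid(0,       0, 0, 100, 1000, 1) == 0);    // value at the center is y0
    assert(sigmoid(100,     0, 0, 100, 1000, 1) == 500);  // one C away from x0: half the amplitude P/Q
    assert(sigmoid(1000000, 0, 0, 100, 1000, 1) == 999);  // far from x0: approaches the limit y0 + P/Q = 1000
    return 0;
}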
/// xorshift64star Pseudo-Random Number Generator
/// This class is based on original code written and dedicated
/// to the public domain by Sebastiano Vigna (2014).

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -52,9 +52,9 @@ namespace {
constexpr Direction UpRight = (Us == WHITE ? NORTH_EAST : SOUTH_WEST);
constexpr Direction UpLeft = (Us == WHITE ? NORTH_WEST : SOUTH_EAST);
const Bitboard emptySquares = Type == QUIETS || Type == QUIET_CHECKS ? target : ~pos.pieces();
const Bitboard enemies = Type == EVASIONS ? pos.checkers()
: Type == CAPTURES ? target : pos.pieces(Them);
const Bitboard emptySquares = ~pos.pieces();
const Bitboard enemies = Type == EVASIONS ? pos.checkers()
: pos.pieces(Them);
Bitboard pawnsOn7 = pos.pieces(Us, PAWN) & TRank7BB;
Bitboard pawnsNotOn7 = pos.pieces(Us, PAWN) & ~TRank7BB;

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -18,6 +18,7 @@
#include <cassert>
#include "bitboard.h"
#include "movepick.h"
namespace Stockfish {
@ -56,11 +57,14 @@ namespace {
/// ordering is at the current node.
/// MovePicker constructor for the main search
MovePicker::MovePicker(const Position& p, Move ttm, Depth d, const ButterflyHistory* mh, const LowPlyHistory* lp,
const CapturePieceToHistory* cph, const PieceToHistory** ch, Move cm, const Move* killers, int pl)
: pos(p), mainHistory(mh), lowPlyHistory(lp), captureHistory(cph), continuationHistory(ch),
ttMove(ttm), refutations{{killers[0], 0}, {killers[1], 0}, {cm, 0}}, depth(d), ply(pl) {
MovePicker::MovePicker(const Position& p, Move ttm, Depth d, const ButterflyHistory* mh,
const CapturePieceToHistory* cph,
const PieceToHistory** ch,
Move cm,
const Move* killers)
: pos(p), mainHistory(mh), captureHistory(cph), continuationHistory(ch),
ttMove(ttm), refutations{{killers[0], 0}, {killers[1], 0}, {cm, 0}}, depth(d)
{
assert(d > 0);
stage = (pos.checkers() ? EVASION_TT : MAIN_TT) +
@ -69,9 +73,11 @@ MovePicker::MovePicker(const Position& p, Move ttm, Depth d, const ButterflyHist
/// MovePicker constructor for quiescence search
MovePicker::MovePicker(const Position& p, Move ttm, Depth d, const ButterflyHistory* mh,
const CapturePieceToHistory* cph, const PieceToHistory** ch, Square rs)
: pos(p), mainHistory(mh), captureHistory(cph), continuationHistory(ch), ttMove(ttm), recaptureSquare(rs), depth(d) {
const CapturePieceToHistory* cph,
const PieceToHistory** ch,
Square rs)
: pos(p), mainHistory(mh), captureHistory(cph), continuationHistory(ch), ttMove(ttm), recaptureSquare(rs), depth(d)
{
assert(d <= 0);
stage = (pos.checkers() ? EVASION_TT : QSEARCH_TT) +
@ -82,9 +88,9 @@ MovePicker::MovePicker(const Position& p, Move ttm, Depth d, const ButterflyHist
/// MovePicker constructor for ProbCut: we generate captures with SEE greater
/// than or equal to the given threshold.
MovePicker::MovePicker(const Position& p, Move ttm, Value th, const CapturePieceToHistory* cph)
: pos(p), captureHistory(cph), ttMove(ttm), threshold(th) {
MovePicker::MovePicker(const Position& p, Move ttm, Value th, Depth d, const CapturePieceToHistory* cph)
: pos(p), captureHistory(cph), ttMove(ttm), threshold(th), depth(d)
{
assert(!pos.checkers());
stage = PROBCUT_TT + !(ttm && pos.capture(ttm)
@ -100,10 +106,35 @@ void MovePicker::score() {
static_assert(Type == CAPTURES || Type == QUIETS || Type == EVASIONS, "Wrong type");
Bitboard threatened, threatenedByPawn, threatenedByMinor, threatenedByRook;
if constexpr (Type == QUIETS)
{
Color us = pos.side_to_move();
// squares threatened by pawns
threatenedByPawn = pos.attacks_by<PAWN>(~us);
// squares threatened by minors or pawns
threatenedByMinor = pos.attacks_by<KNIGHT>(~us) | pos.attacks_by<BISHOP>(~us) | threatenedByPawn;
// squares threatened by rooks, minors or pawns
threatenedByRook = pos.attacks_by<ROOK>(~us) | threatenedByMinor;
// pieces threatened by pieces of lesser material value
threatened = (pos.pieces(us, QUEEN) & threatenedByRook)
| (pos.pieces(us, ROOK) & threatenedByMinor)
| (pos.pieces(us, KNIGHT, BISHOP) & threatenedByPawn);
}
else
{
// Silence unused variable warnings
(void) threatened;
(void) threatenedByPawn;
(void) threatenedByMinor;
(void) threatenedByRook;
}
for (auto& m : *this)
if constexpr (Type == CAPTURES)
m.value = int(PieceValue[MG][pos.piece_on(to_sq(m))]) * 6
+ (*captureHistory)[pos.moved_piece(m)][to_sq(m)][type_of(pos.piece_on(to_sq(m)))];
m.value = 6 * int(PieceValue[MG][pos.piece_on(to_sq(m))])
+ (*captureHistory)[pos.moved_piece(m)][to_sq(m)][type_of(pos.piece_on(to_sq(m)))];
else if constexpr (Type == QUIETS)
m.value = (*mainHistory)[pos.side_to_move()][from_to(m)]
@ -111,7 +142,12 @@ void MovePicker::score() {
+ (*continuationHistory[1])[pos.moved_piece(m)][to_sq(m)]
+ (*continuationHistory[3])[pos.moved_piece(m)][to_sq(m)]
+ (*continuationHistory[5])[pos.moved_piece(m)][to_sq(m)]
+ (ply < MAX_LPH ? std::min(4, depth / 3) * (*lowPlyHistory)[ply][from_to(m)] : 0);
+ (threatened & from_sq(m) ?
(type_of(pos.moved_piece(m)) == QUEEN && !(to_sq(m) & threatenedByRook) ? 50000
: type_of(pos.moved_piece(m)) == ROOK && !(to_sq(m) & threatenedByMinor) ? 25000
: !(to_sq(m) & threatenedByPawn) ? 15000
: 0)
: 0);
else // Type == EVASIONS
{
@ -165,11 +201,12 @@ top:
endMoves = generate<CAPTURES>(pos, cur);
score<CAPTURES>();
partial_insertion_sort(cur, endMoves, -3000 * depth);
++stage;
goto top;
case GOOD_CAPTURE:
if (select<Best>([&](){
if (select<Next>([&](){
return pos.see_ge(*cur, Value(-69 * cur->value / 1024)) ?
// Move losing capture to endBadCaptures to be tried later
true : (*endBadCaptures++ = *cur, false); }))
@ -237,10 +274,10 @@ top:
return select<Best>([](){ return true; });
case PROBCUT:
return select<Best>([&](){ return pos.see_ge(*cur, threshold); });
return select<Next>([&](){ return pos.see_ge(*cur, threshold); });
case QCAPTURE:
if (select<Best>([&](){ return depth > DEPTH_QS_RECAPTURES
if (select<Next>([&](){ return depth > DEPTH_QS_RECAPTURES
|| to_sq(*cur) == recaptureSquare; }))
return *(cur - 1);

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -86,13 +86,7 @@ enum StatsType { NoCaptures, Captures };
/// unsuccessful during the current search, and is used for reduction and move
/// ordering decisions. It uses 2 tables (one for each color) indexed by
/// the move's from and to squares, see www.chessprogramming.org/Butterfly_Boards
typedef Stats<int16_t, 13365, COLOR_NB, int(SQUARE_NB) * int(SQUARE_NB)> ButterflyHistory;
/// At higher depths LowPlyHistory records successful quiet moves near the root
/// and quiet moves which are/were in the PV (ttPv). It is cleared with each new
/// search and filled during iterative deepening.
constexpr int MAX_LPH = 4;
typedef Stats<int16_t, 10692, MAX_LPH, int(SQUARE_NB) * int(SQUARE_NB)> LowPlyHistory;
typedef Stats<int16_t, 14365, COLOR_NB, int(SQUARE_NB) * int(SQUARE_NB)> ButterflyHistory;
/// CounterMoveHistory stores counter moves indexed by [piece][to] of the previous
/// move, see www.chessprogramming.org/Countermove_Heuristic
@ -123,18 +117,16 @@ class MovePicker {
public:
MovePicker(const MovePicker&) = delete;
MovePicker& operator=(const MovePicker&) = delete;
MovePicker(const Position&, Move, Value, const CapturePieceToHistory*);
MovePicker(const Position&, Move, Depth, const ButterflyHistory*,
const CapturePieceToHistory*,
const PieceToHistory**,
Move,
const Move*);
MovePicker(const Position&, Move, Depth, const ButterflyHistory*,
const CapturePieceToHistory*,
const PieceToHistory**,
Square);
MovePicker(const Position&, Move, Depth, const ButterflyHistory*,
const LowPlyHistory*,
const CapturePieceToHistory*,
const PieceToHistory**,
Move,
const Move*,
int);
MovePicker(const Position&, Move, Value, Depth, const CapturePieceToHistory*);
Move next_move(bool skipQuiets = false);
private:
@ -145,7 +137,6 @@ private:
const Position& pos;
const ButterflyHistory* mainHistory;
const LowPlyHistory* lowPlyHistory;
const CapturePieceToHistory* captureHistory;
const PieceToHistory** continuationHistory;
Move ttMove;
@ -154,7 +145,6 @@ private:
Square recaptureSquare;
Value threshold;
Depth depth;
int ply;
ExtMove moves[MAX_MOVES];
};

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -109,7 +109,7 @@ namespace Stockfish::Eval::NNUE {
{
write_little_endian<std::uint32_t>(stream, Version);
write_little_endian<std::uint32_t>(stream, hashValue);
write_little_endian<std::uint32_t>(stream, desc.size());
write_little_endian<std::uint32_t>(stream, (std::uint32_t)desc.size());
stream.write(&desc[0], desc.size());
return !stream.fail();
}
@ -143,39 +143,29 @@ namespace Stockfish::Eval::NNUE {
// overaligning stack variables with alignas() doesn't work correctly.
constexpr uint64_t alignment = CacheLineSize;
int delta = 10 - pos.non_pawn_material() / 1515;
#if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN)
TransformedFeatureType transformedFeaturesUnaligned[
FeatureTransformer::BufferSize + alignment / sizeof(TransformedFeatureType)];
char bufferUnaligned[Network::BufferSize + alignment];
auto* transformedFeatures = align_ptr_up<alignment>(&transformedFeaturesUnaligned[0]);
auto* buffer = align_ptr_up<alignment>(&bufferUnaligned[0]);
#else
alignas(alignment)
TransformedFeatureType transformedFeatures[FeatureTransformer::BufferSize];
alignas(alignment) char buffer[Network::BufferSize];
#endif
ASSERT_ALIGNED(transformedFeatures, alignment);
ASSERT_ALIGNED(buffer, alignment);
const std::size_t bucket = (pos.count<ALL_PIECES>() - 1) / 4;
const int bucket = (pos.count<ALL_PIECES>() - 1) / 4;
const auto psqt = featureTransformer->transform(pos, transformedFeatures, bucket);
const auto output = network[bucket]->propagate(transformedFeatures, buffer);
const auto positional = network[bucket]->propagate(transformedFeatures);
int materialist = psqt;
int positional = output[0];
int delta_npm = abs(pos.non_pawn_material(WHITE) - pos.non_pawn_material(BLACK));
int entertainment = (adjusted && delta_npm <= BishopValueMg - KnightValueMg ? 7 : 0);
int A = 128 - entertainment;
int B = 128 + entertainment;
int sum = (A * materialist + B * positional) / 128;
return static_cast<Value>( sum / OutputScale );
// Give more value to positional evaluation when adjusted flag is set
if (adjusted)
return static_cast<Value>(((128 - delta) * psqt + (128 + delta) * positional) / 128 / OutputScale);
else
return static_cast<Value>((psqt + positional) / OutputScale);
}
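As a worked example of the blend above: delta = 10 - pos.non_pawn_material() / 1515, so in a position with little non-pawn material left delta is close to 10 and the adjusted return value is roughly (118 * psqt + 138 * positional) / 128 / OutputScale, while with most material still on the board delta shrinks towards 0 and the expression approaches the plain (psqt + positional) / OutputScale sum.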
struct NnueEvalTrace {
@ -196,27 +186,20 @@ namespace Stockfish::Eval::NNUE {
#if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN)
TransformedFeatureType transformedFeaturesUnaligned[
FeatureTransformer::BufferSize + alignment / sizeof(TransformedFeatureType)];
char bufferUnaligned[Network::BufferSize + alignment];
auto* transformedFeatures = align_ptr_up<alignment>(&transformedFeaturesUnaligned[0]);
auto* buffer = align_ptr_up<alignment>(&bufferUnaligned[0]);
#else
alignas(alignment)
TransformedFeatureType transformedFeatures[FeatureTransformer::BufferSize];
alignas(alignment) char buffer[Network::BufferSize];
#endif
ASSERT_ALIGNED(transformedFeatures, alignment);
ASSERT_ALIGNED(buffer, alignment);
NnueEvalTrace t{};
t.correctBucket = (pos.count<ALL_PIECES>() - 1) / 4;
for (std::size_t bucket = 0; bucket < LayerStacks; ++bucket) {
const auto psqt = featureTransformer->transform(pos, transformedFeatures, bucket);
const auto output = network[bucket]->propagate(transformedFeatures, buffer);
int materialist = psqt;
int positional = output[0];
for (IndexType bucket = 0; bucket < LayerStacks; ++bucket) {
const auto materialist = featureTransformer->transform(pos, transformedFeatures, bucket);
const auto positional = network[bucket]->propagate(transformedFeatures);
t.psqt[bucket] = static_cast<Value>( materialist / OutputScale );
t.positional[bucket] = static_cast<Value>( positional / OutputScale );
@ -227,69 +210,46 @@ namespace Stockfish::Eval::NNUE {
static const std::string PieceToChar(" PNBRQK pnbrqk");
// Requires the buffer to have capacity for at least 5 values
// format_cp_compact() converts a Value into (centi)pawns and writes it in a buffer.
// The buffer must have capacity for at least 5 chars.
static void format_cp_compact(Value v, char* buffer) {
buffer[0] = (v < 0 ? '-' : v > 0 ? '+' : ' ');
int cp = std::abs(100 * v / PawnValueEg);
if (cp >= 10000)
{
buffer[1] = '0' + cp / 10000; cp %= 10000;
buffer[2] = '0' + cp / 1000; cp %= 1000;
buffer[3] = '0' + cp / 100; cp %= 100;
buffer[4] = ' ';
buffer[1] = '0' + cp / 10000; cp %= 10000;
buffer[2] = '0' + cp / 1000; cp %= 1000;
buffer[3] = '0' + cp / 100;
buffer[4] = ' ';
}
else if (cp >= 1000)
{
buffer[1] = '0' + cp / 1000; cp %= 1000;
buffer[2] = '0' + cp / 100; cp %= 100;
buffer[3] = '.';
buffer[4] = '0' + cp / 10;
buffer[1] = '0' + cp / 1000; cp %= 1000;
buffer[2] = '0' + cp / 100; cp %= 100;
buffer[3] = '.';
buffer[4] = '0' + cp / 10;
}
else
{
buffer[1] = '0' + cp / 100; cp %= 100;
buffer[2] = '.';
buffer[3] = '0' + cp / 10; cp %= 10;
buffer[4] = '0' + cp / 1;
buffer[1] = '0' + cp / 100; cp %= 100;
buffer[2] = '.';
buffer[3] = '0' + cp / 10; cp %= 10;
buffer[4] = '0' + cp / 1;
}
}
// Requires the buffer to have capacity for at least 7 values
// format_cp_aligned_dot() converts a Value into (centi)pawns and writes it in a buffer,
// always keeping two decimals. The buffer must have capacity for at least 7 chars.
static void format_cp_aligned_dot(Value v, char* buffer) {
buffer[0] = (v < 0 ? '-' : v > 0 ? '+' : ' ');
int cp = std::abs(100 * v / PawnValueEg);
if (cp >= 10000)
{
buffer[1] = '0' + cp / 10000; cp %= 10000;
buffer[2] = '0' + cp / 1000; cp %= 1000;
buffer[3] = '0' + cp / 100; cp %= 100;
buffer[4] = '.';
buffer[5] = '0' + cp / 10; cp %= 10;
buffer[6] = '0' + cp;
}
else if (cp >= 1000)
{
buffer[1] = ' ';
buffer[2] = '0' + cp / 1000; cp %= 1000;
buffer[3] = '0' + cp / 100; cp %= 100;
buffer[4] = '.';
buffer[5] = '0' + cp / 10; cp %= 10;
buffer[6] = '0' + cp;
}
else
{
buffer[1] = ' ';
buffer[2] = ' ';
buffer[3] = '0' + cp / 100; cp %= 100;
buffer[4] = '.';
buffer[5] = '0' + cp / 10; cp %= 10;
buffer[6] = '0' + cp / 1;
}
double cp = 1.0 * std::abs(int(v)) / PawnValueEg;
sprintf(&buffer[1], "%6.2f", cp);
}
@ -419,7 +379,7 @@ namespace Stockfish::Eval::NNUE {
actualFilename = filename.value();
else
{
if (eval_file_loaded != EvalFileDefaultName)
if (currentEvalFileName != EvalFileDefaultName)
{
msg = "Failed to export a net. A non-embedded net can only be saved if the filename is specified";

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -16,31 +16,32 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
//Definition of input features HalfKAv2 of NNUE evaluation function
//Definition of input features HalfKAv2_hm of NNUE evaluation function
#include "half_ka_v2.h"
#include "half_ka_v2_hm.h"
#include "../../position.h"
namespace Stockfish::Eval::NNUE::Features {
// Orient a square according to perspective (rotates by 180 for black)
inline Square HalfKAv2::orient(Color perspective, Square s) {
return Square(int(s) ^ (bool(perspective) * 56));
inline Square HalfKAv2_hm::orient(Color perspective, Square s, Square ksq) {
return Square(int(s) ^ (bool(perspective) * SQ_A8) ^ ((file_of(ksq) < FILE_E) * SQ_H1));
}
// Index of a feature for a given king position and another piece on some square
inline IndexType HalfKAv2::make_index(Color perspective, Square s, Piece pc, Square ksq) {
return IndexType(orient(perspective, s) + PieceSquareIndex[perspective][pc] + PS_NB * ksq);
inline IndexType HalfKAv2_hm::make_index(Color perspective, Square s, Piece pc, Square ksq) {
Square o_ksq = orient(perspective, ksq, ksq);
return IndexType(orient(perspective, s, ksq) + PieceSquareIndex[perspective][pc] + PS_NB * KingBuckets[o_ksq]);
}
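A small standalone illustration of the orientation arithmetic above, using the usual 0..63 square numbering with A1 = 0, H1 = 7 and A8 = 56 (the same constants as SQ_H1 and SQ_A8); this is only a sketch of the flipping trick, not the engine code:

#include <cassert>

// File 0..7 of a 0..63 square (A1 = 0, H1 = 7, A8 = 56)
constexpr int file_of(int s) { return s & 7; }

// Flip vertically for black (xor 56) and mirror horizontally when the king
// is on files a..d (xor 7), so the oriented king always lands on files e..h.
constexpr int orient(bool black, int s, int ksq) {
    return s ^ (black ? 56 : 0) ^ (file_of(ksq) < 4 ? 7 : 0);
}

int main() {
    // White king on c1 (square 2): files a..d, so the board is mirrored and c1 maps to f1 (5)
    assert(orient(false, 2, 2) == 5);
    // Black king on g8 (square 62): flipped vertically to g1 (6), no horizontal mirroring
    assert(orient(true, 62, 62) == 6);
    // A white pawn on a2 (8), seen from the white king on c1, is mirrored to h2 (15)
    assert(orient(false, 8, 2) == 15);
    return 0;
}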
// Get a list of indices for active features
void HalfKAv2::append_active_indices(
void HalfKAv2_hm::append_active_indices(
const Position& pos,
Color perspective,
ValueListInserter<IndexType> active
IndexList& active
) {
Square ksq = orient(perspective, pos.square<KING>(perspective));
Square ksq = pos.square<KING>(perspective);
Bitboard bb = pos.pieces();
while (bb)
{
@ -52,33 +53,30 @@ namespace Stockfish::Eval::NNUE::Features {
// append_changed_indices() : get a list of indices for recently changed features
void HalfKAv2::append_changed_indices(
void HalfKAv2_hm::append_changed_indices(
Square ksq,
StateInfo* st,
const DirtyPiece& dp,
Color perspective,
ValueListInserter<IndexType> removed,
ValueListInserter<IndexType> added
IndexList& removed,
IndexList& added
) {
const auto& dp = st->dirtyPiece;
Square oriented_ksq = orient(perspective, ksq);
for (int i = 0; i < dp.dirty_num; ++i) {
Piece pc = dp.piece[i];
if (dp.from[i] != SQ_NONE)
removed.push_back(make_index(perspective, dp.from[i], pc, oriented_ksq));
removed.push_back(make_index(perspective, dp.from[i], dp.piece[i], ksq));
if (dp.to[i] != SQ_NONE)
added.push_back(make_index(perspective, dp.to[i], pc, oriented_ksq));
added.push_back(make_index(perspective, dp.to[i], dp.piece[i], ksq));
}
}
int HalfKAv2::update_cost(StateInfo* st) {
int HalfKAv2_hm::update_cost(const StateInfo* st) {
return st->dirtyPiece.dirty_num;
}
int HalfKAv2::refresh_cost(const Position& pos) {
int HalfKAv2_hm::refresh_cost(const Position& pos) {
return pos.count<ALL_PIECES>();
}
bool HalfKAv2::requires_refresh(StateInfo* st, Color perspective) {
bool HalfKAv2_hm::requires_refresh(const StateInfo* st, Color perspective) {
return st->dirtyPiece.piece[0] == make_piece(perspective, KING);
}

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -18,8 +18,8 @@
//Definition of input features HalfKP of NNUE evaluation function
#ifndef NNUE_FEATURES_HALF_KA_V2_H_INCLUDED
#define NNUE_FEATURES_HALF_KA_V2_H_INCLUDED
#ifndef NNUE_FEATURES_HALF_KA_V2_HM_H_INCLUDED
#define NNUE_FEATURES_HALF_KA_V2_HM_H_INCLUDED
#include "../nnue_common.h"
@ -32,9 +32,9 @@ namespace Stockfish {
namespace Stockfish::Eval::NNUE::Features {
// Feature HalfKAv2: Combination of the position of own king
// and the position of pieces
class HalfKAv2 {
// Feature HalfKAv2_hm: Combination of the position of own king
// and the position of pieces. The position is mirrored so that the king is always on files e..h.
class HalfKAv2_hm {
// unique number for each piece type on each square
enum {
@ -50,7 +50,7 @@ namespace Stockfish::Eval::NNUE::Features {
PS_W_QUEEN = 8 * SQUARE_NB,
PS_B_QUEEN = 9 * SQUARE_NB,
PS_KING = 10 * SQUARE_NB,
PS_NB = 11 * SQUARE_NB
PS_NB = 11 * SQUARE_NB
};
static constexpr IndexType PieceSquareIndex[COLOR_NB][PIECE_NB] = {
@ -63,49 +63,62 @@ namespace Stockfish::Eval::NNUE::Features {
};
// Orient a square according to perspective (rotates by 180 for black)
static Square orient(Color perspective, Square s);
static Square orient(Color perspective, Square s, Square ksq);
// Index of a feature for a given king position and another piece on some square
static IndexType make_index(Color perspective, Square s, Piece pc, Square ksq);
public:
// Feature name
static constexpr const char* Name = "HalfKAv2(Friend)";
static constexpr const char* Name = "HalfKAv2_hm(Friend)";
// Hash value embedded in the evaluation file
static constexpr std::uint32_t HashValue = 0x5f234cb8u;
static constexpr std::uint32_t HashValue = 0x7f234cb8u;
// Number of feature dimensions
static constexpr IndexType Dimensions =
static_cast<IndexType>(SQUARE_NB) * static_cast<IndexType>(PS_NB);
static_cast<IndexType>(SQUARE_NB) * static_cast<IndexType>(PS_NB) / 2;
static constexpr int KingBuckets[64] = {
-1, -1, -1, -1, 31, 30, 29, 28,
-1, -1, -1, -1, 27, 26, 25, 24,
-1, -1, -1, -1, 23, 22, 21, 20,
-1, -1, -1, -1, 19, 18, 17, 16,
-1, -1, -1, -1, 15, 14, 13, 12,
-1, -1, -1, -1, 11, 10, 9, 8,
-1, -1, -1, -1, 7, 6, 5, 4,
-1, -1, -1, -1, 3, 2, 1, 0
};
// Maximum number of simultaneously active features.
static constexpr IndexType MaxActiveDimensions = 32;
using IndexList = ValueList<IndexType, MaxActiveDimensions>;
// Get a list of indices for active features
static void append_active_indices(
const Position& pos,
Color perspective,
ValueListInserter<IndexType> active);
IndexList& active);
// Get a list of indices for recently changed features
static void append_changed_indices(
Square ksq,
StateInfo* st,
const DirtyPiece& dp,
Color perspective,
ValueListInserter<IndexType> removed,
ValueListInserter<IndexType> added);
IndexList& removed,
IndexList& added
);
// Returns the cost of updating one perspective, the most costly one.
// Assumes no refresh needed.
static int update_cost(StateInfo* st);
static int update_cost(const StateInfo* st);
static int refresh_cost(const Position& pos);
// Returns whether the change stored in this StateInfo means that
// a full accumulator refresh is required.
static bool requires_refresh(StateInfo* st, Color perspective);
static bool requires_refresh(const StateInfo* st, Color perspective);
};
} // namespace Stockfish::Eval::NNUE::Features
#endif // #ifndef NNUE_FEATURES_HALF_KA_V2_H_INCLUDED
#endif // #ifndef NNUE_FEATURES_HALF_KA_V2_HM_H_INCLUDED

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -22,398 +22,338 @@
#define NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED
#include <iostream>
#include <algorithm>
#include <type_traits>
#include "../nnue_common.h"
#include "../../simd.h"
/*
This file contains the definition for a fully connected layer (aka affine transform).
Two approaches are employed, depending on the sizes of the transform.
Approach 1:
- used when the PaddedInputDimensions >= 128
- uses AVX512 if possible
- processes inputs in batches of 2*InputSimdWidth
- so in batches of 128 for AVX512
- the weight blocks of size InputSimdWidth are transposed such that
access is sequential
- N columns of the weight matrix are processed a time, where N
depends on the architecture (the amount of registers)
- accumulate + hadd is used
Approach 2:
- used when the PaddedInputDimensions < 128
- does not use AVX512
- expected use-case is small layers, typically PaddedInputDimensions == 32
  and InputDimensions <= 32 (which is why AVX512 is hard to use effectively)
- not as well optimized as approach 1
- inputs are processed in chunks of 4, weights are respectively transposed
- accumulation happens directly to int32s
*/
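For reference, both approaches compute the same result as the naive scalar loop below (a standalone sketch with made-up dimensions and values, not the engine's optimized code); the SIMD specializations only reorder and vectorize this work:

#include <cstdint>
#include <cstdio>

// Naive affine transform: output = biases + weights * input, with int8 weights,
// uint8 inputs, int32 accumulation and row-major weights[outDims][paddedInDims].
void affine_reference(int32_t* output, const int8_t* weights, const int32_t* biases,
                      const uint8_t* input, int outDims, int paddedInDims) {
    for (int i = 0; i < outDims; ++i) {
        int32_t sum = biases[i];
        for (int j = 0; j < paddedInDims; ++j)
            sum += weights[i * paddedInDims + j] * input[j];
        output[i] = sum;
    }
}

int main() {
    // Tiny made-up layer: 2 outputs, 4 (padded) inputs
    const int8_t  weights[2 * 4] = { 1, 2, 3, 4,   -1, 0, 1, 0 };
    const int32_t biases[2]      = { 10, -10 };
    const uint8_t input[4]       = { 1, 1, 2, 0 };
    int32_t output[2];
    affine_reference(output, weights, biases, input, 2, 4);
    std::printf("%d %d\n", output[0], output[1]);   // prints "19 -9"
    return 0;
}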
namespace Stockfish::Eval::NNUE::Layers {
// Affine transformation layer
template <typename PreviousLayer, IndexType OutDims>
class AffineTransform {
public:
// Input/output type
using InputType = typename PreviousLayer::OutputType;
using OutputType = std::int32_t;
static_assert(std::is_same<InputType, std::uint8_t>::value, "");
// Fallback implementation for older/other architectures.
// Identical for both approaches. Requires the input to be padded to at least 16 values.
#if !defined(USE_SSSE3)
template <IndexType InputDimensions, IndexType PaddedInputDimensions, IndexType OutputDimensions>
static void affine_transform_non_ssse3(std::int32_t* output, const std::int8_t* weights, const std::int32_t* biases, const std::uint8_t* input)
{
# if defined(USE_SSE2)
// At least a multiple of 16, with SSE2.
constexpr IndexType NumChunks = ceil_to_multiple<IndexType>(InputDimensions, 16) / 16;
const __m128i Zeros = _mm_setzero_si128();
const auto inputVector = reinterpret_cast<const __m128i*>(input);
// Number of input/output dimensions
static constexpr IndexType InputDimensions =
PreviousLayer::OutputDimensions;
static constexpr IndexType OutputDimensions = OutDims;
static constexpr IndexType PaddedInputDimensions =
ceil_to_multiple<IndexType>(InputDimensions, MaxSimdWidth);
#if defined (USE_AVX512)
static constexpr const IndexType OutputSimdWidth = SimdWidth / 2;
#elif defined (USE_SSSE3)
static constexpr const IndexType OutputSimdWidth = SimdWidth / 4;
# elif defined(USE_MMX)
constexpr IndexType NumChunks = ceil_to_multiple<IndexType>(InputDimensions, 8) / 8;
const __m64 Zeros = _mm_setzero_si64();
const auto inputVector = reinterpret_cast<const __m64*>(input);
# elif defined(USE_NEON)
constexpr IndexType NumChunks = ceil_to_multiple<IndexType>(InputDimensions, 16) / 16;
const auto inputVector = reinterpret_cast<const int8x8_t*>(input);
# endif
for (IndexType i = 0; i < OutputDimensions; ++i) {
const IndexType offset = i * PaddedInputDimensions;
# if defined(USE_SSE2)
__m128i sumLo = _mm_cvtsi32_si128(biases[i]);
__m128i sumHi = Zeros;
const auto row = reinterpret_cast<const __m128i*>(&weights[offset]);
for (IndexType j = 0; j < NumChunks; ++j) {
__m128i row_j = _mm_load_si128(&row[j]);
__m128i input_j = _mm_load_si128(&inputVector[j]);
__m128i extendedRowLo = _mm_srai_epi16(_mm_unpacklo_epi8(row_j, row_j), 8);
__m128i extendedRowHi = _mm_srai_epi16(_mm_unpackhi_epi8(row_j, row_j), 8);
__m128i extendedInputLo = _mm_unpacklo_epi8(input_j, Zeros);
__m128i extendedInputHi = _mm_unpackhi_epi8(input_j, Zeros);
__m128i productLo = _mm_madd_epi16(extendedRowLo, extendedInputLo);
__m128i productHi = _mm_madd_epi16(extendedRowHi, extendedInputHi);
sumLo = _mm_add_epi32(sumLo, productLo);
sumHi = _mm_add_epi32(sumHi, productHi);
}
__m128i sum = _mm_add_epi32(sumLo, sumHi);
__m128i sumHigh_64 = _mm_shuffle_epi32(sum, _MM_SHUFFLE(1, 0, 3, 2));
sum = _mm_add_epi32(sum, sumHigh_64);
__m128i sum_second_32 = _mm_shufflelo_epi16(sum, _MM_SHUFFLE(1, 0, 3, 2));
sum = _mm_add_epi32(sum, sum_second_32);
output[i] = _mm_cvtsi128_si32(sum);
# elif defined(USE_MMX)
__m64 sumLo = _mm_cvtsi32_si64(biases[i]);
__m64 sumHi = Zeros;
const auto row = reinterpret_cast<const __m64*>(&weights[offset]);
for (IndexType j = 0; j < NumChunks; ++j) {
__m64 row_j = row[j];
__m64 input_j = inputVector[j];
__m64 extendedRowLo = _mm_srai_pi16(_mm_unpacklo_pi8(row_j, row_j), 8);
__m64 extendedRowHi = _mm_srai_pi16(_mm_unpackhi_pi8(row_j, row_j), 8);
__m64 extendedInputLo = _mm_unpacklo_pi8(input_j, Zeros);
__m64 extendedInputHi = _mm_unpackhi_pi8(input_j, Zeros);
__m64 productLo = _mm_madd_pi16(extendedRowLo, extendedInputLo);
__m64 productHi = _mm_madd_pi16(extendedRowHi, extendedInputHi);
sumLo = _mm_add_pi32(sumLo, productLo);
sumHi = _mm_add_pi32(sumHi, productHi);
}
__m64 sum = _mm_add_pi32(sumLo, sumHi);
sum = _mm_add_pi32(sum, _mm_unpackhi_pi32(sum, sum));
output[i] = _mm_cvtsi64_si32(sum);
# elif defined(USE_NEON)
int32x4_t sum = {biases[i]};
const auto row = reinterpret_cast<const int8x8_t*>(&weights[offset]);
for (IndexType j = 0; j < NumChunks; ++j) {
int16x8_t product = vmull_s8(inputVector[j * 2], row[j * 2]);
product = vmlal_s8(product, inputVector[j * 2 + 1], row[j * 2 + 1]);
sum = vpadalq_s16(sum, product);
}
output[i] = sum[0] + sum[1] + sum[2] + sum[3];
# else
std::int32_t sum = biases[i];
for (IndexType j = 0; j < InputDimensions; ++j) {
sum += weights[offset + j] * input[j];
}
output[i] = sum;
# endif
}
# if defined(USE_MMX)
_mm_empty();
# endif
}
#endif
// Size of forward propagation buffer used in this layer
static constexpr std::size_t SelfBufferSize =
ceil_to_multiple(OutputDimensions * sizeof(OutputType), CacheLineSize);
template <IndexType InDims, IndexType OutDims, typename Enabled = void>
class AffineTransform;
// Size of the forward propagation buffer used from the input layer to this layer
static constexpr std::size_t BufferSize =
PreviousLayer::BufferSize + SelfBufferSize;
// A specialization for large inputs.
template <IndexType InDims, IndexType OutDims>
class AffineTransform<InDims, OutDims, std::enable_if_t<(ceil_to_multiple<IndexType>(InDims, MaxSimdWidth) >= 2*64)>> {
public:
// Input/output type
using InputType = std::uint8_t;
using OutputType = std::int32_t;
// Number of input/output dimensions
static constexpr IndexType InputDimensions = InDims;
static constexpr IndexType OutputDimensions = OutDims;
static constexpr IndexType PaddedInputDimensions =
ceil_to_multiple<IndexType>(InputDimensions, MaxSimdWidth);
static constexpr IndexType PaddedOutputDimensions =
ceil_to_multiple<IndexType>(OutputDimensions, MaxSimdWidth);
using OutputBuffer = OutputType[PaddedOutputDimensions];
static_assert(PaddedInputDimensions >= 128, "Something went wrong. This specialization should not have been chosen.");
#if defined (USE_AVX512)
static constexpr const IndexType InputSimdWidth = 64;
static constexpr const IndexType MaxNumOutputRegs = 16;
#elif defined (USE_AVX2)
static constexpr const IndexType InputSimdWidth = 32;
static constexpr const IndexType MaxNumOutputRegs = 8;
#elif defined (USE_SSSE3)
static constexpr const IndexType InputSimdWidth = 16;
static constexpr const IndexType MaxNumOutputRegs = 8;
#elif defined (USE_NEON)
static constexpr const IndexType InputSimdWidth = 8;
static constexpr const IndexType MaxNumOutputRegs = 8;
#else
// The fallback implementation will not have permuted weights.
// We define these to avoid a lot of ifdefs later.
static constexpr const IndexType InputSimdWidth = 1;
static constexpr const IndexType MaxNumOutputRegs = 1;
#endif
// A big block is a region in the weight matrix of the size [PaddedInputDimensions, NumOutputRegs].
// A small block is a region of size [InputSimdWidth, 1]
static constexpr const IndexType NumOutputRegs = std::min(MaxNumOutputRegs, OutputDimensions);
static constexpr const IndexType SmallBlockSize = InputSimdWidth;
static constexpr const IndexType BigBlockSize = NumOutputRegs * PaddedInputDimensions;
static constexpr const IndexType NumSmallBlocksInBigBlock = BigBlockSize / SmallBlockSize;
static constexpr const IndexType NumSmallBlocksPerOutput = PaddedInputDimensions / SmallBlockSize;
static constexpr const IndexType NumBigBlocks = OutputDimensions / NumOutputRegs;
static_assert(OutputDimensions % NumOutputRegs == 0);
// Hash value embedded in the evaluation file
static constexpr std::uint32_t get_hash_value() {
static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) {
std::uint32_t hashValue = 0xCC03DAE4u;
hashValue += OutputDimensions;
hashValue ^= PreviousLayer::get_hash_value() >> 1;
hashValue ^= PreviousLayer::get_hash_value() << 31;
hashValue ^= prevHash >> 1;
hashValue ^= prevHash << 31;
return hashValue;
}
/*
Transposes the small blocks within a block.
Effectively means that weights can be traversed sequentially during inference.
*/
static IndexType get_weight_index(IndexType i)
{
const IndexType smallBlock = (i / SmallBlockSize) % NumSmallBlocksInBigBlock;
const IndexType smallBlockCol = smallBlock / NumSmallBlocksPerOutput;
const IndexType smallBlockRow = smallBlock % NumSmallBlocksPerOutput;
const IndexType bigBlock = i / BigBlockSize;
const IndexType rest = i % SmallBlockSize;
const IndexType idx =
bigBlock * BigBlockSize
+ smallBlockRow * SmallBlockSize * NumOutputRegs
+ smallBlockCol * SmallBlockSize
+ rest;
return idx;
}
// Read network parameters
bool read_parameters(std::istream& stream) {
if (!previousLayer.read_parameters(stream)) return false;
for (std::size_t i = 0; i < OutputDimensions; ++i)
for (IndexType i = 0; i < OutputDimensions; ++i)
biases[i] = read_little_endian<BiasType>(stream);
for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
#if !defined (USE_SSSE3)
weights[i] = read_little_endian<WeightType>(stream);
#else
weights[
(i / 4) % (PaddedInputDimensions / 4) * OutputDimensions * 4 +
i / PaddedInputDimensions * 4 +
i % 4
] = read_little_endian<WeightType>(stream);
#endif
for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
weights[get_weight_index(i)] = read_little_endian<WeightType>(stream);
return !stream.fail();
}
// Write network parameters
bool write_parameters(std::ostream& stream) const {
if (!previousLayer.write_parameters(stream)) return false;
for (std::size_t i = 0; i < OutputDimensions; ++i)
for (IndexType i = 0; i < OutputDimensions; ++i)
write_little_endian<BiasType>(stream, biases[i]);
#if !defined (USE_SSSE3)
for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
write_little_endian<WeightType>(stream, weights[i]);
#else
std::unique_ptr<WeightType[]> unscrambledWeights = std::make_unique<WeightType[]>(OutputDimensions * PaddedInputDimensions);
for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) {
unscrambledWeights[i] =
weights[
(i / 4) % (PaddedInputDimensions / 4) * OutputDimensions * 4 +
i / PaddedInputDimensions * 4 +
i % 4
];
}
for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
write_little_endian<WeightType>(stream, unscrambledWeights[i]);
#endif
for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
write_little_endian<WeightType>(stream, weights[get_weight_index(i)]);
return !stream.fail();
}
// Forward propagation
const OutputType* propagate(
const TransformedFeatureType* transformedFeatures, char* buffer) const {
const auto input = previousLayer.propagate(
transformedFeatures, buffer + SelfBufferSize);
const InputType* input, OutputType* output) const {
#if defined (USE_AVX512)
[[maybe_unused]] const __m512i Ones512 = _mm512_set1_epi16(1);
[[maybe_unused]] auto m512_hadd = [](__m512i sum, int bias) -> int {
return _mm512_reduce_add_epi32(sum) + bias;
};
[[maybe_unused]] auto m512_add_dpbusd_epi32 = [=](__m512i& acc, __m512i a, __m512i b) {
#if defined (USE_VNNI)
acc = _mm512_dpbusd_epi32(acc, a, b);
#else
__m512i product0 = _mm512_maddubs_epi16(a, b);
product0 = _mm512_madd_epi16(product0, Ones512);
acc = _mm512_add_epi32(acc, product0);
#endif
};
[[maybe_unused]] auto m512_add_dpbusd_epi32x4 = [=](__m512i& acc, __m512i a0, __m512i b0, __m512i a1, __m512i b1,
__m512i a2, __m512i b2, __m512i a3, __m512i b3) {
#if defined (USE_VNNI)
acc = _mm512_dpbusd_epi32(acc, a0, b0);
acc = _mm512_dpbusd_epi32(acc, a1, b1);
acc = _mm512_dpbusd_epi32(acc, a2, b2);
acc = _mm512_dpbusd_epi32(acc, a3, b3);
#else
__m512i product0 = _mm512_maddubs_epi16(a0, b0);
__m512i product1 = _mm512_maddubs_epi16(a1, b1);
__m512i product2 = _mm512_maddubs_epi16(a2, b2);
__m512i product3 = _mm512_maddubs_epi16(a3, b3);
product0 = _mm512_adds_epi16(product0, product1);
product0 = _mm512_madd_epi16(product0, Ones512);
product2 = _mm512_adds_epi16(product2, product3);
product2 = _mm512_madd_epi16(product2, Ones512);
acc = _mm512_add_epi32(acc, _mm512_add_epi32(product0, product2));
#endif
};
#endif
#if defined (USE_AVX2)
[[maybe_unused]] const __m256i Ones256 = _mm256_set1_epi16(1);
[[maybe_unused]] auto m256_hadd = [](__m256i sum, int bias) -> int {
__m128i sum128 = _mm_add_epi32(_mm256_castsi256_si128(sum), _mm256_extracti128_si256(sum, 1));
sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_BADC));
sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_CDAB));
return _mm_cvtsi128_si32(sum128) + bias;
};
[[maybe_unused]] auto m256_add_dpbusd_epi32 = [=](__m256i& acc, __m256i a, __m256i b) {
#if defined (USE_VNNI)
acc = _mm256_dpbusd_epi32(acc, a, b);
#else
__m256i product0 = _mm256_maddubs_epi16(a, b);
product0 = _mm256_madd_epi16(product0, Ones256);
acc = _mm256_add_epi32(acc, product0);
#endif
};
[[maybe_unused]] auto m256_add_dpbusd_epi32x4 = [=](__m256i& acc, __m256i a0, __m256i b0, __m256i a1, __m256i b1,
__m256i a2, __m256i b2, __m256i a3, __m256i b3) {
#if defined (USE_VNNI)
acc = _mm256_dpbusd_epi32(acc, a0, b0);
acc = _mm256_dpbusd_epi32(acc, a1, b1);
acc = _mm256_dpbusd_epi32(acc, a2, b2);
acc = _mm256_dpbusd_epi32(acc, a3, b3);
#else
__m256i product0 = _mm256_maddubs_epi16(a0, b0);
__m256i product1 = _mm256_maddubs_epi16(a1, b1);
__m256i product2 = _mm256_maddubs_epi16(a2, b2);
__m256i product3 = _mm256_maddubs_epi16(a3, b3);
product0 = _mm256_adds_epi16(product0, product1);
product0 = _mm256_madd_epi16(product0, Ones256);
product2 = _mm256_adds_epi16(product2, product3);
product2 = _mm256_madd_epi16(product2, Ones256);
acc = _mm256_add_epi32(acc, _mm256_add_epi32(product0, product2));
#endif
};
#endif
#if defined (USE_SSSE3)
[[maybe_unused]] const __m128i Ones128 = _mm_set1_epi16(1);
[[maybe_unused]] auto m128_hadd = [](__m128i sum, int bias) -> int {
sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0x4E)); //_MM_PERM_BADC
sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0xB1)); //_MM_PERM_CDAB
return _mm_cvtsi128_si32(sum) + bias;
};
[[maybe_unused]] auto m128_add_dpbusd_epi32 = [=](__m128i& acc, __m128i a, __m128i b) {
__m128i product0 = _mm_maddubs_epi16(a, b);
product0 = _mm_madd_epi16(product0, Ones128);
acc = _mm_add_epi32(acc, product0);
};
[[maybe_unused]] auto m128_add_dpbusd_epi32x4 = [=](__m128i& acc, __m128i a0, __m128i b0, __m128i a1, __m128i b1,
__m128i a2, __m128i b2, __m128i a3, __m128i b3) {
__m128i product0 = _mm_maddubs_epi16(a0, b0);
__m128i product1 = _mm_maddubs_epi16(a1, b1);
__m128i product2 = _mm_maddubs_epi16(a2, b2);
__m128i product3 = _mm_maddubs_epi16(a3, b3);
product0 = _mm_adds_epi16(product0, product1);
product0 = _mm_madd_epi16(product0, Ones128);
product2 = _mm_adds_epi16(product2, product3);
product2 = _mm_madd_epi16(product2, Ones128);
acc = _mm_add_epi32(acc, _mm_add_epi32(product0, product2));
};
#endif
#if defined (USE_AVX512)
using vec_t = __m512i;
#define vec_setzero _mm512_setzero_si512
#define vec_set_32 _mm512_set1_epi32
auto& vec_add_dpbusd_32 = m512_add_dpbusd_epi32;
auto& vec_add_dpbusd_32x4 = m512_add_dpbusd_epi32x4;
auto& vec_hadd = m512_hadd;
using acc_vec_t = __m512i;
using bias_vec_t = __m128i;
using weight_vec_t = __m512i;
using in_vec_t = __m512i;
#define vec_zero _mm512_setzero_si512()
#define vec_add_dpbusd_32x2 Simd::m512_add_dpbusd_epi32x2
#define vec_hadd Simd::m512_hadd
#define vec_haddx4 Simd::m512_haddx4
#elif defined (USE_AVX2)
using vec_t = __m256i;
#define vec_setzero _mm256_setzero_si256
#define vec_set_32 _mm256_set1_epi32
auto& vec_add_dpbusd_32 = m256_add_dpbusd_epi32;
auto& vec_add_dpbusd_32x4 = m256_add_dpbusd_epi32x4;
auto& vec_hadd = m256_hadd;
using acc_vec_t = __m256i;
using bias_vec_t = __m128i;
using weight_vec_t = __m256i;
using in_vec_t = __m256i;
#define vec_zero _mm256_setzero_si256()
#define vec_add_dpbusd_32x2 Simd::m256_add_dpbusd_epi32x2
#define vec_hadd Simd::m256_hadd
#define vec_haddx4 Simd::m256_haddx4
#elif defined (USE_SSSE3)
using vec_t = __m128i;
#define vec_setzero _mm_setzero_si128
#define vec_set_32 _mm_set1_epi32
auto& vec_add_dpbusd_32 = m128_add_dpbusd_epi32;
auto& vec_add_dpbusd_32x4 = m128_add_dpbusd_epi32x4;
auto& vec_hadd = m128_hadd;
using acc_vec_t = __m128i;
using bias_vec_t = __m128i;
using weight_vec_t = __m128i;
using in_vec_t = __m128i;
#define vec_zero _mm_setzero_si128()
#define vec_add_dpbusd_32x2 Simd::m128_add_dpbusd_epi32x2
#define vec_hadd Simd::m128_hadd
#define vec_haddx4 Simd::m128_haddx4
#elif defined (USE_NEON)
using acc_vec_t = int32x4_t;
using bias_vec_t = int32x4_t;
using weight_vec_t = int8x8_t;
using in_vec_t = int8x8_t;
#define vec_zero {0}
#define vec_add_dpbusd_32x2 Simd::neon_m128_add_dpbusd_epi32x2
#define vec_hadd Simd::neon_m128_hadd
#define vec_haddx4 Simd::neon_m128_haddx4
#endif
#if defined (USE_SSSE3)
// Different layout: we always process 4 inputs at a time.
static_assert(InputDimensions % 4 == 0);
#if defined (USE_SSSE3) || defined (USE_NEON)
const in_vec_t* invec = reinterpret_cast<const in_vec_t*>(input);
const auto output = reinterpret_cast<OutputType*>(buffer);
const auto inputVector = reinterpret_cast<const vec_t*>(input);
static_assert(OutputDimensions % OutputSimdWidth == 0 || OutputDimensions == 1);
// OutputDimensions is either 1 or a multiple of SimdWidth
// because then it is also an input dimension.
if constexpr (OutputDimensions % OutputSimdWidth == 0)
// Perform accumulation to registers for each big block
for (IndexType bigBlock = 0; bigBlock < NumBigBlocks; ++bigBlock)
{
constexpr IndexType NumChunks = InputDimensions / 4;
acc_vec_t acc[NumOutputRegs] = { vec_zero };
const auto input32 = reinterpret_cast<const std::int32_t*>(input);
vec_t* outptr = reinterpret_cast<vec_t*>(output);
std::memcpy(output, biases, OutputDimensions * sizeof(OutputType));
// Each big block has NumOutputRegs small blocks in each "row", one per register.
// We process two small blocks at a time to save on one addition without VNNI.
for (IndexType smallBlock = 0; smallBlock < NumSmallBlocksPerOutput; smallBlock += 2)
{
const weight_vec_t* weightvec =
reinterpret_cast<const weight_vec_t*>(
weights
+ bigBlock * BigBlockSize
+ smallBlock * SmallBlockSize * NumOutputRegs);
for (int i = 0; i < (int)NumChunks - 3; i += 4)
const in_vec_t in0 = invec[smallBlock + 0];
const in_vec_t in1 = invec[smallBlock + 1];
for (IndexType k = 0; k < NumOutputRegs; ++k)
vec_add_dpbusd_32x2(acc[k], in0, weightvec[k], in1, weightvec[k + NumOutputRegs]);
}
// Horizontally add all accumulators.
if constexpr (NumOutputRegs % 4 == 0)
{
bias_vec_t* outputvec = reinterpret_cast<bias_vec_t*>(output);
const bias_vec_t* biasvec = reinterpret_cast<const bias_vec_t*>(biases);
for (IndexType k = 0; k < NumOutputRegs; k += 4)
{
const vec_t in0 = vec_set_32(input32[i + 0]);
const vec_t in1 = vec_set_32(input32[i + 1]);
const vec_t in2 = vec_set_32(input32[i + 2]);
const vec_t in3 = vec_set_32(input32[i + 3]);
const auto col0 = reinterpret_cast<const vec_t*>(&weights[(i + 0) * OutputDimensions * 4]);
const auto col1 = reinterpret_cast<const vec_t*>(&weights[(i + 1) * OutputDimensions * 4]);
const auto col2 = reinterpret_cast<const vec_t*>(&weights[(i + 2) * OutputDimensions * 4]);
const auto col3 = reinterpret_cast<const vec_t*>(&weights[(i + 3) * OutputDimensions * 4]);
for (int j = 0; j * OutputSimdWidth < OutputDimensions; ++j)
vec_add_dpbusd_32x4(outptr[j], in0, col0[j], in1, col1[j], in2, col2[j], in3, col3[j]);
const IndexType idx = (bigBlock * NumOutputRegs + k) / 4;
outputvec[idx] = vec_haddx4(acc[k+0], acc[k+1], acc[k+2], acc[k+3], biasvec[idx]);
}
}
else if constexpr (OutputDimensions == 1)
{
#if defined (USE_AVX512)
if constexpr (PaddedInputDimensions % (SimdWidth * 2) != 0)
}
else
{
for (IndexType k = 0; k < NumOutputRegs; ++k)
{
constexpr IndexType NumChunks = PaddedInputDimensions / SimdWidth;
const auto inputVector256 = reinterpret_cast<const __m256i*>(input);
__m256i sum0 = _mm256_setzero_si256();
const auto row0 = reinterpret_cast<const __m256i*>(&weights[0]);
for (int j = 0; j < (int)NumChunks; ++j)
{
const __m256i in = inputVector256[j];
m256_add_dpbusd_epi32(sum0, in, row0[j]);
}
output[0] = m256_hadd(sum0, biases[0]);
}
else
#endif
{
#if defined (USE_AVX512)
constexpr IndexType NumChunks = PaddedInputDimensions / (SimdWidth * 2);
#else
constexpr IndexType NumChunks = PaddedInputDimensions / SimdWidth;
#endif
vec_t sum0 = vec_setzero();
const auto row0 = reinterpret_cast<const vec_t*>(&weights[0]);
for (int j = 0; j < (int)NumChunks; ++j)
{
const vec_t in = inputVector[j];
vec_add_dpbusd_32(sum0, in, row0[j]);
}
output[0] = vec_hadd(sum0, biases[0]);
const IndexType idx = (bigBlock * NumOutputRegs + k);
output[idx] = vec_hadd(acc[k], biases[idx]);
}
}
}
# undef vec_zero
# undef vec_add_dpbusd_32x2
# undef vec_hadd
# undef vec_haddx4
#else
// Use old implementation for the other architectures.
auto output = reinterpret_cast<OutputType*>(buffer);
#if defined(USE_SSE2)
// At least a multiple of 16, with SSE2.
static_assert(InputDimensions % SimdWidth == 0);
constexpr IndexType NumChunks = InputDimensions / SimdWidth;
const __m128i Zeros = _mm_setzero_si128();
const auto inputVector = reinterpret_cast<const __m128i*>(input);
#elif defined(USE_MMX)
static_assert(InputDimensions % SimdWidth == 0);
constexpr IndexType NumChunks = InputDimensions / SimdWidth;
const __m64 Zeros = _mm_setzero_si64();
const auto inputVector = reinterpret_cast<const __m64*>(input);
#elif defined(USE_NEON)
static_assert(InputDimensions % SimdWidth == 0);
constexpr IndexType NumChunks = InputDimensions / SimdWidth;
const auto inputVector = reinterpret_cast<const int8x8_t*>(input);
#endif
for (IndexType i = 0; i < OutputDimensions; ++i) {
const IndexType offset = i * PaddedInputDimensions;
#if defined(USE_SSE2)
__m128i sumLo = _mm_cvtsi32_si128(biases[i]);
__m128i sumHi = Zeros;
const auto row = reinterpret_cast<const __m128i*>(&weights[offset]);
for (IndexType j = 0; j < NumChunks; ++j) {
__m128i row_j = _mm_load_si128(&row[j]);
__m128i input_j = _mm_load_si128(&inputVector[j]);
__m128i extendedRowLo = _mm_srai_epi16(_mm_unpacklo_epi8(row_j, row_j), 8);
__m128i extendedRowHi = _mm_srai_epi16(_mm_unpackhi_epi8(row_j, row_j), 8);
__m128i extendedInputLo = _mm_unpacklo_epi8(input_j, Zeros);
__m128i extendedInputHi = _mm_unpackhi_epi8(input_j, Zeros);
__m128i productLo = _mm_madd_epi16(extendedRowLo, extendedInputLo);
__m128i productHi = _mm_madd_epi16(extendedRowHi, extendedInputHi);
sumLo = _mm_add_epi32(sumLo, productLo);
sumHi = _mm_add_epi32(sumHi, productHi);
}
__m128i sum = _mm_add_epi32(sumLo, sumHi);
__m128i sumHigh_64 = _mm_shuffle_epi32(sum, _MM_SHUFFLE(1, 0, 3, 2));
sum = _mm_add_epi32(sum, sumHigh_64);
__m128i sum_second_32 = _mm_shufflelo_epi16(sum, _MM_SHUFFLE(1, 0, 3, 2));
sum = _mm_add_epi32(sum, sum_second_32);
output[i] = _mm_cvtsi128_si32(sum);
#elif defined(USE_MMX)
__m64 sumLo = _mm_cvtsi32_si64(biases[i]);
__m64 sumHi = Zeros;
const auto row = reinterpret_cast<const __m64*>(&weights[offset]);
for (IndexType j = 0; j < NumChunks; ++j) {
__m64 row_j = row[j];
__m64 input_j = inputVector[j];
__m64 extendedRowLo = _mm_srai_pi16(_mm_unpacklo_pi8(row_j, row_j), 8);
__m64 extendedRowHi = _mm_srai_pi16(_mm_unpackhi_pi8(row_j, row_j), 8);
__m64 extendedInputLo = _mm_unpacklo_pi8(input_j, Zeros);
__m64 extendedInputHi = _mm_unpackhi_pi8(input_j, Zeros);
__m64 productLo = _mm_madd_pi16(extendedRowLo, extendedInputLo);
__m64 productHi = _mm_madd_pi16(extendedRowHi, extendedInputHi);
sumLo = _mm_add_pi32(sumLo, productLo);
sumHi = _mm_add_pi32(sumHi, productHi);
}
__m64 sum = _mm_add_pi32(sumLo, sumHi);
sum = _mm_add_pi32(sum, _mm_unpackhi_pi32(sum, sum));
output[i] = _mm_cvtsi64_si32(sum);
#elif defined(USE_NEON)
int32x4_t sum = {biases[i]};
const auto row = reinterpret_cast<const int8x8_t*>(&weights[offset]);
for (IndexType j = 0; j < NumChunks; ++j) {
int16x8_t product = vmull_s8(inputVector[j * 2], row[j * 2]);
product = vmlal_s8(product, inputVector[j * 2 + 1], row[j * 2 + 1]);
sum = vpadalq_s16(sum, product);
}
output[i] = sum[0] + sum[1] + sum[2] + sum[3];
#else
OutputType sum = biases[i];
for (IndexType j = 0; j < InputDimensions; ++j) {
sum += weights[offset + j] * input[j];
}
output[i] = sum;
#endif
}
#if defined(USE_MMX)
_mm_empty();
#endif
// Use old implementation for the other architectures.
affine_transform_non_ssse3<
InputDimensions,
PaddedInputDimensions,
OutputDimensions>(output, weights, biases, input);
#endif
@ -424,7 +364,171 @@ namespace Stockfish::Eval::NNUE::Layers {
using BiasType = OutputType;
using WeightType = std::int8_t;
PreviousLayer previousLayer;
alignas(CacheLineSize) BiasType biases[OutputDimensions];
alignas(CacheLineSize) WeightType weights[OutputDimensions * PaddedInputDimensions];
};
template <IndexType InDims, IndexType OutDims>
class AffineTransform<InDims, OutDims, std::enable_if_t<(ceil_to_multiple<IndexType>(InDims, MaxSimdWidth) < 2*64)>> {
public:
// Input/output type
using InputType = std::uint8_t;
using OutputType = std::int32_t;
// Number of input/output dimensions
static constexpr IndexType InputDimensions = InDims;
static constexpr IndexType OutputDimensions = OutDims;
static constexpr IndexType PaddedInputDimensions =
ceil_to_multiple<IndexType>(InputDimensions, MaxSimdWidth);
static constexpr IndexType PaddedOutputDimensions =
ceil_to_multiple<IndexType>(OutputDimensions, MaxSimdWidth);
using OutputBuffer = OutputType[PaddedOutputDimensions];
static_assert(PaddedInputDimensions < 128, "Something went wrong. This specialization should not have been chosen.");
#if defined (USE_SSSE3)
static constexpr const IndexType OutputSimdWidth = SimdWidth / 4;
static constexpr const IndexType InputSimdWidth = SimdWidth;
#endif
// Hash value embedded in the evaluation file
static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) {
std::uint32_t hashValue = 0xCC03DAE4u;
hashValue += OutputDimensions;
hashValue ^= prevHash >> 1;
hashValue ^= prevHash << 31;
return hashValue;
}
static IndexType get_weight_index_scrambled(IndexType i)
{
return
(i / 4) % (PaddedInputDimensions / 4) * OutputDimensions * 4 +
i / PaddedInputDimensions * 4 +
i % 4;
}
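// Illustrative example (hypothetical tiny sizes, not the real network):
// with PaddedInputDimensions = 8 and OutputDimensions = 2, the row-major
// weights [out0 in0..7][out1 in0..7] are scrambled into
//   [in0..3 of out0][in0..3 of out1][in4..7 of out0][in4..7 of out1],
// i.e. grouped by 4-byte input chunk first, so propagate() can broadcast one
// 32-bit input chunk and multiply it against the weights of all outputs
// stored at consecutive addresses.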
static IndexType get_weight_index(IndexType i)
{
#if defined (USE_SSSE3)
return get_weight_index_scrambled(i);
#else
return i;
#endif
}
// Read network parameters
bool read_parameters(std::istream& stream) {
for (IndexType i = 0; i < OutputDimensions; ++i)
biases[i] = read_little_endian<BiasType>(stream);
for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
weights[get_weight_index(i)] = read_little_endian<WeightType>(stream);
return !stream.fail();
}
// Write network parameters
bool write_parameters(std::ostream& stream) const {
for (IndexType i = 0; i < OutputDimensions; ++i)
write_little_endian<BiasType>(stream, biases[i]);
for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
write_little_endian<WeightType>(stream, weights[get_weight_index(i)]);
return !stream.fail();
}
// Forward propagation
const OutputType* propagate(
const InputType* input, OutputType* output) const {
#if defined (USE_AVX2)
using vec_t = __m256i;
#define vec_setzero _mm256_setzero_si256
#define vec_set_32 _mm256_set1_epi32
#define vec_add_dpbusd_32 Simd::m256_add_dpbusd_epi32
#define vec_add_dpbusd_32x2 Simd::m256_add_dpbusd_epi32x2
#define vec_add_dpbusd_32x4 Simd::m256_add_dpbusd_epi32x4
#define vec_hadd Simd::m256_hadd
#define vec_haddx4 Simd::m256_haddx4
#elif defined (USE_SSSE3)
using vec_t = __m128i;
#define vec_setzero _mm_setzero_si128
#define vec_set_32 _mm_set1_epi32
#define vec_add_dpbusd_32 Simd::m128_add_dpbusd_epi32
#define vec_add_dpbusd_32x2 Simd::m128_add_dpbusd_epi32x2
#define vec_add_dpbusd_32x4 Simd::m128_add_dpbusd_epi32x4
#define vec_hadd Simd::m128_hadd
#define vec_haddx4 Simd::m128_haddx4
#endif
#if defined (USE_SSSE3)
const auto inputVector = reinterpret_cast<const vec_t*>(input);
static_assert(OutputDimensions % OutputSimdWidth == 0 || OutputDimensions == 1);
if constexpr (OutputDimensions % OutputSimdWidth == 0)
{
constexpr IndexType NumChunks = ceil_to_multiple<IndexType>(InputDimensions, 8) / 4;
constexpr IndexType NumRegs = OutputDimensions / OutputSimdWidth;
const auto input32 = reinterpret_cast<const std::int32_t*>(input);
const vec_t* biasvec = reinterpret_cast<const vec_t*>(biases);
vec_t acc[NumRegs];
for (IndexType k = 0; k < NumRegs; ++k)
acc[k] = biasvec[k];
for (IndexType i = 0; i < NumChunks; i += 2)
{
const vec_t in0 = vec_set_32(input32[i + 0]);
const vec_t in1 = vec_set_32(input32[i + 1]);
const auto col0 = reinterpret_cast<const vec_t*>(&weights[(i + 0) * OutputDimensions * 4]);
const auto col1 = reinterpret_cast<const vec_t*>(&weights[(i + 1) * OutputDimensions * 4]);
for (IndexType k = 0; k < NumRegs; ++k)
vec_add_dpbusd_32x2(acc[k], in0, col0[k], in1, col1[k]);
}
vec_t* outptr = reinterpret_cast<vec_t*>(output);
for (IndexType k = 0; k < NumRegs; ++k)
outptr[k] = acc[k];
}
else if constexpr (OutputDimensions == 1)
{
constexpr IndexType NumChunks = PaddedInputDimensions / SimdWidth;
vec_t sum0 = vec_setzero();
const auto row0 = reinterpret_cast<const vec_t*>(&weights[0]);
for (int j = 0; j < (int)NumChunks; ++j)
{
const vec_t in = inputVector[j];
vec_add_dpbusd_32(sum0, in, row0[j]);
}
output[0] = vec_hadd(sum0, biases[0]);
}
# undef vec_setzero
# undef vec_set_32
# undef vec_add_dpbusd_32
# undef vec_add_dpbusd_32x2
# undef vec_add_dpbusd_32x4
# undef vec_hadd
# undef vec_haddx4
#else
// Use old implementation for the other architectures.
affine_transform_non_ssse3<
InputDimensions,
PaddedInputDimensions,
OutputDimensions>(output, weights, biases, input);
#endif
return output;
}
private:
using BiasType = OutputType;
using WeightType = std::int8_t;
alignas(CacheLineSize) BiasType biases[OutputDimensions];
alignas(CacheLineSize) WeightType weights[OutputDimensions * PaddedInputDimensions];

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -26,50 +26,41 @@
namespace Stockfish::Eval::NNUE::Layers {
// Clipped ReLU
template <typename PreviousLayer>
template <IndexType InDims>
class ClippedReLU {
public:
// Input/output type
using InputType = typename PreviousLayer::OutputType;
using InputType = std::int32_t;
using OutputType = std::uint8_t;
static_assert(std::is_same<InputType, std::int32_t>::value, "");
// Number of input/output dimensions
static constexpr IndexType InputDimensions =
PreviousLayer::OutputDimensions;
static constexpr IndexType InputDimensions = InDims;
static constexpr IndexType OutputDimensions = InputDimensions;
static constexpr IndexType PaddedOutputDimensions =
ceil_to_multiple<IndexType>(OutputDimensions, 32);
// Size of forward propagation buffer used in this layer
static constexpr std::size_t SelfBufferSize =
ceil_to_multiple(OutputDimensions * sizeof(OutputType), CacheLineSize);
// Size of the forward propagation buffer used from the input layer to this layer
static constexpr std::size_t BufferSize =
PreviousLayer::BufferSize + SelfBufferSize;
using OutputBuffer = OutputType[PaddedOutputDimensions];
// Hash value embedded in the evaluation file
static constexpr std::uint32_t get_hash_value() {
static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) {
std::uint32_t hashValue = 0x538D24C7u;
hashValue += PreviousLayer::get_hash_value();
hashValue += prevHash;
return hashValue;
}
// Read network parameters
bool read_parameters(std::istream& stream) {
return previousLayer.read_parameters(stream);
bool read_parameters(std::istream&) {
return true;
}
// Write network parameters
bool write_parameters(std::ostream& stream) const {
return previousLayer.write_parameters(stream);
bool write_parameters(std::ostream&) const {
return true;
}
// Forward propagation
const OutputType* propagate(
const TransformedFeatureType* transformedFeatures, char* buffer) const {
const auto input = previousLayer.propagate(
transformedFeatures, buffer + SelfBufferSize);
const auto output = reinterpret_cast<OutputType*>(buffer);
const InputType* input, OutputType* output) const {
#if defined(USE_AVX2)
if constexpr (InputDimensions % SimdWidth == 0) {
@ -179,11 +170,9 @@ namespace Stockfish::Eval::NNUE::Layers {
output[i] = static_cast<OutputType>(
std::max(0, std::min(127, input[i] >> WeightScaleBits)));
}
return output;
}
private:
PreviousLayer previousLayer;
};
} // namespace Stockfish::Eval::NNUE::Layers

View File

@ -1,73 +0,0 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Stockfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// NNUE evaluation function layer InputSlice definition
#ifndef NNUE_LAYERS_INPUT_SLICE_H_INCLUDED
#define NNUE_LAYERS_INPUT_SLICE_H_INCLUDED
#include "../nnue_common.h"
namespace Stockfish::Eval::NNUE::Layers {
// Input layer
template <IndexType OutDims, IndexType Offset = 0>
class InputSlice {
public:
// Need to maintain alignment
static_assert(Offset % MaxSimdWidth == 0, "");
// Output type
using OutputType = TransformedFeatureType;
// Output dimensionality
static constexpr IndexType OutputDimensions = OutDims;
// Size of forward propagation buffer used from the input layer to this layer
static constexpr std::size_t BufferSize = 0;
// Hash value embedded in the evaluation file
static constexpr std::uint32_t get_hash_value() {
std::uint32_t hashValue = 0xEC42E90Du;
hashValue ^= OutputDimensions ^ (Offset << 10);
return hashValue;
}
// Read network parameters
bool read_parameters(std::istream& /*stream*/) {
return true;
}
// Write network parameters
bool write_parameters(std::ostream& /*stream*/) const {
return true;
}
// Forward propagation
const OutputType* propagate(
const TransformedFeatureType* transformedFeatures,
char* /*buffer*/) const {
return transformedFeatures + Offset;
}
private:
};
} // namespace Stockfish::Eval::NNUE::Layers
#endif // #ifndef NNUE_LAYERS_INPUT_SLICE_H_INCLUDED

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -21,39 +21,112 @@
#ifndef NNUE_ARCHITECTURE_H_INCLUDED
#define NNUE_ARCHITECTURE_H_INCLUDED
#include <memory>
#include "nnue_common.h"
#include "features/half_ka_v2.h"
#include "features/half_ka_v2_hm.h"
#include "layers/input_slice.h"
#include "layers/affine_transform.h"
#include "layers/clipped_relu.h"
#include "../misc.h"
namespace Stockfish::Eval::NNUE {
// Input features used in evaluation function
using FeatureSet = Features::HalfKAv2;
// Input features used in evaluation function
using FeatureSet = Features::HalfKAv2_hm;
// Number of input feature dimensions after conversion
constexpr IndexType TransformedFeatureDimensions = 512;
constexpr IndexType PSQTBuckets = 8;
constexpr IndexType LayerStacks = 8;
// Number of input feature dimensions after conversion
constexpr IndexType TransformedFeatureDimensions = 1024;
constexpr IndexType PSQTBuckets = 8;
constexpr IndexType LayerStacks = 8;
namespace Layers {
struct Network
{
static constexpr int FC_0_OUTPUTS = 15;
static constexpr int FC_1_OUTPUTS = 32;
// Define network structure
using InputLayer = InputSlice<TransformedFeatureDimensions * 2>;
using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 16>>;
using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
using OutputLayer = AffineTransform<HiddenLayer2, 1>;
Layers::AffineTransform<TransformedFeatureDimensions, FC_0_OUTPUTS + 1> fc_0;
Layers::ClippedReLU<FC_0_OUTPUTS + 1> ac_0;
Layers::AffineTransform<FC_0_OUTPUTS, FC_1_OUTPUTS> fc_1;
Layers::ClippedReLU<FC_1_OUTPUTS> ac_1;
Layers::AffineTransform<FC_1_OUTPUTS, 1> fc_2;
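// The transformed features feed fc_0, which produces FC_0_OUTPUTS + 1 = 16
// values: the first FC_0_OUTPUTS go through ac_0 -> fc_1 -> ac_1 -> fc_2,
// while the extra value bypasses the hidden layers and is added to the final
// sum in propagate() below (a small skip connection).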
} // namespace Layers
// Hash value embedded in the evaluation file
static constexpr std::uint32_t get_hash_value() {
// input slice hash
std::uint32_t hashValue = 0xEC42E90Du;
hashValue ^= TransformedFeatureDimensions * 2;
using Network = Layers::OutputLayer;
hashValue = decltype(fc_0)::get_hash_value(hashValue);
hashValue = decltype(ac_0)::get_hash_value(hashValue);
hashValue = decltype(fc_1)::get_hash_value(hashValue);
hashValue = decltype(ac_1)::get_hash_value(hashValue);
hashValue = decltype(fc_2)::get_hash_value(hashValue);
static_assert(TransformedFeatureDimensions % MaxSimdWidth == 0, "");
static_assert(Network::OutputDimensions == 1, "");
static_assert(std::is_same<Network::OutputType, std::int32_t>::value, "");
return hashValue;
}
// Read network parameters
bool read_parameters(std::istream& stream) {
if (!fc_0.read_parameters(stream)) return false;
if (!ac_0.read_parameters(stream)) return false;
if (!fc_1.read_parameters(stream)) return false;
if (!ac_1.read_parameters(stream)) return false;
if (!fc_2.read_parameters(stream)) return false;
return true;
}
// Write network parameters
bool write_parameters(std::ostream& stream) const {
if (!fc_0.write_parameters(stream)) return false;
if (!ac_0.write_parameters(stream)) return false;
if (!fc_1.write_parameters(stream)) return false;
if (!ac_1.write_parameters(stream)) return false;
if (!fc_2.write_parameters(stream)) return false;
return true;
}
std::int32_t propagate(const TransformedFeatureType* transformedFeatures)
{
struct alignas(CacheLineSize) Buffer
{
alignas(CacheLineSize) decltype(fc_0)::OutputBuffer fc_0_out;
alignas(CacheLineSize) decltype(ac_0)::OutputBuffer ac_0_out;
alignas(CacheLineSize) decltype(fc_1)::OutputBuffer fc_1_out;
alignas(CacheLineSize) decltype(ac_1)::OutputBuffer ac_1_out;
alignas(CacheLineSize) decltype(fc_2)::OutputBuffer fc_2_out;
Buffer()
{
std::memset(this, 0, sizeof(*this));
}
};
#if defined(__clang__) && (__APPLE__)
// workaround for a bug reported with xcode 12
static thread_local auto tlsBuffer = std::make_unique<Buffer>();
// Access TLS only once, cache result.
Buffer& buffer = *tlsBuffer;
#else
alignas(CacheLineSize) static thread_local Buffer buffer;
#endif
fc_0.propagate(transformedFeatures, buffer.fc_0_out);
ac_0.propagate(buffer.fc_0_out, buffer.ac_0_out);
fc_1.propagate(buffer.ac_0_out, buffer.fc_1_out);
ac_1.propagate(buffer.fc_1_out, buffer.ac_1_out);
fc_2.propagate(buffer.ac_1_out, buffer.fc_2_out);
// buffer.fc_0_out[FC_0_OUTPUTS] is such that 1.0 is equal to 127*(1<<WeightScaleBits) in quantized form
// but we want 1.0 to be equal to 600*OutputScale
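// For example, assuming the usual constants OutputScale = 16 and
// WeightScaleBits = 6 from nnue_common.h, the factor below is
// 600*16 / (127*64) = 9600 / 8128, i.e. the forwarded value is rescaled by
// roughly 1.18 to move it into the output domain.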
std::int32_t fwdOut = int(buffer.fc_0_out[FC_0_OUTPUTS]) * (600*OutputScale) / (127*(1<<WeightScaleBits));
std::int32_t outputValue = buffer.fc_2_out[0] + fwdOut;
return outputValue;
}
};
} // namespace Stockfish::Eval::NNUE

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -109,7 +109,7 @@ namespace Stockfish::Eval::NNUE {
// write_little_endian() is our utility to write an integer (signed or unsigned, any size)
// to a stream in little-endian order. We swap the byte order before the write if
// necessary to always write in little endian order, independantly of the byte
// necessary to always write in little endian order, independently of the byte
// ordering of the compiling machine.
template <typename IntType>
inline void write_little_endian(std::ostream& stream, IntType value) {
@ -127,11 +127,11 @@ namespace Stockfish::Eval::NNUE {
{
for (; i + 1 < sizeof(IntType); ++i)
{
u[i] = v;
u[i] = (std::uint8_t)v;
v >>= 8;
}
}
u[i] = v;
u[i] = (std::uint8_t)v;
stream.write(reinterpret_cast<char*>(u), sizeof(IntType));
}
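// Standalone sketch (illustrative only, not part of this header): the same
// little-endian convention demonstrated with plain standard C++. The value
// 0x0201 is always written as the byte 0x01 followed by 0x02, independently
// of the host byte order.
#include <cassert>
#include <cstdint>
#include <sstream>
#include <string>

int main() {
    std::uint16_t v = 0x0201;
    std::uint8_t u[2] = { std::uint8_t(v & 0xFF), std::uint8_t(v >> 8) }; // LSB first
    std::stringstream ss;
    ss.write(reinterpret_cast<char*>(u), 2);
    std::string s = ss.str();
    assert(std::uint8_t(s[0]) == 0x01 && std::uint8_t(s[1]) == 0x02);
    return 0;
}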

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -47,12 +47,22 @@ namespace Stockfish::Eval::NNUE {
#define vec_store(a,b) _mm512_store_si512(a,b)
#define vec_add_16(a,b) _mm512_add_epi16(a,b)
#define vec_sub_16(a,b) _mm512_sub_epi16(a,b)
#define vec_mul_16(a,b) _mm512_mullo_epi16(a,b)
#define vec_zero() _mm512_setzero_epi32()
#define vec_set_16(a) _mm512_set1_epi16(a)
#define vec_max_16(a,b) _mm512_max_epi16(a,b)
#define vec_min_16(a,b) _mm512_min_epi16(a,b)
inline vec_t vec_msb_pack_16(vec_t a, vec_t b){
vec_t compacted = _mm512_packs_epi16(_mm512_srli_epi16(a,7),_mm512_srli_epi16(b,7));
return _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 2, 4, 6, 1, 3, 5, 7), compacted);
}
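// vec_msb_pack_16(a, b) shifts each 16-bit lane right by 7 and saturate-packs
// the two inputs into one vector of 8-bit lanes; the permute restores the
// natural lane order that _mm512_packs_epi16 interleaves per 128-bit block.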
#define vec_load_psqt(a) _mm256_load_si256(a)
#define vec_store_psqt(a,b) _mm256_store_si256(a,b)
#define vec_add_psqt_32(a,b) _mm256_add_epi32(a,b)
#define vec_sub_psqt_32(a,b) _mm256_sub_epi32(a,b)
#define vec_zero_psqt() _mm256_setzero_si256()
#define NumRegistersSIMD 32
#define MaxChunkSize 64
#elif USE_AVX2
typedef __m256i vec_t;
@ -61,12 +71,22 @@ namespace Stockfish::Eval::NNUE {
#define vec_store(a,b) _mm256_store_si256(a,b)
#define vec_add_16(a,b) _mm256_add_epi16(a,b)
#define vec_sub_16(a,b) _mm256_sub_epi16(a,b)
#define vec_mul_16(a,b) _mm256_mullo_epi16(a,b)
#define vec_zero() _mm256_setzero_si256()
#define vec_set_16(a) _mm256_set1_epi16(a)
#define vec_max_16(a,b) _mm256_max_epi16(a,b)
#define vec_min_16(a,b) _mm256_min_epi16(a,b)
inline vec_t vec_msb_pack_16(vec_t a, vec_t b){
vec_t compacted = _mm256_packs_epi16(_mm256_srli_epi16(a,7), _mm256_srli_epi16(b,7));
return _mm256_permute4x64_epi64(compacted, 0b11011000);
}
#define vec_load_psqt(a) _mm256_load_si256(a)
#define vec_store_psqt(a,b) _mm256_store_si256(a,b)
#define vec_add_psqt_32(a,b) _mm256_add_epi32(a,b)
#define vec_sub_psqt_32(a,b) _mm256_sub_epi32(a,b)
#define vec_zero_psqt() _mm256_setzero_si256()
#define NumRegistersSIMD 16
#define MaxChunkSize 32
#elif USE_SSE2
typedef __m128i vec_t;
@ -75,12 +95,19 @@ namespace Stockfish::Eval::NNUE {
#define vec_store(a,b) *(a)=(b)
#define vec_add_16(a,b) _mm_add_epi16(a,b)
#define vec_sub_16(a,b) _mm_sub_epi16(a,b)
#define vec_mul_16(a,b) _mm_mullo_epi16(a,b)
#define vec_zero() _mm_setzero_si128()
#define vec_set_16(a) _mm_set1_epi16(a)
#define vec_max_16(a,b) _mm_max_epi16(a,b)
#define vec_min_16(a,b) _mm_min_epi16(a,b)
#define vec_msb_pack_16(a,b) _mm_packs_epi16(_mm_srli_epi16(a,7),_mm_srli_epi16(b,7))
#define vec_load_psqt(a) (*(a))
#define vec_store_psqt(a,b) *(a)=(b)
#define vec_add_psqt_32(a,b) _mm_add_epi32(a,b)
#define vec_sub_psqt_32(a,b) _mm_sub_epi32(a,b)
#define vec_zero_psqt() _mm_setzero_si128()
#define NumRegistersSIMD (Is64Bit ? 16 : 8)
#define MaxChunkSize 16
#elif USE_MMX
typedef __m64 vec_t;
@ -89,12 +116,26 @@ namespace Stockfish::Eval::NNUE {
#define vec_store(a,b) *(a)=(b)
#define vec_add_16(a,b) _mm_add_pi16(a,b)
#define vec_sub_16(a,b) _mm_sub_pi16(a,b)
#define vec_mul_16(a,b) _mm_mullo_pi16(a,b)
#define vec_zero() _mm_setzero_si64()
#define vec_set_16(a) _mm_set1_pi16(a)
inline vec_t vec_max_16(vec_t a,vec_t b){
vec_t comparison = _mm_cmpgt_pi16(a,b);
return _mm_or_si64(_mm_and_si64(comparison, a), _mm_andnot_si64(comparison, b));
}
inline vec_t vec_min_16(vec_t a,vec_t b){
vec_t comparison = _mm_cmpgt_pi16(a,b);
return _mm_or_si64(_mm_and_si64(comparison, b), _mm_andnot_si64(comparison, a));
}
#define vec_msb_pack_16(a,b) _mm_packs_pi16(_mm_srli_pi16(a,7),_mm_srli_pi16(b,7))
#define vec_load_psqt(a) (*(a))
#define vec_store_psqt(a,b) *(a)=(b)
#define vec_add_psqt_32(a,b) _mm_add_pi32(a,b)
#define vec_sub_psqt_32(a,b) _mm_sub_pi32(a,b)
#define vec_zero_psqt() _mm_setzero_si64()
#define vec_cleanup() _mm_empty()
#define NumRegistersSIMD 8
#define MaxChunkSize 8
#elif USE_NEON
typedef int16x8_t vec_t;
@ -103,12 +144,24 @@ namespace Stockfish::Eval::NNUE {
#define vec_store(a,b) *(a)=(b)
#define vec_add_16(a,b) vaddq_s16(a,b)
#define vec_sub_16(a,b) vsubq_s16(a,b)
#define vec_mul_16(a,b) vmulq_s16(a,b)
#define vec_zero() vec_t{0}
#define vec_set_16(a) vdupq_n_s16(a)
#define vec_max_16(a,b) vmaxq_s16(a,b)
#define vec_min_16(a,b) vminq_s16(a,b)
inline vec_t vec_msb_pack_16(vec_t a, vec_t b){
const int8x8_t shifta = vshrn_n_s16(a, 7);
const int8x8_t shiftb = vshrn_n_s16(b, 7);
const int8x16_t compacted = vcombine_s8(shifta,shiftb);
return *reinterpret_cast<const vec_t*> (&compacted);
}
#define vec_load_psqt(a) (*(a))
#define vec_store_psqt(a,b) *(a)=(b)
#define vec_add_psqt_32(a,b) vaddq_s32(a,b)
#define vec_sub_psqt_32(a,b) vsubq_s32(a,b)
#define vec_zero_psqt() psqt_vec_t{0}
#define NumRegistersSIMD 16
#define MaxChunkSize 16
#else
#undef VECTOR
@ -123,8 +176,10 @@ namespace Stockfish::Eval::NNUE {
// We use __m* types as template arguments, which causes GCC to emit warnings
// about losing some attribute information. This is irrelevant to us as we
// only take their size, so the following pragmas are harmless.
#if defined(__GNUC__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wignored-attributes"
#endif
template <typename SIMDRegisterType,
typename LaneType,
@ -156,9 +211,9 @@ namespace Stockfish::Eval::NNUE {
static constexpr int NumRegs = BestRegisterCount<vec_t, WeightType, TransformedFeatureDimensions, NumRegistersSIMD>();
static constexpr int NumPsqtRegs = BestRegisterCount<psqt_vec_t, PSQTWeightType, PSQTBuckets, NumRegistersSIMD>();
#if defined(__GNUC__)
#pragma GCC diagnostic pop
#endif
#endif
@ -183,7 +238,7 @@ namespace Stockfish::Eval::NNUE {
// Number of input/output dimensions
static constexpr IndexType InputDimensions = FeatureSet::Dimensions;
static constexpr IndexType OutputDimensions = HalfDimensions * 2;
static constexpr IndexType OutputDimensions = HalfDimensions;
// Size of forward propagation buffer
static constexpr std::size_t BufferSize =
@ -191,7 +246,7 @@ namespace Stockfish::Eval::NNUE {
// Hash value embedded in the evaluation file
static constexpr std::uint32_t get_hash_value() {
return FeatureSet::HashValue ^ OutputDimensions;
return FeatureSet::HashValue ^ (OutputDimensions * 2);
}
// Read network parameters
@ -229,136 +284,55 @@ namespace Stockfish::Eval::NNUE {
) / 2;
#if defined(USE_AVX512)
constexpr IndexType NumChunks = HalfDimensions / (SimdWidth * 2);
static_assert(HalfDimensions % (SimdWidth * 2) == 0);
const __m512i Control = _mm512_setr_epi64(0, 2, 4, 6, 1, 3, 5, 7);
const __m512i Zero = _mm512_setzero_si512();
for (IndexType p = 0; p < 2; ++p)
{
const IndexType offset = HalfDimensions * p;
auto out = reinterpret_cast<__m512i*>(&output[offset]);
for (IndexType j = 0; j < NumChunks; ++j)
const IndexType offset = (HalfDimensions / 2) * p;
#if defined(VECTOR)
constexpr IndexType OutputChunkSize = MaxChunkSize;
static_assert((HalfDimensions / 2) % OutputChunkSize == 0);
constexpr IndexType NumOutputChunks = HalfDimensions / 2 / OutputChunkSize;
vec_t Zero = vec_zero();
vec_t One = vec_set_16(127);
const vec_t* in0 = reinterpret_cast<const vec_t*>(&(accumulation[perspectives[p]][0]));
const vec_t* in1 = reinterpret_cast<const vec_t*>(&(accumulation[perspectives[p]][HalfDimensions / 2]));
vec_t* out = reinterpret_cast< vec_t*>(output + offset);
for (IndexType j = 0; j < NumOutputChunks; j += 1)
{
__m512i sum0 = _mm512_load_si512(&reinterpret_cast<const __m512i*>
(accumulation[perspectives[p]])[j * 2 + 0]);
__m512i sum1 = _mm512_load_si512(&reinterpret_cast<const __m512i*>
(accumulation[perspectives[p]])[j * 2 + 1]);
const vec_t sum0a = vec_max_16(vec_min_16(in0[j * 2 + 0], One), Zero);
const vec_t sum0b = vec_max_16(vec_min_16(in0[j * 2 + 1], One), Zero);
const vec_t sum1a = vec_max_16(vec_min_16(in1[j * 2 + 0], One), Zero);
const vec_t sum1b = vec_max_16(vec_min_16(in1[j * 2 + 1], One), Zero);
_mm512_store_si512(&out[j], _mm512_permutexvar_epi64(Control,
_mm512_max_epi8(_mm512_packs_epi16(sum0, sum1), Zero)));
const vec_t pa = vec_mul_16(sum0a, sum1a);
const vec_t pb = vec_mul_16(sum0b, sum1b);
out[j] = vec_msb_pack_16(pa, pb);
}
}
return psqt;
#elif defined(USE_AVX2)
#else
constexpr IndexType NumChunks = HalfDimensions / SimdWidth;
constexpr int Control = 0b11011000;
const __m256i Zero = _mm256_setzero_si256();
for (IndexType p = 0; p < 2; ++p)
{
const IndexType offset = HalfDimensions * p;
auto out = reinterpret_cast<__m256i*>(&output[offset]);
for (IndexType j = 0; j < NumChunks; ++j)
{
__m256i sum0 = _mm256_load_si256(&reinterpret_cast<const __m256i*>
(accumulation[perspectives[p]])[j * 2 + 0]);
__m256i sum1 = _mm256_load_si256(&reinterpret_cast<const __m256i*>
(accumulation[perspectives[p]])[j * 2 + 1]);
_mm256_store_si256(&out[j], _mm256_permute4x64_epi64(
_mm256_max_epi8(_mm256_packs_epi16(sum0, sum1), Zero), Control));
for (IndexType j = 0; j < HalfDimensions / 2; ++j) {
BiasType sum0 = accumulation[static_cast<int>(perspectives[p])][j + 0];
BiasType sum1 = accumulation[static_cast<int>(perspectives[p])][j + HalfDimensions / 2];
sum0 = std::max<int>(0, std::min<int>(127, sum0));
sum1 = std::max<int>(0, std::min<int>(127, sum1));
output[offset + j] = static_cast<OutputType>(sum0 * sum1 / 128);
}
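// Note: this scalar fallback matches the vectorized path above: each output is
// the product of two clipped halves of the accumulator, scaled down by 128
// (the vector code performs the /128 via the shift-by-7 inside vec_msb_pack_16).
// With both factors clipped to [0, 127] the product is at most 16129, so the
// scaled result fits comfortably in the uint8_t output.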
#endif
}
#if defined(vec_cleanup)
vec_cleanup();
#endif
return psqt;
#elif defined(USE_SSE2)
#ifdef USE_SSE41
constexpr IndexType NumChunks = HalfDimensions / SimdWidth;
const __m128i Zero = _mm_setzero_si128();
#else
constexpr IndexType NumChunks = HalfDimensions / SimdWidth;
const __m128i k0x80s = _mm_set1_epi8(-128);
#endif
for (IndexType p = 0; p < 2; ++p)
{
const IndexType offset = HalfDimensions * p;
auto out = reinterpret_cast<__m128i*>(&output[offset]);
for (IndexType j = 0; j < NumChunks; ++j)
{
__m128i sum0 = _mm_load_si128(&reinterpret_cast<const __m128i*>
(accumulation[perspectives[p]])[j * 2 + 0]);
__m128i sum1 = _mm_load_si128(&reinterpret_cast<const __m128i*>
(accumulation[perspectives[p]])[j * 2 + 1]);
const __m128i packedbytes = _mm_packs_epi16(sum0, sum1);
#ifdef USE_SSE41
_mm_store_si128(&out[j], _mm_max_epi8(packedbytes, Zero));
#else
_mm_store_si128(&out[j], _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s));
#endif
}
}
return psqt;
#elif defined(USE_MMX)
constexpr IndexType NumChunks = HalfDimensions / SimdWidth;
const __m64 k0x80s = _mm_set1_pi8(-128);
for (IndexType p = 0; p < 2; ++p)
{
const IndexType offset = HalfDimensions * p;
auto out = reinterpret_cast<__m64*>(&output[offset]);
for (IndexType j = 0; j < NumChunks; ++j)
{
__m64 sum0 = *(&reinterpret_cast<const __m64*>(accumulation[perspectives[p]])[j * 2 + 0]);
__m64 sum1 = *(&reinterpret_cast<const __m64*>(accumulation[perspectives[p]])[j * 2 + 1]);
const __m64 packedbytes = _mm_packs_pi16(sum0, sum1);
out[j] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s);
}
}
_mm_empty();
return psqt;
#elif defined(USE_NEON)
constexpr IndexType NumChunks = HalfDimensions / (SimdWidth / 2);
const int8x8_t Zero = {0};
for (IndexType p = 0; p < 2; ++p)
{
const IndexType offset = HalfDimensions * p;
const auto out = reinterpret_cast<int8x8_t*>(&output[offset]);
for (IndexType j = 0; j < NumChunks; ++j)
{
int16x8_t sum = reinterpret_cast<const int16x8_t*>(accumulation[perspectives[p]])[j];
out[j] = vmax_s8(vqmovn_s16(sum), Zero);
}
}
return psqt;
#else
for (IndexType p = 0; p < 2; ++p)
{
const IndexType offset = HalfDimensions * p;
for (IndexType j = 0; j < HalfDimensions; ++j)
{
BiasType sum = accumulation[perspectives[p]][j];
output[offset + j] = static_cast<OutputType>(std::max<int>(0, std::min<int>(127, sum)));
}
}
return psqt;
#endif
} // end of function transform()
@ -370,7 +344,6 @@ namespace Stockfish::Eval::NNUE {
// That might depend on the feature set and generally relies on the
// feature set's update cost calculation to be correct and never
// allow updates with more added/removed features than MaxActiveDimensions.
using IndexList = ValueList<IndexType, FeatureSet::MaxActiveDimensions>;
#ifdef VECTOR
// Gcc-10.2 unnecessarily spills AVX2 registers if this array
@ -404,12 +377,12 @@ namespace Stockfish::Eval::NNUE {
// Gather all features to be updated.
const Square ksq = pos.square<KING>(perspective);
IndexList removed[2], added[2];
FeatureSet::IndexList removed[2], added[2];
FeatureSet::append_changed_indices(
ksq, next, perspective, removed[0], added[0]);
ksq, next->dirtyPiece, perspective, removed[0], added[0]);
for (StateInfo *st2 = pos.state(); st2 != next; st2 = st2->previous)
FeatureSet::append_changed_indices(
ksq, st2, perspective, removed[1], added[1]);
ksq, st2->dirtyPiece, perspective, removed[1], added[1]);
// Mark the accumulators as computed.
next->accumulator.computed[perspective] = true;
@ -534,7 +507,7 @@ namespace Stockfish::Eval::NNUE {
// Refresh the accumulator
auto& accumulator = pos.state()->accumulator;
accumulator.computed[perspective] = true;
IndexList active;
FeatureSet::IndexList active;
FeatureSet::append_active_indices(pos, perspective, active);
#ifdef VECTOR

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -32,30 +32,30 @@ namespace {
#define S(mg, eg) make_score(mg, eg)
// Pawn penalties
constexpr Score Backward = S( 9, 22);
constexpr Score Doubled = S(13, 51);
constexpr Score DoubledEarly = S(20, 7);
constexpr Score Isolated = S( 3, 15);
constexpr Score WeakLever = S( 4, 58);
constexpr Score WeakUnopposed = S(13, 24);
constexpr Score Backward = S( 6, 19);
constexpr Score Doubled = S(11, 51);
constexpr Score DoubledEarly = S(17, 7);
constexpr Score Isolated = S( 1, 20);
constexpr Score WeakLever = S( 2, 57);
constexpr Score WeakUnopposed = S(15, 18);
// Bonus for blocked pawns at 5th or 6th rank
constexpr Score BlockedPawn[2] = { S(-17, -6), S(-9, 2) };
constexpr Score BlockedPawn[2] = { S(-19, -8), S(-7, 3) };
constexpr Score BlockedStorm[RANK_NB] = {
S(0, 0), S(0, 0), S(75, 78), S(-8, 16), S(-6, 10), S(-6, 6), S(0, 2)
S(0, 0), S(0, 0), S(64, 75), S(-3, 14), S(-12, 19), S(-7, 4), S(-10, 5)
};
// Connected pawn bonus
constexpr int Connected[RANK_NB] = { 0, 5, 7, 11, 23, 48, 87 };
constexpr int Connected[RANK_NB] = { 0, 3, 7, 7, 15, 54, 86 };
// Strength of pawn shelter for our king by [distance from edge][rank].
// RANK_1 = 0 is used for files where we have no pawn, or pawn is behind our king.
constexpr Value ShelterStrength[int(FILE_NB) / 2][RANK_NB] = {
{ V( -5), V( 82), V( 92), V( 54), V( 36), V( 22), V( 28) },
{ V(-44), V( 63), V( 33), V(-50), V(-30), V(-12), V( -62) },
{ V(-11), V( 77), V( 22), V( -6), V( 31), V( 8), V( -45) },
{ V(-39), V(-12), V(-29), V(-50), V(-43), V(-68), V(-164) }
{ V(-2), V(85), V(95), V(53), V(39), V(23), V(25) },
{ V(-55), V(64), V(32), V(-55), V(-30), V(-11), V(-61) },
{ V(-11), V(75), V(19), V(-6), V(26), V(9), V(-47) },
{ V(-41), V(-11), V(-27), V(-58), V(-42), V(-66), V(-163) }
};
// Danger of enemy pawns moving toward our king by [distance from edge][rank].
@ -63,17 +63,17 @@ namespace {
// is behind our king. Note that UnblockedStorm[0][1-2] accommodate opponent pawn
// on edge, likely blocked by our king.
constexpr Value UnblockedStorm[int(FILE_NB) / 2][RANK_NB] = {
{ V( 87), V(-288), V(-168), V( 96), V( 47), V( 44), V( 46) },
{ V( 42), V( -25), V( 120), V( 45), V( 34), V( -9), V( 24) },
{ V( -8), V( 51), V( 167), V( 35), V( -4), V(-16), V(-12) },
{ V(-17), V( -13), V( 100), V( 4), V( 9), V(-16), V(-31) }
{ V(94), V(-280), V(-170), V(90), V(59), V(47), V(53) },
{ V(43), V(-17), V(128), V(39), V(26), V(-17), V(15) },
{ V(-9), V(62), V(170), V(34), V(-5), V(-20), V(-11) },
{ V(-27), V(-19), V(106), V(10), V(2), V(-13), V(-24) }
};
// KingOnFile[semi-open Us][semi-open Them] contains bonuses/penalties
// for king when the king is on a semi-open or open file.
constexpr Score KingOnFile[2][2] = {{ S(-21,10), S(-7, 1) },
{ S( 0,-3), S( 9,-4) }};
constexpr Score KingOnFile[2][2] = {{ S(-18,11), S(-6,-3) },
{ S( 0, 0), S( 5,-4) }};
#undef S
#undef V

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -1013,9 +1013,9 @@ void Position::do_null_move(StateInfo& newSt) {
}
st->key ^= Zobrist::side;
++st->rule50;
prefetch(TT.first_entry(key()));
++st->rule50;
st->pliesFromNull = 0;
sideToMove = ~sideToMove;
@ -1080,8 +1080,9 @@ bool Position::see_ge(Move m, Value threshold) const {
if (swap <= 0)
return true;
assert(color_of(piece_on(from)) == sideToMove);
Bitboard occupied = pieces() ^ from ^ to;
Color stm = color_of(piece_on(from));
Color stm = sideToMove;
Bitboard attackers = attackers_to(to, occupied);
Bitboard stmAttackers, bb;
int res = 1;

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -120,12 +120,12 @@ public:
Bitboard attackers_to(Square s) const;
Bitboard attackers_to(Square s, Bitboard occupied) const;
Bitboard slider_blockers(Bitboard sliders, Square s, Bitboard& pinners) const;
template<PieceType Pt> Bitboard attacks_by(Color c) const;
// Properties of moves
bool legal(Move m) const;
bool pseudo_legal(const Move m) const;
bool capture(Move m) const;
bool capture_or_promotion(Move m) const;
bool gives_check(Move m) const;
Piece moved_piece(Move m) const;
Piece captured_piece() const;
@ -285,6 +285,22 @@ inline Bitboard Position::attackers_to(Square s) const {
return attackers_to(s, pieces());
}
template<PieceType Pt>
inline Bitboard Position::attacks_by(Color c) const {
if constexpr (Pt == PAWN)
return c == WHITE ? pawn_attacks_bb<WHITE>(pieces(WHITE, PAWN))
: pawn_attacks_bb<BLACK>(pieces(BLACK, PAWN));
else
{
Bitboard threats = 0;
Bitboard attackers = pieces(c, Pt);
while (attackers)
threats |= attacks_bb<Pt>(pop_lsb(attackers), pieces());
return threats;
}
}
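// Usage sketch (illustrative): pos.attacks_by<KNIGHT>(WHITE) yields a bitboard
// of all squares attacked by white knights in the current position, while
// pos.attacks_by<PAWN>(BLACK) takes the dedicated pawn branch above.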
inline Bitboard Position::checkers() const {
return st->checkersBB;
}
@ -352,11 +368,6 @@ inline bool Position::is_chess960() const {
return chess960;
}
inline bool Position::capture_or_promotion(Move m) const {
assert(is_ok(m));
return type_of(m) != NORMAL ? type_of(m) != CASTLING : !empty(to_sq(m));
}
inline bool Position::capture(Move m) const {
assert(is_ok(m));
// Castling is encoded as "king captures rook"

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by

File diff suppressed because it is too large

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -47,6 +47,7 @@ struct Stack {
Move excludedMove;
Move killers[2];
Value staticEval;
Depth depth;
int statScore;
int moveCount;
bool inCheck;
@ -72,6 +73,7 @@ struct RootMove {
Value score = -VALUE_INFINITE;
Value previousScore = -VALUE_INFINITE;
Value averageScore = -VALUE_INFINITE;
int selDepth = 0;
int tbRank = 0;
Value tbScore;

View File

@ -0,0 +1,387 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Stockfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef STOCKFISH_SIMD_H_INCLUDED
#define STOCKFISH_SIMD_H_INCLUDED
#if defined(USE_AVX2)
# include <immintrin.h>
#elif defined(USE_SSE41)
# include <smmintrin.h>
#elif defined(USE_SSSE3)
# include <tmmintrin.h>
#elif defined(USE_SSE2)
# include <emmintrin.h>
#elif defined(USE_MMX)
# include <mmintrin.h>
#elif defined(USE_NEON)
# include <arm_neon.h>
#endif
// The inline asm is only safe for GCC, where it is necessary to get good codegen.
// See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101693
// Clang does fine without it.
// Play around here: https://godbolt.org/z/7EWqrYq51
#if (defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER))
#define USE_INLINE_ASM
#endif
// Use either the AVX512 or AVX-VNNI version of the VNNI instructions.
#if defined(USE_AVXVNNI)
#define VNNI_PREFIX "%{vex%} "
#else
#define VNNI_PREFIX ""
#endif
namespace Stockfish::Simd {
#if defined (USE_AVX512)
[[maybe_unused]] static int m512_hadd(__m512i sum, int bias) {
return _mm512_reduce_add_epi32(sum) + bias;
}
/*
Parameters:
sum0 = [zmm0.i128[0], zmm0.i128[1], zmm0.i128[2], zmm0.i128[3]]
sum1 = [zmm1.i128[0], zmm1.i128[1], zmm1.i128[2], zmm1.i128[3]]
sum2 = [zmm2.i128[0], zmm2.i128[1], zmm2.i128[2], zmm2.i128[3]]
sum3 = [zmm3.i128[0], zmm3.i128[1], zmm3.i128[2], zmm3.i128[3]]
Returns:
ret = [
reduce_add_epi32(zmm0.i128[0]), reduce_add_epi32(zmm1.i128[0]), reduce_add_epi32(zmm2.i128[0]), reduce_add_epi32(zmm3.i128[0]),
reduce_add_epi32(zmm0.i128[1]), reduce_add_epi32(zmm1.i128[1]), reduce_add_epi32(zmm2.i128[1]), reduce_add_epi32(zmm3.i128[1]),
reduce_add_epi32(zmm0.i128[2]), reduce_add_epi32(zmm1.i128[2]), reduce_add_epi32(zmm2.i128[2]), reduce_add_epi32(zmm3.i128[2]),
reduce_add_epi32(zmm0.i128[3]), reduce_add_epi32(zmm1.i128[3]), reduce_add_epi32(zmm2.i128[3]), reduce_add_epi32(zmm3.i128[3])
]
*/
[[maybe_unused]] static __m512i m512_hadd128x16_interleave(
__m512i sum0, __m512i sum1, __m512i sum2, __m512i sum3) {
__m512i sum01a = _mm512_unpacklo_epi32(sum0, sum1);
__m512i sum01b = _mm512_unpackhi_epi32(sum0, sum1);
__m512i sum23a = _mm512_unpacklo_epi32(sum2, sum3);
__m512i sum23b = _mm512_unpackhi_epi32(sum2, sum3);
__m512i sum01 = _mm512_add_epi32(sum01a, sum01b);
__m512i sum23 = _mm512_add_epi32(sum23a, sum23b);
__m512i sum0123a = _mm512_unpacklo_epi64(sum01, sum23);
__m512i sum0123b = _mm512_unpackhi_epi64(sum01, sum23);
return _mm512_add_epi32(sum0123a, sum0123b);
}
[[maybe_unused]] static __m128i m512_haddx4(
__m512i sum0, __m512i sum1, __m512i sum2, __m512i sum3,
__m128i bias) {
__m512i sum = m512_hadd128x16_interleave(sum0, sum1, sum2, sum3);
__m256i sum256lo = _mm512_castsi512_si256(sum);
__m256i sum256hi = _mm512_extracti64x4_epi64(sum, 1);
sum256lo = _mm256_add_epi32(sum256lo, sum256hi);
__m128i sum128lo = _mm256_castsi256_si128(sum256lo);
__m128i sum128hi = _mm256_extracti128_si256(sum256lo, 1);
return _mm_add_epi32(_mm_add_epi32(sum128lo, sum128hi), bias);
}
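// m512_add_dpbusd_epi32(acc, a, b) adds to each 32-bit lane of acc the dot
// product of four unsigned bytes of a with the corresponding signed bytes of b.
// Without VNNI it is emulated with maddubs (u8*s8 pairs added with 16-bit
// saturation) followed by madd against a vector of ones (16-bit pairs summed
// into 32 bits); the quantization scheme keeps weights and activations small
// enough that the saturating intermediate does not overflow in practice.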
[[maybe_unused]] static void m512_add_dpbusd_epi32(
__m512i& acc,
__m512i a,
__m512i b) {
# if defined (USE_VNNI)
# if defined (USE_INLINE_ASM)
asm(
"vpdpbusd %[b], %[a], %[acc]\n\t"
: [acc]"+v"(acc)
: [a]"v"(a), [b]"vm"(b)
);
# else
acc = _mm512_dpbusd_epi32(acc, a, b);
# endif
# else
# if defined (USE_INLINE_ASM)
__m512i tmp = _mm512_maddubs_epi16(a, b);
asm(
"vpmaddwd %[tmp], %[ones], %[tmp]\n\t"
"vpaddd %[acc], %[tmp], %[acc]\n\t"
: [acc]"+v"(acc), [tmp]"+&v"(tmp)
: [ones]"v"(_mm512_set1_epi16(1))
);
# else
__m512i product0 = _mm512_maddubs_epi16(a, b);
product0 = _mm512_madd_epi16(product0, _mm512_set1_epi16(1));
acc = _mm512_add_epi32(acc, product0);
# endif
# endif
}
[[maybe_unused]] static void m512_add_dpbusd_epi32x2(
__m512i& acc,
__m512i a0, __m512i b0,
__m512i a1, __m512i b1) {
# if defined (USE_VNNI)
# if defined (USE_INLINE_ASM)
asm(
"vpdpbusd %[b0], %[a0], %[acc]\n\t"
"vpdpbusd %[b1], %[a1], %[acc]\n\t"
: [acc]"+v"(acc)
: [a0]"v"(a0), [b0]"vm"(b0), [a1]"v"(a1), [b1]"vm"(b1)
);
# else
acc = _mm512_dpbusd_epi32(acc, a0, b0);
acc = _mm512_dpbusd_epi32(acc, a1, b1);
# endif
# else
# if defined (USE_INLINE_ASM)
__m512i tmp0 = _mm512_maddubs_epi16(a0, b0);
__m512i tmp1 = _mm512_maddubs_epi16(a1, b1);
asm(
"vpaddsw %[tmp0], %[tmp1], %[tmp0]\n\t"
"vpmaddwd %[tmp0], %[ones], %[tmp0]\n\t"
"vpaddd %[acc], %[tmp0], %[acc]\n\t"
: [acc]"+v"(acc), [tmp0]"+&v"(tmp0)
: [tmp1]"v"(tmp1), [ones]"v"(_mm512_set1_epi16(1))
);
# else
__m512i product0 = _mm512_maddubs_epi16(a0, b0);
__m512i product1 = _mm512_maddubs_epi16(a1, b1);
product0 = _mm512_adds_epi16(product0, product1);
product0 = _mm512_madd_epi16(product0, _mm512_set1_epi16(1));
acc = _mm512_add_epi32(acc, product0);
# endif
# endif
}
#endif
#if defined (USE_AVX2)
[[maybe_unused]] static int m256_hadd(__m256i sum, int bias) {
__m128i sum128 = _mm_add_epi32(_mm256_castsi256_si128(sum), _mm256_extracti128_si256(sum, 1));
sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_BADC));
sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_CDAB));
return _mm_cvtsi128_si32(sum128) + bias;
}
[[maybe_unused]] static __m128i m256_haddx4(
__m256i sum0, __m256i sum1, __m256i sum2, __m256i sum3,
__m128i bias) {
sum0 = _mm256_hadd_epi32(sum0, sum1);
sum2 = _mm256_hadd_epi32(sum2, sum3);
sum0 = _mm256_hadd_epi32(sum0, sum2);
__m128i sum128lo = _mm256_castsi256_si128(sum0);
__m128i sum128hi = _mm256_extracti128_si256(sum0, 1);
return _mm_add_epi32(_mm_add_epi32(sum128lo, sum128hi), bias);
}
[[maybe_unused]] static void m256_add_dpbusd_epi32(
__m256i& acc,
__m256i a,
__m256i b) {
# if defined (USE_VNNI)
# if defined (USE_INLINE_ASM)
asm(
VNNI_PREFIX "vpdpbusd %[b], %[a], %[acc]\n\t"
: [acc]"+v"(acc)
: [a]"v"(a), [b]"vm"(b)
);
# else
acc = _mm256_dpbusd_epi32(acc, a, b);
# endif
# else
# if defined (USE_INLINE_ASM)
__m256i tmp = _mm256_maddubs_epi16(a, b);
asm(
"vpmaddwd %[tmp], %[ones], %[tmp]\n\t"
"vpaddd %[acc], %[tmp], %[acc]\n\t"
: [acc]"+v"(acc), [tmp]"+&v"(tmp)
: [ones]"v"(_mm256_set1_epi16(1))
);
# else
__m256i product0 = _mm256_maddubs_epi16(a, b);
product0 = _mm256_madd_epi16(product0, _mm256_set1_epi16(1));
acc = _mm256_add_epi32(acc, product0);
# endif
# endif
}
[[maybe_unused]] static void m256_add_dpbusd_epi32x2(
__m256i& acc,
__m256i a0, __m256i b0,
__m256i a1, __m256i b1) {
# if defined (USE_VNNI)
# if defined (USE_INLINE_ASM)
asm(
VNNI_PREFIX "vpdpbusd %[b0], %[a0], %[acc]\n\t"
VNNI_PREFIX "vpdpbusd %[b1], %[a1], %[acc]\n\t"
: [acc]"+v"(acc)
: [a0]"v"(a0), [b0]"vm"(b0), [a1]"v"(a1), [b1]"vm"(b1)
);
# else
acc = _mm256_dpbusd_epi32(acc, a0, b0);
acc = _mm256_dpbusd_epi32(acc, a1, b1);
# endif
# else
# if defined (USE_INLINE_ASM)
__m256i tmp0 = _mm256_maddubs_epi16(a0, b0);
__m256i tmp1 = _mm256_maddubs_epi16(a1, b1);
asm(
"vpaddsw %[tmp0], %[tmp1], %[tmp0]\n\t"
"vpmaddwd %[tmp0], %[ones], %[tmp0]\n\t"
"vpaddd %[acc], %[tmp0], %[acc]\n\t"
: [acc]"+v"(acc), [tmp0]"+&v"(tmp0)
: [tmp1]"v"(tmp1), [ones]"v"(_mm256_set1_epi16(1))
);
# else
__m256i product0 = _mm256_maddubs_epi16(a0, b0);
__m256i product1 = _mm256_maddubs_epi16(a1, b1);
product0 = _mm256_adds_epi16(product0, product1);
product0 = _mm256_madd_epi16(product0, _mm256_set1_epi16(1));
acc = _mm256_add_epi32(acc, product0);
# endif
# endif
}
#endif
#if defined (USE_SSSE3)
[[maybe_unused]] static int m128_hadd(__m128i sum, int bias) {
sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0x4E)); //_MM_PERM_BADC
sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0xB1)); //_MM_PERM_CDAB
return _mm_cvtsi128_si32(sum) + bias;
}
[[maybe_unused]] static __m128i m128_haddx4(
__m128i sum0, __m128i sum1, __m128i sum2, __m128i sum3,
__m128i bias) {
sum0 = _mm_hadd_epi32(sum0, sum1);
sum2 = _mm_hadd_epi32(sum2, sum3);
sum0 = _mm_hadd_epi32(sum0, sum2);
return _mm_add_epi32(sum0, bias);
}
[[maybe_unused]] static void m128_add_dpbusd_epi32(
__m128i& acc,
__m128i a,
__m128i b) {
# if defined (USE_INLINE_ASM)
__m128i tmp = _mm_maddubs_epi16(a, b);
asm(
"pmaddwd %[ones], %[tmp]\n\t"
"paddd %[tmp], %[acc]\n\t"
: [acc]"+v"(acc), [tmp]"+&v"(tmp)
: [ones]"v"(_mm_set1_epi16(1))
);
# else
__m128i product0 = _mm_maddubs_epi16(a, b);
product0 = _mm_madd_epi16(product0, _mm_set1_epi16(1));
acc = _mm_add_epi32(acc, product0);
# endif
}
[[maybe_unused]] static void m128_add_dpbusd_epi32x2(
__m128i& acc,
__m128i a0, __m128i b0,
__m128i a1, __m128i b1) {
# if defined (USE_INLINE_ASM)
__m128i tmp0 = _mm_maddubs_epi16(a0, b0);
__m128i tmp1 = _mm_maddubs_epi16(a1, b1);
asm(
"paddsw %[tmp1], %[tmp0]\n\t"
"pmaddwd %[ones], %[tmp0]\n\t"
"paddd %[tmp0], %[acc]\n\t"
: [acc]"+v"(acc), [tmp0]"+&v"(tmp0)
: [tmp1]"v"(tmp1), [ones]"v"(_mm_set1_epi16(1))
);
# else
__m128i product0 = _mm_maddubs_epi16(a0, b0);
__m128i product1 = _mm_maddubs_epi16(a1, b1);
product0 = _mm_adds_epi16(product0, product1);
product0 = _mm_madd_epi16(product0, _mm_set1_epi16(1));
acc = _mm_add_epi32(acc, product0);
# endif
}
#endif
#if defined (USE_NEON)
[[maybe_unused]] static int neon_m128_reduce_add_epi32(int32x4_t s) {
# if USE_NEON >= 8
return vaddvq_s32(s);
# else
return s[0] + s[1] + s[2] + s[3];
# endif
}
[[maybe_unused]] static int neon_m128_hadd(int32x4_t sum, int bias) {
return neon_m128_reduce_add_epi32(sum) + bias;
}
[[maybe_unused]] static int32x4_t neon_m128_haddx4(
int32x4_t sum0, int32x4_t sum1, int32x4_t sum2, int32x4_t sum3,
int32x4_t bias) {
int32x4_t hsums {
neon_m128_reduce_add_epi32(sum0),
neon_m128_reduce_add_epi32(sum1),
neon_m128_reduce_add_epi32(sum2),
neon_m128_reduce_add_epi32(sum3)
};
return vaddq_s32(hsums, bias);
}
[[maybe_unused]] static void neon_m128_add_dpbusd_epi32x2(
int32x4_t& acc,
int8x8_t a0, int8x8_t b0,
int8x8_t a1, int8x8_t b1) {
int16x8_t product = vmull_s8(a0, b0);
product = vmlal_s8(product, a1, b1);
acc = vpadalq_s16(acc, product);
}
#endif
}
#endif // STOCKFISH_SIMD_H_INCLUDED
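All the x86 *_add_dpbusd_epi32 helpers above perform the same per-lane operation: accumulate a four-way dot product of unsigned 8-bit by signed 8-bit values into a 32-bit lane, using a native VNNI vpdpbusd where available and the maddubs/madd pair otherwise. A scalar reference sketch, added here only to pin down that semantics (not part of the header above):

#include <cstdint>

// One 32-bit lane of the x86 add_dpbusd helpers: acc += sum of four u8 * s8 products.
// The non-VNNI emulation saturates the intermediate 16-bit pair sums (maddubs/paddsw),
// so it can differ from this exact reference when those sums overflow int16.
// The NEON variant instead uses signed widening multiplies (vmull_s8 / vmlal_s8).
void add_dpbusd_lane_reference(int32_t& acc, const uint8_t a[4], const int8_t b[4])
{
    for (int i = 0; i < 4; ++i)
        acc += int32_t(a[i]) * int32_t(b[i]);
}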

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -472,8 +472,6 @@ TBTables TBTables;
// If the corresponding file exists two new objects TBTable<WDL> and TBTable<DTZ>
// are created and added to the lists and hash table. Called at init time.
void TBTables::add(const std::vector<PieceType>& pieces) {
if (sizeof(char*) < 8 && pieces.size() >= 6)
return; // Not enough address space to support 6-men TB on 32-bit OS
std::string code;
@ -771,7 +769,7 @@ Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* resu
goto encode_remaining; // With pawns we have finished special treatments
}
// In positions withouth pawns, we further flip the squares to ensure leading
// In positions without pawns, we further flip the squares to ensure leading
// piece is below RANK_5.
if (rank_of(squares[0]) > RANK_4)
for (int i = 0; i < size; ++i)
@ -814,7 +812,7 @@ Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* resu
// Rs "together" in 62 * 61 / 2 ways (we divide by 2 because rooks can be
// swapped and still get the same position.)
//
// In case we have at least 3 unique pieces (inlcuded kings) we encode them
// In case we have at least 3 unique pieces (included kings) we encode them
// together.
if (entry->hasUniquePieces) {
@ -829,7 +827,7 @@ Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* resu
+ (squares[1] - adjust1)) * 62
+ squares[2] - adjust2;
// First piece is on a1-h8 diagonal, second below: map this occurence to
// First piece is on a1-h8 diagonal, second below: map this occurrence to
// 6 to differentiate from the above case, rank_of() maps a1-d4 diagonal
// to 0...3 and finally MapB1H1H7[] maps the b1-h1-h7 triangle to 0..27.
else if (off_A1H8(squares[1]))
@ -859,7 +857,7 @@ encode_remaining:
idx *= d->groupIdx[0];
Square* groupSq = squares + d->groupLen[0];
// Encode remainig pawns then pieces according to square, in ascending order
// Encode remaining pawns then pieces according to square, in ascending order
bool remainingPawns = entry->hasPawns && entry->pawnCount[1];
while (d->groupLen[++next])
@ -887,7 +885,7 @@ encode_remaining:
// Group together pieces that will be encoded together. The general rule is that
// a group contains pieces of same type and color. The exception is the leading
// group that, in case of positions withouth pawns, can be formed by 3 different
// group that, in case of positions without pawns, can be formed by 3 different
// pieces (default) or by the king pair when there is not a unique piece apart
// from the kings. When there are pawns, pawns are always first in pieces[].
//
@ -919,7 +917,7 @@ void set_groups(T& e, PairsData* d, int order[], File f) {
//
// This ensures unique encoding for the whole position. The order of the
// groups is a per-table parameter and could not follow the canonical leading
// pawns/pieces -> remainig pawns -> remaining pieces. In particular the
// pawns/pieces -> remaining pawns -> remaining pieces. In particular the
// first group is at order[0] position and the remaining pawns, when present,
// are at order[1] position.
bool pp = e.hasPawns && e.pawnCount[1]; // Pawns on both sides
@ -939,7 +937,7 @@ void set_groups(T& e, PairsData* d, int order[], File f) {
d->groupIdx[1] = idx;
idx *= Binomial[d->groupLen[1]][48 - d->groupLen[0]];
}
else // Remainig pieces
else // Remaining pieces
{
d->groupIdx[next] = idx;
idx *= Binomial[d->groupLen[next]][freeSquares];
@ -949,7 +947,7 @@ void set_groups(T& e, PairsData* d, int order[], File f) {
d->groupIdx[n] = idx;
}
// In Recursive Pairing each symbol represents a pair of childern symbols. So
// In Recursive Pairing each symbol represents a pair of children symbols. So
// read d->btree[] symbols data and expand each one in his left and right child
// symbol until reaching the leafs that represent the symbol value.
uint8_t set_symlen(PairsData* d, Sym s, std::vector<bool>& visited) {
@ -1319,7 +1317,7 @@ void Tablebases::init(const std::string& paths) {
for (auto p : bothOnDiagonal)
MapKK[p.first][p.second] = code++;
// Binomial[] stores the Binomial Coefficents using Pascal rule. There
// Binomial[] stores the Binomial Coefficients using Pascal rule. There
// are Binomial[k][n] ways to choose k elements from a set of n elements.
Binomial[0][0] = 1;
@ -1339,7 +1337,7 @@ void Tablebases::init(const std::string& paths) {
for (int leadPawnsCnt = 1; leadPawnsCnt <= 5; ++leadPawnsCnt)
for (File f = FILE_A; f <= FILE_D; ++f)
{
// Restart the index at every file because TB table is splitted
// Restart the index at every file because TB table is split
// by file, so we can reuse the same index for different files.
int idx = 0;
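The Binomial[] comment corrected above describes a table filled with Pascal's rule, C(n, k) = C(n-1, k-1) + C(n-1, k). A minimal standalone sketch of such a fill (the dimensions and function name here are illustrative, not the engine's own):

#include <cstdint>

uint64_t Binomial[7][64];   // Binomial[k][n]: ways to choose k elements from a set of n

void init_binomial()
{
    Binomial[0][0] = 1;
    for (int n = 1; n < 64; ++n)               // elements (squares)
        for (int k = 0; k < 7 && k <= n; ++k)  // chosen elements (pieces)
            Binomial[k][n] = (k > 0 ? Binomial[k - 1][n - 1] : 0)
                           + Binomial[k][n - 1];
}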

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -38,7 +38,7 @@ enum WDLScore {
// Possible states after a probing operation
enum ProbeState {
FAIL = 0, // Probe failed (missing file table)
OK = 1, // Probe succesful
OK = 1, // Probe successful
CHANGE_STM = -1, // DTZ should check the other side
ZEROING_BEST_MOVE = 2 // Best move zeroes DTZ (capture or pawn move)
};

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -59,7 +59,6 @@ void Thread::clear() {
counterMoves.fill(MOVE_NONE);
mainHistory.fill(0);
lowPlyHistory.fill(0);
captureHistory.fill(0);
for (bool inCheck : { false, true })
@ -67,7 +66,7 @@ void Thread::clear() {
{
for (auto& to : continuationHistory[inCheck][c])
for (auto& h : to)
h->fill(0);
h->fill(-71);
continuationHistory[inCheck][c][NO_PIECE][0]->fill(Search::CounterMovePruneThreshold - 1);
}
}
@ -162,6 +161,7 @@ void ThreadPool::clear() {
main()->callsCnt = 0;
main()->bestPreviousScore = VALUE_INFINITE;
main()->bestPreviousAverageScore = VALUE_INFINITE;
main()->previousTimeReduction = 1.0;
}

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -60,18 +60,19 @@ public:
Pawns::Table pawnsTable;
Material::Table materialTable;
size_t pvIdx, pvLast;
uint64_t ttHitAverage;
RunningAverage complexityAverage;
std::atomic<uint64_t> nodes, tbHits, bestMoveChanges;
int selDepth, nmpMinPly;
Color nmpColor;
std::atomic<uint64_t> nodes, tbHits, bestMoveChanges;
Value bestValue, optimism[COLOR_NB];
Position rootPos;
StateInfo rootState;
Search::RootMoves rootMoves;
Depth rootDepth, completedDepth;
Depth rootDepth, completedDepth, depth;
Value rootDelta;
CounterMoveHistory counterMoves;
ButterflyHistory mainHistory;
LowPlyHistory lowPlyHistory;
CapturePieceToHistory captureHistory;
ContinuationHistory continuationHistory[2][2];
Score trend;
@ -89,6 +90,7 @@ struct MainThread : public Thread {
double previousTimeReduction;
Value bestPreviousScore;
Value bestPreviousAverageScore;
Value iterValue[4];
int callsCnt;
bool stopOnPonderhit;

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -68,6 +68,9 @@ void TimeManagement::init(Search::LimitsType& limits, Color us, int ply) {
TimePoint timeLeft = std::max(TimePoint(1),
limits.time[us] + limits.inc[us] * (mtg - 1) - moveOverhead * (2 + mtg));
// Use extra time with larger increments
double optExtra = std::clamp(1.0 + 12.0 * limits.inc[us] / limits.time[us], 1.0, 1.12);
// A user may scale time usage by setting UCI option "Slow Mover"
// Default is 100 and changing this value will probably lose elo.
timeLeft = slowMover * timeLeft / 100;
@ -78,15 +81,16 @@ void TimeManagement::init(Search::LimitsType& limits, Color us, int ply) {
if (limits.movestogo == 0)
{
optScale = std::min(0.0084 + std::pow(ply + 3.0, 0.5) * 0.0042,
0.2 * limits.time[us] / double(timeLeft));
0.2 * limits.time[us] / double(timeLeft))
* optExtra;
maxScale = std::min(7.0, 4.0 + ply / 12.0);
}
// x moves in y seconds (+ z increment)
else
{
optScale = std::min((0.8 + ply / 128.0) / mtg,
0.8 * limits.time[us] / double(timeLeft));
optScale = std::min((0.88 + ply / 116.4) / mtg,
0.88 * limits.time[us] / double(timeLeft));
maxScale = std::min(6.3, 1.5 + 0.11 * mtg);
}
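The TimeManagement::init hunk above does two things: it introduces an optExtra bonus of up to 12% when the increment is large relative to the clock, and it retunes the moves-to-go branch from 0.8 and ply/128 to 0.88 and ply/116.4. A hedged sketch of just the sudden-death scale computation (the function name and the meaning of timeLeft are assumptions; the formulas themselves are taken from the hunk):

#include <algorithm>
#include <cmath>

// Illustrative only: optimum-time scale for the movestogo == 0 branch above.
// timeLeft is assumed to be the already-adjusted budget computed earlier in init().
double opt_scale_sudden_death(double myTime, double myInc, double timeLeft, int ply)
{
    double optExtra = std::clamp(1.0 + 12.0 * myInc / myTime, 1.0, 1.12);
    return std::min(0.0084 + std::pow(ply + 3.0, 0.5) * 0.0042,
                    0.2 * myTime / timeLeft)
         * optExtra;
}

For example, at 60s + 1s increment and ply 20, optExtra saturates at 1.12 and the ply term is about 0.0285, so when that term is the smaller one the engine budgets roughly 3.2% of timeLeft for the move.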

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -40,9 +40,9 @@ void TTEntry::save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev)
move16 = (uint16_t)m;
// Overwrite less valuable entries (cheapest checks first)
if (b == BOUND_EXACT
if ( b == BOUND_EXACT
|| (uint16_t)k != key16
|| d - DEPTH_OFFSET > depth8 - 4)
|| d - DEPTH_OFFSET + 2 * pv > depth8 - 4)
{
assert(d > DEPTH_OFFSET);
assert(d < 256 + DEPTH_OFFSET);
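The TTEntry::save change above adds a two-ply depth credit for PV entries in the overwrite test. A standalone sketch of that decision in isolation (the helper name and the DEPTH_OFFSET value are assumptions; the condition itself is the one in the hunk):

#include <cstdint>

constexpr int DEPTH_OFFSET = -7;   // assumed value; the real constant is defined in tt.h

// Exact bounds and key mismatches always overwrite; otherwise the incoming depth must
// beat the stored depth minus a 4-ply grace, with +2 plies of credit for PV entries.
bool tt_should_overwrite(bool boundExact, bool sameKey, int newDepth, bool isPv, uint8_t storedDepth8)
{
    return boundExact
        || !sameKey
        || newDepth - DEPTH_OFFSET + 2 * isPv > storedDepth8 - 4;
}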

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -84,7 +84,7 @@ class Tune {
static Tune& instance() { static Tune t; return t; } // Singleton
// Use polymorphism to accomodate Entry of different types in the same vector
// Use polymorphism to accommodate Entry of different types in the same vector
struct EntryBase {
virtual ~EntryBase() = default;
virtual void init_option() = 0;

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -465,10 +465,6 @@ constexpr Move make_move(Square from, Square to) {
return Move((from << 6) + to);
}
constexpr Move reverse_move(Move m) {
return make_move(to_sq(m), from_sq(m));
}
template<MoveType T>
constexpr Move make(Square from, Square to, PieceType pt = KNIGHT) {
return Move(T + ((pt - KNIGHT) << 12) + (from << 6) + to);

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -207,8 +207,8 @@ namespace {
// Coefficients of a 3rd order polynomial fit based on fishtest data
// for two parameters needed to transform eval to the argument of a
// logistic function.
double as[] = {-3.68389304, 30.07065921, -60.52878723, 149.53378557};
double bs[] = {-2.0181857, 15.85685038, -29.83452023, 47.59078827};
double as[] = {-1.17202460e-01, 5.94729104e-01, 1.12065546e+01, 1.22606222e+02};
double bs[] = {-1.79066759, 11.30759193, -17.43677612, 36.47147479};
double a = (((as[0] * m + as[1]) * m + as[2]) * m) + as[3];
double b = (((bs[0] * m + bs[1]) * m + bs[2]) * m) + bs[3];
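The retuned as[]/bs[] coefficients above feed the cubic fits for a and b; per the comment in the hunk, these then parameterize a logistic that converts an internal evaluation into a win probability. A hedged sketch of the full mapping (the logistic form and per-mille scaling are inferred from that comment, not shown in the hunk; the surrounding function may clamp or rescale v first):

#include <cmath>

// Illustrative only: v is an internal score (roughly centipawns), m the
// normalized material/ply parameter used by the surrounding code.
int win_rate_per_mille(double v, double m)
{
    const double as[] = {-1.17202460e-01, 5.94729104e-01, 1.12065546e+01, 1.22606222e+02};
    const double bs[] = {-1.79066759, 11.30759193, -17.43677612, 36.47147479};

    double a = (((as[0] * m + as[1]) * m + as[2]) * m) + as[3];
    double b = (((bs[0] * m + bs[1]) * m + bs[2]) * m) + bs[3];

    return int(0.5 + 1000.0 / (1.0 + std::exp((a - v) / b)));
}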

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by

View File

@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -164,7 +164,7 @@ Option& Option::operator=(const string& v) {
assert(!type.empty());
if ( (type != "button" && v.empty())
if ( (type != "button" && type != "string" && v.empty())
|| (type == "check" && v != "true" && v != "false")
|| (type == "spin" && (stof(v) < min || stof(v) > max)))
return *this;

View File

@ -36,7 +36,7 @@ import org.petero.droidfish.EngineOptions;
/** Stockfish engine running as process, started from assets resource. */
public class InternalStockFish extends ExternalEngine {
private static final String defaultNet = "nn-3475407dc199.nnue";
private static final String defaultNet = "nn-6877cd24400e.nnue";
private static final String netOption = "evalfile";
private File defaultNetFile; // To get the full path of the copied default network file