mirror of
https://github.com/peterosterlund2/droidfish.git
synced 2024-11-26 21:47:23 +01:00
Update to Stockfish 15
This commit is contained in:
parent
bc1c8a2c29
commit
07931e96a5
Binary file not shown.
|
@ -5,7 +5,7 @@ SF_SRC_FILES := \
|
|||
bitbase.cpp endgame.cpp material.cpp movepick.cpp position.cpp timeman.cpp \
|
||||
tune.cpp ucioption.cpp \
|
||||
bitboard.cpp evaluate.cpp misc.cpp search.cpp tt.cpp syzygy/tbprobe.cpp \
|
||||
nnue/evaluate_nnue.cpp nnue/features/half_ka_v2.cpp
|
||||
nnue/evaluate_nnue.cpp nnue/features/half_ka_v2_hm.cpp
|
||||
|
||||
MY_ARCH_DEF :=
|
||||
ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -87,6 +87,7 @@ const vector<string> Defaults = {
|
|||
// Chess 960
|
||||
"setoption name UCI_Chess960 value true",
|
||||
"bbqnnrkr/pppppppp/8/8/8/8/PPPPPPPP/BBQNNRKR w HFhf - 0 1 moves g2g3 d7d5 d2d4 c8h3 c1g5 e8d6 g5e7 f7f6",
|
||||
"nqbnrkrb/pppppppp/8/8/8/8/PPPPPPPP/NQBNRKRB w KQkq - 0 1",
|
||||
"setoption name UCI_Chess960 value false"
|
||||
};
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -61,7 +61,7 @@ namespace Stockfish {
|
|||
namespace Eval {
|
||||
|
||||
bool useNNUE;
|
||||
string eval_file_loaded = "None";
|
||||
string currentEvalFileName = "None";
|
||||
|
||||
/// NNUE::init() tries to load a NNUE network at startup time, or when the engine
|
||||
/// receives a UCI command "setoption name EvalFile value nn-[a-z0-9]{12}.nnue"
|
||||
|
@ -78,6 +78,8 @@ namespace Eval {
|
|||
return;
|
||||
|
||||
string eval_file = string(Options["EvalFile"]);
|
||||
if (eval_file.empty())
|
||||
eval_file = EvalFileDefaultName;
|
||||
|
||||
#if defined(DEFAULT_NNUE_DIRECTORY)
|
||||
#define stringify2(x) #x
|
||||
|
@ -88,13 +90,13 @@ namespace Eval {
|
|||
#endif
|
||||
|
||||
for (string directory : dirs)
|
||||
if (eval_file_loaded != eval_file)
|
||||
if (currentEvalFileName != eval_file)
|
||||
{
|
||||
if (directory != "<internal>")
|
||||
{
|
||||
ifstream stream(directory + eval_file, ios::binary);
|
||||
if (load_eval(eval_file, stream))
|
||||
eval_file_loaded = eval_file;
|
||||
currentEvalFileName = eval_file;
|
||||
}
|
||||
|
||||
if (directory == "<internal>" && eval_file == EvalFileDefaultName)
|
||||
|
@ -106,30 +108,29 @@ namespace Eval {
|
|||
|
||||
MemoryBuffer buffer(const_cast<char*>(reinterpret_cast<const char*>(gEmbeddedNNUEData)),
|
||||
size_t(gEmbeddedNNUESize));
|
||||
(void) gEmbeddedNNUEEnd; // Silence warning on unused variable
|
||||
|
||||
istream stream(&buffer);
|
||||
if (load_eval(eval_file, stream))
|
||||
eval_file_loaded = eval_file;
|
||||
currentEvalFileName = eval_file;
|
||||
}
|
||||
}
|
||||
if (eval_file_loaded != eval_file)
|
||||
eval_file_loaded = "";
|
||||
}
|
||||
|
||||
/// NNUE::verify() verifies that the last net used was loaded successfully
|
||||
void NNUE::verify() {
|
||||
|
||||
string eval_file = string(Options["EvalFile"]);
|
||||
if (eval_file.empty())
|
||||
eval_file = EvalFileDefaultName;
|
||||
|
||||
if (useNNUE && eval_file_loaded != eval_file)
|
||||
if (useNNUE && currentEvalFileName != eval_file)
|
||||
{
|
||||
UCI::OptionsMap defaults;
|
||||
UCI::init(defaults);
|
||||
|
||||
string msg1 = "If the UCI option \"Use NNUE\" is set to true, network evaluation parameters compatible with the engine must be available.";
|
||||
string msg2 = "The option is set to true, but the network file " + eval_file + " was not loaded successfully.";
|
||||
string msg3 = "The UCI option EvalFile might need to specify the full path, including the directory name, to the network file.";
|
||||
string msg4 = "The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/" + string(defaults["EvalFile"]);
|
||||
string msg4 = "The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/" + std::string(EvalFileDefaultName);
|
||||
string msg5 = "The engine will be terminated now.";
|
||||
|
||||
sync_cout << "info string ERROR: " << msg1 << sync_endl;
|
||||
|
@ -192,17 +193,17 @@ using namespace Trace;
|
|||
namespace {
|
||||
|
||||
// Threshold for lazy and space evaluation
|
||||
constexpr Value LazyThreshold1 = Value(1565);
|
||||
constexpr Value LazyThreshold2 = Value(1102);
|
||||
constexpr Value LazyThreshold1 = Value(3631);
|
||||
constexpr Value LazyThreshold2 = Value(2084);
|
||||
constexpr Value SpaceThreshold = Value(11551);
|
||||
|
||||
// KingAttackWeights[PieceType] contains king attack weights by piece type
|
||||
constexpr int KingAttackWeights[PIECE_TYPE_NB] = { 0, 0, 81, 52, 44, 10 };
|
||||
constexpr int KingAttackWeights[PIECE_TYPE_NB] = { 0, 0, 76, 46, 45, 14 };
|
||||
|
||||
// SafeCheck[PieceType][single/multiple] contains safe check bonus by piece type,
|
||||
// higher if multiple safe checks are possible for that piece type.
|
||||
constexpr int SafeCheck[][2] = {
|
||||
{}, {}, {803, 1292}, {639, 974}, {1087, 1878}, {759, 1132}
|
||||
{}, {}, {805, 1292}, {650, 984}, {1071, 1886}, {730, 1128}
|
||||
};
|
||||
|
||||
#define S(mg, eg) make_score(mg, eg)
|
||||
|
@ -228,58 +229,58 @@ namespace {
|
|||
// BishopPawns[distance from edge] contains a file-dependent penalty for pawns on
|
||||
// squares of the same color as our bishop.
|
||||
constexpr Score BishopPawns[int(FILE_NB) / 2] = {
|
||||
S(3, 8), S(3, 9), S(2, 8), S(3, 8)
|
||||
S(3, 8), S(3, 9), S(2, 7), S(3, 7)
|
||||
};
|
||||
|
||||
// KingProtector[knight/bishop] contains penalty for each distance unit to own king
|
||||
constexpr Score KingProtector[] = { S(8, 9), S(6, 9) };
|
||||
constexpr Score KingProtector[] = { S(9, 9), S(7, 9) };
|
||||
|
||||
// Outpost[knight/bishop] contains bonuses for each knight or bishop occupying a
|
||||
// pawn protected square on rank 4 to 6 which is also safe from a pawn attack.
|
||||
constexpr Score Outpost[] = { S(57, 38), S(31, 24) };
|
||||
constexpr Score Outpost[] = { S(54, 34), S(31, 25) };
|
||||
|
||||
// PassedRank[Rank] contains a bonus according to the rank of a passed pawn
|
||||
constexpr Score PassedRank[RANK_NB] = {
|
||||
S(0, 0), S(7, 27), S(16, 32), S(17, 40), S(64, 71), S(170, 174), S(278, 262)
|
||||
S(0, 0), S(2, 38), S(15, 36), S(22, 50), S(64, 81), S(166, 184), S(284, 269)
|
||||
};
|
||||
|
||||
constexpr Score RookOnClosedFile = S(10, 5);
|
||||
constexpr Score RookOnOpenFile[] = { S(19, 6), S(47, 26) };
|
||||
constexpr Score RookOnOpenFile[] = { S(18, 8), S(49, 26) };
|
||||
|
||||
// ThreatByMinor/ByRook[attacked PieceType] contains bonuses according to
|
||||
// which piece type attacks which one. Attacks on lesser pieces which are
|
||||
// pawn-defended are not considered.
|
||||
constexpr Score ThreatByMinor[PIECE_TYPE_NB] = {
|
||||
S(0, 0), S(5, 32), S(55, 41), S(77, 56), S(89, 119), S(79, 162)
|
||||
S(0, 0), S(6, 37), S(64, 50), S(82, 57), S(103, 130), S(81, 163)
|
||||
};
|
||||
|
||||
constexpr Score ThreatByRook[PIECE_TYPE_NB] = {
|
||||
S(0, 0), S(3, 44), S(37, 68), S(42, 60), S(0, 39), S(58, 43)
|
||||
S(0, 0), S(3, 44), S(36, 71), S(44, 59), S(0, 39), S(60, 39)
|
||||
};
|
||||
|
||||
constexpr Value CorneredBishop = Value(50);
|
||||
|
||||
// Assorted bonuses and penalties
|
||||
constexpr Score UncontestedOutpost = S( 1, 10);
|
||||
constexpr Score UncontestedOutpost = S( 0, 10);
|
||||
constexpr Score BishopOnKingRing = S( 24, 0);
|
||||
constexpr Score BishopXRayPawns = S( 4, 5);
|
||||
constexpr Score FlankAttacks = S( 8, 0);
|
||||
constexpr Score Hanging = S( 69, 36);
|
||||
constexpr Score Hanging = S( 72, 40);
|
||||
constexpr Score KnightOnQueen = S( 16, 11);
|
||||
constexpr Score LongDiagonalBishop = S( 45, 0);
|
||||
constexpr Score MinorBehindPawn = S( 18, 3);
|
||||
constexpr Score PassedFile = S( 11, 8);
|
||||
constexpr Score PawnlessFlank = S( 17, 95);
|
||||
constexpr Score ReachableOutpost = S( 31, 22);
|
||||
constexpr Score RestrictedPiece = S( 7, 7);
|
||||
constexpr Score PassedFile = S( 13, 8);
|
||||
constexpr Score PawnlessFlank = S( 19, 97);
|
||||
constexpr Score ReachableOutpost = S( 33, 19);
|
||||
constexpr Score RestrictedPiece = S( 6, 7);
|
||||
constexpr Score RookOnKingRing = S( 16, 0);
|
||||
constexpr Score SliderOnQueen = S( 60, 18);
|
||||
constexpr Score ThreatByKing = S( 24, 89);
|
||||
constexpr Score SliderOnQueen = S( 62, 21);
|
||||
constexpr Score ThreatByKing = S( 24, 87);
|
||||
constexpr Score ThreatByPawnPush = S( 48, 39);
|
||||
constexpr Score ThreatBySafePawn = S(173, 94);
|
||||
constexpr Score ThreatBySafePawn = S(167, 99);
|
||||
constexpr Score TrappedRook = S( 55, 13);
|
||||
constexpr Score WeakQueenProtection = S( 14, 0);
|
||||
constexpr Score WeakQueen = S( 56, 15);
|
||||
constexpr Score WeakQueen = S( 57, 19);
|
||||
|
||||
|
||||
#undef S
|
||||
|
@ -988,7 +989,9 @@ namespace {
|
|||
|
||||
// Early exit if score is high
|
||||
auto lazy_skip = [&](Value lazyThreshold) {
|
||||
return abs(mg_value(score) + eg_value(score)) / 2 > lazyThreshold + pos.non_pawn_material() / 64;
|
||||
return abs(mg_value(score) + eg_value(score)) > lazyThreshold
|
||||
+ std::abs(pos.this_thread()->bestValue) * 5 / 4
|
||||
+ pos.non_pawn_material() / 32;
|
||||
};
|
||||
|
||||
if (lazy_skip(LazyThreshold1))
|
||||
|
@ -1053,26 +1056,22 @@ make_v:
|
|||
|
||||
if ( pos.piece_on(SQ_A1) == W_BISHOP
|
||||
&& pos.piece_on(SQ_B2) == W_PAWN)
|
||||
correction += !pos.empty(SQ_B3) ? -CorneredBishop * 4
|
||||
: -CorneredBishop * 3;
|
||||
correction -= CorneredBishop;
|
||||
|
||||
if ( pos.piece_on(SQ_H1) == W_BISHOP
|
||||
&& pos.piece_on(SQ_G2) == W_PAWN)
|
||||
correction += !pos.empty(SQ_G3) ? -CorneredBishop * 4
|
||||
: -CorneredBishop * 3;
|
||||
correction -= CorneredBishop;
|
||||
|
||||
if ( pos.piece_on(SQ_A8) == B_BISHOP
|
||||
&& pos.piece_on(SQ_B7) == B_PAWN)
|
||||
correction += !pos.empty(SQ_B6) ? CorneredBishop * 4
|
||||
: CorneredBishop * 3;
|
||||
correction += CorneredBishop;
|
||||
|
||||
if ( pos.piece_on(SQ_H8) == B_BISHOP
|
||||
&& pos.piece_on(SQ_G7) == B_PAWN)
|
||||
correction += !pos.empty(SQ_G6) ? CorneredBishop * 4
|
||||
: CorneredBishop * 3;
|
||||
correction += CorneredBishop;
|
||||
|
||||
return pos.side_to_move() == WHITE ? Value(correction)
|
||||
: -Value(correction);
|
||||
return pos.side_to_move() == WHITE ? Value(3 * correction)
|
||||
: -Value(3 * correction);
|
||||
}
|
||||
|
||||
} // namespace Eval
|
||||
|
@ -1084,38 +1083,37 @@ make_v:
|
|||
Value Eval::evaluate(const Position& pos) {
|
||||
|
||||
Value v;
|
||||
bool useClassical = false;
|
||||
|
||||
if (!Eval::useNNUE)
|
||||
v = Evaluation<NO_TRACE>(pos).value();
|
||||
else
|
||||
// Deciding between classical and NNUE eval (~10 Elo): for high PSQ imbalance we use classical,
|
||||
// but we switch to NNUE during long shuffling or with high material on the board.
|
||||
if ( !useNNUE
|
||||
|| ((pos.this_thread()->depth > 9 || pos.count<ALL_PIECES>() > 7) &&
|
||||
abs(eg_value(pos.psq_score())) * 5 > (856 + pos.non_pawn_material() / 64) * (10 + pos.rule50_count())))
|
||||
{
|
||||
// Scale and shift NNUE for compatibility with search and classical evaluation
|
||||
auto adjusted_NNUE = [&]()
|
||||
{
|
||||
int scale = 903
|
||||
+ 32 * pos.count<PAWN>()
|
||||
+ 32 * pos.non_pawn_material() / 1024;
|
||||
v = Evaluation<NO_TRACE>(pos).value(); // classical
|
||||
useClassical = abs(v) >= 297;
|
||||
}
|
||||
|
||||
Value nnue = NNUE::evaluate(pos, true) * scale / 1024;
|
||||
// If result of a classical evaluation is much lower than threshold fall back to NNUE
|
||||
if (useNNUE && !useClassical)
|
||||
{
|
||||
Value nnue = NNUE::evaluate(pos, true); // NNUE
|
||||
int scale = 1036 + 22 * pos.non_pawn_material() / 1024;
|
||||
Color stm = pos.side_to_move();
|
||||
Value optimism = pos.this_thread()->optimism[stm];
|
||||
Value psq = (stm == WHITE ? 1 : -1) * eg_value(pos.psq_score());
|
||||
int complexity = 35 * abs(nnue - psq) / 256;
|
||||
|
||||
if (pos.is_chess960())
|
||||
nnue += fix_FRC(pos);
|
||||
optimism = optimism * (44 + complexity) / 31;
|
||||
v = (nnue + optimism) * scale / 1024 - optimism;
|
||||
|
||||
return nnue;
|
||||
};
|
||||
|
||||
// If there is PSQ imbalance we use the classical eval, but we switch to
|
||||
// NNUE eval faster when shuffling or if the material on the board is high.
|
||||
int r50 = pos.rule50_count();
|
||||
Value psq = Value(abs(eg_value(pos.psq_score())));
|
||||
bool classical = psq * 5 > (750 + pos.non_pawn_material() / 64) * (5 + r50);
|
||||
|
||||
v = classical ? Evaluation<NO_TRACE>(pos).value() // classical
|
||||
: adjusted_NNUE(); // NNUE
|
||||
if (pos.is_chess960())
|
||||
v += fix_FRC(pos);
|
||||
}
|
||||
|
||||
// Damp down the evaluation linearly when shuffling
|
||||
v = v * (100 - pos.rule50_count()) / 100;
|
||||
v = v * (195 - pos.rule50_count()) / 211;
|
||||
|
||||
// Guarantee evaluation does not hit the tablebase range
|
||||
v = std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1);
|
||||
|
@ -1140,7 +1138,12 @@ std::string Eval::trace(Position& pos) {
|
|||
|
||||
std::memset(scores, 0, sizeof(scores));
|
||||
|
||||
pos.this_thread()->trend = SCORE_ZERO; // Reset any dynamic contempt
|
||||
// Reset any global variable used in eval
|
||||
pos.this_thread()->depth = 0;
|
||||
pos.this_thread()->trend = SCORE_ZERO;
|
||||
pos.this_thread()->bestValue = VALUE_ZERO;
|
||||
pos.this_thread()->optimism[WHITE] = VALUE_ZERO;
|
||||
pos.this_thread()->optimism[BLACK] = VALUE_ZERO;
|
||||
|
||||
v = Evaluation<TRACE>(pos).value();
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -34,12 +34,12 @@ namespace Eval {
|
|||
Value evaluate(const Position& pos);
|
||||
|
||||
extern bool useNNUE;
|
||||
extern std::string eval_file_loaded;
|
||||
extern std::string currentEvalFileName;
|
||||
|
||||
// The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue
|
||||
// for the build process (profile-build and fishtest) to work. Do not change the
|
||||
// name of the macro, as it is used in the Makefile.
|
||||
#define EvalFileDefaultName "nn-3475407dc199.nnue"
|
||||
#define EvalFileDefaultName "nn-6877cd24400e.nnue"
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -36,6 +36,8 @@ typedef bool(*fun1_t)(LOGICAL_PROCESSOR_RELATIONSHIP,
|
|||
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, PDWORD);
|
||||
typedef bool(*fun2_t)(USHORT, PGROUP_AFFINITY);
|
||||
typedef bool(*fun3_t)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY);
|
||||
typedef bool(*fun4_t)(USHORT, PGROUP_AFFINITY, USHORT, PUSHORT);
|
||||
typedef WORD(*fun5_t)();
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -67,7 +69,7 @@ namespace {
|
|||
|
||||
/// Version number. If Version is left empty, then compile date in the format
|
||||
/// DD-MM-YY is shown in engine_info instead.
|
||||
const string Version = "14";
|
||||
const string Version = "15";
|
||||
|
||||
/// Our fancy logging facility. The trick here is to replace cin.rdbuf() and
|
||||
/// cout.rdbuf() with two Tie objects that tie cin and cout to a file stream. We
|
||||
|
@ -110,7 +112,14 @@ public:
|
|||
|
||||
static Logger l;
|
||||
|
||||
if (!fname.empty() && !l.file.is_open())
|
||||
if (l.file.is_open())
|
||||
{
|
||||
cout.rdbuf(l.out.buf);
|
||||
cin.rdbuf(l.in.buf);
|
||||
l.file.close();
|
||||
}
|
||||
|
||||
if (!fname.empty())
|
||||
{
|
||||
l.file.open(fname, ifstream::out);
|
||||
|
||||
|
@ -123,12 +132,6 @@ public:
|
|||
cin.rdbuf(&l.in);
|
||||
cout.rdbuf(&l.out);
|
||||
}
|
||||
else if (fname.empty() && l.file.is_open())
|
||||
{
|
||||
cout.rdbuf(l.out.buf);
|
||||
cin.rdbuf(l.in.buf);
|
||||
l.file.close();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -378,6 +381,7 @@ void std_aligned_free(void* ptr) {
|
|||
static void* aligned_large_pages_alloc_windows(size_t allocSize) {
|
||||
|
||||
#if !defined(_WIN64)
|
||||
(void)allocSize; // suppress unused-parameter compiler warning
|
||||
return nullptr;
|
||||
#else
|
||||
|
||||
|
@ -493,11 +497,11 @@ void bindThisThread(size_t) {}
|
|||
|
||||
#else
|
||||
|
||||
/// best_group() retrieves logical processor information using Windows specific
|
||||
/// API and returns the best group id for the thread with index idx. Original
|
||||
/// best_node() retrieves logical processor information using Windows specific
|
||||
/// API and returns the best node id for the thread with index idx. Original
|
||||
/// code from Texel by Peter Österlund.
|
||||
|
||||
int best_group(size_t idx) {
|
||||
int best_node(size_t idx) {
|
||||
|
||||
int threads = 0;
|
||||
int nodes = 0;
|
||||
|
@ -511,7 +515,8 @@ int best_group(size_t idx) {
|
|||
if (!fun1)
|
||||
return -1;
|
||||
|
||||
// First call to get returnLength. We expect it to fail due to null buffer
|
||||
// First call to GetLogicalProcessorInformationEx() to get returnLength.
|
||||
// We expect the call to fail due to null buffer.
|
||||
if (fun1(RelationAll, nullptr, &returnLength))
|
||||
return -1;
|
||||
|
||||
|
@ -519,7 +524,7 @@ int best_group(size_t idx) {
|
|||
SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *buffer, *ptr;
|
||||
ptr = buffer = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)malloc(returnLength);
|
||||
|
||||
// Second call, now we expect to succeed
|
||||
// Second call to GetLogicalProcessorInformationEx(), now we expect to succeed
|
||||
if (!fun1(RelationAll, buffer, &returnLength))
|
||||
{
|
||||
free(buffer);
|
||||
|
@ -569,22 +574,38 @@ int best_group(size_t idx) {
|
|||
void bindThisThread(size_t idx) {
|
||||
|
||||
// Use only local variables to be thread-safe
|
||||
int group = best_group(idx);
|
||||
int node = best_node(idx);
|
||||
|
||||
if (group == -1)
|
||||
if (node == -1)
|
||||
return;
|
||||
|
||||
// Early exit if the needed API are not available at runtime
|
||||
HMODULE k32 = GetModuleHandle("Kernel32.dll");
|
||||
auto fun2 = (fun2_t)(void(*)())GetProcAddress(k32, "GetNumaNodeProcessorMaskEx");
|
||||
auto fun3 = (fun3_t)(void(*)())GetProcAddress(k32, "SetThreadGroupAffinity");
|
||||
auto fun4 = (fun4_t)(void(*)())GetProcAddress(k32, "GetNumaNodeProcessorMask2");
|
||||
auto fun5 = (fun5_t)(void(*)())GetProcAddress(k32, "GetMaximumProcessorGroupCount");
|
||||
|
||||
if (!fun2 || !fun3)
|
||||
return;
|
||||
|
||||
GROUP_AFFINITY affinity;
|
||||
if (fun2(group, &affinity))
|
||||
fun3(GetCurrentThread(), &affinity, nullptr);
|
||||
if (!fun4 || !fun5)
|
||||
{
|
||||
GROUP_AFFINITY affinity;
|
||||
if (fun2(node, &affinity)) // GetNumaNodeProcessorMaskEx
|
||||
fun3(GetCurrentThread(), &affinity, nullptr); // SetThreadGroupAffinity
|
||||
}
|
||||
else
|
||||
{
|
||||
// If a numa node has more than one processor group, we assume they are
|
||||
// sized equal and we spread threads evenly across the groups.
|
||||
USHORT elements, returnedElements;
|
||||
elements = fun5(); // GetMaximumProcessorGroupCount
|
||||
GROUP_AFFINITY *affinity = (GROUP_AFFINITY*)malloc(elements * sizeof(GROUP_AFFINITY));
|
||||
if (fun4(node, affinity, elements, &returnedElements)) // GetNumaNodeProcessorMask2
|
||||
fun3(GetCurrentThread(), &affinity[idx % returnedElements], nullptr); // SetThreadGroupAffinity
|
||||
free(affinity);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -85,19 +85,30 @@ static inline const union { uint32_t i; char c[4]; } Le = { 0x01020304 };
|
|||
static inline const bool IsLittleEndian = (Le.c[0] == 4);
|
||||
|
||||
|
||||
// ValueListInserter : a small adaptor that appends values into an array it does
// not own, incrementing a size counter it does not own either. No bounds check
// is performed: the caller is responsible for the capacity of the array.
template <typename T>
class ValueListInserter {
public:
  ValueListInserter(T* v, std::size_t& s) :
    values(v),
    size(&s)
  {
  }

  // Store value at the current end of the array and bump the shared counter
  void push_back(const T& value) { values[(*size)++] = value; }

private:
  T* values;
  std::size_t* size;
};

// RunningAverage : a class to calculate a running average of a series of values.
// For efficiency, all computations are done with integers. The average is stored
// scaled by PERIOD * RESOLUTION and starts at zero until set() is called.
class RunningAverage {
public:

  // Reset the running average to rational value p / q (q must be non-zero)
  void set(int64_t p, int64_t q)
    { average = p * PERIOD * RESOLUTION / q; }

  // Update average with value v
  void update(int64_t v)
    { average = RESOLUTION * v + (PERIOD - 1) * average / PERIOD; }

  // Test if average is strictly greater than rational a / b
  bool is_greater(int64_t a, int64_t b) const
    { return b * average > a * (PERIOD * RESOLUTION); }

  // Return the average with the internal scaling removed (integer division)
  int64_t value() const
    { return average / (PERIOD * RESOLUTION); }

private:
  static constexpr int64_t PERIOD     = 4096;
  static constexpr int64_t RESOLUTION = 1024;

  // Zero-initialized so that reading the average before set() is well defined
  int64_t average = 0;
};
|
||||
|
||||
template <typename T, std::size_t MaxSize>
|
||||
|
@ -113,7 +124,6 @@ public:
|
|||
const T& operator[](std::size_t index) const { return values_[index]; }
|
||||
const T* begin() const { return values_; }
|
||||
const T* end() const { return values_ + size_; }
|
||||
operator ValueListInserter<T>() { return ValueListInserter(values_, size_); }
|
||||
|
||||
void swap(ValueList& other) {
|
||||
const std::size_t maxSize = std::max(size_, other.size_);
|
||||
|
@ -128,6 +138,34 @@ private:
|
|||
std::size_t size_ = 0;
|
||||
};
|
||||
|
||||
|
||||
/// sigmoid(t, x0, y0, C, P, Q) implements a sigmoid-like function using only integers,
/// with the following properties:
///
///  -  sigmoid is centered in (x0, y0)
///  -  sigmoid has amplitude [-P/Q , P/Q] instead of [-1 , +1]
///  -  limit is (y0 - P/Q) when t tends to -infinity
///  -  limit is (y0 + P/Q) when t tends to +infinity
///  -  the slope can be adjusted using C > 0, smaller C giving a steeper sigmoid
///  -  the slope of the sigmoid when t = x0 is P/(Q*C)
///  -  sigmoid is increasing with t when P > 0 and Q > 0
///  -  to get a decreasing sigmoid, change sign of P
///  -  mean value of the sigmoid is y0
///
/// The quotient is computed with integer division, so the offset added to y0
/// is truncated toward zero. The products P * (t - x0) and Q * (|t - x0| + C)
/// are evaluated in int64_t and the caller must keep them within range.
///
/// Use <https://www.desmos.com/calculator/jhh83sqq92> to draw the sigmoid

inline int64_t sigmoid(int64_t t, int64_t x0,
                                  int64_t y0,
                                  int64_t  C,
                                  int64_t  P,
                                  int64_t  Q)
{
   assert(C > 0);
   assert(Q != 0);

   // Signed distance from the center, used both as numerator and, in absolute
   // value, in the denominator (which is never zero because C > 0 and Q != 0).
   const int64_t d = t - x0;

   return y0 + P * d / (Q * (std::abs(d) + C));
}
|
||||
|
||||
|
||||
/// xorshift64star Pseudo-Random Number Generator
|
||||
/// This class is based on original code written and dedicated
|
||||
/// to the public domain by Sebastiano Vigna (2014).
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -52,9 +52,9 @@ namespace {
|
|||
constexpr Direction UpRight = (Us == WHITE ? NORTH_EAST : SOUTH_WEST);
|
||||
constexpr Direction UpLeft = (Us == WHITE ? NORTH_WEST : SOUTH_EAST);
|
||||
|
||||
const Bitboard emptySquares = Type == QUIETS || Type == QUIET_CHECKS ? target : ~pos.pieces();
|
||||
const Bitboard enemies = Type == EVASIONS ? pos.checkers()
|
||||
: Type == CAPTURES ? target : pos.pieces(Them);
|
||||
const Bitboard emptySquares = ~pos.pieces();
|
||||
const Bitboard enemies = Type == EVASIONS ? pos.checkers()
|
||||
: pos.pieces(Them);
|
||||
|
||||
Bitboard pawnsOn7 = pos.pieces(Us, PAWN) & TRank7BB;
|
||||
Bitboard pawnsNotOn7 = pos.pieces(Us, PAWN) & ~TRank7BB;
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -18,6 +18,7 @@
|
|||
|
||||
#include <cassert>
|
||||
|
||||
#include "bitboard.h"
|
||||
#include "movepick.h"
|
||||
|
||||
namespace Stockfish {
|
||||
|
@ -56,11 +57,14 @@ namespace {
|
|||
/// ordering is at the current node.
|
||||
|
||||
/// MovePicker constructor for the main search
|
||||
MovePicker::MovePicker(const Position& p, Move ttm, Depth d, const ButterflyHistory* mh, const LowPlyHistory* lp,
|
||||
const CapturePieceToHistory* cph, const PieceToHistory** ch, Move cm, const Move* killers, int pl)
|
||||
: pos(p), mainHistory(mh), lowPlyHistory(lp), captureHistory(cph), continuationHistory(ch),
|
||||
ttMove(ttm), refutations{{killers[0], 0}, {killers[1], 0}, {cm, 0}}, depth(d), ply(pl) {
|
||||
|
||||
MovePicker::MovePicker(const Position& p, Move ttm, Depth d, const ButterflyHistory* mh,
|
||||
const CapturePieceToHistory* cph,
|
||||
const PieceToHistory** ch,
|
||||
Move cm,
|
||||
const Move* killers)
|
||||
: pos(p), mainHistory(mh), captureHistory(cph), continuationHistory(ch),
|
||||
ttMove(ttm), refutations{{killers[0], 0}, {killers[1], 0}, {cm, 0}}, depth(d)
|
||||
{
|
||||
assert(d > 0);
|
||||
|
||||
stage = (pos.checkers() ? EVASION_TT : MAIN_TT) +
|
||||
|
@ -69,9 +73,11 @@ MovePicker::MovePicker(const Position& p, Move ttm, Depth d, const ButterflyHist
|
|||
|
||||
/// MovePicker constructor for quiescence search
|
||||
MovePicker::MovePicker(const Position& p, Move ttm, Depth d, const ButterflyHistory* mh,
|
||||
const CapturePieceToHistory* cph, const PieceToHistory** ch, Square rs)
|
||||
: pos(p), mainHistory(mh), captureHistory(cph), continuationHistory(ch), ttMove(ttm), recaptureSquare(rs), depth(d) {
|
||||
|
||||
const CapturePieceToHistory* cph,
|
||||
const PieceToHistory** ch,
|
||||
Square rs)
|
||||
: pos(p), mainHistory(mh), captureHistory(cph), continuationHistory(ch), ttMove(ttm), recaptureSquare(rs), depth(d)
|
||||
{
|
||||
assert(d <= 0);
|
||||
|
||||
stage = (pos.checkers() ? EVASION_TT : QSEARCH_TT) +
|
||||
|
@ -82,9 +88,9 @@ MovePicker::MovePicker(const Position& p, Move ttm, Depth d, const ButterflyHist
|
|||
|
||||
/// MovePicker constructor for ProbCut: we generate captures with SEE greater
|
||||
/// than or equal to the given threshold.
|
||||
MovePicker::MovePicker(const Position& p, Move ttm, Value th, const CapturePieceToHistory* cph)
|
||||
: pos(p), captureHistory(cph), ttMove(ttm), threshold(th) {
|
||||
|
||||
MovePicker::MovePicker(const Position& p, Move ttm, Value th, Depth d, const CapturePieceToHistory* cph)
|
||||
: pos(p), captureHistory(cph), ttMove(ttm), threshold(th), depth(d)
|
||||
{
|
||||
assert(!pos.checkers());
|
||||
|
||||
stage = PROBCUT_TT + !(ttm && pos.capture(ttm)
|
||||
|
@ -100,10 +106,35 @@ void MovePicker::score() {
|
|||
|
||||
static_assert(Type == CAPTURES || Type == QUIETS || Type == EVASIONS, "Wrong type");
|
||||
|
||||
Bitboard threatened, threatenedByPawn, threatenedByMinor, threatenedByRook;
|
||||
if constexpr (Type == QUIETS)
|
||||
{
|
||||
Color us = pos.side_to_move();
|
||||
// squares threatened by pawns
|
||||
threatenedByPawn = pos.attacks_by<PAWN>(~us);
|
||||
// squares threatened by minors or pawns
|
||||
threatenedByMinor = pos.attacks_by<KNIGHT>(~us) | pos.attacks_by<BISHOP>(~us) | threatenedByPawn;
|
||||
// squares threatened by rooks, minors or pawns
|
||||
threatenedByRook = pos.attacks_by<ROOK>(~us) | threatenedByMinor;
|
||||
|
||||
// pieces threatened by pieces of lesser material value
|
||||
threatened = (pos.pieces(us, QUEEN) & threatenedByRook)
|
||||
| (pos.pieces(us, ROOK) & threatenedByMinor)
|
||||
| (pos.pieces(us, KNIGHT, BISHOP) & threatenedByPawn);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Silence unused variable warnings
|
||||
(void) threatened;
|
||||
(void) threatenedByPawn;
|
||||
(void) threatenedByMinor;
|
||||
(void) threatenedByRook;
|
||||
}
|
||||
|
||||
for (auto& m : *this)
|
||||
if constexpr (Type == CAPTURES)
|
||||
m.value = int(PieceValue[MG][pos.piece_on(to_sq(m))]) * 6
|
||||
+ (*captureHistory)[pos.moved_piece(m)][to_sq(m)][type_of(pos.piece_on(to_sq(m)))];
|
||||
m.value = 6 * int(PieceValue[MG][pos.piece_on(to_sq(m))])
|
||||
+ (*captureHistory)[pos.moved_piece(m)][to_sq(m)][type_of(pos.piece_on(to_sq(m)))];
|
||||
|
||||
else if constexpr (Type == QUIETS)
|
||||
m.value = (*mainHistory)[pos.side_to_move()][from_to(m)]
|
||||
|
@ -111,7 +142,12 @@ void MovePicker::score() {
|
|||
+ (*continuationHistory[1])[pos.moved_piece(m)][to_sq(m)]
|
||||
+ (*continuationHistory[3])[pos.moved_piece(m)][to_sq(m)]
|
||||
+ (*continuationHistory[5])[pos.moved_piece(m)][to_sq(m)]
|
||||
+ (ply < MAX_LPH ? std::min(4, depth / 3) * (*lowPlyHistory)[ply][from_to(m)] : 0);
|
||||
+ (threatened & from_sq(m) ?
|
||||
(type_of(pos.moved_piece(m)) == QUEEN && !(to_sq(m) & threatenedByRook) ? 50000
|
||||
: type_of(pos.moved_piece(m)) == ROOK && !(to_sq(m) & threatenedByMinor) ? 25000
|
||||
: !(to_sq(m) & threatenedByPawn) ? 15000
|
||||
: 0)
|
||||
: 0);
|
||||
|
||||
else // Type == EVASIONS
|
||||
{
|
||||
|
@ -165,11 +201,12 @@ top:
|
|||
endMoves = generate<CAPTURES>(pos, cur);
|
||||
|
||||
score<CAPTURES>();
|
||||
partial_insertion_sort(cur, endMoves, -3000 * depth);
|
||||
++stage;
|
||||
goto top;
|
||||
|
||||
case GOOD_CAPTURE:
|
||||
if (select<Best>([&](){
|
||||
if (select<Next>([&](){
|
||||
return pos.see_ge(*cur, Value(-69 * cur->value / 1024)) ?
|
||||
// Move losing capture to endBadCaptures to be tried later
|
||||
true : (*endBadCaptures++ = *cur, false); }))
|
||||
|
@ -237,10 +274,10 @@ top:
|
|||
return select<Best>([](){ return true; });
|
||||
|
||||
case PROBCUT:
|
||||
return select<Best>([&](){ return pos.see_ge(*cur, threshold); });
|
||||
return select<Next>([&](){ return pos.see_ge(*cur, threshold); });
|
||||
|
||||
case QCAPTURE:
|
||||
if (select<Best>([&](){ return depth > DEPTH_QS_RECAPTURES
|
||||
if (select<Next>([&](){ return depth > DEPTH_QS_RECAPTURES
|
||||
|| to_sq(*cur) == recaptureSquare; }))
|
||||
return *(cur - 1);
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -86,13 +86,7 @@ enum StatsType { NoCaptures, Captures };
|
|||
/// unsuccessful during the current search, and is used for reduction and move
|
||||
/// ordering decisions. It uses 2 tables (one for each color) indexed by
|
||||
/// the move's from and to squares, see www.chessprogramming.org/Butterfly_Boards
|
||||
typedef Stats<int16_t, 13365, COLOR_NB, int(SQUARE_NB) * int(SQUARE_NB)> ButterflyHistory;
|
||||
|
||||
/// At higher depths LowPlyHistory records successful quiet moves near the root
|
||||
/// and quiet moves which are/were in the PV (ttPv). It is cleared with each new
|
||||
/// search and filled during iterative deepening.
|
||||
constexpr int MAX_LPH = 4;
|
||||
typedef Stats<int16_t, 10692, MAX_LPH, int(SQUARE_NB) * int(SQUARE_NB)> LowPlyHistory;
|
||||
typedef Stats<int16_t, 14365, COLOR_NB, int(SQUARE_NB) * int(SQUARE_NB)> ButterflyHistory;
|
||||
|
||||
/// CounterMoveHistory stores counter moves indexed by [piece][to] of the previous
|
||||
/// move, see www.chessprogramming.org/Countermove_Heuristic
|
||||
|
@ -123,18 +117,16 @@ class MovePicker {
|
|||
public:
|
||||
MovePicker(const MovePicker&) = delete;
|
||||
MovePicker& operator=(const MovePicker&) = delete;
|
||||
MovePicker(const Position&, Move, Value, const CapturePieceToHistory*);
|
||||
MovePicker(const Position&, Move, Depth, const ButterflyHistory*,
|
||||
const CapturePieceToHistory*,
|
||||
const PieceToHistory**,
|
||||
Move,
|
||||
const Move*);
|
||||
MovePicker(const Position&, Move, Depth, const ButterflyHistory*,
|
||||
const CapturePieceToHistory*,
|
||||
const PieceToHistory**,
|
||||
Square);
|
||||
MovePicker(const Position&, Move, Depth, const ButterflyHistory*,
|
||||
const LowPlyHistory*,
|
||||
const CapturePieceToHistory*,
|
||||
const PieceToHistory**,
|
||||
Move,
|
||||
const Move*,
|
||||
int);
|
||||
MovePicker(const Position&, Move, Value, Depth, const CapturePieceToHistory*);
|
||||
Move next_move(bool skipQuiets = false);
|
||||
|
||||
private:
|
||||
|
@ -145,7 +137,6 @@ private:
|
|||
|
||||
const Position& pos;
|
||||
const ButterflyHistory* mainHistory;
|
||||
const LowPlyHistory* lowPlyHistory;
|
||||
const CapturePieceToHistory* captureHistory;
|
||||
const PieceToHistory** continuationHistory;
|
||||
Move ttMove;
|
||||
|
@ -154,7 +145,6 @@ private:
|
|||
Square recaptureSquare;
|
||||
Value threshold;
|
||||
Depth depth;
|
||||
int ply;
|
||||
ExtMove moves[MAX_MOVES];
|
||||
};
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -109,7 +109,7 @@ namespace Stockfish::Eval::NNUE {
|
|||
{
|
||||
write_little_endian<std::uint32_t>(stream, Version);
|
||||
write_little_endian<std::uint32_t>(stream, hashValue);
|
||||
write_little_endian<std::uint32_t>(stream, desc.size());
|
||||
write_little_endian<std::uint32_t>(stream, (std::uint32_t)desc.size());
|
||||
stream.write(&desc[0], desc.size());
|
||||
return !stream.fail();
|
||||
}
|
||||
|
@ -143,39 +143,29 @@ namespace Stockfish::Eval::NNUE {
|
|||
// overaligning stack variables with alignas() doesn't work correctly.
|
||||
|
||||
constexpr uint64_t alignment = CacheLineSize;
|
||||
int delta = 10 - pos.non_pawn_material() / 1515;
|
||||
|
||||
#if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN)
|
||||
TransformedFeatureType transformedFeaturesUnaligned[
|
||||
FeatureTransformer::BufferSize + alignment / sizeof(TransformedFeatureType)];
|
||||
char bufferUnaligned[Network::BufferSize + alignment];
|
||||
|
||||
auto* transformedFeatures = align_ptr_up<alignment>(&transformedFeaturesUnaligned[0]);
|
||||
auto* buffer = align_ptr_up<alignment>(&bufferUnaligned[0]);
|
||||
#else
|
||||
alignas(alignment)
|
||||
TransformedFeatureType transformedFeatures[FeatureTransformer::BufferSize];
|
||||
alignas(alignment) char buffer[Network::BufferSize];
|
||||
#endif
|
||||
|
||||
ASSERT_ALIGNED(transformedFeatures, alignment);
|
||||
ASSERT_ALIGNED(buffer, alignment);
|
||||
|
||||
const std::size_t bucket = (pos.count<ALL_PIECES>() - 1) / 4;
|
||||
const int bucket = (pos.count<ALL_PIECES>() - 1) / 4;
|
||||
const auto psqt = featureTransformer->transform(pos, transformedFeatures, bucket);
|
||||
const auto output = network[bucket]->propagate(transformedFeatures, buffer);
|
||||
const auto positional = network[bucket]->propagate(transformedFeatures);
|
||||
|
||||
int materialist = psqt;
|
||||
int positional = output[0];
|
||||
|
||||
int delta_npm = abs(pos.non_pawn_material(WHITE) - pos.non_pawn_material(BLACK));
|
||||
int entertainment = (adjusted && delta_npm <= BishopValueMg - KnightValueMg ? 7 : 0);
|
||||
|
||||
int A = 128 - entertainment;
|
||||
int B = 128 + entertainment;
|
||||
|
||||
int sum = (A * materialist + B * positional) / 128;
|
||||
|
||||
return static_cast<Value>( sum / OutputScale );
|
||||
// Give more value to positional evaluation when adjusted flag is set
|
||||
if (adjusted)
|
||||
return static_cast<Value>(((128 - delta) * psqt + (128 + delta) * positional) / 128 / OutputScale);
|
||||
else
|
||||
return static_cast<Value>((psqt + positional) / OutputScale);
|
||||
}
|
||||
|
||||
struct NnueEvalTrace {
|
||||
|
@ -196,27 +186,20 @@ namespace Stockfish::Eval::NNUE {
|
|||
#if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN)
|
||||
TransformedFeatureType transformedFeaturesUnaligned[
|
||||
FeatureTransformer::BufferSize + alignment / sizeof(TransformedFeatureType)];
|
||||
char bufferUnaligned[Network::BufferSize + alignment];
|
||||
|
||||
auto* transformedFeatures = align_ptr_up<alignment>(&transformedFeaturesUnaligned[0]);
|
||||
auto* buffer = align_ptr_up<alignment>(&bufferUnaligned[0]);
|
||||
#else
|
||||
alignas(alignment)
|
||||
TransformedFeatureType transformedFeatures[FeatureTransformer::BufferSize];
|
||||
alignas(alignment) char buffer[Network::BufferSize];
|
||||
#endif
|
||||
|
||||
ASSERT_ALIGNED(transformedFeatures, alignment);
|
||||
ASSERT_ALIGNED(buffer, alignment);
|
||||
|
||||
NnueEvalTrace t{};
|
||||
t.correctBucket = (pos.count<ALL_PIECES>() - 1) / 4;
|
||||
for (std::size_t bucket = 0; bucket < LayerStacks; ++bucket) {
|
||||
const auto psqt = featureTransformer->transform(pos, transformedFeatures, bucket);
|
||||
const auto output = network[bucket]->propagate(transformedFeatures, buffer);
|
||||
|
||||
int materialist = psqt;
|
||||
int positional = output[0];
|
||||
for (IndexType bucket = 0; bucket < LayerStacks; ++bucket) {
|
||||
const auto materialist = featureTransformer->transform(pos, transformedFeatures, bucket);
|
||||
const auto positional = network[bucket]->propagate(transformedFeatures);
|
||||
|
||||
t.psqt[bucket] = static_cast<Value>( materialist / OutputScale );
|
||||
t.positional[bucket] = static_cast<Value>( positional / OutputScale );
|
||||
|
@ -227,69 +210,46 @@ namespace Stockfish::Eval::NNUE {
|
|||
|
||||
static const std::string PieceToChar(" PNBRQK pnbrqk");
|
||||
|
||||
// Requires the buffer to have capacity for at least 5 values
|
||||
|
||||
// format_cp_compact() converts a Value into (centi)pawns and writes it in a buffer.
|
||||
// The buffer must have capacity for at least 5 chars.
|
||||
static void format_cp_compact(Value v, char* buffer) {
|
||||
|
||||
buffer[0] = (v < 0 ? '-' : v > 0 ? '+' : ' ');
|
||||
|
||||
int cp = std::abs(100 * v / PawnValueEg);
|
||||
|
||||
if (cp >= 10000)
|
||||
{
|
||||
buffer[1] = '0' + cp / 10000; cp %= 10000;
|
||||
buffer[2] = '0' + cp / 1000; cp %= 1000;
|
||||
buffer[3] = '0' + cp / 100; cp %= 100;
|
||||
buffer[4] = ' ';
|
||||
buffer[1] = '0' + cp / 10000; cp %= 10000;
|
||||
buffer[2] = '0' + cp / 1000; cp %= 1000;
|
||||
buffer[3] = '0' + cp / 100;
|
||||
buffer[4] = ' ';
|
||||
}
|
||||
else if (cp >= 1000)
|
||||
{
|
||||
buffer[1] = '0' + cp / 1000; cp %= 1000;
|
||||
buffer[2] = '0' + cp / 100; cp %= 100;
|
||||
buffer[3] = '.';
|
||||
buffer[4] = '0' + cp / 10;
|
||||
buffer[1] = '0' + cp / 1000; cp %= 1000;
|
||||
buffer[2] = '0' + cp / 100; cp %= 100;
|
||||
buffer[3] = '.';
|
||||
buffer[4] = '0' + cp / 10;
|
||||
}
|
||||
else
|
||||
{
|
||||
buffer[1] = '0' + cp / 100; cp %= 100;
|
||||
buffer[2] = '.';
|
||||
buffer[3] = '0' + cp / 10; cp %= 10;
|
||||
buffer[4] = '0' + cp / 1;
|
||||
buffer[1] = '0' + cp / 100; cp %= 100;
|
||||
buffer[2] = '.';
|
||||
buffer[3] = '0' + cp / 10; cp %= 10;
|
||||
buffer[4] = '0' + cp / 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Requires the buffer to have capacity for at least 7 values
|
||||
|
||||
// format_cp_aligned_dot() converts a Value into (centi)pawns and writes it in a buffer,
|
||||
// always keeping two decimals. The buffer must have capacity for at least 7 chars.
|
||||
static void format_cp_aligned_dot(Value v, char* buffer) {
|
||||
|
||||
buffer[0] = (v < 0 ? '-' : v > 0 ? '+' : ' ');
|
||||
|
||||
int cp = std::abs(100 * v / PawnValueEg);
|
||||
|
||||
if (cp >= 10000)
|
||||
{
|
||||
buffer[1] = '0' + cp / 10000; cp %= 10000;
|
||||
buffer[2] = '0' + cp / 1000; cp %= 1000;
|
||||
buffer[3] = '0' + cp / 100; cp %= 100;
|
||||
buffer[4] = '.';
|
||||
buffer[5] = '0' + cp / 10; cp %= 10;
|
||||
buffer[6] = '0' + cp;
|
||||
}
|
||||
else if (cp >= 1000)
|
||||
{
|
||||
buffer[1] = ' ';
|
||||
buffer[2] = '0' + cp / 1000; cp %= 1000;
|
||||
buffer[3] = '0' + cp / 100; cp %= 100;
|
||||
buffer[4] = '.';
|
||||
buffer[5] = '0' + cp / 10; cp %= 10;
|
||||
buffer[6] = '0' + cp;
|
||||
}
|
||||
else
|
||||
{
|
||||
buffer[1] = ' ';
|
||||
buffer[2] = ' ';
|
||||
buffer[3] = '0' + cp / 100; cp %= 100;
|
||||
buffer[4] = '.';
|
||||
buffer[5] = '0' + cp / 10; cp %= 10;
|
||||
buffer[6] = '0' + cp / 1;
|
||||
}
|
||||
double cp = 1.0 * std::abs(int(v)) / PawnValueEg;
|
||||
sprintf(&buffer[1], "%6.2f", cp);
|
||||
}
|
||||
|
||||
|
||||
|
@ -419,7 +379,7 @@ namespace Stockfish::Eval::NNUE {
|
|||
actualFilename = filename.value();
|
||||
else
|
||||
{
|
||||
if (eval_file_loaded != EvalFileDefaultName)
|
||||
if (currentEvalFileName != EvalFileDefaultName)
|
||||
{
|
||||
msg = "Failed to export a net. A non-embedded net can only be saved if the filename is specified";
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -16,31 +16,32 @@
|
|||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
//Definition of input features HalfKAv2 of NNUE evaluation function
|
||||
//Definition of input features HalfKAv2_hm of NNUE evaluation function
|
||||
|
||||
#include "half_ka_v2.h"
|
||||
#include "half_ka_v2_hm.h"
|
||||
|
||||
#include "../../position.h"
|
||||
|
||||
namespace Stockfish::Eval::NNUE::Features {
|
||||
|
||||
// Orient a square according to perspective (rotates by 180 for black)
|
||||
inline Square HalfKAv2::orient(Color perspective, Square s) {
|
||||
return Square(int(s) ^ (bool(perspective) * 56));
|
||||
inline Square HalfKAv2_hm::orient(Color perspective, Square s, Square ksq) {
|
||||
return Square(int(s) ^ (bool(perspective) * SQ_A8) ^ ((file_of(ksq) < FILE_E) * SQ_H1));
|
||||
}
|
||||
|
||||
// Index of a feature for a given king position and another piece on some square
|
||||
inline IndexType HalfKAv2::make_index(Color perspective, Square s, Piece pc, Square ksq) {
|
||||
return IndexType(orient(perspective, s) + PieceSquareIndex[perspective][pc] + PS_NB * ksq);
|
||||
inline IndexType HalfKAv2_hm::make_index(Color perspective, Square s, Piece pc, Square ksq) {
|
||||
Square o_ksq = orient(perspective, ksq, ksq);
|
||||
return IndexType(orient(perspective, s, ksq) + PieceSquareIndex[perspective][pc] + PS_NB * KingBuckets[o_ksq]);
|
||||
}
|
||||
|
||||
// Get a list of indices for active features
|
||||
void HalfKAv2::append_active_indices(
|
||||
void HalfKAv2_hm::append_active_indices(
|
||||
const Position& pos,
|
||||
Color perspective,
|
||||
ValueListInserter<IndexType> active
|
||||
IndexList& active
|
||||
) {
|
||||
Square ksq = orient(perspective, pos.square<KING>(perspective));
|
||||
Square ksq = pos.square<KING>(perspective);
|
||||
Bitboard bb = pos.pieces();
|
||||
while (bb)
|
||||
{
|
||||
|
@ -52,33 +53,30 @@ namespace Stockfish::Eval::NNUE::Features {
|
|||
|
||||
// append_changed_indices() : get a list of indices for recently changed features
|
||||
|
||||
void HalfKAv2::append_changed_indices(
|
||||
void HalfKAv2_hm::append_changed_indices(
|
||||
Square ksq,
|
||||
StateInfo* st,
|
||||
const DirtyPiece& dp,
|
||||
Color perspective,
|
||||
ValueListInserter<IndexType> removed,
|
||||
ValueListInserter<IndexType> added
|
||||
IndexList& removed,
|
||||
IndexList& added
|
||||
) {
|
||||
const auto& dp = st->dirtyPiece;
|
||||
Square oriented_ksq = orient(perspective, ksq);
|
||||
for (int i = 0; i < dp.dirty_num; ++i) {
|
||||
Piece pc = dp.piece[i];
|
||||
if (dp.from[i] != SQ_NONE)
|
||||
removed.push_back(make_index(perspective, dp.from[i], pc, oriented_ksq));
|
||||
removed.push_back(make_index(perspective, dp.from[i], dp.piece[i], ksq));
|
||||
if (dp.to[i] != SQ_NONE)
|
||||
added.push_back(make_index(perspective, dp.to[i], pc, oriented_ksq));
|
||||
added.push_back(make_index(perspective, dp.to[i], dp.piece[i], ksq));
|
||||
}
|
||||
}
|
||||
|
||||
int HalfKAv2::update_cost(StateInfo* st) {
|
||||
int HalfKAv2_hm::update_cost(const StateInfo* st) {
|
||||
return st->dirtyPiece.dirty_num;
|
||||
}
|
||||
|
||||
int HalfKAv2::refresh_cost(const Position& pos) {
|
||||
int HalfKAv2_hm::refresh_cost(const Position& pos) {
|
||||
return pos.count<ALL_PIECES>();
|
||||
}
|
||||
|
||||
bool HalfKAv2::requires_refresh(StateInfo* st, Color perspective) {
|
||||
bool HalfKAv2_hm::requires_refresh(const StateInfo* st, Color perspective) {
|
||||
return st->dirtyPiece.piece[0] == make_piece(perspective, KING);
|
||||
}
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -18,8 +18,8 @@
|
|||
|
||||
//Definition of input features HalfKP of NNUE evaluation function
|
||||
|
||||
#ifndef NNUE_FEATURES_HALF_KA_V2_H_INCLUDED
|
||||
#define NNUE_FEATURES_HALF_KA_V2_H_INCLUDED
|
||||
#ifndef NNUE_FEATURES_HALF_KA_V2_HM_H_INCLUDED
|
||||
#define NNUE_FEATURES_HALF_KA_V2_HM_H_INCLUDED
|
||||
|
||||
#include "../nnue_common.h"
|
||||
|
||||
|
@ -32,9 +32,9 @@ namespace Stockfish {
|
|||
|
||||
namespace Stockfish::Eval::NNUE::Features {
|
||||
|
||||
// Feature HalfKAv2: Combination of the position of own king
|
||||
// and the position of pieces
|
||||
class HalfKAv2 {
|
||||
// Feature HalfKAv2_hm: Combination of the position of own king
|
||||
// and the position of pieces. Position mirrored such that king always on e..h files.
|
||||
class HalfKAv2_hm {
|
||||
|
||||
// unique number for each piece type on each square
|
||||
enum {
|
||||
|
@ -50,7 +50,7 @@ namespace Stockfish::Eval::NNUE::Features {
|
|||
PS_W_QUEEN = 8 * SQUARE_NB,
|
||||
PS_B_QUEEN = 9 * SQUARE_NB,
|
||||
PS_KING = 10 * SQUARE_NB,
|
||||
PS_NB = 11 * SQUARE_NB
|
||||
PS_NB = 11 * SQUARE_NB
|
||||
};
|
||||
|
||||
static constexpr IndexType PieceSquareIndex[COLOR_NB][PIECE_NB] = {
|
||||
|
@ -63,49 +63,62 @@ namespace Stockfish::Eval::NNUE::Features {
|
|||
};
|
||||
|
||||
// Orient a square according to perspective (rotates by 180 for black)
|
||||
static Square orient(Color perspective, Square s);
|
||||
static Square orient(Color perspective, Square s, Square ksq);
|
||||
|
||||
// Index of a feature for a given king position and another piece on some square
|
||||
static IndexType make_index(Color perspective, Square s, Piece pc, Square ksq);
|
||||
|
||||
public:
|
||||
// Feature name
|
||||
static constexpr const char* Name = "HalfKAv2(Friend)";
|
||||
static constexpr const char* Name = "HalfKAv2_hm(Friend)";
|
||||
|
||||
// Hash value embedded in the evaluation file
|
||||
static constexpr std::uint32_t HashValue = 0x5f234cb8u;
|
||||
static constexpr std::uint32_t HashValue = 0x7f234cb8u;
|
||||
|
||||
// Number of feature dimensions
|
||||
static constexpr IndexType Dimensions =
|
||||
static_cast<IndexType>(SQUARE_NB) * static_cast<IndexType>(PS_NB);
|
||||
static_cast<IndexType>(SQUARE_NB) * static_cast<IndexType>(PS_NB) / 2;
|
||||
|
||||
static constexpr int KingBuckets[64] = {
|
||||
-1, -1, -1, -1, 31, 30, 29, 28,
|
||||
-1, -1, -1, -1, 27, 26, 25, 24,
|
||||
-1, -1, -1, -1, 23, 22, 21, 20,
|
||||
-1, -1, -1, -1, 19, 18, 17, 16,
|
||||
-1, -1, -1, -1, 15, 14, 13, 12,
|
||||
-1, -1, -1, -1, 11, 10, 9, 8,
|
||||
-1, -1, -1, -1, 7, 6, 5, 4,
|
||||
-1, -1, -1, -1, 3, 2, 1, 0
|
||||
};
|
||||
|
||||
// Maximum number of simultaneously active features.
|
||||
static constexpr IndexType MaxActiveDimensions = 32;
|
||||
using IndexList = ValueList<IndexType, MaxActiveDimensions>;
|
||||
|
||||
// Get a list of indices for active features
|
||||
static void append_active_indices(
|
||||
const Position& pos,
|
||||
Color perspective,
|
||||
ValueListInserter<IndexType> active);
|
||||
IndexList& active);
|
||||
|
||||
// Get a list of indices for recently changed features
|
||||
static void append_changed_indices(
|
||||
Square ksq,
|
||||
StateInfo* st,
|
||||
const DirtyPiece& dp,
|
||||
Color perspective,
|
||||
ValueListInserter<IndexType> removed,
|
||||
ValueListInserter<IndexType> added);
|
||||
IndexList& removed,
|
||||
IndexList& added
|
||||
);
|
||||
|
||||
// Returns the cost of updating one perspective, the most costly one.
|
||||
// Assumes no refresh needed.
|
||||
static int update_cost(StateInfo* st);
|
||||
static int update_cost(const StateInfo* st);
|
||||
static int refresh_cost(const Position& pos);
|
||||
|
||||
// Returns whether the change stored in this StateInfo means that
|
||||
// a full accumulator refresh is required.
|
||||
static bool requires_refresh(StateInfo* st, Color perspective);
|
||||
static bool requires_refresh(const StateInfo* st, Color perspective);
|
||||
};
|
||||
|
||||
} // namespace Stockfish::Eval::NNUE::Features
|
||||
|
||||
#endif // #ifndef NNUE_FEATURES_HALF_KA_V2_H_INCLUDED
|
||||
#endif // #ifndef NNUE_FEATURES_HALF_KA_V2_HM_H_INCLUDED
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -22,398 +22,338 @@
|
|||
#define NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED
|
||||
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
#include <type_traits>
|
||||
#include "../nnue_common.h"
|
||||
#include "../../simd.h"
|
||||
|
||||
/*
|
||||
This file contains the definition for a fully connected layer (aka affine transform).
|
||||
Two approaches are employed, depending on the sizes of the transform.
|
||||
|
||||
Approach 1:
|
||||
- used when the PaddedInputDimensions >= 128
|
||||
- uses AVX512 if possible
|
||||
- processes inputs in batches of 2*InputSimdWidth
|
||||
- so in batches of 128 for AVX512
|
||||
- the weight blocks of size InputSimdWidth are transposed such that
|
||||
access is sequential
|
||||
- N columns of the weight matrix are processed a time, where N
|
||||
depends on the architecture (the amount of registers)
|
||||
- accumulate + hadd is used
|
||||
|
||||
Approach 2:
|
||||
- used when the PaddedInputDimensions < 128
|
||||
- does not use AVX512
|
||||
- expected use-case is for when PaddedInputDimensions == 32 and InputDimensions <= 32.
|
||||
- that's why AVX512 is hard to implement
|
||||
- expected use-case is small layers
|
||||
- not optimized as well as the approach 1
|
||||
- inputs are processed in chunks of 4, weights are respectively transposed
|
||||
- accumulation happens directly to int32s
|
||||
*/
|
||||
|
||||
namespace Stockfish::Eval::NNUE::Layers {
|
||||
|
||||
// Affine transformation layer
|
||||
template <typename PreviousLayer, IndexType OutDims>
|
||||
class AffineTransform {
|
||||
public:
|
||||
// Input/output type
|
||||
using InputType = typename PreviousLayer::OutputType;
|
||||
using OutputType = std::int32_t;
|
||||
static_assert(std::is_same<InputType, std::uint8_t>::value, "");
|
||||
// Fallback implementation for older/other architectures.
|
||||
// Identical for both approaches. Requires the input to be padded to at least 16 values.
|
||||
#if !defined(USE_SSSE3)
|
||||
template <IndexType InputDimensions, IndexType PaddedInputDimensions, IndexType OutputDimensions>
|
||||
static void affine_transform_non_ssse3(std::int32_t* output, const std::int8_t* weights, const std::int32_t* biases, const std::uint8_t* input)
|
||||
{
|
||||
# if defined(USE_SSE2)
|
||||
// At least a multiple of 16, with SSE2.
|
||||
constexpr IndexType NumChunks = ceil_to_multiple<IndexType>(InputDimensions, 16) / 16;
|
||||
const __m128i Zeros = _mm_setzero_si128();
|
||||
const auto inputVector = reinterpret_cast<const __m128i*>(input);
|
||||
|
||||
// Number of input/output dimensions
|
||||
static constexpr IndexType InputDimensions =
|
||||
PreviousLayer::OutputDimensions;
|
||||
static constexpr IndexType OutputDimensions = OutDims;
|
||||
static constexpr IndexType PaddedInputDimensions =
|
||||
ceil_to_multiple<IndexType>(InputDimensions, MaxSimdWidth);
|
||||
#if defined (USE_AVX512)
|
||||
static constexpr const IndexType OutputSimdWidth = SimdWidth / 2;
|
||||
#elif defined (USE_SSSE3)
|
||||
static constexpr const IndexType OutputSimdWidth = SimdWidth / 4;
|
||||
# elif defined(USE_MMX)
|
||||
constexpr IndexType NumChunks = ceil_to_multiple<IndexType>(InputDimensions, 8) / 8;
|
||||
const __m64 Zeros = _mm_setzero_si64();
|
||||
const auto inputVector = reinterpret_cast<const __m64*>(input);
|
||||
|
||||
# elif defined(USE_NEON)
|
||||
constexpr IndexType NumChunks = ceil_to_multiple<IndexType>(InputDimensions, 16) / 16;
|
||||
const auto inputVector = reinterpret_cast<const int8x8_t*>(input);
|
||||
# endif
|
||||
|
||||
for (IndexType i = 0; i < OutputDimensions; ++i) {
|
||||
const IndexType offset = i * PaddedInputDimensions;
|
||||
|
||||
# if defined(USE_SSE2)
|
||||
__m128i sumLo = _mm_cvtsi32_si128(biases[i]);
|
||||
__m128i sumHi = Zeros;
|
||||
const auto row = reinterpret_cast<const __m128i*>(&weights[offset]);
|
||||
for (IndexType j = 0; j < NumChunks; ++j) {
|
||||
__m128i row_j = _mm_load_si128(&row[j]);
|
||||
__m128i input_j = _mm_load_si128(&inputVector[j]);
|
||||
__m128i extendedRowLo = _mm_srai_epi16(_mm_unpacklo_epi8(row_j, row_j), 8);
|
||||
__m128i extendedRowHi = _mm_srai_epi16(_mm_unpackhi_epi8(row_j, row_j), 8);
|
||||
__m128i extendedInputLo = _mm_unpacklo_epi8(input_j, Zeros);
|
||||
__m128i extendedInputHi = _mm_unpackhi_epi8(input_j, Zeros);
|
||||
__m128i productLo = _mm_madd_epi16(extendedRowLo, extendedInputLo);
|
||||
__m128i productHi = _mm_madd_epi16(extendedRowHi, extendedInputHi);
|
||||
sumLo = _mm_add_epi32(sumLo, productLo);
|
||||
sumHi = _mm_add_epi32(sumHi, productHi);
|
||||
}
|
||||
__m128i sum = _mm_add_epi32(sumLo, sumHi);
|
||||
__m128i sumHigh_64 = _mm_shuffle_epi32(sum, _MM_SHUFFLE(1, 0, 3, 2));
|
||||
sum = _mm_add_epi32(sum, sumHigh_64);
|
||||
__m128i sum_second_32 = _mm_shufflelo_epi16(sum, _MM_SHUFFLE(1, 0, 3, 2));
|
||||
sum = _mm_add_epi32(sum, sum_second_32);
|
||||
output[i] = _mm_cvtsi128_si32(sum);
|
||||
|
||||
# elif defined(USE_MMX)
|
||||
__m64 sumLo = _mm_cvtsi32_si64(biases[i]);
|
||||
__m64 sumHi = Zeros;
|
||||
const auto row = reinterpret_cast<const __m64*>(&weights[offset]);
|
||||
for (IndexType j = 0; j < NumChunks; ++j) {
|
||||
__m64 row_j = row[j];
|
||||
__m64 input_j = inputVector[j];
|
||||
__m64 extendedRowLo = _mm_srai_pi16(_mm_unpacklo_pi8(row_j, row_j), 8);
|
||||
__m64 extendedRowHi = _mm_srai_pi16(_mm_unpackhi_pi8(row_j, row_j), 8);
|
||||
__m64 extendedInputLo = _mm_unpacklo_pi8(input_j, Zeros);
|
||||
__m64 extendedInputHi = _mm_unpackhi_pi8(input_j, Zeros);
|
||||
__m64 productLo = _mm_madd_pi16(extendedRowLo, extendedInputLo);
|
||||
__m64 productHi = _mm_madd_pi16(extendedRowHi, extendedInputHi);
|
||||
sumLo = _mm_add_pi32(sumLo, productLo);
|
||||
sumHi = _mm_add_pi32(sumHi, productHi);
|
||||
}
|
||||
__m64 sum = _mm_add_pi32(sumLo, sumHi);
|
||||
sum = _mm_add_pi32(sum, _mm_unpackhi_pi32(sum, sum));
|
||||
output[i] = _mm_cvtsi64_si32(sum);
|
||||
|
||||
# elif defined(USE_NEON)
|
||||
int32x4_t sum = {biases[i]};
|
||||
const auto row = reinterpret_cast<const int8x8_t*>(&weights[offset]);
|
||||
for (IndexType j = 0; j < NumChunks; ++j) {
|
||||
int16x8_t product = vmull_s8(inputVector[j * 2], row[j * 2]);
|
||||
product = vmlal_s8(product, inputVector[j * 2 + 1], row[j * 2 + 1]);
|
||||
sum = vpadalq_s16(sum, product);
|
||||
}
|
||||
output[i] = sum[0] + sum[1] + sum[2] + sum[3];
|
||||
|
||||
# else
|
||||
std::int32_t sum = biases[i];
|
||||
for (IndexType j = 0; j < InputDimensions; ++j) {
|
||||
sum += weights[offset + j] * input[j];
|
||||
}
|
||||
output[i] = sum;
|
||||
# endif
|
||||
}
|
||||
|
||||
# if defined(USE_MMX)
|
||||
_mm_empty();
|
||||
# endif
|
||||
}
|
||||
#endif
|
||||
|
||||
// Size of forward propagation buffer used in this layer
|
||||
static constexpr std::size_t SelfBufferSize =
|
||||
ceil_to_multiple(OutputDimensions * sizeof(OutputType), CacheLineSize);
|
||||
template <IndexType InDims, IndexType OutDims, typename Enabled = void>
|
||||
class AffineTransform;
|
||||
|
||||
// Size of the forward propagation buffer used from the input layer to this layer
|
||||
static constexpr std::size_t BufferSize =
|
||||
PreviousLayer::BufferSize + SelfBufferSize;
|
||||
// A specialization for large inputs.
|
||||
template <IndexType InDims, IndexType OutDims>
|
||||
class AffineTransform<InDims, OutDims, std::enable_if_t<(ceil_to_multiple<IndexType>(InDims, MaxSimdWidth) >= 2*64)>> {
|
||||
public:
|
||||
// Input/output type
|
||||
using InputType = std::uint8_t;
|
||||
using OutputType = std::int32_t;
|
||||
|
||||
// Number of input/output dimensions
|
||||
static constexpr IndexType InputDimensions = InDims;
|
||||
static constexpr IndexType OutputDimensions = OutDims;
|
||||
|
||||
static constexpr IndexType PaddedInputDimensions =
|
||||
ceil_to_multiple<IndexType>(InputDimensions, MaxSimdWidth);
|
||||
static constexpr IndexType PaddedOutputDimensions =
|
||||
ceil_to_multiple<IndexType>(OutputDimensions, MaxSimdWidth);
|
||||
|
||||
using OutputBuffer = OutputType[PaddedOutputDimensions];
|
||||
|
||||
static_assert(PaddedInputDimensions >= 128, "Something went wrong. This specialization should not have been chosen.");
|
||||
|
||||
#if defined (USE_AVX512)
|
||||
static constexpr const IndexType InputSimdWidth = 64;
|
||||
static constexpr const IndexType MaxNumOutputRegs = 16;
|
||||
#elif defined (USE_AVX2)
|
||||
static constexpr const IndexType InputSimdWidth = 32;
|
||||
static constexpr const IndexType MaxNumOutputRegs = 8;
|
||||
#elif defined (USE_SSSE3)
|
||||
static constexpr const IndexType InputSimdWidth = 16;
|
||||
static constexpr const IndexType MaxNumOutputRegs = 8;
|
||||
#elif defined (USE_NEON)
|
||||
static constexpr const IndexType InputSimdWidth = 8;
|
||||
static constexpr const IndexType MaxNumOutputRegs = 8;
|
||||
#else
|
||||
// The fallback implementation will not have permuted weights.
|
||||
// We define these to avoid a lot of ifdefs later.
|
||||
static constexpr const IndexType InputSimdWidth = 1;
|
||||
static constexpr const IndexType MaxNumOutputRegs = 1;
|
||||
#endif
|
||||
|
||||
// A big block is a region in the weight matrix of the size [PaddedInputDimensions, NumOutputRegs].
|
||||
// A small block is a region of size [InputSimdWidth, 1]
|
||||
|
||||
static constexpr const IndexType NumOutputRegs = std::min(MaxNumOutputRegs, OutputDimensions);
|
||||
static constexpr const IndexType SmallBlockSize = InputSimdWidth;
|
||||
static constexpr const IndexType BigBlockSize = NumOutputRegs * PaddedInputDimensions;
|
||||
static constexpr const IndexType NumSmallBlocksInBigBlock = BigBlockSize / SmallBlockSize;
|
||||
static constexpr const IndexType NumSmallBlocksPerOutput = PaddedInputDimensions / SmallBlockSize;
|
||||
static constexpr const IndexType NumBigBlocks = OutputDimensions / NumOutputRegs;
|
||||
|
||||
static_assert(OutputDimensions % NumOutputRegs == 0);
|
||||
|
||||
// Hash value embedded in the evaluation file
|
||||
static constexpr std::uint32_t get_hash_value() {
|
||||
static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) {
|
||||
std::uint32_t hashValue = 0xCC03DAE4u;
|
||||
hashValue += OutputDimensions;
|
||||
hashValue ^= PreviousLayer::get_hash_value() >> 1;
|
||||
hashValue ^= PreviousLayer::get_hash_value() << 31;
|
||||
hashValue ^= prevHash >> 1;
|
||||
hashValue ^= prevHash << 31;
|
||||
return hashValue;
|
||||
}
|
||||
|
||||
/*
|
||||
Transposes the small blocks within a block.
|
||||
Effectively means that weights can be traversed sequentially during inference.
|
||||
*/
|
||||
static IndexType get_weight_index(IndexType i)
|
||||
{
|
||||
const IndexType smallBlock = (i / SmallBlockSize) % NumSmallBlocksInBigBlock;
|
||||
const IndexType smallBlockCol = smallBlock / NumSmallBlocksPerOutput;
|
||||
const IndexType smallBlockRow = smallBlock % NumSmallBlocksPerOutput;
|
||||
const IndexType bigBlock = i / BigBlockSize;
|
||||
const IndexType rest = i % SmallBlockSize;
|
||||
|
||||
const IndexType idx =
|
||||
bigBlock * BigBlockSize
|
||||
+ smallBlockRow * SmallBlockSize * NumOutputRegs
|
||||
+ smallBlockCol * SmallBlockSize
|
||||
+ rest;
|
||||
|
||||
return idx;
|
||||
}
|
||||
|
||||
// Read network parameters
|
||||
bool read_parameters(std::istream& stream) {
|
||||
if (!previousLayer.read_parameters(stream)) return false;
|
||||
for (std::size_t i = 0; i < OutputDimensions; ++i)
|
||||
for (IndexType i = 0; i < OutputDimensions; ++i)
|
||||
biases[i] = read_little_endian<BiasType>(stream);
|
||||
for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
|
||||
#if !defined (USE_SSSE3)
|
||||
weights[i] = read_little_endian<WeightType>(stream);
|
||||
#else
|
||||
weights[
|
||||
(i / 4) % (PaddedInputDimensions / 4) * OutputDimensions * 4 +
|
||||
i / PaddedInputDimensions * 4 +
|
||||
i % 4
|
||||
] = read_little_endian<WeightType>(stream);
|
||||
#endif
|
||||
|
||||
for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
|
||||
weights[get_weight_index(i)] = read_little_endian<WeightType>(stream);
|
||||
|
||||
return !stream.fail();
|
||||
}
|
||||
|
||||
// Write network parameters
|
||||
bool write_parameters(std::ostream& stream) const {
|
||||
if (!previousLayer.write_parameters(stream)) return false;
|
||||
for (std::size_t i = 0; i < OutputDimensions; ++i)
|
||||
for (IndexType i = 0; i < OutputDimensions; ++i)
|
||||
write_little_endian<BiasType>(stream, biases[i]);
|
||||
#if !defined (USE_SSSE3)
|
||||
for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
|
||||
write_little_endian<WeightType>(stream, weights[i]);
|
||||
#else
|
||||
std::unique_ptr<WeightType[]> unscrambledWeights = std::make_unique<WeightType[]>(OutputDimensions * PaddedInputDimensions);
|
||||
for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) {
|
||||
unscrambledWeights[i] =
|
||||
weights[
|
||||
(i / 4) % (PaddedInputDimensions / 4) * OutputDimensions * 4 +
|
||||
i / PaddedInputDimensions * 4 +
|
||||
i % 4
|
||||
];
|
||||
}
|
||||
|
||||
for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
|
||||
write_little_endian<WeightType>(stream, unscrambledWeights[i]);
|
||||
#endif
|
||||
for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
|
||||
write_little_endian<WeightType>(stream, weights[get_weight_index(i)]);
|
||||
|
||||
return !stream.fail();
|
||||
}
|
||||
|
||||
// Forward propagation
|
||||
const OutputType* propagate(
|
||||
const TransformedFeatureType* transformedFeatures, char* buffer) const {
|
||||
const auto input = previousLayer.propagate(
|
||||
transformedFeatures, buffer + SelfBufferSize);
|
||||
const InputType* input, OutputType* output) const {
|
||||
|
||||
#if defined (USE_AVX512)
|
||||
|
||||
[[maybe_unused]] const __m512i Ones512 = _mm512_set1_epi16(1);
|
||||
|
||||
[[maybe_unused]] auto m512_hadd = [](__m512i sum, int bias) -> int {
|
||||
return _mm512_reduce_add_epi32(sum) + bias;
|
||||
};
|
||||
|
||||
[[maybe_unused]] auto m512_add_dpbusd_epi32 = [=](__m512i& acc, __m512i a, __m512i b) {
|
||||
#if defined (USE_VNNI)
|
||||
acc = _mm512_dpbusd_epi32(acc, a, b);
|
||||
#else
|
||||
__m512i product0 = _mm512_maddubs_epi16(a, b);
|
||||
product0 = _mm512_madd_epi16(product0, Ones512);
|
||||
acc = _mm512_add_epi32(acc, product0);
|
||||
#endif
|
||||
};
|
||||
|
||||
[[maybe_unused]] auto m512_add_dpbusd_epi32x4 = [=](__m512i& acc, __m512i a0, __m512i b0, __m512i a1, __m512i b1,
|
||||
__m512i a2, __m512i b2, __m512i a3, __m512i b3) {
|
||||
#if defined (USE_VNNI)
|
||||
acc = _mm512_dpbusd_epi32(acc, a0, b0);
|
||||
acc = _mm512_dpbusd_epi32(acc, a1, b1);
|
||||
acc = _mm512_dpbusd_epi32(acc, a2, b2);
|
||||
acc = _mm512_dpbusd_epi32(acc, a3, b3);
|
||||
#else
|
||||
__m512i product0 = _mm512_maddubs_epi16(a0, b0);
|
||||
__m512i product1 = _mm512_maddubs_epi16(a1, b1);
|
||||
__m512i product2 = _mm512_maddubs_epi16(a2, b2);
|
||||
__m512i product3 = _mm512_maddubs_epi16(a3, b3);
|
||||
product0 = _mm512_adds_epi16(product0, product1);
|
||||
product0 = _mm512_madd_epi16(product0, Ones512);
|
||||
product2 = _mm512_adds_epi16(product2, product3);
|
||||
product2 = _mm512_madd_epi16(product2, Ones512);
|
||||
acc = _mm512_add_epi32(acc, _mm512_add_epi32(product0, product2));
|
||||
#endif
|
||||
};
|
||||
|
||||
#endif
|
||||
#if defined (USE_AVX2)
|
||||
|
||||
[[maybe_unused]] const __m256i Ones256 = _mm256_set1_epi16(1);
|
||||
|
||||
[[maybe_unused]] auto m256_hadd = [](__m256i sum, int bias) -> int {
|
||||
__m128i sum128 = _mm_add_epi32(_mm256_castsi256_si128(sum), _mm256_extracti128_si256(sum, 1));
|
||||
sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_BADC));
|
||||
sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_CDAB));
|
||||
return _mm_cvtsi128_si32(sum128) + bias;
|
||||
};
|
||||
|
||||
[[maybe_unused]] auto m256_add_dpbusd_epi32 = [=](__m256i& acc, __m256i a, __m256i b) {
|
||||
#if defined (USE_VNNI)
|
||||
acc = _mm256_dpbusd_epi32(acc, a, b);
|
||||
#else
|
||||
__m256i product0 = _mm256_maddubs_epi16(a, b);
|
||||
product0 = _mm256_madd_epi16(product0, Ones256);
|
||||
acc = _mm256_add_epi32(acc, product0);
|
||||
#endif
|
||||
};
|
||||
|
||||
[[maybe_unused]] auto m256_add_dpbusd_epi32x4 = [=](__m256i& acc, __m256i a0, __m256i b0, __m256i a1, __m256i b1,
|
||||
__m256i a2, __m256i b2, __m256i a3, __m256i b3) {
|
||||
#if defined (USE_VNNI)
|
||||
acc = _mm256_dpbusd_epi32(acc, a0, b0);
|
||||
acc = _mm256_dpbusd_epi32(acc, a1, b1);
|
||||
acc = _mm256_dpbusd_epi32(acc, a2, b2);
|
||||
acc = _mm256_dpbusd_epi32(acc, a3, b3);
|
||||
#else
|
||||
__m256i product0 = _mm256_maddubs_epi16(a0, b0);
|
||||
__m256i product1 = _mm256_maddubs_epi16(a1, b1);
|
||||
__m256i product2 = _mm256_maddubs_epi16(a2, b2);
|
||||
__m256i product3 = _mm256_maddubs_epi16(a3, b3);
|
||||
product0 = _mm256_adds_epi16(product0, product1);
|
||||
product0 = _mm256_madd_epi16(product0, Ones256);
|
||||
product2 = _mm256_adds_epi16(product2, product3);
|
||||
product2 = _mm256_madd_epi16(product2, Ones256);
|
||||
acc = _mm256_add_epi32(acc, _mm256_add_epi32(product0, product2));
|
||||
#endif
|
||||
};
|
||||
|
||||
#endif
|
||||
#if defined (USE_SSSE3)
|
||||
|
||||
[[maybe_unused]] const __m128i Ones128 = _mm_set1_epi16(1);
|
||||
|
||||
[[maybe_unused]] auto m128_hadd = [](__m128i sum, int bias) -> int {
|
||||
sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0x4E)); //_MM_PERM_BADC
|
||||
sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0xB1)); //_MM_PERM_CDAB
|
||||
return _mm_cvtsi128_si32(sum) + bias;
|
||||
};
|
||||
|
||||
[[maybe_unused]] auto m128_add_dpbusd_epi32 = [=](__m128i& acc, __m128i a, __m128i b) {
|
||||
__m128i product0 = _mm_maddubs_epi16(a, b);
|
||||
product0 = _mm_madd_epi16(product0, Ones128);
|
||||
acc = _mm_add_epi32(acc, product0);
|
||||
};
|
||||
|
||||
[[maybe_unused]] auto m128_add_dpbusd_epi32x4 = [=](__m128i& acc, __m128i a0, __m128i b0, __m128i a1, __m128i b1,
|
||||
__m128i a2, __m128i b2, __m128i a3, __m128i b3) {
|
||||
__m128i product0 = _mm_maddubs_epi16(a0, b0);
|
||||
__m128i product1 = _mm_maddubs_epi16(a1, b1);
|
||||
__m128i product2 = _mm_maddubs_epi16(a2, b2);
|
||||
__m128i product3 = _mm_maddubs_epi16(a3, b3);
|
||||
product0 = _mm_adds_epi16(product0, product1);
|
||||
product0 = _mm_madd_epi16(product0, Ones128);
|
||||
product2 = _mm_adds_epi16(product2, product3);
|
||||
product2 = _mm_madd_epi16(product2, Ones128);
|
||||
acc = _mm_add_epi32(acc, _mm_add_epi32(product0, product2));
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
#if defined (USE_AVX512)
|
||||
using vec_t = __m512i;
|
||||
#define vec_setzero _mm512_setzero_si512
|
||||
#define vec_set_32 _mm512_set1_epi32
|
||||
auto& vec_add_dpbusd_32 = m512_add_dpbusd_epi32;
|
||||
auto& vec_add_dpbusd_32x4 = m512_add_dpbusd_epi32x4;
|
||||
auto& vec_hadd = m512_hadd;
|
||||
using acc_vec_t = __m512i;
|
||||
using bias_vec_t = __m128i;
|
||||
using weight_vec_t = __m512i;
|
||||
using in_vec_t = __m512i;
|
||||
#define vec_zero _mm512_setzero_si512()
|
||||
#define vec_add_dpbusd_32x2 Simd::m512_add_dpbusd_epi32x2
|
||||
#define vec_hadd Simd::m512_hadd
|
||||
#define vec_haddx4 Simd::m512_haddx4
|
||||
#elif defined (USE_AVX2)
|
||||
using vec_t = __m256i;
|
||||
#define vec_setzero _mm256_setzero_si256
|
||||
#define vec_set_32 _mm256_set1_epi32
|
||||
auto& vec_add_dpbusd_32 = m256_add_dpbusd_epi32;
|
||||
auto& vec_add_dpbusd_32x4 = m256_add_dpbusd_epi32x4;
|
||||
auto& vec_hadd = m256_hadd;
|
||||
using acc_vec_t = __m256i;
|
||||
using bias_vec_t = __m128i;
|
||||
using weight_vec_t = __m256i;
|
||||
using in_vec_t = __m256i;
|
||||
#define vec_zero _mm256_setzero_si256()
|
||||
#define vec_add_dpbusd_32x2 Simd::m256_add_dpbusd_epi32x2
|
||||
#define vec_hadd Simd::m256_hadd
|
||||
#define vec_haddx4 Simd::m256_haddx4
|
||||
#elif defined (USE_SSSE3)
|
||||
using vec_t = __m128i;
|
||||
#define vec_setzero _mm_setzero_si128
|
||||
#define vec_set_32 _mm_set1_epi32
|
||||
auto& vec_add_dpbusd_32 = m128_add_dpbusd_epi32;
|
||||
auto& vec_add_dpbusd_32x4 = m128_add_dpbusd_epi32x4;
|
||||
auto& vec_hadd = m128_hadd;
|
||||
using acc_vec_t = __m128i;
|
||||
using bias_vec_t = __m128i;
|
||||
using weight_vec_t = __m128i;
|
||||
using in_vec_t = __m128i;
|
||||
#define vec_zero _mm_setzero_si128()
|
||||
#define vec_add_dpbusd_32x2 Simd::m128_add_dpbusd_epi32x2
|
||||
#define vec_hadd Simd::m128_hadd
|
||||
#define vec_haddx4 Simd::m128_haddx4
|
||||
#elif defined (USE_NEON)
|
||||
using acc_vec_t = int32x4_t;
|
||||
using bias_vec_t = int32x4_t;
|
||||
using weight_vec_t = int8x8_t;
|
||||
using in_vec_t = int8x8_t;
|
||||
#define vec_zero {0}
|
||||
#define vec_add_dpbusd_32x2 Simd::neon_m128_add_dpbusd_epi32x2
|
||||
#define vec_hadd Simd::neon_m128_hadd
|
||||
#define vec_haddx4 Simd::neon_m128_haddx4
|
||||
#endif
|
||||
|
||||
#if defined (USE_SSSE3)
|
||||
// Different layout, we process 4 inputs at a time, always.
|
||||
static_assert(InputDimensions % 4 == 0);
|
||||
#if defined (USE_SSSE3) || defined (USE_NEON)
|
||||
const in_vec_t* invec = reinterpret_cast<const in_vec_t*>(input);
|
||||
|
||||
const auto output = reinterpret_cast<OutputType*>(buffer);
|
||||
const auto inputVector = reinterpret_cast<const vec_t*>(input);
|
||||
|
||||
static_assert(OutputDimensions % OutputSimdWidth == 0 || OutputDimensions == 1);
|
||||
|
||||
// OutputDimensions is either 1 or a multiple of SimdWidth
|
||||
// because then it is also an input dimension.
|
||||
if constexpr (OutputDimensions % OutputSimdWidth == 0)
|
||||
// Perform accumulation to registers for each big block
|
||||
for (IndexType bigBlock = 0; bigBlock < NumBigBlocks; ++bigBlock)
|
||||
{
|
||||
constexpr IndexType NumChunks = InputDimensions / 4;
|
||||
acc_vec_t acc[NumOutputRegs] = { vec_zero };
|
||||
|
||||
const auto input32 = reinterpret_cast<const std::int32_t*>(input);
|
||||
vec_t* outptr = reinterpret_cast<vec_t*>(output);
|
||||
std::memcpy(output, biases, OutputDimensions * sizeof(OutputType));
|
||||
// Each big block has NumOutputRegs small blocks in each "row", one per register.
|
||||
// We process two small blocks at a time to save on one addition without VNNI.
|
||||
for (IndexType smallBlock = 0; smallBlock < NumSmallBlocksPerOutput; smallBlock += 2)
|
||||
{
|
||||
const weight_vec_t* weightvec =
|
||||
reinterpret_cast<const weight_vec_t*>(
|
||||
weights
|
||||
+ bigBlock * BigBlockSize
|
||||
+ smallBlock * SmallBlockSize * NumOutputRegs);
|
||||
|
||||
for (int i = 0; i < (int)NumChunks - 3; i += 4)
|
||||
const in_vec_t in0 = invec[smallBlock + 0];
|
||||
const in_vec_t in1 = invec[smallBlock + 1];
|
||||
|
||||
for (IndexType k = 0; k < NumOutputRegs; ++k)
|
||||
vec_add_dpbusd_32x2(acc[k], in0, weightvec[k], in1, weightvec[k + NumOutputRegs]);
|
||||
}
|
||||
|
||||
// Horizontally add all accumulators.
|
||||
if constexpr (NumOutputRegs % 4 == 0)
|
||||
{
|
||||
bias_vec_t* outputvec = reinterpret_cast<bias_vec_t*>(output);
|
||||
const bias_vec_t* biasvec = reinterpret_cast<const bias_vec_t*>(biases);
|
||||
|
||||
for (IndexType k = 0; k < NumOutputRegs; k += 4)
|
||||
{
|
||||
const vec_t in0 = vec_set_32(input32[i + 0]);
|
||||
const vec_t in1 = vec_set_32(input32[i + 1]);
|
||||
const vec_t in2 = vec_set_32(input32[i + 2]);
|
||||
const vec_t in3 = vec_set_32(input32[i + 3]);
|
||||
const auto col0 = reinterpret_cast<const vec_t*>(&weights[(i + 0) * OutputDimensions * 4]);
|
||||
const auto col1 = reinterpret_cast<const vec_t*>(&weights[(i + 1) * OutputDimensions * 4]);
|
||||
const auto col2 = reinterpret_cast<const vec_t*>(&weights[(i + 2) * OutputDimensions * 4]);
|
||||
const auto col3 = reinterpret_cast<const vec_t*>(&weights[(i + 3) * OutputDimensions * 4]);
|
||||
for (int j = 0; j * OutputSimdWidth < OutputDimensions; ++j)
|
||||
vec_add_dpbusd_32x4(outptr[j], in0, col0[j], in1, col1[j], in2, col2[j], in3, col3[j]);
|
||||
const IndexType idx = (bigBlock * NumOutputRegs + k) / 4;
|
||||
outputvec[idx] = vec_haddx4(acc[k+0], acc[k+1], acc[k+2], acc[k+3], biasvec[idx]);
|
||||
}
|
||||
}
|
||||
else if constexpr (OutputDimensions == 1)
|
||||
{
|
||||
#if defined (USE_AVX512)
|
||||
if constexpr (PaddedInputDimensions % (SimdWidth * 2) != 0)
|
||||
}
|
||||
else
|
||||
{
|
||||
for (IndexType k = 0; k < NumOutputRegs; ++k)
|
||||
{
|
||||
constexpr IndexType NumChunks = PaddedInputDimensions / SimdWidth;
|
||||
const auto inputVector256 = reinterpret_cast<const __m256i*>(input);
|
||||
|
||||
__m256i sum0 = _mm256_setzero_si256();
|
||||
const auto row0 = reinterpret_cast<const __m256i*>(&weights[0]);
|
||||
|
||||
for (int j = 0; j < (int)NumChunks; ++j)
|
||||
{
|
||||
const __m256i in = inputVector256[j];
|
||||
m256_add_dpbusd_epi32(sum0, in, row0[j]);
|
||||
}
|
||||
output[0] = m256_hadd(sum0, biases[0]);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
#if defined (USE_AVX512)
|
||||
constexpr IndexType NumChunks = PaddedInputDimensions / (SimdWidth * 2);
|
||||
#else
|
||||
constexpr IndexType NumChunks = PaddedInputDimensions / SimdWidth;
|
||||
#endif
|
||||
vec_t sum0 = vec_setzero();
|
||||
const auto row0 = reinterpret_cast<const vec_t*>(&weights[0]);
|
||||
|
||||
for (int j = 0; j < (int)NumChunks; ++j)
|
||||
{
|
||||
const vec_t in = inputVector[j];
|
||||
vec_add_dpbusd_32(sum0, in, row0[j]);
|
||||
}
|
||||
output[0] = vec_hadd(sum0, biases[0]);
|
||||
const IndexType idx = (bigBlock * NumOutputRegs + k);
|
||||
output[idx] = vec_hadd(acc[k], biases[idx]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# undef vec_zero
|
||||
# undef vec_add_dpbusd_32x2
|
||||
# undef vec_hadd
|
||||
# undef vec_haddx4
|
||||
#else
|
||||
|
||||
// Use old implementation for the other architectures.
|
||||
|
||||
auto output = reinterpret_cast<OutputType*>(buffer);
|
||||
|
||||
#if defined(USE_SSE2)
|
||||
// At least a multiple of 16, with SSE2.
|
||||
static_assert(InputDimensions % SimdWidth == 0);
|
||||
constexpr IndexType NumChunks = InputDimensions / SimdWidth;
|
||||
const __m128i Zeros = _mm_setzero_si128();
|
||||
const auto inputVector = reinterpret_cast<const __m128i*>(input);
|
||||
|
||||
#elif defined(USE_MMX)
|
||||
static_assert(InputDimensions % SimdWidth == 0);
|
||||
constexpr IndexType NumChunks = InputDimensions / SimdWidth;
|
||||
const __m64 Zeros = _mm_setzero_si64();
|
||||
const auto inputVector = reinterpret_cast<const __m64*>(input);
|
||||
|
||||
#elif defined(USE_NEON)
|
||||
static_assert(InputDimensions % SimdWidth == 0);
|
||||
constexpr IndexType NumChunks = InputDimensions / SimdWidth;
|
||||
const auto inputVector = reinterpret_cast<const int8x8_t*>(input);
|
||||
#endif
|
||||
|
||||
for (IndexType i = 0; i < OutputDimensions; ++i) {
|
||||
const IndexType offset = i * PaddedInputDimensions;
|
||||
|
||||
#if defined(USE_SSE2)
|
||||
__m128i sumLo = _mm_cvtsi32_si128(biases[i]);
|
||||
__m128i sumHi = Zeros;
|
||||
const auto row = reinterpret_cast<const __m128i*>(&weights[offset]);
|
||||
for (IndexType j = 0; j < NumChunks; ++j) {
|
||||
__m128i row_j = _mm_load_si128(&row[j]);
|
||||
__m128i input_j = _mm_load_si128(&inputVector[j]);
|
||||
__m128i extendedRowLo = _mm_srai_epi16(_mm_unpacklo_epi8(row_j, row_j), 8);
|
||||
__m128i extendedRowHi = _mm_srai_epi16(_mm_unpackhi_epi8(row_j, row_j), 8);
|
||||
__m128i extendedInputLo = _mm_unpacklo_epi8(input_j, Zeros);
|
||||
__m128i extendedInputHi = _mm_unpackhi_epi8(input_j, Zeros);
|
||||
__m128i productLo = _mm_madd_epi16(extendedRowLo, extendedInputLo);
|
||||
__m128i productHi = _mm_madd_epi16(extendedRowHi, extendedInputHi);
|
||||
sumLo = _mm_add_epi32(sumLo, productLo);
|
||||
sumHi = _mm_add_epi32(sumHi, productHi);
|
||||
}
|
||||
__m128i sum = _mm_add_epi32(sumLo, sumHi);
|
||||
__m128i sumHigh_64 = _mm_shuffle_epi32(sum, _MM_SHUFFLE(1, 0, 3, 2));
|
||||
sum = _mm_add_epi32(sum, sumHigh_64);
|
||||
__m128i sum_second_32 = _mm_shufflelo_epi16(sum, _MM_SHUFFLE(1, 0, 3, 2));
|
||||
sum = _mm_add_epi32(sum, sum_second_32);
|
||||
output[i] = _mm_cvtsi128_si32(sum);
|
||||
|
||||
#elif defined(USE_MMX)
|
||||
__m64 sumLo = _mm_cvtsi32_si64(biases[i]);
|
||||
__m64 sumHi = Zeros;
|
||||
const auto row = reinterpret_cast<const __m64*>(&weights[offset]);
|
||||
for (IndexType j = 0; j < NumChunks; ++j) {
|
||||
__m64 row_j = row[j];
|
||||
__m64 input_j = inputVector[j];
|
||||
__m64 extendedRowLo = _mm_srai_pi16(_mm_unpacklo_pi8(row_j, row_j), 8);
|
||||
__m64 extendedRowHi = _mm_srai_pi16(_mm_unpackhi_pi8(row_j, row_j), 8);
|
||||
__m64 extendedInputLo = _mm_unpacklo_pi8(input_j, Zeros);
|
||||
__m64 extendedInputHi = _mm_unpackhi_pi8(input_j, Zeros);
|
||||
__m64 productLo = _mm_madd_pi16(extendedRowLo, extendedInputLo);
|
||||
__m64 productHi = _mm_madd_pi16(extendedRowHi, extendedInputHi);
|
||||
sumLo = _mm_add_pi32(sumLo, productLo);
|
||||
sumHi = _mm_add_pi32(sumHi, productHi);
|
||||
}
|
||||
__m64 sum = _mm_add_pi32(sumLo, sumHi);
|
||||
sum = _mm_add_pi32(sum, _mm_unpackhi_pi32(sum, sum));
|
||||
output[i] = _mm_cvtsi64_si32(sum);
|
||||
|
||||
#elif defined(USE_NEON)
|
||||
int32x4_t sum = {biases[i]};
|
||||
const auto row = reinterpret_cast<const int8x8_t*>(&weights[offset]);
|
||||
for (IndexType j = 0; j < NumChunks; ++j) {
|
||||
int16x8_t product = vmull_s8(inputVector[j * 2], row[j * 2]);
|
||||
product = vmlal_s8(product, inputVector[j * 2 + 1], row[j * 2 + 1]);
|
||||
sum = vpadalq_s16(sum, product);
|
||||
}
|
||||
output[i] = sum[0] + sum[1] + sum[2] + sum[3];
|
||||
|
||||
#else
|
||||
OutputType sum = biases[i];
|
||||
for (IndexType j = 0; j < InputDimensions; ++j) {
|
||||
sum += weights[offset + j] * input[j];
|
||||
}
|
||||
output[i] = sum;
|
||||
#endif
|
||||
|
||||
}
|
||||
#if defined(USE_MMX)
|
||||
_mm_empty();
|
||||
#endif
|
||||
// Use old implementation for the other architectures.
|
||||
affine_transform_non_ssse3<
|
||||
InputDimensions,
|
||||
PaddedInputDimensions,
|
||||
OutputDimensions>(output, weights, biases, input);
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -424,7 +364,171 @@ namespace Stockfish::Eval::NNUE::Layers {
|
|||
using BiasType = OutputType;
|
||||
using WeightType = std::int8_t;
|
||||
|
||||
PreviousLayer previousLayer;
|
||||
alignas(CacheLineSize) BiasType biases[OutputDimensions];
|
||||
alignas(CacheLineSize) WeightType weights[OutputDimensions * PaddedInputDimensions];
|
||||
};
|
||||
|
||||
template <IndexType InDims, IndexType OutDims>
|
||||
class AffineTransform<InDims, OutDims, std::enable_if_t<(ceil_to_multiple<IndexType>(InDims, MaxSimdWidth) < 2*64)>> {
|
||||
public:
|
||||
// Input/output type
|
||||
using InputType = std::uint8_t;
|
||||
using OutputType = std::int32_t;
|
||||
|
||||
// Number of input/output dimensions
|
||||
static constexpr IndexType InputDimensions = InDims;
|
||||
static constexpr IndexType OutputDimensions = OutDims;
|
||||
|
||||
static constexpr IndexType PaddedInputDimensions =
|
||||
ceil_to_multiple<IndexType>(InputDimensions, MaxSimdWidth);
|
||||
static constexpr IndexType PaddedOutputDimensions =
|
||||
ceil_to_multiple<IndexType>(OutputDimensions, MaxSimdWidth);
|
||||
|
||||
using OutputBuffer = OutputType[PaddedOutputDimensions];
|
||||
|
||||
static_assert(PaddedInputDimensions < 128, "Something went wrong. This specialization should not have been chosen.");
|
||||
|
||||
#if defined (USE_SSSE3)
|
||||
static constexpr const IndexType OutputSimdWidth = SimdWidth / 4;
|
||||
static constexpr const IndexType InputSimdWidth = SimdWidth;
|
||||
#endif
|
||||
|
||||
// Hash value embedded in the evaluation file
|
||||
static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) {
|
||||
std::uint32_t hashValue = 0xCC03DAE4u;
|
||||
hashValue += OutputDimensions;
|
||||
hashValue ^= prevHash >> 1;
|
||||
hashValue ^= prevHash << 31;
|
||||
return hashValue;
|
||||
}
|
||||
|
||||
// Maps a weight's position in the serialized stream (output-major, one row of
// PaddedInputDimensions weights per output) to the interleaved in-memory
// layout: weights are kept in groups of 4 consecutive inputs, and for each
// input group the 4-weight runs of all outputs are stored back to back.
static IndexType get_weight_index_scrambled(IndexType i)
{
  const IndexType inputGroup = (i / 4) % (PaddedInputDimensions / 4);
  const IndexType outputRow  = i / PaddedInputDimensions;
  const IndexType lane       = i % 4;

  return inputGroup * OutputDimensions * 4 + outputRow * 4 + lane;
}
|
||||
|
||||
// Translates a stream position into an index into `weights`.
// Builds with USE_SSSE3 store the weights scrambled; every other build keeps
// them in stream order.
static IndexType get_weight_index(IndexType i)
{
#if !defined (USE_SSSE3)
  return i;
#else
  return get_weight_index_scrambled(i);
#endif
}
|
||||
|
||||
// Read network parameters
|
||||
bool read_parameters(std::istream& stream) {
|
||||
for (IndexType i = 0; i < OutputDimensions; ++i)
|
||||
biases[i] = read_little_endian<BiasType>(stream);
|
||||
for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
|
||||
weights[get_weight_index(i)] = read_little_endian<WeightType>(stream);
|
||||
|
||||
return !stream.fail();
|
||||
}
|
||||
|
||||
// Write network parameters
|
||||
bool write_parameters(std::ostream& stream) const {
|
||||
for (IndexType i = 0; i < OutputDimensions; ++i)
|
||||
write_little_endian<BiasType>(stream, biases[i]);
|
||||
|
||||
for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
|
||||
write_little_endian<WeightType>(stream, weights[get_weight_index(i)]);
|
||||
|
||||
return !stream.fail();
|
||||
}
|
||||
// Forward propagation
|
||||
const OutputType* propagate(
|
||||
const InputType* input, OutputType* output) const {
|
||||
|
||||
#if defined (USE_AVX2)
|
||||
using vec_t = __m256i;
|
||||
#define vec_setzero _mm256_setzero_si256
|
||||
#define vec_set_32 _mm256_set1_epi32
|
||||
#define vec_add_dpbusd_32 Simd::m256_add_dpbusd_epi32
|
||||
#define vec_add_dpbusd_32x2 Simd::m256_add_dpbusd_epi32x2
|
||||
#define vec_add_dpbusd_32x4 Simd::m256_add_dpbusd_epi32x4
|
||||
#define vec_hadd Simd::m256_hadd
|
||||
#define vec_haddx4 Simd::m256_haddx4
|
||||
#elif defined (USE_SSSE3)
|
||||
using vec_t = __m128i;
|
||||
#define vec_setzero _mm_setzero_si128
|
||||
#define vec_set_32 _mm_set1_epi32
|
||||
#define vec_add_dpbusd_32 Simd::m128_add_dpbusd_epi32
|
||||
#define vec_add_dpbusd_32x2 Simd::m128_add_dpbusd_epi32x2
|
||||
#define vec_add_dpbusd_32x4 Simd::m128_add_dpbusd_epi32x4
|
||||
#define vec_hadd Simd::m128_hadd
|
||||
#define vec_haddx4 Simd::m128_haddx4
|
||||
#endif
|
||||
|
||||
#if defined (USE_SSSE3)
|
||||
const auto inputVector = reinterpret_cast<const vec_t*>(input);
|
||||
|
||||
static_assert(OutputDimensions % OutputSimdWidth == 0 || OutputDimensions == 1);
|
||||
|
||||
if constexpr (OutputDimensions % OutputSimdWidth == 0)
|
||||
{
|
||||
constexpr IndexType NumChunks = ceil_to_multiple<IndexType>(InputDimensions, 8) / 4;
|
||||
constexpr IndexType NumRegs = OutputDimensions / OutputSimdWidth;
|
||||
|
||||
const auto input32 = reinterpret_cast<const std::int32_t*>(input);
|
||||
const vec_t* biasvec = reinterpret_cast<const vec_t*>(biases);
|
||||
vec_t acc[NumRegs];
|
||||
for (IndexType k = 0; k < NumRegs; ++k)
|
||||
acc[k] = biasvec[k];
|
||||
|
||||
for (IndexType i = 0; i < NumChunks; i += 2)
|
||||
{
|
||||
const vec_t in0 = vec_set_32(input32[i + 0]);
|
||||
const vec_t in1 = vec_set_32(input32[i + 1]);
|
||||
const auto col0 = reinterpret_cast<const vec_t*>(&weights[(i + 0) * OutputDimensions * 4]);
|
||||
const auto col1 = reinterpret_cast<const vec_t*>(&weights[(i + 1) * OutputDimensions * 4]);
|
||||
for (IndexType k = 0; k < NumRegs; ++k)
|
||||
vec_add_dpbusd_32x2(acc[k], in0, col0[k], in1, col1[k]);
|
||||
}
|
||||
|
||||
vec_t* outptr = reinterpret_cast<vec_t*>(output);
|
||||
for (IndexType k = 0; k < NumRegs; ++k)
|
||||
outptr[k] = acc[k];
|
||||
}
|
||||
else if constexpr (OutputDimensions == 1)
|
||||
{
|
||||
constexpr IndexType NumChunks = PaddedInputDimensions / SimdWidth;
|
||||
vec_t sum0 = vec_setzero();
|
||||
const auto row0 = reinterpret_cast<const vec_t*>(&weights[0]);
|
||||
|
||||
for (int j = 0; j < (int)NumChunks; ++j)
|
||||
{
|
||||
const vec_t in = inputVector[j];
|
||||
vec_add_dpbusd_32(sum0, in, row0[j]);
|
||||
}
|
||||
output[0] = vec_hadd(sum0, biases[0]);
|
||||
}
|
||||
|
||||
# undef vec_setzero
|
||||
# undef vec_set_32
|
||||
# undef vec_add_dpbusd_32
|
||||
# undef vec_add_dpbusd_32x2
|
||||
# undef vec_add_dpbusd_32x4
|
||||
# undef vec_hadd
|
||||
# undef vec_haddx4
|
||||
#else
|
||||
// Use old implementation for the other architectures.
|
||||
affine_transform_non_ssse3<
|
||||
InputDimensions,
|
||||
PaddedInputDimensions,
|
||||
OutputDimensions>(output, weights, biases, input);
|
||||
#endif
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
private:
|
||||
using BiasType = OutputType;
|
||||
using WeightType = std::int8_t;
|
||||
|
||||
alignas(CacheLineSize) BiasType biases[OutputDimensions];
|
||||
alignas(CacheLineSize) WeightType weights[OutputDimensions * PaddedInputDimensions];
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -26,50 +26,41 @@
|
|||
namespace Stockfish::Eval::NNUE::Layers {
|
||||
|
||||
// Clipped ReLU
|
||||
template <typename PreviousLayer>
|
||||
template <IndexType InDims>
|
||||
class ClippedReLU {
|
||||
public:
|
||||
// Input/output type
|
||||
using InputType = typename PreviousLayer::OutputType;
|
||||
using InputType = std::int32_t;
|
||||
using OutputType = std::uint8_t;
|
||||
static_assert(std::is_same<InputType, std::int32_t>::value, "");
|
||||
|
||||
// Number of input/output dimensions
|
||||
static constexpr IndexType InputDimensions =
|
||||
PreviousLayer::OutputDimensions;
|
||||
static constexpr IndexType InputDimensions = InDims;
|
||||
static constexpr IndexType OutputDimensions = InputDimensions;
|
||||
static constexpr IndexType PaddedOutputDimensions =
|
||||
ceil_to_multiple<IndexType>(OutputDimensions, 32);
|
||||
|
||||
// Size of forward propagation buffer used in this layer
|
||||
static constexpr std::size_t SelfBufferSize =
|
||||
ceil_to_multiple(OutputDimensions * sizeof(OutputType), CacheLineSize);
|
||||
|
||||
// Size of the forward propagation buffer used from the input layer to this layer
|
||||
static constexpr std::size_t BufferSize =
|
||||
PreviousLayer::BufferSize + SelfBufferSize;
|
||||
using OutputBuffer = OutputType[PaddedOutputDimensions];
|
||||
|
||||
// Hash value embedded in the evaluation file
|
||||
static constexpr std::uint32_t get_hash_value() {
|
||||
static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) {
|
||||
std::uint32_t hashValue = 0x538D24C7u;
|
||||
hashValue += PreviousLayer::get_hash_value();
|
||||
hashValue += prevHash;
|
||||
return hashValue;
|
||||
}
|
||||
|
||||
// Read network parameters
|
||||
bool read_parameters(std::istream& stream) {
|
||||
return previousLayer.read_parameters(stream);
|
||||
bool read_parameters(std::istream&) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Write network parameters
|
||||
bool write_parameters(std::ostream& stream) const {
|
||||
return previousLayer.write_parameters(stream);
|
||||
bool write_parameters(std::ostream&) const {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Forward propagation
|
||||
const OutputType* propagate(
|
||||
const TransformedFeatureType* transformedFeatures, char* buffer) const {
|
||||
const auto input = previousLayer.propagate(
|
||||
transformedFeatures, buffer + SelfBufferSize);
|
||||
const auto output = reinterpret_cast<OutputType*>(buffer);
|
||||
const InputType* input, OutputType* output) const {
|
||||
|
||||
#if defined(USE_AVX2)
|
||||
if constexpr (InputDimensions % SimdWidth == 0) {
|
||||
|
@ -179,11 +170,9 @@ namespace Stockfish::Eval::NNUE::Layers {
|
|||
output[i] = static_cast<OutputType>(
|
||||
std::max(0, std::min(127, input[i] >> WeightScaleBits)));
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
private:
|
||||
PreviousLayer previousLayer;
|
||||
};
|
||||
|
||||
} // namespace Stockfish::Eval::NNUE::Layers
|
||||
|
|
|
@ -1,73 +0,0 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// NNUE evaluation function layer InputSlice definition
|
||||
|
||||
#ifndef NNUE_LAYERS_INPUT_SLICE_H_INCLUDED
|
||||
#define NNUE_LAYERS_INPUT_SLICE_H_INCLUDED
|
||||
|
||||
#include "../nnue_common.h"
|
||||
|
||||
namespace Stockfish::Eval::NNUE::Layers {
|
||||
|
||||
// Input layer
|
||||
template <IndexType OutDims, IndexType Offset = 0>
|
||||
class InputSlice {
|
||||
public:
|
||||
// Need to maintain alignment
|
||||
static_assert(Offset % MaxSimdWidth == 0, "");
|
||||
|
||||
// Output type
|
||||
using OutputType = TransformedFeatureType;
|
||||
|
||||
// Output dimensionality
|
||||
static constexpr IndexType OutputDimensions = OutDims;
|
||||
|
||||
// Size of forward propagation buffer used from the input layer to this layer
|
||||
static constexpr std::size_t BufferSize = 0;
|
||||
|
||||
// Hash value embedded in the evaluation file
|
||||
static constexpr std::uint32_t get_hash_value() {
|
||||
std::uint32_t hashValue = 0xEC42E90Du;
|
||||
hashValue ^= OutputDimensions ^ (Offset << 10);
|
||||
return hashValue;
|
||||
}
|
||||
|
||||
// Read network parameters
|
||||
bool read_parameters(std::istream& /*stream*/) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Write network parameters
|
||||
bool write_parameters(std::ostream& /*stream*/) const {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Forward propagation
|
||||
const OutputType* propagate(
|
||||
const TransformedFeatureType* transformedFeatures,
|
||||
char* /*buffer*/) const {
|
||||
return transformedFeatures + Offset;
|
||||
}
|
||||
|
||||
private:
|
||||
};
|
||||
|
||||
} // namespace Stockfish::Eval::NNUE::Layers
|
||||
|
||||
#endif // #ifndef NNUE_LAYERS_INPUT_SLICE_H_INCLUDED
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -21,39 +21,112 @@
|
|||
#ifndef NNUE_ARCHITECTURE_H_INCLUDED
|
||||
#define NNUE_ARCHITECTURE_H_INCLUDED
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "nnue_common.h"
|
||||
|
||||
#include "features/half_ka_v2.h"
|
||||
#include "features/half_ka_v2_hm.h"
|
||||
|
||||
#include "layers/input_slice.h"
|
||||
#include "layers/affine_transform.h"
|
||||
#include "layers/clipped_relu.h"
|
||||
|
||||
#include "../misc.h"
|
||||
|
||||
namespace Stockfish::Eval::NNUE {
|
||||
|
||||
// Input features used in evaluation function
|
||||
using FeatureSet = Features::HalfKAv2;
|
||||
// Input features used in evaluation function
|
||||
using FeatureSet = Features::HalfKAv2_hm;
|
||||
|
||||
// Number of input feature dimensions after conversion
|
||||
constexpr IndexType TransformedFeatureDimensions = 512;
|
||||
constexpr IndexType PSQTBuckets = 8;
|
||||
constexpr IndexType LayerStacks = 8;
|
||||
// Number of input feature dimensions after conversion
|
||||
constexpr IndexType TransformedFeatureDimensions = 1024;
|
||||
constexpr IndexType PSQTBuckets = 8;
|
||||
constexpr IndexType LayerStacks = 8;
|
||||
|
||||
namespace Layers {
|
||||
struct Network
|
||||
{
|
||||
static constexpr int FC_0_OUTPUTS = 15;
|
||||
static constexpr int FC_1_OUTPUTS = 32;
|
||||
|
||||
// Define network structure
|
||||
using InputLayer = InputSlice<TransformedFeatureDimensions * 2>;
|
||||
using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 16>>;
|
||||
using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
|
||||
using OutputLayer = AffineTransform<HiddenLayer2, 1>;
|
||||
Layers::AffineTransform<TransformedFeatureDimensions, FC_0_OUTPUTS + 1> fc_0;
|
||||
Layers::ClippedReLU<FC_0_OUTPUTS + 1> ac_0;
|
||||
Layers::AffineTransform<FC_0_OUTPUTS, FC_1_OUTPUTS> fc_1;
|
||||
Layers::ClippedReLU<FC_1_OUTPUTS> ac_1;
|
||||
Layers::AffineTransform<FC_1_OUTPUTS, 1> fc_2;
|
||||
|
||||
} // namespace Layers
|
||||
// Hash value embedded in the evaluation file
|
||||
static constexpr std::uint32_t get_hash_value() {
|
||||
// input slice hash
|
||||
std::uint32_t hashValue = 0xEC42E90Du;
|
||||
hashValue ^= TransformedFeatureDimensions * 2;
|
||||
|
||||
using Network = Layers::OutputLayer;
|
||||
hashValue = decltype(fc_0)::get_hash_value(hashValue);
|
||||
hashValue = decltype(ac_0)::get_hash_value(hashValue);
|
||||
hashValue = decltype(fc_1)::get_hash_value(hashValue);
|
||||
hashValue = decltype(ac_1)::get_hash_value(hashValue);
|
||||
hashValue = decltype(fc_2)::get_hash_value(hashValue);
|
||||
|
||||
static_assert(TransformedFeatureDimensions % MaxSimdWidth == 0, "");
|
||||
static_assert(Network::OutputDimensions == 1, "");
|
||||
static_assert(std::is_same<Network::OutputType, std::int32_t>::value, "");
|
||||
return hashValue;
|
||||
}
|
||||
|
||||
// Read network parameters
|
||||
bool read_parameters(std::istream& stream) {
|
||||
if (!fc_0.read_parameters(stream)) return false;
|
||||
if (!ac_0.read_parameters(stream)) return false;
|
||||
if (!fc_1.read_parameters(stream)) return false;
|
||||
if (!ac_1.read_parameters(stream)) return false;
|
||||
if (!fc_2.read_parameters(stream)) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Read network parameters
|
||||
bool write_parameters(std::ostream& stream) const {
|
||||
if (!fc_0.write_parameters(stream)) return false;
|
||||
if (!ac_0.write_parameters(stream)) return false;
|
||||
if (!fc_1.write_parameters(stream)) return false;
|
||||
if (!ac_1.write_parameters(stream)) return false;
|
||||
if (!fc_2.write_parameters(stream)) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
std::int32_t propagate(const TransformedFeatureType* transformedFeatures)
|
||||
{
|
||||
struct alignas(CacheLineSize) Buffer
|
||||
{
|
||||
alignas(CacheLineSize) decltype(fc_0)::OutputBuffer fc_0_out;
|
||||
alignas(CacheLineSize) decltype(ac_0)::OutputBuffer ac_0_out;
|
||||
alignas(CacheLineSize) decltype(fc_1)::OutputBuffer fc_1_out;
|
||||
alignas(CacheLineSize) decltype(ac_1)::OutputBuffer ac_1_out;
|
||||
alignas(CacheLineSize) decltype(fc_2)::OutputBuffer fc_2_out;
|
||||
|
||||
Buffer()
|
||||
{
|
||||
std::memset(this, 0, sizeof(*this));
|
||||
}
|
||||
};
|
||||
|
||||
#if defined(__clang__) && (__APPLE__)
|
||||
// workaround for a bug reported with xcode 12
|
||||
static thread_local auto tlsBuffer = std::make_unique<Buffer>();
|
||||
// Access TLS only once, cache result.
|
||||
Buffer& buffer = *tlsBuffer;
|
||||
#else
|
||||
alignas(CacheLineSize) static thread_local Buffer buffer;
|
||||
#endif
|
||||
|
||||
fc_0.propagate(transformedFeatures, buffer.fc_0_out);
|
||||
ac_0.propagate(buffer.fc_0_out, buffer.ac_0_out);
|
||||
fc_1.propagate(buffer.ac_0_out, buffer.fc_1_out);
|
||||
ac_1.propagate(buffer.fc_1_out, buffer.ac_1_out);
|
||||
fc_2.propagate(buffer.ac_1_out, buffer.fc_2_out);
|
||||
|
||||
// buffer.fc_0_out[FC_0_OUTPUTS] is such that 1.0 is equal to 127*(1<<WeightScaleBits) in quantized form
|
||||
// but we want 1.0 to be equal to 600*OutputScale
|
||||
std::int32_t fwdOut = int(buffer.fc_0_out[FC_0_OUTPUTS]) * (600*OutputScale) / (127*(1<<WeightScaleBits));
|
||||
std::int32_t outputValue = buffer.fc_2_out[0] + fwdOut;
|
||||
|
||||
return outputValue;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace Stockfish::Eval::NNUE
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -109,7 +109,7 @@ namespace Stockfish::Eval::NNUE {
|
|||
|
||||
// write_little_endian() is our utility to write an integer (signed or unsigned, any size)
|
||||
// to a stream in little-endian order. We swap the byte order before the write if
|
||||
// necessary to always write in little endian order, independantly of the byte
|
||||
// necessary to always write in little endian order, independently of the byte
|
||||
// ordering of the compiling machine.
|
||||
template <typename IntType>
|
||||
inline void write_little_endian(std::ostream& stream, IntType value) {
|
||||
|
@ -127,11 +127,11 @@ namespace Stockfish::Eval::NNUE {
|
|||
{
|
||||
for (; i + 1 < sizeof(IntType); ++i)
|
||||
{
|
||||
u[i] = v;
|
||||
u[i] = (std::uint8_t)v;
|
||||
v >>= 8;
|
||||
}
|
||||
}
|
||||
u[i] = v;
|
||||
u[i] = (std::uint8_t)v;
|
||||
|
||||
stream.write(reinterpret_cast<char*>(u), sizeof(IntType));
|
||||
}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -47,12 +47,22 @@ namespace Stockfish::Eval::NNUE {
|
|||
#define vec_store(a,b) _mm512_store_si512(a,b)
|
||||
#define vec_add_16(a,b) _mm512_add_epi16(a,b)
|
||||
#define vec_sub_16(a,b) _mm512_sub_epi16(a,b)
|
||||
#define vec_mul_16(a,b) _mm512_mullo_epi16(a,b)
|
||||
#define vec_zero() _mm512_setzero_epi32()
|
||||
#define vec_set_16(a) _mm512_set1_epi16(a)
|
||||
#define vec_max_16(a,b) _mm512_max_epi16(a,b)
|
||||
#define vec_min_16(a,b) _mm512_min_epi16(a,b)
|
||||
inline vec_t vec_msb_pack_16(vec_t a, vec_t b){
|
||||
vec_t compacted = _mm512_packs_epi16(_mm512_srli_epi16(a,7),_mm512_srli_epi16(b,7));
|
||||
return _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 2, 4, 6, 1, 3, 5, 7), compacted);
|
||||
}
|
||||
#define vec_load_psqt(a) _mm256_load_si256(a)
|
||||
#define vec_store_psqt(a,b) _mm256_store_si256(a,b)
|
||||
#define vec_add_psqt_32(a,b) _mm256_add_epi32(a,b)
|
||||
#define vec_sub_psqt_32(a,b) _mm256_sub_epi32(a,b)
|
||||
#define vec_zero_psqt() _mm256_setzero_si256()
|
||||
#define NumRegistersSIMD 32
|
||||
#define MaxChunkSize 64
|
||||
|
||||
#elif USE_AVX2
|
||||
typedef __m256i vec_t;
|
||||
|
@ -61,12 +71,22 @@ namespace Stockfish::Eval::NNUE {
|
|||
#define vec_store(a,b) _mm256_store_si256(a,b)
|
||||
#define vec_add_16(a,b) _mm256_add_epi16(a,b)
|
||||
#define vec_sub_16(a,b) _mm256_sub_epi16(a,b)
|
||||
#define vec_mul_16(a,b) _mm256_mullo_epi16(a,b)
|
||||
#define vec_zero() _mm256_setzero_si256()
|
||||
#define vec_set_16(a) _mm256_set1_epi16(a)
|
||||
#define vec_max_16(a,b) _mm256_max_epi16(a,b)
|
||||
#define vec_min_16(a,b) _mm256_min_epi16(a,b)
|
||||
inline vec_t vec_msb_pack_16(vec_t a, vec_t b){
|
||||
vec_t compacted = _mm256_packs_epi16(_mm256_srli_epi16(a,7), _mm256_srli_epi16(b,7));
|
||||
return _mm256_permute4x64_epi64(compacted, 0b11011000);
|
||||
}
|
||||
#define vec_load_psqt(a) _mm256_load_si256(a)
|
||||
#define vec_store_psqt(a,b) _mm256_store_si256(a,b)
|
||||
#define vec_add_psqt_32(a,b) _mm256_add_epi32(a,b)
|
||||
#define vec_sub_psqt_32(a,b) _mm256_sub_epi32(a,b)
|
||||
#define vec_zero_psqt() _mm256_setzero_si256()
|
||||
#define NumRegistersSIMD 16
|
||||
#define MaxChunkSize 32
|
||||
|
||||
#elif USE_SSE2
|
||||
typedef __m128i vec_t;
|
||||
|
@ -75,12 +95,19 @@ namespace Stockfish::Eval::NNUE {
|
|||
#define vec_store(a,b) *(a)=(b)
|
||||
#define vec_add_16(a,b) _mm_add_epi16(a,b)
|
||||
#define vec_sub_16(a,b) _mm_sub_epi16(a,b)
|
||||
#define vec_mul_16(a,b) _mm_mullo_epi16(a,b)
|
||||
#define vec_zero() _mm_setzero_si128()
|
||||
#define vec_set_16(a) _mm_set1_epi16(a)
|
||||
#define vec_max_16(a,b) _mm_max_epi16(a,b)
|
||||
#define vec_min_16(a,b) _mm_min_epi16(a,b)
|
||||
#define vec_msb_pack_16(a,b) _mm_packs_epi16(_mm_srli_epi16(a,7),_mm_srli_epi16(b,7))
|
||||
#define vec_load_psqt(a) (*(a))
|
||||
#define vec_store_psqt(a,b) *(a)=(b)
|
||||
#define vec_add_psqt_32(a,b) _mm_add_epi32(a,b)
|
||||
#define vec_sub_psqt_32(a,b) _mm_sub_epi32(a,b)
|
||||
#define vec_zero_psqt() _mm_setzero_si128()
|
||||
#define NumRegistersSIMD (Is64Bit ? 16 : 8)
|
||||
#define MaxChunkSize 16
|
||||
|
||||
#elif USE_MMX
|
||||
typedef __m64 vec_t;
|
||||
|
@ -89,12 +116,26 @@ namespace Stockfish::Eval::NNUE {
|
|||
#define vec_store(a,b) *(a)=(b)
|
||||
#define vec_add_16(a,b) _mm_add_pi16(a,b)
|
||||
#define vec_sub_16(a,b) _mm_sub_pi16(a,b)
|
||||
#define vec_mul_16(a,b) _mm_mullo_pi16(a,b)
|
||||
#define vec_zero() _mm_setzero_si64()
|
||||
#define vec_set_16(a) _mm_set1_pi16(a)
|
||||
inline vec_t vec_max_16(vec_t a,vec_t b){
|
||||
vec_t comparison = _mm_cmpgt_pi16(a,b);
|
||||
return _mm_or_si64(_mm_and_si64(comparison, a), _mm_andnot_si64(comparison, b));
|
||||
}
|
||||
inline vec_t vec_min_16(vec_t a,vec_t b){
|
||||
vec_t comparison = _mm_cmpgt_pi16(a,b);
|
||||
return _mm_or_si64(_mm_and_si64(comparison, b), _mm_andnot_si64(comparison, a));
|
||||
}
|
||||
#define vec_msb_pack_16(a,b) _mm_packs_pi16(_mm_srli_pi16(a,7),_mm_srli_pi16(b,7))
|
||||
#define vec_load_psqt(a) (*(a))
|
||||
#define vec_store_psqt(a,b) *(a)=(b)
|
||||
#define vec_add_psqt_32(a,b) _mm_add_pi32(a,b)
|
||||
#define vec_sub_psqt_32(a,b) _mm_sub_pi32(a,b)
|
||||
#define vec_zero_psqt() _mm_setzero_si64()
|
||||
#define vec_cleanup() _mm_empty()
|
||||
#define NumRegistersSIMD 8
|
||||
#define MaxChunkSize 8
|
||||
|
||||
#elif USE_NEON
|
||||
typedef int16x8_t vec_t;
|
||||
|
@ -103,12 +144,24 @@ namespace Stockfish::Eval::NNUE {
|
|||
#define vec_store(a,b) *(a)=(b)
|
||||
#define vec_add_16(a,b) vaddq_s16(a,b)
|
||||
#define vec_sub_16(a,b) vsubq_s16(a,b)
|
||||
#define vec_mul_16(a,b) vmulq_s16(a,b)
|
||||
#define vec_zero() vec_t{0}
|
||||
#define vec_set_16(a) vdupq_n_s16(a)
|
||||
#define vec_max_16(a,b) vmaxq_s16(a,b)
|
||||
#define vec_min_16(a,b) vminq_s16(a,b)
|
||||
inline vec_t vec_msb_pack_16(vec_t a, vec_t b){
|
||||
const int8x8_t shifta = vshrn_n_s16(a, 7);
|
||||
const int8x8_t shiftb = vshrn_n_s16(b, 7);
|
||||
const int8x16_t compacted = vcombine_s8(shifta,shiftb);
|
||||
return *reinterpret_cast<const vec_t*> (&compacted);
|
||||
}
|
||||
#define vec_load_psqt(a) (*(a))
|
||||
#define vec_store_psqt(a,b) *(a)=(b)
|
||||
#define vec_add_psqt_32(a,b) vaddq_s32(a,b)
|
||||
#define vec_sub_psqt_32(a,b) vsubq_s32(a,b)
|
||||
#define vec_zero_psqt() psqt_vec_t{0}
|
||||
#define NumRegistersSIMD 16
|
||||
#define MaxChunkSize 16
|
||||
|
||||
#else
|
||||
#undef VECTOR
|
||||
|
@ -123,8 +176,10 @@ namespace Stockfish::Eval::NNUE {
|
|||
// We use __m* types as template arguments, which causes GCC to emit warnings
|
||||
// about losing some attribute information. This is irrelevant to us as we
|
||||
// only take their size, so the following pragma are harmless.
|
||||
#if defined(__GNUC__)
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wignored-attributes"
|
||||
#endif
|
||||
|
||||
template <typename SIMDRegisterType,
|
||||
typename LaneType,
|
||||
|
@ -156,9 +211,9 @@ namespace Stockfish::Eval::NNUE {
|
|||
|
||||
static constexpr int NumRegs = BestRegisterCount<vec_t, WeightType, TransformedFeatureDimensions, NumRegistersSIMD>();
|
||||
static constexpr int NumPsqtRegs = BestRegisterCount<psqt_vec_t, PSQTWeightType, PSQTBuckets, NumRegistersSIMD>();
|
||||
|
||||
#if defined(__GNUC__)
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
|
@ -183,7 +238,7 @@ namespace Stockfish::Eval::NNUE {
|
|||
|
||||
// Number of input/output dimensions
|
||||
static constexpr IndexType InputDimensions = FeatureSet::Dimensions;
|
||||
static constexpr IndexType OutputDimensions = HalfDimensions * 2;
|
||||
static constexpr IndexType OutputDimensions = HalfDimensions;
|
||||
|
||||
// Size of forward propagation buffer
|
||||
static constexpr std::size_t BufferSize =
|
||||
|
@ -191,7 +246,7 @@ namespace Stockfish::Eval::NNUE {
|
|||
|
||||
// Hash value embedded in the evaluation file
|
||||
static constexpr std::uint32_t get_hash_value() {
|
||||
return FeatureSet::HashValue ^ OutputDimensions;
|
||||
return FeatureSet::HashValue ^ (OutputDimensions * 2);
|
||||
}
|
||||
|
||||
// Read network parameters
|
||||
|
@ -229,136 +284,55 @@ namespace Stockfish::Eval::NNUE {
|
|||
) / 2;
|
||||
|
||||
|
||||
#if defined(USE_AVX512)
|
||||
|
||||
constexpr IndexType NumChunks = HalfDimensions / (SimdWidth * 2);
|
||||
static_assert(HalfDimensions % (SimdWidth * 2) == 0);
|
||||
const __m512i Control = _mm512_setr_epi64(0, 2, 4, 6, 1, 3, 5, 7);
|
||||
const __m512i Zero = _mm512_setzero_si512();
|
||||
|
||||
for (IndexType p = 0; p < 2; ++p)
|
||||
{
|
||||
const IndexType offset = HalfDimensions * p;
|
||||
auto out = reinterpret_cast<__m512i*>(&output[offset]);
|
||||
for (IndexType j = 0; j < NumChunks; ++j)
|
||||
const IndexType offset = (HalfDimensions / 2) * p;
|
||||
|
||||
#if defined(VECTOR)
|
||||
|
||||
constexpr IndexType OutputChunkSize = MaxChunkSize;
|
||||
static_assert((HalfDimensions / 2) % OutputChunkSize == 0);
|
||||
constexpr IndexType NumOutputChunks = HalfDimensions / 2 / OutputChunkSize;
|
||||
|
||||
vec_t Zero = vec_zero();
|
||||
vec_t One = vec_set_16(127);
|
||||
|
||||
const vec_t* in0 = reinterpret_cast<const vec_t*>(&(accumulation[perspectives[p]][0]));
|
||||
const vec_t* in1 = reinterpret_cast<const vec_t*>(&(accumulation[perspectives[p]][HalfDimensions / 2]));
|
||||
vec_t* out = reinterpret_cast< vec_t*>(output + offset);
|
||||
|
||||
for (IndexType j = 0; j < NumOutputChunks; j += 1)
|
||||
{
|
||||
__m512i sum0 = _mm512_load_si512(&reinterpret_cast<const __m512i*>
|
||||
(accumulation[perspectives[p]])[j * 2 + 0]);
|
||||
__m512i sum1 = _mm512_load_si512(&reinterpret_cast<const __m512i*>
|
||||
(accumulation[perspectives[p]])[j * 2 + 1]);
|
||||
const vec_t sum0a = vec_max_16(vec_min_16(in0[j * 2 + 0], One), Zero);
|
||||
const vec_t sum0b = vec_max_16(vec_min_16(in0[j * 2 + 1], One), Zero);
|
||||
const vec_t sum1a = vec_max_16(vec_min_16(in1[j * 2 + 0], One), Zero);
|
||||
const vec_t sum1b = vec_max_16(vec_min_16(in1[j * 2 + 1], One), Zero);
|
||||
|
||||
_mm512_store_si512(&out[j], _mm512_permutexvar_epi64(Control,
|
||||
_mm512_max_epi8(_mm512_packs_epi16(sum0, sum1), Zero)));
|
||||
const vec_t pa = vec_mul_16(sum0a, sum1a);
|
||||
const vec_t pb = vec_mul_16(sum0b, sum1b);
|
||||
|
||||
out[j] = vec_msb_pack_16(pa, pb);
|
||||
}
|
||||
}
|
||||
return psqt;
|
||||
|
||||
#elif defined(USE_AVX2)
|
||||
#else
|
||||
|
||||
constexpr IndexType NumChunks = HalfDimensions / SimdWidth;
|
||||
constexpr int Control = 0b11011000;
|
||||
const __m256i Zero = _mm256_setzero_si256();
|
||||
|
||||
for (IndexType p = 0; p < 2; ++p)
|
||||
{
|
||||
const IndexType offset = HalfDimensions * p;
|
||||
auto out = reinterpret_cast<__m256i*>(&output[offset]);
|
||||
for (IndexType j = 0; j < NumChunks; ++j)
|
||||
{
|
||||
__m256i sum0 = _mm256_load_si256(&reinterpret_cast<const __m256i*>
|
||||
(accumulation[perspectives[p]])[j * 2 + 0]);
|
||||
__m256i sum1 = _mm256_load_si256(&reinterpret_cast<const __m256i*>
|
||||
(accumulation[perspectives[p]])[j * 2 + 1]);
|
||||
|
||||
_mm256_store_si256(&out[j], _mm256_permute4x64_epi64(
|
||||
_mm256_max_epi8(_mm256_packs_epi16(sum0, sum1), Zero), Control));
|
||||
for (IndexType j = 0; j < HalfDimensions / 2; ++j) {
|
||||
BiasType sum0 = accumulation[static_cast<int>(perspectives[p])][j + 0];
|
||||
BiasType sum1 = accumulation[static_cast<int>(perspectives[p])][j + HalfDimensions / 2];
|
||||
sum0 = std::max<int>(0, std::min<int>(127, sum0));
|
||||
sum1 = std::max<int>(0, std::min<int>(127, sum1));
|
||||
output[offset + j] = static_cast<OutputType>(sum0 * sum1 / 128);
|
||||
}
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(vec_cleanup)
|
||||
vec_cleanup();
|
||||
#endif
|
||||
|
||||
return psqt;
|
||||
|
||||
#elif defined(USE_SSE2)
|
||||
|
||||
#ifdef USE_SSE41
|
||||
constexpr IndexType NumChunks = HalfDimensions / SimdWidth;
|
||||
const __m128i Zero = _mm_setzero_si128();
|
||||
#else
|
||||
constexpr IndexType NumChunks = HalfDimensions / SimdWidth;
|
||||
const __m128i k0x80s = _mm_set1_epi8(-128);
|
||||
#endif
|
||||
|
||||
for (IndexType p = 0; p < 2; ++p)
|
||||
{
|
||||
const IndexType offset = HalfDimensions * p;
|
||||
auto out = reinterpret_cast<__m128i*>(&output[offset]);
|
||||
for (IndexType j = 0; j < NumChunks; ++j)
|
||||
{
|
||||
__m128i sum0 = _mm_load_si128(&reinterpret_cast<const __m128i*>
|
||||
(accumulation[perspectives[p]])[j * 2 + 0]);
|
||||
__m128i sum1 = _mm_load_si128(&reinterpret_cast<const __m128i*>
|
||||
(accumulation[perspectives[p]])[j * 2 + 1]);
|
||||
const __m128i packedbytes = _mm_packs_epi16(sum0, sum1);
|
||||
|
||||
#ifdef USE_SSE41
|
||||
_mm_store_si128(&out[j], _mm_max_epi8(packedbytes, Zero));
|
||||
#else
|
||||
_mm_store_si128(&out[j], _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s));
|
||||
#endif
|
||||
}
|
||||
}
|
||||
return psqt;
|
||||
|
||||
#elif defined(USE_MMX)
|
||||
|
||||
constexpr IndexType NumChunks = HalfDimensions / SimdWidth;
|
||||
const __m64 k0x80s = _mm_set1_pi8(-128);
|
||||
|
||||
for (IndexType p = 0; p < 2; ++p)
|
||||
{
|
||||
const IndexType offset = HalfDimensions * p;
|
||||
auto out = reinterpret_cast<__m64*>(&output[offset]);
|
||||
for (IndexType j = 0; j < NumChunks; ++j)
|
||||
{
|
||||
__m64 sum0 = *(&reinterpret_cast<const __m64*>(accumulation[perspectives[p]])[j * 2 + 0]);
|
||||
__m64 sum1 = *(&reinterpret_cast<const __m64*>(accumulation[perspectives[p]])[j * 2 + 1]);
|
||||
const __m64 packedbytes = _mm_packs_pi16(sum0, sum1);
|
||||
out[j] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s);
|
||||
}
|
||||
}
|
||||
_mm_empty();
|
||||
return psqt;
|
||||
|
||||
#elif defined(USE_NEON)
|
||||
|
||||
constexpr IndexType NumChunks = HalfDimensions / (SimdWidth / 2);
|
||||
const int8x8_t Zero = {0};
|
||||
|
||||
for (IndexType p = 0; p < 2; ++p)
|
||||
{
|
||||
const IndexType offset = HalfDimensions * p;
|
||||
const auto out = reinterpret_cast<int8x8_t*>(&output[offset]);
|
||||
for (IndexType j = 0; j < NumChunks; ++j)
|
||||
{
|
||||
int16x8_t sum = reinterpret_cast<const int16x8_t*>(accumulation[perspectives[p]])[j];
|
||||
out[j] = vmax_s8(vqmovn_s16(sum), Zero);
|
||||
}
|
||||
}
|
||||
return psqt;
|
||||
|
||||
#else
|
||||
|
||||
for (IndexType p = 0; p < 2; ++p)
|
||||
{
|
||||
const IndexType offset = HalfDimensions * p;
|
||||
for (IndexType j = 0; j < HalfDimensions; ++j)
|
||||
{
|
||||
BiasType sum = accumulation[perspectives[p]][j];
|
||||
output[offset + j] = static_cast<OutputType>(std::max<int>(0, std::min<int>(127, sum)));
|
||||
}
|
||||
}
|
||||
return psqt;
|
||||
|
||||
#endif
|
||||
|
||||
} // end of function transform()
|
||||
|
||||
|
||||
|
@ -370,7 +344,6 @@ namespace Stockfish::Eval::NNUE {
|
|||
// That might depend on the feature set and generally relies on the
|
||||
// feature set's update cost calculation to be correct and never
|
||||
// allow updates with more added/removed features than MaxActiveDimensions.
|
||||
using IndexList = ValueList<IndexType, FeatureSet::MaxActiveDimensions>;
|
||||
|
||||
#ifdef VECTOR
|
||||
// Gcc-10.2 unnecessarily spills AVX2 registers if this array
|
||||
|
@ -404,12 +377,12 @@ namespace Stockfish::Eval::NNUE {
|
|||
|
||||
// Gather all features to be updated.
|
||||
const Square ksq = pos.square<KING>(perspective);
|
||||
IndexList removed[2], added[2];
|
||||
FeatureSet::IndexList removed[2], added[2];
|
||||
FeatureSet::append_changed_indices(
|
||||
ksq, next, perspective, removed[0], added[0]);
|
||||
ksq, next->dirtyPiece, perspective, removed[0], added[0]);
|
||||
for (StateInfo *st2 = pos.state(); st2 != next; st2 = st2->previous)
|
||||
FeatureSet::append_changed_indices(
|
||||
ksq, st2, perspective, removed[1], added[1]);
|
||||
ksq, st2->dirtyPiece, perspective, removed[1], added[1]);
|
||||
|
||||
// Mark the accumulators as computed.
|
||||
next->accumulator.computed[perspective] = true;
|
||||
|
@ -534,7 +507,7 @@ namespace Stockfish::Eval::NNUE {
|
|||
// Refresh the accumulator
|
||||
auto& accumulator = pos.state()->accumulator;
|
||||
accumulator.computed[perspective] = true;
|
||||
IndexList active;
|
||||
FeatureSet::IndexList active;
|
||||
FeatureSet::append_active_indices(pos, perspective, active);
|
||||
|
||||
#ifdef VECTOR
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -32,30 +32,30 @@ namespace {
|
|||
#define S(mg, eg) make_score(mg, eg)
|
||||
|
||||
// Pawn penalties
|
||||
constexpr Score Backward = S( 9, 22);
|
||||
constexpr Score Doubled = S(13, 51);
|
||||
constexpr Score DoubledEarly = S(20, 7);
|
||||
constexpr Score Isolated = S( 3, 15);
|
||||
constexpr Score WeakLever = S( 4, 58);
|
||||
constexpr Score WeakUnopposed = S(13, 24);
|
||||
constexpr Score Backward = S( 6, 19);
|
||||
constexpr Score Doubled = S(11, 51);
|
||||
constexpr Score DoubledEarly = S(17, 7);
|
||||
constexpr Score Isolated = S( 1, 20);
|
||||
constexpr Score WeakLever = S( 2, 57);
|
||||
constexpr Score WeakUnopposed = S(15, 18);
|
||||
|
||||
// Bonus for blocked pawns at 5th or 6th rank
|
||||
constexpr Score BlockedPawn[2] = { S(-17, -6), S(-9, 2) };
|
||||
constexpr Score BlockedPawn[2] = { S(-19, -8), S(-7, 3) };
|
||||
|
||||
constexpr Score BlockedStorm[RANK_NB] = {
|
||||
S(0, 0), S(0, 0), S(75, 78), S(-8, 16), S(-6, 10), S(-6, 6), S(0, 2)
|
||||
S(0, 0), S(0, 0), S(64, 75), S(-3, 14), S(-12, 19), S(-7, 4), S(-10, 5)
|
||||
};
|
||||
|
||||
// Connected pawn bonus
|
||||
constexpr int Connected[RANK_NB] = { 0, 5, 7, 11, 23, 48, 87 };
|
||||
constexpr int Connected[RANK_NB] = { 0, 3, 7, 7, 15, 54, 86 };
|
||||
|
||||
// Strength of pawn shelter for our king by [distance from edge][rank].
|
||||
// RANK_1 = 0 is used for files where we have no pawn, or pawn is behind our king.
|
||||
constexpr Value ShelterStrength[int(FILE_NB) / 2][RANK_NB] = {
|
||||
{ V( -5), V( 82), V( 92), V( 54), V( 36), V( 22), V( 28) },
|
||||
{ V(-44), V( 63), V( 33), V(-50), V(-30), V(-12), V( -62) },
|
||||
{ V(-11), V( 77), V( 22), V( -6), V( 31), V( 8), V( -45) },
|
||||
{ V(-39), V(-12), V(-29), V(-50), V(-43), V(-68), V(-164) }
|
||||
{ V(-2), V(85), V(95), V(53), V(39), V(23), V(25) },
|
||||
{ V(-55), V(64), V(32), V(-55), V(-30), V(-11), V(-61) },
|
||||
{ V(-11), V(75), V(19), V(-6), V(26), V(9), V(-47) },
|
||||
{ V(-41), V(-11), V(-27), V(-58), V(-42), V(-66), V(-163) }
|
||||
};
|
||||
|
||||
// Danger of enemy pawns moving toward our king by [distance from edge][rank].
|
||||
|
@ -63,17 +63,17 @@ namespace {
|
|||
// is behind our king. Note that UnblockedStorm[0][1-2] accommodate opponent pawn
|
||||
// on edge, likely blocked by our king.
|
||||
constexpr Value UnblockedStorm[int(FILE_NB) / 2][RANK_NB] = {
|
||||
{ V( 87), V(-288), V(-168), V( 96), V( 47), V( 44), V( 46) },
|
||||
{ V( 42), V( -25), V( 120), V( 45), V( 34), V( -9), V( 24) },
|
||||
{ V( -8), V( 51), V( 167), V( 35), V( -4), V(-16), V(-12) },
|
||||
{ V(-17), V( -13), V( 100), V( 4), V( 9), V(-16), V(-31) }
|
||||
{ V(94), V(-280), V(-170), V(90), V(59), V(47), V(53) },
|
||||
{ V(43), V(-17), V(128), V(39), V(26), V(-17), V(15) },
|
||||
{ V(-9), V(62), V(170), V(34), V(-5), V(-20), V(-11) },
|
||||
{ V(-27), V(-19), V(106), V(10), V(2), V(-13), V(-24) }
|
||||
};
|
||||
|
||||
|
||||
// KingOnFile[semi-open Us][semi-open Them] contains bonuses/penalties
|
||||
// for king when the king is on a semi-open or open file.
|
||||
constexpr Score KingOnFile[2][2] = {{ S(-21,10), S(-7, 1) },
|
||||
{ S( 0,-3), S( 9,-4) }};
|
||||
constexpr Score KingOnFile[2][2] = {{ S(-18,11), S(-6,-3) },
|
||||
{ S( 0, 0), S( 5,-4) }};
|
||||
|
||||
#undef S
|
||||
#undef V
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -1013,9 +1013,9 @@ void Position::do_null_move(StateInfo& newSt) {
|
|||
}
|
||||
|
||||
st->key ^= Zobrist::side;
|
||||
++st->rule50;
|
||||
prefetch(TT.first_entry(key()));
|
||||
|
||||
++st->rule50;
|
||||
st->pliesFromNull = 0;
|
||||
|
||||
sideToMove = ~sideToMove;
|
||||
|
@ -1080,8 +1080,9 @@ bool Position::see_ge(Move m, Value threshold) const {
|
|||
if (swap <= 0)
|
||||
return true;
|
||||
|
||||
assert(color_of(piece_on(from)) == sideToMove);
|
||||
Bitboard occupied = pieces() ^ from ^ to;
|
||||
Color stm = color_of(piece_on(from));
|
||||
Color stm = sideToMove;
|
||||
Bitboard attackers = attackers_to(to, occupied);
|
||||
Bitboard stmAttackers, bb;
|
||||
int res = 1;
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -120,12 +120,12 @@ public:
|
|||
Bitboard attackers_to(Square s) const;
|
||||
Bitboard attackers_to(Square s, Bitboard occupied) const;
|
||||
Bitboard slider_blockers(Bitboard sliders, Square s, Bitboard& pinners) const;
|
||||
template<PieceType Pt> Bitboard attacks_by(Color c) const;
|
||||
|
||||
// Properties of moves
|
||||
bool legal(Move m) const;
|
||||
bool pseudo_legal(const Move m) const;
|
||||
bool capture(Move m) const;
|
||||
bool capture_or_promotion(Move m) const;
|
||||
bool gives_check(Move m) const;
|
||||
Piece moved_piece(Move m) const;
|
||||
Piece captured_piece() const;
|
||||
|
@ -285,6 +285,22 @@ inline Bitboard Position::attackers_to(Square s) const {
|
|||
return attackers_to(s, pieces());
|
||||
}
|
||||
|
||||
// attacks_by<Pt>(c) returns the union, as a Bitboard, of the attack sets of
// all pieces of type Pt that belong to color c.
//   Pt - piece type, selected at compile time
//   c  - side whose pieces are considered
template<PieceType Pt>
|
||||
inline Bitboard Position::attacks_by(Color c) const {
|
||||
|
||||
// Pawns are handled in one shot: pawn_attacks_bb is applied to the whole
// set of pawns of the requested color.
if constexpr (Pt == PAWN)
|
||||
return c == WHITE ? pawn_attacks_bb<WHITE>(pieces(WHITE, PAWN))
|
||||
: pawn_attacks_bb<BLACK>(pieces(BLACK, PAWN));
|
||||
else
|
||||
{
|
||||
Bitboard threats = 0;
|
||||
Bitboard attackers = pieces(c, Pt);
|
||||
// Accumulate the attacks of every piece in turn. pop_lsb() presumably
// removes and returns one square of `attackers` per call (the loop relies
// on `attackers` eventually becoming zero) - confirm in bitboard.h.
// pieces() with no arguments is presumably the full board occupancy used
// as the blocker set - confirm against the Position declaration.
while (attackers)
|
||||
threats |= attacks_bb<Pt>(pop_lsb(attackers), pieces());
|
||||
return threats;
|
||||
}
|
||||
}
|
||||
|
||||
inline Bitboard Position::checkers() const {
|
||||
return st->checkersBB;
|
||||
}
|
||||
|
@ -352,11 +368,6 @@ inline bool Position::is_chess960() const {
|
|||
return chess960;
|
||||
}
|
||||
|
||||
// Tests whether move m is a capture or a promotion.
// For a move whose type is not NORMAL the result is true unless the type is
// CASTLING (the remaining special types are presumably promotion and
// en passant - confirm against the MoveType enum). For a NORMAL move the
// result is true when the destination square is not empty.
inline bool Position::capture_or_promotion(Move m) const {
|
||||
assert(is_ok(m));
|
||||
return type_of(m) != NORMAL ? type_of(m) != CASTLING : !empty(to_sq(m));
|
||||
}
|
||||
|
||||
inline bool Position::capture(Move m) const {
|
||||
assert(is_ok(m));
|
||||
// Castling is encoded as "king captures rook"
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -47,6 +47,7 @@ struct Stack {
|
|||
Move excludedMove;
|
||||
Move killers[2];
|
||||
Value staticEval;
|
||||
Depth depth;
|
||||
int statScore;
|
||||
int moveCount;
|
||||
bool inCheck;
|
||||
|
@ -72,6 +73,7 @@ struct RootMove {
|
|||
|
||||
Value score = -VALUE_INFINITE;
|
||||
Value previousScore = -VALUE_INFINITE;
|
||||
Value averageScore = -VALUE_INFINITE;
|
||||
int selDepth = 0;
|
||||
int tbRank = 0;
|
||||
Value tbScore;
|
||||
|
|
387
DroidFishApp/src/main/cpp/stockfish/simd.h
Normal file
387
DroidFishApp/src/main/cpp/stockfish/simd.h
Normal file
|
@ -0,0 +1,387 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef STOCKFISH_SIMD_H_INCLUDED
|
||||
#define STOCKFISH_SIMD_H_INCLUDED
|
||||
|
||||
#if defined(USE_AVX2)
|
||||
# include <immintrin.h>
|
||||
|
||||
#elif defined(USE_SSE41)
|
||||
# include <smmintrin.h>
|
||||
|
||||
#elif defined(USE_SSSE3)
|
||||
# include <tmmintrin.h>
|
||||
|
||||
#elif defined(USE_SSE2)
|
||||
# include <emmintrin.h>
|
||||
|
||||
#elif defined(USE_MMX)
|
||||
# include <mmintrin.h>
|
||||
|
||||
#elif defined(USE_NEON)
|
||||
# include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
// The inline asm is only safe for GCC, where it is necessary to get good codegen.
|
||||
// See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101693
|
||||
// Clang does fine without it.
|
||||
// Play around here: https://godbolt.org/z/7EWqrYq51
|
||||
#if (defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER))
|
||||
#define USE_INLINE_ASM
|
||||
#endif
|
||||
|
||||
// Use either the AVX512 or AVX-VNNI version of the VNNI instructions.
|
||||
#if defined(USE_AVXVNNI)
|
||||
#define VNNI_PREFIX "%{vex%} "
|
||||
#else
|
||||
#define VNNI_PREFIX ""
|
||||
#endif
|
||||
|
||||
namespace Stockfish::Simd {
|
||||
|
||||
#if defined (USE_AVX512)
|
||||
|
||||
// Horizontal add for a 512-bit accumulator: sums all 32-bit lanes of `sum`
// with _mm512_reduce_add_epi32 and returns that total plus the scalar `bias`.
[[maybe_unused]] static int m512_hadd(__m512i sum, int bias) {
|
||||
return _mm512_reduce_add_epi32(sum) + bias;
|
||||
}
|
||||
|
||||
/*
|
||||
Parameters:
|
||||
sum0 = [zmm0.i128[0], zmm0.i128[1], zmm0.i128[2], zmm0.i128[3]]
|
||||
sum1 = [zmm1.i128[0], zmm1.i128[1], zmm1.i128[2], zmm1.i128[3]]
|
||||
sum2 = [zmm2.i128[0], zmm2.i128[1], zmm2.i128[2], zmm2.i128[3]]
|
||||
sum3 = [zmm3.i128[0], zmm3.i128[1], zmm3.i128[2], zmm3.i128[3]]
|
||||
|
||||
Returns:
|
||||
ret = [
|
||||
reduce_add_epi32(zmm0.i128[0]), reduce_add_epi32(zmm1.i128[0]), reduce_add_epi32(zmm2.i128[0]), reduce_add_epi32(zmm3.i128[0]),
|
||||
reduce_add_epi32(zmm0.i128[1]), reduce_add_epi32(zmm1.i128[1]), reduce_add_epi32(zmm2.i128[1]), reduce_add_epi32(zmm3.i128[1]),
|
||||
reduce_add_epi32(zmm0.i128[2]), reduce_add_epi32(zmm1.i128[2]), reduce_add_epi32(zmm2.i128[2]), reduce_add_epi32(zmm3.i128[2]),
|
||||
reduce_add_epi32(zmm0.i128[3]), reduce_add_epi32(zmm1.i128[3]), reduce_add_epi32(zmm2.i128[3]), reduce_add_epi32(zmm3.i128[3])
|
||||
]
|
||||
*/
|
||||
// Reduces four 512-bit accumulators at once. Within every 128-bit lane the
// four 32-bit elements of each input are summed, and the four per-input sums
// are packed side by side (sum0, sum1, sum2, sum3) into that lane of the
// result. No addition crosses a 128-bit lane boundary here.
[[maybe_unused]] static __m512i m512_hadd128x16_interleave(
|
||||
__m512i sum0, __m512i sum1, __m512i sum2, __m512i sum3) {
|
||||
|
||||
// Interleave 32-bit elements of sum0/sum1: the "lo" result holds elements
// 0 and 1 of each lane, the "hi" result holds elements 2 and 3.
__m512i sum01a = _mm512_unpacklo_epi32(sum0, sum1);
|
||||
__m512i sum01b = _mm512_unpackhi_epi32(sum0, sum1);
|
||||
|
||||
// Same interleave for sum2/sum3.
__m512i sum23a = _mm512_unpacklo_epi32(sum2, sum3);
|
||||
__m512i sum23b = _mm512_unpackhi_epi32(sum2, sum3);
|
||||
|
||||
// After these adds each lane holds two partial sums per input
// (element 0 + element 2, and element 1 + element 3).
__m512i sum01 = _mm512_add_epi32(sum01a, sum01b);
|
||||
__m512i sum23 = _mm512_add_epi32(sum23a, sum23b);
|
||||
|
||||
// Interleave at 64-bit granularity so the two partial sums of every input
// end up in the same element position of the two operands below.
__m512i sum0123a = _mm512_unpacklo_epi64(sum01, sum23);
|
||||
__m512i sum0123b = _mm512_unpackhi_epi64(sum01, sum23);
|
||||
|
||||
// Final add completes the per-lane sums for all four inputs.
return _mm512_add_epi32(sum0123a, sum0123b);
|
||||
}
|
||||
|
||||
// Computes the full horizontal sums of four 512-bit accumulators in one go.
// Returns a 128-bit vector whose four 32-bit elements are the totals of
// sum0, sum1, sum2 and sum3 respectively, each with the matching element of
// `bias` added.
[[maybe_unused]] static __m128i m512_haddx4(
|
||||
__m512i sum0, __m512i sum1, __m512i sum2, __m512i sum3,
|
||||
__m128i bias) {
|
||||
|
||||
// Per-128-bit-lane sums of the four inputs, packed as (sum0..sum3) per lane.
__m512i sum = m512_hadd128x16_interleave(sum0, sum1, sum2, sum3);
|
||||
|
||||
// Fold the upper 256 bits onto the lower 256 bits...
__m256i sum256lo = _mm512_castsi512_si256(sum);
|
||||
__m256i sum256hi = _mm512_extracti64x4_epi64(sum, 1);
|
||||
|
||||
sum256lo = _mm256_add_epi32(sum256lo, sum256hi);
|
||||
|
||||
// ...then the upper 128 bits onto the lower 128 bits, which combines the
// contributions of all four original lanes.
__m128i sum128lo = _mm256_castsi256_si128(sum256lo);
|
||||
__m128i sum128hi = _mm256_extracti128_si256(sum256lo, 1);
|
||||
|
||||
return _mm_add_epi32(_mm_add_epi32(sum128lo, sum128hi), bias);
|
||||
}
|
||||
|
||||
// Accumulates into `acc` (updated in place) the 8-bit dot product of `a` and
// `b`, widened to 32-bit elements. Four implementations are selected by the
// preprocessor: with or without USE_VNNI, and as inline asm or intrinsics
// depending on USE_INLINE_ASM.
//   acc - 32-bit accumulator vector, read and written
//   a   - first byte operand (the unsigned operand of dpbusd / maddubs)
//   b   - second byte operand (the signed operand of dpbusd / maddubs)
[[maybe_unused]] static void m512_add_dpbusd_epi32(
|
||||
__m512i& acc,
|
||||
__m512i a,
|
||||
__m512i b) {
|
||||
|
||||
// VNNI path: a single vpdpbusd does multiply, widen and accumulate.
# if defined (USE_VNNI)
|
||||
# if defined (USE_INLINE_ASM)
|
||||
asm(
|
||||
"vpdpbusd %[b], %[a], %[acc]\n\t"
|
||||
: [acc]"+v"(acc)
|
||||
: [a]"v"(a), [b]"vm"(b)
|
||||
);
|
||||
# else
|
||||
acc = _mm512_dpbusd_epi32(acc, a, b);
|
||||
# endif
|
||||
// Fallback path: maddubs produces 16-bit pair sums, madd against a vector of
// ones widens adjacent 16-bit pairs to 32 bits, then the result is added to
// acc. Note that maddubs saturates its 16-bit results.
# else
|
||||
# if defined (USE_INLINE_ASM)
|
||||
__m512i tmp = _mm512_maddubs_epi16(a, b);
|
||||
// tmp is an early-clobber read/write operand ("+&v"): it is overwritten by
// the first instruction while the other inputs are still needed.
asm(
|
||||
"vpmaddwd %[tmp], %[ones], %[tmp]\n\t"
|
||||
"vpaddd %[acc], %[tmp], %[acc]\n\t"
|
||||
: [acc]"+v"(acc), [tmp]"+&v"(tmp)
|
||||
: [ones]"v"(_mm512_set1_epi16(1))
|
||||
);
|
||||
# else
|
||||
__m512i product0 = _mm512_maddubs_epi16(a, b);
|
||||
product0 = _mm512_madd_epi16(product0, _mm512_set1_epi16(1));
|
||||
acc = _mm512_add_epi32(acc, product0);
|
||||
# endif
|
||||
# endif
|
||||
}
|
||||
|
||||
// Two-pair variant of m512_add_dpbusd_epi32: accumulates into `acc` (updated
// in place) the 8-bit dot products of (a0, b0) and (a1, b1).
// In the non-VNNI paths the two 16-bit maddubs results are combined with a
// saturating 16-bit add before being widened to 32 bits, so only one
// widening multiply and one 32-bit add are needed for both pairs.
[[maybe_unused]] static void m512_add_dpbusd_epi32x2(
|
||||
__m512i& acc,
|
||||
__m512i a0, __m512i b0,
|
||||
__m512i a1, __m512i b1) {
|
||||
|
||||
// VNNI path: one vpdpbusd per operand pair, both accumulating into acc.
# if defined (USE_VNNI)
|
||||
# if defined (USE_INLINE_ASM)
|
||||
asm(
|
||||
"vpdpbusd %[b0], %[a0], %[acc]\n\t"
|
||||
"vpdpbusd %[b1], %[a1], %[acc]\n\t"
|
||||
: [acc]"+v"(acc)
|
||||
: [a0]"v"(a0), [b0]"vm"(b0), [a1]"v"(a1), [b1]"vm"(b1)
|
||||
);
|
||||
# else
|
||||
acc = _mm512_dpbusd_epi32(acc, a0, b0);
|
||||
acc = _mm512_dpbusd_epi32(acc, a1, b1);
|
||||
# endif
|
||||
// Fallback path: maddubs for each pair, saturating 16-bit add of the two
// results, widen with madd against ones, then add to acc.
# else
|
||||
# if defined (USE_INLINE_ASM)
|
||||
__m512i tmp0 = _mm512_maddubs_epi16(a0, b0);
|
||||
__m512i tmp1 = _mm512_maddubs_epi16(a1, b1);
|
||||
// tmp0 is the early-clobber scratch/result register ("+&v").
asm(
|
||||
"vpaddsw %[tmp0], %[tmp1], %[tmp0]\n\t"
|
||||
"vpmaddwd %[tmp0], %[ones], %[tmp0]\n\t"
|
||||
"vpaddd %[acc], %[tmp0], %[acc]\n\t"
|
||||
: [acc]"+v"(acc), [tmp0]"+&v"(tmp0)
|
||||
: [tmp1]"v"(tmp1), [ones]"v"(_mm512_set1_epi16(1))
|
||||
);
|
||||
# else
|
||||
__m512i product0 = _mm512_maddubs_epi16(a0, b0);
|
||||
__m512i product1 = _mm512_maddubs_epi16(a1, b1);
|
||||
product0 = _mm512_adds_epi16(product0, product1);
|
||||
product0 = _mm512_madd_epi16(product0, _mm512_set1_epi16(1));
|
||||
acc = _mm512_add_epi32(acc, product0);
|
||||
# endif
|
||||
# endif
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined (USE_AVX2)
|
||||
|
||||
// Horizontal add for a 256-bit accumulator: returns the sum of the eight
// 32-bit elements of `sum` plus the scalar `bias`.
[[maybe_unused]] static int m256_hadd(__m256i sum, int bias) {
|
||||
// Fold the upper 128 bits onto the lower 128 bits.
__m128i sum128 = _mm_add_epi32(_mm256_castsi256_si128(sum), _mm256_extracti128_si256(sum, 1));
|
||||
// Add the vector to a copy of itself with the two 64-bit halves swapped.
sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_BADC));
|
||||
// Add again with adjacent 32-bit elements swapped; element 0 now holds the total.
sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_CDAB));
|
||||
return _mm_cvtsi128_si32(sum128) + bias;
|
||||
}
|
||||
|
||||
// Computes the full horizontal sums of four 256-bit accumulators at once.
// Returns a 128-bit vector whose four 32-bit elements are the totals of
// sum0, sum1, sum2 and sum3 respectively, each with the matching element of
// `bias` added.
[[maybe_unused]] static __m128i m256_haddx4(
|
||||
__m256i sum0, __m256i sum1, __m256i sum2, __m256i sum3,
|
||||
__m128i bias) {
|
||||
|
||||
// _mm256_hadd_epi32 adds adjacent pairs within each 128-bit lane; two
// rounds leave, in every lane, the lane-local sums of sum0..sum3 in order.
sum0 = _mm256_hadd_epi32(sum0, sum1);
|
||||
sum2 = _mm256_hadd_epi32(sum2, sum3);
|
||||
|
||||
sum0 = _mm256_hadd_epi32(sum0, sum2);
|
||||
|
||||
// Combine the two 128-bit lanes to obtain the totals.
__m128i sum128lo = _mm256_castsi256_si128(sum0);
|
||||
__m128i sum128hi = _mm256_extracti128_si256(sum0, 1);
|
||||
|
||||
return _mm_add_epi32(_mm_add_epi32(sum128lo, sum128hi), bias);
|
||||
}
|
||||
|
||||
// 256-bit version of the byte dot-product accumulate: adds to `acc` (updated
// in place) the 8-bit dot product of `a` and `b`, widened to 32-bit elements.
// The implementation is chosen by USE_VNNI and USE_INLINE_ASM.
//   acc - 32-bit accumulator vector, read and written
//   a   - first byte operand (the unsigned operand of dpbusd / maddubs)
//   b   - second byte operand (the signed operand of dpbusd / maddubs)
[[maybe_unused]] static void m256_add_dpbusd_epi32(
|
||||
__m256i& acc,
|
||||
__m256i a,
|
||||
__m256i b) {
|
||||
|
||||
// VNNI path. VNNI_PREFIX expands to a "{vex} " prefix when USE_AVXVNNI is
// defined and to an empty string otherwise.
# if defined (USE_VNNI)
|
||||
# if defined (USE_INLINE_ASM)
|
||||
asm(
|
||||
VNNI_PREFIX "vpdpbusd %[b], %[a], %[acc]\n\t"
|
||||
: [acc]"+v"(acc)
|
||||
: [a]"v"(a), [b]"vm"(b)
|
||||
);
|
||||
# else
|
||||
acc = _mm256_dpbusd_epi32(acc, a, b);
|
||||
# endif
|
||||
// Fallback path: maddubs (16-bit saturating pair sums), madd against ones to
// widen to 32 bits, then add to acc.
# else
|
||||
# if defined (USE_INLINE_ASM)
|
||||
__m256i tmp = _mm256_maddubs_epi16(a, b);
|
||||
// tmp is an early-clobber read/write operand ("+&v").
asm(
|
||||
"vpmaddwd %[tmp], %[ones], %[tmp]\n\t"
|
||||
"vpaddd %[acc], %[tmp], %[acc]\n\t"
|
||||
: [acc]"+v"(acc), [tmp]"+&v"(tmp)
|
||||
: [ones]"v"(_mm256_set1_epi16(1))
|
||||
);
|
||||
# else
|
||||
__m256i product0 = _mm256_maddubs_epi16(a, b);
|
||||
product0 = _mm256_madd_epi16(product0, _mm256_set1_epi16(1));
|
||||
acc = _mm256_add_epi32(acc, product0);
|
||||
# endif
|
||||
# endif
|
||||
}
|
||||
|
||||
// Two-pair 256-bit variant: accumulates into `acc` (updated in place) the
// 8-bit dot products of (a0, b0) and (a1, b1).
// In the non-VNNI paths the two 16-bit maddubs results are merged with a
// saturating 16-bit add before a single widening step.
[[maybe_unused]] static void m256_add_dpbusd_epi32x2(
|
||||
__m256i& acc,
|
||||
__m256i a0, __m256i b0,
|
||||
__m256i a1, __m256i b1) {
|
||||
|
||||
// VNNI path: one vpdpbusd per operand pair (see VNNI_PREFIX for the
// AVX-VNNI vs AVX512-VNNI encoding choice).
# if defined (USE_VNNI)
|
||||
# if defined (USE_INLINE_ASM)
|
||||
asm(
|
||||
VNNI_PREFIX "vpdpbusd %[b0], %[a0], %[acc]\n\t"
|
||||
VNNI_PREFIX "vpdpbusd %[b1], %[a1], %[acc]\n\t"
|
||||
: [acc]"+v"(acc)
|
||||
: [a0]"v"(a0), [b0]"vm"(b0), [a1]"v"(a1), [b1]"vm"(b1)
|
||||
);
|
||||
# else
|
||||
acc = _mm256_dpbusd_epi32(acc, a0, b0);
|
||||
acc = _mm256_dpbusd_epi32(acc, a1, b1);
|
||||
# endif
|
||||
// Fallback path: maddubs for each pair, saturating 16-bit add of the two
// results, widen with madd against ones, then add to acc.
# else
|
||||
# if defined (USE_INLINE_ASM)
|
||||
__m256i tmp0 = _mm256_maddubs_epi16(a0, b0);
|
||||
__m256i tmp1 = _mm256_maddubs_epi16(a1, b1);
|
||||
// tmp0 is the early-clobber scratch/result register ("+&v").
asm(
|
||||
"vpaddsw %[tmp0], %[tmp1], %[tmp0]\n\t"
|
||||
"vpmaddwd %[tmp0], %[ones], %[tmp0]\n\t"
|
||||
"vpaddd %[acc], %[tmp0], %[acc]\n\t"
|
||||
: [acc]"+v"(acc), [tmp0]"+&v"(tmp0)
|
||||
: [tmp1]"v"(tmp1), [ones]"v"(_mm256_set1_epi16(1))
|
||||
);
|
||||
# else
|
||||
__m256i product0 = _mm256_maddubs_epi16(a0, b0);
|
||||
__m256i product1 = _mm256_maddubs_epi16(a1, b1);
|
||||
product0 = _mm256_adds_epi16(product0, product1);
|
||||
product0 = _mm256_madd_epi16(product0, _mm256_set1_epi16(1));
|
||||
acc = _mm256_add_epi32(acc, product0);
|
||||
# endif
|
||||
# endif
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined (USE_SSSE3)
|
||||
|
||||
// Horizontal add for a 128-bit accumulator: returns the sum of the four
// 32-bit elements of `sum` plus the scalar `bias`.
// The shuffle immediates are written as raw numbers; the trailing comments
// name the corresponding _MM_PERM_* permutation.
[[maybe_unused]] static int m128_hadd(__m128i sum, int bias) {
|
||||
// Add the vector to a copy of itself with the two 64-bit halves swapped.
sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0x4E)); //_MM_PERM_BADC
|
||||
// Add again with adjacent 32-bit elements swapped; element 0 now holds the total.
sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0xB1)); //_MM_PERM_CDAB
|
||||
return _mm_cvtsi128_si32(sum) + bias;
|
||||
}
|
||||
|
||||
// Computes the horizontal sums of four 128-bit accumulators at once.
// Returns a vector whose four 32-bit elements are the totals of sum0, sum1,
// sum2 and sum3 respectively, each with the matching element of `bias` added.
[[maybe_unused]] static __m128i m128_haddx4(
|
||||
__m128i sum0, __m128i sum1, __m128i sum2, __m128i sum3,
|
||||
__m128i bias) {
|
||||
|
||||
// Two rounds of pairwise horizontal adds reduce each input to one element.
sum0 = _mm_hadd_epi32(sum0, sum1);
|
||||
sum2 = _mm_hadd_epi32(sum2, sum3);
|
||||
sum0 = _mm_hadd_epi32(sum0, sum2);
|
||||
return _mm_add_epi32(sum0, bias);
|
||||
}
|
||||
|
||||
// 128-bit (SSSE3) version of the byte dot-product accumulate: adds to `acc`
// (updated in place) the 8-bit dot product of `a` and `b`, widened to 32-bit
// elements. There is no VNNI variant at this width.
//   acc - 32-bit accumulator vector, read and written
//   a   - first byte operand (the unsigned operand of maddubs)
//   b   - second byte operand (the signed operand of maddubs)
[[maybe_unused]] static void m128_add_dpbusd_epi32(
|
||||
__m128i& acc,
|
||||
__m128i a,
|
||||
__m128i b) {
|
||||
|
||||
// Inline-asm path: uses the two-operand (non-VEX) instruction forms, where
// the last operand is both source and destination.
# if defined (USE_INLINE_ASM)
|
||||
__m128i tmp = _mm_maddubs_epi16(a, b);
|
||||
asm(
|
||||
"pmaddwd %[ones], %[tmp]\n\t"
|
||||
"paddd %[tmp], %[acc]\n\t"
|
||||
: [acc]"+v"(acc), [tmp]"+&v"(tmp)
|
||||
: [ones]"v"(_mm_set1_epi16(1))
|
||||
);
|
||||
// Intrinsics path: maddubs (16-bit saturating pair sums), madd against ones
// to widen to 32 bits, then add to acc.
# else
|
||||
__m128i product0 = _mm_maddubs_epi16(a, b);
|
||||
product0 = _mm_madd_epi16(product0, _mm_set1_epi16(1));
|
||||
acc = _mm_add_epi32(acc, product0);
|
||||
# endif
|
||||
}
|
||||
|
||||
// Two-pair 128-bit variant: accumulates into `acc` (updated in place) the
// 8-bit dot products of (a0, b0) and (a1, b1). The two 16-bit maddubs
// results are merged with a saturating 16-bit add before a single widening
// step and a single 32-bit add.
[[maybe_unused]] static void m128_add_dpbusd_epi32x2(
|
||||
__m128i& acc,
|
||||
__m128i a0, __m128i b0,
|
||||
__m128i a1, __m128i b1) {
|
||||
|
||||
// Inline-asm path: two-operand (non-VEX) forms, tmp0 is the early-clobber
// scratch/result register ("+&v").
# if defined (USE_INLINE_ASM)
|
||||
__m128i tmp0 = _mm_maddubs_epi16(a0, b0);
|
||||
__m128i tmp1 = _mm_maddubs_epi16(a1, b1);
|
||||
asm(
|
||||
"paddsw %[tmp1], %[tmp0]\n\t"
|
||||
"pmaddwd %[ones], %[tmp0]\n\t"
|
||||
"paddd %[tmp0], %[acc]\n\t"
|
||||
: [acc]"+v"(acc), [tmp0]"+&v"(tmp0)
|
||||
: [tmp1]"v"(tmp1), [ones]"v"(_mm_set1_epi16(1))
|
||||
);
|
||||
// Intrinsics path: same sequence expressed with intrinsics.
# else
|
||||
__m128i product0 = _mm_maddubs_epi16(a0, b0);
|
||||
__m128i product1 = _mm_maddubs_epi16(a1, b1);
|
||||
product0 = _mm_adds_epi16(product0, product1);
|
||||
product0 = _mm_madd_epi16(product0, _mm_set1_epi16(1));
|
||||
acc = _mm_add_epi32(acc, product0);
|
||||
# endif
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined (USE_NEON)
|
||||
|
||||
// Returns the sum of the four 32-bit elements of `s`.
// When USE_NEON is 8 or greater the across-vector add intrinsic vaddvq_s32
// is used; otherwise the four lanes are read individually and added.
[[maybe_unused]] static int neon_m128_reduce_add_epi32(int32x4_t s) {
|
||||
# if USE_NEON >= 8
|
||||
return vaddvq_s32(s);
|
||||
# else
|
||||
return s[0] + s[1] + s[2] + s[3];
|
||||
# endif
|
||||
}
|
||||
|
||||
// Horizontal add for a NEON accumulator: returns the sum of the four 32-bit
// elements of `sum` plus the scalar `bias`.
[[maybe_unused]] static int neon_m128_hadd(int32x4_t sum, int bias) {
|
||||
return neon_m128_reduce_add_epi32(sum) + bias;
|
||||
}
|
||||
|
||||
// Computes the horizontal sums of four NEON accumulators.
// Returns a vector whose four 32-bit elements are the totals of sum0, sum1,
// sum2 and sum3 respectively, each with the matching element of `bias` added.
[[maybe_unused]] static int32x4_t neon_m128_haddx4(
|
||||
int32x4_t sum0, int32x4_t sum1, int32x4_t sum2, int32x4_t sum3,
|
||||
int32x4_t bias) {
|
||||
|
||||
// Each input is reduced to a scalar separately, then the four scalars are
// packed into one vector.
int32x4_t hsums {
|
||||
neon_m128_reduce_add_epi32(sum0),
|
||||
neon_m128_reduce_add_epi32(sum1),
|
||||
neon_m128_reduce_add_epi32(sum2),
|
||||
neon_m128_reduce_add_epi32(sum3)
|
||||
};
|
||||
return vaddq_s32(hsums, bias);
|
||||
}
|
||||
|
||||
// NEON two-pair dot-product accumulate: adds to `acc` (updated in place) the
// products a0*b0 + a1*b1, summed pairwise into 32-bit elements.
// Unlike the x86 helpers of the same name, all four operands here are
// signed 8-bit vectors (int8x8_t).
[[maybe_unused]] static void neon_m128_add_dpbusd_epi32x2(
|
||||
int32x4_t& acc,
|
||||
int8x8_t a0, int8x8_t b0,
|
||||
int8x8_t a1, int8x8_t b1) {
|
||||
|
||||
// Widening multiply of the first pair to 16-bit elements.
int16x8_t product = vmull_s8(a0, b0);
|
||||
// Widening multiply-accumulate of the second pair onto the same 16-bit elements.
product = vmlal_s8(product, a1, b1);
|
||||
// Add adjacent 16-bit pairs and accumulate them into the 32-bit elements of acc.
acc = vpadalq_s16(acc, product);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
#endif // STOCKFISH_SIMD_H_INCLUDED
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -472,8 +472,6 @@ TBTables TBTables;
|
|||
// If the corresponding file exists two new objects TBTable<WDL> and TBTable<DTZ>
|
||||
// are created and added to the lists and hash table. Called at init time.
|
||||
void TBTables::add(const std::vector<PieceType>& pieces) {
|
||||
if (sizeof(char*) < 8 && pieces.size() >= 6)
|
||||
return; // Not enough address space to support 6-men TB on 32-bit OS
|
||||
|
||||
std::string code;
|
||||
|
||||
|
@ -771,7 +769,7 @@ Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* resu
|
|||
goto encode_remaining; // With pawns we have finished special treatments
|
||||
}
|
||||
|
||||
// In positions withouth pawns, we further flip the squares to ensure leading
|
||||
// In positions without pawns, we further flip the squares to ensure leading
|
||||
// piece is below RANK_5.
|
||||
if (rank_of(squares[0]) > RANK_4)
|
||||
for (int i = 0; i < size; ++i)
|
||||
|
@ -814,7 +812,7 @@ Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* resu
|
|||
// Rs "together" in 62 * 61 / 2 ways (we divide by 2 because rooks can be
|
||||
// swapped and still get the same position.)
|
||||
//
|
||||
// In case we have at least 3 unique pieces (inlcuded kings) we encode them
|
||||
// In case we have at least 3 unique pieces (included kings) we encode them
|
||||
// together.
|
||||
if (entry->hasUniquePieces) {
|
||||
|
||||
|
@ -829,7 +827,7 @@ Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* resu
|
|||
+ (squares[1] - adjust1)) * 62
|
||||
+ squares[2] - adjust2;
|
||||
|
||||
// First piece is on a1-h8 diagonal, second below: map this occurence to
|
||||
// First piece is on a1-h8 diagonal, second below: map this occurrence to
|
||||
// 6 to differentiate from the above case, rank_of() maps a1-d4 diagonal
|
||||
// to 0...3 and finally MapB1H1H7[] maps the b1-h1-h7 triangle to 0..27.
|
||||
else if (off_A1H8(squares[1]))
|
||||
|
@ -859,7 +857,7 @@ encode_remaining:
|
|||
idx *= d->groupIdx[0];
|
||||
Square* groupSq = squares + d->groupLen[0];
|
||||
|
||||
// Encode remainig pawns then pieces according to square, in ascending order
|
||||
// Encode remaining pawns then pieces according to square, in ascending order
|
||||
bool remainingPawns = entry->hasPawns && entry->pawnCount[1];
|
||||
|
||||
while (d->groupLen[++next])
|
||||
|
@ -887,7 +885,7 @@ encode_remaining:
|
|||
|
||||
// Group together pieces that will be encoded together. The general rule is that
|
||||
// a group contains pieces of same type and color. The exception is the leading
|
||||
// group that, in case of positions withouth pawns, can be formed by 3 different
|
||||
// group that, in case of positions without pawns, can be formed by 3 different
|
||||
// pieces (default) or by the king pair when there is not a unique piece apart
|
||||
// from the kings. When there are pawns, pawns are always first in pieces[].
|
||||
//
|
||||
|
@ -919,7 +917,7 @@ void set_groups(T& e, PairsData* d, int order[], File f) {
|
|||
//
|
||||
// This ensures unique encoding for the whole position. The order of the
|
||||
// groups is a per-table parameter and could not follow the canonical leading
|
||||
// pawns/pieces -> remainig pawns -> remaining pieces. In particular the
|
||||
// pawns/pieces -> remaining pawns -> remaining pieces. In particular the
|
||||
// first group is at order[0] position and the remaining pawns, when present,
|
||||
// are at order[1] position.
|
||||
bool pp = e.hasPawns && e.pawnCount[1]; // Pawns on both sides
|
||||
|
@ -939,7 +937,7 @@ void set_groups(T& e, PairsData* d, int order[], File f) {
|
|||
d->groupIdx[1] = idx;
|
||||
idx *= Binomial[d->groupLen[1]][48 - d->groupLen[0]];
|
||||
}
|
||||
else // Remainig pieces
|
||||
else // Remaining pieces
|
||||
{
|
||||
d->groupIdx[next] = idx;
|
||||
idx *= Binomial[d->groupLen[next]][freeSquares];
|
||||
|
@ -949,7 +947,7 @@ void set_groups(T& e, PairsData* d, int order[], File f) {
|
|||
d->groupIdx[n] = idx;
|
||||
}
|
||||
|
||||
// In Recursive Pairing each symbol represents a pair of childern symbols. So
|
||||
// In Recursive Pairing each symbol represents a pair of children symbols. So
|
||||
// read d->btree[] symbols data and expand each one in his left and right child
|
||||
// symbol until reaching the leafs that represent the symbol value.
|
||||
uint8_t set_symlen(PairsData* d, Sym s, std::vector<bool>& visited) {
|
||||
|
@ -1319,7 +1317,7 @@ void Tablebases::init(const std::string& paths) {
|
|||
for (auto p : bothOnDiagonal)
|
||||
MapKK[p.first][p.second] = code++;
|
||||
|
||||
// Binomial[] stores the Binomial Coefficents using Pascal rule. There
|
||||
// Binomial[] stores the Binomial Coefficients using Pascal rule. There
|
||||
// are Binomial[k][n] ways to choose k elements from a set of n elements.
|
||||
Binomial[0][0] = 1;
|
||||
|
||||
|
@ -1339,7 +1337,7 @@ void Tablebases::init(const std::string& paths) {
|
|||
for (int leadPawnsCnt = 1; leadPawnsCnt <= 5; ++leadPawnsCnt)
|
||||
for (File f = FILE_A; f <= FILE_D; ++f)
|
||||
{
|
||||
// Restart the index at every file because TB table is splitted
|
||||
// Restart the index at every file because TB table is split
|
||||
// by file, so we can reuse the same index for different files.
|
||||
int idx = 0;
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -38,7 +38,7 @@ enum WDLScore {
|
|||
// Possible states after a probing operation
|
||||
enum ProbeState {
|
||||
FAIL = 0, // Probe failed (missing file table)
|
||||
OK = 1, // Probe succesful
|
||||
OK = 1, // Probe successful
|
||||
CHANGE_STM = -1, // DTZ should check the other side
|
||||
ZEROING_BEST_MOVE = 2 // Best move zeroes DTZ (capture or pawn move)
|
||||
};
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -59,7 +59,6 @@ void Thread::clear() {
|
|||
|
||||
counterMoves.fill(MOVE_NONE);
|
||||
mainHistory.fill(0);
|
||||
lowPlyHistory.fill(0);
|
||||
captureHistory.fill(0);
|
||||
|
||||
for (bool inCheck : { false, true })
|
||||
|
@ -67,7 +66,7 @@ void Thread::clear() {
|
|||
{
|
||||
for (auto& to : continuationHistory[inCheck][c])
|
||||
for (auto& h : to)
|
||||
h->fill(0);
|
||||
h->fill(-71);
|
||||
continuationHistory[inCheck][c][NO_PIECE][0]->fill(Search::CounterMovePruneThreshold - 1);
|
||||
}
|
||||
}
|
||||
|
@ -162,6 +161,7 @@ void ThreadPool::clear() {
|
|||
|
||||
main()->callsCnt = 0;
|
||||
main()->bestPreviousScore = VALUE_INFINITE;
|
||||
main()->bestPreviousAverageScore = VALUE_INFINITE;
|
||||
main()->previousTimeReduction = 1.0;
|
||||
}
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -60,18 +60,19 @@ public:
|
|||
Pawns::Table pawnsTable;
|
||||
Material::Table materialTable;
|
||||
size_t pvIdx, pvLast;
|
||||
uint64_t ttHitAverage;
|
||||
RunningAverage complexityAverage;
|
||||
std::atomic<uint64_t> nodes, tbHits, bestMoveChanges;
|
||||
int selDepth, nmpMinPly;
|
||||
Color nmpColor;
|
||||
std::atomic<uint64_t> nodes, tbHits, bestMoveChanges;
|
||||
Value bestValue, optimism[COLOR_NB];
|
||||
|
||||
Position rootPos;
|
||||
StateInfo rootState;
|
||||
Search::RootMoves rootMoves;
|
||||
Depth rootDepth, completedDepth;
|
||||
Depth rootDepth, completedDepth, depth;
|
||||
Value rootDelta;
|
||||
CounterMoveHistory counterMoves;
|
||||
ButterflyHistory mainHistory;
|
||||
LowPlyHistory lowPlyHistory;
|
||||
CapturePieceToHistory captureHistory;
|
||||
ContinuationHistory continuationHistory[2][2];
|
||||
Score trend;
|
||||
|
@ -89,6 +90,7 @@ struct MainThread : public Thread {
|
|||
|
||||
double previousTimeReduction;
|
||||
Value bestPreviousScore;
|
||||
Value bestPreviousAverageScore;
|
||||
Value iterValue[4];
|
||||
int callsCnt;
|
||||
bool stopOnPonderhit;
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -68,6 +68,9 @@ void TimeManagement::init(Search::LimitsType& limits, Color us, int ply) {
|
|||
TimePoint timeLeft = std::max(TimePoint(1),
|
||||
limits.time[us] + limits.inc[us] * (mtg - 1) - moveOverhead * (2 + mtg));
|
||||
|
||||
// Use extra time with larger increments
|
||||
double optExtra = std::clamp(1.0 + 12.0 * limits.inc[us] / limits.time[us], 1.0, 1.12);
|
||||
|
||||
// A user may scale time usage by setting UCI option "Slow Mover"
|
||||
// Default is 100 and changing this value will probably lose elo.
|
||||
timeLeft = slowMover * timeLeft / 100;
|
||||
|
@ -78,15 +81,16 @@ void TimeManagement::init(Search::LimitsType& limits, Color us, int ply) {
|
|||
if (limits.movestogo == 0)
|
||||
{
|
||||
optScale = std::min(0.0084 + std::pow(ply + 3.0, 0.5) * 0.0042,
|
||||
0.2 * limits.time[us] / double(timeLeft));
|
||||
0.2 * limits.time[us] / double(timeLeft))
|
||||
* optExtra;
|
||||
maxScale = std::min(7.0, 4.0 + ply / 12.0);
|
||||
}
|
||||
|
||||
// x moves in y seconds (+ z increment)
|
||||
else
|
||||
{
|
||||
optScale = std::min((0.8 + ply / 128.0) / mtg,
|
||||
0.8 * limits.time[us] / double(timeLeft));
|
||||
optScale = std::min((0.88 + ply / 116.4) / mtg,
|
||||
0.88 * limits.time[us] / double(timeLeft));
|
||||
maxScale = std::min(6.3, 1.5 + 0.11 * mtg);
|
||||
}
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -40,9 +40,9 @@ void TTEntry::save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev)
|
|||
move16 = (uint16_t)m;
|
||||
|
||||
// Overwrite less valuable entries (cheapest checks first)
|
||||
if (b == BOUND_EXACT
|
||||
if ( b == BOUND_EXACT
|
||||
|| (uint16_t)k != key16
|
||||
|| d - DEPTH_OFFSET > depth8 - 4)
|
||||
|| d - DEPTH_OFFSET + 2 * pv > depth8 - 4)
|
||||
{
|
||||
assert(d > DEPTH_OFFSET);
|
||||
assert(d < 256 + DEPTH_OFFSET);
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -84,7 +84,7 @@ class Tune {
|
|||
|
||||
static Tune& instance() { static Tune t; return t; } // Singleton
|
||||
|
||||
// Use polymorphism to accomodate Entry of different types in the same vector
|
||||
// Use polymorphism to accommodate Entry of different types in the same vector
|
||||
struct EntryBase {
|
||||
virtual ~EntryBase() = default;
|
||||
virtual void init_option() = 0;
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -465,10 +465,6 @@ constexpr Move make_move(Square from, Square to) {
|
|||
return Move((from << 6) + to);
|
||||
}
|
||||
|
||||
constexpr Move reverse_move(Move m) {
|
||||
return make_move(to_sq(m), from_sq(m));
|
||||
}
|
||||
|
||||
template<MoveType T>
|
||||
constexpr Move make(Square from, Square to, PieceType pt = KNIGHT) {
|
||||
return Move(T + ((pt - KNIGHT) << 12) + (from << 6) + to);
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -207,8 +207,8 @@ namespace {
|
|||
// Coefficients of a 3rd order polynomial fit based on fishtest data
|
||||
// for two parameters needed to transform eval to the argument of a
|
||||
// logistic function.
|
||||
double as[] = {-3.68389304, 30.07065921, -60.52878723, 149.53378557};
|
||||
double bs[] = {-2.0181857, 15.85685038, -29.83452023, 47.59078827};
|
||||
double as[] = {-1.17202460e-01, 5.94729104e-01, 1.12065546e+01, 1.22606222e+02};
|
||||
double bs[] = {-1.79066759, 11.30759193, -17.43677612, 36.47147479};
|
||||
double a = (((as[0] * m + as[1]) * m + as[2]) * m) + as[3];
|
||||
double b = (((bs[0] * m + bs[1]) * m + bs[2]) * m) + bs[3];
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -164,7 +164,7 @@ Option& Option::operator=(const string& v) {
|
|||
|
||||
assert(!type.empty());
|
||||
|
||||
if ( (type != "button" && v.empty())
|
||||
if ( (type != "button" && type != "string" && v.empty())
|
||||
|| (type == "check" && v != "true" && v != "false")
|
||||
|| (type == "spin" && (stof(v) < min || stof(v) > max)))
|
||||
return *this;
|
||||
|
|
|
@ -36,7 +36,7 @@ import org.petero.droidfish.EngineOptions;
|
|||
|
||||
/** Stockfish engine running as process, started from assets resource. */
|
||||
public class InternalStockFish extends ExternalEngine {
|
||||
private static final String defaultNet = "nn-3475407dc199.nnue";
|
||||
private static final String defaultNet = "nn-6877cd24400e.nnue";
|
||||
private static final String netOption = "evalfile";
|
||||
private File defaultNetFile; // To get the full path of the copied default network file
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user