Mirror of https://github.com/peterosterlund2/droidfish.git (synced 2024-11-26 21:47:23 +01:00)
Update to Stockfish 14
commit 4f7930acf0 (parent fe7a8fac11)
BIN  DroidFishApp/src/main/assets/nn-3475407dc199.nnue  (new binary file, not shown)
@@ -5,7 +5,7 @@ SF_SRC_FILES := \
 	bitbase.cpp endgame.cpp material.cpp movepick.cpp position.cpp timeman.cpp \
 	tune.cpp ucioption.cpp \
 	bitboard.cpp evaluate.cpp misc.cpp search.cpp tt.cpp syzygy/tbprobe.cpp \
-	nnue/evaluate_nnue.cpp nnue/features/half_kp.cpp
+	nnue/evaluate_nnue.cpp nnue/features/half_ka_v2.cpp
 
 MY_ARCH_DEF :=
 ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)

@@ -92,6 +92,8 @@ const vector<string> Defaults = {
 
 } // namespace
 
+namespace Stockfish {
+
 /// setup_bench() builds a list of UCI commands to be run by bench. There
 /// are five parameters: TT size in MB, number of search threads that
 /// should be used, the limit value spent for each position, a file name
@@ -168,3 +170,5 @@ vector<string> setup_bench(const Position& current, istream& is) {
 
   return list;
 }
+
+} // namespace Stockfish

@ -23,6 +23,8 @@
|
|||
#include "bitboard.h"
|
||||
#include "types.h"
|
||||
|
||||
namespace Stockfish {
|
||||
|
||||
namespace {
|
||||
|
||||
// There are 24 possible pawn squares: files A to D and ranks from 2 to 7.
|
||||
|
@ -66,7 +68,6 @@ namespace {
|
|||
|
||||
} // namespace
|
||||
|
||||
|
||||
bool Bitbases::probe(Square wksq, Square wpsq, Square bksq, Color stm) {
|
||||
|
||||
assert(file_of(wpsq) <= FILE_D);
|
||||
|
@ -96,7 +97,6 @@ void Bitbases::init() {
|
|||
KPKBitbase.set(idx);
|
||||
}
|
||||
|
||||
|
||||
namespace {
|
||||
|
||||
KPKPosition::KPKPosition(unsigned idx) {
|
||||
|
@ -150,8 +150,8 @@ namespace {
|
|||
Bitboard b = attacks_bb<KING>(ksq[stm]);
|
||||
|
||||
while (b)
|
||||
r |= stm == WHITE ? db[index(BLACK, ksq[BLACK] , pop_lsb(&b), psq)]
|
||||
: db[index(WHITE, pop_lsb(&b), ksq[WHITE], psq)];
|
||||
r |= stm == WHITE ? db[index(BLACK, ksq[BLACK], pop_lsb(b), psq)]
|
||||
: db[index(WHITE, pop_lsb(b), ksq[WHITE], psq)];
|
||||
|
||||
if (stm == WHITE)
|
||||
{
|
||||
|
@ -168,3 +168,5 @@ namespace {
|
|||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
} // namespace Stockfish
|
||||
|
|
|
@ -22,11 +22,14 @@
|
|||
#include "bitboard.h"
|
||||
#include "misc.h"
|
||||
|
||||
namespace Stockfish {
|
||||
|
||||
uint8_t PopCnt16[1 << 16];
|
||||
uint8_t SquareDistance[SQUARE_NB][SQUARE_NB];
|
||||
|
||||
Bitboard SquareBB[SQUARE_NB];
|
||||
Bitboard LineBB[SQUARE_NB][SQUARE_NB];
|
||||
Bitboard BetweenBB[SQUARE_NB][SQUARE_NB];
|
||||
Bitboard PseudoAttacks[PIECE_TYPE_NB][SQUARE_NB];
|
||||
Bitboard PawnAttacks[COLOR_NB][SQUARE_NB];
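The globals declared above include the PopCnt16 table, which Stockfish uses as a software fallback when no hardware popcount instruction is available: the popcount of every 16-bit value is precomputed once, and a 64-bit bitboard is then counted with four table lookups. A minimal standalone sketch of that idea, using a plain uint64_t instead of the engine's Bitboard type:

#include <cstdint>
#include <iostream>

// Precompute the popcount of every 16-bit value, then count a 64-bit board
// with four lookups. Stockfish only takes this path when popcnt is absent.
static std::uint8_t PopCnt16[1 << 16];

int popcount64(std::uint64_t b) {
    union { std::uint64_t bb; std::uint16_t u[4]; } v = { b };
    return PopCnt16[v.u[0]] + PopCnt16[v.u[1]] + PopCnt16[v.u[2]] + PopCnt16[v.u[3]];
}

int main() {
    for (unsigned i = 1; i < (1 << 16); ++i)
        PopCnt16[i] = PopCnt16[i >> 1] + (i & 1);              // one-time table init

    std::cout << popcount64(0xFF00000000000001ULL) << '\n';    // prints 9
}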
|
||||
|
||||
|
@ -42,7 +45,6 @@ namespace {
|
|||
|
||||
}
|
||||
|
||||
|
||||
/// safe_destination() returns the bitboard of target square for the given step
|
||||
/// from the given square. If the step is off the board, returns empty bitboard.
|
||||
|
||||
|
@ -55,7 +57,7 @@ inline Bitboard safe_destination(Square s, int step) {
|
|||
/// Bitboards::pretty() returns an ASCII representation of a bitboard suitable
|
||||
/// to be printed to standard output. Useful for debugging.
|
||||
|
||||
const std::string Bitboards::pretty(Bitboard b) {
|
||||
std::string Bitboards::pretty(Bitboard b) {
|
||||
|
||||
std::string s = "+---+---+---+---+---+---+---+---+\n";
|
||||
|
||||
|
@ -106,12 +108,17 @@ void Bitboards::init() {
|
|||
|
||||
for (PieceType pt : { BISHOP, ROOK })
|
||||
for (Square s2 = SQ_A1; s2 <= SQ_H8; ++s2)
|
||||
{
|
||||
if (PseudoAttacks[pt][s1] & s2)
|
||||
LineBB[s1][s2] = (attacks_bb(pt, s1, 0) & attacks_bb(pt, s2, 0)) | s1 | s2;
|
||||
{
|
||||
LineBB[s1][s2] = (attacks_bb(pt, s1, 0) & attacks_bb(pt, s2, 0)) | s1 | s2;
|
||||
BetweenBB[s1][s2] = (attacks_bb(pt, s1, square_bb(s2)) & attacks_bb(pt, s2, square_bb(s1)));
|
||||
}
|
||||
BetweenBB[s1][s2] |= s2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
namespace {
|
||||
|
||||
Bitboard sliding_attack(PieceType pt, Square sq, Bitboard occupied) {
|
||||
|
@ -123,7 +130,7 @@ namespace {
|
|||
for (Direction d : (pt == ROOK ? RookDirections : BishopDirections))
|
||||
{
|
||||
Square s = sq;
|
||||
while(safe_destination(s, d) && !(occupied & s))
|
||||
while (safe_destination(s, d) && !(occupied & s))
|
||||
attacks |= (s += d);
|
||||
}
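safe_destination(), documented a few hunks above, guards each ray step against wrapping around the board edge, and sliding_attack() walks a ray until it leaves the board or hits a blocker. A self-contained sketch of the same idea on plain integers; is_ok/file_of/rank_of/distance are re-implemented locally since the engine headers are not available here:

#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <iostream>

bool is_ok(int s)    { return s >= 0 && s < 64; }
int  file_of(int s)  { return s & 7; }
int  rank_of(int s)  { return s >> 3; }
int  distance(int a, int b) {
    return std::max(std::abs(file_of(a) - file_of(b)), std::abs(rank_of(a) - rank_of(b)));
}

// Destination square for a step, or -1 if the step falls off the board.
int safe_destination(int s, int step) {
    int to = s + step;
    return is_ok(to) && distance(s, to) <= 2 ? to : -1;
}

std::uint64_t sliding_attack(const int* dirs, int n, int sq, std::uint64_t occupied) {
    std::uint64_t attacks = 0;
    for (int i = 0; i < n; ++i) {
        int s = sq;
        while (safe_destination(s, dirs[i]) != -1 && !(occupied & (1ULL << s)))
            attacks |= 1ULL << (s += dirs[i]);      // extend the ray one step
    }
    return attacks;
}

int main() {
    const int rookDirs[] = { 8, -8, 1, -1 };        // N, S, E, W as square-index deltas
    // Rook on a1, empty board: 7 squares along the file + 7 along the rank.
    std::cout << __builtin_popcountll(sliding_attack(rookDirs, 4, 0, 0)) << '\n';   // prints 14
}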
|
||||
|
||||
|
@ -211,3 +218,5 @@ namespace {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Stockfish
|
||||
|
|
|
@ -23,19 +23,21 @@
|
|||
|
||||
#include "types.h"
|
||||
|
||||
namespace Stockfish {
|
||||
|
||||
namespace Bitbases {
|
||||
|
||||
void init();
|
||||
bool probe(Square wksq, Square wpsq, Square bksq, Color us);
|
||||
|
||||
}
|
||||
} // namespace Stockfish::Bitbases
|
||||
|
||||
namespace Bitboards {
|
||||
|
||||
void init();
|
||||
const std::string pretty(Bitboard b);
|
||||
std::string pretty(Bitboard b);
|
||||
|
||||
}
|
||||
} // namespace Stockfish::Bitboards
|
||||
|
||||
constexpr Bitboard AllSquares = ~Bitboard(0);
|
||||
constexpr Bitboard DarkSquares = 0xAA55AA55AA55AA55ULL;
|
||||
|
@ -73,6 +75,7 @@ extern uint8_t PopCnt16[1 << 16];
|
|||
extern uint8_t SquareDistance[SQUARE_NB][SQUARE_NB];
|
||||
|
||||
extern Bitboard SquareBB[SQUARE_NB];
|
||||
extern Bitboard BetweenBB[SQUARE_NB][SQUARE_NB];
|
||||
extern Bitboard LineBB[SQUARE_NB][SQUARE_NB];
|
||||
extern Bitboard PseudoAttacks[PIECE_TYPE_NB][SQUARE_NB];
|
||||
extern Bitboard PawnAttacks[COLOR_NB][SQUARE_NB];
|
||||
|
@ -209,23 +212,29 @@ constexpr Bitboard adjacent_files_bb(Square s) {
|
|||
inline Bitboard line_bb(Square s1, Square s2) {
|
||||
|
||||
assert(is_ok(s1) && is_ok(s2));
|
||||
|
||||
return LineBB[s1][s2];
|
||||
}
|
||||
|
||||
|
||||
/// between_bb() returns a bitboard representing squares that are linearly
|
||||
/// between the two given squares (excluding the given squares). If the given
|
||||
/// squares are not on a same file/rank/diagonal, we return 0. For instance,
|
||||
/// between_bb(SQ_C4, SQ_F7) will return a bitboard with squares D5 and E6.
|
||||
/// between_bb(s1, s2) returns a bitboard representing the squares in the semi-open
|
||||
/// segment between the squares s1 and s2 (excluding s1 but including s2). If the
|
||||
/// given squares are not on a same file/rank/diagonal, it returns s2. For instance,
|
||||
/// between_bb(SQ_C4, SQ_F7) will return a bitboard with squares D5, E6 and F7, but
|
||||
/// between_bb(SQ_E6, SQ_F8) will return a bitboard with the square F8. This trick
|
||||
/// allows to generate non-king evasion moves faster: the defending piece must either
|
||||
/// interpose itself to cover the check or capture the checking piece.
|
||||
|
||||
inline Bitboard between_bb(Square s1, Square s2) {
|
||||
Bitboard b = line_bb(s1, s2) & ((AllSquares << s1) ^ (AllSquares << s2));
|
||||
return b & (b - 1); //exclude lsb
|
||||
|
||||
assert(is_ok(s1) && is_ok(s2));
|
||||
|
||||
return BetweenBB[s1][s2];
|
||||
}
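The rewritten between_bb() comment above changes the contract: the returned segment now excludes s1 but includes s2, so a mask built from the king square and the checking piece covers both the interposing squares and the capture of the checker. A toy, board-free illustration of that convention along a single file (0 = a1 square indexing is an assumption of this sketch):

#include <cstdint>
#include <iostream>

// King on e1 (square 4), checking rook on e8 (square 60), same file.
// The semi-open segment excludes e1 but includes e8: e2..e8, 7 squares.
int main() {
    int s1 = 4, s2 = 60;
    std::uint64_t between = 0;
    for (int sq = s1 + 8; sq <= s2; sq += 8)   // step one rank up the e-file
        between |= 1ULL << sq;

    std::cout << __builtin_popcountll(between) << '\n';   // prints 7 (GCC/Clang builtin)
}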
|
||||
|
||||
|
||||
/// forward_ranks_bb() returns a bitboard representing the squares on the ranks
|
||||
/// in front of the given one, from the point of view of the given color. For instance,
|
||||
/// forward_ranks_bb() returns a bitboard representing the squares on the ranks in
|
||||
/// front of the given one, from the point of view of the given color. For instance,
|
||||
/// forward_ranks_bb(BLACK, SQ_D3) will return the 16 squares on ranks 1 and 2.
|
||||
|
||||
constexpr Bitboard forward_ranks_bb(Color c, Square s) {
|
||||
|
@ -412,13 +421,20 @@ inline Square msb(Bitboard b) {
|
|||
|
||||
#endif
|
||||
|
||||
/// least_significant_square_bb() returns the bitboard of the least significant
|
||||
/// square of a non-zero bitboard. It is equivalent to square_bb(lsb(bb)).
|
||||
|
||||
inline Bitboard least_significant_square_bb(Bitboard b) {
|
||||
assert(b);
|
||||
return b & -b;
|
||||
}
|
||||
|
||||
/// pop_lsb() finds and clears the least significant bit in a non-zero bitboard
|
||||
|
||||
inline Square pop_lsb(Bitboard* b) {
|
||||
assert(*b);
|
||||
const Square s = lsb(*b);
|
||||
*b &= *b - 1;
|
||||
inline Square pop_lsb(Bitboard& b) {
|
||||
assert(b);
|
||||
const Square s = lsb(b);
|
||||
b &= b - 1;
|
||||
return s;
|
||||
}
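pop_lsb() now takes its bitboard by reference instead of by pointer, which is why every call site in this commit changes from pop_lsb(&b) to pop_lsb(b). A standalone sketch of both signatures on a plain uint64_t, with the bit index computed by a GCC/Clang builtin (an assumption of this sketch):

#include <cassert>
#include <cstdint>
#include <iostream>

// Old signature (Stockfish 13): the bitboard is passed by pointer.
int pop_lsb_old(std::uint64_t* b) {
    assert(*b);
    int s = __builtin_ctzll(*b);   // index of the least significant set bit
    *b &= *b - 1;                  // clear that bit
    return s;
}

// New signature (Stockfish 14): the bitboard is passed by reference.
int pop_lsb_new(std::uint64_t& b) {
    assert(b);
    int s = __builtin_ctzll(b);
    b &= b - 1;
    return s;
}

int main() {
    std::uint64_t bb = 0b101000;              // bits 3 and 5 set
    std::cout << (bb & -bb) << '\n';          // least_significant_square_bb: prints 8

    while (bb)
        std::cout << pop_lsb_new(bb) << ' ';  // prints 3 5
    std::cout << '\n';
}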
|
||||
|
||||
|
@ -430,4 +446,6 @@ inline Square frontmost_sq(Color c, Bitboard b) {
|
|||
return c == WHITE ? msb(b) : lsb(b);
|
||||
}
|
||||
|
||||
} // namespace Stockfish
|
||||
|
||||
#endif // #ifndef BITBOARD_H_INCLUDED
|
||||
|
|
|
@ -22,6 +22,8 @@
|
|||
#include "endgame.h"
|
||||
#include "movegen.h"
|
||||
|
||||
namespace Stockfish {
|
||||
|
||||
namespace {
|
||||
|
||||
// Used to drive the king towards the edge of the board
|
||||
|
@ -741,3 +743,5 @@ ScaleFactor Endgame<KPKP>::operator()(const Position& pos) const {
|
|||
// it's probably at least a draw even with the pawn.
|
||||
return Bitbases::probe(strongKing, strongPawn, weakKing, us) ? SCALE_FACTOR_NONE : SCALE_FACTOR_DRAW;
|
||||
}
|
||||
|
||||
} // namespace Stockfish
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
#include "position.h"
|
||||
#include "types.h"
|
||||
|
||||
namespace Stockfish {
|
||||
|
||||
/// EndgameCode lists all supported endgame functions by corresponding codes
|
||||
|
||||
|
@ -120,4 +121,6 @@ namespace Endgames {
|
|||
}
|
||||
}
|
||||
|
||||
} // namespace Stockfish
|
||||
|
||||
#endif // #ifndef ENDGAME_H_INCLUDED
|
||||
|
|
|
@ -33,6 +33,7 @@
|
|||
#include "misc.h"
|
||||
#include "pawns.h"
|
||||
#include "thread.h"
|
||||
#include "timeman.h"
|
||||
#include "uci.h"
|
||||
#include "incbin/incbin.h"
|
||||
|
||||
|
@ -54,7 +55,8 @@
|
|||
|
||||
|
||||
using namespace std;
|
||||
using namespace Eval::NNUE;
|
||||
|
||||
namespace Stockfish {
|
||||
|
||||
namespace Eval {
|
||||
|
||||
|
@ -110,8 +112,6 @@ namespace Eval {
|
|||
eval_file_loaded = eval_file;
|
||||
}
|
||||
}
|
||||
if (eval_file_loaded != eval_file)
|
||||
eval_file_loaded = "";
|
||||
}
|
||||
|
||||
/// NNUE::verify() verifies that the last net used was loaded successfully
|
||||
|
@ -180,7 +180,7 @@ namespace Trace {
|
|||
else
|
||||
os << scores[t][WHITE] << " | " << scores[t][BLACK];
|
||||
|
||||
os << " | " << scores[t][WHITE] - scores[t][BLACK] << "\n";
|
||||
os << " | " << scores[t][WHITE] - scores[t][BLACK] << " |\n";
|
||||
return os;
|
||||
}
|
||||
}
|
||||
|
@ -190,11 +190,9 @@ using namespace Trace;
|
|||
namespace {
|
||||
|
||||
// Threshold for lazy and space evaluation
|
||||
constexpr Value LazyThreshold1 = Value(1565);
|
||||
constexpr Value LazyThreshold2 = Value(1102);
|
||||
constexpr Value SpaceThreshold = Value(11551);
|
||||
constexpr Value NNUEThreshold1 = Value(682);
|
||||
constexpr Value NNUEThreshold2 = Value(176);
|
||||
constexpr Value LazyThreshold1 = Value(1565);
|
||||
constexpr Value LazyThreshold2 = Value(1102);
|
||||
constexpr Value SpaceThreshold = Value(11551);
|
||||
|
||||
// KingAttackWeights[PieceType] contains king attack weights by piece type
|
||||
constexpr int KingAttackWeights[PIECE_TYPE_NB] = { 0, 0, 81, 52, 44, 10 };
|
||||
|
@ -257,11 +255,12 @@ namespace {
|
|||
S(0, 0), S(3, 44), S(37, 68), S(42, 60), S(0, 39), S(58, 43)
|
||||
};
|
||||
|
||||
constexpr Value CorneredBishop = Value(50);
|
||||
|
||||
// Assorted bonuses and penalties
|
||||
constexpr Score UncontestedOutpost = S( 1, 10);
|
||||
constexpr Score BishopOnKingRing = S( 24, 0);
|
||||
constexpr Score BishopXRayPawns = S( 4, 5);
|
||||
constexpr Score CorneredBishop = S( 50, 50);
|
||||
constexpr Score FlankAttacks = S( 8, 0);
|
||||
constexpr Score Hanging = S( 69, 36);
|
||||
constexpr Score KnightOnQueen = S( 16, 11);
|
||||
|
@ -396,8 +395,9 @@ namespace {
|
|||
|
||||
attackedBy[Us][Pt] = 0;
|
||||
|
||||
while (b1) {
|
||||
Square s = pop_lsb(&b1);
|
||||
while (b1)
|
||||
{
|
||||
Square s = pop_lsb(b1);
|
||||
|
||||
// Find attacked squares, including x-ray attacks for bishops and rooks
|
||||
b = Pt == BISHOP ? attacks_bb<BISHOP>(s, pos.pieces() ^ pos.pieces(QUEEN))
|
||||
|
@ -477,9 +477,8 @@ namespace {
|
|||
{
|
||||
Direction d = pawn_push(Us) + (file_of(s) == FILE_A ? EAST : WEST);
|
||||
if (pos.piece_on(s + d) == make_piece(Us, PAWN))
|
||||
score -= !pos.empty(s + d + pawn_push(Us)) ? CorneredBishop * 4
|
||||
: pos.piece_on(s + d + d) == make_piece(Us, PAWN) ? CorneredBishop * 2
|
||||
: CorneredBishop;
|
||||
score -= !pos.empty(s + d + pawn_push(Us)) ? 4 * make_score(CorneredBishop, CorneredBishop)
|
||||
: 3 * make_score(CorneredBishop, CorneredBishop);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -658,11 +657,11 @@ namespace {
|
|||
{
|
||||
b = (defended | weak) & (attackedBy[Us][KNIGHT] | attackedBy[Us][BISHOP]);
|
||||
while (b)
|
||||
score += ThreatByMinor[type_of(pos.piece_on(pop_lsb(&b)))];
|
||||
score += ThreatByMinor[type_of(pos.piece_on(pop_lsb(b)))];
|
||||
|
||||
b = weak & attackedBy[Us][ROOK];
|
||||
while (b)
|
||||
score += ThreatByRook[type_of(pos.piece_on(pop_lsb(&b)))];
|
||||
score += ThreatByRook[type_of(pos.piece_on(pop_lsb(b)))];
|
||||
|
||||
if (weak & attackedBy[Us][KING])
|
||||
score += ThreatByKing;
|
||||
|
@ -760,7 +759,7 @@ namespace {
|
|||
|
||||
while (b)
|
||||
{
|
||||
Square s = pop_lsb(&b);
|
||||
Square s = pop_lsb(b);
|
||||
|
||||
assert(!(pos.pieces(Them, PAWN) & forward_file_bb(Us, s + Up)));
|
||||
|
||||
|
@ -906,7 +905,7 @@ namespace {
|
|||
Color strongSide = eg > VALUE_DRAW ? WHITE : BLACK;
|
||||
int sf = me->scale_factor(pos, strongSide);
|
||||
|
||||
// If scale factor is not already specific, scale down via general heuristics
|
||||
// If scale factor is not already specific, scale up/down via general heuristics
|
||||
if (sf == SCALE_FACTOR_NORMAL)
|
||||
{
|
||||
if (pos.opposite_bishops())
|
||||
|
@ -979,7 +978,7 @@ namespace {
|
|||
// Initialize score by reading the incrementally updated scores included in
|
||||
// the position object (material + piece square tables) and the material
|
||||
// imbalance. Score is computed internally from the white point of view.
|
||||
Score score = pos.psq_score() + me->imbalance() + pos.this_thread()->contempt;
|
||||
Score score = pos.psq_score() + me->imbalance() + pos.this_thread()->trend;
|
||||
|
||||
// Probe the pawn hash table
|
||||
pe = Pawns::probe(pos);
|
||||
|
@ -1033,12 +1032,48 @@ make_v:
|
|||
v = (v / 16) * 16;
|
||||
|
||||
// Side to move point of view
|
||||
v = (pos.side_to_move() == WHITE ? v : -v) + Tempo;
|
||||
v = (pos.side_to_move() == WHITE ? v : -v);
|
||||
|
||||
return v;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
/// Fisher Random Chess: correction for cornered bishops, to fix chess960 play with NNUE
|
||||
|
||||
Value fix_FRC(const Position& pos) {
|
||||
|
||||
constexpr Bitboard Corners = 1ULL << SQ_A1 | 1ULL << SQ_H1 | 1ULL << SQ_A8 | 1ULL << SQ_H8;
|
||||
|
||||
if (!(pos.pieces(BISHOP) & Corners))
|
||||
return VALUE_ZERO;
|
||||
|
||||
int correction = 0;
|
||||
|
||||
if ( pos.piece_on(SQ_A1) == W_BISHOP
|
||||
&& pos.piece_on(SQ_B2) == W_PAWN)
|
||||
correction += !pos.empty(SQ_B3) ? -CorneredBishop * 4
|
||||
: -CorneredBishop * 3;
|
||||
|
||||
if ( pos.piece_on(SQ_H1) == W_BISHOP
|
||||
&& pos.piece_on(SQ_G2) == W_PAWN)
|
||||
correction += !pos.empty(SQ_G3) ? -CorneredBishop * 4
|
||||
: -CorneredBishop * 3;
|
||||
|
||||
if ( pos.piece_on(SQ_A8) == B_BISHOP
|
||||
&& pos.piece_on(SQ_B7) == B_PAWN)
|
||||
correction += !pos.empty(SQ_B6) ? CorneredBishop * 4
|
||||
: CorneredBishop * 3;
|
||||
|
||||
if ( pos.piece_on(SQ_H8) == B_BISHOP
|
||||
&& pos.piece_on(SQ_G7) == B_PAWN)
|
||||
correction += !pos.empty(SQ_G6) ? CorneredBishop * 4
|
||||
: CorneredBishop * 3;
|
||||
|
||||
return pos.side_to_move() == WHITE ? Value(correction)
|
||||
: -Value(correction);
|
||||
}
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
|
||||
/// evaluate() is the evaluator for the outer world. It returns a static
|
||||
|
@ -1053,32 +1088,28 @@ Value Eval::evaluate(const Position& pos) {
|
|||
else
|
||||
{
|
||||
// Scale and shift NNUE for compatibility with search and classical evaluation
|
||||
auto adjusted_NNUE = [&](){
|
||||
int mat = pos.non_pawn_material() + 2 * PawnValueMg * pos.count<PAWN>();
|
||||
return NNUE::evaluate(pos) * (641 + mat / 32 - 4 * pos.rule50_count()) / 1024 + Tempo;
|
||||
auto adjusted_NNUE = [&]()
|
||||
{
|
||||
int scale = 903
|
||||
+ 32 * pos.count<PAWN>()
|
||||
+ 32 * pos.non_pawn_material() / 1024;
|
||||
|
||||
Value nnue = NNUE::evaluate(pos, true) * scale / 1024;
|
||||
|
||||
if (pos.is_chess960())
|
||||
nnue += fix_FRC(pos);
|
||||
|
||||
return nnue;
|
||||
};
|
||||
|
||||
// If there is PSQ imbalance use classical eval, with small probability if it is small
|
||||
// If there is PSQ imbalance we use the classical eval, but we switch to
|
||||
// NNUE eval faster when shuffling or if the material on the board is high.
|
||||
int r50 = pos.rule50_count();
|
||||
Value psq = Value(abs(eg_value(pos.psq_score())));
|
||||
int r50 = 16 + pos.rule50_count();
|
||||
bool largePsq = psq * 16 > (NNUEThreshold1 + pos.non_pawn_material() / 64) * r50;
|
||||
bool classical = largePsq || (psq > PawnValueMg / 4 && !(pos.this_thread()->nodes & 0xB));
|
||||
bool classical = psq * 5 > (750 + pos.non_pawn_material() / 64) * (5 + r50);
|
||||
|
||||
// Use classical evaluation for really low piece endgames.
|
||||
// The most critical case is a bishop + A/H file pawn vs naked king draw.
|
||||
bool strongClassical = pos.non_pawn_material() < 2 * RookValueMg && pos.count<PAWN>() < 2;
|
||||
|
||||
v = classical || strongClassical ? Evaluation<NO_TRACE>(pos).value() : adjusted_NNUE();
|
||||
|
||||
// If the classical eval is small and imbalance large, use NNUE nevertheless.
|
||||
// For the case of opposite colored bishops, switch to NNUE eval with
|
||||
// small probability if the classical eval is less than the threshold.
|
||||
if ( largePsq && !strongClassical
|
||||
&& ( abs(v) * 16 < NNUEThreshold2 * r50
|
||||
|| ( pos.opposite_bishops()
|
||||
&& abs(v) * 16 < (NNUEThreshold1 + pos.non_pawn_material() / 64) * r50
|
||||
&& !(pos.this_thread()->nodes & 0xB))))
|
||||
v = adjusted_NNUE();
|
||||
v = classical ? Evaluation<NO_TRACE>(pos).value() // classical
|
||||
: adjusted_NNUE(); // NNUE
|
||||
}
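The hybrid rule above now decides between the classical and NNUE evaluations with a single inequality on the PSQ imbalance, the non-pawn material and the 50-move counter, and adjusted_NNUE() rescales the raw network output by a material-dependent factor. A plain-integer recomputation of those two expressions, with invented input values standing in for the Position queries:

#include <iostream>

int main() {
    int psq   = 250;    // abs(eg_value(pos.psq_score())), invented
    int npm   = 5000;   // pos.non_pawn_material(), invented
    int r50   = 12;     // pos.rule50_count(), invented
    int pawns = 10;     // pos.count<PAWN>(), invented

    // Hybrid rule: keep the classical eval only for a large PSQ imbalance,
    // switching to NNUE faster when shuffling (r50) or material (npm) is high.
    bool classical = psq * 5 > (750 + npm / 64) * (5 + r50);

    // Material-dependent rescaling applied inside adjusted_NNUE().
    int scale = 903 + 32 * pawns + 32 * npm / 1024;

    std::cout << "classical=" << classical << "  scale=" << scale << '\n';
    // classical=0  scale=1379 for these inputs
}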
|
||||
|
||||
// Damp down the evaluation linearly when shuffling
|
||||
|
@ -1095,7 +1126,7 @@ Value Eval::evaluate(const Position& pos) {
|
|||
/// descriptions and values of each evaluation term. Useful for debugging.
|
||||
/// Trace scores are from white's point of view
|
||||
|
||||
std::string Eval::trace(const Position& pos) {
|
||||
std::string Eval::trace(Position& pos) {
|
||||
|
||||
if (pos.checkers())
|
||||
return "Final evaluation: none (in check)";
|
||||
|
@ -1107,44 +1138,55 @@ std::string Eval::trace(const Position& pos) {
|
|||
|
||||
std::memset(scores, 0, sizeof(scores));
|
||||
|
||||
pos.this_thread()->contempt = SCORE_ZERO; // Reset any dynamic contempt
|
||||
pos.this_thread()->trend = SCORE_ZERO; // Reset any dynamic contempt
|
||||
|
||||
v = Evaluation<TRACE>(pos).value();
|
||||
|
||||
ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2)
|
||||
<< " Term | White | Black | Total \n"
|
||||
<< " | MG EG | MG EG | MG EG \n"
|
||||
<< " ------------+-------------+-------------+------------\n"
|
||||
<< " Material | " << Term(MATERIAL)
|
||||
<< " Imbalance | " << Term(IMBALANCE)
|
||||
<< " Pawns | " << Term(PAWN)
|
||||
<< " Knights | " << Term(KNIGHT)
|
||||
<< " Bishops | " << Term(BISHOP)
|
||||
<< " Rooks | " << Term(ROOK)
|
||||
<< " Queens | " << Term(QUEEN)
|
||||
<< " Mobility | " << Term(MOBILITY)
|
||||
<< " King safety | " << Term(KING)
|
||||
<< " Threats | " << Term(THREAT)
|
||||
<< " Passed | " << Term(PASSED)
|
||||
<< " Space | " << Term(SPACE)
|
||||
<< " Winnable | " << Term(WINNABLE)
|
||||
<< " ------------+-------------+-------------+------------\n"
|
||||
<< " Total | " << Term(TOTAL);
|
||||
|
||||
v = pos.side_to_move() == WHITE ? v : -v;
|
||||
|
||||
ss << "\nClassical evaluation: " << to_cp(v) << " (white side)\n";
|
||||
<< " Contributing terms for the classical eval:\n"
|
||||
<< "+------------+-------------+-------------+-------------+\n"
|
||||
<< "| Term | White | Black | Total |\n"
|
||||
<< "| | MG EG | MG EG | MG EG |\n"
|
||||
<< "+------------+-------------+-------------+-------------+\n"
|
||||
<< "| Material | " << Term(MATERIAL)
|
||||
<< "| Imbalance | " << Term(IMBALANCE)
|
||||
<< "| Pawns | " << Term(PAWN)
|
||||
<< "| Knights | " << Term(KNIGHT)
|
||||
<< "| Bishops | " << Term(BISHOP)
|
||||
<< "| Rooks | " << Term(ROOK)
|
||||
<< "| Queens | " << Term(QUEEN)
|
||||
<< "| Mobility | " << Term(MOBILITY)
|
||||
<< "|King safety | " << Term(KING)
|
||||
<< "| Threats | " << Term(THREAT)
|
||||
<< "| Passed | " << Term(PASSED)
|
||||
<< "| Space | " << Term(SPACE)
|
||||
<< "| Winnable | " << Term(WINNABLE)
|
||||
<< "+------------+-------------+-------------+-------------+\n"
|
||||
<< "| Total | " << Term(TOTAL)
|
||||
<< "+------------+-------------+-------------+-------------+\n";
|
||||
|
||||
if (Eval::useNNUE)
|
||||
ss << '\n' << NNUE::trace(pos) << '\n';
|
||||
|
||||
ss << std::showpoint << std::showpos << std::fixed << std::setprecision(2) << std::setw(15);
|
||||
|
||||
v = pos.side_to_move() == WHITE ? v : -v;
|
||||
ss << "\nClassical evaluation " << to_cp(v) << " (white side)\n";
|
||||
if (Eval::useNNUE)
|
||||
{
|
||||
v = NNUE::evaluate(pos);
|
||||
v = NNUE::evaluate(pos, false);
|
||||
v = pos.side_to_move() == WHITE ? v : -v;
|
||||
ss << "\nNNUE evaluation: " << to_cp(v) << " (white side)\n";
|
||||
ss << "NNUE evaluation " << to_cp(v) << " (white side)\n";
|
||||
}
|
||||
|
||||
v = evaluate(pos);
|
||||
v = pos.side_to_move() == WHITE ? v : -v;
|
||||
ss << "\nFinal evaluation: " << to_cp(v) << " (white side)\n";
|
||||
ss << "Final evaluation " << to_cp(v) << " (white side)";
|
||||
if (Eval::useNNUE)
|
||||
ss << " [with scaled NNUE, hybrid, ...]";
|
||||
ss << "\n";
|
||||
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
} // namespace Stockfish
|
||||
|
|
|
@ -20,14 +20,17 @@
|
|||
#define EVALUATE_H_INCLUDED
|
||||
|
||||
#include <string>
|
||||
#include <optional>
|
||||
|
||||
#include "types.h"
|
||||
|
||||
namespace Stockfish {
|
||||
|
||||
class Position;
|
||||
|
||||
namespace Eval {
|
||||
|
||||
std::string trace(const Position& pos);
|
||||
std::string trace(Position& pos);
|
||||
Value evaluate(const Position& pos);
|
||||
|
||||
extern bool useNNUE;
|
||||
|
@ -36,17 +39,24 @@ namespace Eval {
|
|||
// The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue
|
||||
// for the build process (profile-build and fishtest) to work. Do not change the
|
||||
// name of the macro, as it is used in the Makefile.
|
||||
#define EvalFileDefaultName "nn-62ef826d1a6d.nnue"
|
||||
#define EvalFileDefaultName "nn-3475407dc199.nnue"
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
Value evaluate(const Position& pos);
|
||||
bool load_eval(std::string name, std::istream& stream);
|
||||
std::string trace(Position& pos);
|
||||
Value evaluate(const Position& pos, bool adjusted = false);
|
||||
|
||||
void init();
|
||||
void verify();
|
||||
|
||||
bool load_eval(std::string name, std::istream& stream);
|
||||
bool save_eval(std::ostream& stream);
|
||||
bool save_eval(const std::optional<std::string>& filename);
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
} // namespace Stockfish
|
||||
|
||||
#endif // #ifndef EVALUATE_H_INCLUDED
|
||||
|
|
|
@ -28,6 +28,8 @@
|
|||
#include "tt.h"
|
||||
#include "uci.h"
|
||||
|
||||
using namespace Stockfish;
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
|
||||
std::cout << engine_info() << std::endl;
|
||||
|
|
|
@ -24,6 +24,8 @@
|
|||
|
||||
using namespace std;
|
||||
|
||||
namespace Stockfish {
|
||||
|
||||
namespace {
|
||||
#define S(mg, eg) make_score(mg, eg)
|
||||
|
||||
|
@ -72,7 +74,7 @@ namespace {
|
|||
|
||||
bool is_KBPsK(const Position& pos, Color us) {
|
||||
return pos.non_pawn_material(us) == BishopValueMg
|
||||
&& pos.count<PAWN >(us) >= 1;
|
||||
&& pos.count<PAWN>(us) >= 1;
|
||||
}
|
||||
|
||||
bool is_KQKRPs(const Position& pos, Color us) {
|
||||
|
@ -223,3 +225,5 @@ Entry* probe(const Position& pos) {
|
|||
}
|
||||
|
||||
} // namespace Material
|
||||
|
||||
} // namespace Stockfish
|
||||
|
|
|
@ -24,7 +24,7 @@
|
|||
#include "position.h"
|
||||
#include "types.h"
|
||||
|
||||
namespace Material {
|
||||
namespace Stockfish::Material {
|
||||
|
||||
/// Material::Entry contains various information about a material configuration.
|
||||
/// It contains a material imbalance evaluation, a function pointer to a special
|
||||
|
@ -66,6 +66,6 @@ typedef HashTable<Entry, 8192> Table;
|
|||
|
||||
Entry* probe(const Position& pos);
|
||||
|
||||
} // namespace Material
|
||||
} // namespace Stockfish::Material
|
||||
|
||||
#endif // #ifndef MATERIAL_H_INCLUDED
|
||||
|
|
|
@ -51,7 +51,7 @@ typedef bool(*fun3_t)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY);
|
|||
#include <sys/mman.h>
|
||||
#endif
|
||||
|
||||
#if defined(__APPLE__) || defined(__ANDROID__) || defined(__OpenBSD__) || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32))
|
||||
#if defined(__APPLE__) || defined(__ANDROID__) || defined(__OpenBSD__) || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32)) || defined(__e2k__)
|
||||
#define POSIXALIGNEDALLOC
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
@ -61,11 +61,13 @@ typedef bool(*fun3_t)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY);
|
|||
|
||||
using namespace std;
|
||||
|
||||
namespace Stockfish {
|
||||
|
||||
namespace {
|
||||
|
||||
/// Version number. If Version is left empty, then compile date in the format
|
||||
/// DD-MM-YY and show in engine_info.
|
||||
const string Version = "13";
|
||||
const string Version = "14";
|
||||
|
||||
/// Our fancy logging facility. The trick here is to replace cin.rdbuf() and
|
||||
/// cout.rdbuf() with two Tie objects that tie cin and cout to a file stream. We
|
||||
|
@ -138,7 +140,7 @@ public:
|
|||
/// the program was compiled) or "Stockfish <Version>", depending on whether
|
||||
/// Version is empty.
|
||||
|
||||
const string engine_info(bool to_uci) {
|
||||
string engine_info(bool to_uci) {
|
||||
|
||||
const string months("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec");
|
||||
string month, day, year;
|
||||
|
@ -161,7 +163,7 @@ const string engine_info(bool to_uci) {
|
|||
|
||||
/// compiler_info() returns a string trying to describe the compiler we use
|
||||
|
||||
const std::string compiler_info() {
|
||||
std::string compiler_info() {
|
||||
|
||||
#define stringify2(x) #x
|
||||
#define stringify(x) stringify2(x)
|
||||
|
@ -190,6 +192,18 @@ const std::string compiler_info() {
|
|||
compiler += "(version ";
|
||||
compiler += stringify(_MSC_FULL_VER) "." stringify(_MSC_BUILD);
|
||||
compiler += ")";
|
||||
#elif defined(__e2k__) && defined(__LCC__)
|
||||
#define dot_ver2(n) \
|
||||
compiler += (char)'.'; \
|
||||
compiler += (char)('0' + (n) / 10); \
|
||||
compiler += (char)('0' + (n) % 10);
|
||||
|
||||
compiler += "MCST LCC ";
|
||||
compiler += "(version ";
|
||||
compiler += std::to_string(__LCC__ / 100);
|
||||
dot_ver2(__LCC__ % 100)
|
||||
dot_ver2(__LCC_MINOR__)
|
||||
compiler += ")";
|
||||
#elif __GNUC__
|
||||
compiler += "g++ (GNUC) ";
|
||||
compiler += make_version_string(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__);
|
||||
|
@ -361,7 +375,11 @@ void std_aligned_free(void* ptr) {
|
|||
|
||||
#if defined(_WIN32)
|
||||
|
||||
static void* aligned_large_pages_alloc_win(size_t allocSize) {
|
||||
static void* aligned_large_pages_alloc_windows(size_t allocSize) {
|
||||
|
||||
#if !defined(_WIN64)
|
||||
return nullptr;
|
||||
#else
|
||||
|
||||
HANDLE hProcessToken { };
|
||||
LUID luid { };
|
||||
|
@ -404,12 +422,14 @@ static void* aligned_large_pages_alloc_win(size_t allocSize) {
|
|||
CloseHandle(hProcessToken);
|
||||
|
||||
return mem;
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
void* aligned_large_pages_alloc(size_t allocSize) {
|
||||
|
||||
// Try to allocate large pages
|
||||
void* mem = aligned_large_pages_alloc_win(allocSize);
|
||||
void* mem = aligned_large_pages_alloc_windows(allocSize);
|
||||
|
||||
// Fall back to regular, page aligned, allocation if necessary
|
||||
if (!mem)
|
||||
|
@ -449,8 +469,9 @@ void aligned_large_pages_free(void* mem) {
|
|||
if (mem && !VirtualFree(mem, 0, MEM_RELEASE))
|
||||
{
|
||||
DWORD err = GetLastError();
|
||||
std::cerr << "Failed to free transposition table. Error code: 0x" <<
|
||||
std::hex << err << std::dec << std::endl;
|
||||
std::cerr << "Failed to free large page memory. Error code: 0x"
|
||||
<< std::hex << err
|
||||
<< std::dec << std::endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
@ -626,3 +647,5 @@ void init(int argc, char* argv[]) {
|
|||
|
||||
|
||||
} // namespace CommandLine
|
||||
|
||||
} // namespace Stockfish
|
||||
|
|
|
@ -28,8 +28,10 @@
|
|||
|
||||
#include "types.h"
|
||||
|
||||
const std::string engine_info(bool to_uci = false);
|
||||
const std::string compiler_info();
|
||||
namespace Stockfish {
|
||||
|
||||
std::string engine_info(bool to_uci = false);
|
||||
std::string compiler_info();
|
||||
void prefetch(void* addr);
|
||||
void start_logger(const std::string& fname);
|
||||
void* std_aligned_alloc(size_t alignment, size_t size);
|
||||
|
@ -64,9 +66,10 @@ std::ostream& operator<<(std::ostream&, SyncCout);
|
|||
#define sync_cout std::cout << IO_LOCK
|
||||
#define sync_endl std::endl << IO_UNLOCK
|
||||
|
||||
// `ptr` must point to an array of size at least
|
||||
// `sizeof(T) * N + alignment` bytes, where `N` is the
|
||||
// number of elements in the array.
|
||||
|
||||
// align_ptr_up() : get the first aligned element of an array.
|
||||
// ptr must point to an array of size at least `sizeof(T) * N + alignment` bytes,
|
||||
// where N is the number of elements in the array.
|
||||
template <uintptr_t Alignment, typename T>
|
||||
T* align_ptr_up(T* ptr)
|
||||
{
|
||||
|
@ -76,6 +79,55 @@ T* align_ptr_up(T* ptr)
|
|||
return reinterpret_cast<T*>(reinterpret_cast<char*>((ptrint + (Alignment - 1)) / Alignment * Alignment));
|
||||
}
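align_ptr_up() rounds a pointer up to the next Alignment boundary, so the caller must over-allocate by up to Alignment - 1 bytes, as the rewritten comment above states. A minimal standalone sketch with a hypothetical 64-byte alignment:

#include <cstdint>
#include <iostream>

// Round a pointer up to the next Alignment-byte boundary inside an
// over-allocated buffer (simplified from the misc.h version).
template <std::uintptr_t Alignment, typename T>
T* align_ptr_up(T* ptr) {
    static_assert(alignof(T) < Alignment);
    const std::uintptr_t p = reinterpret_cast<std::uintptr_t>(ptr);
    return reinterpret_cast<T*>((p + (Alignment - 1)) / Alignment * Alignment);
}

int main() {
    // Over-allocate by Alignment - 1 bytes so the aligned pointer still
    // leaves room for the 256 bytes we actually want to use.
    char raw[256 + 63];
    char* aligned = align_ptr_up<64>(raw);
    std::cout << (reinterpret_cast<std::uintptr_t>(aligned) % 64) << '\n';  // prints 0
}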
|
||||
|
||||
|
||||
// IsLittleEndian : true if and only if the binary is compiled on a little endian machine
|
||||
static inline const union { uint32_t i; char c[4]; } Le = { 0x01020304 };
|
||||
static inline const bool IsLittleEndian = (Le.c[0] == 4);
|
||||
|
||||
|
||||
template <typename T>
|
||||
class ValueListInserter {
|
||||
public:
|
||||
ValueListInserter(T* v, std::size_t& s) :
|
||||
values(v),
|
||||
size(&s)
|
||||
{
|
||||
}
|
||||
|
||||
void push_back(const T& value) { values[(*size)++] = value; }
|
||||
private:
|
||||
T* values;
|
||||
std::size_t* size;
|
||||
};
|
||||
|
||||
template <typename T, std::size_t MaxSize>
|
||||
class ValueList {
|
||||
|
||||
public:
|
||||
std::size_t size() const { return size_; }
|
||||
void resize(std::size_t newSize) { size_ = newSize; }
|
||||
void push_back(const T& value) { values_[size_++] = value; }
|
||||
T& operator[](std::size_t index) { return values_[index]; }
|
||||
T* begin() { return values_; }
|
||||
T* end() { return values_ + size_; }
|
||||
const T& operator[](std::size_t index) const { return values_[index]; }
|
||||
const T* begin() const { return values_; }
|
||||
const T* end() const { return values_ + size_; }
|
||||
operator ValueListInserter<T>() { return ValueListInserter(values_, size_); }
|
||||
|
||||
void swap(ValueList& other) {
|
||||
const std::size_t maxSize = std::max(size_, other.size_);
|
||||
for (std::size_t i = 0; i < maxSize; ++i) {
|
||||
std::swap(values_[i], other.values_[i]);
|
||||
}
|
||||
std::swap(size_, other.size_);
|
||||
}
|
||||
|
||||
private:
|
||||
T values_[MaxSize];
|
||||
std::size_t size_ = 0;
|
||||
};
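ValueList is a fixed-capacity, stack-allocated container; the new NNUE code uses lists like this to gather changed feature indices per move without touching the heap. A small usage sketch, with a trimmed-down copy of the class so it compiles on its own:

#include <cstddef>
#include <iostream>

// Simplified copy of the ValueList template above; only the members
// exercised in main() are reproduced.
template <typename T, std::size_t MaxSize>
class ValueList {
public:
    std::size_t size() const { return size_; }
    void push_back(const T& value) { values_[size_++] = value; }
    T*       begin()       { return values_; }
    T*       end()         { return values_ + size_; }
    const T* begin() const { return values_; }
    const T* end()   const { return values_ + size_; }
private:
    T values_[MaxSize];
    std::size_t size_ = 0;
};

int main() {
    ValueList<int, 32> dirty;     // e.g. indices of features changed by a move
    dirty.push_back(12);
    dirty.push_back(40);
    for (int v : dirty)
        std::cout << v << ' ';    // prints 12 40
    std::cout << '\n';
}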
|
||||
|
||||
/// xorshift64star Pseudo-Random Number Generator
|
||||
/// This class is based on original code written and dedicated
|
||||
/// to the public domain by Sebastiano Vigna (2014).
|
||||
|
@ -143,4 +195,6 @@ namespace CommandLine {
|
|||
extern std::string workingDirectory; // path of the working directory
|
||||
}
|
||||
|
||||
} // namespace Stockfish
|
||||
|
||||
#endif // #ifndef MISC_H_INCLUDED
|
||||
|
|
|
@ -21,24 +21,21 @@
|
|||
#include "movegen.h"
|
||||
#include "position.h"
|
||||
|
||||
namespace Stockfish {
|
||||
|
||||
namespace {
|
||||
|
||||
template<GenType Type, Direction D>
|
||||
ExtMove* make_promotions(ExtMove* moveList, Square to, Square ksq) {
|
||||
ExtMove* make_promotions(ExtMove* moveList, Square to) {
|
||||
|
||||
if (Type == CAPTURES || Type == EVASIONS || Type == NON_EVASIONS)
|
||||
{
|
||||
*moveList++ = make<PROMOTION>(to - D, to, QUEEN);
|
||||
if (attacks_bb<KNIGHT>(to) & ksq)
|
||||
*moveList++ = make<PROMOTION>(to - D, to, KNIGHT);
|
||||
}
|
||||
|
||||
if (Type == QUIETS || Type == EVASIONS || Type == NON_EVASIONS)
|
||||
{
|
||||
*moveList++ = make<PROMOTION>(to - D, to, ROOK);
|
||||
*moveList++ = make<PROMOTION>(to - D, to, BISHOP);
|
||||
if (!(attacks_bb<KNIGHT>(to) & ksq))
|
||||
*moveList++ = make<PROMOTION>(to - D, to, KNIGHT);
|
||||
*moveList++ = make<PROMOTION>(to - D, to, KNIGHT);
|
||||
}
|
||||
|
||||
return moveList;
|
||||
|
@ -55,20 +52,16 @@ namespace {
|
|||
constexpr Direction UpRight = (Us == WHITE ? NORTH_EAST : SOUTH_WEST);
|
||||
constexpr Direction UpLeft = (Us == WHITE ? NORTH_WEST : SOUTH_EAST);
|
||||
|
||||
const Square ksq = pos.square<KING>(Them);
|
||||
Bitboard emptySquares;
|
||||
const Bitboard emptySquares = Type == QUIETS || Type == QUIET_CHECKS ? target : ~pos.pieces();
|
||||
const Bitboard enemies = Type == EVASIONS ? pos.checkers()
|
||||
: Type == CAPTURES ? target : pos.pieces(Them);
|
||||
|
||||
Bitboard pawnsOn7 = pos.pieces(Us, PAWN) & TRank7BB;
|
||||
Bitboard pawnsNotOn7 = pos.pieces(Us, PAWN) & ~TRank7BB;
|
||||
|
||||
Bitboard enemies = (Type == EVASIONS ? pos.pieces(Them) & target:
|
||||
Type == CAPTURES ? target : pos.pieces(Them));
|
||||
|
||||
// Single and double pawn pushes, no promotions
|
||||
if (Type != CAPTURES)
|
||||
{
|
||||
emptySquares = (Type == QUIETS || Type == QUIET_CHECKS ? target : ~pos.pieces());
|
||||
|
||||
Bitboard b1 = shift<Up>(pawnsNotOn7) & emptySquares;
|
||||
Bitboard b2 = shift<Up>(b1 & TRank3BB) & emptySquares;
|
||||
|
||||
|
@ -80,33 +73,24 @@ namespace {
|
|||
|
||||
if (Type == QUIET_CHECKS)
|
||||
{
|
||||
b1 &= pawn_attacks_bb(Them, ksq);
|
||||
b2 &= pawn_attacks_bb(Them, ksq);
|
||||
|
||||
// Add pawn pushes which give discovered check. This is possible only
|
||||
// if the pawn is not on the same file as the enemy king, because we
|
||||
// don't generate captures. Note that a possible discovered check
|
||||
// promotion has been already generated amongst the captures.
|
||||
Bitboard dcCandidateQuiets = pos.blockers_for_king(Them) & pawnsNotOn7;
|
||||
if (dcCandidateQuiets)
|
||||
{
|
||||
Bitboard dc1 = shift<Up>(dcCandidateQuiets) & emptySquares & ~file_bb(ksq);
|
||||
Bitboard dc2 = shift<Up>(dc1 & TRank3BB) & emptySquares;
|
||||
|
||||
b1 |= dc1;
|
||||
b2 |= dc2;
|
||||
}
|
||||
// To make a quiet check, you either make a direct check by pushing a pawn
|
||||
// or push a blocker pawn that is not on the same file as the enemy king.
|
||||
// Discovered check promotion has been already generated amongst the captures.
|
||||
Square ksq = pos.square<KING>(Them);
|
||||
Bitboard dcCandidatePawns = pos.blockers_for_king(Them) & ~file_bb(ksq);
|
||||
b1 &= pawn_attacks_bb(Them, ksq) | shift< Up>(dcCandidatePawns);
|
||||
b2 &= pawn_attacks_bb(Them, ksq) | shift<Up+Up>(dcCandidatePawns);
|
||||
}
|
||||
|
||||
while (b1)
|
||||
{
|
||||
Square to = pop_lsb(&b1);
|
||||
Square to = pop_lsb(b1);
|
||||
*moveList++ = make_move(to - Up, to);
|
||||
}
|
||||
|
||||
while (b2)
|
||||
{
|
||||
Square to = pop_lsb(&b2);
|
||||
Square to = pop_lsb(b2);
|
||||
*moveList++ = make_move(to - Up - Up, to);
|
||||
}
|
||||
}
|
||||
|
@ -114,24 +98,21 @@ namespace {
|
|||
// Promotions and underpromotions
|
||||
if (pawnsOn7)
|
||||
{
|
||||
if (Type == CAPTURES)
|
||||
emptySquares = ~pos.pieces();
|
||||
|
||||
if (Type == EVASIONS)
|
||||
emptySquares &= target;
|
||||
|
||||
Bitboard b1 = shift<UpRight>(pawnsOn7) & enemies;
|
||||
Bitboard b2 = shift<UpLeft >(pawnsOn7) & enemies;
|
||||
Bitboard b3 = shift<Up >(pawnsOn7) & emptySquares;
|
||||
|
||||
if (Type == EVASIONS)
|
||||
b3 &= target;
|
||||
|
||||
while (b1)
|
||||
moveList = make_promotions<Type, UpRight>(moveList, pop_lsb(&b1), ksq);
|
||||
moveList = make_promotions<Type, UpRight>(moveList, pop_lsb(b1));
|
||||
|
||||
while (b2)
|
||||
moveList = make_promotions<Type, UpLeft >(moveList, pop_lsb(&b2), ksq);
|
||||
moveList = make_promotions<Type, UpLeft >(moveList, pop_lsb(b2));
|
||||
|
||||
while (b3)
|
||||
moveList = make_promotions<Type, Up >(moveList, pop_lsb(&b3), ksq);
|
||||
moveList = make_promotions<Type, Up >(moveList, pop_lsb(b3));
|
||||
}
|
||||
|
||||
// Standard and en passant captures
|
||||
|
@ -142,13 +123,13 @@ namespace {
|
|||
|
||||
while (b1)
|
||||
{
|
||||
Square to = pop_lsb(&b1);
|
||||
Square to = pop_lsb(b1);
|
||||
*moveList++ = make_move(to - UpRight, to);
|
||||
}
|
||||
|
||||
while (b2)
|
||||
{
|
||||
Square to = pop_lsb(&b2);
|
||||
Square to = pop_lsb(b2);
|
||||
*moveList++ = make_move(to - UpLeft, to);
|
||||
}
|
||||
|
||||
|
@ -156,7 +137,7 @@ namespace {
|
|||
{
|
||||
assert(rank_of(pos.ep_square()) == relative_rank(Us, RANK_6));
|
||||
|
||||
// An en passant capture cannot resolve a discovered check.
|
||||
// An en passant capture cannot resolve a discovered check
|
||||
if (Type == EVASIONS && (target & (pos.ep_square() + Up)))
|
||||
return moveList;
|
||||
|
||||
|
@ -165,7 +146,7 @@ namespace {
|
|||
assert(b1);
|
||||
|
||||
while (b1)
|
||||
*moveList++ = make<EN_PASSANT>(pop_lsb(&b1), pos.ep_square());
|
||||
*moveList++ = make<EN_PASSANT>(pop_lsb(b1), pos.ep_square());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -173,27 +154,24 @@ namespace {
|
|||
}
|
||||
|
||||
|
||||
template<PieceType Pt, bool Checks>
|
||||
ExtMove* generate_moves(const Position& pos, ExtMove* moveList, Bitboard piecesToMove, Bitboard target) {
|
||||
template<Color Us, PieceType Pt, bool Checks>
|
||||
ExtMove* generate_moves(const Position& pos, ExtMove* moveList, Bitboard target) {
|
||||
|
||||
static_assert(Pt != KING && Pt != PAWN, "Unsupported piece type in generate_moves()");
|
||||
|
||||
Bitboard bb = piecesToMove & pos.pieces(Pt);
|
||||
|
||||
if (!bb)
|
||||
return moveList;
|
||||
|
||||
[[maybe_unused]] const Bitboard checkSquares = pos.check_squares(Pt);
|
||||
|
||||
while (bb) {
|
||||
Square from = pop_lsb(&bb);
|
||||
Bitboard bb = pos.pieces(Us, Pt);
|
||||
|
||||
while (bb)
|
||||
{
|
||||
Square from = pop_lsb(bb);
|
||||
Bitboard b = attacks_bb<Pt>(from, pos.pieces()) & target;
|
||||
if constexpr (Checks)
|
||||
b &= checkSquares;
|
||||
|
||||
// To check, you either move freely a blocker or make a direct check.
|
||||
if (Checks && (Pt == QUEEN || !(pos.blockers_for_king(~Us) & from)))
|
||||
b &= pos.check_squares(Pt);
|
||||
|
||||
while (b)
|
||||
*moveList++ = make_move(from, pop_lsb(&b));
|
||||
*moveList++ = make_move(from, pop_lsb(b));
|
||||
}
|
||||
|
||||
return moveList;
|
||||
|
@ -206,45 +184,34 @@ namespace {
|
|||
static_assert(Type != LEGAL, "Unsupported type in generate_all()");
|
||||
|
||||
constexpr bool Checks = Type == QUIET_CHECKS; // Reduce template instantiations
|
||||
Bitboard target, piecesToMove = pos.pieces(Us);
|
||||
const Square ksq = pos.square<KING>(Us);
|
||||
Bitboard target;
|
||||
|
||||
if(Type == QUIET_CHECKS)
|
||||
piecesToMove &= ~pos.blockers_for_king(~Us);
|
||||
|
||||
switch (Type)
|
||||
// Skip generating non-king moves when in double check
|
||||
if (Type != EVASIONS || !more_than_one(pos.checkers()))
|
||||
{
|
||||
case CAPTURES:
|
||||
target = pos.pieces(~Us);
|
||||
break;
|
||||
case QUIETS:
|
||||
case QUIET_CHECKS:
|
||||
target = ~pos.pieces();
|
||||
break;
|
||||
case EVASIONS:
|
||||
{
|
||||
Square checksq = lsb(pos.checkers());
|
||||
target = between_bb(pos.square<KING>(Us), checksq) | checksq;
|
||||
break;
|
||||
}
|
||||
case NON_EVASIONS:
|
||||
target = ~pos.pieces(Us);
|
||||
break;
|
||||
target = Type == EVASIONS ? between_bb(ksq, lsb(pos.checkers()))
|
||||
: Type == NON_EVASIONS ? ~pos.pieces( Us)
|
||||
: Type == CAPTURES ? pos.pieces(~Us)
|
||||
: ~pos.pieces( ); // QUIETS || QUIET_CHECKS
|
||||
|
||||
moveList = generate_pawn_moves<Us, Type>(pos, moveList, target);
|
||||
moveList = generate_moves<Us, KNIGHT, Checks>(pos, moveList, target);
|
||||
moveList = generate_moves<Us, BISHOP, Checks>(pos, moveList, target);
|
||||
moveList = generate_moves<Us, ROOK, Checks>(pos, moveList, target);
|
||||
moveList = generate_moves<Us, QUEEN, Checks>(pos, moveList, target);
|
||||
}
|
||||
|
||||
moveList = generate_pawn_moves<Us, Type>(pos, moveList, target);
|
||||
moveList = generate_moves<KNIGHT, Checks>(pos, moveList, piecesToMove, target);
|
||||
moveList = generate_moves<BISHOP, Checks>(pos, moveList, piecesToMove, target);
|
||||
moveList = generate_moves< ROOK, Checks>(pos, moveList, piecesToMove, target);
|
||||
moveList = generate_moves< QUEEN, Checks>(pos, moveList, piecesToMove, target);
|
||||
|
||||
if (Type != QUIET_CHECKS && Type != EVASIONS)
|
||||
if (!Checks || pos.blockers_for_king(~Us) & ksq)
|
||||
{
|
||||
Square ksq = pos.square<KING>(Us);
|
||||
Bitboard b = attacks_bb<KING>(ksq) & target;
|
||||
while (b)
|
||||
*moveList++ = make_move(ksq, pop_lsb(&b));
|
||||
Bitboard b = attacks_bb<KING>(ksq) & (Type == EVASIONS ? ~pos.pieces(Us) : target);
|
||||
if (Checks)
|
||||
b &= ~attacks_bb<QUEEN>(pos.square<KING>(~Us));
|
||||
|
||||
if ((Type != CAPTURES) && pos.can_castle(Us & ANY_CASTLING))
|
||||
while (b)
|
||||
*moveList++ = make_move(ksq, pop_lsb(b));
|
||||
|
||||
if ((Type == QUIETS || Type == NON_EVASIONS) && pos.can_castle(Us & ANY_CASTLING))
|
||||
for (CastlingRights cr : { Us & KING_SIDE, Us & QUEEN_SIDE } )
|
||||
if (!pos.castling_impeded(cr) && pos.can_castle(cr))
|
||||
*moveList++ = make<CASTLING>(ksq, pos.castling_rook_square(cr));
|
||||
|
@ -256,8 +223,10 @@ namespace {
|
|||
} // namespace
|
||||
|
||||
|
||||
/// <CAPTURES> Generates all pseudo-legal captures plus queen and checking knight promotions
|
||||
/// <QUIETS> Generates all pseudo-legal non-captures and underpromotions (except checking knight)
|
||||
/// <CAPTURES> Generates all pseudo-legal captures plus queen promotions
|
||||
/// <QUIETS> Generates all pseudo-legal non-captures and underpromotions
|
||||
/// <EVASIONS> Generates all pseudo-legal check evasions when the side to move is in check
|
||||
/// <QUIET_CHECKS> Generates all pseudo-legal non-captures giving check, except castling and promotions
|
||||
/// <NON_EVASIONS> Generates all pseudo-legal captures and non-captures
|
||||
///
|
||||
/// Returns a pointer to the end of the move list.
|
||||
|
@ -265,8 +234,8 @@ namespace {
|
|||
template<GenType Type>
|
||||
ExtMove* generate(const Position& pos, ExtMove* moveList) {
|
||||
|
||||
static_assert(Type == CAPTURES || Type == QUIETS || Type == NON_EVASIONS, "Unsupported type in generate()");
|
||||
assert(!pos.checkers());
|
||||
static_assert(Type != LEGAL, "Unsupported type in generate()");
|
||||
assert((Type == EVASIONS) == (bool)pos.checkers());
|
||||
|
||||
Color us = pos.side_to_move();
|
||||
|
||||
|
@ -277,70 +246,11 @@ ExtMove* generate(const Position& pos, ExtMove* moveList) {
|
|||
// Explicit template instantiations
|
||||
template ExtMove* generate<CAPTURES>(const Position&, ExtMove*);
|
||||
template ExtMove* generate<QUIETS>(const Position&, ExtMove*);
|
||||
template ExtMove* generate<EVASIONS>(const Position&, ExtMove*);
|
||||
template ExtMove* generate<QUIET_CHECKS>(const Position&, ExtMove*);
|
||||
template ExtMove* generate<NON_EVASIONS>(const Position&, ExtMove*);
|
||||
|
||||
|
||||
/// generate<QUIET_CHECKS> generates all pseudo-legal non-captures giving check,
|
||||
/// except castling. Returns a pointer to the end of the move list.
|
||||
template<>
|
||||
ExtMove* generate<QUIET_CHECKS>(const Position& pos, ExtMove* moveList) {
|
||||
|
||||
assert(!pos.checkers());
|
||||
|
||||
Color us = pos.side_to_move();
|
||||
Bitboard dc = pos.blockers_for_king(~us) & pos.pieces(us) & ~pos.pieces(PAWN);
|
||||
|
||||
while (dc)
|
||||
{
|
||||
Square from = pop_lsb(&dc);
|
||||
PieceType pt = type_of(pos.piece_on(from));
|
||||
|
||||
Bitboard b = attacks_bb(pt, from, pos.pieces()) & ~pos.pieces();
|
||||
|
||||
if (pt == KING)
|
||||
b &= ~attacks_bb<QUEEN>(pos.square<KING>(~us));
|
||||
|
||||
while (b)
|
||||
*moveList++ = make_move(from, pop_lsb(&b));
|
||||
}
|
||||
|
||||
return us == WHITE ? generate_all<WHITE, QUIET_CHECKS>(pos, moveList)
|
||||
: generate_all<BLACK, QUIET_CHECKS>(pos, moveList);
|
||||
}
|
||||
|
||||
|
||||
/// generate<EVASIONS> generates all pseudo-legal check evasions when the side
|
||||
/// to move is in check. Returns a pointer to the end of the move list.
|
||||
template<>
|
||||
ExtMove* generate<EVASIONS>(const Position& pos, ExtMove* moveList) {
|
||||
|
||||
assert(pos.checkers());
|
||||
|
||||
Color us = pos.side_to_move();
|
||||
Square ksq = pos.square<KING>(us);
|
||||
Bitboard sliderAttacks = 0;
|
||||
Bitboard sliders = pos.checkers() & ~pos.pieces(KNIGHT, PAWN);
|
||||
|
||||
// Find all the squares attacked by slider checkers. We will remove them from
|
||||
// the king evasions in order to skip known illegal moves, which avoids any
|
||||
// useless legality checks later on.
|
||||
while (sliders)
|
||||
sliderAttacks |= line_bb(ksq, pop_lsb(&sliders)) & ~pos.checkers();
|
||||
|
||||
// Generate evasions for king, capture and non capture moves
|
||||
Bitboard b = attacks_bb<KING>(ksq) & ~pos.pieces(us) & ~sliderAttacks;
|
||||
while (b)
|
||||
*moveList++ = make_move(ksq, pop_lsb(&b));
|
||||
|
||||
if (more_than_one(pos.checkers()))
|
||||
return moveList; // Double check, only a king move can save the day
|
||||
|
||||
// Generate blocking evasions or captures of the checking piece
|
||||
return us == WHITE ? generate_all<WHITE, EVASIONS>(pos, moveList)
|
||||
: generate_all<BLACK, EVASIONS>(pos, moveList);
|
||||
}
|
||||
|
||||
|
||||
/// generate<LEGAL> generates all the legal moves in the given position
|
||||
|
||||
template<>
|
||||
|
@ -362,3 +272,5 @@ ExtMove* generate<LEGAL>(const Position& pos, ExtMove* moveList) {
|
|||
|
||||
return moveList;
|
||||
}
|
||||
|
||||
} // namespace Stockfish
|
||||
|
|
|
@ -23,6 +23,8 @@
|
|||
|
||||
#include "types.h"
|
||||
|
||||
namespace Stockfish {
|
||||
|
||||
class Position;
|
||||
|
||||
enum GenType {
|
||||
|
@ -70,4 +72,6 @@ private:
|
|||
ExtMove moveList[MAX_MOVES], *last;
|
||||
};
|
||||
|
||||
} // namespace Stockfish
|
||||
|
||||
#endif // #ifndef MOVEGEN_H_INCLUDED
|
||||
|
|
|
@ -20,6 +20,8 @@
|
|||
|
||||
#include "movepick.h"
|
||||
|
||||
namespace Stockfish {
|
||||
|
||||
namespace {
|
||||
|
||||
enum Stages {
|
||||
|
@ -263,3 +265,5 @@ top:
|
|||
assert(false);
|
||||
return MOVE_NONE; // Silence warning
|
||||
}
|
||||
|
||||
} // namespace Stockfish
|
||||
|
|
|
@ -27,6 +27,8 @@
|
|||
#include "position.h"
|
||||
#include "types.h"
|
||||
|
||||
namespace Stockfish {
|
||||
|
||||
/// StatsEntry stores the stat table value. It is usually a number but could
|
||||
/// be a move or even a nested history. We use a class instead of naked value
|
||||
/// to directly call history update operator<<() on the entry so to use stats
|
||||
|
@ -156,4 +158,6 @@ private:
|
|||
ExtMove moves[MAX_MOVES];
|
||||
};
|
||||
|
||||
} // namespace Stockfish
|
||||
|
||||
#endif // #ifndef MOVEPICK_H_INCLUDED
|
||||
|
|
|
@ -1,54 +0,0 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// Definition of input features and network structure used in NNUE evaluation function
|
||||
|
||||
#ifndef NNUE_HALFKP_256X2_32_32_H_INCLUDED
|
||||
#define NNUE_HALFKP_256X2_32_32_H_INCLUDED
|
||||
|
||||
#include "../features/feature_set.h"
|
||||
#include "../features/half_kp.h"
|
||||
|
||||
#include "../layers/input_slice.h"
|
||||
#include "../layers/affine_transform.h"
|
||||
#include "../layers/clipped_relu.h"
|
||||
|
||||
namespace Eval::NNUE {
|
||||
|
||||
// Input features used in evaluation function
|
||||
using RawFeatures = Features::FeatureSet<
|
||||
Features::HalfKP<Features::Side::kFriend>>;
|
||||
|
||||
// Number of input feature dimensions after conversion
|
||||
constexpr IndexType kTransformedFeatureDimensions = 256;
|
||||
|
||||
namespace Layers {
|
||||
|
||||
// Define network structure
|
||||
using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
|
||||
using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
|
||||
using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
|
||||
using OutputLayer = AffineTransform<HiddenLayer2, 1>;
|
||||
|
||||
} // namespace Layers
|
||||
|
||||
using Network = Layers::OutputLayer;
|
||||
|
||||
} // namespace Eval::NNUE
|
||||
|
||||
#endif // #ifndef NNUE_HALFKP_256X2_32_32_H_INCLUDED
|
|
@ -20,6 +20,9 @@
|
|||
|
||||
#include <iostream>
|
||||
#include <set>
|
||||
#include <sstream>
|
||||
#include <iomanip>
|
||||
#include <fstream>
|
||||
|
||||
#include "../evaluate.h"
|
||||
#include "../position.h"
|
||||
|
@ -29,29 +32,30 @@
|
|||
|
||||
#include "evaluate_nnue.h"
|
||||
|
||||
namespace Eval::NNUE {
|
||||
namespace Stockfish::Eval::NNUE {
|
||||
|
||||
// Input feature converter
|
||||
LargePagePtr<FeatureTransformer> feature_transformer;
|
||||
LargePagePtr<FeatureTransformer> featureTransformer;
|
||||
|
||||
// Evaluation function
|
||||
AlignedPtr<Network> network;
|
||||
AlignedPtr<Network> network[LayerStacks];
|
||||
|
||||
// Evaluation function file name
|
||||
std::string fileName;
|
||||
std::string netDescription;
|
||||
|
||||
namespace Detail {
|
||||
|
||||
// Initialize the evaluation function parameters
|
||||
template <typename T>
|
||||
void Initialize(AlignedPtr<T>& pointer) {
|
||||
void initialize(AlignedPtr<T>& pointer) {
|
||||
|
||||
pointer.reset(reinterpret_cast<T*>(std_aligned_alloc(alignof(T), sizeof(T))));
|
||||
std::memset(pointer.get(), 0, sizeof(T));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void Initialize(LargePagePtr<T>& pointer) {
|
||||
void initialize(LargePagePtr<T>& pointer) {
|
||||
|
||||
static_assert(alignof(T) <= 4096, "aligned_large_pages_alloc() may fail for such a big alignment requirement of T");
|
||||
pointer.reset(reinterpret_cast<T*>(aligned_large_pages_alloc(sizeof(T))));
|
||||
|
@ -60,85 +64,380 @@ namespace Eval::NNUE {
|
|||
|
||||
// Read evaluation function parameters
|
||||
template <typename T>
|
||||
bool ReadParameters(std::istream& stream, T& reference) {
|
||||
bool read_parameters(std::istream& stream, T& reference) {
|
||||
|
||||
std::uint32_t header;
|
||||
header = read_little_endian<std::uint32_t>(stream);
|
||||
if (!stream || header != T::GetHashValue()) return false;
|
||||
return reference.ReadParameters(stream);
|
||||
if (!stream || header != T::get_hash_value()) return false;
|
||||
return reference.read_parameters(stream);
|
||||
}
|
||||
|
||||
// Write evaluation function parameters
|
||||
template <typename T>
|
||||
bool write_parameters(std::ostream& stream, const T& reference) {
|
||||
|
||||
write_little_endian<std::uint32_t>(stream, T::get_hash_value());
|
||||
return reference.write_parameters(stream);
|
||||
}
|
||||
|
||||
} // namespace Detail
|
||||
|
||||
// Initialize the evaluation function parameters
|
||||
void Initialize() {
|
||||
void initialize() {
|
||||
|
||||
Detail::Initialize(feature_transformer);
|
||||
Detail::Initialize(network);
|
||||
Detail::initialize(featureTransformer);
|
||||
for (std::size_t i = 0; i < LayerStacks; ++i)
|
||||
Detail::initialize(network[i]);
|
||||
}
|
||||
|
||||
// Read network header
|
||||
bool ReadHeader(std::istream& stream, std::uint32_t* hash_value, std::string* architecture)
|
||||
bool read_header(std::istream& stream, std::uint32_t* hashValue, std::string* desc)
|
||||
{
|
||||
std::uint32_t version, size;
|
||||
|
||||
version = read_little_endian<std::uint32_t>(stream);
|
||||
*hash_value = read_little_endian<std::uint32_t>(stream);
|
||||
*hashValue = read_little_endian<std::uint32_t>(stream);
|
||||
size = read_little_endian<std::uint32_t>(stream);
|
||||
if (!stream || version != kVersion) return false;
|
||||
architecture->resize(size);
|
||||
stream.read(&(*architecture)[0], size);
|
||||
if (!stream || version != Version) return false;
|
||||
desc->resize(size);
|
||||
stream.read(&(*desc)[0], size);
|
||||
return !stream.fail();
|
||||
}
|
||||
|
||||
// Write network header
|
||||
bool write_header(std::ostream& stream, std::uint32_t hashValue, const std::string& desc)
|
||||
{
|
||||
write_little_endian<std::uint32_t>(stream, Version);
|
||||
write_little_endian<std::uint32_t>(stream, hashValue);
|
||||
write_little_endian<std::uint32_t>(stream, desc.size());
|
||||
stream.write(&desc[0], desc.size());
|
||||
return !stream.fail();
|
||||
}
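read_header()/write_header() above serialize the version tag, hash and net description through read_little_endian/write_little_endian helpers that live in nnue_common.h and are not part of this diff. A hedged, standalone sketch of what such byte-order-independent helpers can look like (names reused for illustration only, unsigned integer types assumed):

#include <cstdint>
#include <iostream>
#include <istream>
#include <ostream>
#include <sstream>

template <typename IntType>
IntType read_little_endian(std::istream& stream) {
    unsigned char u[sizeof(IntType)];
    stream.read(reinterpret_cast<char*>(u), sizeof(IntType));
    IntType result = 0;
    for (std::size_t i = 0; i < sizeof(IntType); ++i)
        result |= static_cast<IntType>(u[i]) << (8 * i);   // byte 0 is least significant
    return result;
}

template <typename IntType>
void write_little_endian(std::ostream& stream, IntType value) {
    unsigned char u[sizeof(IntType)];
    for (std::size_t i = 0; i < sizeof(IntType); ++i)
        u[i] = static_cast<unsigned char>(value >> (8 * i));
    stream.write(reinterpret_cast<const char*>(u), sizeof(IntType));
}

int main() {
    std::stringstream ss;
    write_little_endian<std::uint32_t>(ss, 0x12345678u);          // an arbitrary tag
    std::cout << std::hex << read_little_endian<std::uint32_t>(ss) << '\n';   // 12345678
}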
|
||||
|
||||
// Read network parameters
|
||||
bool ReadParameters(std::istream& stream) {
|
||||
bool read_parameters(std::istream& stream) {
|
||||
|
||||
std::uint32_t hash_value;
|
||||
std::string architecture;
|
||||
if (!ReadHeader(stream, &hash_value, &architecture)) return false;
|
||||
if (hash_value != kHashValue) return false;
|
||||
if (!Detail::ReadParameters(stream, *feature_transformer)) return false;
|
||||
if (!Detail::ReadParameters(stream, *network)) return false;
|
||||
std::uint32_t hashValue;
|
||||
if (!read_header(stream, &hashValue, &netDescription)) return false;
|
||||
if (hashValue != HashValue) return false;
|
||||
if (!Detail::read_parameters(stream, *featureTransformer)) return false;
|
||||
for (std::size_t i = 0; i < LayerStacks; ++i)
|
||||
if (!Detail::read_parameters(stream, *(network[i]))) return false;
|
||||
return stream && stream.peek() == std::ios::traits_type::eof();
|
||||
}
|
||||
|
||||
// Write network parameters
|
||||
bool write_parameters(std::ostream& stream) {
|
||||
|
||||
if (!write_header(stream, HashValue, netDescription)) return false;
|
||||
if (!Detail::write_parameters(stream, *featureTransformer)) return false;
|
||||
for (std::size_t i = 0; i < LayerStacks; ++i)
|
||||
if (!Detail::write_parameters(stream, *(network[i]))) return false;
|
||||
return (bool)stream;
|
||||
}
|
||||
|
||||
// Evaluation function. Perform differential calculation.
|
||||
Value evaluate(const Position& pos) {
|
||||
Value evaluate(const Position& pos, bool adjusted) {
|
||||
|
||||
// We manually align the arrays on the stack because with gcc < 9.3
|
||||
// overaligning stack variables with alignas() doesn't work correctly.
|
||||
|
||||
constexpr uint64_t alignment = kCacheLineSize;
|
||||
constexpr uint64_t alignment = CacheLineSize;
|
||||
|
||||
#if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN)
|
||||
TransformedFeatureType transformed_features_unaligned[
|
||||
FeatureTransformer::kBufferSize + alignment / sizeof(TransformedFeatureType)];
|
||||
char buffer_unaligned[Network::kBufferSize + alignment];
|
||||
TransformedFeatureType transformedFeaturesUnaligned[
|
||||
FeatureTransformer::BufferSize + alignment / sizeof(TransformedFeatureType)];
|
||||
char bufferUnaligned[Network::BufferSize + alignment];
|
||||
|
||||
auto* transformed_features = align_ptr_up<alignment>(&transformed_features_unaligned[0]);
|
||||
auto* buffer = align_ptr_up<alignment>(&buffer_unaligned[0]);
|
||||
auto* transformedFeatures = align_ptr_up<alignment>(&transformedFeaturesUnaligned[0]);
|
||||
auto* buffer = align_ptr_up<alignment>(&bufferUnaligned[0]);
|
||||
#else
|
||||
alignas(alignment)
|
||||
TransformedFeatureType transformed_features[FeatureTransformer::kBufferSize];
|
||||
alignas(alignment) char buffer[Network::kBufferSize];
|
||||
TransformedFeatureType transformedFeatures[FeatureTransformer::BufferSize];
|
||||
alignas(alignment) char buffer[Network::BufferSize];
|
||||
#endif
|
||||
|
||||
ASSERT_ALIGNED(transformed_features, alignment);
|
||||
ASSERT_ALIGNED(transformedFeatures, alignment);
|
||||
ASSERT_ALIGNED(buffer, alignment);
|
||||
|
||||
feature_transformer->Transform(pos, transformed_features);
|
||||
const auto output = network->Propagate(transformed_features, buffer);
|
||||
const std::size_t bucket = (pos.count<ALL_PIECES>() - 1) / 4;
|
||||
const auto psqt = featureTransformer->transform(pos, transformedFeatures, bucket);
|
||||
const auto output = network[bucket]->propagate(transformedFeatures, buffer);
|
||||
|
||||
return static_cast<Value>(output[0] / FV_SCALE);
|
||||
int materialist = psqt;
|
||||
int positional = output[0];
|
||||
|
||||
int delta_npm = abs(pos.non_pawn_material(WHITE) - pos.non_pawn_material(BLACK));
|
||||
int entertainment = (adjusted && delta_npm <= BishopValueMg - KnightValueMg ? 7 : 0);
|
||||
|
||||
int A = 128 - entertainment;
|
||||
int B = 128 + entertainment;
|
||||
|
||||
int sum = (A * materialist + B * positional) / 128;
|
||||
|
||||
return static_cast<Value>( sum / OutputScale );
|
||||
}
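To make the bucket selection and the material/positional blend above concrete, here is a small self-contained worked example. OutputScale = 16 is assumed from nnue_common; every other number is fictional and only illustrates the arithmetic.

#include <iostream>

int main() {
    // Assumed output divisor; illustrative only.
    constexpr int OutputScale = 16;

    // Example position: 22 pieces on the board -> bucket (22 - 1) / 4 = 5.
    int pieceCount = 22;
    int bucket = (pieceCount - 1) / 4;

    // Example raw network outputs for that bucket (fictional numbers).
    int materialist = 1200;   // PSQT part returned by the feature transformer
    int positional  = 300;    // output of the layer stack

    // With 'adjusted' evaluation and a small non-pawn-material imbalance,
    // the positional term is weighted slightly up and the PSQT term down.
    bool adjusted = true;
    int deltaNpm = 100;              // |npm(WHITE) - npm(BLACK)|, fictional
    int bishopMinusKnightMg = 45;    // assumed margin, illustrative only
    int entertainment = (adjusted && deltaNpm <= bishopMinusKnightMg) ? 7 : 0;

    int A = 128 - entertainment;     // weight of the material (PSQT) part
    int B = 128 + entertainment;     // weight of the positional part
    int sum = (A * materialist + B * positional) / 128;

    std::cout << "bucket " << bucket << ", eval " << sum / OutputScale << '\n';
    return 0;
}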
|
||||
|
||||
struct NnueEvalTrace {
|
||||
static_assert(LayerStacks == PSQTBuckets);
|
||||
|
||||
Value psqt[LayerStacks];
|
||||
Value positional[LayerStacks];
|
||||
std::size_t correctBucket;
|
||||
};
|
||||
|
||||
static NnueEvalTrace trace_evaluate(const Position& pos) {
|
||||
|
||||
// We manually align the arrays on the stack because with gcc < 9.3
|
||||
// overaligning stack variables with alignas() doesn't work correctly.
|
||||
|
||||
constexpr uint64_t alignment = CacheLineSize;
|
||||
|
||||
#if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN)
|
||||
TransformedFeatureType transformedFeaturesUnaligned[
|
||||
FeatureTransformer::BufferSize + alignment / sizeof(TransformedFeatureType)];
|
||||
char bufferUnaligned[Network::BufferSize + alignment];
|
||||
|
||||
auto* transformedFeatures = align_ptr_up<alignment>(&transformedFeaturesUnaligned[0]);
|
||||
auto* buffer = align_ptr_up<alignment>(&bufferUnaligned[0]);
|
||||
#else
|
||||
alignas(alignment)
|
||||
TransformedFeatureType transformedFeatures[FeatureTransformer::BufferSize];
|
||||
alignas(alignment) char buffer[Network::BufferSize];
|
||||
#endif
|
||||
|
||||
ASSERT_ALIGNED(transformedFeatures, alignment);
|
||||
ASSERT_ALIGNED(buffer, alignment);
|
||||
|
||||
NnueEvalTrace t{};
|
||||
t.correctBucket = (pos.count<ALL_PIECES>() - 1) / 4;
|
||||
for (std::size_t bucket = 0; bucket < LayerStacks; ++bucket) {
|
||||
const auto psqt = featureTransformer->transform(pos, transformedFeatures, bucket);
|
||||
const auto output = network[bucket]->propagate(transformedFeatures, buffer);
|
||||
|
||||
int materialist = psqt;
|
||||
int positional = output[0];
|
||||
|
||||
t.psqt[bucket] = static_cast<Value>( materialist / OutputScale );
|
||||
t.positional[bucket] = static_cast<Value>( positional / OutputScale );
|
||||
}
|
||||
|
||||
return t;
|
||||
}
|
||||
|
||||
static const std::string PieceToChar(" PNBRQK pnbrqk");
|
||||
|
||||
// Requires the buffer to have capacity for at least 5 chars
|
||||
static void format_cp_compact(Value v, char* buffer) {
|
||||
|
||||
buffer[0] = (v < 0 ? '-' : v > 0 ? '+' : ' ');
|
||||
|
||||
int cp = std::abs(100 * v / PawnValueEg);
|
||||
|
||||
if (cp >= 10000)
|
||||
{
|
||||
buffer[1] = '0' + cp / 10000; cp %= 10000;
|
||||
buffer[2] = '0' + cp / 1000; cp %= 1000;
|
||||
buffer[3] = '0' + cp / 100; cp %= 100;
|
||||
buffer[4] = ' ';
|
||||
}
|
||||
else if (cp >= 1000)
|
||||
{
|
||||
buffer[1] = '0' + cp / 1000; cp %= 1000;
|
||||
buffer[2] = '0' + cp / 100; cp %= 100;
|
||||
buffer[3] = '.';
|
||||
buffer[4] = '0' + cp / 10;
|
||||
}
|
||||
else
|
||||
{
|
||||
buffer[1] = '0' + cp / 100; cp %= 100;
|
||||
buffer[2] = '.';
|
||||
buffer[3] = '0' + cp / 10; cp %= 10;
|
||||
buffer[4] = '0' + cp / 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Requires the buffer to have capacity for at least 7 chars
|
||||
static void format_cp_aligned_dot(Value v, char* buffer) {
|
||||
buffer[0] = (v < 0 ? '-' : v > 0 ? '+' : ' ');
|
||||
|
||||
int cp = std::abs(100 * v / PawnValueEg);
|
||||
|
||||
if (cp >= 10000)
|
||||
{
|
||||
buffer[1] = '0' + cp / 10000; cp %= 10000;
|
||||
buffer[2] = '0' + cp / 1000; cp %= 1000;
|
||||
buffer[3] = '0' + cp / 100; cp %= 100;
|
||||
buffer[4] = '.';
|
||||
buffer[5] = '0' + cp / 10; cp %= 10;
|
||||
buffer[6] = '0' + cp;
|
||||
}
|
||||
else if (cp >= 1000)
|
||||
{
|
||||
buffer[1] = ' ';
|
||||
buffer[2] = '0' + cp / 1000; cp %= 1000;
|
||||
buffer[3] = '0' + cp / 100; cp %= 100;
|
||||
buffer[4] = '.';
|
||||
buffer[5] = '0' + cp / 10; cp %= 10;
|
||||
buffer[6] = '0' + cp;
|
||||
}
|
||||
else
|
||||
{
|
||||
buffer[1] = ' ';
|
||||
buffer[2] = ' ';
|
||||
buffer[3] = '0' + cp / 100; cp %= 100;
|
||||
buffer[4] = '.';
|
||||
buffer[5] = '0' + cp / 10; cp %= 10;
|
||||
buffer[6] = '0' + cp / 1;
|
||||
}
|
||||
}
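As a quick sanity check of the two formatters: a score worth 134 centipawns comes out as "+1.34" in the compact form and "+  1.34" in the aligned form. The snippet below redoes only the digit arithmetic of the last (cp < 1000) branch; it is an illustration, not engine code.

#include <cstdio>

int main() {
    int cp = 134;                        // example score in centipawns
    char buf[6] = {};

    buf[0] = '+';
    buf[1] = '0' + cp / 100; cp %= 100;  // whole pawns
    buf[2] = '.';
    buf[3] = '0' + cp / 10;  cp %= 10;   // tenths
    buf[4] = '0' + cp;                   // hundredths
    buf[5] = '\0';

    std::printf("%s\n", buf);            // prints "+1.34"
    return 0;
}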
|
||||
|
||||
|
||||
// trace() returns a string with the value of each piece on a board,
|
||||
// and a table for (PSQT, Layers) values bucket by bucket.
|
||||
|
||||
std::string trace(Position& pos) {
|
||||
|
||||
std::stringstream ss;
|
||||
|
||||
char board[3*8+1][8*8+2];
|
||||
std::memset(board, ' ', sizeof(board));
|
||||
for (int row = 0; row < 3*8+1; ++row)
|
||||
board[row][8*8+1] = '\0';
|
||||
|
||||
// A lambda to output one box of the board
|
||||
auto writeSquare = [&board](File file, Rank rank, Piece pc, Value value) {
|
||||
|
||||
const int x = ((int)file) * 8;
|
||||
const int y = (7 - (int)rank) * 3;
|
||||
for (int i = 1; i < 8; ++i)
|
||||
board[y][x+i] = board[y+3][x+i] = '-';
|
||||
for (int i = 1; i < 3; ++i)
|
||||
board[y+i][x] = board[y+i][x+8] = '|';
|
||||
board[y][x] = board[y][x+8] = board[y+3][x+8] = board[y+3][x] = '+';
|
||||
if (pc != NO_PIECE)
|
||||
board[y+1][x+4] = PieceToChar[pc];
|
||||
if (value != VALUE_NONE)
|
||||
format_cp_compact(value, &board[y+2][x+2]);
|
||||
};
|
||||
|
||||
// We estimate the value of each piece by doing a differential evaluation from
|
||||
// the current base eval, simulating the removal of the piece from its square.
|
||||
Value base = evaluate(pos);
|
||||
base = pos.side_to_move() == WHITE ? base : -base;
|
||||
|
||||
for (File f = FILE_A; f <= FILE_H; ++f)
|
||||
for (Rank r = RANK_1; r <= RANK_8; ++r)
|
||||
{
|
||||
Square sq = make_square(f, r);
|
||||
Piece pc = pos.piece_on(sq);
|
||||
Value v = VALUE_NONE;
|
||||
|
||||
if (pc != NO_PIECE && type_of(pc) != KING)
|
||||
{
|
||||
auto st = pos.state();
|
||||
|
||||
pos.remove_piece(sq);
|
||||
st->accumulator.computed[WHITE] = false;
|
||||
st->accumulator.computed[BLACK] = false;
|
||||
|
||||
Value eval = evaluate(pos);
|
||||
eval = pos.side_to_move() == WHITE ? eval : -eval;
|
||||
v = base - eval;
|
||||
|
||||
pos.put_piece(pc, sq);
|
||||
st->accumulator.computed[WHITE] = false;
|
||||
st->accumulator.computed[BLACK] = false;
|
||||
}
|
||||
|
||||
writeSquare(f, r, pc, v);
|
||||
}
|
||||
|
||||
ss << " NNUE derived piece values:\n";
|
||||
for (int row = 0; row < 3*8+1; ++row)
|
||||
ss << board[row] << '\n';
|
||||
ss << '\n';
|
||||
|
||||
auto t = trace_evaluate(pos);
|
||||
|
||||
ss << " NNUE network contributions "
|
||||
<< (pos.side_to_move() == WHITE ? "(White to move)" : "(Black to move)") << std::endl
|
||||
<< "+------------+------------+------------+------------+\n"
|
||||
<< "| Bucket | Material | Positional | Total |\n"
|
||||
<< "| | (PSQT) | (Layers) | |\n"
|
||||
<< "+------------+------------+------------+------------+\n";
|
||||
|
||||
for (std::size_t bucket = 0; bucket < LayerStacks; ++bucket)
|
||||
{
|
||||
char buffer[3][8];
|
||||
std::memset(buffer, '\0', sizeof(buffer));
|
||||
|
||||
format_cp_aligned_dot(t.psqt[bucket], buffer[0]);
|
||||
format_cp_aligned_dot(t.positional[bucket], buffer[1]);
|
||||
format_cp_aligned_dot(t.psqt[bucket] + t.positional[bucket], buffer[2]);
|
||||
|
||||
ss << "| " << bucket << " "
|
||||
<< " | " << buffer[0] << " "
|
||||
<< " | " << buffer[1] << " "
|
||||
<< " | " << buffer[2] << " "
|
||||
<< " |";
|
||||
if (bucket == t.correctBucket)
|
||||
ss << " <-- this bucket is used";
|
||||
ss << '\n';
|
||||
}
|
||||
|
||||
ss << "+------------+------------+------------+------------+\n";
|
||||
|
||||
return ss.str();
|
||||
}
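The piece values printed by trace() come from the differential trick in the loop above: evaluate the position, remove one piece, evaluate again, and call the difference that piece's value. The toy snippet below illustrates the same idea with a made-up material-only eval; it is not the NNUE network.

#include <iostream>

// Toy stand-in for the evaluation, from White's point of view.
static int toy_eval(int materialWhite, int materialBlack) {
    return materialWhite - materialBlack;
}

int main() {
    int base = toy_eval(3100, 3000);                  // full position
    int withoutKnight = toy_eval(3100 - 300, 3000);   // same position, one knight removed
    std::cout << "estimated knight value: " << base - withoutKnight << '\n'; // 300
    return 0;
}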
|
||||
|
||||
|
||||
// Load eval, from a file stream or a memory stream
|
||||
bool load_eval(std::string name, std::istream& stream) {
|
||||
|
||||
Initialize();
|
||||
initialize();
|
||||
fileName = name;
|
||||
return ReadParameters(stream);
|
||||
return read_parameters(stream);
|
||||
}
|
||||
|
||||
} // namespace Eval::NNUE
|
||||
// Save eval, to a file stream or a memory stream
|
||||
bool save_eval(std::ostream& stream) {
|
||||
|
||||
if (fileName.empty())
|
||||
return false;
|
||||
|
||||
return write_parameters(stream);
|
||||
}
|
||||
|
||||
/// Save eval, to a file given by its name
|
||||
bool save_eval(const std::optional<std::string>& filename) {
|
||||
|
||||
std::string actualFilename;
|
||||
std::string msg;
|
||||
|
||||
if (filename.has_value())
|
||||
actualFilename = filename.value();
|
||||
else
|
||||
{
|
||||
if (eval_file_loaded != EvalFileDefaultName)
|
||||
{
|
||||
msg = "Failed to export a net. A non-embedded net can only be saved if the filename is specified";
|
||||
|
||||
sync_cout << msg << sync_endl;
|
||||
return false;
|
||||
}
|
||||
actualFilename = EvalFileDefaultName;
|
||||
}
|
||||
|
||||
std::ofstream stream(actualFilename, std::ios_base::binary);
|
||||
bool saved = save_eval(stream);
|
||||
|
||||
msg = saved ? "Network saved successfully to " + actualFilename
|
||||
: "Failed to export a net";
|
||||
|
||||
sync_cout << msg << sync_endl;
|
||||
return saved;
|
||||
}
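These save helpers are normally driven from the UCI layer. Below is a hedged sketch of what an "export_net [filename]" style handler could look like; the command name is an assumption for the example, and the stub merely stands in for the save_eval(const std::optional<std::string>&) overload above.

#include <iostream>
#include <optional>
#include <sstream>
#include <string>

// Stand-in for the real save_eval(); in the engine the function above is called.
static bool save_eval_stub(const std::optional<std::string>& filename) {
    std::cout << "would save net to "
              << filename.value_or("<embedded default name>") << '\n';
    return true;
}

// Hypothetical "export_net [filename]" command handler.
static bool handle_export_net(std::istringstream& is) {
    std::optional<std::string> filename;
    std::string token;
    if (is >> token)
        filename = token;
    return save_eval_stub(filename);
}

int main() {
    std::istringstream cmd("mynet.nnue");
    handle_export_net(cmd);    // would save net to mynet.nnue
    std::istringstream empty("");
    handle_export_net(empty);  // would save net to <embedded default name>
    return 0;
}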
|
||||
|
||||
|
||||
} // namespace Stockfish::Eval::NNUE
|
||||
|
|
|
@ -25,11 +25,11 @@
|
|||
|
||||
#include <memory>
|
||||
|
||||
namespace Eval::NNUE {
|
||||
namespace Stockfish::Eval::NNUE {
|
||||
|
||||
// Hash value of evaluation function structure
|
||||
constexpr std::uint32_t kHashValue =
|
||||
FeatureTransformer::GetHashValue() ^ Network::GetHashValue();
|
||||
constexpr std::uint32_t HashValue =
|
||||
FeatureTransformer::get_hash_value() ^ Network::get_hash_value();
|
||||
|
||||
// Deleter for automating release of memory area
|
||||
template <typename T>
|
||||
|
@ -54,6 +54,6 @@ namespace Eval::NNUE {
|
|||
template <typename T>
|
||||
using LargePagePtr = std::unique_ptr<T, LargePageDeleter<T>>;
|
||||
|
||||
} // namespace Eval::NNUE
|
||||
} // namespace Stockfish::Eval::NNUE
|
||||
|
||||
#endif // #ifndef NNUE_EVALUATE_NNUE_H_INCLUDED
|
||||
|
|
|
@ -1,69 +0,0 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// A class template that represents the input feature set of the NNUE evaluation function
|
||||
|
||||
#ifndef NNUE_FEATURE_SET_H_INCLUDED
|
||||
#define NNUE_FEATURE_SET_H_INCLUDED
|
||||
|
||||
#include "features_common.h"
|
||||
#include <array>
|
||||
|
||||
namespace Eval::NNUE::Features {
|
||||
|
||||
// Class template that represents a list of values
|
||||
template <typename T, T... Values>
|
||||
struct CompileTimeList;
|
||||
|
||||
template <typename T, T First, T... Remaining>
|
||||
struct CompileTimeList<T, First, Remaining...> {
|
||||
static constexpr bool Contains(T value) {
|
||||
return value == First || CompileTimeList<T, Remaining...>::Contains(value);
|
||||
}
|
||||
static constexpr std::array<T, sizeof...(Remaining) + 1>
|
||||
kValues = {{First, Remaining...}};
|
||||
};
|
||||
|
||||
// Base class of feature set
|
||||
template <typename Derived>
|
||||
class FeatureSetBase {
|
||||
|
||||
};
|
||||
|
||||
// Class template that represents the feature set
|
||||
template <typename FeatureType>
|
||||
class FeatureSet<FeatureType> : public FeatureSetBase<FeatureSet<FeatureType>> {
|
||||
|
||||
public:
|
||||
// Hash value embedded in the evaluation file
|
||||
static constexpr std::uint32_t kHashValue = FeatureType::kHashValue;
|
||||
// Number of feature dimensions
|
||||
static constexpr IndexType kDimensions = FeatureType::kDimensions;
|
||||
// Maximum number of simultaneously active features
|
||||
static constexpr IndexType kMaxActiveDimensions =
|
||||
FeatureType::kMaxActiveDimensions;
|
||||
// Trigger for full calculation instead of difference calculation
|
||||
using SortedTriggerSet =
|
||||
CompileTimeList<TriggerEvent, FeatureType::kRefreshTrigger>;
|
||||
static constexpr auto kRefreshTriggers = SortedTriggerSet::kValues;
|
||||
|
||||
};
|
||||
|
||||
} // namespace Eval::NNUE::Features
|
||||
|
||||
#endif // #ifndef NNUE_FEATURE_SET_H_INCLUDED
|
|
@ -1,45 +0,0 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
//Common header of input features of NNUE evaluation function
|
||||
|
||||
#ifndef NNUE_FEATURES_COMMON_H_INCLUDED
|
||||
#define NNUE_FEATURES_COMMON_H_INCLUDED
|
||||
|
||||
#include "../../evaluate.h"
|
||||
#include "../nnue_common.h"
|
||||
|
||||
namespace Eval::NNUE::Features {
|
||||
|
||||
class IndexList;
|
||||
|
||||
template <typename... FeatureTypes>
|
||||
class FeatureSet;
|
||||
|
||||
// Trigger to perform full calculations instead of difference only
|
||||
enum class TriggerEvent {
|
||||
kFriendKingMoved // calculate full evaluation when own king moves
|
||||
};
|
||||
|
||||
enum class Side {
|
||||
kFriend // side to move
|
||||
};
|
||||
|
||||
} // namespace Eval::NNUE::Features
|
||||
|
||||
#endif // #ifndef NNUE_FEATURES_COMMON_H_INCLUDED
|
|
@ -0,0 +1,85 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
//Definition of input features HalfKAv2 of NNUE evaluation function
|
||||
|
||||
#include "half_ka_v2.h"
|
||||
|
||||
#include "../../position.h"
|
||||
|
||||
namespace Stockfish::Eval::NNUE::Features {
|
||||
|
||||
// Orient a square according to perspective (flips the rank for black)
|
||||
inline Square HalfKAv2::orient(Color perspective, Square s) {
|
||||
return Square(int(s) ^ (bool(perspective) * 56));
|
||||
}
|
||||
|
||||
// Index of a feature for a given king position and another piece on some square
|
||||
inline IndexType HalfKAv2::make_index(Color perspective, Square s, Piece pc, Square ksq) {
|
||||
return IndexType(orient(perspective, s) + PieceSquareIndex[perspective][pc] + PS_NB * ksq);
|
||||
}
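A worked example of the indexing above: for Black's perspective, orient() flips the rank (square ^ 56), so a white knight on G1 seen by Black maps to G8; the feature index then adds the per-piece plane offset and PS_NB times the oriented king square. The snippet below redoes that arithmetic with plain integers, using Stockfish's A1 = 0 ... H8 = 63 square encoding.

#include <iostream>

int main() {
    // Square encoding assumed: A1 = 0, B1 = 1, ..., H8 = 63.
    const int SQUARE_NB = 64;
    const int PS_NB     = 11 * SQUARE_NB;      // 704 piece-square planes

    // White knight on G1 (= 6), white king on E1 (= 4), from Black's view.
    int knightSq = 6, kingSq = 4;

    int orientedKnight = knightSq ^ 56;        // G1 -> G8 (rank flip)
    int orientedKing   = kingSq   ^ 56;        // E1 -> E8

    // From Black's perspective a white knight is a "their" knight,
    // so the plane offset is PS_B_KNIGHT = 3 * SQUARE_NB in the table above.
    int pieceOffset = 3 * SQUARE_NB;

    int index = orientedKnight + pieceOffset + PS_NB * orientedKing;
    std::cout << "feature index: " << index << '\n';   // 62 + 192 + 704 * 60
    return 0;
}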
|
||||
|
||||
// Get a list of indices for active features
|
||||
void HalfKAv2::append_active_indices(
|
||||
const Position& pos,
|
||||
Color perspective,
|
||||
ValueListInserter<IndexType> active
|
||||
) {
|
||||
Square ksq = orient(perspective, pos.square<KING>(perspective));
|
||||
Bitboard bb = pos.pieces();
|
||||
while (bb)
|
||||
{
|
||||
Square s = pop_lsb(bb);
|
||||
active.push_back(make_index(perspective, s, pos.piece_on(s), ksq));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// append_changed_indices() : get a list of indices for recently changed features
|
||||
|
||||
void HalfKAv2::append_changed_indices(
|
||||
Square ksq,
|
||||
StateInfo* st,
|
||||
Color perspective,
|
||||
ValueListInserter<IndexType> removed,
|
||||
ValueListInserter<IndexType> added
|
||||
) {
|
||||
const auto& dp = st->dirtyPiece;
|
||||
Square oriented_ksq = orient(perspective, ksq);
|
||||
for (int i = 0; i < dp.dirty_num; ++i) {
|
||||
Piece pc = dp.piece[i];
|
||||
if (dp.from[i] != SQ_NONE)
|
||||
removed.push_back(make_index(perspective, dp.from[i], pc, oriented_ksq));
|
||||
if (dp.to[i] != SQ_NONE)
|
||||
added.push_back(make_index(perspective, dp.to[i], pc, oriented_ksq));
|
||||
}
|
||||
}
|
||||
|
||||
int HalfKAv2::update_cost(StateInfo* st) {
|
||||
return st->dirtyPiece.dirty_num;
|
||||
}
|
||||
|
||||
int HalfKAv2::refresh_cost(const Position& pos) {
|
||||
return pos.count<ALL_PIECES>();
|
||||
}
|
||||
|
||||
bool HalfKAv2::requires_refresh(StateInfo* st, Color perspective) {
|
||||
return st->dirtyPiece.piece[0] == make_piece(perspective, KING);
|
||||
}
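update_cost(), refresh_cost() and requires_refresh() above feed the accumulator-update heuristic: walk back through the StateInfo chain and either replay the dirty-piece deltas or rebuild from scratch, whichever is cheaper, always rebuilding when the own king moved. A simplified, self-contained sketch of that decision follows; the struct layout is invented for illustration and is not the engine's StateInfo.

#include <iostream>
#include <vector>

// Simplified stand-in for the engine's per-move dirty-piece bookkeeping.
struct ToyState {
    int  dirtyNum;    // number of changed pieces in this move
    bool kingMoved;   // did this side's own king move?
};

// Decide between an incremental update and a full refresh, mirroring the
// roles of update_cost(), refresh_cost() and requires_refresh() above.
static bool should_refresh(const std::vector<ToyState>& chain, int pieceCount) {
    int updateCost = 0;
    for (const ToyState& st : chain) {
        if (st.kingMoved)              // king move forces a rebuild
            return true;
        updateCost += st.dirtyNum;     // update cost summed over the chain
    }
    return updateCost > pieceCount;    // refresh cost == piece count
}

int main() {
    std::vector<ToyState> chain = { {2, false}, {1, false}, {3, false} };
    std::cout << (should_refresh(chain, 24) ? "refresh" : "incremental update") << '\n';
    return 0;
}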
|
||||
|
||||
} // namespace Stockfish::Eval::NNUE::Features
|
111
DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2.h
Normal file
|
@ -0,0 +1,111 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// Definition of input features HalfKAv2 of NNUE evaluation function
|
||||
|
||||
#ifndef NNUE_FEATURES_HALF_KA_V2_H_INCLUDED
|
||||
#define NNUE_FEATURES_HALF_KA_V2_H_INCLUDED
|
||||
|
||||
#include "../nnue_common.h"
|
||||
|
||||
#include "../../evaluate.h"
|
||||
#include "../../misc.h"
|
||||
|
||||
namespace Stockfish {
|
||||
struct StateInfo;
|
||||
}
|
||||
|
||||
namespace Stockfish::Eval::NNUE::Features {
|
||||
|
||||
// Feature HalfKAv2: Combination of the position of own king
|
||||
// and the position of pieces
|
||||
class HalfKAv2 {
|
||||
|
||||
// unique number for each piece type on each square
|
||||
enum {
|
||||
PS_NONE = 0,
|
||||
PS_W_PAWN = 0,
|
||||
PS_B_PAWN = 1 * SQUARE_NB,
|
||||
PS_W_KNIGHT = 2 * SQUARE_NB,
|
||||
PS_B_KNIGHT = 3 * SQUARE_NB,
|
||||
PS_W_BISHOP = 4 * SQUARE_NB,
|
||||
PS_B_BISHOP = 5 * SQUARE_NB,
|
||||
PS_W_ROOK = 6 * SQUARE_NB,
|
||||
PS_B_ROOK = 7 * SQUARE_NB,
|
||||
PS_W_QUEEN = 8 * SQUARE_NB,
|
||||
PS_B_QUEEN = 9 * SQUARE_NB,
|
||||
PS_KING = 10 * SQUARE_NB,
|
||||
PS_NB = 11 * SQUARE_NB
|
||||
};
|
||||
|
||||
static constexpr IndexType PieceSquareIndex[COLOR_NB][PIECE_NB] = {
|
||||
// convention: W - us, B - them
|
||||
// viewed from other side, W and B are reversed
|
||||
{ PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_KING, PS_NONE,
|
||||
PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_KING, PS_NONE },
|
||||
{ PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_KING, PS_NONE,
|
||||
PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_KING, PS_NONE }
|
||||
};
|
||||
|
||||
// Orient a square according to perspective (flips the rank for black)
|
||||
static Square orient(Color perspective, Square s);
|
||||
|
||||
// Index of a feature for a given king position and another piece on some square
|
||||
static IndexType make_index(Color perspective, Square s, Piece pc, Square ksq);
|
||||
|
||||
public:
|
||||
// Feature name
|
||||
static constexpr const char* Name = "HalfKAv2(Friend)";
|
||||
|
||||
// Hash value embedded in the evaluation file
|
||||
static constexpr std::uint32_t HashValue = 0x5f234cb8u;
|
||||
|
||||
// Number of feature dimensions
|
||||
static constexpr IndexType Dimensions =
|
||||
static_cast<IndexType>(SQUARE_NB) * static_cast<IndexType>(PS_NB);
|
||||
|
||||
// Maximum number of simultaneously active features.
|
||||
static constexpr IndexType MaxActiveDimensions = 32;
|
||||
|
||||
// Get a list of indices for active features
|
||||
static void append_active_indices(
|
||||
const Position& pos,
|
||||
Color perspective,
|
||||
ValueListInserter<IndexType> active);
|
||||
|
||||
// Get a list of indices for recently changed features
|
||||
static void append_changed_indices(
|
||||
Square ksq,
|
||||
StateInfo* st,
|
||||
Color perspective,
|
||||
ValueListInserter<IndexType> removed,
|
||||
ValueListInserter<IndexType> added);
|
||||
|
||||
// Returns the cost of updating one perspective, the most costly one.
|
||||
// Assumes no refresh needed.
|
||||
static int update_cost(StateInfo* st);
|
||||
static int refresh_cost(const Position& pos);
|
||||
|
||||
// Returns whether the change stored in this StateInfo means that
|
||||
// a full accumulator refresh is required.
|
||||
static bool requires_refresh(StateInfo* st, Color perspective);
|
||||
};
|
||||
|
||||
} // namespace Stockfish::Eval::NNUE::Features
|
||||
|
||||
#endif // #ifndef NNUE_FEATURES_HALF_KA_V2_H_INCLUDED
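For completeness, the constants in this header multiply out as follows: 11 piece planes of 64 squares give PS_NB = 704, and one set of planes per king square gives Dimensions = 64 * 704 = 45056 input features per perspective, of which at most 32 (one per piece on the board) are active at once. The tiny check below just restates that arithmetic.

#include <cstdint>

int main() {
    constexpr std::uint32_t SquareNb   = 64;
    constexpr std::uint32_t PsNb       = 11 * SquareNb;    // 704
    constexpr std::uint32_t Dimensions = SquareNb * PsNb;  // 45056

    static_assert(PsNb == 704, "11 piece planes of 64 squares");
    static_assert(Dimensions == 45056, "one plane set per king square");
    return 0;
}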
|
|
@ -1,68 +0,0 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
//Definition of input features HalfKP of NNUE evaluation function
|
||||
|
||||
#include "half_kp.h"
|
||||
#include "index_list.h"
|
||||
|
||||
namespace Eval::NNUE::Features {
|
||||
|
||||
// Orient a square according to perspective (rotates by 180 for black)
|
||||
inline Square orient(Color perspective, Square s) {
|
||||
return Square(int(s) ^ (bool(perspective) * 63));
|
||||
}
|
||||
|
||||
// Index of a feature for a given king position and another piece on some square
|
||||
inline IndexType make_index(Color perspective, Square s, Piece pc, Square ksq) {
|
||||
return IndexType(orient(perspective, s) + kpp_board_index[perspective][pc] + PS_END * ksq);
|
||||
}
|
||||
|
||||
// Get a list of indices for active features
|
||||
template <Side AssociatedKing>
|
||||
void HalfKP<AssociatedKing>::AppendActiveIndices(
|
||||
const Position& pos, Color perspective, IndexList* active) {
|
||||
|
||||
Square ksq = orient(perspective, pos.square<KING>(perspective));
|
||||
Bitboard bb = pos.pieces() & ~pos.pieces(KING);
|
||||
while (bb) {
|
||||
Square s = pop_lsb(&bb);
|
||||
active->push_back(make_index(perspective, s, pos.piece_on(s), ksq));
|
||||
}
|
||||
}
|
||||
|
||||
// Get a list of indices for recently changed features
|
||||
template <Side AssociatedKing>
|
||||
void HalfKP<AssociatedKing>::AppendChangedIndices(
|
||||
const Position& pos, const DirtyPiece& dp, Color perspective,
|
||||
IndexList* removed, IndexList* added) {
|
||||
|
||||
Square ksq = orient(perspective, pos.square<KING>(perspective));
|
||||
for (int i = 0; i < dp.dirty_num; ++i) {
|
||||
Piece pc = dp.piece[i];
|
||||
if (type_of(pc) == KING) continue;
|
||||
if (dp.from[i] != SQ_NONE)
|
||||
removed->push_back(make_index(perspective, dp.from[i], pc, ksq));
|
||||
if (dp.to[i] != SQ_NONE)
|
||||
added->push_back(make_index(perspective, dp.to[i], pc, ksq));
|
||||
}
|
||||
}
|
||||
|
||||
template class HalfKP<Side::kFriend>;
|
||||
|
||||
} // namespace Eval::NNUE::Features
|
|
@ -1,59 +0,0 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
//Definition of input features HalfKP of NNUE evaluation function
|
||||
|
||||
#ifndef NNUE_FEATURES_HALF_KP_H_INCLUDED
|
||||
#define NNUE_FEATURES_HALF_KP_H_INCLUDED
|
||||
|
||||
#include "../../evaluate.h"
|
||||
#include "features_common.h"
|
||||
|
||||
namespace Eval::NNUE::Features {
|
||||
|
||||
// Feature HalfKP: Combination of the position of own king
|
||||
// and the position of pieces other than kings
|
||||
template <Side AssociatedKing>
|
||||
class HalfKP {
|
||||
|
||||
public:
|
||||
// Feature name
|
||||
static constexpr const char* kName = "HalfKP(Friend)";
|
||||
// Hash value embedded in the evaluation file
|
||||
static constexpr std::uint32_t kHashValue =
|
||||
0x5D69D5B9u ^ (AssociatedKing == Side::kFriend);
|
||||
// Number of feature dimensions
|
||||
static constexpr IndexType kDimensions =
|
||||
static_cast<IndexType>(SQUARE_NB) * static_cast<IndexType>(PS_END);
|
||||
// Maximum number of simultaneously active features
|
||||
static constexpr IndexType kMaxActiveDimensions = 30; // Kings don't count
|
||||
// Trigger for full calculation instead of difference calculation
|
||||
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kFriendKingMoved;
|
||||
|
||||
// Get a list of indices for active features
|
||||
static void AppendActiveIndices(const Position& pos, Color perspective,
|
||||
IndexList* active);
|
||||
|
||||
// Get a list of indices for recently changed features
|
||||
static void AppendChangedIndices(const Position& pos, const DirtyPiece& dp, Color perspective,
|
||||
IndexList* removed, IndexList* added);
|
||||
};
|
||||
|
||||
} // namespace Eval::NNUE::Features
|
||||
|
||||
#endif // #ifndef NNUE_FEATURES_HALF_KP_H_INCLUDED
|
|
@ -1,64 +0,0 @@
|
|||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// Definition of index list of input features
|
||||
|
||||
#ifndef NNUE_FEATURES_INDEX_LIST_H_INCLUDED
|
||||
#define NNUE_FEATURES_INDEX_LIST_H_INCLUDED
|
||||
|
||||
#include "../../position.h"
|
||||
#include "../nnue_architecture.h"
|
||||
|
||||
namespace Eval::NNUE::Features {
|
||||
|
||||
// Class template used for feature index list
|
||||
template <typename T, std::size_t MaxSize>
|
||||
class ValueList {
|
||||
|
||||
public:
|
||||
std::size_t size() const { return size_; }
|
||||
void resize(std::size_t size) { size_ = size; }
|
||||
void push_back(const T& value) { values_[size_++] = value; }
|
||||
T& operator[](std::size_t index) { return values_[index]; }
|
||||
T* begin() { return values_; }
|
||||
T* end() { return values_ + size_; }
|
||||
const T& operator[](std::size_t index) const { return values_[index]; }
|
||||
const T* begin() const { return values_; }
|
||||
const T* end() const { return values_ + size_; }
|
||||
|
||||
void swap(ValueList& other) {
|
||||
const std::size_t max_size = std::max(size_, other.size_);
|
||||
for (std::size_t i = 0; i < max_size; ++i) {
|
||||
std::swap(values_[i], other.values_[i]);
|
||||
}
|
||||
std::swap(size_, other.size_);
|
||||
}
|
||||
|
||||
private:
|
||||
T values_[MaxSize];
|
||||
std::size_t size_ = 0;
|
||||
};
|
||||
|
||||
//Type of feature index list
|
||||
class IndexList
|
||||
: public ValueList<IndexType, RawFeatures::kMaxActiveDimensions> {
|
||||
};
|
||||
|
||||
} // namespace Eval::NNUE::Features
|
||||
|
||||
#endif // NNUE_FEATURES_INDEX_LIST_H_INCLUDED
|
|
@ -24,10 +24,10 @@
|
|||
#include <iostream>
|
||||
#include "../nnue_common.h"
|
||||
|
||||
namespace Eval::NNUE::Layers {
|
||||
namespace Stockfish::Eval::NNUE::Layers {
|
||||
|
||||
// Affine transformation layer
|
||||
template <typename PreviousLayer, IndexType OutputDimensions>
|
||||
template <typename PreviousLayer, IndexType OutDims>
|
||||
class AffineTransform {
|
||||
public:
|
||||
// Input/output type
|
||||
|
@ -36,104 +36,88 @@ namespace Eval::NNUE::Layers {
|
|||
static_assert(std::is_same<InputType, std::uint8_t>::value, "");
|
||||
|
||||
// Number of input/output dimensions
|
||||
static constexpr IndexType kInputDimensions =
|
||||
PreviousLayer::kOutputDimensions;
|
||||
static constexpr IndexType kOutputDimensions = OutputDimensions;
|
||||
static constexpr IndexType kPaddedInputDimensions =
|
||||
CeilToMultiple<IndexType>(kInputDimensions, kMaxSimdWidth);
|
||||
static constexpr IndexType InputDimensions =
|
||||
PreviousLayer::OutputDimensions;
|
||||
static constexpr IndexType OutputDimensions = OutDims;
|
||||
static constexpr IndexType PaddedInputDimensions =
|
||||
ceil_to_multiple<IndexType>(InputDimensions, MaxSimdWidth);
|
||||
#if defined (USE_AVX512)
|
||||
static constexpr const IndexType kOutputSimdWidth = kSimdWidth / 2;
|
||||
static constexpr const IndexType OutputSimdWidth = SimdWidth / 2;
|
||||
#elif defined (USE_SSSE3)
|
||||
static constexpr const IndexType kOutputSimdWidth = kSimdWidth / 4;
|
||||
static constexpr const IndexType OutputSimdWidth = SimdWidth / 4;
|
||||
#endif
|
||||
|
||||
// Size of forward propagation buffer used in this layer
|
||||
static constexpr std::size_t kSelfBufferSize =
|
||||
CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize);
|
||||
static constexpr std::size_t SelfBufferSize =
|
||||
ceil_to_multiple(OutputDimensions * sizeof(OutputType), CacheLineSize);
|
||||
|
||||
// Size of the forward propagation buffer used from the input layer to this layer
|
||||
static constexpr std::size_t kBufferSize =
|
||||
PreviousLayer::kBufferSize + kSelfBufferSize;
|
||||
static constexpr std::size_t BufferSize =
|
||||
PreviousLayer::BufferSize + SelfBufferSize;
|
||||
|
||||
// Hash value embedded in the evaluation file
|
||||
static constexpr std::uint32_t GetHashValue() {
|
||||
std::uint32_t hash_value = 0xCC03DAE4u;
|
||||
hash_value += kOutputDimensions;
|
||||
hash_value ^= PreviousLayer::GetHashValue() >> 1;
|
||||
hash_value ^= PreviousLayer::GetHashValue() << 31;
|
||||
return hash_value;
|
||||
static constexpr std::uint32_t get_hash_value() {
|
||||
std::uint32_t hashValue = 0xCC03DAE4u;
|
||||
hashValue += OutputDimensions;
|
||||
hashValue ^= PreviousLayer::get_hash_value() >> 1;
|
||||
hashValue ^= PreviousLayer::get_hash_value() << 31;
|
||||
return hashValue;
|
||||
}
|
||||
|
||||
// Read network parameters
|
||||
bool ReadParameters(std::istream& stream) {
|
||||
if (!previous_layer_.ReadParameters(stream)) return false;
|
||||
for (std::size_t i = 0; i < kOutputDimensions; ++i)
|
||||
biases_[i] = read_little_endian<BiasType>(stream);
|
||||
for (std::size_t i = 0; i < kOutputDimensions * kPaddedInputDimensions; ++i)
|
||||
// Read network parameters
|
||||
bool read_parameters(std::istream& stream) {
|
||||
if (!previousLayer.read_parameters(stream)) return false;
|
||||
for (std::size_t i = 0; i < OutputDimensions; ++i)
|
||||
biases[i] = read_little_endian<BiasType>(stream);
|
||||
for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
|
||||
#if !defined (USE_SSSE3)
|
||||
weights_[i] = read_little_endian<WeightType>(stream);
|
||||
weights[i] = read_little_endian<WeightType>(stream);
|
||||
#else
|
||||
weights_[
|
||||
(i / 4) % (kPaddedInputDimensions / 4) * kOutputDimensions * 4 +
|
||||
i / kPaddedInputDimensions * 4 +
|
||||
weights[
|
||||
(i / 4) % (PaddedInputDimensions / 4) * OutputDimensions * 4 +
|
||||
i / PaddedInputDimensions * 4 +
|
||||
i % 4
|
||||
] = read_little_endian<WeightType>(stream);
|
||||
|
||||
// Determine if groups of eight weight/input products can be summed using 16 bits
|
||||
// without saturation. We assume worst case combinations of 0 and 127 for all inputs.
|
||||
if (kOutputDimensions > 1 && !stream.fail())
|
||||
{
|
||||
canSaturate16.count = 0;
|
||||
#if !defined(USE_VNNI)
|
||||
for (IndexType i = 0; i < kPaddedInputDimensions; i += 16)
|
||||
for (IndexType j = 0; j < kOutputDimensions; ++j)
|
||||
for (int x = 0; x < 2; ++x)
|
||||
{
|
||||
WeightType* w = &weights_[i * kOutputDimensions + j * 4 + x * 2];
|
||||
int sum[2] = {0, 0};
|
||||
for (int k = 0; k < 8; ++k)
|
||||
{
|
||||
IndexType idx = k / 2 * kOutputDimensions * 4 + k % 2;
|
||||
sum[w[idx] < 0] += w[idx];
|
||||
}
|
||||
for (int sign : {-1, 1})
|
||||
while (sign * sum[sign == -1] > 258)
|
||||
{
|
||||
int maxK = 0, maxW = 0;
|
||||
for (int k = 0; k < 8; ++k)
|
||||
{
|
||||
IndexType idx = k / 2 * kOutputDimensions * 4 + k % 2;
|
||||
if (maxW < sign * w[idx])
|
||||
maxK = k, maxW = sign * w[idx];
|
||||
}
|
||||
|
||||
IndexType idx = maxK / 2 * kOutputDimensions * 4 + maxK % 2;
|
||||
sum[sign == -1] -= w[idx];
|
||||
canSaturate16.add(j, i + maxK / 2 * 4 + maxK % 2 + x * 2, w[idx]);
|
||||
w[idx] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Non-functional optimization: sort the entries for faster, more linear memory access
|
||||
std::sort(canSaturate16.ids, canSaturate16.ids + canSaturate16.count,
|
||||
[](const typename CanSaturate::Entry& e1, const typename CanSaturate::Entry& e2)
|
||||
{ return e1.in == e2.in ? e1.out < e2.out : e1.in < e2.in; });
|
||||
#endif
|
||||
|
||||
return !stream.fail();
|
||||
}
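The SSSE3 branch of read_parameters() above stores the serially read weights in an interleaved order so that, during propagation, four consecutive inputs land next to the four weights of each output chunk. The standalone check below applies the same index formula to toy dimensions and verifies that it is a permutation (every destination slot written exactly once); the dimension values are made up for the example.

#include <cstddef>
#include <iostream>
#include <vector>

int main() {
    // Toy dimensions, chosen only for the demonstration.
    const std::size_t OutputDimensions      = 8;
    const std::size_t PaddedInputDimensions = 32;
    const std::size_t total = OutputDimensions * PaddedInputDimensions;

    // Apply the same scrambling formula as the SSSE3 read path above and
    // count how often each destination slot is hit.
    std::vector<int> hits(total, 0);
    for (std::size_t i = 0; i < total; ++i) {
        std::size_t dst = (i / 4) % (PaddedInputDimensions / 4) * OutputDimensions * 4
                        + i / PaddedInputDimensions * 4
                        + i % 4;
        ++hits[dst];
    }

    bool ok = true;
    for (int h : hits)
        ok = ok && (h == 1);
    std::cout << (ok ? "permutation OK" : "collision!") << '\n';
    return 0;
}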
|
||||
|
||||
// Write network parameters
|
||||
bool write_parameters(std::ostream& stream) const {
|
||||
if (!previousLayer.write_parameters(stream)) return false;
|
||||
for (std::size_t i = 0; i < OutputDimensions; ++i)
|
||||
write_little_endian<BiasType>(stream, biases[i]);
|
||||
#if !defined (USE_SSSE3)
|
||||
for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
|
||||
write_little_endian<WeightType>(stream, weights[i]);
|
||||
#else
|
||||
std::unique_ptr<WeightType[]> unscrambledWeights = std::make_unique<WeightType[]>(OutputDimensions * PaddedInputDimensions);
|
||||
for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) {
|
||||
unscrambledWeights[i] =
|
||||
weights[
|
||||
(i / 4) % (PaddedInputDimensions / 4) * OutputDimensions * 4 +
|
||||
i / PaddedInputDimensions * 4 +
|
||||
i % 4
|
||||
];
|
||||
}
|
||||
|
||||
for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
|
||||
write_little_endian<WeightType>(stream, unscrambledWeights[i]);
|
||||
#endif
|
||||
|
||||
return !stream.fail();
|
||||
}
|
||||
|
||||
// Forward propagation
|
||||
const OutputType* Propagate(
|
||||
const TransformedFeatureType* transformed_features, char* buffer) const {
|
||||
const auto input = previous_layer_.Propagate(
|
||||
transformed_features, buffer + kSelfBufferSize);
|
||||
const OutputType* propagate(
|
||||
const TransformedFeatureType* transformedFeatures, char* buffer) const {
|
||||
const auto input = previousLayer.propagate(
|
||||
transformedFeatures, buffer + SelfBufferSize);
|
||||
|
||||
#if defined (USE_AVX512)
|
||||
|
||||
[[maybe_unused]] const __m512i kOnes512 = _mm512_set1_epi16(1);
|
||||
[[maybe_unused]] const __m512i Ones512 = _mm512_set1_epi16(1);
|
||||
|
||||
[[maybe_unused]] auto m512_hadd = [](__m512i sum, int bias) -> int {
|
||||
return _mm512_reduce_add_epi32(sum) + bias;
|
||||
|
@ -144,7 +128,7 @@ namespace Eval::NNUE::Layers {
|
|||
acc = _mm512_dpbusd_epi32(acc, a, b);
|
||||
#else
|
||||
__m512i product0 = _mm512_maddubs_epi16(a, b);
|
||||
product0 = _mm512_madd_epi16(product0, kOnes512);
|
||||
product0 = _mm512_madd_epi16(product0, Ones512);
|
||||
acc = _mm512_add_epi32(acc, product0);
|
||||
#endif
|
||||
};
|
||||
|
@ -161,18 +145,18 @@ namespace Eval::NNUE::Layers {
|
|||
__m512i product1 = _mm512_maddubs_epi16(a1, b1);
|
||||
__m512i product2 = _mm512_maddubs_epi16(a2, b2);
|
||||
__m512i product3 = _mm512_maddubs_epi16(a3, b3);
|
||||
product0 = _mm512_add_epi16(product0, product1);
|
||||
product2 = _mm512_add_epi16(product2, product3);
|
||||
product0 = _mm512_add_epi16(product0, product2);
|
||||
product0 = _mm512_madd_epi16(product0, kOnes512);
|
||||
acc = _mm512_add_epi32(acc, product0);
|
||||
product0 = _mm512_adds_epi16(product0, product1);
|
||||
product0 = _mm512_madd_epi16(product0, Ones512);
|
||||
product2 = _mm512_adds_epi16(product2, product3);
|
||||
product2 = _mm512_madd_epi16(product2, Ones512);
|
||||
acc = _mm512_add_epi32(acc, _mm512_add_epi32(product0, product2));
|
||||
#endif
|
||||
};
|
||||
|
||||
#endif
|
||||
#if defined (USE_AVX2)
|
||||
|
||||
[[maybe_unused]] const __m256i kOnes256 = _mm256_set1_epi16(1);
|
||||
[[maybe_unused]] const __m256i Ones256 = _mm256_set1_epi16(1);
|
||||
|
||||
[[maybe_unused]] auto m256_hadd = [](__m256i sum, int bias) -> int {
|
||||
__m128i sum128 = _mm_add_epi32(_mm256_castsi256_si128(sum), _mm256_extracti128_si256(sum, 1));
|
||||
|
@ -186,7 +170,7 @@ namespace Eval::NNUE::Layers {
|
|||
acc = _mm256_dpbusd_epi32(acc, a, b);
|
||||
#else
|
||||
__m256i product0 = _mm256_maddubs_epi16(a, b);
|
||||
product0 = _mm256_madd_epi16(product0, kOnes256);
|
||||
product0 = _mm256_madd_epi16(product0, Ones256);
|
||||
acc = _mm256_add_epi32(acc, product0);
|
||||
#endif
|
||||
};
|
||||
|
@ -203,18 +187,18 @@ namespace Eval::NNUE::Layers {
|
|||
__m256i product1 = _mm256_maddubs_epi16(a1, b1);
|
||||
__m256i product2 = _mm256_maddubs_epi16(a2, b2);
|
||||
__m256i product3 = _mm256_maddubs_epi16(a3, b3);
|
||||
product0 = _mm256_add_epi16(product0, product1);
|
||||
product2 = _mm256_add_epi16(product2, product3);
|
||||
product0 = _mm256_add_epi16(product0, product2);
|
||||
product0 = _mm256_madd_epi16(product0, kOnes256);
|
||||
acc = _mm256_add_epi32(acc, product0);
|
||||
product0 = _mm256_adds_epi16(product0, product1);
|
||||
product0 = _mm256_madd_epi16(product0, Ones256);
|
||||
product2 = _mm256_adds_epi16(product2, product3);
|
||||
product2 = _mm256_madd_epi16(product2, Ones256);
|
||||
acc = _mm256_add_epi32(acc, _mm256_add_epi32(product0, product2));
|
||||
#endif
|
||||
};
|
||||
|
||||
#endif
|
||||
#if defined (USE_SSSE3)
|
||||
|
||||
[[maybe_unused]] const __m128i kOnes128 = _mm_set1_epi16(1);
|
||||
[[maybe_unused]] const __m128i Ones128 = _mm_set1_epi16(1);
|
||||
|
||||
[[maybe_unused]] auto m128_hadd = [](__m128i sum, int bias) -> int {
|
||||
sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0x4E)); //_MM_PERM_BADC
|
||||
|
@ -224,7 +208,7 @@ namespace Eval::NNUE::Layers {
|
|||
|
||||
[[maybe_unused]] auto m128_add_dpbusd_epi32 = [=](__m128i& acc, __m128i a, __m128i b) {
|
||||
__m128i product0 = _mm_maddubs_epi16(a, b);
|
||||
product0 = _mm_madd_epi16(product0, kOnes128);
|
||||
product0 = _mm_madd_epi16(product0, Ones128);
|
||||
acc = _mm_add_epi32(acc, product0);
|
||||
};
|
||||
|
||||
|
@ -235,10 +219,10 @@ namespace Eval::NNUE::Layers {
|
|||
__m128i product2 = _mm_maddubs_epi16(a2, b2);
|
||||
__m128i product3 = _mm_maddubs_epi16(a3, b3);
|
||||
product0 = _mm_adds_epi16(product0, product1);
|
||||
product0 = _mm_madd_epi16(product0, Ones128);
|
||||
product2 = _mm_adds_epi16(product2, product3);
|
||||
product0 = _mm_adds_epi16(product0, product2);
|
||||
product0 = _mm_madd_epi16(product0, kOnes128);
|
||||
acc = _mm_add_epi32(acc, product0);
|
||||
product2 = _mm_madd_epi16(product2, Ones128);
|
||||
acc = _mm_add_epi32(acc, _mm_add_epi32(product0, product2));
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -267,73 +251,73 @@ namespace Eval::NNUE::Layers {
|
|||
#endif
|
||||
|
||||
#if defined (USE_SSSE3)
|
||||
// Different layout: we always process 4 inputs at a time.
|
||||
static_assert(InputDimensions % 4 == 0);
|
||||
|
||||
const auto output = reinterpret_cast<OutputType*>(buffer);
|
||||
const auto input_vector = reinterpret_cast<const vec_t*>(input);
|
||||
const auto inputVector = reinterpret_cast<const vec_t*>(input);
|
||||
|
||||
static_assert(kOutputDimensions % kOutputSimdWidth == 0 || kOutputDimensions == 1);
|
||||
static_assert(OutputDimensions % OutputSimdWidth == 0 || OutputDimensions == 1);
|
||||
|
||||
// kOutputDimensions is either 1 or a multiple of kSimdWidth
|
||||
// OutputDimensions is either 1 or a multiple of SimdWidth
|
||||
// because then it is also an input dimension.
|
||||
if constexpr (kOutputDimensions % kOutputSimdWidth == 0)
|
||||
if constexpr (OutputDimensions % OutputSimdWidth == 0)
|
||||
{
|
||||
constexpr IndexType kNumChunks = kPaddedInputDimensions / 4;
|
||||
constexpr IndexType NumChunks = InputDimensions / 4;
|
||||
|
||||
const auto input32 = reinterpret_cast<const std::int32_t*>(input);
|
||||
vec_t* outptr = reinterpret_cast<vec_t*>(output);
|
||||
std::memcpy(output, biases_, kOutputDimensions * sizeof(OutputType));
|
||||
std::memcpy(output, biases, OutputDimensions * sizeof(OutputType));
|
||||
|
||||
for (int i = 0; i < (int)kNumChunks - 3; i += 4)
|
||||
for (int i = 0; i < (int)NumChunks - 3; i += 4)
|
||||
{
|
||||
const vec_t in0 = vec_set_32(input32[i + 0]);
|
||||
const vec_t in1 = vec_set_32(input32[i + 1]);
|
||||
const vec_t in2 = vec_set_32(input32[i + 2]);
|
||||
const vec_t in3 = vec_set_32(input32[i + 3]);
|
||||
const auto col0 = reinterpret_cast<const vec_t*>(&weights_[(i + 0) * kOutputDimensions * 4]);
|
||||
const auto col1 = reinterpret_cast<const vec_t*>(&weights_[(i + 1) * kOutputDimensions * 4]);
|
||||
const auto col2 = reinterpret_cast<const vec_t*>(&weights_[(i + 2) * kOutputDimensions * 4]);
|
||||
const auto col3 = reinterpret_cast<const vec_t*>(&weights_[(i + 3) * kOutputDimensions * 4]);
|
||||
for (int j = 0; j * kOutputSimdWidth < kOutputDimensions; ++j)
|
||||
const auto col0 = reinterpret_cast<const vec_t*>(&weights[(i + 0) * OutputDimensions * 4]);
|
||||
const auto col1 = reinterpret_cast<const vec_t*>(&weights[(i + 1) * OutputDimensions * 4]);
|
||||
const auto col2 = reinterpret_cast<const vec_t*>(&weights[(i + 2) * OutputDimensions * 4]);
|
||||
const auto col3 = reinterpret_cast<const vec_t*>(&weights[(i + 3) * OutputDimensions * 4]);
|
||||
for (int j = 0; j * OutputSimdWidth < OutputDimensions; ++j)
|
||||
vec_add_dpbusd_32x4(outptr[j], in0, col0[j], in1, col1[j], in2, col2[j], in3, col3[j]);
|
||||
}
|
||||
for (int i = 0; i < canSaturate16.count; ++i)
|
||||
output[canSaturate16.ids[i].out] += input[canSaturate16.ids[i].in] * canSaturate16.ids[i].w;
|
||||
}
|
||||
else if constexpr (kOutputDimensions == 1)
|
||||
else if constexpr (OutputDimensions == 1)
|
||||
{
|
||||
#if defined (USE_AVX512)
|
||||
if constexpr (kPaddedInputDimensions % (kSimdWidth * 2) != 0)
|
||||
if constexpr (PaddedInputDimensions % (SimdWidth * 2) != 0)
|
||||
{
|
||||
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
|
||||
const auto input_vector256 = reinterpret_cast<const __m256i*>(input);
|
||||
constexpr IndexType NumChunks = PaddedInputDimensions / SimdWidth;
|
||||
const auto inputVector256 = reinterpret_cast<const __m256i*>(input);
|
||||
|
||||
__m256i sum0 = _mm256_setzero_si256();
|
||||
const auto row0 = reinterpret_cast<const __m256i*>(&weights_[0]);
|
||||
const auto row0 = reinterpret_cast<const __m256i*>(&weights[0]);
|
||||
|
||||
for (int j = 0; j < (int)kNumChunks; ++j)
|
||||
for (int j = 0; j < (int)NumChunks; ++j)
|
||||
{
|
||||
const __m256i in = input_vector256[j];
|
||||
const __m256i in = inputVector256[j];
|
||||
m256_add_dpbusd_epi32(sum0, in, row0[j]);
|
||||
}
|
||||
output[0] = m256_hadd(sum0, biases_[0]);
|
||||
output[0] = m256_hadd(sum0, biases[0]);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
#if defined (USE_AVX512)
|
||||
constexpr IndexType kNumChunks = kPaddedInputDimensions / (kSimdWidth * 2);
|
||||
constexpr IndexType NumChunks = PaddedInputDimensions / (SimdWidth * 2);
|
||||
#else
|
||||
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
|
||||
constexpr IndexType NumChunks = PaddedInputDimensions / SimdWidth;
|
||||
#endif
|
||||
vec_t sum0 = vec_setzero();
|
||||
const auto row0 = reinterpret_cast<const vec_t*>(&weights_[0]);
|
||||
const auto row0 = reinterpret_cast<const vec_t*>(&weights[0]);
|
||||
|
||||
for (int j = 0; j < (int)kNumChunks; ++j)
|
||||
for (int j = 0; j < (int)NumChunks; ++j)
|
||||
{
|
||||
const vec_t in = input_vector[j];
|
||||
const vec_t in = inputVector[j];
|
||||
vec_add_dpbusd_32(sum0, in, row0[j]);
|
||||
}
|
||||
output[0] = vec_hadd(sum0, biases_[0]);
|
||||
output[0] = vec_hadd(sum0, biases[0]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -344,80 +328,84 @@ namespace Eval::NNUE::Layers {
|
|||
auto output = reinterpret_cast<OutputType*>(buffer);
|
||||
|
||||
#if defined(USE_SSE2)
|
||||
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
|
||||
const __m128i kZeros = _mm_setzero_si128();
|
||||
const auto input_vector = reinterpret_cast<const __m128i*>(input);
|
||||
// At least a multiple of 16, with SSE2.
|
||||
static_assert(InputDimensions % SimdWidth == 0);
|
||||
constexpr IndexType NumChunks = InputDimensions / SimdWidth;
|
||||
const __m128i Zeros = _mm_setzero_si128();
|
||||
const auto inputVector = reinterpret_cast<const __m128i*>(input);
|
||||
|
||||
#elif defined(USE_MMX)
|
||||
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
|
||||
const __m64 kZeros = _mm_setzero_si64();
|
||||
const auto input_vector = reinterpret_cast<const __m64*>(input);
|
||||
static_assert(InputDimensions % SimdWidth == 0);
|
||||
constexpr IndexType NumChunks = InputDimensions / SimdWidth;
|
||||
const __m64 Zeros = _mm_setzero_si64();
|
||||
const auto inputVector = reinterpret_cast<const __m64*>(input);
|
||||
|
||||
#elif defined(USE_NEON)
|
||||
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
|
||||
const auto input_vector = reinterpret_cast<const int8x8_t*>(input);
|
||||
static_assert(InputDimensions % SimdWidth == 0);
|
||||
constexpr IndexType NumChunks = InputDimensions / SimdWidth;
|
||||
const auto inputVector = reinterpret_cast<const int8x8_t*>(input);
|
||||
#endif
|
||||
|
||||
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
const IndexType offset = i * kPaddedInputDimensions;
|
||||
for (IndexType i = 0; i < OutputDimensions; ++i) {
|
||||
const IndexType offset = i * PaddedInputDimensions;
|
||||
|
||||
#if defined(USE_SSE2)
|
||||
__m128i sum_lo = _mm_cvtsi32_si128(biases_[i]);
|
||||
__m128i sum_hi = kZeros;
|
||||
const auto row = reinterpret_cast<const __m128i*>(&weights_[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
__m128i sumLo = _mm_cvtsi32_si128(biases[i]);
|
||||
__m128i sumHi = Zeros;
|
||||
const auto row = reinterpret_cast<const __m128i*>(&weights[offset]);
|
||||
for (IndexType j = 0; j < NumChunks; ++j) {
|
||||
__m128i row_j = _mm_load_si128(&row[j]);
|
||||
__m128i input_j = _mm_load_si128(&input_vector[j]);
|
||||
__m128i extended_row_lo = _mm_srai_epi16(_mm_unpacklo_epi8(row_j, row_j), 8);
|
||||
__m128i extended_row_hi = _mm_srai_epi16(_mm_unpackhi_epi8(row_j, row_j), 8);
|
||||
__m128i extended_input_lo = _mm_unpacklo_epi8(input_j, kZeros);
|
||||
__m128i extended_input_hi = _mm_unpackhi_epi8(input_j, kZeros);
|
||||
__m128i product_lo = _mm_madd_epi16(extended_row_lo, extended_input_lo);
|
||||
__m128i product_hi = _mm_madd_epi16(extended_row_hi, extended_input_hi);
|
||||
sum_lo = _mm_add_epi32(sum_lo, product_lo);
|
||||
sum_hi = _mm_add_epi32(sum_hi, product_hi);
|
||||
__m128i input_j = _mm_load_si128(&inputVector[j]);
|
||||
__m128i extendedRowLo = _mm_srai_epi16(_mm_unpacklo_epi8(row_j, row_j), 8);
|
||||
__m128i extendedRowHi = _mm_srai_epi16(_mm_unpackhi_epi8(row_j, row_j), 8);
|
||||
__m128i extendedInputLo = _mm_unpacklo_epi8(input_j, Zeros);
|
||||
__m128i extendedInputHi = _mm_unpackhi_epi8(input_j, Zeros);
|
||||
__m128i productLo = _mm_madd_epi16(extendedRowLo, extendedInputLo);
|
||||
__m128i productHi = _mm_madd_epi16(extendedRowHi, extendedInputHi);
|
||||
sumLo = _mm_add_epi32(sumLo, productLo);
|
||||
sumHi = _mm_add_epi32(sumHi, productHi);
|
||||
}
|
||||
__m128i sum = _mm_add_epi32(sum_lo, sum_hi);
|
||||
__m128i sum_high_64 = _mm_shuffle_epi32(sum, _MM_SHUFFLE(1, 0, 3, 2));
|
||||
sum = _mm_add_epi32(sum, sum_high_64);
|
||||
__m128i sum = _mm_add_epi32(sumLo, sumHi);
|
||||
__m128i sumHigh_64 = _mm_shuffle_epi32(sum, _MM_SHUFFLE(1, 0, 3, 2));
|
||||
sum = _mm_add_epi32(sum, sumHigh_64);
|
||||
__m128i sum_second_32 = _mm_shufflelo_epi16(sum, _MM_SHUFFLE(1, 0, 3, 2));
|
||||
sum = _mm_add_epi32(sum, sum_second_32);
|
||||
output[i] = _mm_cvtsi128_si32(sum);
|
||||
|
||||
#elif defined(USE_MMX)
|
||||
__m64 sum_lo = _mm_cvtsi32_si64(biases_[i]);
|
||||
__m64 sum_hi = kZeros;
|
||||
const auto row = reinterpret_cast<const __m64*>(&weights_[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
__m64 sumLo = _mm_cvtsi32_si64(biases[i]);
|
||||
__m64 sumHi = Zeros;
|
||||
const auto row = reinterpret_cast<const __m64*>(&weights[offset]);
|
||||
for (IndexType j = 0; j < NumChunks; ++j) {
|
||||
__m64 row_j = row[j];
|
||||
__m64 input_j = input_vector[j];
|
||||
__m64 extended_row_lo = _mm_srai_pi16(_mm_unpacklo_pi8(row_j, row_j), 8);
|
||||
__m64 extended_row_hi = _mm_srai_pi16(_mm_unpackhi_pi8(row_j, row_j), 8);
|
||||
__m64 extended_input_lo = _mm_unpacklo_pi8(input_j, kZeros);
|
||||
__m64 extended_input_hi = _mm_unpackhi_pi8(input_j, kZeros);
|
||||
__m64 product_lo = _mm_madd_pi16(extended_row_lo, extended_input_lo);
|
||||
__m64 product_hi = _mm_madd_pi16(extended_row_hi, extended_input_hi);
|
||||
sum_lo = _mm_add_pi32(sum_lo, product_lo);
|
||||
sum_hi = _mm_add_pi32(sum_hi, product_hi);
|
||||
__m64 input_j = inputVector[j];
|
||||
__m64 extendedRowLo = _mm_srai_pi16(_mm_unpacklo_pi8(row_j, row_j), 8);
|
||||
__m64 extendedRowHi = _mm_srai_pi16(_mm_unpackhi_pi8(row_j, row_j), 8);
|
||||
__m64 extendedInputLo = _mm_unpacklo_pi8(input_j, Zeros);
|
||||
__m64 extendedInputHi = _mm_unpackhi_pi8(input_j, Zeros);
|
||||
__m64 productLo = _mm_madd_pi16(extendedRowLo, extendedInputLo);
|
||||
__m64 productHi = _mm_madd_pi16(extendedRowHi, extendedInputHi);
|
||||
sumLo = _mm_add_pi32(sumLo, productLo);
|
||||
sumHi = _mm_add_pi32(sumHi, productHi);
|
||||
}
|
||||
__m64 sum = _mm_add_pi32(sum_lo, sum_hi);
|
||||
__m64 sum = _mm_add_pi32(sumLo, sumHi);
|
||||
sum = _mm_add_pi32(sum, _mm_unpackhi_pi32(sum, sum));
|
||||
output[i] = _mm_cvtsi64_si32(sum);
|
||||
|
||||
#elif defined(USE_NEON)
|
||||
int32x4_t sum = {biases_[i]};
|
||||
const auto row = reinterpret_cast<const int8x8_t*>(&weights_[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
int16x8_t product = vmull_s8(input_vector[j * 2], row[j * 2]);
|
||||
product = vmlal_s8(product, input_vector[j * 2 + 1], row[j * 2 + 1]);
|
||||
int32x4_t sum = {biases[i]};
|
||||
const auto row = reinterpret_cast<const int8x8_t*>(&weights[offset]);
|
||||
for (IndexType j = 0; j < NumChunks; ++j) {
|
||||
int16x8_t product = vmull_s8(inputVector[j * 2], row[j * 2]);
|
||||
product = vmlal_s8(product, inputVector[j * 2 + 1], row[j * 2 + 1]);
|
||||
sum = vpadalq_s16(sum, product);
|
||||
}
|
||||
output[i] = sum[0] + sum[1] + sum[2] + sum[3];
|
||||
|
||||
#else
|
||||
OutputType sum = biases_[i];
|
||||
for (IndexType j = 0; j < kInputDimensions; ++j) {
|
||||
sum += weights_[offset + j] * input[j];
|
||||
OutputType sum = biases[i];
|
||||
for (IndexType j = 0; j < InputDimensions; ++j) {
|
||||
sum += weights[offset + j] * input[j];
|
||||
}
|
||||
output[i] = sum;
|
||||
#endif
|
||||
|
@ -436,29 +424,12 @@ namespace Eval::NNUE::Layers {
|
|||
using BiasType = OutputType;
|
||||
using WeightType = std::int8_t;
|
||||
|
||||
PreviousLayer previous_layer_;
|
||||
PreviousLayer previousLayer;
|
||||
|
||||
alignas(kCacheLineSize) BiasType biases_[kOutputDimensions];
|
||||
alignas(kCacheLineSize) WeightType weights_[kOutputDimensions * kPaddedInputDimensions];
|
||||
#if defined (USE_SSSE3)
|
||||
struct CanSaturate {
|
||||
int count;
|
||||
struct Entry {
|
||||
uint16_t out;
|
||||
uint16_t in;
|
||||
int8_t w;
|
||||
} ids[kPaddedInputDimensions * kOutputDimensions * 3 / 4];
|
||||
|
||||
void add(int i, int j, int8_t w) {
|
||||
ids[count].out = i;
|
||||
ids[count].in = j;
|
||||
ids[count].w = w;
|
||||
++count;
|
||||
}
|
||||
} canSaturate16;
|
||||
#endif
|
||||
alignas(CacheLineSize) BiasType biases[OutputDimensions];
|
||||
alignas(CacheLineSize) WeightType weights[OutputDimensions * PaddedInputDimensions];
|
||||
};
|
||||
|
||||
} // namespace Eval::NNUE::Layers
|
||||
} // namespace Stockfish::Eval::NNUE::Layers
|
||||
|
||||
#endif // #ifndef NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED
|
||||
|
|
|
@ -23,7 +23,7 @@
|
|||
|
||||
#include "../nnue_common.h"
|
||||
|
||||
namespace Eval::NNUE::Layers {
|
||||
namespace Stockfish::Eval::NNUE::Layers {
|
||||
|
||||
// Clipped ReLU
|
||||
template <typename PreviousLayer>
|
||||
|
@ -35,132 +35,157 @@ namespace Eval::NNUE::Layers {
|
|||
static_assert(std::is_same<InputType, std::int32_t>::value, "");
|
||||
|
||||
// Number of input/output dimensions
|
||||
static constexpr IndexType kInputDimensions =
|
||||
PreviousLayer::kOutputDimensions;
|
||||
static constexpr IndexType kOutputDimensions = kInputDimensions;
|
||||
static constexpr IndexType InputDimensions =
|
||||
PreviousLayer::OutputDimensions;
|
||||
static constexpr IndexType OutputDimensions = InputDimensions;
|
||||
|
||||
// Size of forward propagation buffer used in this layer
|
||||
static constexpr std::size_t kSelfBufferSize =
|
||||
CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize);
|
||||
static constexpr std::size_t SelfBufferSize =
|
||||
ceil_to_multiple(OutputDimensions * sizeof(OutputType), CacheLineSize);
|
||||
|
||||
// Size of the forward propagation buffer used from the input layer to this layer
|
||||
static constexpr std::size_t kBufferSize =
|
||||
PreviousLayer::kBufferSize + kSelfBufferSize;
|
||||
static constexpr std::size_t BufferSize =
|
||||
PreviousLayer::BufferSize + SelfBufferSize;
|
||||
|
||||
// Hash value embedded in the evaluation file
|
||||
static constexpr std::uint32_t GetHashValue() {
|
||||
std::uint32_t hash_value = 0x538D24C7u;
|
||||
hash_value += PreviousLayer::GetHashValue();
|
||||
return hash_value;
|
||||
static constexpr std::uint32_t get_hash_value() {
|
||||
std::uint32_t hashValue = 0x538D24C7u;
|
||||
hashValue += PreviousLayer::get_hash_value();
|
||||
return hashValue;
|
||||
}
|
||||
|
||||
// Read network parameters
|
||||
bool ReadParameters(std::istream& stream) {
|
||||
return previous_layer_.ReadParameters(stream);
|
||||
bool read_parameters(std::istream& stream) {
|
||||
return previousLayer.read_parameters(stream);
|
||||
}
|
||||
|
||||
// Write network parameters
|
||||
bool write_parameters(std::ostream& stream) const {
|
||||
return previousLayer.write_parameters(stream);
|
||||
}
|
||||
|
||||
// Forward propagation
|
||||
const OutputType* Propagate(
|
||||
const TransformedFeatureType* transformed_features, char* buffer) const {
|
||||
const auto input = previous_layer_.Propagate(
|
||||
transformed_features, buffer + kSelfBufferSize);
|
||||
const OutputType* propagate(
|
||||
const TransformedFeatureType* transformedFeatures, char* buffer) const {
|
||||
const auto input = previousLayer.propagate(
|
||||
transformedFeatures, buffer + SelfBufferSize);
|
||||
const auto output = reinterpret_cast<OutputType*>(buffer);
|
||||
|
||||
#if defined(USE_AVX2)
|
||||
constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth;
|
||||
const __m256i kZero = _mm256_setzero_si256();
|
||||
const __m256i kOffsets = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0);
|
||||
const auto in = reinterpret_cast<const __m256i*>(input);
|
||||
const auto out = reinterpret_cast<__m256i*>(output);
|
||||
for (IndexType i = 0; i < kNumChunks; ++i) {
|
||||
const __m256i words0 = _mm256_srai_epi16(_mm256_packs_epi32(
|
||||
_mm256_load_si256(&in[i * 4 + 0]),
|
||||
_mm256_load_si256(&in[i * 4 + 1])), kWeightScaleBits);
|
||||
const __m256i words1 = _mm256_srai_epi16(_mm256_packs_epi32(
|
||||
_mm256_load_si256(&in[i * 4 + 2]),
|
||||
_mm256_load_si256(&in[i * 4 + 3])), kWeightScaleBits);
|
||||
_mm256_store_si256(&out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8(
|
||||
_mm256_packs_epi16(words0, words1), kZero), kOffsets));
|
||||
if constexpr (InputDimensions % SimdWidth == 0) {
|
||||
constexpr IndexType NumChunks = InputDimensions / SimdWidth;
|
||||
const __m256i Zero = _mm256_setzero_si256();
|
||||
const __m256i Offsets = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0);
|
||||
const auto in = reinterpret_cast<const __m256i*>(input);
|
||||
const auto out = reinterpret_cast<__m256i*>(output);
|
||||
for (IndexType i = 0; i < NumChunks; ++i) {
|
||||
const __m256i words0 = _mm256_srai_epi16(_mm256_packs_epi32(
|
||||
_mm256_load_si256(&in[i * 4 + 0]),
|
||||
_mm256_load_si256(&in[i * 4 + 1])), WeightScaleBits);
|
||||
const __m256i words1 = _mm256_srai_epi16(_mm256_packs_epi32(
|
||||
_mm256_load_si256(&in[i * 4 + 2]),
|
||||
_mm256_load_si256(&in[i * 4 + 3])), WeightScaleBits);
|
||||
_mm256_store_si256(&out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8(
|
||||
_mm256_packs_epi16(words0, words1), Zero), Offsets));
|
||||
}
|
||||
} else {
|
||||
constexpr IndexType NumChunks = InputDimensions / (SimdWidth / 2);
|
||||
const __m128i Zero = _mm_setzero_si128();
|
||||
const auto in = reinterpret_cast<const __m128i*>(input);
|
||||
const auto out = reinterpret_cast<__m128i*>(output);
|
||||
for (IndexType i = 0; i < NumChunks; ++i) {
|
||||
const __m128i words0 = _mm_srai_epi16(_mm_packs_epi32(
|
||||
_mm_load_si128(&in[i * 4 + 0]),
|
||||
_mm_load_si128(&in[i * 4 + 1])), WeightScaleBits);
|
||||
const __m128i words1 = _mm_srai_epi16(_mm_packs_epi32(
|
||||
_mm_load_si128(&in[i * 4 + 2]),
|
||||
_mm_load_si128(&in[i * 4 + 3])), WeightScaleBits);
|
||||
const __m128i packedbytes = _mm_packs_epi16(words0, words1);
|
||||
_mm_store_si128(&out[i], _mm_max_epi8(packedbytes, Zero));
|
||||
}
|
||||
}
|
||||
constexpr IndexType kStart = kNumChunks * kSimdWidth;
|
||||
constexpr IndexType Start =
|
||||
InputDimensions % SimdWidth == 0
|
||||
? InputDimensions / SimdWidth * SimdWidth
|
||||
: InputDimensions / (SimdWidth / 2) * (SimdWidth / 2);
|
||||
|
||||
#elif defined(USE_SSE2)
|
||||
constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth;
|
||||
constexpr IndexType NumChunks = InputDimensions / SimdWidth;
|
||||
|
||||
#ifdef USE_SSE41
|
||||
const __m128i kZero = _mm_setzero_si128();
|
||||
const __m128i Zero = _mm_setzero_si128();
|
||||
#else
|
||||
const __m128i k0x80s = _mm_set1_epi8(-128);
|
||||
#endif
|
||||
|
||||
const auto in = reinterpret_cast<const __m128i*>(input);
|
||||
const auto out = reinterpret_cast<__m128i*>(output);
|
||||
for (IndexType i = 0; i < kNumChunks; ++i) {
|
||||
for (IndexType i = 0; i < NumChunks; ++i) {
|
||||
const __m128i words0 = _mm_srai_epi16(_mm_packs_epi32(
|
||||
_mm_load_si128(&in[i * 4 + 0]),
|
||||
_mm_load_si128(&in[i * 4 + 1])), kWeightScaleBits);
|
||||
_mm_load_si128(&in[i * 4 + 1])), WeightScaleBits);
|
||||
const __m128i words1 = _mm_srai_epi16(_mm_packs_epi32(
|
||||
_mm_load_si128(&in[i * 4 + 2]),
|
||||
_mm_load_si128(&in[i * 4 + 3])), kWeightScaleBits);
|
||||
_mm_load_si128(&in[i * 4 + 3])), WeightScaleBits);
|
||||
const __m128i packedbytes = _mm_packs_epi16(words0, words1);
|
||||
_mm_store_si128(&out[i],
|
||||
|
||||
#ifdef USE_SSE41
|
||||
_mm_max_epi8(packedbytes, kZero)
|
||||
_mm_max_epi8(packedbytes, Zero)
|
||||
#else
|
||||
_mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)
|
||||
#endif
|
||||
|
||||
);
|
||||
}
|
||||
constexpr IndexType kStart = kNumChunks * kSimdWidth;
|
||||
constexpr IndexType Start = NumChunks * SimdWidth;
|
||||
|
||||
#elif defined(USE_MMX)
|
||||
constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth;
|
||||
constexpr IndexType NumChunks = InputDimensions / SimdWidth;
|
||||
const __m64 k0x80s = _mm_set1_pi8(-128);
|
||||
const auto in = reinterpret_cast<const __m64*>(input);
|
||||
const auto out = reinterpret_cast<__m64*>(output);
|
||||
for (IndexType i = 0; i < kNumChunks; ++i) {
|
||||
for (IndexType i = 0; i < NumChunks; ++i) {
|
||||
const __m64 words0 = _mm_srai_pi16(
|
||||
_mm_packs_pi32(in[i * 4 + 0], in[i * 4 + 1]),
|
||||
kWeightScaleBits);
|
||||
WeightScaleBits);
|
||||
const __m64 words1 = _mm_srai_pi16(
|
||||
_mm_packs_pi32(in[i * 4 + 2], in[i * 4 + 3]),
|
||||
kWeightScaleBits);
|
||||
WeightScaleBits);
|
||||
const __m64 packedbytes = _mm_packs_pi16(words0, words1);
|
||||
out[i] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s);
|
||||
}
|
||||
_mm_empty();
|
||||
constexpr IndexType kStart = kNumChunks * kSimdWidth;
|
||||
constexpr IndexType Start = NumChunks * SimdWidth;
|
||||
|
||||
#elif defined(USE_NEON)
|
||||
constexpr IndexType kNumChunks = kInputDimensions / (kSimdWidth / 2);
|
||||
const int8x8_t kZero = {0};
|
||||
constexpr IndexType NumChunks = InputDimensions / (SimdWidth / 2);
|
||||
const int8x8_t Zero = {0};
|
||||
const auto in = reinterpret_cast<const int32x4_t*>(input);
|
||||
const auto out = reinterpret_cast<int8x8_t*>(output);
|
||||
for (IndexType i = 0; i < kNumChunks; ++i) {
|
||||
for (IndexType i = 0; i < NumChunks; ++i) {
|
||||
int16x8_t shifted;
|
||||
const auto pack = reinterpret_cast<int16x4_t*>(&shifted);
|
||||
pack[0] = vqshrn_n_s32(in[i * 2 + 0], kWeightScaleBits);
|
||||
pack[1] = vqshrn_n_s32(in[i * 2 + 1], kWeightScaleBits);
|
||||
out[i] = vmax_s8(vqmovn_s16(shifted), kZero);
|
||||
pack[0] = vqshrn_n_s32(in[i * 2 + 0], WeightScaleBits);
|
||||
pack[1] = vqshrn_n_s32(in[i * 2 + 1], WeightScaleBits);
|
||||
out[i] = vmax_s8(vqmovn_s16(shifted), Zero);
|
||||
}
|
||||
constexpr IndexType kStart = kNumChunks * (kSimdWidth / 2);
|
||||
constexpr IndexType Start = NumChunks * (SimdWidth / 2);
|
||||
#else
|
||||
constexpr IndexType kStart = 0;
|
||||
constexpr IndexType Start = 0;
|
||||
#endif
|
||||
|
||||
for (IndexType i = kStart; i < kInputDimensions; ++i) {
|
||||
for (IndexType i = Start; i < InputDimensions; ++i) {
|
||||
output[i] = static_cast<OutputType>(
|
||||
std::max(0, std::min(127, input[i] >> kWeightScaleBits)));
|
||||
std::max(0, std::min(127, input[i] >> WeightScaleBits)));
|
||||
}
|
||||
return output;
|
||||
}
|
||||
|
||||
private:
|
||||
PreviousLayer previous_layer_;
|
||||
PreviousLayer previousLayer;
|
||||
};
|
||||
|
||||
} // namespace Eval::NNUE::Layers
|
||||
} // namespace Stockfish::Eval::NNUE::Layers
|
||||
|
||||
#endif // NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED
|
||||
|
|
|
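All of the vector branches in the ClippedReLU layer above reduce to the per-element operation spelled out by the generic fallback: shift the 32-bit affine output right by WeightScaleBits and clamp the result to [0, 127]. A standalone scalar sketch, assuming only the WeightScaleBits = 6 constant from nnue_common.h (illustration, not part of the commit):

#include <algorithm>
#include <cstdint>

// Scalar reference for the clipped ReLU applied between NNUE layers.
constexpr int WeightScaleBits = 6;

inline std::uint8_t clipped_relu(std::int32_t x) {
    return static_cast<std::uint8_t>(std::max(0, std::min(127, x >> WeightScaleBits)));
}

// clipped_relu(-500) == 0, clipped_relu(640) == 10, clipped_relu(100000) == 127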
@ -23,46 +23,51 @@
|
|||
|
||||
#include "../nnue_common.h"
|
||||
|
||||
namespace Eval::NNUE::Layers {
|
||||
namespace Stockfish::Eval::NNUE::Layers {
|
||||
|
||||
// Input layer
|
||||
template <IndexType OutputDimensions, IndexType Offset = 0>
|
||||
template <IndexType OutDims, IndexType Offset = 0>
|
||||
class InputSlice {
|
||||
public:
|
||||
// Need to maintain alignment
|
||||
static_assert(Offset % kMaxSimdWidth == 0, "");
|
||||
static_assert(Offset % MaxSimdWidth == 0, "");
|
||||
|
||||
// Output type
|
||||
using OutputType = TransformedFeatureType;
|
||||
|
||||
// Output dimensionality
|
||||
static constexpr IndexType kOutputDimensions = OutputDimensions;
|
||||
static constexpr IndexType OutputDimensions = OutDims;
|
||||
|
||||
// Size of forward propagation buffer used from the input layer to this layer
|
||||
static constexpr std::size_t kBufferSize = 0;
|
||||
static constexpr std::size_t BufferSize = 0;
|
||||
|
||||
// Hash value embedded in the evaluation file
|
||||
static constexpr std::uint32_t GetHashValue() {
|
||||
std::uint32_t hash_value = 0xEC42E90Du;
|
||||
hash_value ^= kOutputDimensions ^ (Offset << 10);
|
||||
return hash_value;
|
||||
static constexpr std::uint32_t get_hash_value() {
|
||||
std::uint32_t hashValue = 0xEC42E90Du;
|
||||
hashValue ^= OutputDimensions ^ (Offset << 10);
|
||||
return hashValue;
|
||||
}
|
||||
|
||||
// Read network parameters
|
||||
bool ReadParameters(std::istream& /*stream*/) {
|
||||
bool read_parameters(std::istream& /*stream*/) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Write network parameters
|
||||
bool write_parameters(std::ostream& /*stream*/) const {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Forward propagation
|
||||
const OutputType* Propagate(
|
||||
const TransformedFeatureType* transformed_features,
|
||||
const OutputType* propagate(
|
||||
const TransformedFeatureType* transformedFeatures,
|
||||
char* /*buffer*/) const {
|
||||
return transformed_features + Offset;
|
||||
return transformedFeatures + Offset;
|
||||
}
|
||||
|
||||
private:
|
||||
};
|
||||
|
||||
} // namespace Layers
|
||||
} // namespace Stockfish::Eval::NNUE::Layers
|
||||
|
||||
#endif // #ifndef NNUE_LAYERS_INPUT_SLICE_H_INCLUDED
|
||||
|
|
|
@@ -23,18 +23,15 @@

#include "nnue_architecture.h"

- namespace Eval::NNUE {
-
- // The accumulator of a StateInfo without parent is set to the INIT state
- enum AccumulatorState { EMPTY, COMPUTED, INIT };
+ namespace Stockfish::Eval::NNUE {

// Class that holds the result of affine transformation of input features
- struct alignas(kCacheLineSize) Accumulator {
-   std::int16_t
-       accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions];
-   AccumulatorState state[2];
+ struct alignas(CacheLineSize) Accumulator {
+   std::int16_t accumulation[2][TransformedFeatureDimensions];
+   std::int32_t psqtAccumulation[2][PSQTBuckets];
+   bool computed[2];
};

- } // namespace Eval::NNUE
+ } // namespace Stockfish::Eval::NNUE

#endif // NNUE_ACCUMULATOR_H_INCLUDED

@@ -21,18 +21,40 @@

#ifndef NNUE_ARCHITECTURE_H_INCLUDED
#define NNUE_ARCHITECTURE_H_INCLUDED

- // Defines the network structure
- #include "architectures/halfkp_256x2-32-32.h"
#include "nnue_common.h"

- namespace Eval::NNUE {
+ #include "features/half_ka_v2.h"

- static_assert(kTransformedFeatureDimensions % kMaxSimdWidth == 0, "");
- static_assert(Network::kOutputDimensions == 1, "");
+ #include "layers/input_slice.h"
+ #include "layers/affine_transform.h"
+ #include "layers/clipped_relu.h"
+
+ namespace Stockfish::Eval::NNUE {
+
+ // Input features used in evaluation function
+ using FeatureSet = Features::HalfKAv2;
+
+ // Number of input feature dimensions after conversion
+ constexpr IndexType TransformedFeatureDimensions = 512;
+ constexpr IndexType PSQTBuckets = 8;
+ constexpr IndexType LayerStacks = 8;
+
+ namespace Layers {
+
+ // Define network structure
+ using InputLayer = InputSlice<TransformedFeatureDimensions * 2>;
+ using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 16>>;
+ using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
+ using OutputLayer = AffineTransform<HiddenLayer2, 1>;
+
+ } // namespace Layers
+
+ using Network = Layers::OutputLayer;
+
+ static_assert(TransformedFeatureDimensions % MaxSimdWidth == 0, "");
+ static_assert(Network::OutputDimensions == 1, "");
static_assert(std::is_same<Network::OutputType, std::int32_t>::value, "");

- // Trigger for full calculation instead of difference calculation
- constexpr auto kRefreshTriggers = RawFeatures::kRefreshTriggers;
-
- } // namespace Eval::NNUE
+ } // namespace Stockfish::Eval::NNUE

#endif // #ifndef NNUE_ARCHITECTURE_H_INCLUDED

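The Layers typedefs above fix the whole data flow of the new network: the feature transformer emits 2 x 512 clipped 8-bit activations (one half per perspective), and the stack then maps 1024 -> 16 -> 32 -> 1; the final 32-bit output is not clipped and is later divided by OutputScale. A rough scalar sketch of one affine + clipped-ReLU step with those element types (my own illustration, not the template code):

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

// out = clipped_relu(W*x + b); weights are int8, inputs/outputs uint8, biases int32,
// matching the AffineTransform/ClippedReLU pair declared in nnue_architecture.h.
constexpr int WeightScaleBits = 6;

std::vector<std::uint8_t> affine_clipped(const std::vector<std::uint8_t>& in,
                                         const std::vector<std::int8_t>& weights,   // [out][in], row-major
                                         const std::vector<std::int32_t>& biases) {
    const std::size_t outDims = biases.size(), inDims = in.size();
    std::vector<std::uint8_t> out(outDims);
    for (std::size_t i = 0; i < outDims; ++i) {
        std::int32_t sum = biases[i];
        for (std::size_t j = 0; j < inDims; ++j)
            sum += weights[i * inDims + j] * in[j];
        out[i] = static_cast<std::uint8_t>(std::clamp(sum >> WeightScaleBits, 0, 127));
    }
    return out;
}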
@@ -24,6 +24,8 @@

#include <cstring>
#include <iostream>

#include "../misc.h" // for IsLittleEndian

#if defined(USE_AVX2)
#include <immintrin.h>

@ -43,61 +45,33 @@
|
|||
#include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
namespace Eval::NNUE {
|
||||
namespace Stockfish::Eval::NNUE {
|
||||
|
||||
// Version of the evaluation file
|
||||
constexpr std::uint32_t kVersion = 0x7AF32F16u;
|
||||
constexpr std::uint32_t Version = 0x7AF32F20u;
|
||||
|
||||
// Constant used in evaluation value calculation
|
||||
constexpr int FV_SCALE = 16;
|
||||
constexpr int kWeightScaleBits = 6;
|
||||
constexpr int OutputScale = 16;
|
||||
constexpr int WeightScaleBits = 6;
|
||||
|
||||
// Size of cache line (in bytes)
|
||||
constexpr std::size_t kCacheLineSize = 64;
|
||||
constexpr std::size_t CacheLineSize = 64;
|
||||
|
||||
// SIMD width (in bytes)
|
||||
#if defined(USE_AVX2)
|
||||
constexpr std::size_t kSimdWidth = 32;
|
||||
constexpr std::size_t SimdWidth = 32;
|
||||
|
||||
#elif defined(USE_SSE2)
|
||||
constexpr std::size_t kSimdWidth = 16;
|
||||
constexpr std::size_t SimdWidth = 16;
|
||||
|
||||
#elif defined(USE_MMX)
|
||||
constexpr std::size_t kSimdWidth = 8;
|
||||
constexpr std::size_t SimdWidth = 8;
|
||||
|
||||
#elif defined(USE_NEON)
|
||||
constexpr std::size_t kSimdWidth = 16;
|
||||
constexpr std::size_t SimdWidth = 16;
|
||||
#endif
|
||||
|
||||
constexpr std::size_t kMaxSimdWidth = 32;
|
||||
|
||||
// unique number for each piece type on each square
|
||||
enum {
|
||||
PS_NONE = 0,
|
||||
PS_W_PAWN = 1,
|
||||
PS_B_PAWN = 1 * SQUARE_NB + 1,
|
||||
PS_W_KNIGHT = 2 * SQUARE_NB + 1,
|
||||
PS_B_KNIGHT = 3 * SQUARE_NB + 1,
|
||||
PS_W_BISHOP = 4 * SQUARE_NB + 1,
|
||||
PS_B_BISHOP = 5 * SQUARE_NB + 1,
|
||||
PS_W_ROOK = 6 * SQUARE_NB + 1,
|
||||
PS_B_ROOK = 7 * SQUARE_NB + 1,
|
||||
PS_W_QUEEN = 8 * SQUARE_NB + 1,
|
||||
PS_B_QUEEN = 9 * SQUARE_NB + 1,
|
||||
PS_W_KING = 10 * SQUARE_NB + 1,
|
||||
PS_END = PS_W_KING, // pieces without kings (pawns included)
|
||||
PS_B_KING = 11 * SQUARE_NB + 1,
|
||||
PS_END2 = 12 * SQUARE_NB + 1
|
||||
};
|
||||
|
||||
constexpr uint32_t kpp_board_index[COLOR_NB][PIECE_NB] = {
|
||||
// convention: W - us, B - them
|
||||
// viewed from other side, W and B are reversed
|
||||
{ PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_W_KING, PS_NONE,
|
||||
PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_B_KING, PS_NONE },
|
||||
{ PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_B_KING, PS_NONE,
|
||||
PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_W_KING, PS_NONE }
|
||||
};
|
||||
constexpr std::size_t MaxSimdWidth = 32;
|
||||
|
||||
// Type of input feature after conversion
|
||||
using TransformedFeatureType = std::uint8_t;
|
||||
|
@@ -105,7 +79,7 @@ namespace Eval::NNUE {

// Round n up to be a multiple of base
template <typename IntType>
- constexpr IntType CeilToMultiple(IntType n, IntType base) {
+ constexpr IntType ceil_to_multiple(IntType n, IntType base) {
  return (n + base - 1) / base * base;
}

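A quick worked illustration of the helper above (my own example): it is used to pad buffer sizes up to cache-line and SIMD-width multiples.

#include <cstdint>

// Same body as ceil_to_multiple() above, repeated here so the asserts are self-contained.
template <typename IntType>
constexpr IntType ceil_to_multiple(IntType n, IntType base) {
    return (n + base - 1) / base * base;
}

static_assert(ceil_to_multiple<std::uint32_t>(40, 64) == 64,  "pad a 40-byte buffer to one cache line");
static_assert(ceil_to_multiple<std::uint32_t>(64, 64) == 64,  "already aligned sizes are unchanged");
static_assert(ceil_to_multiple<std::uint32_t>(65, 64) == 128, "one byte over goes to the next multiple");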
@ -114,19 +88,77 @@ namespace Eval::NNUE {
|
|||
// necessary to return a result with the byte ordering of the compiling machine.
|
||||
template <typename IntType>
|
||||
inline IntType read_little_endian(std::istream& stream) {
|
||||
|
||||
IntType result;
|
||||
std::uint8_t u[sizeof(IntType)];
|
||||
typename std::make_unsigned<IntType>::type v = 0;
|
||||
|
||||
stream.read(reinterpret_cast<char*>(u), sizeof(IntType));
|
||||
for (std::size_t i = 0; i < sizeof(IntType); ++i)
|
||||
v = (v << 8) | u[sizeof(IntType) - i - 1];
|
||||
if (IsLittleEndian)
|
||||
stream.read(reinterpret_cast<char*>(&result), sizeof(IntType));
|
||||
else
|
||||
{
|
||||
std::uint8_t u[sizeof(IntType)];
|
||||
typename std::make_unsigned<IntType>::type v = 0;
|
||||
|
||||
stream.read(reinterpret_cast<char*>(u), sizeof(IntType));
|
||||
for (std::size_t i = 0; i < sizeof(IntType); ++i)
|
||||
v = (v << 8) | u[sizeof(IntType) - i - 1];
|
||||
|
||||
std::memcpy(&result, &v, sizeof(IntType));
|
||||
}
|
||||
|
||||
std::memcpy(&result, &v, sizeof(IntType));
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace Eval::NNUE
|
||||
// write_little_endian() is our utility to write an integer (signed or unsigned, any size)
|
||||
// to a stream in little-endian order. We swap the byte order before the write if
|
||||
// necessary to always write in little endian order, independantly of the byte
|
||||
// ordering of the compiling machine.
|
||||
template <typename IntType>
|
||||
inline void write_little_endian(std::ostream& stream, IntType value) {
|
||||
|
||||
if (IsLittleEndian)
|
||||
stream.write(reinterpret_cast<const char*>(&value), sizeof(IntType));
|
||||
else
|
||||
{
|
||||
std::uint8_t u[sizeof(IntType)];
|
||||
typename std::make_unsigned<IntType>::type v = value;
|
||||
|
||||
std::size_t i = 0;
|
||||
// if constexpr to silence the warning about shift by 8
|
||||
if constexpr (sizeof(IntType) > 1)
|
||||
{
|
||||
for (; i + 1 < sizeof(IntType); ++i)
|
||||
{
|
||||
u[i] = v;
|
||||
v >>= 8;
|
||||
}
|
||||
}
|
||||
u[i] = v;
|
||||
|
||||
stream.write(reinterpret_cast<char*>(u), sizeof(IntType));
|
||||
}
|
||||
}
|
||||
|
||||
// read_little_endian(s, out, N) : read integers in bulk from a little indian stream.
|
||||
// This reads N integers from stream s and put them in array out.
|
||||
template <typename IntType>
|
||||
inline void read_little_endian(std::istream& stream, IntType* out, std::size_t count) {
|
||||
if (IsLittleEndian)
|
||||
stream.read(reinterpret_cast<char*>(out), sizeof(IntType) * count);
|
||||
else
|
||||
for (std::size_t i = 0; i < count; ++i)
|
||||
out[i] = read_little_endian<IntType>(stream);
|
||||
}
|
||||
|
||||
// write_little_endian(s, values, N) : write integers in bulk to a little indian stream.
|
||||
// This takes N integers from array values and writes them on stream s.
|
||||
template <typename IntType>
|
||||
inline void write_little_endian(std::ostream& stream, const IntType* values, std::size_t count) {
|
||||
if (IsLittleEndian)
|
||||
stream.write(reinterpret_cast<const char*>(values), sizeof(IntType) * count);
|
||||
else
|
||||
for (std::size_t i = 0; i < count; ++i)
|
||||
write_little_endian<IntType>(stream, values[i]);
|
||||
}
|
||||
|
||||
} // namespace Stockfish::Eval::NNUE
|
||||
|
||||
#endif // #ifndef NNUE_COMMON_H_INCLUDED
|
||||
|
|
|
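The four helpers above are what the network (de)serialisation code calls, both for single scalar fields and for whole weight arrays. A small usage sketch, assuming nnue_common.h is included; the file name and values are made up:

#include <cstdint>
#include <fstream>

void roundtrip_example() {
    {
        std::ofstream out("weights.bin", std::ios::binary);
        Stockfish::Eval::NNUE::write_little_endian<std::int16_t>(out, 1234);          // one value
        const std::int16_t biases[4] = {1, -2, 3, -4};
        Stockfish::Eval::NNUE::write_little_endian<std::int16_t>(out, biases, 4);     // bulk
    }
    std::ifstream in("weights.bin", std::ios::binary);
    std::int16_t first = Stockfish::Eval::NNUE::read_little_endian<std::int16_t>(in); // one value
    std::int16_t biases[4];
    Stockfish::Eval::NNUE::read_little_endian<std::int16_t>(in, biases, 4);           // bulk
    (void)first;
}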
@ -23,72 +23,158 @@
|
|||
|
||||
#include "nnue_common.h"
|
||||
#include "nnue_architecture.h"
|
||||
#include "features/index_list.h"
|
||||
|
||||
#include <cstring> // std::memset()
|
||||
|
||||
namespace Eval::NNUE {
|
||||
namespace Stockfish::Eval::NNUE {
|
||||
|
||||
using BiasType = std::int16_t;
|
||||
using WeightType = std::int16_t;
|
||||
using PSQTWeightType = std::int32_t;
|
||||
|
||||
// If vector instructions are enabled, we update and refresh the
|
||||
// accumulator tile by tile such that each tile fits in the CPU's
|
||||
// vector registers.
|
||||
#define VECTOR
|
||||
|
||||
static_assert(PSQTBuckets % 8 == 0,
|
||||
"Per feature PSQT values cannot be processed at granularity lower than 8 at a time.");
|
||||
|
||||
#ifdef USE_AVX512
|
||||
typedef __m512i vec_t;
|
||||
typedef __m256i psqt_vec_t;
|
||||
#define vec_load(a) _mm512_load_si512(a)
|
||||
#define vec_store(a,b) _mm512_store_si512(a,b)
|
||||
#define vec_add_16(a,b) _mm512_add_epi16(a,b)
|
||||
#define vec_sub_16(a,b) _mm512_sub_epi16(a,b)
|
||||
static constexpr IndexType kNumRegs = 8; // only 8 are needed
|
||||
#define vec_load_psqt(a) _mm256_load_si256(a)
|
||||
#define vec_store_psqt(a,b) _mm256_store_si256(a,b)
|
||||
#define vec_add_psqt_32(a,b) _mm256_add_epi32(a,b)
|
||||
#define vec_sub_psqt_32(a,b) _mm256_sub_epi32(a,b)
|
||||
#define vec_zero_psqt() _mm256_setzero_si256()
|
||||
#define NumRegistersSIMD 32
|
||||
|
||||
#elif USE_AVX2
|
||||
typedef __m256i vec_t;
|
||||
typedef __m256i psqt_vec_t;
|
||||
#define vec_load(a) _mm256_load_si256(a)
|
||||
#define vec_store(a,b) _mm256_store_si256(a,b)
|
||||
#define vec_add_16(a,b) _mm256_add_epi16(a,b)
|
||||
#define vec_sub_16(a,b) _mm256_sub_epi16(a,b)
|
||||
static constexpr IndexType kNumRegs = 16;
|
||||
#define vec_load_psqt(a) _mm256_load_si256(a)
|
||||
#define vec_store_psqt(a,b) _mm256_store_si256(a,b)
|
||||
#define vec_add_psqt_32(a,b) _mm256_add_epi32(a,b)
|
||||
#define vec_sub_psqt_32(a,b) _mm256_sub_epi32(a,b)
|
||||
#define vec_zero_psqt() _mm256_setzero_si256()
|
||||
#define NumRegistersSIMD 16
|
||||
|
||||
#elif USE_SSE2
|
||||
typedef __m128i vec_t;
|
||||
typedef __m128i psqt_vec_t;
|
||||
#define vec_load(a) (*(a))
|
||||
#define vec_store(a,b) *(a)=(b)
|
||||
#define vec_add_16(a,b) _mm_add_epi16(a,b)
|
||||
#define vec_sub_16(a,b) _mm_sub_epi16(a,b)
|
||||
static constexpr IndexType kNumRegs = Is64Bit ? 16 : 8;
|
||||
#define vec_load_psqt(a) (*(a))
|
||||
#define vec_store_psqt(a,b) *(a)=(b)
|
||||
#define vec_add_psqt_32(a,b) _mm_add_epi32(a,b)
|
||||
#define vec_sub_psqt_32(a,b) _mm_sub_epi32(a,b)
|
||||
#define vec_zero_psqt() _mm_setzero_si128()
|
||||
#define NumRegistersSIMD (Is64Bit ? 16 : 8)
|
||||
|
||||
#elif USE_MMX
|
||||
typedef __m64 vec_t;
|
||||
typedef __m64 psqt_vec_t;
|
||||
#define vec_load(a) (*(a))
|
||||
#define vec_store(a,b) *(a)=(b)
|
||||
#define vec_add_16(a,b) _mm_add_pi16(a,b)
|
||||
#define vec_sub_16(a,b) _mm_sub_pi16(a,b)
|
||||
static constexpr IndexType kNumRegs = 8;
|
||||
#define vec_load_psqt(a) (*(a))
|
||||
#define vec_store_psqt(a,b) *(a)=(b)
|
||||
#define vec_add_psqt_32(a,b) _mm_add_pi32(a,b)
|
||||
#define vec_sub_psqt_32(a,b) _mm_sub_pi32(a,b)
|
||||
#define vec_zero_psqt() _mm_setzero_si64()
|
||||
#define NumRegistersSIMD 8
|
||||
|
||||
#elif USE_NEON
|
||||
typedef int16x8_t vec_t;
|
||||
typedef int32x4_t psqt_vec_t;
|
||||
#define vec_load(a) (*(a))
|
||||
#define vec_store(a,b) *(a)=(b)
|
||||
#define vec_add_16(a,b) vaddq_s16(a,b)
|
||||
#define vec_sub_16(a,b) vsubq_s16(a,b)
|
||||
static constexpr IndexType kNumRegs = 16;
|
||||
#define vec_load_psqt(a) (*(a))
|
||||
#define vec_store_psqt(a,b) *(a)=(b)
|
||||
#define vec_add_psqt_32(a,b) vaddq_s32(a,b)
|
||||
#define vec_sub_psqt_32(a,b) vsubq_s32(a,b)
|
||||
#define vec_zero_psqt() psqt_vec_t{0}
|
||||
#define NumRegistersSIMD 16
|
||||
|
||||
#else
|
||||
#undef VECTOR
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef VECTOR
|
||||
|
||||
// Compute optimal SIMD register count for feature transformer accumulation.
|
||||
|
||||
// We use __m* types as template arguments, which causes GCC to emit warnings
|
||||
// about losing some attribute information. This is irrelevant to us as we
|
||||
// only take their size, so the following pragma are harmless.
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wignored-attributes"
|
||||
|
||||
template <typename SIMDRegisterType,
|
||||
typename LaneType,
|
||||
int NumLanes,
|
||||
int MaxRegisters>
|
||||
static constexpr int BestRegisterCount()
|
||||
{
|
||||
#define RegisterSize sizeof(SIMDRegisterType)
|
||||
#define LaneSize sizeof(LaneType)
|
||||
|
||||
static_assert(RegisterSize >= LaneSize);
|
||||
static_assert(MaxRegisters <= NumRegistersSIMD);
|
||||
static_assert(MaxRegisters > 0);
|
||||
static_assert(NumRegistersSIMD > 0);
|
||||
static_assert(RegisterSize % LaneSize == 0);
|
||||
static_assert((NumLanes * LaneSize) % RegisterSize == 0);
|
||||
|
||||
const int ideal = (NumLanes * LaneSize) / RegisterSize;
|
||||
if (ideal <= MaxRegisters)
|
||||
return ideal;
|
||||
|
||||
// Look for the largest divisor of the ideal register count that is smaller than MaxRegisters
|
||||
for (int divisor = MaxRegisters; divisor > 1; --divisor)
|
||||
if (ideal % divisor == 0)
|
||||
return divisor;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static constexpr int NumRegs = BestRegisterCount<vec_t, WeightType, TransformedFeatureDimensions, NumRegistersSIMD>();
|
||||
static constexpr int NumPsqtRegs = BestRegisterCount<psqt_vec_t, PSQTWeightType, PSQTBuckets, NumRegistersSIMD>();
|
||||
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
#endif
|
||||
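To make the selection above concrete (my own worked example, using the constants in this commit): with AVX2, vec_t is __m256i (32 bytes), one accumulator row is TransformedFeatureDimensions = 512 int16 lanes = 1024 bytes, so the ideal count is 1024 / 32 = 32 registers; that exceeds the 16 available, and the largest divisor of 32 not above 16 is 16, hence NumRegs = 16. On the PSQT side, 8 int32 lanes are 32 bytes and fit in a single register, hence NumPsqtRegs = 1. The same arithmetic as a compile-time sketch:

// Plain-integer restatement of BestRegisterCount() above (sizes in bytes); not engine code.
constexpr int best_register_count(int registerSize, int laneSize, int numLanes, int maxRegisters) {
    const int ideal = (numLanes * laneSize) / registerSize;
    if (ideal <= maxRegisters)
        return ideal;
    for (int divisor = maxRegisters; divisor > 1; --divisor)
        if (ideal % divisor == 0)
            return divisor;
    return 1;
}

static_assert(best_register_count(32, 2, 512, 16) == 16, "AVX2 accumulator tile");
static_assert(best_register_count(32, 4,   8, 16) == 1,  "AVX2 PSQT tile");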
|
||||
|
||||
|
||||
// Input feature converter
|
||||
class FeatureTransformer {
|
||||
|
||||
private:
|
||||
// Number of output dimensions for one side
|
||||
static constexpr IndexType kHalfDimensions = kTransformedFeatureDimensions;
|
||||
static constexpr IndexType HalfDimensions = TransformedFeatureDimensions;
|
||||
|
||||
#ifdef VECTOR
|
||||
static constexpr IndexType kTileHeight = kNumRegs * sizeof(vec_t) / 2;
|
||||
static_assert(kHalfDimensions % kTileHeight == 0, "kTileHeight must divide kHalfDimensions");
|
||||
static constexpr IndexType TileHeight = NumRegs * sizeof(vec_t) / 2;
|
||||
static constexpr IndexType PsqtTileHeight = NumPsqtRegs * sizeof(psqt_vec_t) / 4;
|
||||
static_assert(HalfDimensions % TileHeight == 0, "TileHeight must divide HalfDimensions");
|
||||
static_assert(PSQTBuckets % PsqtTileHeight == 0, "PsqtTileHeight must divide PSQTBuckets");
|
||||
#endif
|
||||
|
||||
public:
|
||||
|
@ -96,174 +182,219 @@ namespace Eval::NNUE {
|
|||
using OutputType = TransformedFeatureType;
|
||||
|
||||
// Number of input/output dimensions
|
||||
static constexpr IndexType kInputDimensions = RawFeatures::kDimensions;
|
||||
static constexpr IndexType kOutputDimensions = kHalfDimensions * 2;
|
||||
static constexpr IndexType InputDimensions = FeatureSet::Dimensions;
|
||||
static constexpr IndexType OutputDimensions = HalfDimensions * 2;
|
||||
|
||||
// Size of forward propagation buffer
|
||||
static constexpr std::size_t kBufferSize =
|
||||
kOutputDimensions * sizeof(OutputType);
|
||||
static constexpr std::size_t BufferSize =
|
||||
OutputDimensions * sizeof(OutputType);
|
||||
|
||||
// Hash value embedded in the evaluation file
|
||||
static constexpr std::uint32_t GetHashValue() {
|
||||
|
||||
return RawFeatures::kHashValue ^ kOutputDimensions;
|
||||
static constexpr std::uint32_t get_hash_value() {
|
||||
return FeatureSet::HashValue ^ OutputDimensions;
|
||||
}
|
||||
|
||||
// Read network parameters
|
||||
bool ReadParameters(std::istream& stream) {
|
||||
bool read_parameters(std::istream& stream) {
|
||||
|
||||
read_little_endian<BiasType >(stream, biases , HalfDimensions );
|
||||
read_little_endian<WeightType >(stream, weights , HalfDimensions * InputDimensions);
|
||||
read_little_endian<PSQTWeightType>(stream, psqtWeights, PSQTBuckets * InputDimensions);
|
||||
|
||||
return !stream.fail();
|
||||
}
|
||||
|
||||
// Write network parameters
|
||||
bool write_parameters(std::ostream& stream) const {
|
||||
|
||||
write_little_endian<BiasType >(stream, biases , HalfDimensions );
|
||||
write_little_endian<WeightType >(stream, weights , HalfDimensions * InputDimensions);
|
||||
write_little_endian<PSQTWeightType>(stream, psqtWeights, PSQTBuckets * InputDimensions);
|
||||
|
||||
for (std::size_t i = 0; i < kHalfDimensions; ++i)
|
||||
biases_[i] = read_little_endian<BiasType>(stream);
|
||||
for (std::size_t i = 0; i < kHalfDimensions * kInputDimensions; ++i)
|
||||
weights_[i] = read_little_endian<WeightType>(stream);
|
||||
return !stream.fail();
|
||||
}
|
||||
|
||||
// Convert input features
|
||||
void Transform(const Position& pos, OutputType* output) const {
|
||||
|
||||
UpdateAccumulator(pos, WHITE);
|
||||
UpdateAccumulator(pos, BLACK);
|
||||
|
||||
const auto& accumulation = pos.state()->accumulator.accumulation;
|
||||
|
||||
#if defined(USE_AVX512)
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth * 2);
|
||||
static_assert(kHalfDimensions % (kSimdWidth * 2) == 0);
|
||||
const __m512i kControl = _mm512_setr_epi64(0, 2, 4, 6, 1, 3, 5, 7);
|
||||
const __m512i kZero = _mm512_setzero_si512();
|
||||
|
||||
#elif defined(USE_AVX2)
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
|
||||
constexpr int kControl = 0b11011000;
|
||||
const __m256i kZero = _mm256_setzero_si256();
|
||||
|
||||
#elif defined(USE_SSE2)
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
|
||||
|
||||
#ifdef USE_SSE41
|
||||
const __m128i kZero = _mm_setzero_si128();
|
||||
#else
|
||||
const __m128i k0x80s = _mm_set1_epi8(-128);
|
||||
#endif
|
||||
|
||||
#elif defined(USE_MMX)
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
|
||||
const __m64 k0x80s = _mm_set1_pi8(-128);
|
||||
|
||||
#elif defined(USE_NEON)
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
|
||||
const int8x8_t kZero = {0};
|
||||
#endif
|
||||
std::int32_t transform(const Position& pos, OutputType* output, int bucket) const {
|
||||
update_accumulator(pos, WHITE);
|
||||
update_accumulator(pos, BLACK);
|
||||
|
||||
const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()};
|
||||
for (IndexType p = 0; p < 2; ++p) {
|
||||
const IndexType offset = kHalfDimensions * p;
|
||||
const auto& accumulation = pos.state()->accumulator.accumulation;
|
||||
const auto& psqtAccumulation = pos.state()->accumulator.psqtAccumulation;
|
||||
|
||||
const auto psqt = (
|
||||
psqtAccumulation[perspectives[0]][bucket]
|
||||
- psqtAccumulation[perspectives[1]][bucket]
|
||||
) / 2;
|
||||
|
||||
|
||||
#if defined(USE_AVX512)
|
||||
auto out = reinterpret_cast<__m512i*>(&output[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
__m512i sum0 = _mm512_load_si512(
|
||||
&reinterpret_cast<const __m512i*>(accumulation[perspectives[p]][0])[j * 2 + 0]);
|
||||
__m512i sum1 = _mm512_load_si512(
|
||||
&reinterpret_cast<const __m512i*>(accumulation[perspectives[p]][0])[j * 2 + 1]);
|
||||
_mm512_store_si512(&out[j], _mm512_permutexvar_epi64(kControl,
|
||||
_mm512_max_epi8(_mm512_packs_epi16(sum0, sum1), kZero)));
|
||||
}
|
||||
|
||||
constexpr IndexType NumChunks = HalfDimensions / (SimdWidth * 2);
|
||||
static_assert(HalfDimensions % (SimdWidth * 2) == 0);
|
||||
const __m512i Control = _mm512_setr_epi64(0, 2, 4, 6, 1, 3, 5, 7);
|
||||
const __m512i Zero = _mm512_setzero_si512();
|
||||
|
||||
for (IndexType p = 0; p < 2; ++p)
|
||||
{
|
||||
const IndexType offset = HalfDimensions * p;
|
||||
auto out = reinterpret_cast<__m512i*>(&output[offset]);
|
||||
for (IndexType j = 0; j < NumChunks; ++j)
|
||||
{
|
||||
__m512i sum0 = _mm512_load_si512(&reinterpret_cast<const __m512i*>
|
||||
(accumulation[perspectives[p]])[j * 2 + 0]);
|
||||
__m512i sum1 = _mm512_load_si512(&reinterpret_cast<const __m512i*>
|
||||
(accumulation[perspectives[p]])[j * 2 + 1]);
|
||||
|
||||
_mm512_store_si512(&out[j], _mm512_permutexvar_epi64(Control,
|
||||
_mm512_max_epi8(_mm512_packs_epi16(sum0, sum1), Zero)));
|
||||
}
|
||||
}
|
||||
return psqt;
|
||||
|
||||
#elif defined(USE_AVX2)
|
||||
auto out = reinterpret_cast<__m256i*>(&output[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
__m256i sum0 = _mm256_load_si256(
|
||||
&reinterpret_cast<const __m256i*>(accumulation[perspectives[p]][0])[j * 2 + 0]);
|
||||
__m256i sum1 = _mm256_load_si256(
|
||||
&reinterpret_cast<const __m256i*>(accumulation[perspectives[p]][0])[j * 2 + 1]);
|
||||
_mm256_store_si256(&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8(
|
||||
_mm256_packs_epi16(sum0, sum1), kZero), kControl));
|
||||
}
|
||||
|
||||
constexpr IndexType NumChunks = HalfDimensions / SimdWidth;
|
||||
constexpr int Control = 0b11011000;
|
||||
const __m256i Zero = _mm256_setzero_si256();
|
||||
|
||||
for (IndexType p = 0; p < 2; ++p)
|
||||
{
|
||||
const IndexType offset = HalfDimensions * p;
|
||||
auto out = reinterpret_cast<__m256i*>(&output[offset]);
|
||||
for (IndexType j = 0; j < NumChunks; ++j)
|
||||
{
|
||||
__m256i sum0 = _mm256_load_si256(&reinterpret_cast<const __m256i*>
|
||||
(accumulation[perspectives[p]])[j * 2 + 0]);
|
||||
__m256i sum1 = _mm256_load_si256(&reinterpret_cast<const __m256i*>
|
||||
(accumulation[perspectives[p]])[j * 2 + 1]);
|
||||
|
||||
_mm256_store_si256(&out[j], _mm256_permute4x64_epi64(
|
||||
_mm256_max_epi8(_mm256_packs_epi16(sum0, sum1), Zero), Control));
|
||||
}
|
||||
}
|
||||
return psqt;
|
||||
|
||||
#elif defined(USE_SSE2)
|
||||
auto out = reinterpret_cast<__m128i*>(&output[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
__m128i sum0 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
|
||||
accumulation[perspectives[p]][0])[j * 2 + 0]);
|
||||
__m128i sum1 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
|
||||
accumulation[perspectives[p]][0])[j * 2 + 1]);
|
||||
const __m128i packedbytes = _mm_packs_epi16(sum0, sum1);
|
||||
|
||||
_mm_store_si128(&out[j],
|
||||
#ifdef USE_SSE41
|
||||
constexpr IndexType NumChunks = HalfDimensions / SimdWidth;
|
||||
const __m128i Zero = _mm_setzero_si128();
|
||||
#else
|
||||
constexpr IndexType NumChunks = HalfDimensions / SimdWidth;
|
||||
const __m128i k0x80s = _mm_set1_epi8(-128);
|
||||
#endif
|
||||
|
||||
#ifdef USE_SSE41
|
||||
_mm_max_epi8(packedbytes, kZero)
|
||||
#else
|
||||
_mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)
|
||||
#endif
|
||||
for (IndexType p = 0; p < 2; ++p)
|
||||
{
|
||||
const IndexType offset = HalfDimensions * p;
|
||||
auto out = reinterpret_cast<__m128i*>(&output[offset]);
|
||||
for (IndexType j = 0; j < NumChunks; ++j)
|
||||
{
|
||||
__m128i sum0 = _mm_load_si128(&reinterpret_cast<const __m128i*>
|
||||
(accumulation[perspectives[p]])[j * 2 + 0]);
|
||||
__m128i sum1 = _mm_load_si128(&reinterpret_cast<const __m128i*>
|
||||
(accumulation[perspectives[p]])[j * 2 + 1]);
|
||||
const __m128i packedbytes = _mm_packs_epi16(sum0, sum1);
|
||||
|
||||
);
|
||||
}
|
||||
#ifdef USE_SSE41
|
||||
_mm_store_si128(&out[j], _mm_max_epi8(packedbytes, Zero));
|
||||
#else
|
||||
_mm_store_si128(&out[j], _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s));
|
||||
#endif
|
||||
}
|
||||
}
|
||||
return psqt;
|
||||
|
||||
#elif defined(USE_MMX)
|
||||
auto out = reinterpret_cast<__m64*>(&output[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
__m64 sum0 = *(&reinterpret_cast<const __m64*>(
|
||||
accumulation[perspectives[p]][0])[j * 2 + 0]);
|
||||
__m64 sum1 = *(&reinterpret_cast<const __m64*>(
|
||||
accumulation[perspectives[p]][0])[j * 2 + 1]);
|
||||
const __m64 packedbytes = _mm_packs_pi16(sum0, sum1);
|
||||
out[j] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s);
|
||||
}
|
||||
|
||||
constexpr IndexType NumChunks = HalfDimensions / SimdWidth;
|
||||
const __m64 k0x80s = _mm_set1_pi8(-128);
|
||||
|
||||
for (IndexType p = 0; p < 2; ++p)
|
||||
{
|
||||
const IndexType offset = HalfDimensions * p;
|
||||
auto out = reinterpret_cast<__m64*>(&output[offset]);
|
||||
for (IndexType j = 0; j < NumChunks; ++j)
|
||||
{
|
||||
__m64 sum0 = *(&reinterpret_cast<const __m64*>(accumulation[perspectives[p]])[j * 2 + 0]);
|
||||
__m64 sum1 = *(&reinterpret_cast<const __m64*>(accumulation[perspectives[p]])[j * 2 + 1]);
|
||||
const __m64 packedbytes = _mm_packs_pi16(sum0, sum1);
|
||||
out[j] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s);
|
||||
}
|
||||
}
|
||||
_mm_empty();
|
||||
return psqt;
|
||||
|
||||
#elif defined(USE_NEON)
|
||||
const auto out = reinterpret_cast<int8x8_t*>(&output[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
int16x8_t sum = reinterpret_cast<const int16x8_t*>(
|
||||
accumulation[perspectives[p]][0])[j];
|
||||
out[j] = vmax_s8(vqmovn_s16(sum), kZero);
|
||||
}
|
||||
|
||||
constexpr IndexType NumChunks = HalfDimensions / (SimdWidth / 2);
|
||||
const int8x8_t Zero = {0};
|
||||
|
||||
for (IndexType p = 0; p < 2; ++p)
|
||||
{
|
||||
const IndexType offset = HalfDimensions * p;
|
||||
const auto out = reinterpret_cast<int8x8_t*>(&output[offset]);
|
||||
for (IndexType j = 0; j < NumChunks; ++j)
|
||||
{
|
||||
int16x8_t sum = reinterpret_cast<const int16x8_t*>(accumulation[perspectives[p]])[j];
|
||||
out[j] = vmax_s8(vqmovn_s16(sum), Zero);
|
||||
}
|
||||
}
|
||||
return psqt;
|
||||
|
||||
#else
|
||||
for (IndexType j = 0; j < kHalfDimensions; ++j) {
|
||||
BiasType sum = accumulation[static_cast<int>(perspectives[p])][0][j];
|
||||
output[offset + j] = static_cast<OutputType>(
|
||||
std::max<int>(0, std::min<int>(127, sum)));
|
||||
}
|
||||
|
||||
for (IndexType p = 0; p < 2; ++p)
|
||||
{
|
||||
const IndexType offset = HalfDimensions * p;
|
||||
for (IndexType j = 0; j < HalfDimensions; ++j)
|
||||
{
|
||||
BiasType sum = accumulation[perspectives[p]][j];
|
||||
output[offset + j] = static_cast<OutputType>(std::max<int>(0, std::min<int>(127, sum)));
|
||||
}
|
||||
}
|
||||
return psqt;
|
||||
|
||||
#endif
|
||||
|
||||
}
|
||||
#if defined(USE_MMX)
|
||||
_mm_empty();
|
||||
#endif
|
||||
}
|
||||
} // end of function transform()
|
||||
|
||||
|
||||
|
||||
private:
|
||||
void UpdateAccumulator(const Position& pos, const Color c) const {
|
||||
void update_accumulator(const Position& pos, const Color perspective) const {
|
||||
|
||||
// The size must be enough to contain the largest possible update.
|
||||
// That might depend on the feature set and generally relies on the
|
||||
// feature set's update cost calculation to be correct and never
|
||||
// allow updates with more added/removed features than MaxActiveDimensions.
|
||||
using IndexList = ValueList<IndexType, FeatureSet::MaxActiveDimensions>;
|
||||
|
||||
#ifdef VECTOR
|
||||
// Gcc-10.2 unnecessarily spills AVX2 registers if this array
|
||||
// is defined in the VECTOR code below, once in each branch
|
||||
vec_t acc[kNumRegs];
|
||||
vec_t acc[NumRegs];
|
||||
psqt_vec_t psqt[NumPsqtRegs];
|
||||
#endif
|
||||
|
||||
// Look for a usable accumulator of an earlier position. We keep track
|
||||
// of the estimated gain in terms of features to be added/subtracted.
|
||||
StateInfo *st = pos.state(), *next = nullptr;
|
||||
int gain = pos.count<ALL_PIECES>() - 2;
|
||||
while (st->accumulator.state[c] == EMPTY)
|
||||
int gain = FeatureSet::refresh_cost(pos);
|
||||
while (st->previous && !st->accumulator.computed[perspective])
|
||||
{
|
||||
auto& dp = st->dirtyPiece;
|
||||
// The first condition tests whether an incremental update is
|
||||
// possible at all: if this side's king has moved, it is not possible.
|
||||
static_assert(std::is_same_v<RawFeatures::SortedTriggerSet,
|
||||
Features::CompileTimeList<Features::TriggerEvent, Features::TriggerEvent::kFriendKingMoved>>,
|
||||
"Current code assumes that only kFriendlyKingMoved refresh trigger is being used.");
|
||||
if ( dp.piece[0] == make_piece(c, KING)
|
||||
|| (gain -= dp.dirty_num + 1) < 0)
|
||||
// This governs when a full feature refresh is needed and how many
|
||||
// updates are better than just one full refresh.
|
||||
if ( FeatureSet::requires_refresh(st, perspective)
|
||||
|| (gain -= FeatureSet::update_cost(st) + 1) < 0)
|
||||
break;
|
||||
next = st;
|
||||
st = st->previous;
|
||||
}
|
||||
|
||||
if (st->accumulator.state[c] == COMPUTED)
|
||||
if (st->accumulator.computed[perspective])
|
||||
{
|
||||
if (next == nullptr)
|
||||
return;
|
||||
|
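The gain bookkeeping above decides between walking further back through StateInfo history (cheap incremental updates against an already computed ancestor) and giving up and refreshing the accumulator from scratch. A rough standalone illustration of that decision; the numbers and the helper are hypothetical, with refreshCost/updateCost standing in for the FeatureSet calls:

#include <cstddef>
#include <vector>

struct PlyInfo { bool computed; int updateCost; };

// Returns how many plies back a usable accumulator was found, or -1 when a full
// refresh is cheaper (the budget 'gain' runs out before reaching a computed state).
int plies_to_reuse(const std::vector<PlyInfo>& history /* newest first */, int refreshCost) {
    int gain = refreshCost;
    for (std::size_t i = 0; i < history.size(); ++i) {
        if (history[i].computed)
            return static_cast<int>(i);
        if ((gain -= history[i].updateCost + 1) < 0)
            return -1;
    }
    return -1;
}

// e.g. refreshCost = 18 and two plies with 2 dirty pieces each: gain goes 18 -> 15 -> 12,
// still non-negative, so both plies are applied incrementally instead of refreshing.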
@ -271,85 +402,129 @@ namespace Eval::NNUE {
|
|||
// Update incrementally in two steps. First, we update the "next"
|
||||
// accumulator. Then, we update the current accumulator (pos.state()).
|
||||
|
||||
// Gather all features to be updated. This code assumes HalfKP features
|
||||
// only and doesn't support refresh triggers.
|
||||
static_assert(std::is_same_v<Features::FeatureSet<Features::HalfKP<Features::Side::kFriend>>,
|
||||
RawFeatures>);
|
||||
Features::IndexList removed[2], added[2];
|
||||
Features::HalfKP<Features::Side::kFriend>::AppendChangedIndices(pos,
|
||||
next->dirtyPiece, c, &removed[0], &added[0]);
|
||||
// Gather all features to be updated.
|
||||
const Square ksq = pos.square<KING>(perspective);
|
||||
IndexList removed[2], added[2];
|
||||
FeatureSet::append_changed_indices(
|
||||
ksq, next, perspective, removed[0], added[0]);
|
||||
for (StateInfo *st2 = pos.state(); st2 != next; st2 = st2->previous)
|
||||
Features::HalfKP<Features::Side::kFriend>::AppendChangedIndices(pos,
|
||||
st2->dirtyPiece, c, &removed[1], &added[1]);
|
||||
FeatureSet::append_changed_indices(
|
||||
ksq, st2, perspective, removed[1], added[1]);
|
||||
|
||||
// Mark the accumulators as computed.
|
||||
next->accumulator.state[c] = COMPUTED;
|
||||
pos.state()->accumulator.state[c] = COMPUTED;
|
||||
next->accumulator.computed[perspective] = true;
|
||||
pos.state()->accumulator.computed[perspective] = true;
|
||||
|
||||
// Now update the accumulators listed in info[], where the last element is a sentinel.
|
||||
StateInfo *info[3] =
|
||||
// Now update the accumulators listed in states_to_update[], where the last element is a sentinel.
|
||||
StateInfo *states_to_update[3] =
|
||||
{ next, next == pos.state() ? nullptr : pos.state(), nullptr };
|
||||
#ifdef VECTOR
|
||||
for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j)
|
||||
for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j)
|
||||
{
|
||||
// Load accumulator
|
||||
auto accTile = reinterpret_cast<vec_t*>(
|
||||
&st->accumulator.accumulation[c][0][j * kTileHeight]);
|
||||
for (IndexType k = 0; k < kNumRegs; ++k)
|
||||
&st->accumulator.accumulation[perspective][j * TileHeight]);
|
||||
for (IndexType k = 0; k < NumRegs; ++k)
|
||||
acc[k] = vec_load(&accTile[k]);
|
||||
|
||||
for (IndexType i = 0; info[i]; ++i)
|
||||
for (IndexType i = 0; states_to_update[i]; ++i)
|
||||
{
|
||||
// Difference calculation for the deactivated features
|
||||
for (const auto index : removed[i])
|
||||
{
|
||||
const IndexType offset = kHalfDimensions * index + j * kTileHeight;
|
||||
auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
|
||||
for (IndexType k = 0; k < kNumRegs; ++k)
|
||||
const IndexType offset = HalfDimensions * index + j * TileHeight;
|
||||
auto column = reinterpret_cast<const vec_t*>(&weights[offset]);
|
||||
for (IndexType k = 0; k < NumRegs; ++k)
|
||||
acc[k] = vec_sub_16(acc[k], column[k]);
|
||||
}
|
||||
|
||||
// Difference calculation for the activated features
|
||||
for (const auto index : added[i])
|
||||
{
|
||||
const IndexType offset = kHalfDimensions * index + j * kTileHeight;
|
||||
auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
|
||||
for (IndexType k = 0; k < kNumRegs; ++k)
|
||||
const IndexType offset = HalfDimensions * index + j * TileHeight;
|
||||
auto column = reinterpret_cast<const vec_t*>(&weights[offset]);
|
||||
for (IndexType k = 0; k < NumRegs; ++k)
|
||||
acc[k] = vec_add_16(acc[k], column[k]);
|
||||
}
|
||||
|
||||
// Store accumulator
|
||||
accTile = reinterpret_cast<vec_t*>(
|
||||
&info[i]->accumulator.accumulation[c][0][j * kTileHeight]);
|
||||
for (IndexType k = 0; k < kNumRegs; ++k)
|
||||
&states_to_update[i]->accumulator.accumulation[perspective][j * TileHeight]);
|
||||
for (IndexType k = 0; k < NumRegs; ++k)
|
||||
vec_store(&accTile[k], acc[k]);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
for (IndexType i = 0; info[i]; ++i)
|
||||
for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j)
|
||||
{
|
||||
std::memcpy(info[i]->accumulator.accumulation[c][0],
|
||||
st->accumulator.accumulation[c][0],
|
||||
kHalfDimensions * sizeof(BiasType));
|
||||
st = info[i];
|
||||
// Load accumulator
|
||||
auto accTilePsqt = reinterpret_cast<psqt_vec_t*>(
|
||||
&st->accumulator.psqtAccumulation[perspective][j * PsqtTileHeight]);
|
||||
for (std::size_t k = 0; k < NumPsqtRegs; ++k)
|
||||
psqt[k] = vec_load_psqt(&accTilePsqt[k]);
|
||||
|
||||
for (IndexType i = 0; states_to_update[i]; ++i)
|
||||
{
|
||||
// Difference calculation for the deactivated features
|
||||
for (const auto index : removed[i])
|
||||
{
|
||||
const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight;
|
||||
auto columnPsqt = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset]);
|
||||
for (std::size_t k = 0; k < NumPsqtRegs; ++k)
|
||||
psqt[k] = vec_sub_psqt_32(psqt[k], columnPsqt[k]);
|
||||
}
|
||||
|
||||
// Difference calculation for the activated features
|
||||
for (const auto index : added[i])
|
||||
{
|
||||
const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight;
|
||||
auto columnPsqt = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset]);
|
||||
for (std::size_t k = 0; k < NumPsqtRegs; ++k)
|
||||
psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]);
|
||||
}
|
||||
|
||||
// Store accumulator
|
||||
accTilePsqt = reinterpret_cast<psqt_vec_t*>(
|
||||
&states_to_update[i]->accumulator.psqtAccumulation[perspective][j * PsqtTileHeight]);
|
||||
for (std::size_t k = 0; k < NumPsqtRegs; ++k)
|
||||
vec_store_psqt(&accTilePsqt[k], psqt[k]);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
for (IndexType i = 0; states_to_update[i]; ++i)
|
||||
{
|
||||
std::memcpy(states_to_update[i]->accumulator.accumulation[perspective],
|
||||
st->accumulator.accumulation[perspective],
|
||||
HalfDimensions * sizeof(BiasType));
|
||||
|
||||
for (std::size_t k = 0; k < PSQTBuckets; ++k)
|
||||
states_to_update[i]->accumulator.psqtAccumulation[perspective][k] = st->accumulator.psqtAccumulation[perspective][k];
|
||||
|
||||
st = states_to_update[i];
|
||||
|
||||
// Difference calculation for the deactivated features
|
||||
for (const auto index : removed[i])
|
||||
{
|
||||
const IndexType offset = kHalfDimensions * index;
|
||||
const IndexType offset = HalfDimensions * index;
|
||||
|
||||
for (IndexType j = 0; j < kHalfDimensions; ++j)
|
||||
st->accumulator.accumulation[c][0][j] -= weights_[offset + j];
|
||||
for (IndexType j = 0; j < HalfDimensions; ++j)
|
||||
st->accumulator.accumulation[perspective][j] -= weights[offset + j];
|
||||
|
||||
for (std::size_t k = 0; k < PSQTBuckets; ++k)
|
||||
st->accumulator.psqtAccumulation[perspective][k] -= psqtWeights[index * PSQTBuckets + k];
|
||||
}
|
||||
|
||||
// Difference calculation for the activated features
|
||||
for (const auto index : added[i])
|
||||
{
|
||||
const IndexType offset = kHalfDimensions * index;
|
||||
const IndexType offset = HalfDimensions * index;
|
||||
|
||||
for (IndexType j = 0; j < kHalfDimensions; ++j)
|
||||
st->accumulator.accumulation[c][0][j] += weights_[offset + j];
|
||||
for (IndexType j = 0; j < HalfDimensions; ++j)
|
||||
st->accumulator.accumulation[perspective][j] += weights[offset + j];
|
||||
|
||||
for (std::size_t k = 0; k < PSQTBuckets; ++k)
|
||||
st->accumulator.psqtAccumulation[perspective][k] += psqtWeights[index * PSQTBuckets + k];
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
@ -358,43 +533,69 @@ namespace Eval::NNUE {
|
|||
{
|
||||
// Refresh the accumulator
|
||||
auto& accumulator = pos.state()->accumulator;
|
||||
accumulator.state[c] = COMPUTED;
|
||||
Features::IndexList active;
|
||||
Features::HalfKP<Features::Side::kFriend>::AppendActiveIndices(pos, c, &active);
|
||||
accumulator.computed[perspective] = true;
|
||||
IndexList active;
|
||||
FeatureSet::append_active_indices(pos, perspective, active);
|
||||
|
||||
#ifdef VECTOR
|
||||
for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j)
|
||||
for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j)
|
||||
{
|
||||
auto biasesTile = reinterpret_cast<const vec_t*>(
|
||||
&biases_[j * kTileHeight]);
|
||||
for (IndexType k = 0; k < kNumRegs; ++k)
|
||||
&biases[j * TileHeight]);
|
||||
for (IndexType k = 0; k < NumRegs; ++k)
|
||||
acc[k] = biasesTile[k];
|
||||
|
||||
for (const auto index : active)
|
||||
{
|
||||
const IndexType offset = kHalfDimensions * index + j * kTileHeight;
|
||||
auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
|
||||
const IndexType offset = HalfDimensions * index + j * TileHeight;
|
||||
auto column = reinterpret_cast<const vec_t*>(&weights[offset]);
|
||||
|
||||
for (unsigned k = 0; k < kNumRegs; ++k)
|
||||
for (unsigned k = 0; k < NumRegs; ++k)
|
||||
acc[k] = vec_add_16(acc[k], column[k]);
|
||||
}
|
||||
|
||||
auto accTile = reinterpret_cast<vec_t*>(
|
||||
&accumulator.accumulation[c][0][j * kTileHeight]);
|
||||
for (unsigned k = 0; k < kNumRegs; k++)
|
||||
&accumulator.accumulation[perspective][j * TileHeight]);
|
||||
for (unsigned k = 0; k < NumRegs; k++)
|
||||
vec_store(&accTile[k], acc[k]);
|
||||
}
|
||||
|
||||
for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j)
|
||||
{
|
||||
for (std::size_t k = 0; k < NumPsqtRegs; ++k)
|
||||
psqt[k] = vec_zero_psqt();
|
||||
|
||||
for (const auto index : active)
|
||||
{
|
||||
const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight;
|
||||
auto columnPsqt = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset]);
|
||||
|
||||
for (std::size_t k = 0; k < NumPsqtRegs; ++k)
|
||||
psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]);
|
||||
}
|
||||
|
||||
auto accTilePsqt = reinterpret_cast<psqt_vec_t*>(
|
||||
&accumulator.psqtAccumulation[perspective][j * PsqtTileHeight]);
|
||||
for (std::size_t k = 0; k < NumPsqtRegs; ++k)
|
||||
vec_store_psqt(&accTilePsqt[k], psqt[k]);
|
||||
}
|
||||
|
||||
#else
|
||||
std::memcpy(accumulator.accumulation[c][0], biases_,
|
||||
kHalfDimensions * sizeof(BiasType));
|
||||
std::memcpy(accumulator.accumulation[perspective], biases,
|
||||
HalfDimensions * sizeof(BiasType));
|
||||
|
||||
for (std::size_t k = 0; k < PSQTBuckets; ++k)
|
||||
accumulator.psqtAccumulation[perspective][k] = 0;
|
||||
|
||||
for (const auto index : active)
|
||||
{
|
||||
const IndexType offset = kHalfDimensions * index;
|
||||
const IndexType offset = HalfDimensions * index;
|
||||
|
||||
for (IndexType j = 0; j < kHalfDimensions; ++j)
|
||||
accumulator.accumulation[c][0][j] += weights_[offset + j];
|
||||
for (IndexType j = 0; j < HalfDimensions; ++j)
|
||||
accumulator.accumulation[perspective][j] += weights[offset + j];
|
||||
|
||||
for (std::size_t k = 0; k < PSQTBuckets; ++k)
|
||||
accumulator.psqtAccumulation[perspective][k] += psqtWeights[index * PSQTBuckets + k];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
@ -404,14 +605,11 @@ namespace Eval::NNUE {
|
|||
#endif
|
||||
}
|
||||
|
||||
using BiasType = std::int16_t;
|
||||
using WeightType = std::int16_t;
|
||||
|
||||
alignas(kCacheLineSize) BiasType biases_[kHalfDimensions];
|
||||
alignas(kCacheLineSize)
|
||||
WeightType weights_[kHalfDimensions * kInputDimensions];
|
||||
alignas(CacheLineSize) BiasType biases[HalfDimensions];
|
||||
alignas(CacheLineSize) WeightType weights[HalfDimensions * InputDimensions];
|
||||
alignas(CacheLineSize) PSQTWeightType psqtWeights[InputDimensions * PSQTBuckets];
|
||||
};
|
||||
|
||||
} // namespace Eval::NNUE
|
||||
} // namespace Stockfish::Eval::NNUE
|
||||
|
||||
#endif // #ifndef NNUE_FEATURE_TRANSFORMER_H_INCLUDED
|
||||
|
|
|
@@ -24,6 +24,8 @@

#include "position.h"
#include "thread.h"

+ namespace Stockfish {
+
namespace {

#define V Value

@@ -107,8 +109,9 @@ namespace {

e->blockedCount += popcount(shift<Up>(ourPawns) & (theirPawns | doubleAttackThem));

// Loop through all pawns of the current color and score each pawn
- while (b) {
-     s = pop_lsb(&b);
+ while (b)
+ {
+     s = pop_lsb(b);

assert(pos.piece_on(s) == make_piece(Us, PAWN));

@@ -288,7 +291,7 @@ Score Entry::do_king_safety(const Position& pos) {

if (pawns & attacks_bb<KING>(ksq))
    minPawnDist = 1;
else while (pawns)
-     minPawnDist = std::min(minPawnDist, distance(ksq, pop_lsb(&pawns)));
+     minPawnDist = std::min(minPawnDist, distance(ksq, pop_lsb(pawns)));

return shelter - make_score(0, 16 * minPawnDist);
}
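The pop_lsb() hunks here and in position.cpp below all follow the same interface change in this commit: the bitboard is now passed by reference rather than by pointer. A minimal sketch of the new calling style (illustrative; the engine's version returns a Square and lives in bitboard.h):

#include <cstdint>

using Bitboard = std::uint64_t;

// New-style pop_lsb: take the bitboard by reference, clear its lowest set bit,
// return that bit's index. Old call sites wrote pop_lsb(&b); new ones write pop_lsb(b).
inline int pop_lsb(Bitboard& b) {
    const int s = __builtin_ctzll(b);  // assumes b != 0, GCC/Clang builtin
    b &= b - 1;
    return s;
}

inline int count_and_clear(Bitboard b) {
    int n = 0;
    while (b) { pop_lsb(b); ++n; }
    return n;
}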
@@ -298,3 +301,5 @@ template Score Entry::do_king_safety<WHITE>(const Position& pos);

template Score Entry::do_king_safety<BLACK>(const Position& pos);

} // namespace Pawns
+
+ } // namespace Stockfish

@@ -23,7 +23,7 @@

#include "position.h"
#include "types.h"

- namespace Pawns {
+ namespace Stockfish::Pawns {

/// Pawns::Entry contains various information about a pawn structure. A lookup
/// to the pawn hash table (performed by calling the probe function) returns a

@@ -65,6 +65,6 @@ typedef HashTable<Entry, 131072> Table;

Entry* probe(const Position& pos);

- } // namespace Pawns
+ } // namespace Stockfish::Pawns

#endif // #ifndef PAWNS_H_INCLUDED

@@ -34,6 +34,8 @@

using std::string;

+ namespace Stockfish {
+
namespace Zobrist {

Key psq[PIECE_NB][SQUARE_NB];

@@ -71,13 +73,13 @@ std::ostream& operator<<(std::ostream& os, const Position& pos) {

   << std::setfill(' ') << std::dec << "\nCheckers: ";

for (Bitboard b = pos.checkers(); b; )
-     os << UCI::square(pop_lsb(&b)) << " ";
+     os << UCI::square(pop_lsb(b)) << " ";

if ( int(Tablebases::MaxCardinality) >= popcount(pos.pieces())
    && !pos.can_castle(ANY_CASTLING))
{
    StateInfo st;
-   ASSERT_ALIGNED(&st, Eval::NNUE::kCacheLineSize);
+   ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize);

    Position p;
    p.set(pos.fen(), pos.is_chess960(), &st, pos.this_thread());

@ -249,8 +251,6 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th
set_castling_right(c, rsq);
}

set_state(st);

// 4. En passant square.
// Ignore if square is invalid or not on side to move relative rank 6.
bool enpassant = false;
@ -264,24 +264,12 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th
// a) side to move have a pawn threatening epSquare
// b) there is an enemy pawn in front of epSquare
// c) there is no piece on epSquare or behind epSquare
// d) enemy pawn didn't block a check of its own color by moving forward
enpassant = pawn_attacks_bb(~sideToMove, st->epSquare) & pieces(sideToMove, PAWN)
&& (pieces(~sideToMove, PAWN) & (st->epSquare + pawn_push(~sideToMove)))
&& !(pieces() & (st->epSquare | (st->epSquare + pawn_push(sideToMove))))
&& ( file_of(square<KING>(sideToMove)) == file_of(st->epSquare)
|| !(blockers_for_king(sideToMove) & (st->epSquare + pawn_push(~sideToMove))));
&& !(pieces() & (st->epSquare | (st->epSquare + pawn_push(sideToMove))));
}

// It's necessary for st->previous to be intialized in this way because legality check relies on its existence
if (enpassant) {
st->previous = new StateInfo();
remove_piece(st->epSquare - pawn_push(sideToMove));
st->previous->checkersBB = attackers_to(square<KING>(~sideToMove)) & pieces(sideToMove);
st->previous->blockersForKing[WHITE] = slider_blockers(pieces(BLACK), square<KING>(WHITE), st->previous->pinners[BLACK]);
st->previous->blockersForKing[BLACK] = slider_blockers(pieces(WHITE), square<KING>(BLACK), st->previous->pinners[WHITE]);
put_piece(make_piece(~sideToMove, PAWN), st->epSquare - pawn_push(sideToMove));
}
else
if (!enpassant)
st->epSquare = SQ_NONE;

// 5-6. Halfmove clock and fullmove number
@ -293,8 +281,7 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th
|
|||
|
||||
chess960 = isChess960;
|
||||
thisThread = th;
|
||||
st->accumulator.state[WHITE] = Eval::NNUE::INIT;
|
||||
st->accumulator.state[BLACK] = Eval::NNUE::INIT;
|
||||
set_state(st);
|
||||
|
||||
assert(pos_is_ok());
|
||||
|
||||
|
@ -318,7 +305,7 @@ void Position::set_castling_right(Color c, Square rfrom) {
|
|||
Square kto = relative_square(c, cr & KING_SIDE ? SQ_G1 : SQ_C1);
|
||||
Square rto = relative_square(c, cr & KING_SIDE ? SQ_F1 : SQ_D1);
|
||||
|
||||
castlingPath[cr] = (between_bb(rfrom, rto) | between_bb(kfrom, kto) | rto | kto)
|
||||
castlingPath[cr] = (between_bb(rfrom, rto) | between_bb(kfrom, kto))
|
||||
& ~(kfrom | rfrom);
|
||||
}
|
||||
|
||||
|
@ -357,7 +344,7 @@ void Position::set_state(StateInfo* si) const {
|
|||
|
||||
for (Bitboard b = pieces(); b; )
|
||||
{
|
||||
Square s = pop_lsb(&b);
|
||||
Square s = pop_lsb(b);
|
||||
Piece pc = piece_on(s);
|
||||
si->key ^= Zobrist::psq[pc][s];
|
||||
|
||||
|
@ -408,7 +395,7 @@ Position& Position::set(const string& code, Color c, StateInfo* si) {
|
|||
/// Position::fen() returns a FEN representation of the position. In case of
|
||||
/// Chess960 the Shredder-FEN notation is used. This is mainly a debugging function.
|
||||
|
||||
const string Position::fen() const {
|
||||
string Position::fen() const {
|
||||
|
||||
int emptyCnt;
|
||||
std::ostringstream ss;
|
||||
|
@ -474,7 +461,7 @@ Bitboard Position::slider_blockers(Bitboard sliders, Square s, Bitboard& pinners
|
|||
|
||||
while (snipers)
|
||||
{
|
||||
Square sniperSq = pop_lsb(&snipers);
|
||||
Square sniperSq = pop_lsb(snipers);
|
||||
Bitboard b = between_bb(s, sniperSq) & occupancy;
|
||||
|
||||
if (b && !more_than_one(b))
|
||||
|
@ -515,11 +502,23 @@ bool Position::legal(Move m) const {
assert(color_of(moved_piece(m)) == us);
assert(piece_on(square<KING>(us)) == make_piece(us, KING));

// st->previous->blockersForKing consider capsq as empty.
// If pinned, it has to move along the king ray.
// En passant captures are a tricky special case. Because they are rather
// uncommon, we do it simply by testing whether the king is attacked after
// the move is made.
if (type_of(m) == EN_PASSANT)
return !(st->previous->blockersForKing[sideToMove] & from)
|| aligned(from, to, square<KING>(us));
{
Square ksq = square<KING>(us);
Square capsq = to - pawn_push(us);
Bitboard occupied = (pieces() ^ from ^ capsq) | to;

assert(to == ep_square());
assert(moved_piece(m) == make_piece(us, PAWN));
assert(piece_on(capsq) == make_piece(~us, PAWN));
assert(piece_on(to) == NO_PIECE);

return !(attacks_bb< ROOK>(ksq, occupied) & pieces(~us, QUEEN, ROOK))
&& !(attacks_bb<BISHOP>(ksq, occupied) & pieces(~us, QUEEN, BISHOP));
}

// Castling moves generation does not check if the castling path is clear of
// enemy attacks, it is delayed at a later time: now!
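For orientation, the new en passant branch rebuilds the occupancy as it would look after the capture and asks whether any enemy slider would then hit the king. A worked example on an assumed test position (not part of this patch):

    // FEN 8/8/8/8/k2Pp2Q/8/8/4K3 b - d3, black considers ...exd3 en passant.
    // from = SQ_E4, to = SQ_D3, capsq = SQ_D4
    // occupied = (pieces() ^ SQ_E4 ^ SQ_D4) | SQ_D3  -> both pawns leave the 4th rank
    // attacks_bb<ROOK>(SQ_A4, occupied) now reaches the white queen on h4,
    // so legal() correctly rejects the capture.
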
@ -542,7 +541,7 @@ bool Position::legal(Move m) const {
|
|||
// If the moving piece is a king, check whether the destination square is
|
||||
// attacked by the opponent.
|
||||
if (type_of(piece_on(from)) == KING)
|
||||
return !(attackers_to(to) & pieces(~us));
|
||||
return !(attackers_to(to, pieces() ^ from) & pieces(~us));
|
||||
|
||||
// A non-king move is legal if and only if it is not pinned or it
|
||||
// is moving along the ray towards or away from the king.
|
||||
|
@ -611,8 +610,8 @@ bool Position::pseudo_legal(const Move m) const {
|
|||
if (more_than_one(checkers()))
|
||||
return false;
|
||||
|
||||
// Our move must be a blocking evasion or a capture of the checking piece
|
||||
if (!((between_bb(lsb(checkers()), square<KING>(us)) | checkers()) & to))
|
||||
// Our move must be a blocking interposition or a capture of the checking piece
|
||||
if (!(between_bb(square<KING>(us), lsb(checkers())) & to))
|
||||
return false;
|
||||
}
|
||||
// In case of king moves under check we have to remove king so as to catch
|
||||
|
@ -652,15 +651,18 @@ bool Position::gives_check(Move m) const {
|
|||
case PROMOTION:
|
||||
return attacks_bb(promotion_type(m), to, pieces() ^ from) & square<KING>(~sideToMove);
|
||||
|
||||
// The double-pushed pawn blocked a check? En Passant will remove the blocker.
|
||||
// The only discovery check that wasn't handle is through capsq and fromsq
|
||||
// So the King must be in the same rank as fromsq to consider this possibility.
|
||||
// st->previous->blockersForKing consider capsq as empty.
|
||||
// En passant capture with check? We have already handled the case
|
||||
// of direct checks and ordinary discovered check, so the only case we
|
||||
// need to handle is the unusual case of a discovered check through
|
||||
// the captured pawn.
|
||||
case EN_PASSANT:
|
||||
return st->previous->checkersBB
|
||||
|| ( rank_of(square<KING>(~sideToMove)) == rank_of(from)
|
||||
&& st->previous->blockersForKing[~sideToMove] & from);
|
||||
{
|
||||
Square capsq = make_square(file_of(to), rank_of(from));
|
||||
Bitboard b = (pieces() ^ from ^ capsq) | to;
|
||||
|
||||
return (attacks_bb< ROOK>(square<KING>(~sideToMove), b) & pieces(sideToMove, QUEEN, ROOK))
|
||||
| (attacks_bb<BISHOP>(square<KING>(~sideToMove), b) & pieces(sideToMove, QUEEN, BISHOP));
|
||||
}
|
||||
default: //CASTLING
|
||||
{
|
||||
// Castling is encoded as 'king captures the rook'
|
||||
|
@ -700,8 +702,8 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
|
|||
++st->pliesFromNull;
|
||||
|
||||
// Used by NNUE
|
||||
st->accumulator.state[WHITE] = Eval::NNUE::EMPTY;
|
||||
st->accumulator.state[BLACK] = Eval::NNUE::EMPTY;
|
||||
st->accumulator.computed[WHITE] = false;
|
||||
st->accumulator.computed[BLACK] = false;
|
||||
auto& dp = st->dirtyPiece;
|
||||
dp.dirty_num = 1;
|
||||
|
||||
|
@ -986,7 +988,7 @@ void Position::do_castling(Color us, Square from, Square& to, Square& rfrom, Squ
|
|||
}
|
||||
|
||||
|
||||
/// Position::do(undo)_null_move() is used to do(undo) a "null move": it flips
|
||||
/// Position::do_null_move() is used to do a "null move": it flips
|
||||
/// the side to move without executing any move on the board.
|
||||
|
||||
void Position::do_null_move(StateInfo& newSt) {
|
||||
|
@ -1001,8 +1003,8 @@ void Position::do_null_move(StateInfo& newSt) {
|
|||
|
||||
st->dirtyPiece.dirty_num = 0;
|
||||
st->dirtyPiece.piece[0] = NO_PIECE; // Avoid checks in UpdateAccumulator()
|
||||
st->accumulator.state[WHITE] = Eval::NNUE::EMPTY;
|
||||
st->accumulator.state[BLACK] = Eval::NNUE::EMPTY;
|
||||
st->accumulator.computed[WHITE] = false;
|
||||
st->accumulator.computed[BLACK] = false;
|
||||
|
||||
if (st->epSquare != SQ_NONE)
|
||||
{
|
||||
|
@ -1025,6 +1027,9 @@ void Position::do_null_move(StateInfo& newSt) {
|
|||
assert(pos_is_ok());
|
||||
}
|
||||
|
||||
|
||||
/// Position::undo_null_move() must be used to undo a "null move"
|
||||
|
||||
void Position::undo_null_move() {
|
||||
|
||||
assert(!checkers());
|
||||
|
@ -1090,8 +1095,8 @@ bool Position::see_ge(Move m, Value threshold) const {
|
|||
if (!(stmAttackers = attackers & pieces(stm)))
|
||||
break;
|
||||
|
||||
// Don't allow pinned pieces to attack (except the king) as long as
|
||||
// there are pinners on their original square.
|
||||
// Don't allow pinned pieces to attack as long as there are
|
||||
// pinners on their original square.
|
||||
if (pinners(~stm) & occupied)
|
||||
stmAttackers &= ~blockers_for_king(stm);
|
||||
|
||||
|
@ -1107,7 +1112,7 @@ bool Position::see_ge(Move m, Value threshold) const {
|
|||
if ((swap = PawnValueMg - swap) < res)
|
||||
break;
|
||||
|
||||
occupied ^= lsb(bb);
|
||||
occupied ^= least_significant_square_bb(bb);
|
||||
attackers |= attacks_bb<BISHOP>(to, occupied) & pieces(BISHOP, QUEEN);
|
||||
}
|
||||
|
||||
|
@ -1116,7 +1121,7 @@ bool Position::see_ge(Move m, Value threshold) const {
|
|||
if ((swap = KnightValueMg - swap) < res)
|
||||
break;
|
||||
|
||||
occupied ^= lsb(bb);
|
||||
occupied ^= least_significant_square_bb(bb);
|
||||
}
|
||||
|
||||
else if ((bb = stmAttackers & pieces(BISHOP)))
|
||||
|
@ -1124,7 +1129,7 @@ bool Position::see_ge(Move m, Value threshold) const {
|
|||
if ((swap = BishopValueMg - swap) < res)
|
||||
break;
|
||||
|
||||
occupied ^= lsb(bb);
|
||||
occupied ^= least_significant_square_bb(bb);
|
||||
attackers |= attacks_bb<BISHOP>(to, occupied) & pieces(BISHOP, QUEEN);
|
||||
}
|
||||
|
||||
|
@ -1133,7 +1138,7 @@ bool Position::see_ge(Move m, Value threshold) const {
|
|||
if ((swap = RookValueMg - swap) < res)
|
||||
break;
|
||||
|
||||
occupied ^= lsb(bb);
|
||||
occupied ^= least_significant_square_bb(bb);
|
||||
attackers |= attacks_bb<ROOK>(to, occupied) & pieces(ROOK, QUEEN);
|
||||
}
|
||||
|
||||
|
@ -1142,7 +1147,7 @@ bool Position::see_ge(Move m, Value threshold) const {
|
|||
if ((swap = QueenValueMg - swap) < res)
|
||||
break;
|
||||
|
||||
occupied ^= lsb(bb);
|
||||
occupied ^= least_significant_square_bb(bb);
|
||||
attackers |= (attacks_bb<BISHOP>(to, occupied) & pieces(BISHOP, QUEEN))
|
||||
| (attacks_bb<ROOK >(to, occupied) & pieces(ROOK , QUEEN));
|
||||
}
|
||||
|
@ -1216,7 +1221,7 @@ bool Position::has_game_cycle(int ply) const {
|
|||
Square s1 = from_sq(move);
|
||||
Square s2 = to_sq(move);
|
||||
|
||||
if (!(between_bb(s1, s2) & pieces()))
|
||||
if (!((between_bb(s1, s2) ^ s2) & pieces()))
|
||||
{
|
||||
if (ply > i)
|
||||
return true;
|
||||
|
@ -1313,7 +1318,7 @@ bool Position::pos_is_ok() const {
|
|||
assert(0 && "pos_is_ok: Bitboards");
|
||||
|
||||
StateInfo si = *st;
|
||||
ASSERT_ALIGNED(&si, Eval::NNUE::kCacheLineSize);
|
||||
ASSERT_ALIGNED(&si, Eval::NNUE::CacheLineSize);
|
||||
|
||||
set_state(&si);
|
||||
if (std::memcmp(&si, st, sizeof(StateInfo)))
|
||||
|
@ -1338,3 +1343,5 @@ bool Position::pos_is_ok() const {
|
|||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace Stockfish
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
|
||||
#include "nnue/nnue_accumulator.h"
|
||||
|
||||
namespace Stockfish {
|
||||
|
||||
/// StateInfo struct stores information needed to restore a Position object to
|
||||
/// its previous state when we retract a move. Whenever a move is made on the
|
||||
|
@ -50,11 +51,11 @@ struct StateInfo {
|
|||
// Not copied when making a move (will be recomputed anyhow)
|
||||
Key key;
|
||||
Bitboard checkersBB;
|
||||
Piece capturedPiece;
|
||||
StateInfo* previous;
|
||||
Bitboard blockersForKing[COLOR_NB];
|
||||
Bitboard pinners[COLOR_NB];
|
||||
Bitboard checkSquares[PIECE_TYPE_NB];
|
||||
Piece capturedPiece;
|
||||
int repetition;
|
||||
|
||||
// Used by NNUE
|
||||
|
@ -87,7 +88,7 @@ public:
|
|||
// FEN string input/output
|
||||
Position& set(const std::string& fenStr, bool isChess960, StateInfo* si, Thread* th);
|
||||
Position& set(const std::string& code, Color c, StateInfo* si);
|
||||
const std::string fen() const;
|
||||
std::string fen() const;
|
||||
|
||||
// Position representation
|
||||
Bitboard pieces(PieceType pt) const;
|
||||
|
@ -114,7 +115,6 @@ public:
|
|||
Bitboard blockers_for_king(Color c) const;
|
||||
Bitboard check_squares(PieceType pt) const;
|
||||
Bitboard pinners(Color c) const;
|
||||
bool is_discovered_check_on_king(Color c, Move m) const;
|
||||
|
||||
// Attacks to/from a given square
|
||||
Bitboard attackers_to(Square s) const;
|
||||
|
@ -127,7 +127,6 @@ public:
|
|||
bool capture(Move m) const;
|
||||
bool capture_or_promotion(Move m) const;
|
||||
bool gives_check(Move m) const;
|
||||
bool advanced_pawn_push(Move m) const;
|
||||
Piece moved_piece(Move m) const;
|
||||
Piece captured_piece() const;
|
||||
|
||||
|
@ -172,6 +171,9 @@ public:
|
|||
// Used by NNUE
|
||||
StateInfo* state() const;
|
||||
|
||||
void put_piece(Piece pc, Square s);
|
||||
void remove_piece(Square s);
|
||||
|
||||
private:
|
||||
// Initialization helpers (used while setting up a position)
|
||||
void set_castling_right(Color c, Square rfrom);
|
||||
|
@ -179,8 +181,6 @@ private:
|
|||
void set_check_info(StateInfo* si) const;
|
||||
|
||||
// Other helpers
|
||||
void put_piece(Piece pc, Square s);
|
||||
void remove_piece(Square s);
|
||||
void move_piece(Square from, Square to);
|
||||
template<bool Do>
|
||||
void do_castling(Color us, Square from, Square& to, Square& rfrom, Square& rto);
|
||||
|
@ -193,11 +193,11 @@ private:
|
|||
int castlingRightsMask[SQUARE_NB];
|
||||
Square castlingRookSquare[CASTLING_RIGHT_NB];
|
||||
Bitboard castlingPath[CASTLING_RIGHT_NB];
|
||||
Thread* thisThread;
|
||||
StateInfo* st;
|
||||
int gamePly;
|
||||
Color sideToMove;
|
||||
Score psq;
|
||||
Thread* thisThread;
|
||||
StateInfo* st;
|
||||
bool chess960;
|
||||
};
|
||||
|
||||
|
@ -301,19 +301,10 @@ inline Bitboard Position::check_squares(PieceType pt) const {
|
|||
return st->checkSquares[pt];
|
||||
}
|
||||
|
||||
inline bool Position::is_discovered_check_on_king(Color c, Move m) const {
|
||||
return st->blockersForKing[c] & from_sq(m);
|
||||
}
|
||||
|
||||
inline bool Position::pawn_passed(Color c, Square s) const {
|
||||
return !(pieces(~c, PAWN) & passed_pawn_span(c, s));
|
||||
}
|
||||
|
||||
inline bool Position::advanced_pawn_push(Move m) const {
|
||||
return type_of(moved_piece(m)) == PAWN
|
||||
&& relative_rank(sideToMove, to_sq(m)) > RANK_5;
|
||||
}
|
||||
|
||||
inline int Position::pawns_on_same_color_squares(Color c, Square s) const {
|
||||
return popcount(pieces(c, PAWN) & ((DarkSquares & s) ? DarkSquares : ~DarkSquares));
|
||||
}
|
||||
|
@ -396,7 +387,7 @@ inline void Position::remove_piece(Square s) {
|
|||
byTypeBB[ALL_PIECES] ^= s;
|
||||
byTypeBB[type_of(pc)] ^= s;
|
||||
byColorBB[color_of(pc)] ^= s;
|
||||
/* board[s] = NO_PIECE; Not needed, overwritten by the capturing one */
|
||||
board[s] = NO_PIECE;
|
||||
pieceCount[pc]--;
|
||||
pieceCount[make_piece(color_of(pc), ALL_PIECES)]--;
|
||||
psq -= PSQT::psq[pc][s];
|
||||
|
@ -423,4 +414,6 @@ inline StateInfo* Position::state() const {
|
|||
return st;
|
||||
}
|
||||
|
||||
} // namespace Stockfish
|
||||
|
||||
#endif // #ifndef POSITION_H_INCLUDED
|
||||
|
|
|
@ -24,6 +24,7 @@
#include "bitboard.h"
#include "types.h"

namespace Stockfish {

namespace
{
@ -126,3 +127,5 @@ void init() {
}

} // namespace PSQT

} // namespace Stockfish

@ -24,7 +24,7 @@
#include "types.h"

namespace PSQT
namespace Stockfish::PSQT
{

extern Score psq[PIECE_NB][SQUARE_NB];
@ -32,7 +32,7 @@ extern Score psq[PIECE_NB][SQUARE_NB];
// Fill psqt array from a set of internally linked parameters
extern void init();

} // namespace PSQT
} // namespace Stockfish::PSQT

#endif // PSQT_H_INCLUDED

@ -35,6 +35,8 @@
#include "uci.h"
#include "syzygy/tbprobe.h"

namespace Stockfish {

namespace Search {

LimitsType Limits;
@ -57,14 +59,14 @@ using namespace Search;
namespace {

// Different node types, used as a template parameter
enum NodeType { NonPV, PV };
enum NodeType { NonPV, PV, Root };

constexpr uint64_t TtHitAverageWindow = 4096;
constexpr uint64_t TtHitAverageResolution = 1024;

// Futility margin
Value futility_margin(Depth d, bool improving) {
return Value(234 * (d - improving));
return Value(214 * (d - improving));
}

// Reductions lookup table, initialized at startup
@ -72,7 +74,7 @@ namespace {

Depth reduction(bool i, Depth d, int mn) {
int r = Reductions[d] * Reductions[mn];
return (r + 503) / 1024 + (!i && r > 915);
return (r + 534) / 1024 + (!i && r > 904);
}

constexpr int futility_move_count(bool improving, Depth depth) {
@ -81,7 +83,7 @@ namespace {

// History and stats update bonus, based on depth
int stat_bonus(Depth d) {
return d > 14 ? 66 : 6 * d * d + 231 * d - 206;
return d > 14 ? 73 : 6 * d * d + 229 * d - 215;
}

// Add a small random component to draw evaluations to avoid 3-fold blindness
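The hunks above only retune search constants (futility margin, LMR reduction, history bonus). As a rough guide to how the retuned reduction formula behaves (the Reductions[] table is rebuilt in Search::init further down in this commit as Reductions[i] = int(21.9 * std::log(i)); this snippet is an illustration, not extra patch content):

    // Late move reduction for depth d and move count mn:
    //   r = Reductions[d] * Reductions[mn];
    //   reduction = (r + 534) / 1024 + (!improving && r > 904);
    // Example: d = 12, mn = 8 -> Reductions[12] ~ 54, Reductions[8] ~ 45,
    // r ~ 2430, so an improving node is reduced by 2 plies and a
    // non-improving one by 3.
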
@ -100,53 +102,10 @@ namespace {
|
|||
Move best = MOVE_NONE;
|
||||
};
|
||||
|
||||
// Breadcrumbs are used to mark nodes as being searched by a given thread
|
||||
struct Breadcrumb {
|
||||
std::atomic<Thread*> thread;
|
||||
std::atomic<Key> key;
|
||||
};
|
||||
std::array<Breadcrumb, 1024> breadcrumbs;
|
||||
|
||||
// ThreadHolding structure keeps track of which thread left breadcrumbs at the given
|
||||
// node for potential reductions. A free node will be marked upon entering the moves
|
||||
// loop by the constructor, and unmarked upon leaving that loop by the destructor.
|
||||
struct ThreadHolding {
|
||||
explicit ThreadHolding(Thread* thisThread, Key posKey, int ply) {
|
||||
location = ply < 8 ? &breadcrumbs[posKey & (breadcrumbs.size() - 1)] : nullptr;
|
||||
otherThread = false;
|
||||
owning = false;
|
||||
if (location)
|
||||
{
|
||||
// See if another already marked this location, if not, mark it ourselves
|
||||
Thread* tmp = (*location).thread.load(std::memory_order_relaxed);
|
||||
if (tmp == nullptr)
|
||||
{
|
||||
(*location).thread.store(thisThread, std::memory_order_relaxed);
|
||||
(*location).key.store(posKey, std::memory_order_relaxed);
|
||||
owning = true;
|
||||
}
|
||||
else if ( tmp != thisThread
|
||||
&& (*location).key.load(std::memory_order_relaxed) == posKey)
|
||||
otherThread = true;
|
||||
}
|
||||
}
|
||||
|
||||
~ThreadHolding() {
|
||||
if (owning) // Free the marked location
|
||||
(*location).thread.store(nullptr, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
bool marked() { return otherThread; }
|
||||
|
||||
private:
|
||||
Breadcrumb* location;
|
||||
bool otherThread, owning;
|
||||
};
|
||||
|
||||
template <NodeType NT>
|
||||
template <NodeType nodeType>
|
||||
Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, bool cutNode);
|
||||
|
||||
template <NodeType NT>
|
||||
template <NodeType nodeType>
|
||||
Value qsearch(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth = 0);
|
||||
|
||||
Value value_to_tt(Value v, int ply);
|
||||
|
@ -163,7 +122,7 @@ namespace {
|
|||
uint64_t perft(Position& pos, Depth depth) {
|
||||
|
||||
StateInfo st;
|
||||
ASSERT_ALIGNED(&st, Eval::NNUE::kCacheLineSize);
|
||||
ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize);
|
||||
|
||||
uint64_t cnt, nodes = 0;
|
||||
const bool leaf = (depth == 2);
|
||||
|
@ -193,7 +152,7 @@ namespace {
|
|||
void Search::init() {
|
||||
|
||||
for (int i = 1; i < MAX_MOVES; ++i)
|
||||
Reductions[i] = int((21.3 + 2 * std::log(Threads.size())) * std::log(i + 0.25 * std::log(i)));
|
||||
Reductions[i] = int(21.9 * std::log(i));
|
||||
}
|
||||
|
||||
|
||||
|
@ -294,7 +253,7 @@ void Thread::search() {
|
|||
// To allow access to (ss-7) up to (ss+2), the stack must be oversized.
|
||||
// The former is needed to allow update_continuation_histories(ss-1, ...),
|
||||
// which accesses its argument at ss-6, also near the root.
|
||||
// The latter is needed for statScores and killer initialization.
|
||||
// The latter is needed for statScore and killer initialization.
|
||||
Stack stack[MAX_PLY+10], *ss = stack+7;
|
||||
Move pv[MAX_PLY+1];
|
||||
Value bestValue, alpha, beta, delta;
|
||||
|
@ -309,6 +268,9 @@ void Thread::search() {
|
|||
for (int i = 7; i > 0; i--)
|
||||
(ss-i)->continuationHistory = &this->continuationHistory[0][0][NO_PIECE][0]; // Use as a sentinel
|
||||
|
||||
for (int i = 0; i <= MAX_PLY + 2; ++i)
|
||||
(ss+i)->ply = i;
|
||||
|
||||
ss->pv = pv;
|
||||
|
||||
bestValue = delta = alpha = -VALUE_INFINITE;
|
||||
|
@ -350,19 +312,7 @@ void Thread::search() {
|
|||
multiPV = std::min(multiPV, rootMoves.size());
|
||||
ttHitAverage = TtHitAverageWindow * TtHitAverageResolution / 2;
|
||||
|
||||
int ct = int(Options["Contempt"]) * PawnValueEg / 100; // From centipawns
|
||||
|
||||
// In analysis mode, adjust contempt in accordance with user preference
|
||||
if (Limits.infinite || Options["UCI_AnalyseMode"])
|
||||
ct = Options["Analysis Contempt"] == "Off" ? 0
|
||||
: Options["Analysis Contempt"] == "Both" ? ct
|
||||
: Options["Analysis Contempt"] == "White" && us == BLACK ? -ct
|
||||
: Options["Analysis Contempt"] == "Black" && us == WHITE ? -ct
|
||||
: ct;
|
||||
|
||||
// Evaluation score is from the white point of view
|
||||
contempt = (us == WHITE ? make_score(ct, ct / 2)
|
||||
: -make_score(ct, ct / 2));
|
||||
trend = SCORE_ZERO;
|
||||
|
||||
int searchAgainCounter = 0;
|
||||
|
||||
|
@ -408,21 +358,21 @@ void Thread::search() {
|
|||
alpha = std::max(prev - delta,-VALUE_INFINITE);
|
||||
beta = std::min(prev + delta, VALUE_INFINITE);
|
||||
|
||||
// Adjust contempt based on root move's previousScore (dynamic contempt)
|
||||
int dct = ct + (113 - ct / 2) * prev / (abs(prev) + 147);
|
||||
// Adjust trend based on root move's previousScore (dynamic contempt)
|
||||
int tr = 113 * prev / (abs(prev) + 147);
|
||||
|
||||
contempt = (us == WHITE ? make_score(dct, dct / 2)
|
||||
: -make_score(dct, dct / 2));
|
||||
trend = (us == WHITE ? make_score(tr, tr / 2)
|
||||
: -make_score(tr, tr / 2));
|
||||
}
|
||||
|
||||
// Start with a small aspiration window and, in the case of a fail
|
||||
// high/low, re-search with a bigger window until we don't fail
|
||||
// high/low anymore.
|
||||
failedHighCnt = 0;
|
||||
int failedHighCnt = 0;
|
||||
while (true)
|
||||
{
|
||||
Depth adjustedDepth = std::max(1, rootDepth - failedHighCnt - searchAgainCounter);
|
||||
bestValue = ::search<PV>(rootPos, ss, alpha, beta, adjustedDepth, false);
|
||||
bestValue = Stockfish::search<Root>(rootPos, ss, alpha, beta, adjustedDepth, false);
|
||||
|
||||
// Bring the best move to the front. It is critical that sorting
|
||||
// is done with a stable algorithm because all the values but the
|
||||
|
@ -518,8 +468,8 @@ void Thread::search() {
|
|||
totBestMoveChanges += th->bestMoveChanges;
|
||||
th->bestMoveChanges = 0;
|
||||
}
|
||||
double bestMoveInstability = 1 + 2 * totBestMoveChanges / Threads.size();
|
||||
|
||||
double bestMoveInstability = 1.073 + std::max(1.0, 2.25 - 9.9 / rootDepth)
|
||||
* totBestMoveChanges / Threads.size();
|
||||
double totalTime = Time.optimum() * fallingEval * reduction * bestMoveInstability;
|
||||
|
||||
// Cap used time in case of a single legal move for a better viewer experience in tournaments
|
||||
|
@ -565,18 +515,18 @@ namespace {
|
|||
|
||||
// search<>() is the main search function for both PV and non-PV nodes
|
||||
|
||||
template <NodeType NT>
|
||||
template <NodeType nodeType>
|
||||
Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, bool cutNode) {
|
||||
|
||||
constexpr bool PvNode = NT == PV;
|
||||
const bool rootNode = PvNode && ss->ply == 0;
|
||||
constexpr bool PvNode = nodeType != NonPV;
|
||||
constexpr bool rootNode = nodeType == Root;
|
||||
const Depth maxNextDepth = rootNode ? depth : depth + 1;
|
||||
|
||||
// Check if we have an upcoming move which draws by repetition, or
|
||||
// if the opponent had an alternative move earlier to this position.
|
||||
if ( pos.rule50_count() >= 3
|
||||
if ( !rootNode
|
||||
&& pos.rule50_count() >= 3
|
||||
&& alpha < VALUE_DRAW
|
||||
&& !rootNode
|
||||
&& pos.has_game_cycle(ss->ply))
|
||||
{
|
||||
alpha = value_draw(pos.this_thread());
|
||||
|
@ -586,7 +536,7 @@ namespace {
|
|||
|
||||
// Dive into quiescence search when the depth reaches zero
|
||||
if (depth <= 0)
|
||||
return qsearch<NT>(pos, ss, alpha, beta);
|
||||
return qsearch<PvNode ? PV : NonPV>(pos, ss, alpha, beta);
|
||||
|
||||
assert(-VALUE_INFINITE <= alpha && alpha < beta && beta <= VALUE_INFINITE);
|
||||
assert(PvNode || (alpha == beta - 1));
|
||||
|
@ -595,14 +545,14 @@ namespace {
|
|||
|
||||
Move pv[MAX_PLY+1], capturesSearched[32], quietsSearched[64];
|
||||
StateInfo st;
|
||||
ASSERT_ALIGNED(&st, Eval::NNUE::kCacheLineSize);
|
||||
ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize);
|
||||
|
||||
TTEntry* tte;
|
||||
Key posKey;
|
||||
Move ttMove, move, excludedMove, bestMove;
|
||||
Depth extension, newDepth;
|
||||
Value bestValue, value, ttValue, eval, maxValue, probCutBeta;
|
||||
bool formerPv, givesCheck, improving, didLMR, priorCapture;
|
||||
bool givesCheck, improving, didLMR, priorCapture;
|
||||
bool captureOrPromotion, doFullDepthSearch, moveCountPruning,
|
||||
ttCapture, singularQuietLMR;
|
||||
Piece movedPiece;
|
||||
|
@ -610,12 +560,12 @@ namespace {
|
|||
|
||||
// Step 1. Initialize node
|
||||
Thread* thisThread = pos.this_thread();
|
||||
ss->inCheck = pos.checkers();
|
||||
priorCapture = pos.captured_piece();
|
||||
Color us = pos.side_to_move();
|
||||
moveCount = captureCount = quietCount = ss->moveCount = 0;
|
||||
bestValue = -VALUE_INFINITE;
|
||||
maxValue = VALUE_INFINITE;
|
||||
ss->inCheck = pos.checkers();
|
||||
priorCapture = pos.captured_piece();
|
||||
Color us = pos.side_to_move();
|
||||
moveCount = captureCount = quietCount = ss->moveCount = 0;
|
||||
bestValue = -VALUE_INFINITE;
|
||||
maxValue = VALUE_INFINITE;
|
||||
|
||||
// Check for the available remaining time
|
||||
if (thisThread == Threads.main())
|
||||
|
@ -648,11 +598,11 @@ namespace {
|
|||
|
||||
assert(0 <= ss->ply && ss->ply < MAX_PLY);
|
||||
|
||||
(ss+1)->ply = ss->ply + 1;
|
||||
(ss+1)->ttPv = false;
|
||||
(ss+1)->ttPv = false;
|
||||
(ss+1)->excludedMove = bestMove = MOVE_NONE;
|
||||
(ss+2)->killers[0] = (ss+2)->killers[1] = MOVE_NONE;
|
||||
Square prevSq = to_sq((ss-1)->currentMove);
|
||||
(ss+2)->killers[0] = (ss+2)->killers[1] = MOVE_NONE;
|
||||
ss->doubleExtensions = (ss-1)->doubleExtensions;
|
||||
Square prevSq = to_sq((ss-1)->currentMove);
|
||||
|
||||
// Initialize statScore to zero for the grandchildren of the current position.
|
||||
// So statScore is shared between all grandchildren and only the first grandchild
|
||||
|
@ -673,7 +623,6 @@ namespace {
|
|||
: ss->ttHit ? tte->move() : MOVE_NONE;
|
||||
if (!excludedMove)
|
||||
ss->ttPv = PvNode || (ss->ttHit && tte->is_pv());
|
||||
formerPv = ss->ttPv && !PvNode;
|
||||
|
||||
// Update low ply history for previous move if we are near root and position is or has been in PV
|
||||
if ( ss->ttPv
|
||||
|
@ -808,7 +757,7 @@ namespace {
|
|||
if ((ss-1)->currentMove != MOVE_NULL)
|
||||
ss->staticEval = eval = evaluate(pos);
|
||||
else
|
||||
ss->staticEval = eval = -(ss-1)->staticEval + 2 * Tempo;
|
||||
ss->staticEval = eval = -(ss-1)->staticEval;
|
||||
|
||||
// Save static evaluation into transposition table
|
||||
tte->save(posKey, VALUE_NONE, ss->ttPv, BOUND_NONE, DEPTH_NONE, MOVE_NONE, eval);
|
||||
|
@ -817,7 +766,7 @@ namespace {
|
|||
// Use static evaluation difference to improve quiet move ordering
|
||||
if (is_ok((ss-1)->currentMove) && !(ss-1)->inCheck && !priorCapture)
|
||||
{
|
||||
int bonus = std::clamp(-depth * 4 * int((ss-1)->staticEval + ss->staticEval - 2 * Tempo), -1000, 1000);
|
||||
int bonus = std::clamp(-depth * 4 * int((ss-1)->staticEval + ss->staticEval), -1000, 1000);
|
||||
thisThread->mainHistory[~us][from_to((ss-1)->currentMove)] << bonus;
|
||||
}
|
||||
|
||||
|
@ -839,10 +788,10 @@ namespace {
|
|||
// Step 8. Null move search with verification search (~40 Elo)
|
||||
if ( !PvNode
|
||||
&& (ss-1)->currentMove != MOVE_NULL
|
||||
&& (ss-1)->statScore < 22661
|
||||
&& (ss-1)->statScore < 23767
|
||||
&& eval >= beta
|
||||
&& eval >= ss->staticEval
|
||||
&& ss->staticEval >= beta - 24 * depth - 34 * improving + 162 * ss->ttPv + 159
|
||||
&& ss->staticEval >= beta - 20 * depth - 22 * improving + 168 * ss->ttPv + 159
|
||||
&& !excludedMove
|
||||
&& pos.non_pawn_material(us)
|
||||
&& (ss->ply >= thisThread->nmpMinPly || us != thisThread->nmpColor))
|
||||
|
@ -850,7 +799,7 @@ namespace {
|
|||
assert(eval - beta >= 0);
|
||||
|
||||
// Null move dynamic reduction based on depth and value
|
||||
Depth R = (1062 + 68 * depth) / 256 + std::min(int(eval - beta) / 190, 3);
|
||||
Depth R = (1090 + 81 * depth) / 256 + std::min(int(eval - beta) / 205, 3);
|
||||
|
||||
ss->currentMove = MOVE_NULL;
|
||||
ss->continuationHistory = &thisThread->continuationHistory[0][0][NO_PIECE][0];
|
||||
|
@ -888,7 +837,7 @@ namespace {
|
|||
|
||||
probCutBeta = beta + 209 - 44 * improving;
|
||||
|
||||
// Step 9. ProbCut (~10 Elo)
|
||||
// Step 9. ProbCut (~4 Elo)
|
||||
// If we have a good enough capture and a reduced search returns a value
|
||||
// much above beta, we can (almost) safely prune the previous move.
|
||||
if ( !PvNode
|
||||
|
@ -903,17 +852,8 @@ namespace {
|
|||
&& ttValue != VALUE_NONE
|
||||
&& ttValue < probCutBeta))
|
||||
{
|
||||
// if ttMove is a capture and value from transposition table is good enough produce probCut
|
||||
// cutoff without digging into actual probCut search
|
||||
if ( ss->ttHit
|
||||
&& tte->depth() >= depth - 3
|
||||
&& ttValue != VALUE_NONE
|
||||
&& ttValue >= probCutBeta
|
||||
&& ttMove
|
||||
&& pos.capture_or_promotion(ttMove))
|
||||
return probCutBeta;
|
||||
|
||||
assert(probCutBeta < VALUE_INFINITE);
|
||||
|
||||
MovePicker mp(pos, ttMove, probCutBeta - ss->staticEval, &captureHistory);
|
||||
int probCutCount = 0;
|
||||
bool ttPv = ss->ttPv;
|
||||
|
@ -969,6 +909,23 @@ namespace {
|
|||
|
||||
moves_loop: // When in check, search starts from here
|
||||
|
||||
ttCapture = ttMove && pos.capture_or_promotion(ttMove);
|
||||
|
||||
// Step 11. A small Probcut idea, when we are in check
|
||||
probCutBeta = beta + 409;
|
||||
if ( ss->inCheck
|
||||
&& !PvNode
|
||||
&& depth >= 4
|
||||
&& ttCapture
|
||||
&& (tte->bound() & BOUND_LOWER)
|
||||
&& tte->depth() >= depth - 3
|
||||
&& ttValue >= probCutBeta
|
||||
&& abs(ttValue) <= VALUE_KNOWN_WIN
|
||||
&& abs(beta) <= VALUE_KNOWN_WIN
|
||||
)
|
||||
return probCutBeta;
|
||||
|
||||
|
||||
const PieceToHistory* contHist[] = { (ss-1)->continuationHistory, (ss-2)->continuationHistory,
|
||||
nullptr , (ss-4)->continuationHistory,
|
||||
nullptr , (ss-6)->continuationHistory };
|
||||
|
@ -985,12 +942,16 @@ moves_loop: // When in check, search starts from here
|
|||
|
||||
value = bestValue;
|
||||
singularQuietLMR = moveCountPruning = false;
|
||||
ttCapture = ttMove && pos.capture_or_promotion(ttMove);
|
||||
bool doubleExtension = false;
|
||||
|
||||
// Mark this node as being searched
|
||||
ThreadHolding th(thisThread, posKey, ss->ply);
|
||||
// Indicate PvNodes that will probably fail low if the node was searched
|
||||
// at a depth equal or greater than the current depth, and the result of this search was a fail low.
|
||||
bool likelyFailLow = PvNode
|
||||
&& ttMove
|
||||
&& (tte->bound() & BOUND_UPPER)
|
||||
&& tte->depth() >= depth;
|
||||
|
||||
// Step 11. Loop through all pseudo-legal moves until no moves remain
|
||||
// Step 12. Loop through all pseudo-legal moves until no moves remain
|
||||
// or a beta cutoff occurs.
|
||||
while ((move = mp.next_move(moveCountPruning)) != MOVE_NONE)
|
||||
{
|
||||
|
@ -1025,18 +986,10 @@ moves_loop: // When in check, search starts from here
|
|||
movedPiece = pos.moved_piece(move);
|
||||
givesCheck = pos.gives_check(move);
|
||||
|
||||
// Indicate PvNodes that will probably fail low if node was searched with non-PV search
|
||||
// at depth equal or greater to current depth and result of this search was far below alpha
|
||||
bool likelyFailLow = PvNode
|
||||
&& ttMove
|
||||
&& (tte->bound() & BOUND_UPPER)
|
||||
&& ttValue < alpha + 200 + 100 * depth
|
||||
&& tte->depth() >= depth;
|
||||
|
||||
// Calculate new depth for this move
|
||||
newDepth = depth - 1;
|
||||
|
||||
// Step 12. Pruning at shallow depth (~200 Elo)
|
||||
// Step 13. Pruning at shallow depth (~200 Elo)
|
||||
if ( !rootNode
|
||||
&& pos.non_pawn_material(us)
|
||||
&& bestValue > VALUE_TB_LOSS_IN_MAX_PLY)
|
||||
|
@ -1062,8 +1015,8 @@ moves_loop: // When in check, search starts from here
|
|||
}
|
||||
else
|
||||
{
|
||||
// Countermoves based pruning (~20 Elo)
|
||||
if ( lmrDepth < 4 + ((ss-1)->statScore > 0 || (ss-1)->moveCount == 1)
|
||||
// Continuation history based pruning (~20 Elo)
|
||||
if ( lmrDepth < 5
|
||||
&& (*contHist[0])[movedPiece][to_sq(move)] < CounterMovePruneThreshold
|
||||
&& (*contHist[1])[movedPiece][to_sq(move)] < CounterMovePruneThreshold)
|
||||
continue;
|
||||
|
@ -1075,7 +1028,7 @@ moves_loop: // When in check, search starts from here
|
|||
&& (*contHist[0])[movedPiece][to_sq(move)]
|
||||
+ (*contHist[1])[movedPiece][to_sq(move)]
|
||||
+ (*contHist[3])[movedPiece][to_sq(move)]
|
||||
+ (*contHist[5])[movedPiece][to_sq(move)] / 3 < 26237)
|
||||
+ (*contHist[5])[movedPiece][to_sq(move)] / 3 < 28255)
|
||||
continue;
|
||||
|
||||
// Prune moves with negative SEE (~20 Elo)
|
||||
|
@ -1084,24 +1037,25 @@ moves_loop: // When in check, search starts from here
|
|||
}
|
||||
}
|
||||
|
||||
// Step 13. Extensions (~75 Elo)
|
||||
// Step 14. Extensions (~75 Elo)
|
||||
|
||||
// Singular extension search (~70 Elo). If all moves but one fail low on a
|
||||
// search of (alpha-s, beta-s), and just one fails high on (alpha, beta),
|
||||
// then that move is singular and should be extended. To verify this we do
|
||||
// a reduced search on all the other moves but the ttMove and if the
|
||||
// result is lower than ttValue minus a margin, then we will extend the ttMove.
|
||||
if ( depth >= 7
|
||||
if ( !rootNode
|
||||
&& depth >= 7
|
||||
&& move == ttMove
|
||||
&& !rootNode
|
||||
&& !excludedMove // Avoid recursive singular search
|
||||
/* && ttValue != VALUE_NONE Already implicit in the next condition */
|
||||
&& abs(ttValue) < VALUE_KNOWN_WIN
|
||||
&& (tte->bound() & BOUND_LOWER)
|
||||
&& tte->depth() >= depth - 3)
|
||||
{
|
||||
Value singularBeta = ttValue - ((formerPv + 4) * depth) / 2;
|
||||
Depth singularDepth = (depth - 1 + 3 * formerPv) / 2;
|
||||
Value singularBeta = ttValue - 2 * depth;
|
||||
Depth singularDepth = (depth - 1) / 2;
|
||||
|
||||
ss->excludedMove = move;
|
||||
value = search<NonPV>(pos, ss, singularBeta - 1, singularBeta, singularDepth, cutNode);
|
||||
ss->excludedMove = MOVE_NONE;
|
||||
|
@ -1110,6 +1064,15 @@ moves_loop: // When in check, search starts from here
|
|||
{
|
||||
extension = 1;
|
||||
singularQuietLMR = !ttCapture;
|
||||
|
||||
// Avoid search explosion by limiting the number of double extensions to at most 3
|
||||
if ( !PvNode
|
||||
&& value < singularBeta - 93
|
||||
&& ss->doubleExtensions < 3)
|
||||
{
|
||||
extension = 2;
|
||||
doubleExtension = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Multi-cut pruning
|
||||
|
@ -1132,19 +1095,14 @@ moves_loop: // When in check, search starts from here
|
|||
return beta;
|
||||
}
|
||||
}
|
||||
|
||||
// Check extension (~2 Elo)
|
||||
else if ( givesCheck
|
||||
&& (pos.is_discovered_check_on_king(~us, move) || pos.see_ge(move)))
|
||||
extension = 1;
|
||||
|
||||
// Last captures extension
|
||||
else if ( PieceValue[EG][pos.captured_piece()] > PawnValueEg
|
||||
&& pos.non_pawn_material() <= 2 * RookValueMg)
|
||||
else if ( givesCheck
|
||||
&& depth > 6
|
||||
&& abs(ss->staticEval) > Value(100))
|
||||
extension = 1;
|
||||
|
||||
// Add extension to new depth
|
||||
newDepth += extension;
|
||||
ss->doubleExtensions = (ss-1)->doubleExtensions + (extension == 2);
|
||||
|
||||
// Speculative prefetch as early as possible
|
||||
prefetch(TT.first_entry(pos.key_after(move)));
|
||||
|
@ -1156,117 +1114,87 @@ moves_loop: // When in check, search starts from here
|
|||
[movedPiece]
|
||||
[to_sq(move)];
|
||||
|
||||
// Step 14. Make the move
|
||||
// Step 15. Make the move
|
||||
pos.do_move(move, st, givesCheck);
|
||||
|
||||
// Step 15. Reduced depth search (LMR, ~200 Elo). If the move fails high it will be
|
||||
// re-searched at full depth.
|
||||
// Step 16. Late moves reduction / extension (LMR, ~200 Elo)
|
||||
// We use various heuristics for the sons of a node after the first son has
|
||||
// been searched. In general we would like to reduce them, but there are many
|
||||
// cases where we extend a son if it has good chances to be "interesting".
|
||||
if ( depth >= 3
|
||||
&& moveCount > 1 + 2 * rootNode
|
||||
&& ( !captureOrPromotion
|
||||
|| moveCountPruning
|
||||
|| ss->staticEval + PieceValue[EG][pos.captured_piece()] <= alpha
|
||||
|| cutNode
|
||||
|| (!PvNode && !formerPv && captureHistory[movedPiece][to_sq(move)][type_of(pos.captured_piece())] < 4506)
|
||||
|| thisThread->ttHitAverage < 432 * TtHitAverageResolution * TtHitAverageWindow / 1024))
|
||||
|| (cutNode && (ss-1)->moveCount > 1)
|
||||
|| !ss->ttPv)
|
||||
&& (!PvNode || ss->ply > 1 || thisThread->id() % 4 != 3))
|
||||
{
|
||||
Depth r = reduction(improving, depth, moveCount);
|
||||
|
||||
// Decrease reduction if the ttHit running average is large
|
||||
if (PvNode)
|
||||
r--;
|
||||
|
||||
// Decrease reduction if the ttHit running average is large (~0 Elo)
|
||||
if (thisThread->ttHitAverage > 537 * TtHitAverageResolution * TtHitAverageWindow / 1024)
|
||||
r--;
|
||||
|
||||
// Increase reduction if other threads are searching this position
|
||||
if (th.marked())
|
||||
r++;
|
||||
|
||||
// Decrease reduction if position is or has been on the PV
|
||||
// and node is not likely to fail low. (~10 Elo)
|
||||
if (ss->ttPv && !likelyFailLow)
|
||||
// and node is not likely to fail low. (~3 Elo)
|
||||
if ( ss->ttPv
|
||||
&& !likelyFailLow)
|
||||
r -= 2;
|
||||
|
||||
// Increase reduction at root and non-PV nodes when the best move does not change frequently
|
||||
if ((rootNode || !PvNode) && thisThread->rootDepth > 10 && thisThread->bestMoveChanges <= 2)
|
||||
if ( (rootNode || !PvNode)
|
||||
&& thisThread->bestMoveChanges <= 2)
|
||||
r++;
|
||||
|
||||
// More reductions for late moves if position was not in previous PV
|
||||
if (moveCountPruning && !formerPv)
|
||||
r++;
|
||||
|
||||
// Decrease reduction if opponent's move count is high (~5 Elo)
|
||||
// Decrease reduction if opponent's move count is high (~1 Elo)
|
||||
if ((ss-1)->moveCount > 13)
|
||||
r--;
|
||||
|
||||
// Decrease reduction if ttMove has been singularly extended (~3 Elo)
|
||||
// Decrease reduction if ttMove has been singularly extended (~1 Elo)
|
||||
if (singularQuietLMR)
|
||||
r--;
|
||||
|
||||
if (captureOrPromotion)
|
||||
// Increase reduction for cut nodes (~3 Elo)
|
||||
if (cutNode)
|
||||
r += 1 + !captureOrPromotion;
|
||||
|
||||
if (!captureOrPromotion)
|
||||
{
|
||||
// Unless giving check, this capture is likely bad
|
||||
if ( !givesCheck
|
||||
&& ss->staticEval + PieceValue[EG][pos.captured_piece()] + 210 * depth <= alpha)
|
||||
r++;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Increase reduction if ttMove is a capture (~5 Elo)
|
||||
// Increase reduction if ttMove is a capture (~3 Elo)
|
||||
if (ttCapture)
|
||||
r++;
|
||||
|
||||
// Increase reduction at root if failing high
|
||||
r += rootNode ? thisThread->failedHighCnt * thisThread->failedHighCnt * moveCount / 512 : 0;
|
||||
|
||||
// Increase reduction for cut nodes (~10 Elo)
|
||||
if (cutNode)
|
||||
r += 2;
|
||||
|
||||
// Decrease reduction for moves that escape a capture. Filter out
|
||||
// castling moves, because they are coded as "king captures rook" and
|
||||
// hence break make_move(). (~2 Elo)
|
||||
else if ( type_of(move) == NORMAL
|
||||
&& !pos.see_ge(reverse_move(move)))
|
||||
r -= 2 + ss->ttPv - (type_of(movedPiece) == PAWN);
|
||||
|
||||
ss->statScore = thisThread->mainHistory[us][from_to(move)]
|
||||
+ (*contHist[0])[movedPiece][to_sq(move)]
|
||||
+ (*contHist[1])[movedPiece][to_sq(move)]
|
||||
+ (*contHist[3])[movedPiece][to_sq(move)]
|
||||
- 5337;
|
||||
|
||||
// Decrease/increase reduction by comparing opponent's stat score (~10 Elo)
|
||||
if (ss->statScore >= -89 && (ss-1)->statScore < -116)
|
||||
r--;
|
||||
|
||||
else if ((ss-1)->statScore >= -112 && ss->statScore < -100)
|
||||
r++;
|
||||
- 4923;
|
||||
|
||||
// Decrease/increase reduction for moves with a good/bad history (~30 Elo)
|
||||
// If we are not in check use statScore, if we are in check
|
||||
// use sum of main history and first continuation history with an offset
|
||||
if (ss->inCheck)
|
||||
r -= (thisThread->mainHistory[us][from_to(move)]
|
||||
+ (*contHist[0])[movedPiece][to_sq(move)] - 4341) / 16384;
|
||||
else
|
||||
r -= ss->statScore / 14382;
|
||||
if (!ss->inCheck)
|
||||
r -= ss->statScore / 14721;
|
||||
}
|
||||
|
||||
Depth d = std::clamp(newDepth - r, 1, newDepth);
|
||||
// In general we want to cap the LMR depth search at newDepth. But if
|
||||
// reductions are really negative and movecount is low, we allow this move
|
||||
// to be searched deeper than the first move, unless ttMove was extended by 2.
|
||||
Depth d = std::clamp(newDepth - r, 1, newDepth + (r < -1 && moveCount <= 5 && !doubleExtension));
|
||||
|
||||
value = -search<NonPV>(pos, ss+1, -(alpha+1), -alpha, d, true);
|
||||
|
||||
doFullDepthSearch = value > alpha && d != newDepth;
|
||||
|
||||
// If the son is reduced and fails high it will be re-searched at full depth
|
||||
doFullDepthSearch = value > alpha && d < newDepth;
|
||||
didLMR = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
doFullDepthSearch = !PvNode || moveCount > 1;
|
||||
|
||||
didLMR = false;
|
||||
}
|
||||
|
||||
// Step 16. Full depth search when LMR is skipped or fails high
|
||||
// Step 17. Full depth search when LMR is skipped or fails high
|
||||
if (doFullDepthSearch)
|
||||
{
|
||||
value = -search<NonPV>(pos, ss+1, -(alpha+1), -alpha, newDepth, !cutNode);
|
||||
|
@ -1293,12 +1221,12 @@ moves_loop: // When in check, search starts from here
|
|||
std::min(maxNextDepth, newDepth), false);
|
||||
}
|
||||
|
||||
// Step 17. Undo move
|
||||
// Step 18. Undo move
|
||||
pos.undo_move(move);
|
||||
|
||||
assert(value > -VALUE_INFINITE && value < VALUE_INFINITE);
|
||||
|
||||
// Step 18. Check for a new best move
|
||||
// Step 19. Check for a new best move
|
||||
// Finished searching the move. If a stop occurred, the return value of
|
||||
// the search cannot be trusted, and we return immediately without
|
||||
// updating best move, PV and TT.
|
||||
|
@ -1350,7 +1278,6 @@ moves_loop: // When in check, search starts from here
|
|||
else
|
||||
{
|
||||
assert(value >= beta); // Fail high
|
||||
ss->statScore = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -1375,7 +1302,7 @@ moves_loop: // When in check, search starts from here
|
|||
return VALUE_DRAW;
|
||||
*/
|
||||
|
||||
// Step 19. Check for mate and stalemate
|
||||
// Step 20. Check for mate and stalemate
|
||||
// All legal moves have been searched and if there are no legal moves, it
|
||||
// must be a mate or a stalemate. If we are in a singular extension search then
|
||||
// return a fail low score.
|
||||
|
@ -1383,8 +1310,9 @@ moves_loop: // When in check, search starts from here
|
|||
assert(moveCount || !ss->inCheck || excludedMove || !MoveList<LEGAL>(pos).size());
|
||||
|
||||
if (!moveCount)
|
||||
bestValue = excludedMove ? alpha
|
||||
: ss->inCheck ? mated_in(ss->ply) : VALUE_DRAW;
|
||||
bestValue = excludedMove ? alpha :
|
||||
ss->inCheck ? mated_in(ss->ply)
|
||||
: VALUE_DRAW;
|
||||
|
||||
// If there is a move which produces search value greater than alpha we update stats of searched moves
|
||||
else if (bestMove)
|
||||
|
@ -1423,10 +1351,11 @@ moves_loop: // When in check, search starts from here
|
|||
|
||||
// qsearch() is the quiescence search function, which is called by the main search
|
||||
// function with zero depth, or recursively with further decreasing depth per call.
|
||||
template <NodeType NT>
|
||||
template <NodeType nodeType>
|
||||
Value qsearch(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth) {
|
||||
|
||||
constexpr bool PvNode = NT == PV;
|
||||
static_assert(nodeType != Root);
|
||||
constexpr bool PvNode = nodeType == PV;
|
||||
|
||||
assert(alpha >= -VALUE_INFINITE && alpha < beta && beta <= VALUE_INFINITE);
|
||||
assert(PvNode || (alpha == beta - 1));
|
||||
|
@ -1434,7 +1363,7 @@ moves_loop: // When in check, search starts from here
|
|||
|
||||
Move pv[MAX_PLY+1];
|
||||
StateInfo st;
|
||||
ASSERT_ALIGNED(&st, Eval::NNUE::kCacheLineSize);
|
||||
ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize);
|
||||
|
||||
TTEntry* tte;
|
||||
Key posKey;
|
||||
|
@ -1452,7 +1381,6 @@ moves_loop: // When in check, search starts from here
|
|||
}
|
||||
|
||||
Thread* thisThread = pos.this_thread();
|
||||
(ss+1)->ply = ss->ply + 1;
|
||||
bestMove = MOVE_NONE;
|
||||
ss->inCheck = pos.checkers();
|
||||
moveCount = 0;
|
||||
|
@ -1508,7 +1436,7 @@ moves_loop: // When in check, search starts from here
|
|||
// and addition of two tempos
|
||||
ss->staticEval = bestValue =
|
||||
(ss-1)->currentMove != MOVE_NULL ? evaluate(pos)
|
||||
: -(ss-1)->staticEval + 2 * Tempo;
|
||||
: -(ss-1)->staticEval;
|
||||
|
||||
// Stand pat. Return immediately if static value is at least beta
|
||||
if (bestValue >= beta)
|
||||
|
@ -1533,7 +1461,7 @@ moves_loop: // When in check, search starts from here
|
|||
|
||||
// Initialize a MovePicker object for the current position, and prepare
|
||||
// to search the moves. Because the depth is <= 0 here, only captures,
|
||||
// queen and checking knight promotions, and other checks(only if depth >= DEPTH_QS_CHECKS)
|
||||
// queen promotions, and other checks (only if depth >= DEPTH_QS_CHECKS)
|
||||
// will be generated.
|
||||
MovePicker mp(pos, ttMove, depth, &thisThread->mainHistory,
|
||||
&thisThread->captureHistory,
|
||||
|
@ -1550,15 +1478,13 @@ moves_loop: // When in check, search starts from here
|
|||
|
||||
moveCount++;
|
||||
|
||||
// Futility pruning
|
||||
// Futility pruning and moveCount pruning
|
||||
if ( bestValue > VALUE_TB_LOSS_IN_MAX_PLY
|
||||
&& !givesCheck
|
||||
&& futilityBase > -VALUE_KNOWN_WIN
|
||||
&& !pos.advanced_pawn_push(move))
|
||||
&& type_of(move) != PROMOTION)
|
||||
{
|
||||
assert(type_of(move) != EN_PASSANT); // Due to !pos.advanced_pawn_push
|
||||
|
||||
// moveCount pruning
|
||||
if (moveCount > 2)
|
||||
continue;
|
||||
|
||||
|
@ -1598,7 +1524,7 @@ moves_loop: // When in check, search starts from here
|
|||
[pos.moved_piece(move)]
|
||||
[to_sq(move)];
|
||||
|
||||
// CounterMove based pruning
|
||||
// Continuation history based pruning
|
||||
if ( !captureOrPromotion
|
||||
&& bestValue > VALUE_TB_LOSS_IN_MAX_PLY
|
||||
&& (*contHist[0])[pos.moved_piece(move)][to_sq(move)] < CounterMovePruneThreshold
|
||||
|
@ -1607,7 +1533,7 @@ moves_loop: // When in check, search starts from here
|
|||
|
||||
// Make and search the move
|
||||
pos.do_move(move, st, givesCheck);
|
||||
value = -qsearch<NT>(pos, ss+1, -beta, -alpha, depth - 1);
|
||||
value = -qsearch<nodeType>(pos, ss+1, -beta, -alpha, depth - 1);
|
||||
pos.undo_move(move);
|
||||
|
||||
assert(value > -VALUE_INFINITE && value < VALUE_INFINITE);
|
||||
|
@ -1942,7 +1868,7 @@ string UCI::pv(const Position& pos, Depth depth, Value alpha, Value beta) {
|
|||
bool RootMove::extract_ponder_from_tt(Position& pos) {
|
||||
|
||||
StateInfo st;
|
||||
ASSERT_ALIGNED(&st, Eval::NNUE::kCacheLineSize);
|
||||
ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize);
|
||||
|
||||
bool ttHit;
|
||||
|
||||
|
@ -2011,3 +1937,5 @@ void Tablebases::rank_root_moves(Position& pos, Search::RootMoves& rootMoves) {
|
|||
m.tbRank = 0;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Stockfish
|
||||
|
|
|
@ -25,6 +25,8 @@
#include "movepick.h"
#include "types.h"

namespace Stockfish {

class Position;

namespace Search {
@ -50,6 +52,7 @@ struct Stack {
bool inCheck;
bool ttPv;
bool ttHit;
int doubleExtensions;
};

@ -106,4 +109,6 @@ void clear();

} // namespace Search

} // namespace Stockfish

#endif // #ifndef SEARCH_H_INCLUDED

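The new doubleExtensions field in Stack is what the search.cpp hunks of this commit use to cap repeated two-ply singular extensions along one path. Condensed from those hunks (trimmed, for orientation only):

    // At the top of search<>():
    ss->doubleExtensions = (ss-1)->doubleExtensions;

    // In the singular-extension block: a much lower fail-low value allows a
    // 2-ply extension, but only while fewer than 3 are active on this path.
    if (   !PvNode
        && value < singularBeta - 93
        && ss->doubleExtensions < 3)
    {
        extension = 2;
        doubleExtension = true;
    }

    // After the extension is chosen:
    newDepth += extension;
    ss->doubleExtensions = (ss-1)->doubleExtensions + (extension == 2);
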
@ -50,9 +50,11 @@
|
|||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
using namespace Tablebases;
|
||||
using namespace Stockfish::Tablebases;
|
||||
|
||||
int Tablebases::MaxCardinality;
|
||||
int Stockfish::Tablebases::MaxCardinality;
|
||||
|
||||
namespace Stockfish {
|
||||
|
||||
namespace {
|
||||
|
||||
|
@ -103,9 +105,6 @@ template<> inline void swap_endian<uint8_t>(uint8_t&) {}
|
|||
|
||||
template<typename T, int LE> T number(void* addr)
|
||||
{
|
||||
static const union { uint32_t i; char c[4]; } Le = { 0x01020304 };
|
||||
static const bool IsLittleEndian = (Le.c[0] == 4);
|
||||
|
||||
T v;
|
||||
|
||||
if ((uintptr_t)addr & (alignof(T) - 1)) // Unaligned pointer (very rare)
|
||||
|
@ -190,7 +189,8 @@ public:
|
|||
std::stringstream ss(Paths);
|
||||
std::string path;
|
||||
|
||||
while (std::getline(ss, path, SepChar)) {
|
||||
while (std::getline(ss, path, SepChar))
|
||||
{
|
||||
fname = path + "/" + f;
|
||||
std::ifstream::open(fname);
|
||||
if (is_open())
|
||||
|
@ -472,8 +472,6 @@ TBTables TBTables;
|
|||
// If the corresponding file exists two new objects TBTable<WDL> and TBTable<DTZ>
|
||||
// are created and added to the lists and hash table. Called at init time.
|
||||
void TBTables::add(const std::vector<PieceType>& pieces) {
|
||||
if (sizeof(char*) < 8 && pieces.size() >= 6)
|
||||
return; // Not enough address space to support 6-men TB on 32-bit OS
|
||||
|
||||
std::string code;
|
||||
|
||||
|
@ -567,7 +565,8 @@ int decompress_pairs(PairsData* d, uint64_t idx) {
|
|||
int buf64Size = 64;
|
||||
Sym sym;
|
||||
|
||||
while (true) {
|
||||
while (true)
|
||||
{
|
||||
int len = 0; // This is the symbol length - d->min_sym_len
|
||||
|
||||
// Now get the symbol length. For any symbol s64 of length l right-padded
|
||||
|
@ -605,8 +604,8 @@ int decompress_pairs(PairsData* d, uint64_t idx) {
|
|||
// We binary-search for our value recursively expanding into the left and
|
||||
// right child symbols until we reach a leaf node where symlen[sym] + 1 == 1
|
||||
// that will store the value we need.
|
||||
while (d->symlen[sym]) {
|
||||
|
||||
while (d->symlen[sym])
|
||||
{
|
||||
Sym left = d->btree[sym].get<LR::Left>();
|
||||
|
||||
// If a symbol contains 36 sub-symbols (d->symlen[sym] + 1 = 36) and
|
||||
|
@ -711,7 +710,7 @@ Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* resu
|
|||
|
||||
leadPawns = b = pos.pieces(color_of(pc), PAWN);
|
||||
do
|
||||
squares[size++] = pop_lsb(&b) ^ flipSquares;
|
||||
squares[size++] = pop_lsb(b) ^ flipSquares;
|
||||
while (b);
|
||||
|
||||
leadPawnsCnt = size;
|
||||
|
@ -731,7 +730,7 @@ Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* resu
|
|||
// directly map them to the correct color and square.
|
||||
b = pos.pieces() ^ leadPawns;
|
||||
do {
|
||||
Square s = pop_lsb(&b);
|
||||
Square s = pop_lsb(b);
|
||||
squares[size] = s ^ flipSquares;
|
||||
pieces[size++] = Piece(pos.piece_on(s) ^ flipColor);
|
||||
} while (b);
|
||||
|
@ -1537,6 +1536,14 @@ bool Tablebases::root_probe(Position& pos, Search::RootMoves& rootMoves) {
|
|||
WDLScore wdl = -probe_wdl(pos, &result);
|
||||
dtz = dtz_before_zeroing(wdl);
|
||||
}
|
||||
else if (pos.is_draw(1))
|
||||
{
|
||||
// In case a root move leads to a draw by repetition or
|
||||
// 50-move rule, we set dtz to zero. Note: since we are
|
||||
// only 1 ply from the root, this must be a true 3-fold
|
||||
// repetition inside the game history.
|
||||
dtz = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Otherwise, take dtz for the new position and correct by 1 ply
|
||||
|
@ -1587,6 +1594,7 @@ bool Tablebases::root_probe_wdl(Position& pos, Search::RootMoves& rootMoves) {
|
|||
|
||||
ProbeState result;
|
||||
StateInfo st;
|
||||
WDLScore wdl;
|
||||
|
||||
bool rule50 = Options["Syzygy50MoveRule"];
|
||||
|
||||
|
@ -1595,7 +1603,10 @@ bool Tablebases::root_probe_wdl(Position& pos, Search::RootMoves& rootMoves) {
|
|||
{
|
||||
pos.do_move(m.pv[0], st);
|
||||
|
||||
WDLScore wdl = -probe_wdl(pos, &result);
|
||||
if (pos.is_draw(1))
|
||||
wdl = WDLDraw;
|
||||
else
|
||||
wdl = -probe_wdl(pos, &result);
|
||||
|
||||
pos.undo_move(m.pv[0]);
|
||||
|
||||
|
@ -1612,3 +1623,5 @@ bool Tablebases::root_probe_wdl(Position& pos, Search::RootMoves& rootMoves) {
|
|||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace Stockfish
|
||||
|
|
|
@ -23,7 +23,7 @@

#include "../search.h"

namespace Tablebases {
namespace Stockfish::Tablebases {

enum WDLScore {
WDLLoss = -2, // Loss
@ -73,6 +73,6 @@ inline std::ostream& operator<<(std::ostream& os, const ProbeState v) {
return os;
}

}
} // namespace Stockfish::Tablebases

#endif

@ -26,6 +26,8 @@
#include "syzygy/tbprobe.h"
#include "tt.h"

namespace Stockfish {

ThreadPool Threads; // Global object

@ -126,14 +128,16 @@ void Thread::idle_loop() {

void ThreadPool::set(size_t requested) {

if (size() > 0) { // destroy any existing thread(s)
if (size() > 0) // destroy any existing thread(s)
{
main()->wait_for_search_finished();

while (size() > 0)
delete back(), pop_back();
}

if (requested > 0) { // create new thread(s)
if (requested > 0) // create new thread(s)
{
push_back(new MainThread(0));

while (size() < requested)
@ -258,3 +262,5 @@ void ThreadPool::wait_for_search_finished() const {
if (th != front())
th->wait_for_search_finished();
}

} // namespace Stockfish

@ -32,6 +32,7 @@
#include "search.h"
#include "thread_win32_osx.h"

namespace Stockfish {

/// Thread class keeps together all the thread-related stuff. We use
/// per-thread pawn and material hash tables so that once we get a
@ -54,6 +55,7 @@ public:
void idle_loop();
void start_searching();
void wait_for_search_finished();
size_t id() const { return idx; }

Pawns::Table pawnsTable;
Material::Table materialTable;
@ -72,8 +74,7 @@ public:
LowPlyHistory lowPlyHistory;
CapturePieceToHistory captureHistory;
ContinuationHistory continuationHistory[2][2];
Score contempt;
int failedHighCnt;
Score trend;
};

@ -128,4 +129,6 @@ private:

extern ThreadPool Threads;

} // namespace Stockfish

#endif // #ifndef THREAD_H_INCLUDED

@ -31,6 +31,8 @@

#include <pthread.h>

namespace Stockfish {

static const size_t TH_STACK_SIZE = 8 * 1024 * 1024;

template <class T, class P = std::pair<T*, void(T::*)()>>
@ -57,10 +59,16 @@ public:
void join() { pthread_join(thread, NULL); }
};

} // namespace Stockfish

#else // Default case: use STL classes

namespace Stockfish {

typedef std::thread NativeThread;

} // namespace Stockfish

#endif

#endif // #ifndef THREAD_WIN32_OSX_H_INCLUDED

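The pthread branch above exists mainly so threads can be created with a larger stack (TH_STACK_SIZE, 8 MB) where the platform default is too small for deep searches. A hedged sketch of that trick in isolation (helper name and wrapper are illustrative, not from this header):

    #include <pthread.h>
    #include <cstddef>

    constexpr size_t kStackSize = 8 * 1024 * 1024;    // mirrors TH_STACK_SIZE

    inline pthread_t launch_with_big_stack(void* (*fn)(void*), void* arg) {
        pthread_attr_t attr;
        pthread_t th;
        pthread_attr_init(&attr);
        pthread_attr_setstacksize(&attr, kStackSize); // request an 8 MB stack
        pthread_create(&th, &attr, fn, arg);
        pthread_attr_destroy(&attr);
        return th;
    }
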
@ -24,6 +24,8 @@
#include "timeman.h"
#include "uci.h"

namespace Stockfish {

TimeManagement Time; // Our global time management object

@ -95,3 +97,5 @@ void TimeManagement::init(Search::LimitsType& limits, Color us, int ply) {
if (Options["Ponder"])
optimumTime += optimumTime / 4;
}

} // namespace Stockfish

@@ -23,6 +23,8 @@
 #include "search.h"
 #include "thread.h"

+namespace Stockfish {
+
 /// The TimeManagement class computes the optimal time to think depending on
 /// the maximum available time, the game move number and other parameters.


@@ -44,4 +46,6 @@ private:

 extern TimeManagement Time;

+} // namespace Stockfish
+
 #endif // #ifndef TIMEMAN_H_INCLUDED
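The comment above summarises what TimeManagement provides: a soft optimum and a hard maximum thinking time derived from the clock situation. The toy sketch below only illustrates that optimum/maximum split; the allocation formula is a deliberately simplified assumption, not Stockfish's actual computation.

#include <algorithm>
#include <cstdio>

struct SimpleTimeManager {
    long optimumTime = 0;   // soft target in milliseconds
    long maximumTime = 0;   // hard cap in milliseconds

    void init(long timeLeftMs, long incrementMs, int movesToGo) {
        long slice = timeLeftMs / std::max(movesToGo, 1) + incrementMs;
        optimumTime = std::min(slice, timeLeftMs - 50);      // keep a small safety buffer
        maximumTime = std::min(slice * 4, timeLeftMs / 2);   // never spend half the clock on one move
    }
};

int main() {
    SimpleTimeManager tm;
    tm.init(60000, 1000, 30);   // one minute plus 1 s increment, 30 moves to go
    std::printf("optimum %ld ms, maximum %ld ms\n", tm.optimumTime, tm.maximumTime);
}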
@@ -26,6 +26,8 @@
 #include "tt.h"
 #include "uci.h"

+namespace Stockfish {
+
 TranspositionTable TT; // Our global transposition table

 /// TTEntry::save() populates the TTEntry with a new node's data, possibly

@@ -156,3 +158,5 @@ int TranspositionTable::hashfull() const {

   return cnt / ClusterSize;
 }
+
+} // namespace Stockfish
@@ -22,6 +22,8 @@
 #include "misc.h"
 #include "types.h"

+namespace Stockfish {
+
 /// TTEntry struct is the 10 bytes transposition table entry, defined as below:
 ///
 /// key        16 bit

@@ -100,4 +102,6 @@ private:

 extern TranspositionTable TT;

+} // namespace Stockfish
+
 #endif // #ifndef TT_H_INCLUDED
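The comment above describes a 10-byte transposition table entry that starts with a 16-bit key. The standalone sketch below shows how fields of roughly that shape pack into 10 bytes with no padding; the exact field split here is an assumption for illustration, not necessarily Stockfish's layout.

#include <cstdint>
#include <cstdio>

struct SmallTTEntry {
    uint16_t key16;      // upper 16 bits of the position key
    uint16_t move16;     // best move found at this position
    int16_t  value16;    // search score
    int16_t  eval16;     // static evaluation
    uint8_t  depth8;     // search depth
    uint8_t  genBound8;  // generation and bound type packed together
};

static_assert(sizeof(SmallTTEntry) == 10, "all members are 1 or 2 bytes, so no padding is inserted");

int main() {
    std::printf("entry size: %zu bytes\n", sizeof(SmallTTEntry));
}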
@@ -26,9 +26,10 @@

 using std::string;

+namespace Stockfish {
+
 bool Tune::update_on_last;
 const UCI::Option* LastOption = nullptr;
-BoolConditions Conditions;
 static std::map<std::string, int> TuneResults;

 string Tune::next(string& names, bool pop) {

@@ -108,23 +109,7 @@ template<> void Tune::Entry<Score>::read_option() {
 template<> void Tune::Entry<Tune::PostUpdate>::init_option() {}
 template<> void Tune::Entry<Tune::PostUpdate>::read_option() { value(); }

-
-// Set binary conditions according to a probability that depends
-// on the corresponding parameter value.
-
-void BoolConditions::set() {
-
-  static PRNG rng(now());
-  static bool startup = true; // To workaround fishtest bench
-
-  for (size_t i = 0; i < binary.size(); i++)
-      binary[i] = !startup && (values[i] + int(rng.rand<unsigned>() % variance) > threshold);
-
-  startup = false;
-
-  for (size_t i = 0; i < binary.size(); i++)
-      sync_cout << binary[i] << sync_endl;
-}
+} // namespace Stockfish


 // Init options with tuning session results instead of default values. Useful to

@@ -138,7 +123,11 @@ void BoolConditions::set() {

 #include <cmath>

+namespace Stockfish {
+
 void Tune::read_results() {

     /* ...insert your values here... */
 }
+
+} // namespace Stockfish
@@ -24,6 +24,8 @@
 #include <type_traits>
 #include <vector>

+namespace Stockfish {
+
 typedef std::pair<int, int> Range; // Option's min-max values
 typedef Range (RangeFun) (int);


@@ -44,27 +46,6 @@ struct SetRange {
 #define SetDefaultRange SetRange(default_range)


-/// BoolConditions struct is used to tune boolean conditions in the
-/// code by toggling them on/off according to a probability that
-/// depends on the value of a tuned integer parameter: for high
-/// values of the parameter condition is always disabled, for low
-/// values is always enabled, otherwise it is enabled with a given
-/// probability that depnends on the parameter under tuning.
-
-struct BoolConditions {
-  void init(size_t size) { values.resize(size, defaultValue), binary.resize(size, 0); }
-  void set();
-
-  std::vector<int> binary, values;
-  int defaultValue = 465, variance = 40, threshold = 500;
-  SetRange range = SetRange(0, 1000);
-};
-
-extern BoolConditions Conditions;
-
-inline void set_conditions() { Conditions.set(); }
-

 /// Tune class implements the 'magic' code that makes the setup of a fishtest
 /// tuning session as easy as it can be. Mainly you have just to remove const
 /// qualifiers from the variables you want to tune and flag them for tuning, so

@@ -157,14 +138,6 @@ class Tune {
     return add(value, (next(names), std::move(names)), args...);
   }

-  // Template specialization for BoolConditions
-  template<typename... Args>
-  int add(const SetRange& range, std::string&& names, BoolConditions& cond, Args&&... args) {
-    for (size_t size = cond.values.size(), i = 0; i < size; i++)
-        add(cond.range, next(names, i == size - 1) + "_" + std::to_string(i), cond.values[i]);
-    return add(range, std::move(names), args...);
-  }
-
   std::vector<std::unique_ptr<EntryBase>> list;

 public:

@@ -185,9 +158,6 @@ public:

 #define UPDATE_ON_LAST() bool UNIQUE(p, __LINE__) = Tune::update_on_last = true

-// Some macro to tune toggling of boolean conditions
-#define CONDITION(x) (Conditions.binary[__COUNTER__] || (x))
-#define TUNE_CONDITIONS() int UNIQUE(c, __LINE__) = (Conditions.init(__COUNTER__), 0); \
-                          TUNE(Conditions, set_conditions)
+} // namespace Stockfish
+
 #endif // #ifndef TUNE_H_INCLUDED
@@ -83,6 +83,8 @@
 # define pext(b, m) 0
 #endif

+namespace Stockfish {
+
 #ifdef USE_POPCNT
 constexpr bool HasPopCnt = true;
 #else

@@ -189,7 +191,6 @@ enum Value : int {
   BishopValueMg = 825,  BishopValueEg = 915,
   RookValueMg   = 1276, RookValueEg   = 1380,
   QueenValueMg  = 2538, QueenValueEg  = 2682,
-  Tempo = 28,

   MidgameLimit = 15258, EndgameLimit = 3915
 };

@@ -482,6 +483,8 @@ constexpr Key make_key(uint64_t seed) {
   return seed * 6364136223846793005ULL + 1442695040888963407ULL;
 }

+} // namespace Stockfish
+
 #endif // #ifndef TYPES_H_INCLUDED

 #include "tune.h" // Global visibility to tuning setup
@@ -34,6 +34,8 @@

 using namespace std;

+namespace Stockfish {
+
 extern vector<string> setup_bench(const Position&, istream&);

 namespace {

@@ -205,13 +207,13 @@ namespace {
   // Coefficients of a 3rd order polynomial fit based on fishtest data
   // for two parameters needed to transform eval to the argument of a
   // logistic function.
-  double as[] = {-8.24404295, 64.23892342, -95.73056462, 153.86478679};
-  double bs[] = {-3.37154371, 28.44489198, -56.67657741, 72.05858751};
+  double as[] = {-3.68389304, 30.07065921, -60.52878723, 149.53378557};
+  double bs[] = {-2.0181857, 15.85685038, -29.83452023, 47.59078827};
   double a = (((as[0] * m + as[1]) * m + as[2]) * m) + as[3];
   double b = (((bs[0] * m + bs[1]) * m + bs[2]) * m) + bs[3];

   // Transform eval to centipawns with limited range
-  double x = std::clamp(double(100 * v) / PawnValueEg, -1000.0, 1000.0);
+  double x = std::clamp(double(100 * v) / PawnValueEg, -2000.0, 2000.0);

   // Return win rate in per mille (rounded to nearest)
   return int(0.5 + 1000 / (1 + std::exp((a - x) / b)));
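The hunk above changes the polynomial coefficients and widens the clamp range of the win-rate model used for UCI output. Below is a standalone sketch that evaluates the model with the new coefficients; the ply scaling (min(240, ply) / 64) and the PAWN_VALUE_EG constant are assumptions chosen to mirror the surrounding code, so treat the output as illustrative only.

#include <algorithm>
#include <cmath>
#include <cstdio>

// v = internal evaluation, ply = game ply; returns a win rate in per mille.
int win_rate_sketch(int v, int ply) {
    double m = std::min(240, ply) / 64.0;   // assumed ply scaling of the model

    // New third-order polynomial coefficients from this commit
    double as[] = {-3.68389304, 30.07065921, -60.52878723, 149.53378557};
    double bs[] = {-2.0181857, 15.85685038, -29.83452023, 47.59078827};
    double a = (((as[0] * m + as[1]) * m + as[2]) * m) + as[3];
    double b = (((bs[0] * m + bs[1]) * m + bs[2]) * m) + bs[3];

    const double PAWN_VALUE_EG = 208;       // assumed stand-in for PawnValueEg

    // Transform eval to centipawns with the widened clamp from the diff
    double x = std::clamp(double(100 * v) / PAWN_VALUE_EG, -2000.0, 2000.0);

    return int(0.5 + 1000 / (1 + std::exp((a - x) / b)));
}

int main() {
    std::printf("win rate: %d per mille\n", win_rate_sketch(208, 30));   // roughly a one-pawn advantage at ply 30
}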
@@ -275,7 +277,15 @@ void UCI::loop(int argc, char* argv[]) {
       else if (token == "d")        sync_cout << pos << sync_endl;
       else if (token == "eval")     trace_eval(pos);
       else if (token == "compiler") sync_cout << compiler_info() << sync_endl;
-      else
+      else if (token == "export_net")
+      {
+          std::optional<std::string> filename;
+          std::string f;
+          if (is >> skipws >> f)
+              filename = f;
+          Eval::NNUE::save_eval(filename);
+      }
+      else if (!token.empty() && token[0] != '#')
           sync_cout << "Unknown command: " << cmd << sync_endl;

   } while (token != "quit" && argc == 1); // Command line args are one-shot

@@ -369,3 +379,5 @@ Move UCI::to_move(const Position& pos, string& str) {

   return MOVE_NONE;
 }
+
+} // namespace Stockfish
@@ -24,6 +24,8 @@

 #include "types.h"

+namespace Stockfish {
+
 class Position;

 namespace UCI {

@@ -78,4 +80,6 @@ Move to_move(const Position& pos, std::string& str);

 extern UCI::OptionsMap Options;

+} // namespace Stockfish
+
 #endif // #ifndef UCI_H_INCLUDED
@@ -31,6 +31,8 @@

 using std::string;

+namespace Stockfish {
+
 UCI::OptionsMap Options; // Global object

 namespace UCI {

@@ -59,8 +61,6 @@ void init(OptionsMap& o) {
   constexpr int MaxHashMB = Is64Bit ? 33554432 : 2048;

   o["Debug Log File"] << Option("", on_logger);
-  o["Contempt"] << Option(24, -100, 100);
-  o["Analysis Contempt"] << Option("Both var Off var White var Black var Both", "Both");
   o["Threads"] << Option(1, 1, 512, on_threads);
   o["Hash"] << Option(16, 1, MaxHashMB, on_hash_size);
   o["Clear Hash"] << Option(on_clear_hash);

@@ -190,3 +190,5 @@ Option& Option::operator=(const string& v) {
 }

 } // namespace UCI
+
+} // namespace Stockfish
@@ -36,7 +36,7 @@ import org.petero.droidfish.EngineOptions;

 /** Stockfish engine running as process, started from assets resource. */
 public class InternalStockFish extends ExternalEngine {
-    private static final String defaultNet = "nn-62ef826d1a6d.nnue";
+    private static final String defaultNet = "nn-3475407dc199.nnue";
     private static final String netOption = "evalfile";
     private File defaultNetFile; // To get the full path of the copied default network file