diff --git a/DroidFishApp/src/main/assets/nn-3475407dc199.nnue b/DroidFishApp/src/main/assets/nn-6877cd24400e.nnue
similarity index 78%
rename from DroidFishApp/src/main/assets/nn-3475407dc199.nnue
rename to DroidFishApp/src/main/assets/nn-6877cd24400e.nnue
index d1fdcf2..0bf5b62 100644
Binary files a/DroidFishApp/src/main/assets/nn-3475407dc199.nnue and b/DroidFishApp/src/main/assets/nn-6877cd24400e.nnue differ
diff --git a/DroidFishApp/src/main/cpp/stockfish/Android.mk b/DroidFishApp/src/main/cpp/stockfish/Android.mk
index 35969fa..abe90e4 100644
--- a/DroidFishApp/src/main/cpp/stockfish/Android.mk
+++ b/DroidFishApp/src/main/cpp/stockfish/Android.mk
@@ -5,7 +5,7 @@ SF_SRC_FILES := \
 	bitbase.cpp endgame.cpp material.cpp movepick.cpp position.cpp timeman.cpp \
 	tune.cpp ucioption.cpp \
 	bitboard.cpp evaluate.cpp misc.cpp search.cpp tt.cpp syzygy/tbprobe.cpp \
-	nnue/evaluate_nnue.cpp nnue/features/half_ka_v2.cpp
+	nnue/evaluate_nnue.cpp nnue/features/half_ka_v2_hm.cpp
 
 MY_ARCH_DEF :=
 ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
diff --git a/DroidFishApp/src/main/cpp/stockfish/benchmark.cpp b/DroidFishApp/src/main/cpp/stockfish/benchmark.cpp
index 7945a45..e1c025a 100644
--- a/DroidFishApp/src/main/cpp/stockfish/benchmark.cpp
+++ b/DroidFishApp/src/main/cpp/stockfish/benchmark.cpp
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -87,6 +87,7 @@ const vector<string> Defaults = {
   // Chess 960
   "setoption name UCI_Chess960 value true",
   "bbqnnrkr/pppppppp/8/8/8/8/PPPPPPPP/BBQNNRKR w HFhf - 0 1 moves g2g3 d7d5 d2d4 c8h3 c1g5 e8d6 g5e7 f7f6",
+  "nqbnrkrb/pppppppp/8/8/8/8/PPPPPPPP/NQBNRKRB w KQkq - 0 1",
   "setoption name UCI_Chess960 value false"
 };
 
diff --git a/DroidFishApp/src/main/cpp/stockfish/bitbase.cpp b/DroidFishApp/src/main/cpp/stockfish/bitbase.cpp
index 27bf409..84300ba 100644
--- a/DroidFishApp/src/main/cpp/stockfish/bitbase.cpp
+++ b/DroidFishApp/src/main/cpp/stockfish/bitbase.cpp
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/DroidFishApp/src/main/cpp/stockfish/bitboard.cpp b/DroidFishApp/src/main/cpp/stockfish/bitboard.cpp
index 6b84b51..fd0ba23 100644
--- a/DroidFishApp/src/main/cpp/stockfish/bitboard.cpp
+++ b/DroidFishApp/src/main/cpp/stockfish/bitboard.cpp
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/DroidFishApp/src/main/cpp/stockfish/bitboard.h b/DroidFishApp/src/main/cpp/stockfish/bitboard.h
index b29f3e2..2b6e2a6 100644
--- a/DroidFishApp/src/main/cpp/stockfish/bitboard.h
+++ b/DroidFishApp/src/main/cpp/stockfish/bitboard.h
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/DroidFishApp/src/main/cpp/stockfish/endgame.cpp b/DroidFishApp/src/main/cpp/stockfish/endgame.cpp
index a44d3a1..e773e7a 100644
--- a/DroidFishApp/src/main/cpp/stockfish/endgame.cpp
+++ b/DroidFishApp/src/main/cpp/stockfish/endgame.cpp
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/DroidFishApp/src/main/cpp/stockfish/endgame.h b/DroidFishApp/src/main/cpp/stockfish/endgame.h
index 146111b..e79f696 100644
--- a/DroidFishApp/src/main/cpp/stockfish/endgame.h
+++ b/DroidFishApp/src/main/cpp/stockfish/endgame.h
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/DroidFishApp/src/main/cpp/stockfish/evaluate.cpp b/DroidFishApp/src/main/cpp/stockfish/evaluate.cpp
index c83f0de..8bb42ce 100644
--- a/DroidFishApp/src/main/cpp/stockfish/evaluate.cpp
+++ b/DroidFishApp/src/main/cpp/stockfish/evaluate.cpp
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -61,7 +61,7 @@ namespace Stockfish {
 namespace Eval {
 
   bool useNNUE;
-  string eval_file_loaded = "None";
+  string currentEvalFileName = "None";
 
   /// NNUE::init() tries to load a NNUE network at startup time, or when the engine
   /// receives a UCI command "setoption name EvalFile value nn-[a-z0-9]{12}.nnue"
@@ -78,6 +78,8 @@ namespace Eval {
         return;
 
     string eval_file = string(Options["EvalFile"]);
+    if (eval_file.empty())
+        eval_file = EvalFileDefaultName;
 
     #if defined(DEFAULT_NNUE_DIRECTORY)
     #define stringify2(x) #x
@@ -88,13 +90,13 @@ namespace Eval {
     #endif
 
     for (string directory : dirs)
-        if (eval_file_loaded != eval_file)
+        if (currentEvalFileName != eval_file)
         {
             if (directory != "<internal>")
             {
                 ifstream stream(directory + eval_file, ios::binary);
                 if (load_eval(eval_file, stream))
-                    eval_file_loaded = eval_file;
+                    currentEvalFileName = eval_file;
             }
 
             if (directory == "<internal>" && eval_file == EvalFileDefaultName)
@@ -106,30 +108,29 @@ namespace Eval {
 
                 MemoryBuffer buffer(const_cast<char*>(reinterpret_cast<const char*>(gEmbeddedNNUEData)),
                                     size_t(gEmbeddedNNUESize));
+                (void) gEmbeddedNNUEEnd; // Silence warning on unused variable
 
                 istream stream(&buffer);
                 if (load_eval(eval_file, stream))
-                    eval_file_loaded = eval_file;
+                    currentEvalFileName = eval_file;
             }
         }
-    if (eval_file_loaded != eval_file)
-        eval_file_loaded = "";
   }
 
   /// NNUE::verify() verifies that the last net used was loaded successfully
   void NNUE::verify() {
 
     string eval_file = string(Options["EvalFile"]);
+    if (eval_file.empty())
+        eval_file = EvalFileDefaultName;
 
-    if (useNNUE && eval_file_loaded != eval_file)
+    if (useNNUE && currentEvalFileName != eval_file)
     {
-        UCI::OptionsMap defaults;
-        UCI::init(defaults);
 
         string msg1 = "If the UCI option \"Use NNUE\" is set to true, network evaluation parameters compatible with the engine must be available.";
         string msg2 = "The option is set to true, but the network file " + eval_file + " was not loaded successfully.";
         string msg3 = "The UCI option EvalFile might need to specify the full path, including the directory name, to the network file.";
-        string msg4 = "The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/" + string(defaults["EvalFile"]);
+        string msg4 = "The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/" + std::string(EvalFileDefaultName);
         string msg5 = "The engine will be terminated now.";
 
         sync_cout << "info string ERROR: " << msg1 << sync_endl;
@@ -192,17 +193,17 @@ using namespace Trace;
 namespace {
 
   // Threshold for lazy and space evaluation
-  constexpr Value LazyThreshold1    =  Value(1565);
-  constexpr Value LazyThreshold2    =  Value(1102);
+  constexpr Value LazyThreshold1    =  Value(3631);
+  constexpr Value LazyThreshold2    =  Value(2084);
   constexpr Value SpaceThreshold    =  Value(11551);
 
   // KingAttackWeights[PieceType] contains king attack weights by piece type
-  constexpr int KingAttackWeights[PIECE_TYPE_NB] = { 0, 0, 81, 52, 44, 10 };
+  constexpr int KingAttackWeights[PIECE_TYPE_NB] = { 0, 0, 76, 46, 45, 14 };
 
   // SafeCheck[PieceType][single/multiple] contains safe check bonus by piece type,
   // higher if multiple safe checks are possible for that piece type.
   constexpr int SafeCheck[][2] = {
-      {}, {}, {803, 1292}, {639, 974}, {1087, 1878}, {759, 1132}
+      {}, {}, {805, 1292}, {650, 984}, {1071, 1886}, {730, 1128}
   };
 
 #define S(mg, eg) make_score(mg, eg)
@@ -228,58 +229,58 @@ namespace {
   // BishopPawns[distance from edge] contains a file-dependent penalty for pawns on
   // squares of the same color as our bishop.
   constexpr Score BishopPawns[int(FILE_NB) / 2] = {
-    S(3, 8), S(3, 9), S(2, 8), S(3, 8)
+    S(3, 8), S(3, 9), S(2, 7), S(3, 7)
   };
 
   // KingProtector[knight/bishop] contains penalty for each distance unit to own king
-  constexpr Score KingProtector[] = { S(8, 9), S(6, 9) };
+  constexpr Score KingProtector[] = { S(9, 9), S(7, 9) };
 
   // Outpost[knight/bishop] contains bonuses for each knight or bishop occupying a
   // pawn protected square on rank 4 to 6 which is also safe from a pawn attack.
-  constexpr Score Outpost[] = { S(57, 38), S(31, 24) };
+  constexpr Score Outpost[] = { S(54, 34), S(31, 25) };
 
   // PassedRank[Rank] contains a bonus according to the rank of a passed pawn
   constexpr Score PassedRank[RANK_NB] = {
-    S(0, 0), S(7, 27), S(16, 32), S(17, 40), S(64, 71), S(170, 174), S(278, 262)
+    S(0, 0), S(2, 38), S(15, 36), S(22, 50), S(64, 81), S(166, 184), S(284, 269)
   };
 
   constexpr Score RookOnClosedFile = S(10, 5);
-  constexpr Score RookOnOpenFile[] = { S(19, 6), S(47, 26) };
+  constexpr Score RookOnOpenFile[] = { S(18, 8), S(49, 26) };
 
   // ThreatByMinor/ByRook[attacked PieceType] contains bonuses according to
   // which piece type attacks which one. Attacks on lesser pieces which are
   // pawn-defended are not considered.
   constexpr Score ThreatByMinor[PIECE_TYPE_NB] = {
-    S(0, 0), S(5, 32), S(55, 41), S(77, 56), S(89, 119), S(79, 162)
+    S(0, 0), S(6, 37), S(64, 50), S(82, 57), S(103, 130), S(81, 163)
   };
 
   constexpr Score ThreatByRook[PIECE_TYPE_NB] = {
-    S(0, 0), S(3, 44), S(37, 68), S(42, 60), S(0, 39), S(58, 43)
+    S(0, 0), S(3, 44), S(36, 71), S(44, 59), S(0, 39), S(60, 39)
   };
 
   constexpr Value CorneredBishop = Value(50);
 
   // Assorted bonuses and penalties
-  constexpr Score UncontestedOutpost  = S(  1, 10);
+  constexpr Score UncontestedOutpost  = S(  0, 10);
   constexpr Score BishopOnKingRing    = S( 24,  0);
   constexpr Score BishopXRayPawns     = S(  4,  5);
   constexpr Score FlankAttacks        = S(  8,  0);
-  constexpr Score Hanging             = S( 69, 36);
+  constexpr Score Hanging             = S( 72, 40);
   constexpr Score KnightOnQueen       = S( 16, 11);
   constexpr Score LongDiagonalBishop  = S( 45,  0);
   constexpr Score MinorBehindPawn     = S( 18,  3);
-  constexpr Score PassedFile          = S( 11,  8);
-  constexpr Score PawnlessFlank       = S( 17, 95);
-  constexpr Score ReachableOutpost    = S( 31, 22);
-  constexpr Score RestrictedPiece     = S(  7,  7);
+  constexpr Score PassedFile          = S( 13,  8);
+  constexpr Score PawnlessFlank       = S( 19, 97);
+  constexpr Score ReachableOutpost    = S( 33, 19);
+  constexpr Score RestrictedPiece     = S(  6,  7);
   constexpr Score RookOnKingRing      = S( 16,  0);
-  constexpr Score SliderOnQueen       = S( 60, 18);
-  constexpr Score ThreatByKing        = S( 24, 89);
+  constexpr Score SliderOnQueen       = S( 62, 21);
+  constexpr Score ThreatByKing        = S( 24, 87);
   constexpr Score ThreatByPawnPush    = S( 48, 39);
-  constexpr Score ThreatBySafePawn    = S(173, 94);
+  constexpr Score ThreatBySafePawn    = S(167, 99);
   constexpr Score TrappedRook         = S( 55, 13);
   constexpr Score WeakQueenProtection = S( 14,  0);
-  constexpr Score WeakQueen           = S( 56, 15);
+  constexpr Score WeakQueen           = S( 57, 19);
 
 
 #undef S
@@ -988,7 +989,9 @@ namespace {
 
     // Early exit if score is high
     auto lazy_skip = [&](Value lazyThreshold) {
-        return abs(mg_value(score) + eg_value(score)) / 2 > lazyThreshold + pos.non_pawn_material() / 64;
+        return abs(mg_value(score) + eg_value(score)) >   lazyThreshold
+                                                        + std::abs(pos.this_thread()->bestValue) * 5 / 4
+                                                        + pos.non_pawn_material() / 32;
     };
 
     if (lazy_skip(LazyThreshold1))
@@ -1053,26 +1056,22 @@ make_v:
 
     if (   pos.piece_on(SQ_A1) == W_BISHOP
         && pos.piece_on(SQ_B2) == W_PAWN)
-        correction += !pos.empty(SQ_B3) ? -CorneredBishop * 4
-                                        : -CorneredBishop * 3;
+        correction -= CorneredBishop;
 
     if (   pos.piece_on(SQ_H1) == W_BISHOP
         && pos.piece_on(SQ_G2) == W_PAWN)
-        correction += !pos.empty(SQ_G3) ? -CorneredBishop * 4
-                                        : -CorneredBishop * 3;
+        correction -= CorneredBishop;
 
     if (   pos.piece_on(SQ_A8) == B_BISHOP
         && pos.piece_on(SQ_B7) == B_PAWN)
-        correction += !pos.empty(SQ_B6) ? CorneredBishop * 4
-                                        : CorneredBishop * 3;
+        correction += CorneredBishop;
 
     if (   pos.piece_on(SQ_H8) == B_BISHOP
         && pos.piece_on(SQ_G7) == B_PAWN)
-        correction += !pos.empty(SQ_G6) ? CorneredBishop * 4
-                                        : CorneredBishop * 3;
+        correction += CorneredBishop;
 
-    return pos.side_to_move() == WHITE ?  Value(correction)
-                                       : -Value(correction);
+    return pos.side_to_move() == WHITE ?  Value(3 * correction)
+                                       : -Value(3 * correction);
   }
 
 } // namespace Eval
@@ -1084,38 +1083,37 @@ make_v:
 Value Eval::evaluate(const Position& pos) {
 
   Value v;
+  bool useClassical = false;
 
-  if (!Eval::useNNUE)
-      v = Evaluation<NO_TRACE>(pos).value();
-  else
+  // Deciding between classical and NNUE eval (~10 Elo): for high PSQ imbalance we use classical,
+  // but we switch to NNUE during long shuffling or with high material on the board.
+  if (  !useNNUE
+      || ((pos.this_thread()->depth > 9 || pos.count<ALL_PIECES>() > 7) &&
+          abs(eg_value(pos.psq_score())) * 5 > (856 + pos.non_pawn_material() / 64) * (10 + pos.rule50_count())))
   {
-      // Scale and shift NNUE for compatibility with search and classical evaluation
-      auto  adjusted_NNUE = [&]()
-      {
-         int scale =   903
-                     + 32 * pos.count<PAWN>()
-                     + 32 * pos.non_pawn_material() / 1024;
+      v = Evaluation<NO_TRACE>(pos).value();          // classical
+      useClassical = abs(v) >= 297;
+  }
 
-         Value nnue = NNUE::evaluate(pos, true) * scale / 1024;
+  // If result of a classical evaluation is much lower than threshold fall back to NNUE
+  if (useNNUE && !useClassical)
+  {
+       Value nnue     = NNUE::evaluate(pos, true);     // NNUE
+       int scale      = 1036 + 22 * pos.non_pawn_material() / 1024;
+       Color stm      = pos.side_to_move();
+       Value optimism = pos.this_thread()->optimism[stm];
+       Value psq      = (stm == WHITE ? 1 : -1) * eg_value(pos.psq_score());
+       int complexity = 35 * abs(nnue - psq) / 256;
 
-         if (pos.is_chess960())
-             nnue += fix_FRC(pos);
+       optimism = optimism * (44 + complexity) / 31;
+       v = (nnue + optimism) * scale / 1024 - optimism;
 
-         return nnue;
-      };
-
-      // If there is PSQ imbalance we use the classical eval, but we switch to
-      // NNUE eval faster when shuffling or if the material on the board is high.
-      int r50 = pos.rule50_count();
-      Value psq = Value(abs(eg_value(pos.psq_score())));
-      bool classical = psq * 5 > (750 + pos.non_pawn_material() / 64) * (5 + r50);
-
-      v = classical ? Evaluation<NO_TRACE>(pos).value()  // classical
-                    : adjusted_NNUE();                   // NNUE
+       if (pos.is_chess960())
+           v += fix_FRC(pos);
   }
 
   // Damp down the evaluation linearly when shuffling
-  v = v * (100 - pos.rule50_count()) / 100;
+  v = v * (195 - pos.rule50_count()) / 211;
 
   // Guarantee evaluation does not hit the tablebase range
   v = std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1);
@@ -1140,7 +1138,12 @@ std::string Eval::trace(Position& pos) {
 
   std::memset(scores, 0, sizeof(scores));
 
-  pos.this_thread()->trend = SCORE_ZERO; // Reset any dynamic contempt
+  // Reset any global variable used in eval
+  pos.this_thread()->depth           = 0;
+  pos.this_thread()->trend           = SCORE_ZERO;
+  pos.this_thread()->bestValue       = VALUE_ZERO;
+  pos.this_thread()->optimism[WHITE] = VALUE_ZERO;
+  pos.this_thread()->optimism[BLACK] = VALUE_ZERO;
 
   v = Evaluation<TRACE>(pos).value();
 
diff --git a/DroidFishApp/src/main/cpp/stockfish/evaluate.h b/DroidFishApp/src/main/cpp/stockfish/evaluate.h
index 91da01d..1934c9b 100644
--- a/DroidFishApp/src/main/cpp/stockfish/evaluate.h
+++ b/DroidFishApp/src/main/cpp/stockfish/evaluate.h
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -34,12 +34,12 @@ namespace Eval {
   Value evaluate(const Position& pos);
 
   extern bool useNNUE;
-  extern std::string eval_file_loaded;
+  extern std::string currentEvalFileName;
 
   // The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue
   // for the build process (profile-build and fishtest) to work. Do not change the
   // name of the macro, as it is used in the Makefile.
-  #define EvalFileDefaultName   "nn-3475407dc199.nnue"
+  #define EvalFileDefaultName   "nn-6877cd24400e.nnue"
 
   namespace NNUE {
 
diff --git a/DroidFishApp/src/main/cpp/stockfish/main.cpp b/DroidFishApp/src/main/cpp/stockfish/main.cpp
index 62e0ed5..fad0ef8 100644
--- a/DroidFishApp/src/main/cpp/stockfish/main.cpp
+++ b/DroidFishApp/src/main/cpp/stockfish/main.cpp
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/DroidFishApp/src/main/cpp/stockfish/material.cpp b/DroidFishApp/src/main/cpp/stockfish/material.cpp
index 9d17af2..1567358 100644
--- a/DroidFishApp/src/main/cpp/stockfish/material.cpp
+++ b/DroidFishApp/src/main/cpp/stockfish/material.cpp
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/DroidFishApp/src/main/cpp/stockfish/material.h b/DroidFishApp/src/main/cpp/stockfish/material.h
index 26535a5..3ca169c 100644
--- a/DroidFishApp/src/main/cpp/stockfish/material.h
+++ b/DroidFishApp/src/main/cpp/stockfish/material.h
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/DroidFishApp/src/main/cpp/stockfish/misc.cpp b/DroidFishApp/src/main/cpp/stockfish/misc.cpp
index 78227ee..178465c 100644
--- a/DroidFishApp/src/main/cpp/stockfish/misc.cpp
+++ b/DroidFishApp/src/main/cpp/stockfish/misc.cpp
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -36,6 +36,8 @@ typedef bool(*fun1_t)(LOGICAL_PROCESSOR_RELATIONSHIP,
                       PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, PDWORD);
 typedef bool(*fun2_t)(USHORT, PGROUP_AFFINITY);
 typedef bool(*fun3_t)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY);
+typedef bool(*fun4_t)(USHORT, PGROUP_AFFINITY, USHORT, PUSHORT);
+typedef WORD(*fun5_t)();
 }
 #endif
 
@@ -67,7 +69,7 @@ namespace {
 
 /// Version number. If Version is left empty, then compile date in the format
 /// DD-MM-YY and show in engine_info.
-const string Version = "14";
+const string Version = "15";
 
 /// Our fancy logging facility. The trick here is to replace cin.rdbuf() and
 /// cout.rdbuf() with two Tie objects that tie cin and cout to a file stream. We
@@ -110,7 +112,14 @@ public:
 
     static Logger l;
 
-    if (!fname.empty() && !l.file.is_open())
+    if (l.file.is_open())
+    {
+        cout.rdbuf(l.out.buf);
+        cin.rdbuf(l.in.buf);
+        l.file.close();
+    }
+
+    if (!fname.empty())
     {
         l.file.open(fname, ifstream::out);
 
@@ -123,12 +132,6 @@ public:
         cin.rdbuf(&l.in);
         cout.rdbuf(&l.out);
     }
-    else if (fname.empty() && l.file.is_open())
-    {
-        cout.rdbuf(l.out.buf);
-        cin.rdbuf(l.in.buf);
-        l.file.close();
-    }
   }
 };
 
@@ -378,6 +381,7 @@ void std_aligned_free(void* ptr) {
 static void* aligned_large_pages_alloc_windows(size_t allocSize) {
 
   #if !defined(_WIN64)
+    (void)allocSize; // suppress unused-parameter compiler warning
     return nullptr;
   #else
 
@@ -493,11 +497,11 @@ void bindThisThread(size_t) {}
 
 #else
 
-/// best_group() retrieves logical processor information using Windows specific
-/// API and returns the best group id for the thread with index idx. Original
+/// best_node() retrieves logical processor information using Windows specific
+/// API and returns the best node id for the thread with index idx. Original
 /// code from Texel by Peter Österlund.
 
-int best_group(size_t idx) {
+int best_node(size_t idx) {
 
   int threads = 0;
   int nodes = 0;
@@ -511,7 +515,8 @@ int best_group(size_t idx) {
   if (!fun1)
       return -1;
 
-  // First call to get returnLength. We expect it to fail due to null buffer
+  // First call to GetLogicalProcessorInformationEx() to get returnLength.
+  // We expect the call to fail due to null buffer.
   if (fun1(RelationAll, nullptr, &returnLength))
       return -1;
 
@@ -519,7 +524,7 @@ int best_group(size_t idx) {
   SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *buffer, *ptr;
   ptr = buffer = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)malloc(returnLength);
 
-  // Second call, now we expect to succeed
+  // Second call to GetLogicalProcessorInformationEx(), now we expect to succeed
   if (!fun1(RelationAll, buffer, &returnLength))
   {
       free(buffer);
@@ -569,22 +574,38 @@ int best_group(size_t idx) {
 void bindThisThread(size_t idx) {
 
   // Use only local variables to be thread-safe
-  int group = best_group(idx);
+  int node = best_node(idx);
 
-  if (group == -1)
+  if (node == -1)
       return;
 
   // Early exit if the needed API are not available at runtime
   HMODULE k32 = GetModuleHandle("Kernel32.dll");
   auto fun2 = (fun2_t)(void(*)())GetProcAddress(k32, "GetNumaNodeProcessorMaskEx");
   auto fun3 = (fun3_t)(void(*)())GetProcAddress(k32, "SetThreadGroupAffinity");
+  auto fun4 = (fun4_t)(void(*)())GetProcAddress(k32, "GetNumaNodeProcessorMask2");
+  auto fun5 = (fun5_t)(void(*)())GetProcAddress(k32, "GetMaximumProcessorGroupCount");
 
   if (!fun2 || !fun3)
       return;
 
-  GROUP_AFFINITY affinity;
-  if (fun2(group, &affinity))
-      fun3(GetCurrentThread(), &affinity, nullptr);
+  if (!fun4 || !fun5)
+  {
+      GROUP_AFFINITY affinity;
+      if (fun2(node, &affinity))                                                 // GetNumaNodeProcessorMaskEx
+          fun3(GetCurrentThread(), &affinity, nullptr);                          // SetThreadGroupAffinity
+  }
+  else
+  {
+      // If a numa node has more than one processor group, we assume they are sized equal and we
+      // spread threads evenly across the groups. A failed malloc or an empty result leaves the thread unbound.
+      USHORT elements, returnedElements;
+      elements = fun5();                                                         // GetMaximumProcessorGroupCount
+      GROUP_AFFINITY *affinity = (GROUP_AFFINITY*)malloc(elements * sizeof(GROUP_AFFINITY));
+      if (affinity && fun4(node, affinity, elements, &returnedElements) && returnedElements > 0)  // GetNumaNodeProcessorMask2
+          fun3(GetCurrentThread(), &affinity[idx % returnedElements], nullptr);  // SetThreadGroupAffinity
+      free(affinity);                                                            // free(nullptr) is a no-op
+  }
 }
 
 #endif
diff --git a/DroidFishApp/src/main/cpp/stockfish/misc.h b/DroidFishApp/src/main/cpp/stockfish/misc.h
index dae37cd..2fd2b40 100644
--- a/DroidFishApp/src/main/cpp/stockfish/misc.h
+++ b/DroidFishApp/src/main/cpp/stockfish/misc.h
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -85,19 +85,30 @@ static inline const union { uint32_t i; char c[4]; } Le = { 0x01020304 };
 static inline const bool IsLittleEndian = (Le.c[0] == 4);
 
 
-template <typename T>
-class ValueListInserter {
-public:
-  ValueListInserter(T* v, std::size_t& s) :
-    values(v),
-    size(&s)
-  {
-  }
+// RunningAverage : a class to calculate a running average of a series of values, using integers only.
+// The member 'average' holds the value scaled by PERIOD * RESOLUTION (see set() and value()).
+class RunningAverage {
+  public:
 
-  void push_back(const T& value) { values[(*size)++] = value; }
-private:
-  T* values;
-  std::size_t* size;
+      // Reset the running average to rational value p / q (q must be non-zero)
+      void set(int64_t p, int64_t q)
+        { average = p * PERIOD * RESOLUTION / q; }
+
+      // Update average with value v: the previous average keeps a weight of (PERIOD - 1) / PERIOD
+      void update(int64_t v)
+        { average = RESOLUTION * v + (PERIOD - 1) * average / PERIOD; }
+
+      // Test if average is strictly greater than rational a / b (cross-multiplied, so assumes b > 0)
+      bool is_greater(int64_t a, int64_t b) const
+        { return b * average > a * (PERIOD * RESOLUTION); }
+
+      int64_t value() const
+        { return average / (PERIOD * RESOLUTION); }
+
+  private :
+      static constexpr int64_t PERIOD     = 4096;
+      static constexpr int64_t RESOLUTION = 1024;
+      int64_t average = 0;  // zero until set() is called, so no indeterminate value is ever read
 };
 
 template <typename T, std::size_t MaxSize>
@@ -113,7 +124,6 @@ public:
   const T& operator[](std::size_t index) const { return values_[index]; }
   const T* begin() const { return values_; }
   const T* end() const { return values_ + size_; }
-  operator ValueListInserter<T>() { return ValueListInserter(values_, size_); }
 
   void swap(ValueList& other) {
     const std::size_t maxSize = std::max(size_, other.size_);
@@ -128,6 +138,34 @@ private:
   std::size_t size_ = 0;
 };
 
+
+/// sigmoid(t, x0, y0, C, P, Q) implements a sigmoid-like function using only integers,
+/// with the following properties:
+///
+///  -  sigmoid is centered in (x0, y0)
+///  -  sigmoid has amplitude [-P/Q , P/Q] instead of [-1 , +1]
+///  -  limit is (y0 - P/Q) when t tends to -infinity
+///  -  limit is (y0 + P/Q) when t tends to +infinity
+///  -  the slope can be adjusted using C > 0, smaller C giving a steeper sigmoid
+///  -  the slope of the sigmoid when t = x0 is P/(Q*C)
+///  -  sigmoid is increasing with t when P > 0 and Q > 0
+///  -  to get a decreasing sigmoid, change sign of P
+///  -  mean value of the sigmoid is y0
+///
+/// Use <https://www.desmos.com/calculator/jhh83sqq92> to draw the sigmoid
+
+inline int64_t sigmoid(int64_t t, int64_t x0, int64_t y0,
+                       int64_t C, int64_t P, int64_t Q)
+{
+   // With C > 0 and Q != 0 the divisor Q * (|t - x0| + C) cannot be zero
+   assert(C > 0);
+   assert(Q != 0);
+
+   const int64_t dx = t - x0;   // signed offset from the sigmoid's center x0
+   return y0 + P * dx / (Q * (std::abs(dx) + C));
+}
+
+
 /// xorshift64star Pseudo-Random Number Generator
 /// This class is based on original code written and dedicated
 /// to the public domain by Sebastiano Vigna (2014).
diff --git a/DroidFishApp/src/main/cpp/stockfish/movegen.cpp b/DroidFishApp/src/main/cpp/stockfish/movegen.cpp
index 5f3ba90..c7a3c29 100644
--- a/DroidFishApp/src/main/cpp/stockfish/movegen.cpp
+++ b/DroidFishApp/src/main/cpp/stockfish/movegen.cpp
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -52,9 +52,9 @@ namespace {
     constexpr Direction UpRight  = (Us == WHITE ? NORTH_EAST : SOUTH_WEST);
     constexpr Direction UpLeft   = (Us == WHITE ? NORTH_WEST : SOUTH_EAST);
 
-    const Bitboard emptySquares = Type == QUIETS || Type == QUIET_CHECKS ? target : ~pos.pieces();
-    const Bitboard enemies      = Type == EVASIONS ? pos.checkers()
-                                : Type == CAPTURES ? target : pos.pieces(Them);
+    const Bitboard emptySquares = ~pos.pieces();
+    const Bitboard enemies      =  Type == EVASIONS ? pos.checkers()
+                                                    : pos.pieces(Them);
 
     Bitboard pawnsOn7    = pos.pieces(Us, PAWN) &  TRank7BB;
     Bitboard pawnsNotOn7 = pos.pieces(Us, PAWN) & ~TRank7BB;
diff --git a/DroidFishApp/src/main/cpp/stockfish/movegen.h b/DroidFishApp/src/main/cpp/stockfish/movegen.h
index 3f895f0..bbb35b3 100644
--- a/DroidFishApp/src/main/cpp/stockfish/movegen.h
+++ b/DroidFishApp/src/main/cpp/stockfish/movegen.h
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/DroidFishApp/src/main/cpp/stockfish/movepick.cpp b/DroidFishApp/src/main/cpp/stockfish/movepick.cpp
index 4ff4cff..b0166c6 100644
--- a/DroidFishApp/src/main/cpp/stockfish/movepick.cpp
+++ b/DroidFishApp/src/main/cpp/stockfish/movepick.cpp
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -18,6 +18,7 @@
 
 #include <cassert>
 
+#include "bitboard.h"
 #include "movepick.h"
 
 namespace Stockfish {
@@ -56,11 +57,14 @@ namespace {
 /// ordering is at the current node.
 
 /// MovePicker constructor for the main search
-MovePicker::MovePicker(const Position& p, Move ttm, Depth d, const ButterflyHistory* mh, const LowPlyHistory* lp,
-                       const CapturePieceToHistory* cph, const PieceToHistory** ch, Move cm, const Move* killers, int pl)
-           : pos(p), mainHistory(mh), lowPlyHistory(lp), captureHistory(cph), continuationHistory(ch),
-             ttMove(ttm), refutations{{killers[0], 0}, {killers[1], 0}, {cm, 0}}, depth(d), ply(pl) {
-
+MovePicker::MovePicker(const Position& p, Move ttm, Depth d, const ButterflyHistory* mh,
+                                                             const CapturePieceToHistory* cph,
+                                                             const PieceToHistory** ch,
+                                                             Move cm,
+                                                             const Move* killers)
+           : pos(p), mainHistory(mh), captureHistory(cph), continuationHistory(ch),
+             ttMove(ttm), refutations{{killers[0], 0}, {killers[1], 0}, {cm, 0}}, depth(d)
+{
   assert(d > 0);
 
   stage = (pos.checkers() ? EVASION_TT : MAIN_TT) +
@@ -69,9 +73,11 @@ MovePicker::MovePicker(const Position& p, Move ttm, Depth d, const ButterflyHist
 
 /// MovePicker constructor for quiescence search
 MovePicker::MovePicker(const Position& p, Move ttm, Depth d, const ButterflyHistory* mh,
-                       const CapturePieceToHistory* cph, const PieceToHistory** ch, Square rs)
-           : pos(p), mainHistory(mh), captureHistory(cph), continuationHistory(ch), ttMove(ttm), recaptureSquare(rs), depth(d) {
-
+                                                             const CapturePieceToHistory* cph,
+                                                             const PieceToHistory** ch,
+                                                             Square rs)
+           : pos(p), mainHistory(mh), captureHistory(cph), continuationHistory(ch), ttMove(ttm), recaptureSquare(rs), depth(d)
+{
   assert(d <= 0);
 
   stage = (pos.checkers() ? EVASION_TT : QSEARCH_TT) +
@@ -82,9 +88,9 @@ MovePicker::MovePicker(const Position& p, Move ttm, Depth d, const ButterflyHist
 
 /// MovePicker constructor for ProbCut: we generate captures with SEE greater
 /// than or equal to the given threshold.
-MovePicker::MovePicker(const Position& p, Move ttm, Value th, const CapturePieceToHistory* cph)
-           : pos(p), captureHistory(cph), ttMove(ttm), threshold(th) {
-
+MovePicker::MovePicker(const Position& p, Move ttm, Value th, Depth d, const CapturePieceToHistory* cph)
+           : pos(p), captureHistory(cph), ttMove(ttm), threshold(th), depth(d)
+{
   assert(!pos.checkers());
 
   stage = PROBCUT_TT + !(ttm && pos.capture(ttm)
@@ -100,10 +106,35 @@ void MovePicker::score() {
 
   static_assert(Type == CAPTURES || Type == QUIETS || Type == EVASIONS, "Wrong type");
 
+  Bitboard threatened, threatenedByPawn, threatenedByMinor, threatenedByRook;
+  if constexpr (Type == QUIETS)
+  {
+      Color us = pos.side_to_move();
+      // squares threatened by pawns
+      threatenedByPawn  = pos.attacks_by<PAWN>(~us);
+      // squares threatened by minors or pawns
+      threatenedByMinor = pos.attacks_by<KNIGHT>(~us) | pos.attacks_by<BISHOP>(~us) | threatenedByPawn;
+      // squares threatened by rooks, minors or pawns
+      threatenedByRook  = pos.attacks_by<ROOK>(~us) | threatenedByMinor;
+
+      // pieces threatened by pieces of lesser material value
+      threatened =  (pos.pieces(us, QUEEN) & threatenedByRook)
+                  | (pos.pieces(us, ROOK)  & threatenedByMinor)
+                  | (pos.pieces(us, KNIGHT, BISHOP) & threatenedByPawn);
+  }
+  else
+  {
+      // Silence unused variable warnings
+      (void) threatened;
+      (void) threatenedByPawn;
+      (void) threatenedByMinor;
+      (void) threatenedByRook;
+  }
+
   for (auto& m : *this)
       if constexpr (Type == CAPTURES)
-          m.value =  int(PieceValue[MG][pos.piece_on(to_sq(m))]) * 6
-                   + (*captureHistory)[pos.moved_piece(m)][to_sq(m)][type_of(pos.piece_on(to_sq(m)))];
+          m.value =  6 * int(PieceValue[MG][pos.piece_on(to_sq(m))])
+                   +     (*captureHistory)[pos.moved_piece(m)][to_sq(m)][type_of(pos.piece_on(to_sq(m)))];
 
       else if constexpr (Type == QUIETS)
           m.value =      (*mainHistory)[pos.side_to_move()][from_to(m)]
@@ -111,7 +142,12 @@ void MovePicker::score() {
                    +     (*continuationHistory[1])[pos.moved_piece(m)][to_sq(m)]
                    +     (*continuationHistory[3])[pos.moved_piece(m)][to_sq(m)]
                    +     (*continuationHistory[5])[pos.moved_piece(m)][to_sq(m)]
-                   + (ply < MAX_LPH ? std::min(4, depth / 3) * (*lowPlyHistory)[ply][from_to(m)] : 0);
+                   +     (threatened & from_sq(m) ?
+                           (type_of(pos.moved_piece(m)) == QUEEN && !(to_sq(m) & threatenedByRook)  ? 50000
+                          : type_of(pos.moved_piece(m)) == ROOK  && !(to_sq(m) & threatenedByMinor) ? 25000
+                          :                                         !(to_sq(m) & threatenedByPawn)  ? 15000
+                          :                                                                           0)
+                          :                                                                           0);
 
       else // Type == EVASIONS
       {
@@ -165,11 +201,12 @@ top:
       endMoves = generate<CAPTURES>(pos, cur);
 
       score<CAPTURES>();
+      partial_insertion_sort(cur, endMoves, -3000 * depth);
       ++stage;
       goto top;
 
   case GOOD_CAPTURE:
-      if (select<Best>([&](){
+      if (select<Next>([&](){
                        return pos.see_ge(*cur, Value(-69 * cur->value / 1024)) ?
                               // Move losing capture to endBadCaptures to be tried later
                               true : (*endBadCaptures++ = *cur, false); }))
@@ -237,10 +274,10 @@ top:
       return select<Best>([](){ return true; });
 
   case PROBCUT:
-      return select<Best>([&](){ return pos.see_ge(*cur, threshold); });
+      return select<Next>([&](){ return pos.see_ge(*cur, threshold); });
 
   case QCAPTURE:
-      if (select<Best>([&](){ return   depth > DEPTH_QS_RECAPTURES
+      if (select<Next>([&](){ return   depth > DEPTH_QS_RECAPTURES
                                     || to_sq(*cur) == recaptureSquare; }))
           return *(cur - 1);
 
diff --git a/DroidFishApp/src/main/cpp/stockfish/movepick.h b/DroidFishApp/src/main/cpp/stockfish/movepick.h
index c76d495..9a3c279 100644
--- a/DroidFishApp/src/main/cpp/stockfish/movepick.h
+++ b/DroidFishApp/src/main/cpp/stockfish/movepick.h
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -86,13 +86,7 @@ enum StatsType { NoCaptures, Captures };
 /// unsuccessful during the current search, and is used for reduction and move
 /// ordering decisions. It uses 2 tables (one for each color) indexed by
 /// the move's from and to squares, see www.chessprogramming.org/Butterfly_Boards
-typedef Stats<int16_t, 13365, COLOR_NB, int(SQUARE_NB) * int(SQUARE_NB)> ButterflyHistory;
-
-/// At higher depths LowPlyHistory records successful quiet moves near the root
-/// and quiet moves which are/were in the PV (ttPv). It is cleared with each new
-/// search and filled during iterative deepening.
-constexpr int MAX_LPH = 4;
-typedef Stats<int16_t, 10692, MAX_LPH, int(SQUARE_NB) * int(SQUARE_NB)> LowPlyHistory;
+typedef Stats<int16_t, 14365, COLOR_NB, int(SQUARE_NB) * int(SQUARE_NB)> ButterflyHistory;
 
 /// CounterMoveHistory stores counter moves indexed by [piece][to] of the previous
 /// move, see www.chessprogramming.org/Countermove_Heuristic
@@ -123,18 +117,16 @@ class MovePicker {
 public:
   MovePicker(const MovePicker&) = delete;
   MovePicker& operator=(const MovePicker&) = delete;
-  MovePicker(const Position&, Move, Value, const CapturePieceToHistory*);
+  MovePicker(const Position&, Move, Depth, const ButterflyHistory*,
+                                           const CapturePieceToHistory*,
+                                           const PieceToHistory**,
+                                           Move,
+                                           const Move*);
   MovePicker(const Position&, Move, Depth, const ButterflyHistory*,
                                            const CapturePieceToHistory*,
                                            const PieceToHistory**,
                                            Square);
-  MovePicker(const Position&, Move, Depth, const ButterflyHistory*,
-                                           const LowPlyHistory*,
-                                           const CapturePieceToHistory*,
-                                           const PieceToHistory**,
-                                           Move,
-                                           const Move*,
-                                           int);
+  MovePicker(const Position&, Move, Value, Depth, const CapturePieceToHistory*);
   Move next_move(bool skipQuiets = false);
 
 private:
@@ -145,7 +137,6 @@ private:
 
   const Position& pos;
   const ButterflyHistory* mainHistory;
-  const LowPlyHistory* lowPlyHistory;
   const CapturePieceToHistory* captureHistory;
   const PieceToHistory** continuationHistory;
   Move ttMove;
@@ -154,7 +145,6 @@ private:
   Square recaptureSquare;
   Value threshold;
   Depth depth;
-  int ply;
   ExtMove moves[MAX_MOVES];
 };
 
diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/evaluate_nnue.cpp b/DroidFishApp/src/main/cpp/stockfish/nnue/evaluate_nnue.cpp
index 8828ae5..9ee599f 100644
--- a/DroidFishApp/src/main/cpp/stockfish/nnue/evaluate_nnue.cpp
+++ b/DroidFishApp/src/main/cpp/stockfish/nnue/evaluate_nnue.cpp
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -109,7 +109,7 @@ namespace Stockfish::Eval::NNUE {
   {
     write_little_endian<std::uint32_t>(stream, Version);
     write_little_endian<std::uint32_t>(stream, hashValue);
-    write_little_endian<std::uint32_t>(stream, desc.size());
+    write_little_endian<std::uint32_t>(stream, (std::uint32_t)desc.size());
     stream.write(&desc[0], desc.size());
     return !stream.fail();
   }
@@ -143,39 +143,29 @@ namespace Stockfish::Eval::NNUE {
     // overaligning stack variables with alignas() doesn't work correctly.
 
     constexpr uint64_t alignment = CacheLineSize;
+    int delta = 10 - pos.non_pawn_material() / 1515;
 
 #if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN)
     TransformedFeatureType transformedFeaturesUnaligned[
       FeatureTransformer::BufferSize + alignment / sizeof(TransformedFeatureType)];
-    char bufferUnaligned[Network::BufferSize + alignment];
 
     auto* transformedFeatures = align_ptr_up<alignment>(&transformedFeaturesUnaligned[0]);
-    auto* buffer = align_ptr_up<alignment>(&bufferUnaligned[0]);
 #else
     alignas(alignment)
       TransformedFeatureType transformedFeatures[FeatureTransformer::BufferSize];
-    alignas(alignment) char buffer[Network::BufferSize];
 #endif
 
     ASSERT_ALIGNED(transformedFeatures, alignment);
-    ASSERT_ALIGNED(buffer, alignment);
 
-    const std::size_t bucket = (pos.count<ALL_PIECES>() - 1) / 4;
+    const int bucket = (pos.count<ALL_PIECES>() - 1) / 4;
     const auto psqt = featureTransformer->transform(pos, transformedFeatures, bucket);
-    const auto output = network[bucket]->propagate(transformedFeatures, buffer);
+    const auto positional = network[bucket]->propagate(transformedFeatures);
 
-    int materialist = psqt;
-    int positional  = output[0];
-
-    int delta_npm = abs(pos.non_pawn_material(WHITE) - pos.non_pawn_material(BLACK));
-    int entertainment = (adjusted && delta_npm <= BishopValueMg - KnightValueMg ? 7 : 0);
-
-    int A = 128 - entertainment;
-    int B = 128 + entertainment;
-
-    int sum = (A * materialist + B * positional) / 128;
-
-    return static_cast<Value>( sum / OutputScale );
+    // Give more value to positional evaluation when adjusted flag is set
+    if (adjusted)
+        return static_cast<Value>(((128 - delta) * psqt + (128 + delta) * positional) / 128 / OutputScale);
+    else
+        return static_cast<Value>((psqt + positional) / OutputScale);
   }
 
   struct NnueEvalTrace {
@@ -196,27 +186,20 @@ namespace Stockfish::Eval::NNUE {
 #if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN)
     TransformedFeatureType transformedFeaturesUnaligned[
       FeatureTransformer::BufferSize + alignment / sizeof(TransformedFeatureType)];
-    char bufferUnaligned[Network::BufferSize + alignment];
 
     auto* transformedFeatures = align_ptr_up<alignment>(&transformedFeaturesUnaligned[0]);
-    auto* buffer = align_ptr_up<alignment>(&bufferUnaligned[0]);
 #else
     alignas(alignment)
       TransformedFeatureType transformedFeatures[FeatureTransformer::BufferSize];
-    alignas(alignment) char buffer[Network::BufferSize];
 #endif
 
     ASSERT_ALIGNED(transformedFeatures, alignment);
-    ASSERT_ALIGNED(buffer, alignment);
 
     NnueEvalTrace t{};
     t.correctBucket = (pos.count<ALL_PIECES>() - 1) / 4;
-    for (std::size_t bucket = 0; bucket < LayerStacks; ++bucket) {
-      const auto psqt = featureTransformer->transform(pos, transformedFeatures, bucket);
-      const auto output = network[bucket]->propagate(transformedFeatures, buffer);
-
-      int materialist = psqt;
-      int positional  = output[0];
+    for (IndexType bucket = 0; bucket < LayerStacks; ++bucket) {
+      const auto materialist = featureTransformer->transform(pos, transformedFeatures, bucket);
+      const auto positional = network[bucket]->propagate(transformedFeatures);
 
       t.psqt[bucket] = static_cast<Value>( materialist / OutputScale );
       t.positional[bucket] = static_cast<Value>( positional / OutputScale );
@@ -227,69 +210,46 @@ namespace Stockfish::Eval::NNUE {
 
   static const std::string PieceToChar(" PNBRQK  pnbrqk");
 
-  // Requires the buffer to have capacity for at least 5 values
+
+  // format_cp_compact() converts a Value into (centi)pawns and writes it in a buffer.
+  // The buffer must have capacity for at least 5 chars.
   static void format_cp_compact(Value v, char* buffer) {
 
     buffer[0] = (v < 0 ? '-' : v > 0 ? '+' : ' ');
 
     int cp = std::abs(100 * v / PawnValueEg);
-
     if (cp >= 10000)
     {
-      buffer[1] = '0' + cp / 10000; cp %= 10000;
-      buffer[2] = '0' + cp / 1000; cp %= 1000;
-      buffer[3] = '0' + cp / 100; cp %= 100;
-      buffer[4] = ' ';
+        buffer[1] = '0' + cp / 10000; cp %= 10000;
+        buffer[2] = '0' + cp / 1000; cp %= 1000;
+        buffer[3] = '0' + cp / 100;
+        buffer[4] = ' ';
     }
     else if (cp >= 1000)
     {
-      buffer[1] = '0' + cp / 1000; cp %= 1000;
-      buffer[2] = '0' + cp / 100; cp %= 100;
-      buffer[3] = '.';
-      buffer[4] = '0' + cp / 10;
+        buffer[1] = '0' + cp / 1000; cp %= 1000;
+        buffer[2] = '0' + cp / 100; cp %= 100;
+        buffer[3] = '.';
+        buffer[4] = '0' + cp / 10;
     }
     else
     {
-      buffer[1] = '0' + cp / 100; cp %= 100;
-      buffer[2] = '.';
-      buffer[3] = '0' + cp / 10; cp %= 10;
-      buffer[4] = '0' + cp / 1;
+        buffer[1] = '0' + cp / 100; cp %= 100;
+        buffer[2] = '.';
+        buffer[3] = '0' + cp / 10; cp %= 10;
+        buffer[4] = '0' + cp / 1;
     }
   }
 
-  // Requires the buffer to have capacity for at least 7 values
+
+  // format_cp_aligned_dot() converts a Value into (centi)pawns and writes it in a buffer,
+  // always keeping two decimals. The buffer needs at least 8 chars (sign, 6 from "%6.2f", NUL).
   static void format_cp_aligned_dot(Value v, char* buffer) {
+
     buffer[0] = (v < 0 ? '-' : v > 0 ? '+' : ' ');
 
-    int cp = std::abs(100 * v / PawnValueEg);
-
-    if (cp >= 10000)
-    {
-      buffer[1] = '0' + cp / 10000; cp %= 10000;
-      buffer[2] = '0' + cp / 1000; cp %= 1000;
-      buffer[3] = '0' + cp / 100; cp %= 100;
-      buffer[4] = '.';
-      buffer[5] = '0' + cp / 10; cp %= 10;
-      buffer[6] = '0' + cp;
-    }
-    else if (cp >= 1000)
-    {
-      buffer[1] = ' ';
-      buffer[2] = '0' + cp / 1000; cp %= 1000;
-      buffer[3] = '0' + cp / 100; cp %= 100;
-      buffer[4] = '.';
-      buffer[5] = '0' + cp / 10; cp %= 10;
-      buffer[6] = '0' + cp;
-    }
-    else
-    {
-      buffer[1] = ' ';
-      buffer[2] = ' ';
-      buffer[3] = '0' + cp / 100; cp %= 100;
-      buffer[4] = '.';
-      buffer[5] = '0' + cp / 10; cp %= 10;
-      buffer[6] = '0' + cp / 1;
-    }
+    double cp = 1.0 * std::abs(int(v)) / PawnValueEg;
+    sprintf(&buffer[1], "%6.2f", cp);
   }
 
 
@@ -419,7 +379,7 @@ namespace Stockfish::Eval::NNUE {
         actualFilename = filename.value();
     else
     {
-        if (eval_file_loaded != EvalFileDefaultName)
+        if (currentEvalFileName != EvalFileDefaultName)
         {
              msg = "Failed to export a net. A non-embedded net can only be saved if the filename is specified";
 
diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/evaluate_nnue.h b/DroidFishApp/src/main/cpp/stockfish/nnue/evaluate_nnue.h
index c7fa4a9..2e4f1f5 100644
--- a/DroidFishApp/src/main/cpp/stockfish/nnue/evaluate_nnue.h
+++ b/DroidFishApp/src/main/cpp/stockfish/nnue/evaluate_nnue.h
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2.cpp b/DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2_hm.cpp
similarity index 58%
rename from DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2.cpp
rename to DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2_hm.cpp
index 57f43e5..07a1d7a 100644
--- a/DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2.cpp
+++ b/DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2_hm.cpp
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -16,31 +16,32 @@
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 
-//Definition of input features HalfKAv2 of NNUE evaluation function
+//Definition of input features HalfKAv2_hm of NNUE evaluation function
 
-#include "half_ka_v2.h"
+#include "half_ka_v2_hm.h"
 
 #include "../../position.h"
 
 namespace Stockfish::Eval::NNUE::Features {
 
   // Orient a square according to perspective (rotates by 180 for black)
-  inline Square HalfKAv2::orient(Color perspective, Square s) {
-    return Square(int(s) ^ (bool(perspective) * 56));
+  inline Square HalfKAv2_hm::orient(Color perspective, Square s, Square ksq) {
+    return Square(int(s) ^ (bool(perspective) * SQ_A8) ^ ((file_of(ksq) < FILE_E) * SQ_H1));
   }
 
   // Index of a feature for a given king position and another piece on some square
-  inline IndexType HalfKAv2::make_index(Color perspective, Square s, Piece pc, Square ksq) {
-    return IndexType(orient(perspective, s) + PieceSquareIndex[perspective][pc] + PS_NB * ksq);
+  inline IndexType HalfKAv2_hm::make_index(Color perspective, Square s, Piece pc, Square ksq) {
+    Square o_ksq = orient(perspective, ksq, ksq);
+    return IndexType(orient(perspective, s, ksq) + PieceSquareIndex[perspective][pc] + PS_NB * KingBuckets[o_ksq]);
   }
 
   // Get a list of indices for active features
-  void HalfKAv2::append_active_indices(
+  void HalfKAv2_hm::append_active_indices(
     const Position& pos,
     Color perspective,
-    ValueListInserter<IndexType> active
+    IndexList& active
   ) {
-    Square ksq = orient(perspective, pos.square<KING>(perspective));
+    Square ksq = pos.square<KING>(perspective);
     Bitboard bb = pos.pieces();
     while (bb)
     {
@@ -52,33 +53,30 @@ namespace Stockfish::Eval::NNUE::Features {
 
   // append_changed_indices() : get a list of indices for recently changed features
 
-  void HalfKAv2::append_changed_indices(
+  void HalfKAv2_hm::append_changed_indices(
     Square ksq,
-    StateInfo* st,
+    const DirtyPiece& dp,
     Color perspective,
-    ValueListInserter<IndexType> removed,
-    ValueListInserter<IndexType> added
+    IndexList& removed,
+    IndexList& added
   ) {
-    const auto& dp = st->dirtyPiece;
-    Square oriented_ksq = orient(perspective, ksq);
     for (int i = 0; i < dp.dirty_num; ++i) {
-      Piece pc = dp.piece[i];
       if (dp.from[i] != SQ_NONE)
-        removed.push_back(make_index(perspective, dp.from[i], pc, oriented_ksq));
+        removed.push_back(make_index(perspective, dp.from[i], dp.piece[i], ksq));
       if (dp.to[i] != SQ_NONE)
-        added.push_back(make_index(perspective, dp.to[i], pc, oriented_ksq));
+        added.push_back(make_index(perspective, dp.to[i], dp.piece[i], ksq));
     }
   }
 
-  int HalfKAv2::update_cost(StateInfo* st) {
+  int HalfKAv2_hm::update_cost(const StateInfo* st) {
     return st->dirtyPiece.dirty_num;
   }
 
-  int HalfKAv2::refresh_cost(const Position& pos) {
+  int HalfKAv2_hm::refresh_cost(const Position& pos) {
     return pos.count<ALL_PIECES>();
   }
 
-  bool HalfKAv2::requires_refresh(StateInfo* st, Color perspective) {
+  bool HalfKAv2_hm::requires_refresh(const StateInfo* st, Color perspective) {
     return st->dirtyPiece.piece[0] == make_piece(perspective, KING);
   }
 
diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2.h b/DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2_hm.h
similarity index 71%
rename from DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2.h
rename to DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2_hm.h
index e4b2edd..1e6da0b 100644
--- a/DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2.h
+++ b/DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2_hm.h
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -18,8 +18,8 @@
 
 //Definition of input features HalfKP of NNUE evaluation function
 
-#ifndef NNUE_FEATURES_HALF_KA_V2_H_INCLUDED
-#define NNUE_FEATURES_HALF_KA_V2_H_INCLUDED
+#ifndef NNUE_FEATURES_HALF_KA_V2_HM_H_INCLUDED
+#define NNUE_FEATURES_HALF_KA_V2_HM_H_INCLUDED
 
 #include "../nnue_common.h"
 
@@ -32,9 +32,9 @@ namespace Stockfish {
 
 namespace Stockfish::Eval::NNUE::Features {
 
-  // Feature HalfKAv2: Combination of the position of own king
-  // and the position of pieces
-  class HalfKAv2 {
+  // Feature HalfKAv2_hm: Combination of the position of own king
+  // and the position of pieces. The position is mirrored so that the king is always on files e..h.
+  class HalfKAv2_hm {
 
     // unique number for each piece type on each square
     enum {
@@ -50,7 +50,7 @@ namespace Stockfish::Eval::NNUE::Features {
       PS_W_QUEEN  =  8 * SQUARE_NB,
       PS_B_QUEEN  =  9 * SQUARE_NB,
       PS_KING     =  10 * SQUARE_NB,
-      PS_NB = 11 * SQUARE_NB
+      PS_NB       =  11 * SQUARE_NB
     };
 
     static constexpr IndexType PieceSquareIndex[COLOR_NB][PIECE_NB] = {
@@ -63,49 +63,62 @@ namespace Stockfish::Eval::NNUE::Features {
     };
 
     // Orient a square according to perspective (rotates by 180 for black)
-    static Square orient(Color perspective, Square s);
+    static Square orient(Color perspective, Square s, Square ksq);
 
     // Index of a feature for a given king position and another piece on some square
     static IndexType make_index(Color perspective, Square s, Piece pc, Square ksq);
 
    public:
     // Feature name
-    static constexpr const char* Name = "HalfKAv2(Friend)";
+    static constexpr const char* Name = "HalfKAv2_hm(Friend)";
 
     // Hash value embedded in the evaluation file
-    static constexpr std::uint32_t HashValue = 0x5f234cb8u;
+    static constexpr std::uint32_t HashValue = 0x7f234cb8u;
 
     // Number of feature dimensions
     static constexpr IndexType Dimensions =
-        static_cast<IndexType>(SQUARE_NB) * static_cast<IndexType>(PS_NB);
+        static_cast<IndexType>(SQUARE_NB) * static_cast<IndexType>(PS_NB) / 2;
+
+    static constexpr int KingBuckets[64] = {
+      -1, -1, -1, -1, 31, 30, 29, 28,
+      -1, -1, -1, -1, 27, 26, 25, 24,
+      -1, -1, -1, -1, 23, 22, 21, 20,
+      -1, -1, -1, -1, 19, 18, 17, 16,
+      -1, -1, -1, -1, 15, 14, 13, 12,
+      -1, -1, -1, -1, 11, 10,  9,  8,
+      -1, -1, -1, -1,  7,  6,  5,  4,
+      -1, -1, -1, -1,  3,  2,  1,  0
+    };
 
     // Maximum number of simultaneously active features.
     static constexpr IndexType MaxActiveDimensions = 32;
+    using IndexList = ValueList<IndexType, MaxActiveDimensions>;
 
     // Get a list of indices for active features
     static void append_active_indices(
       const Position& pos,
       Color perspective,
-      ValueListInserter<IndexType> active);
+      IndexList& active);
 
     // Get a list of indices for recently changed features
     static void append_changed_indices(
       Square ksq,
-      StateInfo* st,
+      const DirtyPiece& dp,
       Color perspective,
-      ValueListInserter<IndexType> removed,
-      ValueListInserter<IndexType> added);
+      IndexList& removed,
+      IndexList& added
+    );
 
     // Returns the cost of updating one perspective, the most costly one.
     // Assumes no refresh needed.
-    static int update_cost(StateInfo* st);
+    static int update_cost(const StateInfo* st);
     static int refresh_cost(const Position& pos);
 
     // Returns whether the change stored in this StateInfo means that
     // a full accumulator refresh is required.
-    static bool requires_refresh(StateInfo* st, Color perspective);
+    static bool requires_refresh(const StateInfo* st, Color perspective);
   };
 
 }  // namespace Stockfish::Eval::NNUE::Features
 
-#endif // #ifndef NNUE_FEATURES_HALF_KA_V2_H_INCLUDED
+#endif // #ifndef NNUE_FEATURES_HALF_KA_V2_HM_H_INCLUDED
diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/layers/affine_transform.h b/DroidFishApp/src/main/cpp/stockfish/nnue/layers/affine_transform.h
index 9a3b778..9a99260 100644
--- a/DroidFishApp/src/main/cpp/stockfish/nnue/layers/affine_transform.h
+++ b/DroidFishApp/src/main/cpp/stockfish/nnue/layers/affine_transform.h
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -22,398 +22,338 @@
 #define NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED
 
 #include <iostream>
+#include <algorithm>
+#include <type_traits>
 #include "../nnue_common.h"
+#include "../../simd.h"
+
+/*
+  This file contains the definition for a fully connected layer (aka affine transform).
+  Two approaches are employed, depending on the sizes of the transform.
+
+  Approach 1:
+    - used when the PaddedInputDimensions >= 128
+    - uses AVX512 if possible
+    - processes inputs in batches of 2*InputSimdWidth
+      - so in batches of 128 for AVX512
+    - the weight blocks of size InputSimdWidth are transposed such that
+      access is sequential
+    - N columns of the weight matrix are processed at a time, where N
+      depends on the architecture (the number of registers)
+    - accumulate + hadd is used
+
+  Approach 2:
+    - used when the PaddedInputDimensions < 128
+    - does not use AVX512
+    - expected use-case is for when PaddedInputDimensions == 32 and InputDimensions <= 32.
+      - that's why AVX512 is hard to implement
+    - expected use-case is small layers
+      - not as well optimized as approach 1
+    - inputs are processed in chunks of 4, weights are respectively transposed
+    - accumulation happens directly to int32s
+*/
 
 namespace Stockfish::Eval::NNUE::Layers {
 
-  // Affine transformation layer
-  template <typename PreviousLayer, IndexType OutDims>
-  class AffineTransform {
-   public:
-    // Input/output type
-    using InputType = typename PreviousLayer::OutputType;
-    using OutputType = std::int32_t;
-    static_assert(std::is_same<InputType, std::uint8_t>::value, "");
+// Fallback implementation for older/other architectures.
+// Identical for both approaches. Requires the input to be padded to at least 16 values.
+#if !defined(USE_SSSE3)
+  template <IndexType InputDimensions, IndexType PaddedInputDimensions, IndexType OutputDimensions>
+  static void affine_transform_non_ssse3(std::int32_t* output, const std::int8_t* weights, const std::int32_t* biases, const std::uint8_t* input)
+  {
+# if defined(USE_SSE2)
+    // At least a multiple of 16, with SSE2.
+    constexpr IndexType NumChunks = ceil_to_multiple<IndexType>(InputDimensions, 16) / 16;
+    const __m128i Zeros = _mm_setzero_si128();
+    const auto inputVector = reinterpret_cast<const __m128i*>(input);
 
-    // Number of input/output dimensions
-    static constexpr IndexType InputDimensions =
-        PreviousLayer::OutputDimensions;
-    static constexpr IndexType OutputDimensions = OutDims;
-    static constexpr IndexType PaddedInputDimensions =
-        ceil_to_multiple<IndexType>(InputDimensions, MaxSimdWidth);
-#if defined (USE_AVX512)
-    static constexpr const IndexType OutputSimdWidth = SimdWidth / 2;
-#elif defined (USE_SSSE3)
-    static constexpr const IndexType OutputSimdWidth = SimdWidth / 4;
+# elif defined(USE_MMX)
+    constexpr IndexType NumChunks = ceil_to_multiple<IndexType>(InputDimensions, 8) / 8;
+    const __m64 Zeros = _mm_setzero_si64();
+    const auto inputVector = reinterpret_cast<const __m64*>(input);
+
+# elif defined(USE_NEON)
+    constexpr IndexType NumChunks = ceil_to_multiple<IndexType>(InputDimensions, 16) / 16;
+    const auto inputVector = reinterpret_cast<const int8x8_t*>(input);
+# endif
+
+    for (IndexType i = 0; i < OutputDimensions; ++i) {
+      const IndexType offset = i * PaddedInputDimensions;
+
+# if defined(USE_SSE2)
+      __m128i sumLo = _mm_cvtsi32_si128(biases[i]);
+      __m128i sumHi = Zeros;
+      const auto row = reinterpret_cast<const __m128i*>(&weights[offset]);
+      for (IndexType j = 0; j < NumChunks; ++j) {
+        __m128i row_j = _mm_load_si128(&row[j]);
+        __m128i input_j = _mm_load_si128(&inputVector[j]);
+        __m128i extendedRowLo = _mm_srai_epi16(_mm_unpacklo_epi8(row_j, row_j), 8);
+        __m128i extendedRowHi = _mm_srai_epi16(_mm_unpackhi_epi8(row_j, row_j), 8);
+        __m128i extendedInputLo = _mm_unpacklo_epi8(input_j, Zeros);
+        __m128i extendedInputHi = _mm_unpackhi_epi8(input_j, Zeros);
+        __m128i productLo = _mm_madd_epi16(extendedRowLo, extendedInputLo);
+        __m128i productHi = _mm_madd_epi16(extendedRowHi, extendedInputHi);
+        sumLo = _mm_add_epi32(sumLo, productLo);
+        sumHi = _mm_add_epi32(sumHi, productHi);
+      }
+      __m128i sum = _mm_add_epi32(sumLo, sumHi);
+      __m128i sumHigh_64 = _mm_shuffle_epi32(sum, _MM_SHUFFLE(1, 0, 3, 2));
+      sum = _mm_add_epi32(sum, sumHigh_64);
+      __m128i sum_second_32 = _mm_shufflelo_epi16(sum, _MM_SHUFFLE(1, 0, 3, 2));
+      sum = _mm_add_epi32(sum, sum_second_32);
+      output[i] = _mm_cvtsi128_si32(sum);
+
+# elif defined(USE_MMX)
+      __m64 sumLo = _mm_cvtsi32_si64(biases[i]);
+      __m64 sumHi = Zeros;
+      const auto row = reinterpret_cast<const __m64*>(&weights[offset]);
+      for (IndexType j = 0; j < NumChunks; ++j) {
+        __m64 row_j = row[j];
+        __m64 input_j = inputVector[j];
+        __m64 extendedRowLo = _mm_srai_pi16(_mm_unpacklo_pi8(row_j, row_j), 8);
+        __m64 extendedRowHi = _mm_srai_pi16(_mm_unpackhi_pi8(row_j, row_j), 8);
+        __m64 extendedInputLo = _mm_unpacklo_pi8(input_j, Zeros);
+        __m64 extendedInputHi = _mm_unpackhi_pi8(input_j, Zeros);
+        __m64 productLo = _mm_madd_pi16(extendedRowLo, extendedInputLo);
+        __m64 productHi = _mm_madd_pi16(extendedRowHi, extendedInputHi);
+        sumLo = _mm_add_pi32(sumLo, productLo);
+        sumHi = _mm_add_pi32(sumHi, productHi);
+      }
+      __m64 sum = _mm_add_pi32(sumLo, sumHi);
+      sum = _mm_add_pi32(sum, _mm_unpackhi_pi32(sum, sum));
+      output[i] = _mm_cvtsi64_si32(sum);
+
+# elif defined(USE_NEON)
+      int32x4_t sum = {biases[i]};
+      const auto row = reinterpret_cast<const int8x8_t*>(&weights[offset]);
+      for (IndexType j = 0; j < NumChunks; ++j) {
+        int16x8_t product = vmull_s8(inputVector[j * 2], row[j * 2]);
+        product = vmlal_s8(product, inputVector[j * 2 + 1], row[j * 2 + 1]);
+        sum = vpadalq_s16(sum, product);
+      }
+      output[i] = sum[0] + sum[1] + sum[2] + sum[3];
+
+# else
+      std::int32_t sum = biases[i];
+      for (IndexType j = 0; j < InputDimensions; ++j) {
+        sum += weights[offset + j] * input[j];
+      }
+      output[i] = sum;
+# endif
+    }
+
+# if defined(USE_MMX)
+    _mm_empty();
+# endif
+  }
 #endif
 
-    // Size of forward propagation buffer used in this layer
-    static constexpr std::size_t SelfBufferSize =
-        ceil_to_multiple(OutputDimensions * sizeof(OutputType), CacheLineSize);
+  template <IndexType InDims, IndexType OutDims, typename Enabled = void>
+  class AffineTransform;
 
-    // Size of the forward propagation buffer used from the input layer to this layer
-    static constexpr std::size_t BufferSize =
-        PreviousLayer::BufferSize + SelfBufferSize;
+  // A specialization for large inputs.
+  template <IndexType InDims, IndexType OutDims>
+  class AffineTransform<InDims, OutDims, std::enable_if_t<(ceil_to_multiple<IndexType>(InDims, MaxSimdWidth) >= 2*64)>> {
+   public:
+    // Input/output type
+    using InputType = std::uint8_t;
+    using OutputType = std::int32_t;
+
+    // Number of input/output dimensions
+    static constexpr IndexType InputDimensions = InDims;
+    static constexpr IndexType OutputDimensions = OutDims;
+
+    static constexpr IndexType PaddedInputDimensions =
+      ceil_to_multiple<IndexType>(InputDimensions, MaxSimdWidth);
+    static constexpr IndexType PaddedOutputDimensions =
+      ceil_to_multiple<IndexType>(OutputDimensions, MaxSimdWidth);
+
+    using OutputBuffer = OutputType[PaddedOutputDimensions];
+
+    static_assert(PaddedInputDimensions >= 128, "Something went wrong. This specialization should not have been chosen.");
+
+#if defined (USE_AVX512)
+    static constexpr const IndexType InputSimdWidth = 64;
+    static constexpr const IndexType MaxNumOutputRegs = 16;
+#elif defined (USE_AVX2)
+    static constexpr const IndexType InputSimdWidth = 32;
+    static constexpr const IndexType MaxNumOutputRegs = 8;
+#elif defined (USE_SSSE3)
+    static constexpr const IndexType InputSimdWidth = 16;
+    static constexpr const IndexType MaxNumOutputRegs = 8;
+#elif defined (USE_NEON)
+    static constexpr const IndexType InputSimdWidth = 8;
+    static constexpr const IndexType MaxNumOutputRegs = 8;
+#else
+    // The fallback implementation will not have permuted weights.
+    // We define these to avoid a lot of ifdefs later.
+    static constexpr const IndexType InputSimdWidth = 1;
+    static constexpr const IndexType MaxNumOutputRegs = 1;
+#endif
+
+    // A big block is a region in the weight matrix of the size [PaddedInputDimensions, NumOutputRegs].
+    // A small block is a region of size [InputSimdWidth, 1]
+
+    static constexpr const IndexType NumOutputRegs = std::min(MaxNumOutputRegs, OutputDimensions);
+    static constexpr const IndexType SmallBlockSize = InputSimdWidth;
+    static constexpr const IndexType BigBlockSize = NumOutputRegs * PaddedInputDimensions;
+    static constexpr const IndexType NumSmallBlocksInBigBlock = BigBlockSize / SmallBlockSize;
+    static constexpr const IndexType NumSmallBlocksPerOutput = PaddedInputDimensions / SmallBlockSize;
+    static constexpr const IndexType NumBigBlocks = OutputDimensions / NumOutputRegs;
+
+    static_assert(OutputDimensions % NumOutputRegs == 0);
 
     // Hash value embedded in the evaluation file
-    static constexpr std::uint32_t get_hash_value() {
+    static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) {
       std::uint32_t hashValue = 0xCC03DAE4u;
       hashValue += OutputDimensions;
-      hashValue ^= PreviousLayer::get_hash_value() >> 1;
-      hashValue ^= PreviousLayer::get_hash_value() << 31;
+      hashValue ^= prevHash >> 1;
+      hashValue ^= prevHash << 31;
       return hashValue;
     }
 
+    /*
+      Transposes the small blocks within each big block.
+      As a result, the weights can be traversed sequentially during inference.
+    */
+    static IndexType get_weight_index(IndexType i)
+    {
+      const IndexType smallBlock = (i / SmallBlockSize) % NumSmallBlocksInBigBlock;
+      const IndexType smallBlockCol = smallBlock / NumSmallBlocksPerOutput;
+      const IndexType smallBlockRow = smallBlock % NumSmallBlocksPerOutput;
+      const IndexType bigBlock   = i / BigBlockSize;
+      const IndexType rest       = i % SmallBlockSize;
+
+      const IndexType idx =
+          bigBlock * BigBlockSize
+        + smallBlockRow * SmallBlockSize * NumOutputRegs
+        + smallBlockCol * SmallBlockSize
+        + rest;
+
+      return idx;
+    }
+
     // Read network parameters
     bool read_parameters(std::istream& stream) {
-      if (!previousLayer.read_parameters(stream)) return false;
-      for (std::size_t i = 0; i < OutputDimensions; ++i)
+      for (IndexType i = 0; i < OutputDimensions; ++i)
         biases[i] = read_little_endian<BiasType>(stream);
-      for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
-#if !defined (USE_SSSE3)
-        weights[i] = read_little_endian<WeightType>(stream);
-#else
-        weights[
-          (i / 4) % (PaddedInputDimensions / 4) * OutputDimensions * 4 +
-          i / PaddedInputDimensions * 4 +
-          i % 4
-        ] = read_little_endian<WeightType>(stream);
-#endif
+
+      for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
+        weights[get_weight_index(i)] = read_little_endian<WeightType>(stream);
 
       return !stream.fail();
     }
 
     // Write network parameters
     bool write_parameters(std::ostream& stream) const {
-      if (!previousLayer.write_parameters(stream)) return false;
-      for (std::size_t i = 0; i < OutputDimensions; ++i)
+      for (IndexType i = 0; i < OutputDimensions; ++i)
           write_little_endian<BiasType>(stream, biases[i]);
-#if !defined (USE_SSSE3)
-      for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
-          write_little_endian<WeightType>(stream, weights[i]);
-#else
-      std::unique_ptr<WeightType[]> unscrambledWeights = std::make_unique<WeightType[]>(OutputDimensions * PaddedInputDimensions);
-      for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) {
-          unscrambledWeights[i] =
-              weights[
-                (i / 4) % (PaddedInputDimensions / 4) * OutputDimensions * 4 +
-                i / PaddedInputDimensions * 4 +
-                i % 4
-              ];
-      }
 
-      for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
-          write_little_endian<WeightType>(stream, unscrambledWeights[i]);
-#endif
+      for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
+        write_little_endian<WeightType>(stream, weights[get_weight_index(i)]);
 
       return !stream.fail();
     }
 
     // Forward propagation
     const OutputType* propagate(
-        const TransformedFeatureType* transformedFeatures, char* buffer) const {
-      const auto input = previousLayer.propagate(
-          transformedFeatures, buffer + SelfBufferSize);
+        const InputType* input, OutputType* output) const {
 
 #if defined (USE_AVX512)
-
-      [[maybe_unused]] const __m512i Ones512 = _mm512_set1_epi16(1);
-
-      [[maybe_unused]] auto m512_hadd = [](__m512i sum, int bias) -> int {
-        return _mm512_reduce_add_epi32(sum) + bias;
-      };
-
-      [[maybe_unused]] auto m512_add_dpbusd_epi32 = [=](__m512i& acc, __m512i a, __m512i b) {
-#if defined (USE_VNNI)
-        acc = _mm512_dpbusd_epi32(acc, a, b);
-#else
-        __m512i product0 = _mm512_maddubs_epi16(a, b);
-        product0 = _mm512_madd_epi16(product0, Ones512);
-        acc = _mm512_add_epi32(acc, product0);
-#endif
-      };
-
-      [[maybe_unused]] auto m512_add_dpbusd_epi32x4 = [=](__m512i& acc, __m512i a0, __m512i b0, __m512i a1, __m512i b1,
-                                                                        __m512i a2, __m512i b2, __m512i a3, __m512i b3) {
-#if defined (USE_VNNI)
-        acc = _mm512_dpbusd_epi32(acc, a0, b0);
-        acc = _mm512_dpbusd_epi32(acc, a1, b1);
-        acc = _mm512_dpbusd_epi32(acc, a2, b2);
-        acc = _mm512_dpbusd_epi32(acc, a3, b3);
-#else
-        __m512i product0 = _mm512_maddubs_epi16(a0, b0);
-        __m512i product1 = _mm512_maddubs_epi16(a1, b1);
-        __m512i product2 = _mm512_maddubs_epi16(a2, b2);
-        __m512i product3 = _mm512_maddubs_epi16(a3, b3);
-        product0 = _mm512_adds_epi16(product0, product1);
-        product0 = _mm512_madd_epi16(product0, Ones512);
-        product2 = _mm512_adds_epi16(product2, product3);
-        product2 = _mm512_madd_epi16(product2, Ones512);
-        acc = _mm512_add_epi32(acc, _mm512_add_epi32(product0, product2));
-#endif
-      };
-
-#endif
-#if defined (USE_AVX2)
-
-      [[maybe_unused]] const __m256i Ones256 = _mm256_set1_epi16(1);
-
-      [[maybe_unused]] auto m256_hadd = [](__m256i sum, int bias) -> int {
-        __m128i sum128 = _mm_add_epi32(_mm256_castsi256_si128(sum), _mm256_extracti128_si256(sum, 1));
-        sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_BADC));
-        sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_CDAB));
-        return _mm_cvtsi128_si32(sum128) + bias;
-      };
-
-      [[maybe_unused]] auto m256_add_dpbusd_epi32 = [=](__m256i& acc, __m256i a, __m256i b) {
-#if defined (USE_VNNI)
-        acc = _mm256_dpbusd_epi32(acc, a, b);
-#else
-        __m256i product0 = _mm256_maddubs_epi16(a, b);
-        product0 = _mm256_madd_epi16(product0, Ones256);
-        acc = _mm256_add_epi32(acc, product0);
-#endif
-      };
-
-      [[maybe_unused]] auto m256_add_dpbusd_epi32x4 = [=](__m256i& acc, __m256i a0, __m256i b0, __m256i a1, __m256i b1,
-                                                                        __m256i a2, __m256i b2, __m256i a3, __m256i b3) {
-#if defined (USE_VNNI)
-        acc = _mm256_dpbusd_epi32(acc, a0, b0);
-        acc = _mm256_dpbusd_epi32(acc, a1, b1);
-        acc = _mm256_dpbusd_epi32(acc, a2, b2);
-        acc = _mm256_dpbusd_epi32(acc, a3, b3);
-#else
-        __m256i product0 = _mm256_maddubs_epi16(a0, b0);
-        __m256i product1 = _mm256_maddubs_epi16(a1, b1);
-        __m256i product2 = _mm256_maddubs_epi16(a2, b2);
-        __m256i product3 = _mm256_maddubs_epi16(a3, b3);
-        product0 = _mm256_adds_epi16(product0, product1);
-        product0 = _mm256_madd_epi16(product0, Ones256);
-        product2 = _mm256_adds_epi16(product2, product3);
-        product2 = _mm256_madd_epi16(product2, Ones256);
-        acc = _mm256_add_epi32(acc, _mm256_add_epi32(product0, product2));
-#endif
-      };
-
-#endif
-#if defined (USE_SSSE3)
-
-      [[maybe_unused]] const __m128i Ones128 = _mm_set1_epi16(1);
-
-      [[maybe_unused]] auto m128_hadd = [](__m128i sum, int bias) -> int {
-        sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0x4E)); //_MM_PERM_BADC
-        sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0xB1)); //_MM_PERM_CDAB
-        return _mm_cvtsi128_si32(sum) + bias;
-      };
-
-      [[maybe_unused]] auto m128_add_dpbusd_epi32 = [=](__m128i& acc, __m128i a, __m128i b) {
-        __m128i product0 = _mm_maddubs_epi16(a, b);
-        product0 = _mm_madd_epi16(product0, Ones128);
-        acc = _mm_add_epi32(acc, product0);
-      };
-
-      [[maybe_unused]] auto m128_add_dpbusd_epi32x4 = [=](__m128i& acc, __m128i a0, __m128i b0, __m128i a1, __m128i b1,
-                                                                        __m128i a2, __m128i b2, __m128i a3, __m128i b3) {
-        __m128i product0 = _mm_maddubs_epi16(a0, b0);
-        __m128i product1 = _mm_maddubs_epi16(a1, b1);
-        __m128i product2 = _mm_maddubs_epi16(a2, b2);
-        __m128i product3 = _mm_maddubs_epi16(a3, b3);
-        product0 = _mm_adds_epi16(product0, product1);
-        product0 = _mm_madd_epi16(product0, Ones128);
-        product2 = _mm_adds_epi16(product2, product3);
-        product2 = _mm_madd_epi16(product2, Ones128);
-        acc = _mm_add_epi32(acc, _mm_add_epi32(product0, product2));
-      };
-
-#endif
-
-#if defined (USE_AVX512)
-      using vec_t = __m512i;
-      #define vec_setzero _mm512_setzero_si512
-      #define vec_set_32 _mm512_set1_epi32
-      auto& vec_add_dpbusd_32 = m512_add_dpbusd_epi32;
-      auto& vec_add_dpbusd_32x4 = m512_add_dpbusd_epi32x4;
-      auto& vec_hadd = m512_hadd;
+      using acc_vec_t = __m512i;
+      using bias_vec_t = __m128i;
+      using weight_vec_t = __m512i;
+      using in_vec_t = __m512i;
+      #define vec_zero _mm512_setzero_si512()
+      #define vec_add_dpbusd_32x2 Simd::m512_add_dpbusd_epi32x2
+      #define vec_hadd Simd::m512_hadd
+      #define vec_haddx4 Simd::m512_haddx4
 #elif defined (USE_AVX2)
-      using vec_t = __m256i;
-      #define vec_setzero _mm256_setzero_si256
-      #define vec_set_32 _mm256_set1_epi32
-      auto& vec_add_dpbusd_32 = m256_add_dpbusd_epi32;
-      auto& vec_add_dpbusd_32x4 = m256_add_dpbusd_epi32x4;
-      auto& vec_hadd = m256_hadd;
+      using acc_vec_t = __m256i;
+      using bias_vec_t = __m128i;
+      using weight_vec_t = __m256i;
+      using in_vec_t = __m256i;
+      #define vec_zero _mm256_setzero_si256()
+      #define vec_add_dpbusd_32x2 Simd::m256_add_dpbusd_epi32x2
+      #define vec_hadd Simd::m256_hadd
+      #define vec_haddx4 Simd::m256_haddx4
 #elif defined (USE_SSSE3)
-      using vec_t = __m128i;
-      #define vec_setzero _mm_setzero_si128
-      #define vec_set_32 _mm_set1_epi32
-      auto& vec_add_dpbusd_32 = m128_add_dpbusd_epi32;
-      auto& vec_add_dpbusd_32x4 = m128_add_dpbusd_epi32x4;
-      auto& vec_hadd = m128_hadd;
+      using acc_vec_t = __m128i;
+      using bias_vec_t = __m128i;
+      using weight_vec_t = __m128i;
+      using in_vec_t = __m128i;
+      #define vec_zero _mm_setzero_si128()
+      #define vec_add_dpbusd_32x2 Simd::m128_add_dpbusd_epi32x2
+      #define vec_hadd Simd::m128_hadd
+      #define vec_haddx4 Simd::m128_haddx4
+#elif defined (USE_NEON)
+      using acc_vec_t = int32x4_t;
+      using bias_vec_t = int32x4_t;
+      using weight_vec_t = int8x8_t;
+      using in_vec_t = int8x8_t;
+      #define vec_zero {0}
+      #define vec_add_dpbusd_32x2 Simd::neon_m128_add_dpbusd_epi32x2
+      #define vec_hadd Simd::neon_m128_hadd
+      #define vec_haddx4 Simd::neon_m128_haddx4
 #endif
 
-#if defined (USE_SSSE3)
-      // Different layout, we process 4 inputs at a time, always.
-      static_assert(InputDimensions % 4 == 0);
+#if defined (USE_SSSE3) || defined (USE_NEON)
+      const in_vec_t* invec = reinterpret_cast<const in_vec_t*>(input);
 
-      const auto output = reinterpret_cast<OutputType*>(buffer);
-      const auto inputVector = reinterpret_cast<const vec_t*>(input);
-
-      static_assert(OutputDimensions % OutputSimdWidth == 0 || OutputDimensions == 1);
-
-      // OutputDimensions is either 1 or a multiple of SimdWidth
-      // because then it is also an input dimension.
-      if constexpr (OutputDimensions % OutputSimdWidth == 0)
+      // Perform accumulation to registers for each big block
+      for (IndexType bigBlock = 0; bigBlock < NumBigBlocks; ++bigBlock)
       {
-          constexpr IndexType NumChunks = InputDimensions / 4;
+        acc_vec_t acc[NumOutputRegs] = { vec_zero };
 
-          const auto input32 = reinterpret_cast<const std::int32_t*>(input);
-          vec_t* outptr = reinterpret_cast<vec_t*>(output);
-          std::memcpy(output, biases, OutputDimensions * sizeof(OutputType));
+        // Each big block has NumOutputRegs small blocks in each "row", one per register.
+        // We process two small blocks at a time to save on one addition without VNNI.
+        for (IndexType smallBlock = 0; smallBlock < NumSmallBlocksPerOutput; smallBlock += 2)
+        {
+          const weight_vec_t* weightvec =
+            reinterpret_cast<const weight_vec_t*>(
+                weights
+              + bigBlock * BigBlockSize
+              + smallBlock * SmallBlockSize * NumOutputRegs);
 
-          for (int i = 0; i < (int)NumChunks - 3; i += 4)
+          const in_vec_t in0 = invec[smallBlock + 0];
+          const in_vec_t in1 = invec[smallBlock + 1];
+
+          for (IndexType k = 0; k < NumOutputRegs; ++k)
+            vec_add_dpbusd_32x2(acc[k], in0, weightvec[k], in1, weightvec[k + NumOutputRegs]);
+        }
+
+        // Horizontally add all accumulators.
+        if constexpr (NumOutputRegs % 4 == 0)
+        {
+          bias_vec_t* outputvec = reinterpret_cast<bias_vec_t*>(output);
+          const bias_vec_t* biasvec = reinterpret_cast<const bias_vec_t*>(biases);
+
+          for (IndexType k = 0; k < NumOutputRegs; k += 4)
           {
-              const vec_t in0 = vec_set_32(input32[i + 0]);
-              const vec_t in1 = vec_set_32(input32[i + 1]);
-              const vec_t in2 = vec_set_32(input32[i + 2]);
-              const vec_t in3 = vec_set_32(input32[i + 3]);
-              const auto col0 = reinterpret_cast<const vec_t*>(&weights[(i + 0) * OutputDimensions * 4]);
-              const auto col1 = reinterpret_cast<const vec_t*>(&weights[(i + 1) * OutputDimensions * 4]);
-              const auto col2 = reinterpret_cast<const vec_t*>(&weights[(i + 2) * OutputDimensions * 4]);
-              const auto col3 = reinterpret_cast<const vec_t*>(&weights[(i + 3) * OutputDimensions * 4]);
-              for (int j = 0; j * OutputSimdWidth < OutputDimensions; ++j)
-                  vec_add_dpbusd_32x4(outptr[j], in0, col0[j], in1, col1[j], in2, col2[j], in3, col3[j]);
+            const IndexType idx = (bigBlock * NumOutputRegs + k) / 4;
+            outputvec[idx] = vec_haddx4(acc[k+0], acc[k+1], acc[k+2], acc[k+3], biasvec[idx]);
           }
-      }
-      else if constexpr (OutputDimensions == 1)
-      {
-#if defined (USE_AVX512)
-          if constexpr (PaddedInputDimensions % (SimdWidth * 2) != 0)
+        }
+        else
+        {
+          for (IndexType k = 0; k < NumOutputRegs; ++k)
           {
-              constexpr IndexType NumChunks = PaddedInputDimensions / SimdWidth;
-              const auto inputVector256 = reinterpret_cast<const __m256i*>(input);
-
-              __m256i sum0 = _mm256_setzero_si256();
-              const auto row0 = reinterpret_cast<const __m256i*>(&weights[0]);
-
-              for (int j = 0; j < (int)NumChunks; ++j)
-              {
-                  const __m256i in = inputVector256[j];
-                  m256_add_dpbusd_epi32(sum0, in, row0[j]);
-              }
-              output[0] = m256_hadd(sum0, biases[0]);
-          }
-          else
-#endif
-          {
-#if defined (USE_AVX512)
-              constexpr IndexType NumChunks = PaddedInputDimensions / (SimdWidth * 2);
-#else
-              constexpr IndexType NumChunks = PaddedInputDimensions / SimdWidth;
-#endif
-              vec_t sum0 = vec_setzero();
-              const auto row0 = reinterpret_cast<const vec_t*>(&weights[0]);
-
-              for (int j = 0; j < (int)NumChunks; ++j)
-              {
-                  const vec_t in = inputVector[j];
-                  vec_add_dpbusd_32(sum0, in, row0[j]);
-              }
-              output[0] = vec_hadd(sum0, biases[0]);
+            const IndexType idx = (bigBlock * NumOutputRegs + k);
+            output[idx] = vec_hadd(acc[k], biases[idx]);
           }
+        }
       }
 
+# undef vec_zero
+# undef vec_add_dpbusd_32x2
+# undef vec_hadd
+# undef vec_haddx4
 #else
-
-// Use old implementation for the other architectures.
-
-      auto output = reinterpret_cast<OutputType*>(buffer);
-
-#if defined(USE_SSE2)
-      // At least a multiple of 16, with SSE2.
-      static_assert(InputDimensions % SimdWidth == 0);
-      constexpr IndexType NumChunks = InputDimensions / SimdWidth;
-      const __m128i Zeros = _mm_setzero_si128();
-      const auto inputVector = reinterpret_cast<const __m128i*>(input);
-
-#elif defined(USE_MMX)
-      static_assert(InputDimensions % SimdWidth == 0);
-      constexpr IndexType NumChunks = InputDimensions / SimdWidth;
-      const __m64 Zeros = _mm_setzero_si64();
-      const auto inputVector = reinterpret_cast<const __m64*>(input);
-
-#elif defined(USE_NEON)
-      static_assert(InputDimensions % SimdWidth == 0);
-      constexpr IndexType NumChunks = InputDimensions / SimdWidth;
-      const auto inputVector = reinterpret_cast<const int8x8_t*>(input);
-#endif
-
-      for (IndexType i = 0; i < OutputDimensions; ++i) {
-        const IndexType offset = i * PaddedInputDimensions;
-
-#if defined(USE_SSE2)
-        __m128i sumLo = _mm_cvtsi32_si128(biases[i]);
-        __m128i sumHi = Zeros;
-        const auto row = reinterpret_cast<const __m128i*>(&weights[offset]);
-        for (IndexType j = 0; j < NumChunks; ++j) {
-          __m128i row_j = _mm_load_si128(&row[j]);
-          __m128i input_j = _mm_load_si128(&inputVector[j]);
-          __m128i extendedRowLo = _mm_srai_epi16(_mm_unpacklo_epi8(row_j, row_j), 8);
-          __m128i extendedRowHi = _mm_srai_epi16(_mm_unpackhi_epi8(row_j, row_j), 8);
-          __m128i extendedInputLo = _mm_unpacklo_epi8(input_j, Zeros);
-          __m128i extendedInputHi = _mm_unpackhi_epi8(input_j, Zeros);
-          __m128i productLo = _mm_madd_epi16(extendedRowLo, extendedInputLo);
-          __m128i productHi = _mm_madd_epi16(extendedRowHi, extendedInputHi);
-          sumLo = _mm_add_epi32(sumLo, productLo);
-          sumHi = _mm_add_epi32(sumHi, productHi);
-        }
-        __m128i sum = _mm_add_epi32(sumLo, sumHi);
-        __m128i sumHigh_64 = _mm_shuffle_epi32(sum, _MM_SHUFFLE(1, 0, 3, 2));
-        sum = _mm_add_epi32(sum, sumHigh_64);
-        __m128i sum_second_32 = _mm_shufflelo_epi16(sum, _MM_SHUFFLE(1, 0, 3, 2));
-        sum = _mm_add_epi32(sum, sum_second_32);
-        output[i] = _mm_cvtsi128_si32(sum);
-
-#elif defined(USE_MMX)
-        __m64 sumLo = _mm_cvtsi32_si64(biases[i]);
-        __m64 sumHi = Zeros;
-        const auto row = reinterpret_cast<const __m64*>(&weights[offset]);
-        for (IndexType j = 0; j < NumChunks; ++j) {
-          __m64 row_j = row[j];
-          __m64 input_j = inputVector[j];
-          __m64 extendedRowLo = _mm_srai_pi16(_mm_unpacklo_pi8(row_j, row_j), 8);
-          __m64 extendedRowHi = _mm_srai_pi16(_mm_unpackhi_pi8(row_j, row_j), 8);
-          __m64 extendedInputLo = _mm_unpacklo_pi8(input_j, Zeros);
-          __m64 extendedInputHi = _mm_unpackhi_pi8(input_j, Zeros);
-          __m64 productLo = _mm_madd_pi16(extendedRowLo, extendedInputLo);
-          __m64 productHi = _mm_madd_pi16(extendedRowHi, extendedInputHi);
-          sumLo = _mm_add_pi32(sumLo, productLo);
-          sumHi = _mm_add_pi32(sumHi, productHi);
-        }
-        __m64 sum = _mm_add_pi32(sumLo, sumHi);
-        sum = _mm_add_pi32(sum, _mm_unpackhi_pi32(sum, sum));
-        output[i] = _mm_cvtsi64_si32(sum);
-
-#elif defined(USE_NEON)
-        int32x4_t sum = {biases[i]};
-        const auto row = reinterpret_cast<const int8x8_t*>(&weights[offset]);
-        for (IndexType j = 0; j < NumChunks; ++j) {
-          int16x8_t product = vmull_s8(inputVector[j * 2], row[j * 2]);
-          product = vmlal_s8(product, inputVector[j * 2 + 1], row[j * 2 + 1]);
-          sum = vpadalq_s16(sum, product);
-        }
-        output[i] = sum[0] + sum[1] + sum[2] + sum[3];
-
-#else
-        OutputType sum = biases[i];
-        for (IndexType j = 0; j < InputDimensions; ++j) {
-          sum += weights[offset + j] * input[j];
-        }
-        output[i] = sum;
-#endif
-
-      }
-#if defined(USE_MMX)
-      _mm_empty();
-#endif
+      // Use old implementation for the other architectures.
+      affine_transform_non_ssse3<
+        InputDimensions,
+        PaddedInputDimensions,
+        OutputDimensions>(output, weights, biases, input);
 
 #endif
 
@@ -424,7 +364,171 @@ namespace Stockfish::Eval::NNUE::Layers {
     using BiasType = OutputType;
     using WeightType = std::int8_t;
 
-    PreviousLayer previousLayer;
+    alignas(CacheLineSize) BiasType biases[OutputDimensions];
+    alignas(CacheLineSize) WeightType weights[OutputDimensions * PaddedInputDimensions];
+  };
+
+  template <IndexType InDims, IndexType OutDims>
+  class AffineTransform<InDims, OutDims, std::enable_if_t<(ceil_to_multiple<IndexType>(InDims, MaxSimdWidth) < 2*64)>> {
+   public:
+    // Input/output type:
+    // unsigned 8-bit inputs, signed 32-bit outputs
+    using InputType = std::uint8_t;
+    using OutputType = std::int32_t;
+
+    // Number of input/output dimensions
+    static constexpr IndexType InputDimensions = InDims;
+    static constexpr IndexType OutputDimensions = OutDims;
+
+    static constexpr IndexType PaddedInputDimensions =
+      ceil_to_multiple<IndexType>(InputDimensions, MaxSimdWidth);
+    static constexpr IndexType PaddedOutputDimensions =
+      ceil_to_multiple<IndexType>(OutputDimensions, MaxSimdWidth);
+
+    using OutputBuffer = OutputType[PaddedOutputDimensions];
+
+    static_assert(PaddedInputDimensions < 128, "Something went wrong. This specialization should not have been chosen.");
+
+#if defined (USE_SSSE3)
+    static constexpr const IndexType OutputSimdWidth = SimdWidth / 4;
+    static constexpr const IndexType InputSimdWidth = SimdWidth;
+#endif
+
+    // Hash value embedded in the evaluation file
+    static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) {
+      std::uint32_t hashValue = 0xCC03DAE4u;
+      hashValue += OutputDimensions;
+      hashValue ^= prevHash >> 1;
+      hashValue ^= prevHash << 31;
+      return hashValue;
+    }
+
+    static IndexType get_weight_index_scrambled(IndexType i)
+    {
+      return
+        (i / 4) % (PaddedInputDimensions / 4) * OutputDimensions * 4 +
+        i / PaddedInputDimensions * 4 +
+        i % 4;
+    }
+
+    static IndexType get_weight_index(IndexType i)
+    {
+#if defined (USE_SSSE3)
+      return get_weight_index_scrambled(i);
+#else
+      return i;
+#endif
+    }
+
+    // Read network parameters
+    bool read_parameters(std::istream& stream) {
+      for (IndexType i = 0; i < OutputDimensions; ++i)
+        biases[i] = read_little_endian<BiasType>(stream);
+      for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
+        weights[get_weight_index(i)] = read_little_endian<WeightType>(stream);
+
+      return !stream.fail();
+    }
+
+    // Write network parameters
+    bool write_parameters(std::ostream& stream) const {
+      for (IndexType i = 0; i < OutputDimensions; ++i)
+        write_little_endian<BiasType>(stream, biases[i]);
+
+      for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
+        write_little_endian<WeightType>(stream, weights[get_weight_index(i)]);
+
+      return !stream.fail();
+    }
+    // Forward propagation
+    const OutputType* propagate(
+        const InputType* input, OutputType* output) const {
+
+#if defined (USE_AVX2)
+      using vec_t = __m256i;
+      #define vec_setzero _mm256_setzero_si256
+      #define vec_set_32 _mm256_set1_epi32
+      #define vec_add_dpbusd_32 Simd::m256_add_dpbusd_epi32
+      #define vec_add_dpbusd_32x2 Simd::m256_add_dpbusd_epi32x2
+      #define vec_add_dpbusd_32x4 Simd::m256_add_dpbusd_epi32x4
+      #define vec_hadd Simd::m256_hadd
+      #define vec_haddx4 Simd::m256_haddx4
+#elif defined (USE_SSSE3)
+      using vec_t = __m128i;
+      #define vec_setzero _mm_setzero_si128
+      #define vec_set_32 _mm_set1_epi32
+      #define vec_add_dpbusd_32 Simd::m128_add_dpbusd_epi32
+      #define vec_add_dpbusd_32x2 Simd::m128_add_dpbusd_epi32x2
+      #define vec_add_dpbusd_32x4 Simd::m128_add_dpbusd_epi32x4
+      #define vec_hadd Simd::m128_hadd
+      #define vec_haddx4 Simd::m128_haddx4
+#endif
+
+#if defined (USE_SSSE3)
+      const auto inputVector = reinterpret_cast<const vec_t*>(input);
+
+      static_assert(OutputDimensions % OutputSimdWidth == 0 || OutputDimensions == 1);
+
+      if constexpr (OutputDimensions % OutputSimdWidth == 0)
+      {
+        constexpr IndexType NumChunks = ceil_to_multiple<IndexType>(InputDimensions, 8) / 4;
+        constexpr IndexType NumRegs = OutputDimensions / OutputSimdWidth;
+
+        const auto input32 = reinterpret_cast<const std::int32_t*>(input);
+        const vec_t* biasvec = reinterpret_cast<const vec_t*>(biases);
+        vec_t acc[NumRegs];
+        for (IndexType k = 0; k < NumRegs; ++k)
+          acc[k] = biasvec[k];
+
+        for (IndexType i = 0; i < NumChunks; i += 2)
+        {
+          const vec_t in0 = vec_set_32(input32[i + 0]);
+          const vec_t in1 = vec_set_32(input32[i + 1]);
+          const auto col0 = reinterpret_cast<const vec_t*>(&weights[(i + 0) * OutputDimensions * 4]);
+          const auto col1 = reinterpret_cast<const vec_t*>(&weights[(i + 1) * OutputDimensions * 4]);
+          for (IndexType k = 0; k < NumRegs; ++k)
+            vec_add_dpbusd_32x2(acc[k], in0, col0[k], in1, col1[k]);
+        }
+
+        vec_t* outptr = reinterpret_cast<vec_t*>(output);
+        for (IndexType k = 0; k < NumRegs; ++k)
+          outptr[k] = acc[k];
+      }
+      else if constexpr (OutputDimensions == 1)
+      {
+        constexpr IndexType NumChunks = PaddedInputDimensions / SimdWidth;
+        vec_t sum0 = vec_setzero();
+        const auto row0 = reinterpret_cast<const vec_t*>(&weights[0]);
+
+        for (int j = 0; j < (int)NumChunks; ++j)
+        {
+          const vec_t in = inputVector[j];
+          vec_add_dpbusd_32(sum0, in, row0[j]);
+        }
+        output[0] = vec_hadd(sum0, biases[0]);
+      }
+
+# undef vec_setzero
+# undef vec_set_32
+# undef vec_add_dpbusd_32
+# undef vec_add_dpbusd_32x2
+# undef vec_add_dpbusd_32x4
+# undef vec_hadd
+# undef vec_haddx4
+#else
+      // Use old implementation for the other architectures.
+      affine_transform_non_ssse3<
+        InputDimensions,
+        PaddedInputDimensions,
+        OutputDimensions>(output, weights, biases, input);
+#endif
+
+      return output;
+    }
+
+   private:
+    using BiasType = OutputType;
+    using WeightType = std::int8_t;
 
     alignas(CacheLineSize) BiasType biases[OutputDimensions];
     alignas(CacheLineSize) WeightType weights[OutputDimensions * PaddedInputDimensions];
diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/layers/clipped_relu.h b/DroidFishApp/src/main/cpp/stockfish/nnue/layers/clipped_relu.h
index 65455df..f94d308 100644
--- a/DroidFishApp/src/main/cpp/stockfish/nnue/layers/clipped_relu.h
+++ b/DroidFishApp/src/main/cpp/stockfish/nnue/layers/clipped_relu.h
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -26,50 +26,41 @@
 namespace Stockfish::Eval::NNUE::Layers {
 
   // Clipped ReLU
-  template <typename PreviousLayer>
+  template <IndexType InDims>
   class ClippedReLU {
    public:
     // Input/output type
-    using InputType = typename PreviousLayer::OutputType;
+    using InputType = std::int32_t;
     using OutputType = std::uint8_t;
-    static_assert(std::is_same<InputType, std::int32_t>::value, "");
 
     // Number of input/output dimensions
-    static constexpr IndexType InputDimensions =
-        PreviousLayer::OutputDimensions;
+    static constexpr IndexType InputDimensions = InDims;
     static constexpr IndexType OutputDimensions = InputDimensions;
+    static constexpr IndexType PaddedOutputDimensions =
+        ceil_to_multiple<IndexType>(OutputDimensions, 32);
 
-    // Size of forward propagation buffer used in this layer
-    static constexpr std::size_t SelfBufferSize =
-        ceil_to_multiple(OutputDimensions * sizeof(OutputType), CacheLineSize);
-
-    // Size of the forward propagation buffer used from the input layer to this layer
-    static constexpr std::size_t BufferSize =
-        PreviousLayer::BufferSize + SelfBufferSize;
+    using OutputBuffer = OutputType[PaddedOutputDimensions];
 
     // Hash value embedded in the evaluation file
-    static constexpr std::uint32_t get_hash_value() {
+    static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) {
       std::uint32_t hashValue = 0x538D24C7u;
-      hashValue += PreviousLayer::get_hash_value();
+      hashValue += prevHash;
       return hashValue;
     }
 
     // Read network parameters
-    bool read_parameters(std::istream& stream) {
-      return previousLayer.read_parameters(stream);
+    bool read_parameters(std::istream&) {
+      return true;
     }
 
     // Write network parameters
-    bool write_parameters(std::ostream& stream) const {
-      return previousLayer.write_parameters(stream);
+    bool write_parameters(std::ostream&) const {
+      return true;
     }
 
     // Forward propagation
     const OutputType* propagate(
-        const TransformedFeatureType* transformedFeatures, char* buffer) const {
-      const auto input = previousLayer.propagate(
-          transformedFeatures, buffer + SelfBufferSize);
-      const auto output = reinterpret_cast<OutputType*>(buffer);
+        const InputType* input, OutputType* output) const {
 
   #if defined(USE_AVX2)
       if constexpr (InputDimensions % SimdWidth == 0) {
@@ -179,11 +170,9 @@ namespace Stockfish::Eval::NNUE::Layers {
         output[i] = static_cast<OutputType>(
             std::max(0, std::min(127, input[i] >> WeightScaleBits)));
       }
+
       return output;
     }
-
-   private:
-    PreviousLayer previousLayer;
   };
 
 }  // namespace Stockfish::Eval::NNUE::Layers
diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/layers/input_slice.h b/DroidFishApp/src/main/cpp/stockfish/nnue/layers/input_slice.h
deleted file mode 100644
index b6bf172..0000000
--- a/DroidFishApp/src/main/cpp/stockfish/nnue/layers/input_slice.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
-  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
-
-  Stockfish is free software: you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation, either version 3 of the License, or
-  (at your option) any later version.
-
-  Stockfish is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-// NNUE evaluation function layer InputSlice definition
-
-#ifndef NNUE_LAYERS_INPUT_SLICE_H_INCLUDED
-#define NNUE_LAYERS_INPUT_SLICE_H_INCLUDED
-
-#include "../nnue_common.h"
-
-namespace Stockfish::Eval::NNUE::Layers {
-
-// Input layer
-template <IndexType OutDims, IndexType Offset = 0>
-class InputSlice {
- public:
-  // Need to maintain alignment
-  static_assert(Offset % MaxSimdWidth == 0, "");
-
-  // Output type
-  using OutputType = TransformedFeatureType;
-
-  // Output dimensionality
-  static constexpr IndexType OutputDimensions = OutDims;
-
-  // Size of forward propagation buffer used from the input layer to this layer
-  static constexpr std::size_t BufferSize = 0;
-
-  // Hash value embedded in the evaluation file
-  static constexpr std::uint32_t get_hash_value() {
-    std::uint32_t hashValue = 0xEC42E90Du;
-    hashValue ^= OutputDimensions ^ (Offset << 10);
-    return hashValue;
-  }
-
-  // Read network parameters
-  bool read_parameters(std::istream& /*stream*/) {
-    return true;
-  }
-
-  // Write network parameters
-  bool write_parameters(std::ostream& /*stream*/) const {
-    return true;
-  }
-
-  // Forward propagation
-  const OutputType* propagate(
-      const TransformedFeatureType* transformedFeatures,
-      char* /*buffer*/) const {
-    return transformedFeatures + Offset;
-  }
-
- private:
-};
-
-}  // namespace Stockfish::Eval::NNUE::Layers
-
-#endif // #ifndef NNUE_LAYERS_INPUT_SLICE_H_INCLUDED
diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_accumulator.h b/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_accumulator.h
index d41ecf9..600483b 100644
--- a/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_accumulator.h
+++ b/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_accumulator.h
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_architecture.h b/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_architecture.h
index 879a39c..4f9596a 100644
--- a/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_architecture.h
+++ b/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_architecture.h
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -21,39 +21,112 @@
 #ifndef NNUE_ARCHITECTURE_H_INCLUDED
 #define NNUE_ARCHITECTURE_H_INCLUDED
 
+#include <memory>
+
 #include "nnue_common.h"
 
-#include "features/half_ka_v2.h"
+#include "features/half_ka_v2_hm.h"
 
-#include "layers/input_slice.h"
 #include "layers/affine_transform.h"
 #include "layers/clipped_relu.h"
 
+#include "../misc.h"
+
 namespace Stockfish::Eval::NNUE {
 
-  // Input features used in evaluation function
-  using FeatureSet = Features::HalfKAv2;
+// Input features used in evaluation function
+using FeatureSet = Features::HalfKAv2_hm;
 
-  // Number of input feature dimensions after conversion
-  constexpr IndexType TransformedFeatureDimensions = 512;
-  constexpr IndexType PSQTBuckets = 8;
-  constexpr IndexType LayerStacks = 8;
+// Number of input feature dimensions after conversion
+constexpr IndexType TransformedFeatureDimensions = 1024;
+constexpr IndexType PSQTBuckets = 8;
+constexpr IndexType LayerStacks = 8;
 
-  namespace Layers {
+struct Network
+{
+  static constexpr int FC_0_OUTPUTS = 15;
+  static constexpr int FC_1_OUTPUTS = 32;
 
-    // Define network structure
-    using InputLayer = InputSlice<TransformedFeatureDimensions * 2>;
-    using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 16>>;
-    using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
-    using OutputLayer = AffineTransform<HiddenLayer2, 1>;
+  Layers::AffineTransform<TransformedFeatureDimensions, FC_0_OUTPUTS + 1> fc_0;
+  Layers::ClippedReLU<FC_0_OUTPUTS + 1> ac_0;
+  Layers::AffineTransform<FC_0_OUTPUTS, FC_1_OUTPUTS> fc_1;
+  Layers::ClippedReLU<FC_1_OUTPUTS> ac_1;
+  Layers::AffineTransform<FC_1_OUTPUTS, 1> fc_2;
 
-  }  // namespace Layers
+  // Hash value embedded in the evaluation file
+  static constexpr std::uint32_t get_hash_value() {
+    // input slice hash
+    std::uint32_t hashValue = 0xEC42E90Du;
+    hashValue ^= TransformedFeatureDimensions * 2;
 
-  using Network = Layers::OutputLayer;
+    hashValue = decltype(fc_0)::get_hash_value(hashValue);
+    hashValue = decltype(ac_0)::get_hash_value(hashValue);
+    hashValue = decltype(fc_1)::get_hash_value(hashValue);
+    hashValue = decltype(ac_1)::get_hash_value(hashValue);
+    hashValue = decltype(fc_2)::get_hash_value(hashValue);
 
-  static_assert(TransformedFeatureDimensions % MaxSimdWidth == 0, "");
-  static_assert(Network::OutputDimensions == 1, "");
-  static_assert(std::is_same<Network::OutputType, std::int32_t>::value, "");
+    return hashValue;
+  }
+
+  // Read network parameters
+  bool read_parameters(std::istream& stream) {
+    if (!fc_0.read_parameters(stream)) return false;
+    if (!ac_0.read_parameters(stream)) return false;
+    if (!fc_1.read_parameters(stream)) return false;
+    if (!ac_1.read_parameters(stream)) return false;
+    if (!fc_2.read_parameters(stream)) return false;
+    return true;
+  }
+
+  // Write network parameters
+  bool write_parameters(std::ostream& stream) const {
+    if (!fc_0.write_parameters(stream)) return false;
+    if (!ac_0.write_parameters(stream)) return false;
+    if (!fc_1.write_parameters(stream)) return false;
+    if (!ac_1.write_parameters(stream)) return false;
+    if (!fc_2.write_parameters(stream)) return false;
+    return true;
+  }
+
+  std::int32_t propagate(const TransformedFeatureType* transformedFeatures)
+  {
+    struct alignas(CacheLineSize) Buffer
+    {
+      alignas(CacheLineSize) decltype(fc_0)::OutputBuffer fc_0_out;
+      alignas(CacheLineSize) decltype(ac_0)::OutputBuffer ac_0_out;
+      alignas(CacheLineSize) decltype(fc_1)::OutputBuffer fc_1_out;
+      alignas(CacheLineSize) decltype(ac_1)::OutputBuffer ac_1_out;
+      alignas(CacheLineSize) decltype(fc_2)::OutputBuffer fc_2_out;
+
+      Buffer()
+      {
+          std::memset(this, 0, sizeof(*this));
+      }
+    };
+
+#if defined(__clang__) && (__APPLE__)
+    // workaround for a bug reported with xcode 12
+    static thread_local auto tlsBuffer = std::make_unique<Buffer>();
+    // Access TLS only once, cache result.
+    Buffer& buffer = *tlsBuffer;
+#else
+    alignas(CacheLineSize) static thread_local Buffer buffer;
+#endif
+
+    fc_0.propagate(transformedFeatures, buffer.fc_0_out);
+    ac_0.propagate(buffer.fc_0_out, buffer.ac_0_out);
+    fc_1.propagate(buffer.ac_0_out, buffer.fc_1_out);
+    ac_1.propagate(buffer.fc_1_out, buffer.ac_1_out);
+    fc_2.propagate(buffer.ac_1_out, buffer.fc_2_out);
+
+    // buffer.fc_0_out[FC_0_OUTPUTS] is such that 1.0 is equal to 127*(1<<WeightScaleBits) in quantized form
+    // but we want 1.0 to be equal to 600*OutputScale
+    std::int32_t fwdOut = int(buffer.fc_0_out[FC_0_OUTPUTS]) * (600*OutputScale) / (127*(1<<WeightScaleBits));
+    std::int32_t outputValue = buffer.fc_2_out[0] + fwdOut;
+
+    return outputValue;
+  }
+};
 
 }  // namespace Stockfish::Eval::NNUE
 
diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_common.h b/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_common.h
index 75ac786..1795618 100644
--- a/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_common.h
+++ b/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_common.h
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -109,7 +109,7 @@ namespace Stockfish::Eval::NNUE {
 
   // write_little_endian() is our utility to write an integer (signed or unsigned, any size)
   // to a stream in little-endian order. We swap the byte order before the write if
-  // necessary to always write in little endian order, independantly of the byte
+  // necessary to always write in little endian order, independently of the byte
   // ordering of the compiling machine.
   template <typename IntType>
   inline void write_little_endian(std::ostream& stream, IntType value) {
@@ -127,11 +127,11 @@ namespace Stockfish::Eval::NNUE {
           {
             for (; i + 1 < sizeof(IntType); ++i)
             {
-                u[i] = v;
+                u[i] = (std::uint8_t)v;
                 v >>= 8;
             }
           }
-          u[i] = v;
+          u[i] = (std::uint8_t)v;
 
           stream.write(reinterpret_cast<char*>(u), sizeof(IntType));
       }
diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_feature_transformer.h b/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_feature_transformer.h
index 59a965a..c969ac6 100644
--- a/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_feature_transformer.h
+++ b/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_feature_transformer.h
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -47,12 +47,22 @@ namespace Stockfish::Eval::NNUE {
   #define vec_store(a,b) _mm512_store_si512(a,b)
   #define vec_add_16(a,b) _mm512_add_epi16(a,b)
   #define vec_sub_16(a,b) _mm512_sub_epi16(a,b)
+  #define vec_mul_16(a,b) _mm512_mullo_epi16(a,b)
+  #define vec_zero() _mm512_setzero_epi32()
+  #define vec_set_16(a) _mm512_set1_epi16(a)
+  #define vec_max_16(a,b) _mm512_max_epi16(a,b)
+  #define vec_min_16(a,b) _mm512_min_epi16(a,b)
+  inline vec_t vec_msb_pack_16(vec_t a, vec_t b){
+    vec_t compacted = _mm512_packs_epi16(_mm512_srli_epi16(a,7),_mm512_srli_epi16(b,7));
+    return _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 2, 4, 6, 1, 3, 5, 7), compacted);
+  }
   #define vec_load_psqt(a) _mm256_load_si256(a)
   #define vec_store_psqt(a,b) _mm256_store_si256(a,b)
   #define vec_add_psqt_32(a,b) _mm256_add_epi32(a,b)
   #define vec_sub_psqt_32(a,b) _mm256_sub_epi32(a,b)
   #define vec_zero_psqt() _mm256_setzero_si256()
   #define NumRegistersSIMD 32
+  #define MaxChunkSize 64
 
   #elif USE_AVX2
   typedef __m256i vec_t;
@@ -61,12 +71,22 @@ namespace Stockfish::Eval::NNUE {
   #define vec_store(a,b) _mm256_store_si256(a,b)
   #define vec_add_16(a,b) _mm256_add_epi16(a,b)
   #define vec_sub_16(a,b) _mm256_sub_epi16(a,b)
+  #define vec_mul_16(a,b) _mm256_mullo_epi16(a,b)
+  #define vec_zero() _mm256_setzero_si256()
+  #define vec_set_16(a) _mm256_set1_epi16(a)
+  #define vec_max_16(a,b) _mm256_max_epi16(a,b)
+  #define vec_min_16(a,b) _mm256_min_epi16(a,b)
+  inline vec_t vec_msb_pack_16(vec_t a, vec_t b){
+    vec_t compacted = _mm256_packs_epi16(_mm256_srli_epi16(a,7), _mm256_srli_epi16(b,7));
+    return _mm256_permute4x64_epi64(compacted, 0b11011000);
+  }
   #define vec_load_psqt(a) _mm256_load_si256(a)
   #define vec_store_psqt(a,b) _mm256_store_si256(a,b)
   #define vec_add_psqt_32(a,b) _mm256_add_epi32(a,b)
   #define vec_sub_psqt_32(a,b) _mm256_sub_epi32(a,b)
   #define vec_zero_psqt() _mm256_setzero_si256()
   #define NumRegistersSIMD 16
+  #define MaxChunkSize 32
 
   #elif USE_SSE2
   typedef __m128i vec_t;
@@ -75,12 +95,19 @@ namespace Stockfish::Eval::NNUE {
   #define vec_store(a,b) *(a)=(b)
   #define vec_add_16(a,b) _mm_add_epi16(a,b)
   #define vec_sub_16(a,b) _mm_sub_epi16(a,b)
+  #define vec_mul_16(a,b) _mm_mullo_epi16(a,b)
+  #define vec_zero() _mm_setzero_si128()
+  #define vec_set_16(a) _mm_set1_epi16(a)
+  #define vec_max_16(a,b) _mm_max_epi16(a,b)
+  #define vec_min_16(a,b) _mm_min_epi16(a,b)
+  #define vec_msb_pack_16(a,b) _mm_packs_epi16(_mm_srli_epi16(a,7),_mm_srli_epi16(b,7))
   #define vec_load_psqt(a) (*(a))
   #define vec_store_psqt(a,b) *(a)=(b)
   #define vec_add_psqt_32(a,b) _mm_add_epi32(a,b)
   #define vec_sub_psqt_32(a,b) _mm_sub_epi32(a,b)
   #define vec_zero_psqt() _mm_setzero_si128()
   #define NumRegistersSIMD (Is64Bit ? 16 : 8)
+  #define MaxChunkSize 16
 
   #elif USE_MMX
   typedef __m64 vec_t;
@@ -89,12 +116,26 @@ namespace Stockfish::Eval::NNUE {
   #define vec_store(a,b) *(a)=(b)
   #define vec_add_16(a,b) _mm_add_pi16(a,b)
   #define vec_sub_16(a,b) _mm_sub_pi16(a,b)
+  #define vec_mul_16(a,b) _mm_mullo_pi16(a,b)
+  #define vec_zero() _mm_setzero_si64()
+  #define vec_set_16(a) _mm_set1_pi16(a)
+  inline vec_t vec_max_16(vec_t a,vec_t b){
+      vec_t comparison = _mm_cmpgt_pi16(a,b);
+      return _mm_or_si64(_mm_and_si64(comparison, a), _mm_andnot_si64(comparison, b));
+  }
+  inline vec_t vec_min_16(vec_t a,vec_t b){
+      vec_t comparison = _mm_cmpgt_pi16(a,b);
+      return _mm_or_si64(_mm_and_si64(comparison, b), _mm_andnot_si64(comparison, a));
+  }
+  #define vec_msb_pack_16(a,b) _mm_packs_pi16(_mm_srli_pi16(a,7),_mm_srli_pi16(b,7))
   #define vec_load_psqt(a) (*(a))
   #define vec_store_psqt(a,b) *(a)=(b)
   #define vec_add_psqt_32(a,b) _mm_add_pi32(a,b)
   #define vec_sub_psqt_32(a,b) _mm_sub_pi32(a,b)
   #define vec_zero_psqt() _mm_setzero_si64()
+  #define vec_cleanup() _mm_empty()
   #define NumRegistersSIMD 8
+  #define MaxChunkSize 8
 
   #elif USE_NEON
   typedef int16x8_t vec_t;
@@ -103,12 +144,24 @@ namespace Stockfish::Eval::NNUE {
   #define vec_store(a,b) *(a)=(b)
   #define vec_add_16(a,b) vaddq_s16(a,b)
   #define vec_sub_16(a,b) vsubq_s16(a,b)
+  #define vec_mul_16(a,b) vmulq_s16(a,b)
+  #define vec_zero() vec_t{0}
+  #define vec_set_16(a) vdupq_n_s16(a)
+  #define vec_max_16(a,b) vmaxq_s16(a,b)
+  #define vec_min_16(a,b) vminq_s16(a,b)
+  inline vec_t vec_msb_pack_16(vec_t a, vec_t b){
+        const int8x8_t shifta = vshrn_n_s16(a, 7);
+        const int8x8_t shiftb = vshrn_n_s16(b, 7);
+	const int8x16_t compacted = vcombine_s8(shifta,shiftb);
+	return *reinterpret_cast<const vec_t*> (&compacted);
+  }
   #define vec_load_psqt(a) (*(a))
   #define vec_store_psqt(a,b) *(a)=(b)
   #define vec_add_psqt_32(a,b) vaddq_s32(a,b)
   #define vec_sub_psqt_32(a,b) vsubq_s32(a,b)
   #define vec_zero_psqt() psqt_vec_t{0}
   #define NumRegistersSIMD 16
+  #define MaxChunkSize 16
 
   #else
   #undef VECTOR
@@ -123,8 +176,10 @@ namespace Stockfish::Eval::NNUE {
       // We use __m* types as template arguments, which causes GCC to emit warnings
       // about losing some attribute information. This is irrelevant to us as we
       // only take their size, so the following pragma are harmless.
+      #if defined(__GNUC__)
       #pragma GCC diagnostic push
       #pragma GCC diagnostic ignored "-Wignored-attributes"
+      #endif
 
       template <typename SIMDRegisterType,
                 typename LaneType,
@@ -156,9 +211,9 @@ namespace Stockfish::Eval::NNUE {
 
       static constexpr int NumRegs     = BestRegisterCount<vec_t, WeightType, TransformedFeatureDimensions, NumRegistersSIMD>();
       static constexpr int NumPsqtRegs = BestRegisterCount<psqt_vec_t, PSQTWeightType, PSQTBuckets, NumRegistersSIMD>();
-
+      #if defined(__GNUC__)
       #pragma GCC diagnostic pop
-
+      #endif
   #endif
 
 
@@ -183,7 +238,7 @@ namespace Stockfish::Eval::NNUE {
 
     // Number of input/output dimensions
     static constexpr IndexType InputDimensions = FeatureSet::Dimensions;
-    static constexpr IndexType OutputDimensions = HalfDimensions * 2;
+    static constexpr IndexType OutputDimensions = HalfDimensions;
 
     // Size of forward propagation buffer
     static constexpr std::size_t BufferSize =
@@ -191,7 +246,7 @@ namespace Stockfish::Eval::NNUE {
 
     // Hash value embedded in the evaluation file
     static constexpr std::uint32_t get_hash_value() {
-      return FeatureSet::HashValue ^ OutputDimensions;
+      return FeatureSet::HashValue ^ (OutputDimensions * 2);
     }
 
     // Read network parameters
@@ -229,136 +284,55 @@ namespace Stockfish::Eval::NNUE {
         ) / 2;
 
 
-  #if defined(USE_AVX512)
-
-      constexpr IndexType NumChunks = HalfDimensions / (SimdWidth * 2);
-      static_assert(HalfDimensions % (SimdWidth * 2) == 0);
-      const __m512i Control = _mm512_setr_epi64(0, 2, 4, 6, 1, 3, 5, 7);
-      const __m512i Zero = _mm512_setzero_si512();
-
       for (IndexType p = 0; p < 2; ++p)
       {
-          const IndexType offset = HalfDimensions * p;
-          auto out = reinterpret_cast<__m512i*>(&output[offset]);
-          for (IndexType j = 0; j < NumChunks; ++j)
+          const IndexType offset = (HalfDimensions / 2) * p;
+
+#if defined(VECTOR)
+
+	  constexpr IndexType OutputChunkSize = MaxChunkSize;
+          static_assert((HalfDimensions / 2) % OutputChunkSize == 0);
+          constexpr IndexType NumOutputChunks = HalfDimensions / 2 / OutputChunkSize;
+
+          vec_t Zero = vec_zero();
+          vec_t One = vec_set_16(127);
+
+          const vec_t* in0 = reinterpret_cast<const vec_t*>(&(accumulation[perspectives[p]][0]));
+          const vec_t* in1 = reinterpret_cast<const vec_t*>(&(accumulation[perspectives[p]][HalfDimensions / 2]));
+                vec_t* out = reinterpret_cast<      vec_t*>(output + offset);
+
+          for (IndexType j = 0; j < NumOutputChunks; j += 1)
           {
-              __m512i sum0 = _mm512_load_si512(&reinterpret_cast<const __m512i*>
-                                              (accumulation[perspectives[p]])[j * 2 + 0]);
-              __m512i sum1 = _mm512_load_si512(&reinterpret_cast<const __m512i*>
-                                              (accumulation[perspectives[p]])[j * 2 + 1]);
+              const vec_t sum0a = vec_max_16(vec_min_16(in0[j * 2 + 0], One), Zero);
+              const vec_t sum0b = vec_max_16(vec_min_16(in0[j * 2 + 1], One), Zero);
+              const vec_t sum1a = vec_max_16(vec_min_16(in1[j * 2 + 0], One), Zero);
+              const vec_t sum1b = vec_max_16(vec_min_16(in1[j * 2 + 1], One), Zero);
 
-              _mm512_store_si512(&out[j], _mm512_permutexvar_epi64(Control,
-                                 _mm512_max_epi8(_mm512_packs_epi16(sum0, sum1), Zero)));
+              const vec_t pa = vec_mul_16(sum0a, sum1a);
+              const vec_t pb = vec_mul_16(sum0b, sum1b);
+
+              out[j] = vec_msb_pack_16(pa, pb);
           }
-      }
-      return psqt;
 
-  #elif defined(USE_AVX2)
+#else
 
-      constexpr IndexType NumChunks = HalfDimensions / SimdWidth;
-      constexpr int Control = 0b11011000;
-      const __m256i Zero = _mm256_setzero_si256();
-
-      for (IndexType p = 0; p < 2; ++p)
-      {
-          const IndexType offset = HalfDimensions * p;
-          auto out = reinterpret_cast<__m256i*>(&output[offset]);
-          for (IndexType j = 0; j < NumChunks; ++j)
-          {
-              __m256i sum0 = _mm256_load_si256(&reinterpret_cast<const __m256i*>
-                                              (accumulation[perspectives[p]])[j * 2 + 0]);
-              __m256i sum1 = _mm256_load_si256(&reinterpret_cast<const __m256i*>
-                                              (accumulation[perspectives[p]])[j * 2 + 1]);
-
-              _mm256_store_si256(&out[j], _mm256_permute4x64_epi64(
-                                 _mm256_max_epi8(_mm256_packs_epi16(sum0, sum1), Zero), Control));
+          for (IndexType j = 0; j < HalfDimensions / 2; ++j) {
+              BiasType sum0 = accumulation[static_cast<int>(perspectives[p])][j + 0];
+              BiasType sum1 = accumulation[static_cast<int>(perspectives[p])][j + HalfDimensions / 2];
+              sum0 = std::max<int>(0, std::min<int>(127, sum0));
+              sum1 = std::max<int>(0, std::min<int>(127, sum1));
+              output[offset + j] = static_cast<OutputType>(sum0 * sum1 / 128);
           }
+
+#endif
       }
+
+#if defined(vec_cleanup)
+      vec_cleanup();
+#endif
+
       return psqt;
 
-  #elif defined(USE_SSE2)
-
-      #ifdef USE_SSE41
-      constexpr IndexType NumChunks = HalfDimensions / SimdWidth;
-      const __m128i Zero = _mm_setzero_si128();
-      #else
-      constexpr IndexType NumChunks = HalfDimensions / SimdWidth;
-      const __m128i k0x80s = _mm_set1_epi8(-128);
-      #endif
-
-      for (IndexType p = 0; p < 2; ++p)
-      {
-          const IndexType offset = HalfDimensions * p;
-          auto out = reinterpret_cast<__m128i*>(&output[offset]);
-          for (IndexType j = 0; j < NumChunks; ++j)
-          {
-              __m128i sum0 = _mm_load_si128(&reinterpret_cast<const __m128i*>
-                                           (accumulation[perspectives[p]])[j * 2 + 0]);
-              __m128i sum1 = _mm_load_si128(&reinterpret_cast<const __m128i*>
-                                           (accumulation[perspectives[p]])[j * 2 + 1]);
-              const __m128i packedbytes = _mm_packs_epi16(sum0, sum1);
-
-              #ifdef USE_SSE41
-              _mm_store_si128(&out[j], _mm_max_epi8(packedbytes, Zero));
-              #else
-              _mm_store_si128(&out[j], _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s));
-              #endif
-          }
-      }
-      return psqt;
-
-  #elif defined(USE_MMX)
-
-      constexpr IndexType NumChunks = HalfDimensions / SimdWidth;
-      const __m64 k0x80s = _mm_set1_pi8(-128);
-
-      for (IndexType p = 0; p < 2; ++p)
-      {
-          const IndexType offset = HalfDimensions * p;
-          auto out = reinterpret_cast<__m64*>(&output[offset]);
-          for (IndexType j = 0; j < NumChunks; ++j)
-          {
-              __m64 sum0 = *(&reinterpret_cast<const __m64*>(accumulation[perspectives[p]])[j * 2 + 0]);
-              __m64 sum1 = *(&reinterpret_cast<const __m64*>(accumulation[perspectives[p]])[j * 2 + 1]);
-              const __m64 packedbytes = _mm_packs_pi16(sum0, sum1);
-              out[j] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s);
-          }
-      }
-      _mm_empty();
-      return psqt;
-
-  #elif defined(USE_NEON)
-
-      constexpr IndexType NumChunks = HalfDimensions / (SimdWidth / 2);
-      const int8x8_t Zero = {0};
-
-      for (IndexType p = 0; p < 2; ++p)
-      {
-          const IndexType offset = HalfDimensions * p;
-          const auto out = reinterpret_cast<int8x8_t*>(&output[offset]);
-          for (IndexType j = 0; j < NumChunks; ++j)
-          {
-              int16x8_t sum = reinterpret_cast<const int16x8_t*>(accumulation[perspectives[p]])[j];
-              out[j] = vmax_s8(vqmovn_s16(sum), Zero);
-          }
-      }
-      return psqt;
-
-  #else
-
-      for (IndexType p = 0; p < 2; ++p)
-      {
-          const IndexType offset = HalfDimensions * p;
-          for (IndexType j = 0; j < HalfDimensions; ++j)
-          {
-              BiasType sum = accumulation[perspectives[p]][j];
-              output[offset + j] = static_cast<OutputType>(std::max<int>(0, std::min<int>(127, sum)));
-          }
-      }
-      return psqt;
-
-  #endif
-
    } // end of function transform()
 
 
@@ -370,7 +344,6 @@ namespace Stockfish::Eval::NNUE {
       // That might depend on the feature set and generally relies on the
       // feature set's update cost calculation to be correct and never
       // allow updates with more added/removed features than MaxActiveDimensions.
-      using IndexList = ValueList<IndexType, FeatureSet::MaxActiveDimensions>;
 
   #ifdef VECTOR
       // Gcc-10.2 unnecessarily spills AVX2 registers if this array
@@ -404,12 +377,12 @@ namespace Stockfish::Eval::NNUE {
 
         // Gather all features to be updated.
         const Square ksq = pos.square<KING>(perspective);
-        IndexList removed[2], added[2];
+        FeatureSet::IndexList removed[2], added[2];
         FeatureSet::append_changed_indices(
-          ksq, next, perspective, removed[0], added[0]);
+          ksq, next->dirtyPiece, perspective, removed[0], added[0]);
         for (StateInfo *st2 = pos.state(); st2 != next; st2 = st2->previous)
           FeatureSet::append_changed_indices(
-            ksq, st2, perspective, removed[1], added[1]);
+            ksq, st2->dirtyPiece, perspective, removed[1], added[1]);
 
         // Mark the accumulators as computed.
         next->accumulator.computed[perspective] = true;
@@ -534,7 +507,7 @@ namespace Stockfish::Eval::NNUE {
         // Refresh the accumulator
         auto& accumulator = pos.state()->accumulator;
         accumulator.computed[perspective] = true;
-        IndexList active;
+        FeatureSet::IndexList active;
         FeatureSet::append_active_indices(pos, perspective, active);
 
   #ifdef VECTOR
diff --git a/DroidFishApp/src/main/cpp/stockfish/pawns.cpp b/DroidFishApp/src/main/cpp/stockfish/pawns.cpp
index 70fb6f2..fdcfa02 100644
--- a/DroidFishApp/src/main/cpp/stockfish/pawns.cpp
+++ b/DroidFishApp/src/main/cpp/stockfish/pawns.cpp
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -32,30 +32,30 @@ namespace {
   #define S(mg, eg) make_score(mg, eg)
 
   // Pawn penalties
-  constexpr Score Backward      = S( 9, 22);
-  constexpr Score Doubled       = S(13, 51);
-  constexpr Score DoubledEarly  = S(20,  7);
-  constexpr Score Isolated      = S( 3, 15);
-  constexpr Score WeakLever     = S( 4, 58);
-  constexpr Score WeakUnopposed = S(13, 24);
+  constexpr Score Backward      = S( 6, 19);
+  constexpr Score Doubled       = S(11, 51);
+  constexpr Score DoubledEarly  = S(17,  7);
+  constexpr Score Isolated      = S( 1, 20);
+  constexpr Score WeakLever     = S( 2, 57);
+  constexpr Score WeakUnopposed = S(15, 18);
 
   // Bonus for blocked pawns at 5th or 6th rank
-  constexpr Score BlockedPawn[2] = { S(-17, -6), S(-9, 2) };
+  constexpr Score BlockedPawn[2] = { S(-19, -8), S(-7, 3) };
 
   constexpr Score BlockedStorm[RANK_NB] = {
-    S(0, 0), S(0, 0), S(75, 78), S(-8, 16), S(-6, 10), S(-6, 6), S(0, 2)
+    S(0, 0), S(0, 0), S(64, 75), S(-3, 14), S(-12, 19), S(-7, 4), S(-10, 5)
   };
 
   // Connected pawn bonus
-  constexpr int Connected[RANK_NB] = { 0, 5, 7, 11, 23, 48, 87 };
+  constexpr int Connected[RANK_NB] = { 0, 3, 7, 7, 15, 54, 86 };
 
   // Strength of pawn shelter for our king by [distance from edge][rank].
   // RANK_1 = 0 is used for files where we have no pawn, or pawn is behind our king.
   constexpr Value ShelterStrength[int(FILE_NB) / 2][RANK_NB] = {
-    { V( -5), V( 82), V( 92), V( 54), V( 36), V( 22), V(  28) },
-    { V(-44), V( 63), V( 33), V(-50), V(-30), V(-12), V( -62) },
-    { V(-11), V( 77), V( 22), V( -6), V( 31), V(  8), V( -45) },
-    { V(-39), V(-12), V(-29), V(-50), V(-43), V(-68), V(-164) }
+    { V(-2), V(85), V(95), V(53), V(39), V(23), V(25) },
+    { V(-55), V(64), V(32), V(-55), V(-30), V(-11), V(-61) },
+    { V(-11), V(75), V(19), V(-6), V(26), V(9), V(-47) },
+    { V(-41), V(-11), V(-27), V(-58), V(-42), V(-66), V(-163) }
   };
 
   // Danger of enemy pawns moving toward our king by [distance from edge][rank].
@@ -63,17 +63,17 @@ namespace {
   // is behind our king. Note that UnblockedStorm[0][1-2] accommodate opponent pawn
   // on edge, likely blocked by our king.
   constexpr Value UnblockedStorm[int(FILE_NB) / 2][RANK_NB] = {
-    { V( 87), V(-288), V(-168), V( 96), V( 47), V( 44), V( 46) },
-    { V( 42), V( -25), V( 120), V( 45), V( 34), V( -9), V( 24) },
-    { V( -8), V(  51), V( 167), V( 35), V( -4), V(-16), V(-12) },
-    { V(-17), V( -13), V( 100), V(  4), V(  9), V(-16), V(-31) }
+    { V(94), V(-280), V(-170), V(90), V(59), V(47), V(53) },
+    { V(43), V(-17), V(128), V(39), V(26), V(-17), V(15) },
+    { V(-9), V(62), V(170), V(34), V(-5), V(-20), V(-11) },
+    { V(-27), V(-19), V(106), V(10), V(2), V(-13), V(-24) }
   };
 
 
   // KingOnFile[semi-open Us][semi-open Them] contains bonuses/penalties
   // for king when the king is on a semi-open or open file.
-  constexpr Score KingOnFile[2][2] = {{ S(-21,10), S(-7, 1)  },
-                                     {  S(  0,-3), S( 9,-4) }};
+  constexpr Score KingOnFile[2][2] = {{ S(-18,11), S(-6,-3)  },
+                                     {  S(  0, 0), S( 5,-4) }};
 
   #undef S
   #undef V
diff --git a/DroidFishApp/src/main/cpp/stockfish/pawns.h b/DroidFishApp/src/main/cpp/stockfish/pawns.h
index 124619d..af0370f 100644
--- a/DroidFishApp/src/main/cpp/stockfish/pawns.h
+++ b/DroidFishApp/src/main/cpp/stockfish/pawns.h
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/DroidFishApp/src/main/cpp/stockfish/position.cpp b/DroidFishApp/src/main/cpp/stockfish/position.cpp
index ba015d3..ec9229e 100644
--- a/DroidFishApp/src/main/cpp/stockfish/position.cpp
+++ b/DroidFishApp/src/main/cpp/stockfish/position.cpp
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -1013,9 +1013,9 @@ void Position::do_null_move(StateInfo& newSt) {
   }
 
   st->key ^= Zobrist::side;
+  ++st->rule50;
   prefetch(TT.first_entry(key()));
 
-  ++st->rule50;
   st->pliesFromNull = 0;
 
   sideToMove = ~sideToMove;
@@ -1080,8 +1080,9 @@ bool Position::see_ge(Move m, Value threshold) const {
   if (swap <= 0)
       return true;
 
+  assert(color_of(piece_on(from)) == sideToMove);
   Bitboard occupied = pieces() ^ from ^ to;
-  Color stm = color_of(piece_on(from));
+  Color stm = sideToMove;
   Bitboard attackers = attackers_to(to, occupied);
   Bitboard stmAttackers, bb;
   int res = 1;
diff --git a/DroidFishApp/src/main/cpp/stockfish/position.h b/DroidFishApp/src/main/cpp/stockfish/position.h
index 9f694a7..e558581 100644
--- a/DroidFishApp/src/main/cpp/stockfish/position.h
+++ b/DroidFishApp/src/main/cpp/stockfish/position.h
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -120,12 +120,12 @@ public:
   Bitboard attackers_to(Square s) const;
   Bitboard attackers_to(Square s, Bitboard occupied) const;
   Bitboard slider_blockers(Bitboard sliders, Square s, Bitboard& pinners) const;
+  template<PieceType Pt> Bitboard attacks_by(Color c) const;
 
   // Properties of moves
   bool legal(Move m) const;
   bool pseudo_legal(const Move m) const;
   bool capture(Move m) const;
-  bool capture_or_promotion(Move m) const;
   bool gives_check(Move m) const;
   Piece moved_piece(Move m) const;
   Piece captured_piece() const;
@@ -285,6 +285,22 @@ inline Bitboard Position::attackers_to(Square s) const {
   return attackers_to(s, pieces());
 }
 
+template<PieceType Pt>
+inline Bitboard Position::attacks_by(Color c) const {
+
+  if constexpr (Pt == PAWN)
+      return c == WHITE ? pawn_attacks_bb<WHITE>(pieces(WHITE, PAWN))
+                        : pawn_attacks_bb<BLACK>(pieces(BLACK, PAWN));
+  else
+  {
+      Bitboard threats = 0;
+      Bitboard attackers = pieces(c, Pt);
+      while (attackers)
+          threats |= attacks_bb<Pt>(pop_lsb(attackers), pieces());
+      return threats;
+  }
+}
+
 inline Bitboard Position::checkers() const {
   return st->checkersBB;
 }
@@ -352,11 +368,6 @@ inline bool Position::is_chess960() const {
   return chess960;
 }
 
-inline bool Position::capture_or_promotion(Move m) const {
-  assert(is_ok(m));
-  return type_of(m) != NORMAL ? type_of(m) != CASTLING : !empty(to_sq(m));
-}
-
 inline bool Position::capture(Move m) const {
   assert(is_ok(m));
   // Castling is encoded as "king captures rook"
diff --git a/DroidFishApp/src/main/cpp/stockfish/psqt.cpp b/DroidFishApp/src/main/cpp/stockfish/psqt.cpp
index 33a3e00..ca5664c 100644
--- a/DroidFishApp/src/main/cpp/stockfish/psqt.cpp
+++ b/DroidFishApp/src/main/cpp/stockfish/psqt.cpp
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/DroidFishApp/src/main/cpp/stockfish/psqt.h b/DroidFishApp/src/main/cpp/stockfish/psqt.h
index 7abb148..4ee0e37 100644
--- a/DroidFishApp/src/main/cpp/stockfish/psqt.h
+++ b/DroidFishApp/src/main/cpp/stockfish/psqt.h
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/DroidFishApp/src/main/cpp/stockfish/search.cpp b/DroidFishApp/src/main/cpp/stockfish/search.cpp
index a413bd3..49d7c5c 100644
--- a/DroidFishApp/src/main/cpp/stockfish/search.cpp
+++ b/DroidFishApp/src/main/cpp/stockfish/search.cpp
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -61,20 +61,17 @@ namespace {
   // Different node types, used as a template parameter
   enum NodeType { NonPV, PV, Root };
 
-  constexpr uint64_t TtHitAverageWindow     = 4096;
-  constexpr uint64_t TtHitAverageResolution = 1024;
-
   // Futility margin
   Value futility_margin(Depth d, bool improving) {
-    return Value(214 * (d - improving));
+    return Value(168 * (d - improving));
   }
 
   // Reductions lookup table, initialized at startup
   int Reductions[MAX_MOVES]; // [depth or moveNumber]
 
-  Depth reduction(bool i, Depth d, int mn) {
+  Depth reduction(bool i, Depth d, int mn, Value delta, Value rootDelta) {
     int r = Reductions[d] * Reductions[mn];
-    return (r + 534) / 1024 + (!i && r > 904);
+    return (r + 1463 - int(delta) * 1024 / int(rootDelta)) / 1024 + (!i && r > 1010);
   }
 
   constexpr int futility_move_count(bool improving, Depth depth) {
@@ -83,7 +80,7 @@ namespace {
 
   // History and stats update bonus, based on depth
   int stat_bonus(Depth d) {
-    return d > 14 ? 73 : 6 * d * d + 229 * d - 215;
+    return std::min((9 * d + 270) * d - 311 , 2145);
   }
 
   // Add a small random component to draw evaluations to avoid 3-fold blindness
@@ -91,14 +88,22 @@ namespace {
     return VALUE_DRAW + Value(2 * (thisThread->nodes & 1) - 1);
   }
 
-  // Skill structure is used to implement strength limit
+  // Skill structure is used to implement strength limit. If we have a uci_elo then
+  // we convert it to a suitable fractional skill level, anchored to CCRL Elo
+  // (goldfish 1.13 = 2000) and using a fit through Ordo-derived Elo for match
+  // (TC 60+0.6) results spanning a wide range of k values.
   struct Skill {
-    explicit Skill(int l) : level(l) {}
-    bool enabled() const { return level < 20; }
-    bool time_to_pick(Depth depth) const { return depth == 1 + level; }
+    Skill(int skill_level, int uci_elo) {
+        if (uci_elo)
+            level = std::clamp(std::pow((uci_elo - 1346.6) / 143.4, 1 / 0.806), 0.0, 20.0);
+        else
+            level = double(skill_level);
+    }
+    bool enabled() const { return level < 20.0; }
+    bool time_to_pick(Depth depth) const { return depth == 1 + int(level); }
     Move pick_best(size_t multiPV);
 
-    int level;
+    double level;
     Move best = MOVE_NONE;
   };
 
@@ -112,7 +117,7 @@ namespace {
   Value value_from_tt(Value v, int ply, int r50c);
   void update_pv(Move* pv, Move move, Move* childPv);
   void update_continuation_histories(Stack* ss, Piece pc, Square to, int bonus);
-  void update_quiet_stats(const Position& pos, Stack* ss, Move move, int bonus, int depth);
+  void update_quiet_stats(const Position& pos, Stack* ss, Move move, int bonus);
   void update_all_stats(const Position& pos, Stack* ss, Move bestMove, Value bestValue, Value beta, Square prevSq,
                         Move* quietsSearched, int quietCount, Move* capturesSearched, int captureCount, Depth depth);
 
@@ -152,7 +157,7 @@ namespace {
 void Search::init() {
 
   for (int i = 1; i < MAX_MOVES; ++i)
-      Reductions[i] = int(21.9 * std::log(i));
+      Reductions[i] = int((20.81 + std::log(Threads.size()) / 2) * std::log(i));
 }
 
 
@@ -222,14 +227,16 @@ void MainThread::search() {
       Time.availableNodes += Limits.inc[us] - Threads.nodes_searched();
 
   Thread* bestThread = this;
+  Skill skill = Skill(Options["Skill Level"], Options["UCI_LimitStrength"] ? int(Options["UCI_Elo"]) : 0);
 
   if (   int(Options["MultiPV"]) == 1
       && !Limits.depth
-      && !(Skill(Options["Skill Level"]).enabled() || int(Options["UCI_LimitStrength"]))
+      && !skill.enabled()
       && rootMoves[0].pv[0] != MOVE_NONE)
       bestThread = Threads.get_best_thread();
 
   bestPreviousScore = bestThread->rootMoves[0].score;
+  bestPreviousAverageScore = bestThread->rootMoves[0].averageScore;
 
   // Send again PV info if we have a new best thread
   if (bestThread != this)
@@ -256,7 +263,7 @@ void Thread::search() {
   // The latter is needed for statScore and killer initialization.
   Stack stack[MAX_PLY+10], *ss = stack+7;
   Move  pv[MAX_PLY+1];
-  Value bestValue, alpha, beta, delta;
+  Value alpha, beta, delta;
   Move  lastBestMove = MOVE_NONE;
   Depth lastBestMoveDepth = 0;
   MainThread* mainThread = (this == Threads.main() ? Threads.main() : nullptr);
@@ -286,23 +293,8 @@ void Thread::search() {
               mainThread->iterValue[i] = mainThread->bestPreviousScore;
   }
 
-  std::copy(&lowPlyHistory[2][0], &lowPlyHistory.back().back() + 1, &lowPlyHistory[0][0]);
-  std::fill(&lowPlyHistory[MAX_LPH - 2][0], &lowPlyHistory.back().back() + 1, 0);
-
   size_t multiPV = size_t(Options["MultiPV"]);
-
-  // Pick integer skill levels, but non-deterministically round up or down
-  // such that the average integer skill corresponds to the input floating point one.
-  // UCI_Elo is converted to a suitable fractional skill level, using anchoring
-  // to CCRL Elo (goldfish 1.13 = 2000) and a fit through Ordo derived Elo
-  // for match (TC 60+0.6) results spanning a wide range of k values.
-  PRNG rng(now());
-  double floatLevel = Options["UCI_LimitStrength"] ?
-                      std::clamp(std::pow((Options["UCI_Elo"] - 1346.6) / 143.4, 1 / 0.806), 0.0, 20.0) :
-                        double(Options["Skill Level"]);
-  int intLevel = int(floatLevel) +
-                 ((floatLevel - int(floatLevel)) * 1024 > rng.rand<unsigned>() % 1024  ? 1 : 0);
-  Skill skill(intLevel);
+  Skill skill(Options["Skill Level"], Options["UCI_LimitStrength"] ? int(Options["UCI_Elo"]) : 0);
 
   // When playing with strength handicap enable MultiPV search that we will
   // use behind the scenes to retrieve a set of possible moves.
@@ -310,9 +302,12 @@ void Thread::search() {
       multiPV = std::max(multiPV, (size_t)4);
 
   multiPV = std::min(multiPV, rootMoves.size());
-  ttHitAverage = TtHitAverageWindow * TtHitAverageResolution / 2;
 
-  trend = SCORE_ZERO;
+  complexityAverage.set(202, 1);
+
+  trend         = SCORE_ZERO;
+  optimism[ us] = Value(39);
+  optimism[~us] = -optimism[us];
 
   int searchAgainCounter = 0;
 
@@ -353,16 +348,19 @@ void Thread::search() {
           // Reset aspiration window starting size
           if (rootDepth >= 4)
           {
-              Value prev = rootMoves[pvIdx].previousScore;
-              delta = Value(17);
+              Value prev = rootMoves[pvIdx].averageScore;
+              delta = Value(16) + int(prev) * prev / 19178;
               alpha = std::max(prev - delta,-VALUE_INFINITE);
               beta  = std::min(prev + delta, VALUE_INFINITE);
 
-              // Adjust trend based on root move's previousScore (dynamic contempt)
-              int tr = 113 * prev / (abs(prev) + 147);
-
+              // Adjust trend and optimism based on root move's averageScore
+              int tr = sigmoid(prev, 3, 8, 90, 125, 1);
               trend = (us == WHITE ?  make_score(tr, tr / 2)
                                    : -make_score(tr, tr / 2));
+
+              int opt = sigmoid(prev, 8, 17, 144, 13966, 183);
+              optimism[ us] = Value(opt);
+              optimism[~us] = -optimism[us];
           }
 
           // Start with a small aspiration window and, in the case of a fail
@@ -415,7 +413,7 @@ void Thread::search() {
               else
                   break;
 
-              delta += delta / 4 + 5;
+              delta += delta / 4 + 2;
 
               assert(alpha >= -VALUE_INFINITE && beta <= VALUE_INFINITE);
           }
@@ -449,28 +447,31 @@ void Thread::search() {
       if (skill.enabled() && skill.time_to_pick(rootDepth))
           skill.pick_best(multiPV);
 
+      // Use part of the gained time from a previous stable move for the current move
+      for (Thread* th : Threads)
+      {
+          totBestMoveChanges += th->bestMoveChanges;
+          th->bestMoveChanges = 0;
+      }
+
       // Do we have time for the next iteration? Can we stop searching now?
       if (    Limits.use_time_management()
           && !Threads.stop
           && !mainThread->stopOnPonderhit)
       {
-          double fallingEval = (318 + 6 * (mainThread->bestPreviousScore - bestValue)
-                                    + 6 * (mainThread->iterValue[iterIdx] - bestValue)) / 825.0;
+          double fallingEval = (69 + 12 * (mainThread->bestPreviousAverageScore - bestValue)
+                                    +  6 * (mainThread->iterValue[iterIdx] - bestValue)) / 781.4;
           fallingEval = std::clamp(fallingEval, 0.5, 1.5);
 
           // If the bestMove is stable over several iterations, reduce time accordingly
-          timeReduction = lastBestMoveDepth + 9 < completedDepth ? 1.92 : 0.95;
-          double reduction = (1.47 + mainThread->previousTimeReduction) / (2.32 * timeReduction);
-
-          // Use part of the gained time from a previous stable move for the current move
-          for (Thread* th : Threads)
-          {
-              totBestMoveChanges += th->bestMoveChanges;
-              th->bestMoveChanges = 0;
-          }
+          timeReduction = lastBestMoveDepth + 10 < completedDepth ? 1.63 : 0.73;
+          double reduction = (1.56 + mainThread->previousTimeReduction) / (2.20 * timeReduction);
           double bestMoveInstability = 1.073 + std::max(1.0, 2.25 - 9.9 / rootDepth)
                                               * totBestMoveChanges / Threads.size();
-          double totalTime = Time.optimum() * fallingEval * reduction * bestMoveInstability;
+          int complexity = mainThread->complexityAverage.value();
+          double complexPosition = std::clamp(1.0 + (complexity - 326) / 1618.1, 0.5, 1.5);
+
+          double totalTime = Time.optimum() * fallingEval * reduction * bestMoveInstability * complexPosition;
 
           // Cap used time in case of a single legal move for a better viewer experience in tournaments
           // yielding correct scores and sufficiently fast moves.
@@ -489,7 +490,7 @@ void Thread::search() {
           }
           else if (   Threads.increaseDepth
                    && !mainThread->ponder
-                   && Time.elapsed() > totalTime * 0.58)
+                   && Time.elapsed() > totalTime * 0.43)
                    Threads.increaseDepth = false;
           else
                    Threads.increaseDepth = true;
@@ -553,17 +554,17 @@ namespace {
     Depth extension, newDepth;
     Value bestValue, value, ttValue, eval, maxValue, probCutBeta;
     bool givesCheck, improving, didLMR, priorCapture;
-    bool captureOrPromotion, doFullDepthSearch, moveCountPruning,
-         ttCapture, singularQuietLMR;
+    bool capture, doFullDepthSearch, moveCountPruning, ttCapture;
     Piece movedPiece;
-    int moveCount, captureCount, quietCount;
+    int moveCount, captureCount, quietCount, bestMoveCount, improvement, complexity;
 
     // Step 1. Initialize node
     Thread* thisThread = pos.this_thread();
+    thisThread->depth  = depth;
     ss->inCheck        = pos.checkers();
     priorCapture       = pos.captured_piece();
     Color us           = pos.side_to_move();
-    moveCount          = captureCount = quietCount = ss->moveCount = 0;
+    moveCount          = bestMoveCount = captureCount = quietCount = ss->moveCount = 0;
     bestValue          = -VALUE_INFINITE;
     maxValue           = VALUE_INFINITE;
 
@@ -595,6 +596,8 @@ namespace {
         if (alpha >= beta)
             return alpha;
     }
+    else
+        thisThread->rootDelta = beta - alpha;
 
     assert(0 <= ss->ply && ss->ply < MAX_PLY);
 
@@ -602,6 +605,7 @@ namespace {
     (ss+1)->excludedMove = bestMove = MOVE_NONE;
     (ss+2)->killers[0]   = (ss+2)->killers[1] = MOVE_NONE;
     ss->doubleExtensions = (ss-1)->doubleExtensions;
+    ss->depth            = depth;
     Square prevSq        = to_sq((ss-1)->currentMove);
 
     // Initialize statScore to zero for the grandchildren of the current position.
@@ -621,44 +625,33 @@ namespace {
     ttValue = ss->ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE;
     ttMove =  rootNode ? thisThread->rootMoves[thisThread->pvIdx].pv[0]
             : ss->ttHit    ? tte->move() : MOVE_NONE;
+    ttCapture = ttMove && pos.capture(ttMove);
     if (!excludedMove)
         ss->ttPv = PvNode || (ss->ttHit && tte->is_pv());
 
-    // Update low ply history for previous move if we are near root and position is or has been in PV
-    if (   ss->ttPv
-        && depth > 12
-        && ss->ply - 1 < MAX_LPH
-        && !priorCapture
-        && is_ok((ss-1)->currentMove))
-        thisThread->lowPlyHistory[ss->ply - 1][from_to((ss-1)->currentMove)] << stat_bonus(depth - 5);
-
-    // thisThread->ttHitAverage can be used to approximate the running average of ttHit
-    thisThread->ttHitAverage =   (TtHitAverageWindow - 1) * thisThread->ttHitAverage / TtHitAverageWindow
-                                + TtHitAverageResolution * ss->ttHit;
-
     // At non-PV nodes we check for an early TT cutoff
     if (  !PvNode
         && ss->ttHit
-        && tte->depth() >= depth
+        && tte->depth() > depth - (thisThread->id() % 2 == 1)
         && ttValue != VALUE_NONE // Possible in case of TT access race
         && (ttValue >= beta ? (tte->bound() & BOUND_LOWER)
                             : (tte->bound() & BOUND_UPPER)))
     {
-        // If ttMove is quiet, update move sorting heuristics on TT hit
+        // If ttMove is quiet, update move sorting heuristics on TT hit (~1 Elo)
         if (ttMove)
         {
             if (ttValue >= beta)
             {
-                // Bonus for a quiet ttMove that fails high
-                if (!pos.capture_or_promotion(ttMove))
-                    update_quiet_stats(pos, ss, ttMove, stat_bonus(depth), depth);
+                // Bonus for a quiet ttMove that fails high (~3 Elo)
+                if (!ttCapture)
+                    update_quiet_stats(pos, ss, ttMove, stat_bonus(depth));
 
-                // Extra penalty for early quiet moves of the previous ply
+                // Extra penalty for early quiet moves of the previous ply (~0 Elo)
                 if ((ss-1)->moveCount <= 2 && !priorCapture)
                     update_continuation_histories(ss-1, pos.piece_on(prevSq), prevSq, -stat_bonus(depth + 1));
             }
-            // Penalty for a quiet ttMove that fails low
-            else if (!pos.capture_or_promotion(ttMove))
+            // Penalty for a quiet ttMove that fails low (~1 Elo)
+            else if (!ttCapture)
             {
                 int penalty = -stat_bonus(depth);
                 thisThread->mainHistory[us][from_to(ttMove)] << penalty;
@@ -732,6 +725,8 @@ namespace {
         // Skip early pruning when in check
         ss->staticEval = eval = VALUE_NONE;
         improving = false;
+        improvement = 0;
+        complexity = 0;
         goto moves_loop;
     }
     else if (ss->ttHit)
@@ -745,61 +740,76 @@ namespace {
         if (eval == VALUE_DRAW)
             eval = value_draw(thisThread);
 
-        // Can ttValue be used as a better position evaluation?
+        // ttValue can be used as a better position evaluation (~4 Elo)
         if (    ttValue != VALUE_NONE
             && (tte->bound() & (ttValue > eval ? BOUND_LOWER : BOUND_UPPER)))
             eval = ttValue;
     }
     else
     {
-        // In case of null move search use previous static eval with a different sign
-        // and addition of two tempos
-        if ((ss-1)->currentMove != MOVE_NULL)
-            ss->staticEval = eval = evaluate(pos);
-        else
-            ss->staticEval = eval = -(ss-1)->staticEval;
+        ss->staticEval = eval = evaluate(pos);
 
         // Save static evaluation into transposition table
-        tte->save(posKey, VALUE_NONE, ss->ttPv, BOUND_NONE, DEPTH_NONE, MOVE_NONE, eval);
+        if (!excludedMove)
+            tte->save(posKey, VALUE_NONE, ss->ttPv, BOUND_NONE, DEPTH_NONE, MOVE_NONE, eval);
     }
 
-    // Use static evaluation difference to improve quiet move ordering
+    // Use static evaluation difference to improve quiet move ordering (~3 Elo)
     if (is_ok((ss-1)->currentMove) && !(ss-1)->inCheck && !priorCapture)
     {
-        int bonus = std::clamp(-depth * 4 * int((ss-1)->staticEval + ss->staticEval), -1000, 1000);
+        int bonus = std::clamp(-16 * int((ss-1)->staticEval + ss->staticEval), -2000, 2000);
         thisThread->mainHistory[~us][from_to((ss-1)->currentMove)] << bonus;
     }
 
-    // Set up improving flag that is used in various pruning heuristics
-    // We define position as improving if static evaluation of position is better
-    // Than the previous static evaluation at our turn
-    // In case of us being in check at our previous move we look at move prior to it
-    improving =  (ss-2)->staticEval == VALUE_NONE
-               ? ss->staticEval > (ss-4)->staticEval || (ss-4)->staticEval == VALUE_NONE
-               : ss->staticEval > (ss-2)->staticEval;
+    // Set up the improvement variable, which is the difference between the current
+    // static evaluation and the previous static evaluation at our turn (if we were
+    // in check at our previous move we look at the move prior to it). The improvement
+    // margin and the improving flag are used in various pruning heuristics.
+    improvement =   (ss-2)->staticEval != VALUE_NONE ? ss->staticEval - (ss-2)->staticEval
+                  : (ss-4)->staticEval != VALUE_NONE ? ss->staticEval - (ss-4)->staticEval
+                  :                                    175;
 
-    // Step 7. Futility pruning: child node (~50 Elo)
+    improving = improvement > 0;
+    complexity = abs(ss->staticEval - (us == WHITE ? eg_value(pos.psq_score()) : -eg_value(pos.psq_score())));
+
+    thisThread->complexityAverage.update(complexity);
+
+    // Step 7. Razoring.
+    // If eval is really low, check with qsearch whether it can exceed alpha; if it
+    // can't, return a fail low.
     if (   !PvNode
-        &&  depth < 9
-        &&  eval - futility_margin(depth, improving) >= beta
-        &&  eval < VALUE_KNOWN_WIN) // Do not return unproven wins
+        && depth <= 7
+        && eval < alpha - 348 - 258 * depth * depth)
+    {
+        value = qsearch<NonPV>(pos, ss, alpha - 1, alpha);
+        if (value < alpha)
+            return value;
+    }
+
+    // Step 8. Futility pruning: child node (~25 Elo).
+    // The depth condition is important for mate finding.
+    if (   !ss->ttPv
+        &&  depth < 8
+        &&  eval - futility_margin(depth, improving) - (ss-1)->statScore / 256 >= beta
+        &&  eval >= beta
+        &&  eval < 26305) // larger than VALUE_KNOWN_WIN, but smaller than TB wins.
         return eval;
 
-    // Step 8. Null move search with verification search (~40 Elo)
+    // Step 9. Null move search with verification search (~22 Elo)
     if (   !PvNode
         && (ss-1)->currentMove != MOVE_NULL
-        && (ss-1)->statScore < 23767
+        && (ss-1)->statScore < 14695
         &&  eval >= beta
         &&  eval >= ss->staticEval
-        &&  ss->staticEval >= beta - 20 * depth - 22 * improving + 168 * ss->ttPv + 159
+        &&  ss->staticEval >= beta - 15 * depth - improvement / 15 + 198 + complexity / 28
         && !excludedMove
         &&  pos.non_pawn_material(us)
         && (ss->ply >= thisThread->nmpMinPly || us != thisThread->nmpColor))
     {
         assert(eval - beta >= 0);
 
-        // Null move dynamic reduction based on depth and value
-        Depth R = (1090 + 81 * depth) / 256 + std::min(int(eval - beta) / 205, 3);
+        // Null move dynamic reduction based on depth, eval and complexity of position
+        Depth R = std::min(int(eval - beta) / 147, 5) + depth / 3 + 4 - (complexity > 753);
 
         ss->currentMove = MOVE_NULL;
         ss->continuationHistory = &thisThread->continuationHistory[0][0][NO_PIECE][0];
@@ -835,9 +845,9 @@ namespace {
         }
     }
 
-    probCutBeta = beta + 209 - 44 * improving;
+    probCutBeta = beta + 179 - 46 * improving;
 
-    // Step 9. ProbCut (~4 Elo)
+    // Step 10. ProbCut (~4 Elo)
     // If we have a good enough capture and a reduced search returns a value
     // much above beta, we can (almost) safely prune the previous move.
     if (   !PvNode
@@ -854,20 +864,17 @@ namespace {
     {
         assert(probCutBeta < VALUE_INFINITE);
 
-        MovePicker mp(pos, ttMove, probCutBeta - ss->staticEval, &captureHistory);
-        int probCutCount = 0;
+        MovePicker mp(pos, ttMove, probCutBeta - ss->staticEval, depth - 3, &captureHistory);
         bool ttPv = ss->ttPv;
+        bool captureOrPromotion;
         ss->ttPv = false;
 
-        while (   (move = mp.next_move()) != MOVE_NONE
-               && probCutCount < 2 + 2 * cutNode)
+        while ((move = mp.next_move()) != MOVE_NONE)
             if (move != excludedMove && pos.legal(move))
             {
-                assert(pos.capture_or_promotion(move));
-                assert(depth >= 5);
+                assert(pos.capture(move) || promotion_type(move) == QUEEN);
 
                 captureOrPromotion = true;
-                probCutCount++;
 
                 ss->currentMove = move;
                 ss->continuationHistory = &thisThread->continuationHistory[ss->inCheck]
@@ -901,21 +908,24 @@ namespace {
          ss->ttPv = ttPv;
     }
 
-    // Step 10. If the position is not in TT, decrease depth by 2
+    // Step 11. If the position is not in TT, decrease depth by 2 or 1 depending on node type (~3 Elo)
     if (   PvNode
-        && depth >= 6
+        && depth >= 3
         && !ttMove)
         depth -= 2;
 
-moves_loop: // When in check, search starts from here
+    if (   cutNode
+        && depth >= 8
+        && !ttMove)
+        depth--;
 
-    ttCapture = ttMove && pos.capture_or_promotion(ttMove);
+moves_loop: // When in check, search starts here
 
-    // Step 11. A small Probcut idea, when we are in check
-    probCutBeta = beta + 409;
+    // Step 12. A small Probcut idea, when we are in check (~0 Elo)
+    probCutBeta = beta + 481;
     if (   ss->inCheck
         && !PvNode
-        && depth >= 4
+        && depth >= 2
         && ttCapture
         && (tte->bound() & BOUND_LOWER)
         && tte->depth() >= depth - 3
@@ -933,16 +943,13 @@ moves_loop: // When in check, search starts from here
     Move countermove = thisThread->counterMoves[pos.piece_on(prevSq)][prevSq];
 
     MovePicker mp(pos, ttMove, depth, &thisThread->mainHistory,
-                                      &thisThread->lowPlyHistory,
                                       &captureHistory,
                                       contHist,
                                       countermove,
-                                      ss->killers,
-                                      ss->ply);
+                                      ss->killers);
 
     value = bestValue;
-    singularQuietLMR = moveCountPruning = false;
-    bool doubleExtension = false;
+    moveCountPruning = false;
 
     // Indicate PvNodes that will probably fail low if the node was searched
     // at a depth equal or greater than the current depth, and the result of this search was a fail low.
@@ -951,7 +958,7 @@ moves_loop: // When in check, search starts from here
                          && (tte->bound() & BOUND_UPPER)
                          && tte->depth() >= depth;
 
-    // Step 12. Loop through all pseudo-legal moves until no moves remain
+    // Step 13. Loop through all pseudo-legal moves until no moves remain
     // or a beta cutoff occurs.
     while ((move = mp.next_move(moveCountPruning)) != MOVE_NONE)
     {
@@ -982,123 +989,130 @@ moves_loop: // When in check, search starts from here
           (ss+1)->pv = nullptr;
 
       extension = 0;
-      captureOrPromotion = pos.capture_or_promotion(move);
+      capture = pos.capture(move);
       movedPiece = pos.moved_piece(move);
       givesCheck = pos.gives_check(move);
 
       // Calculate new depth for this move
       newDepth = depth - 1;
 
-      // Step 13. Pruning at shallow depth (~200 Elo)
+      Value delta = beta - alpha;
+
+      // Step 14. Pruning at shallow depth (~98 Elo). Depth conditions are important for mate finding.
       if (  !rootNode
           && pos.non_pawn_material(us)
           && bestValue > VALUE_TB_LOSS_IN_MAX_PLY)
       {
-          // Skip quiet moves if movecount exceeds our FutilityMoveCount threshold
+          // Skip quiet moves if movecount exceeds our FutilityMoveCount threshold (~7 Elo)
           moveCountPruning = moveCount >= futility_move_count(improving, depth);
 
           // Reduced depth of the next LMR search
-          int lmrDepth = std::max(newDepth - reduction(improving, depth, moveCount), 0);
+          int lmrDepth = std::max(newDepth - reduction(improving, depth, moveCount, delta, thisThread->rootDelta), 0);
 
-          if (   captureOrPromotion
+          if (   capture
               || givesCheck)
           {
-              // Capture history based pruning when the move doesn't give check
-              if (   !givesCheck
-                  && lmrDepth < 1
-                  && captureHistory[movedPiece][to_sq(move)][type_of(pos.piece_on(to_sq(move)))] < 0)
+              // Futility pruning for captures (~0 Elo)
+              if (   !pos.empty(to_sq(move))
+                  && !givesCheck
+                  && !PvNode
+                  && lmrDepth < 6
+                  && !ss->inCheck
+                  && ss->staticEval + 281 + 179 * lmrDepth + PieceValue[EG][pos.piece_on(to_sq(move))]
+                   + captureHistory[movedPiece][to_sq(move)][type_of(pos.piece_on(to_sq(move)))] / 6 < alpha)
                   continue;
 
-              // SEE based pruning
-              if (!pos.see_ge(move, Value(-218) * depth)) // (~25 Elo)
+              // SEE based pruning (~9 Elo)
+              if (!pos.see_ge(move, Value(-203) * depth))
                   continue;
           }
           else
           {
-              // Continuation history based pruning (~20 Elo)
+              int history =   (*contHist[0])[movedPiece][to_sq(move)]
+                            + (*contHist[1])[movedPiece][to_sq(move)]
+                            + (*contHist[3])[movedPiece][to_sq(move)];
+
+              // Continuation history based pruning (~2 Elo)
               if (   lmrDepth < 5
-                  && (*contHist[0])[movedPiece][to_sq(move)] < CounterMovePruneThreshold
-                  && (*contHist[1])[movedPiece][to_sq(move)] < CounterMovePruneThreshold)
+                  && history < -3875 * (depth - 1))
                   continue;
 
-              // Futility pruning: parent node (~5 Elo)
-              if (   lmrDepth < 7
-                  && !ss->inCheck
-                  && ss->staticEval + 174 + 157 * lmrDepth <= alpha
-                  &&  (*contHist[0])[movedPiece][to_sq(move)]
-                    + (*contHist[1])[movedPiece][to_sq(move)]
-                    + (*contHist[3])[movedPiece][to_sq(move)]
-                    + (*contHist[5])[movedPiece][to_sq(move)] / 3 < 28255)
+              history += thisThread->mainHistory[us][from_to(move)];
+
+              // Futility pruning: parent node (~9 Elo)
+              if (   !ss->inCheck
+                  && lmrDepth < 11
+                  && ss->staticEval + 122 + 138 * lmrDepth + history / 60 <= alpha)
                   continue;
 
-              // Prune moves with negative SEE (~20 Elo)
-              if (!pos.see_ge(move, Value(-(30 - std::min(lmrDepth, 18)) * lmrDepth * lmrDepth)))
+              // Prune moves with negative SEE (~3 Elo)
+              if (!pos.see_ge(move, Value(-25 * lmrDepth * lmrDepth - 20 * lmrDepth)))
                   continue;
           }
       }
 
-      // Step 14. Extensions (~75 Elo)
-
-      // Singular extension search (~70 Elo). If all moves but one fail low on a
-      // search of (alpha-s, beta-s), and just one fails high on (alpha, beta),
-      // then that move is singular and should be extended. To verify this we do
-      // a reduced search on all the other moves but the ttMove and if the
-      // result is lower than ttValue minus a margin, then we will extend the ttMove.
-      if (   !rootNode
-          &&  depth >= 7
-          &&  move == ttMove
-          && !excludedMove // Avoid recursive singular search
-       /* &&  ttValue != VALUE_NONE Already implicit in the next condition */
-          &&  abs(ttValue) < VALUE_KNOWN_WIN
-          && (tte->bound() & BOUND_LOWER)
-          &&  tte->depth() >= depth - 3)
+      // Step 15. Extensions (~66 Elo)
+      // We take care not to overdo it, to avoid the search getting stuck.
+      if (ss->ply < thisThread->rootDepth * 2)
       {
-          Value singularBeta = ttValue - 2 * depth;
-          Depth singularDepth = (depth - 1) / 2;
-
-          ss->excludedMove = move;
-          value = search<NonPV>(pos, ss, singularBeta - 1, singularBeta, singularDepth, cutNode);
-          ss->excludedMove = MOVE_NONE;
-
-          if (value < singularBeta)
+          // Singular extension search (~58 Elo). If all moves but one fail low on a
+          // search of (alpha-s, beta-s), and just one fails high on (alpha, beta),
+          // then that move is singular and should be extended. To verify this we do
+          // a reduced search on all the other moves but the ttMove and if the
+          // result is lower than ttValue minus a margin, then we will extend the ttMove.
+          if (   !rootNode
+              &&  depth >= 4 + 2 * (PvNode && tte->is_pv())
+              &&  move == ttMove
+              && !excludedMove // Avoid recursive singular search
+           /* &&  ttValue != VALUE_NONE Already implicit in the next condition */
+              &&  abs(ttValue) < VALUE_KNOWN_WIN
+              && (tte->bound() & BOUND_LOWER)
+              &&  tte->depth() >= depth - 3)
           {
-              extension = 1;
-              singularQuietLMR = !ttCapture;
+              Value singularBeta = ttValue - 3 * depth;
+              Depth singularDepth = (depth - 1) / 2;
 
-              // Avoid search explosion by limiting the number of double extensions to at most 3
-              if (   !PvNode
-                  && value < singularBeta - 93
-                  && ss->doubleExtensions < 3)
-              {
-                  extension = 2;
-                  doubleExtension = true;
-              }
-          }
-
-          // Multi-cut pruning
-          // Our ttMove is assumed to fail high, and now we failed high also on a reduced
-          // search without the ttMove. So we assume this expected Cut-node is not singular,
-          // that multiple moves fail high, and we can prune the whole subtree by returning
-          // a soft bound.
-          else if (singularBeta >= beta)
-              return singularBeta;
-
-          // If the eval of ttMove is greater than beta we try also if there is another
-          // move that pushes it over beta, if so also produce a cutoff.
-          else if (ttValue >= beta)
-          {
               ss->excludedMove = move;
-              value = search<NonPV>(pos, ss, beta - 1, beta, (depth + 3) / 2, cutNode);
+              value = search<NonPV>(pos, ss, singularBeta - 1, singularBeta, singularDepth, cutNode);
               ss->excludedMove = MOVE_NONE;
 
-              if (value >= beta)
-                  return beta;
+              if (value < singularBeta)
+              {
+                  extension = 1;
+
+                  // Avoid search explosion by limiting the number of double extensions
+                  if (  !PvNode
+                      && value < singularBeta - 26
+                      && ss->doubleExtensions <= 8)
+                      extension = 2;
+              }
+
+              // Multi-cut pruning
+              // Our ttMove is assumed to fail high, and now we failed high also on a reduced
+              // search without the ttMove. So we assume this expected Cut-node is not singular,
+              // that multiple moves fail high, and we can prune the whole subtree by returning
+              // a soft bound.
+              else if (singularBeta >= beta)
+                  return singularBeta;
+
+              // If ttValue is at least beta, we reduce the ttMove (negative extension)
+              else if (ttValue >= beta)
+                  extension = -2;
           }
+
+          // Check extensions (~1 Elo)
+          else if (   givesCheck
+                   && depth > 9
+                   && abs(ss->staticEval) > 71)
+              extension = 1;
+
+          // Quiet ttMove extensions (~0 Elo)
+          else if (   PvNode
+                   && move == ttMove
+                   && move == ss->killers[0]
+                   && (*contHist[0])[movedPiece][to_sq(move)] >= 5491)
+              extension = 1;
       }
-      else if (   givesCheck
-               && depth > 6
-               && abs(ss->staticEval) > Value(100))
-          extension = 1;
 
       // Add extension to new depth
       newDepth += extension;
@@ -1110,31 +1124,30 @@ moves_loop: // When in check, search starts from here
       // Update the current move (this must be done after singular extension search)
       ss->currentMove = move;
       ss->continuationHistory = &thisThread->continuationHistory[ss->inCheck]
-                                                                [captureOrPromotion]
+                                                                [capture]
                                                                 [movedPiece]
                                                                 [to_sq(move)];
 
-      // Step 15. Make the move
+      // Step 16. Make the move
       pos.do_move(move, st, givesCheck);
 
-      // Step 16. Late moves reduction / extension (LMR, ~200 Elo)
+      bool doDeeperSearch = false;
+
+      // Step 17. Late moves reduction / extension (LMR, ~98 Elo)
       // We use various heuristics for the sons of a node after the first son has
       // been searched. In general we would like to reduce them, but there are many
       // cases where we extend a son if it has good chances to be "interesting".
-      if (    depth >= 3
-          &&  moveCount > 1 + 2 * rootNode
-          && (  !captureOrPromotion
-              || (cutNode && (ss-1)->moveCount > 1)
-              || !ss->ttPv)
-          && (!PvNode || ss->ply > 1 || thisThread->id() % 4 != 3))
+      if (    depth >= 2
+          &&  moveCount > 1 + (PvNode && ss->ply <= 1)
+          && (   !ss->ttPv
+              || !capture
+              || (cutNode && (ss-1)->moveCount > 1)))
       {
-          Depth r = reduction(improving, depth, moveCount);
+          Depth r = reduction(improving, depth, moveCount, delta, thisThread->rootDelta);
 
-          if (PvNode)
-              r--;
-
-          // Decrease reduction if the ttHit running average is large (~0 Elo)
-          if (thisThread->ttHitAverage > 537 * TtHitAverageResolution * TtHitAverageWindow / 1024)
+          // Decrease reduction at some PvNodes (~2 Elo)
+          if (   PvNode
+              && bestMoveCount <= 3)
               r--;
 
           // Decrease reduction if position is or has been on the PV
@@ -1143,49 +1156,52 @@ moves_loop: // When in check, search starts from here
               && !likelyFailLow)
               r -= 2;
 
-          // Increase reduction at root and non-PV nodes when the best move does not change frequently
-          if (   (rootNode || !PvNode)
-              && thisThread->bestMoveChanges <= 2)
-              r++;
-
           // Decrease reduction if opponent's move count is high (~1 Elo)
-          if ((ss-1)->moveCount > 13)
-              r--;
-
-          // Decrease reduction if ttMove has been singularly extended (~1 Elo)
-          if (singularQuietLMR)
+          if ((ss-1)->moveCount > 7)
               r--;
 
           // Increase reduction for cut nodes (~3 Elo)
-          if (cutNode)
-              r += 1 + !captureOrPromotion;
+          if (cutNode && move != ss->killers[0])
+              r += 2;
 
-          if (!captureOrPromotion)
-          {
-              // Increase reduction if ttMove is a capture (~3 Elo)
-              if (ttCapture)
-                  r++;
+          // Increase reduction if ttMove is a capture (~3 Elo)
+          if (ttCapture)
+              r++;
 
-              ss->statScore =  thisThread->mainHistory[us][from_to(move)]
-                             + (*contHist[0])[movedPiece][to_sq(move)]
-                             + (*contHist[1])[movedPiece][to_sq(move)]
-                             + (*contHist[3])[movedPiece][to_sq(move)]
-                             - 4923;
+          // Decrease reduction at PvNodes if bestvalue
+          // is vastly different from static evaluation
+          if (PvNode && !ss->inCheck && abs(ss->staticEval - bestValue) > 250)
+              r--;
 
-              // Decrease/increase reduction for moves with a good/bad history (~30 Elo)
-              if (!ss->inCheck)
-                  r -= ss->statScore / 14721;
-          }
+          // Decrease reduction at PvNodes, by a larger amount at shallow depth
+          if (PvNode)
+              r -= 15 / ( 3 + depth );
 
-          // In general we want to cap the LMR depth search at newDepth. But if
-          // reductions are really negative and movecount is low, we allow this move
-          // to be searched deeper than the first move, unless ttMove was extended by 2.
-          Depth d = std::clamp(newDepth - r, 1, newDepth + (r < -1 && moveCount <= 5 && !doubleExtension));
+          ss->statScore =  thisThread->mainHistory[us][from_to(move)]
+                         + (*contHist[0])[movedPiece][to_sq(move)]
+                         + (*contHist[1])[movedPiece][to_sq(move)]
+                         + (*contHist[3])[movedPiece][to_sq(move)]
+                         - 4334;
+
+          // Decrease/increase reduction for moves with a good/bad history (~30 Elo)
+          r -= ss->statScore / 15914;
+
+          // In general we want to cap the LMR depth search at newDepth. But if reductions
+          // are really negative and movecount is low, we allow this move to be searched
+          // deeper than the first move (this may lead to hidden double extensions).
+          int deeper =   r >= -1                   ? 0
+                       : moveCount <= 4            ? 2
+                       : PvNode && depth > 4       ? 1
+                       : cutNode && moveCount <= 8 ? 1
+                       :                             0;
+
+          Depth d = std::clamp(newDepth - r, 1, newDepth + deeper);
 
           value = -search<NonPV>(pos, ss+1, -(alpha+1), -alpha, d, true);
 
           // If the son is reduced and fails high it will be re-searched at full depth
           doFullDepthSearch = value > alpha && d < newDepth;
+          doDeeperSearch = value > (alpha + 78 + 11 * (newDepth - d));
           didLMR = true;
       }
       else
@@ -1194,17 +1210,20 @@ moves_loop: // When in check, search starts from here
           didLMR = false;
       }
 
-      // Step 17. Full depth search when LMR is skipped or fails high
+      // Step 18. Full depth search when LMR is skipped or fails high
       if (doFullDepthSearch)
       {
-          value = -search<NonPV>(pos, ss+1, -(alpha+1), -alpha, newDepth, !cutNode);
+          value = -search<NonPV>(pos, ss+1, -(alpha+1), -alpha, newDepth + doDeeperSearch, !cutNode);
 
           // If the move passed LMR update its stats
-          if (didLMR && !captureOrPromotion)
+          if (didLMR)
           {
               int bonus = value > alpha ?  stat_bonus(newDepth)
                                         : -stat_bonus(newDepth);
 
+              if (capture)
+                  bonus /= 6;
+
               update_continuation_histories(ss, movedPiece, to_sq(move), bonus);
           }
       }
@@ -1221,12 +1240,12 @@ moves_loop: // When in check, search starts from here
                               std::min(maxNextDepth, newDepth), false);
       }
 
-      // Step 18. Undo move
+      // Step 19. Undo move
       pos.undo_move(move);
 
       assert(value > -VALUE_INFINITE && value < VALUE_INFINITE);
 
-      // Step 19. Check for a new best move
+      // Step 20. Check for a new best move
       // Finished searching the move. If a stop occurred, the return value of
       // the search cannot be trusted, and we return immediately without
       // updating best move, PV and TT.
@@ -1238,6 +1257,8 @@ moves_loop: // When in check, search starts from here
           RootMove& rm = *std::find(thisThread->rootMoves.begin(),
                                     thisThread->rootMoves.end(), move);
 
+          rm.averageScore = rm.averageScore != -VALUE_INFINITE ? (2 * value + rm.averageScore) / 3 : value;
+
           // PV move or new best move?
           if (moveCount == 1 || value > alpha)
           {
@@ -1250,9 +1271,11 @@ moves_loop: // When in check, search starts from here
               for (Move* m = (ss+1)->pv; *m != MOVE_NONE; ++m)
                   rm.pv.push_back(*m);
 
-              // We record how often the best move has been changed in each
-              // iteration. This information is used for time management and LMR
-              if (moveCount > 1)
+              // We record how often the best move has been changed in each iteration.
+              // This information is used for time management. In MultiPV mode,
+              // we must take care to only do this for the first PV line.
+              if (   moveCount > 1
+                  && !thisThread->pvIdx)
                   ++thisThread->bestMoveChanges;
           }
           else
@@ -1274,7 +1297,10 @@ moves_loop: // When in check, search starts from here
                   update_pv(ss->pv, move, (ss+1)->pv);
 
               if (PvNode && value < beta) // Update alpha! Always alpha < beta
+              {
                   alpha = value;
+                  bestMoveCount++;
+              }
               else
               {
                   assert(value >= beta); // Fail high
@@ -1286,10 +1312,10 @@ moves_loop: // When in check, search starts from here
       // If the move is worse than some previously searched move, remember it to update its stats later
       if (move != bestMove)
       {
-          if (captureOrPromotion && captureCount < 32)
+          if (capture && captureCount < 32)
               capturesSearched[captureCount++] = move;
 
-          else if (!captureOrPromotion && quietCount < 64)
+          else if (!capture && quietCount < 64)
               quietsSearched[quietCount++] = move;
       }
     }
@@ -1302,7 +1328,7 @@ moves_loop: // When in check, search starts from here
         return VALUE_DRAW;
     */
 
-    // Step 20. Check for mate and stalemate
+    // Step 21. Check for mate and stalemate
     // All legal moves have been searched and if there are no legal moves, it
     // must be a mate or a stalemate. If we are in a singular extension search then
     // return a fail low score.
@@ -1320,9 +1346,17 @@ moves_loop: // When in check, search starts from here
                          quietsSearched, quietCount, capturesSearched, captureCount, depth);
 
     // Bonus for prior countermove that caused the fail low
-    else if (   (depth >= 3 || PvNode)
+    else if (   (depth >= 4 || PvNode)
              && !priorCapture)
-        update_continuation_histories(ss-1, pos.piece_on(prevSq), prevSq, stat_bonus(depth));
+    {
+        // Assign an extra bonus if the current node is a PvNode or cutNode,
+        // or if the fail low was really bad
+        bool extraBonus =    PvNode
+                          || cutNode
+                          || bestValue < alpha - 70 * depth;
+
+        update_continuation_histories(ss-1, pos.piece_on(prevSq), prevSq, stat_bonus(depth) * (1 + extraBonus));
+    }
 
     if (PvNode)
         bestValue = std::min(bestValue, maxValue);
@@ -1331,10 +1365,6 @@ moves_loop: // When in check, search starts from here
     // opponent move is probably good and the new position is added to the search tree.
     if (bestValue <= alpha)
         ss->ttPv = ss->ttPv || ((ss-1)->ttPv && depth > 3);
-    // Otherwise, a counter move has been found and if the position is the last leaf
-    // in the search tree, remove the position from the search tree.
-    else if (depth > 3)
-        ss->ttPv = ss->ttPv && (ss+1)->ttPv;
 
     // Write gathered information in transposition table
     if (!excludedMove && !(rootNode && thisThread->pvIdx))
@@ -1369,13 +1399,12 @@ moves_loop: // When in check, search starts from here
     Key posKey;
     Move ttMove, move, bestMove;
     Depth ttDepth;
-    Value bestValue, value, ttValue, futilityValue, futilityBase, oldAlpha;
-    bool pvHit, givesCheck, captureOrPromotion;
+    Value bestValue, value, ttValue, futilityValue, futilityBase;
+    bool pvHit, givesCheck, capture;
     int moveCount;
 
     if (PvNode)
     {
-        oldAlpha = alpha; // To flag BOUND_EXACT when eval above alpha and no available moves
         (ss+1)->pv = pv;
         ss->pv[0] = MOVE_NONE;
     }
@@ -1426,14 +1455,13 @@ moves_loop: // When in check, search starts from here
             if ((ss->staticEval = bestValue = tte->eval()) == VALUE_NONE)
                 ss->staticEval = bestValue = evaluate(pos);
 
-            // Can ttValue be used as a better position evaluation?
+            // ttValue can be used as a better position evaluation (~7 Elo)
             if (    ttValue != VALUE_NONE
                 && (tte->bound() & (ttValue > bestValue ? BOUND_LOWER : BOUND_UPPER)))
                 bestValue = ttValue;
         }
         else
             // In case of null move search use previous static eval with a different sign
-            // and addition of two tempos
             ss->staticEval = bestValue =
             (ss-1)->currentMove != MOVE_NULL ? evaluate(pos)
                                              : -(ss-1)->staticEval;
@@ -1452,7 +1480,7 @@ moves_loop: // When in check, search starts from here
         if (PvNode && bestValue > alpha)
             alpha = bestValue;
 
-        futilityBase = bestValue + 155;
+        futilityBase = bestValue + 118;
     }
 
     const PieceToHistory* contHist[] = { (ss-1)->continuationHistory, (ss-2)->continuationHistory,
@@ -1463,24 +1491,32 @@ moves_loop: // When in check, search starts from here
     // to search the moves. Because the depth is <= 0 here, only captures,
     // queen promotions, and other checks (only if depth >= DEPTH_QS_CHECKS)
     // will be generated.
+    Square prevSq = to_sq((ss-1)->currentMove);
     MovePicker mp(pos, ttMove, depth, &thisThread->mainHistory,
                                       &thisThread->captureHistory,
                                       contHist,
-                                      to_sq((ss-1)->currentMove));
+                                      prevSq);
+
+    int quietCheckEvasions = 0;
 
     // Loop through the moves until no moves remain or a beta cutoff occurs
     while ((move = mp.next_move()) != MOVE_NONE)
     {
       assert(is_ok(move));
 
+      // Check for legality
+      if (!pos.legal(move))
+          continue;
+
       givesCheck = pos.gives_check(move);
-      captureOrPromotion = pos.capture_or_promotion(move);
+      capture = pos.capture(move);
 
       moveCount++;
 
-      // Futility pruning and moveCount pruning
+      // Futility pruning and moveCount pruning (~5 Elo)
       if (    bestValue > VALUE_TB_LOSS_IN_MAX_PLY
           && !givesCheck
+          &&  to_sq(move) != prevSq
           &&  futilityBase > -VALUE_KNOWN_WIN
           &&  type_of(move) != PROMOTION)
       {
@@ -1503,7 +1539,7 @@ moves_loop: // When in check, search starts from here
           }
       }
 
-      // Do not search moves with negative SEE values
+      // Do not search moves with negative SEE values (~5 Elo)
       if (    bestValue > VALUE_TB_LOSS_IN_MAX_PLY
           && !pos.see_ge(move))
           continue;
@@ -1511,26 +1547,28 @@ moves_loop: // When in check, search starts from here
       // Speculative prefetch as early as possible
       prefetch(TT.first_entry(pos.key_after(move)));
 
-      // Check for legality just before making the move
-      if (!pos.legal(move))
-      {
-          moveCount--;
-          continue;
-      }
-
       ss->currentMove = move;
       ss->continuationHistory = &thisThread->continuationHistory[ss->inCheck]
-                                                                [captureOrPromotion]
+                                                                [capture]
                                                                 [pos.moved_piece(move)]
                                                                 [to_sq(move)];
 
-      // Continuation history based pruning
-      if (  !captureOrPromotion
+      // Continuation history based pruning (~2 Elo)
+      if (  !capture
           && bestValue > VALUE_TB_LOSS_IN_MAX_PLY
           && (*contHist[0])[pos.moved_piece(move)][to_sq(move)] < CounterMovePruneThreshold
           && (*contHist[1])[pos.moved_piece(move)][to_sq(move)] < CounterMovePruneThreshold)
           continue;
 
+      // Movecount pruning for quiet check evasions: skip them once two have been searched
+      if (  bestValue > VALUE_TB_LOSS_IN_MAX_PLY
+          && quietCheckEvasions > 1
+          && !capture
+          && ss->inCheck)
+          continue;
+
+      quietCheckEvasions += !capture && ss->inCheck;
+
       // Make and search the move
       pos.do_move(move, st, givesCheck);
       value = -qsearch<nodeType>(pos, ss+1, -beta, -alpha, depth - 1);
@@ -1569,8 +1607,7 @@ moves_loop: // When in check, search starts from here
 
     // Save gathered info in transposition table
     tte->save(posKey, value_to_tt(bestValue, ss->ply), pvHit,
-              bestValue >= beta ? BOUND_LOWER :
-              PvNode && bestValue > oldAlpha  ? BOUND_EXACT : BOUND_UPPER,
+              bestValue >= beta ? BOUND_LOWER : BOUND_UPPER,
               ttDepth, bestMove, ss->staticEval);
 
     assert(bestValue > -VALUE_INFINITE && bestValue < VALUE_INFINITE);
@@ -1646,13 +1683,13 @@ moves_loop: // When in check, search starts from here
     PieceType captured = type_of(pos.piece_on(to_sq(bestMove)));
 
     bonus1 = stat_bonus(depth + 1);
-    bonus2 = bestValue > beta + PawnValueMg ? bonus1                                 // larger bonus
-                                            : std::min(bonus1, stat_bonus(depth));   // smaller bonus
+    bonus2 = bestValue > beta + PawnValueMg ? bonus1               // larger bonus
+                                            : stat_bonus(depth);   // smaller bonus
 
-    if (!pos.capture_or_promotion(bestMove))
+    if (!pos.capture(bestMove))
     {
         // Increase stats for the best move in case it was a quiet move
-        update_quiet_stats(pos, ss, bestMove, bonus2, depth);
+        update_quiet_stats(pos, ss, bestMove, bonus2);
 
         // Decrease stats for all non-best quiet moves
         for (int i = 0; i < quietCount; ++i)
@@ -1699,7 +1736,7 @@ moves_loop: // When in check, search starts from here
 
   // update_quiet_stats() updates move sorting heuristics
 
-  void update_quiet_stats(const Position& pos, Stack* ss, Move move, int bonus, int depth) {
+  void update_quiet_stats(const Position& pos, Stack* ss, Move move, int bonus) {
 
     // Update killers
     if (ss->killers[0] != move)
@@ -1713,20 +1750,12 @@ moves_loop: // When in check, search starts from here
     thisThread->mainHistory[us][from_to(move)] << bonus;
     update_continuation_histories(ss, pos.moved_piece(move), to_sq(move), bonus);
 
-    // Penalty for reversed move in case of moved piece not being a pawn
-    if (type_of(pos.moved_piece(move)) != PAWN)
-        thisThread->mainHistory[us][from_to(reverse_move(move))] << -bonus;
-
     // Update countermove history
     if (is_ok((ss-1)->currentMove))
     {
         Square prevSq = to_sq((ss-1)->currentMove);
         thisThread->counterMoves[pos.piece_on(prevSq)][prevSq] = move;
     }
-
-    // Update low ply history
-    if (depth > 11 && ss->ply < MAX_LPH)
-        thisThread->lowPlyHistory[ss->ply][from_to(move)] << stat_bonus(depth - 7);
   }
 
   // When playing with strength handicap, choose best move among a set of RootMoves
@@ -1740,8 +1769,8 @@ moves_loop: // When in check, search starts from here
     // RootMoves are already sorted by score in descending order
     Value topScore = rootMoves[0].score;
     int delta = std::min(topScore - rootMoves[multiPV - 1].score, PawnValueMg);
-    int weakness = 120 - 2 * level;
     int maxScore = -VALUE_INFINITE;
+    double weakness = 120 - 2 * level;
 
     // Choose best move. For each move score we add two terms, both dependent on
     // weakness. One is deterministic and bigger for weaker levels, and one is
@@ -1749,8 +1778,8 @@ moves_loop: // When in check, search starts from here
     for (size_t i = 0; i < multiPV; ++i)
     {
         // This is our magic formula
-        int push = (  weakness * int(topScore - rootMoves[i].score)
-                    + delta * (rng.rand<unsigned>() % weakness)) / 128;
+        int push = int((  weakness * int(topScore - rootMoves[i].score)
+                        + delta * (rng.rand<unsigned>() % int(weakness))) / 128);
 
         if (rootMoves[i].score + push >= maxScore)
         {
diff --git a/DroidFishApp/src/main/cpp/stockfish/search.h b/DroidFishApp/src/main/cpp/stockfish/search.h
index 801baac..806295a 100644
--- a/DroidFishApp/src/main/cpp/stockfish/search.h
+++ b/DroidFishApp/src/main/cpp/stockfish/search.h
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -47,6 +47,7 @@ struct Stack {
   Move excludedMove;
   Move killers[2];
   Value staticEval;
+  Depth depth;
   int statScore;
   int moveCount;
   bool inCheck;
@@ -72,6 +73,7 @@ struct RootMove {
 
   Value score = -VALUE_INFINITE;
   Value previousScore = -VALUE_INFINITE;
+  Value averageScore = -VALUE_INFINITE;
   int selDepth = 0;
   int tbRank = 0;
   Value tbScore;
diff --git a/DroidFishApp/src/main/cpp/stockfish/simd.h b/DroidFishApp/src/main/cpp/stockfish/simd.h
new file mode 100644
index 0000000..7b9e8fb
--- /dev/null
+++ b/DroidFishApp/src/main/cpp/stockfish/simd.h
@@ -0,0 +1,387 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef STOCKFISH_SIMD_H_INCLUDED
+#define STOCKFISH_SIMD_H_INCLUDED
+
+#if defined(USE_AVX2)
+# include <immintrin.h>
+
+#elif defined(USE_SSE41)
+# include <smmintrin.h>
+
+#elif defined(USE_SSSE3)
+# include <tmmintrin.h>
+
+#elif defined(USE_SSE2)
+# include <emmintrin.h>
+
+#elif defined(USE_MMX)
+# include <mmintrin.h>
+
+#elif defined(USE_NEON)
+# include <arm_neon.h>
+#endif
+
+// The inline asm is only safe for GCC, where it is necessary to get good codegen.
+// See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101693
+// Clang does fine without it.
+// Play around here: https://godbolt.org/z/7EWqrYq51
+#if (defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER))
+#define USE_INLINE_ASM
+#endif
+
+// Use either the AVX512 or AVX-VNNI version of the VNNI instructions.
+#if defined(USE_AVXVNNI)
+#define VNNI_PREFIX "%{vex%} "
+#else
+#define VNNI_PREFIX ""
+#endif
+
+namespace Stockfish::Simd {
+
+#if defined (USE_AVX512)
+
+    [[maybe_unused]] static int m512_hadd(__m512i sum, int bias) {
+      return _mm512_reduce_add_epi32(sum) + bias;
+    }
+
+    /*
+      Parameters:
+        sum0 = [zmm0.i128[0], zmm0.i128[1], zmm0.i128[2], zmm0.i128[3]]
+        sum1 = [zmm1.i128[0], zmm1.i128[1], zmm1.i128[2], zmm1.i128[3]]
+        sum2 = [zmm2.i128[0], zmm2.i128[1], zmm2.i128[2], zmm2.i128[3]]
+        sum3 = [zmm3.i128[0], zmm3.i128[1], zmm3.i128[2], zmm3.i128[3]]
+
+      Returns:
+        ret = [
+          reduce_add_epi32(zmm0.i128[0]), reduce_add_epi32(zmm1.i128[0]), reduce_add_epi32(zmm2.i128[0]), reduce_add_epi32(zmm3.i128[0]),
+          reduce_add_epi32(zmm0.i128[1]), reduce_add_epi32(zmm1.i128[1]), reduce_add_epi32(zmm2.i128[1]), reduce_add_epi32(zmm3.i128[1]),
+          reduce_add_epi32(zmm0.i128[2]), reduce_add_epi32(zmm1.i128[2]), reduce_add_epi32(zmm2.i128[2]), reduce_add_epi32(zmm3.i128[2]),
+          reduce_add_epi32(zmm0.i128[3]), reduce_add_epi32(zmm1.i128[3]), reduce_add_epi32(zmm2.i128[3]), reduce_add_epi32(zmm3.i128[3])
+        ]
+    */
+    [[maybe_unused]] static __m512i m512_hadd128x16_interleave(
+        __m512i sum0, __m512i sum1, __m512i sum2, __m512i sum3) {
+
+      __m512i sum01a = _mm512_unpacklo_epi32(sum0, sum1);
+      __m512i sum01b = _mm512_unpackhi_epi32(sum0, sum1);
+
+      __m512i sum23a = _mm512_unpacklo_epi32(sum2, sum3);
+      __m512i sum23b = _mm512_unpackhi_epi32(sum2, sum3);
+
+      __m512i sum01 = _mm512_add_epi32(sum01a, sum01b);
+      __m512i sum23 = _mm512_add_epi32(sum23a, sum23b);
+
+      __m512i sum0123a = _mm512_unpacklo_epi64(sum01, sum23);
+      __m512i sum0123b = _mm512_unpackhi_epi64(sum01, sum23);
+
+      return _mm512_add_epi32(sum0123a, sum0123b);
+    }
+
+    [[maybe_unused]] static __m128i m512_haddx4(
+        __m512i sum0, __m512i sum1, __m512i sum2, __m512i sum3,
+        __m128i bias) {
+
+      __m512i sum = m512_hadd128x16_interleave(sum0, sum1, sum2, sum3);
+
+      __m256i sum256lo = _mm512_castsi512_si256(sum);
+      __m256i sum256hi = _mm512_extracti64x4_epi64(sum, 1);
+
+      sum256lo = _mm256_add_epi32(sum256lo, sum256hi);
+
+      __m128i sum128lo = _mm256_castsi256_si128(sum256lo);
+      __m128i sum128hi = _mm256_extracti128_si256(sum256lo, 1);
+
+      return _mm_add_epi32(_mm_add_epi32(sum128lo, sum128hi), bias);
+    }
+
+    [[maybe_unused]] static void m512_add_dpbusd_epi32(
+        __m512i& acc,
+        __m512i a,
+        __m512i b) {
+
+# if defined (USE_VNNI)
+#   if defined (USE_INLINE_ASM)
+      asm(
+        "vpdpbusd %[b], %[a], %[acc]\n\t"
+        : [acc]"+v"(acc)
+        : [a]"v"(a), [b]"vm"(b)
+      );
+#   else
+      acc = _mm512_dpbusd_epi32(acc, a, b);
+#   endif
+# else
+#   if defined (USE_INLINE_ASM)
+      __m512i tmp = _mm512_maddubs_epi16(a, b);
+      asm(
+          "vpmaddwd    %[tmp], %[ones], %[tmp]\n\t"
+          "vpaddd      %[acc], %[tmp], %[acc]\n\t"
+          : [acc]"+v"(acc), [tmp]"+&v"(tmp)
+          : [ones]"v"(_mm512_set1_epi16(1))
+      );
+#   else
+      __m512i product0 = _mm512_maddubs_epi16(a, b);
+      product0 = _mm512_madd_epi16(product0, _mm512_set1_epi16(1));
+      acc = _mm512_add_epi32(acc, product0);
+#   endif
+# endif
+    }
+
+    [[maybe_unused]] static void m512_add_dpbusd_epi32x2(
+        __m512i& acc,
+        __m512i a0, __m512i b0,
+        __m512i a1, __m512i b1) {
+
+# if defined (USE_VNNI)
+#   if defined (USE_INLINE_ASM)
+      asm(
+        "vpdpbusd %[b0], %[a0], %[acc]\n\t"
+        "vpdpbusd %[b1], %[a1], %[acc]\n\t"
+        : [acc]"+v"(acc)
+        : [a0]"v"(a0), [b0]"vm"(b0), [a1]"v"(a1), [b1]"vm"(b1)
+      );
+#   else
+      acc = _mm512_dpbusd_epi32(acc, a0, b0);
+      acc = _mm512_dpbusd_epi32(acc, a1, b1);
+#   endif
+# else
+#   if defined (USE_INLINE_ASM)
+      __m512i tmp0 = _mm512_maddubs_epi16(a0, b0);
+      __m512i tmp1 = _mm512_maddubs_epi16(a1, b1);
+      asm(
+          "vpaddsw     %[tmp0], %[tmp1], %[tmp0]\n\t"
+          "vpmaddwd    %[tmp0], %[ones], %[tmp0]\n\t"
+          "vpaddd      %[acc], %[tmp0], %[acc]\n\t"
+          : [acc]"+v"(acc), [tmp0]"+&v"(tmp0)
+          : [tmp1]"v"(tmp1), [ones]"v"(_mm512_set1_epi16(1))
+      );
+#   else
+      __m512i product0 = _mm512_maddubs_epi16(a0, b0);
+      __m512i product1 = _mm512_maddubs_epi16(a1, b1);
+      product0 = _mm512_adds_epi16(product0, product1);
+      product0 = _mm512_madd_epi16(product0, _mm512_set1_epi16(1));
+      acc = _mm512_add_epi32(acc, product0);
+#   endif
+# endif
+    }
+
+#endif
+
+#if defined (USE_AVX2)
+
+    [[maybe_unused]] static int m256_hadd(__m256i sum, int bias) {
+      __m128i sum128 = _mm_add_epi32(_mm256_castsi256_si128(sum), _mm256_extracti128_si256(sum, 1));
+      sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_BADC));
+      sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_CDAB));
+      return _mm_cvtsi128_si32(sum128) + bias;
+    }
+
+    [[maybe_unused]] static __m128i m256_haddx4(
+        __m256i sum0, __m256i sum1, __m256i sum2, __m256i sum3,
+        __m128i bias) {
+
+      sum0 = _mm256_hadd_epi32(sum0, sum1);
+      sum2 = _mm256_hadd_epi32(sum2, sum3);
+
+      sum0 = _mm256_hadd_epi32(sum0, sum2);
+
+      __m128i sum128lo = _mm256_castsi256_si128(sum0);
+      __m128i sum128hi = _mm256_extracti128_si256(sum0, 1);
+
+      return _mm_add_epi32(_mm_add_epi32(sum128lo, sum128hi), bias);
+    }
+
+    [[maybe_unused]] static void m256_add_dpbusd_epi32(
+        __m256i& acc,
+        __m256i a,
+        __m256i b) {
+
+# if defined (USE_VNNI)
+#   if defined (USE_INLINE_ASM)
+      asm(
+        VNNI_PREFIX "vpdpbusd %[b], %[a], %[acc]\n\t"
+        : [acc]"+v"(acc)
+        : [a]"v"(a), [b]"vm"(b)
+      );
+#   else
+      acc = _mm256_dpbusd_epi32(acc, a, b);
+#   endif
+# else
+#   if defined (USE_INLINE_ASM)
+      __m256i tmp = _mm256_maddubs_epi16(a, b);
+      asm(
+          "vpmaddwd    %[tmp], %[ones], %[tmp]\n\t"
+          "vpaddd      %[acc], %[tmp], %[acc]\n\t"
+          : [acc]"+v"(acc), [tmp]"+&v"(tmp)
+          : [ones]"v"(_mm256_set1_epi16(1))
+      );
+#   else
+      __m256i product0 = _mm256_maddubs_epi16(a, b);
+      product0 = _mm256_madd_epi16(product0, _mm256_set1_epi16(1));
+      acc = _mm256_add_epi32(acc, product0);
+#   endif
+# endif
+    }
+
+    [[maybe_unused]] static void m256_add_dpbusd_epi32x2(
+        __m256i& acc,
+        __m256i a0, __m256i b0,
+        __m256i a1, __m256i b1) {
+
+# if defined (USE_VNNI)
+#   if defined (USE_INLINE_ASM)
+      asm(
+        VNNI_PREFIX "vpdpbusd %[b0], %[a0], %[acc]\n\t"
+        VNNI_PREFIX "vpdpbusd %[b1], %[a1], %[acc]\n\t"
+        : [acc]"+v"(acc)
+        : [a0]"v"(a0), [b0]"vm"(b0), [a1]"v"(a1), [b1]"vm"(b1)
+      );
+#   else
+      acc = _mm256_dpbusd_epi32(acc, a0, b0);
+      acc = _mm256_dpbusd_epi32(acc, a1, b1);
+#   endif
+# else
+#   if defined (USE_INLINE_ASM)
+      __m256i tmp0 = _mm256_maddubs_epi16(a0, b0);
+      __m256i tmp1 = _mm256_maddubs_epi16(a1, b1);
+      asm(
+          "vpaddsw     %[tmp0], %[tmp1], %[tmp0]\n\t"
+          "vpmaddwd    %[tmp0], %[ones], %[tmp0]\n\t"
+          "vpaddd      %[acc], %[tmp0], %[acc]\n\t"
+          : [acc]"+v"(acc), [tmp0]"+&v"(tmp0)
+          : [tmp1]"v"(tmp1), [ones]"v"(_mm256_set1_epi16(1))
+      );
+#   else
+      __m256i product0 = _mm256_maddubs_epi16(a0, b0);
+      __m256i product1 = _mm256_maddubs_epi16(a1, b1);
+      product0 = _mm256_adds_epi16(product0, product1);
+      product0 = _mm256_madd_epi16(product0, _mm256_set1_epi16(1));
+      acc = _mm256_add_epi32(acc, product0);
+#   endif
+# endif
+    }
+
+#endif
+
+#if defined (USE_SSSE3)
+
+    [[maybe_unused]] static int m128_hadd(__m128i sum, int bias) {
+      sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0x4E)); //_MM_PERM_BADC
+      sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0xB1)); //_MM_PERM_CDAB
+      return _mm_cvtsi128_si32(sum) + bias;
+    }
+
+    [[maybe_unused]] static __m128i m128_haddx4(
+        __m128i sum0, __m128i sum1, __m128i sum2, __m128i sum3,
+        __m128i bias) {
+
+      sum0 = _mm_hadd_epi32(sum0, sum1);
+      sum2 = _mm_hadd_epi32(sum2, sum3);
+      sum0 = _mm_hadd_epi32(sum0, sum2);
+      return _mm_add_epi32(sum0, bias);
+    }
+
+    [[maybe_unused]] static void m128_add_dpbusd_epi32(
+        __m128i& acc,
+        __m128i a,
+        __m128i b) {
+
+#   if defined (USE_INLINE_ASM)
+      __m128i tmp = _mm_maddubs_epi16(a, b);
+      asm(
+          "pmaddwd    %[ones], %[tmp]\n\t"
+          "paddd      %[tmp], %[acc]\n\t"
+          : [acc]"+v"(acc), [tmp]"+&v"(tmp)
+          : [ones]"v"(_mm_set1_epi16(1))
+      );
+#   else
+      __m128i product0 = _mm_maddubs_epi16(a, b);
+      product0 = _mm_madd_epi16(product0, _mm_set1_epi16(1));
+      acc = _mm_add_epi32(acc, product0);
+#   endif
+    }
+
+    [[maybe_unused]] static void m128_add_dpbusd_epi32x2(
+        __m128i& acc,
+        __m128i a0, __m128i b0,
+        __m128i a1, __m128i b1) {
+
+#   if defined (USE_INLINE_ASM)
+      __m128i tmp0 = _mm_maddubs_epi16(a0, b0);
+      __m128i tmp1 = _mm_maddubs_epi16(a1, b1);
+      asm(
+          "paddsw     %[tmp1], %[tmp0]\n\t"
+          "pmaddwd    %[ones], %[tmp0]\n\t"
+          "paddd      %[tmp0], %[acc]\n\t"
+          : [acc]"+v"(acc), [tmp0]"+&v"(tmp0)
+          : [tmp1]"v"(tmp1), [ones]"v"(_mm_set1_epi16(1))
+      );
+#   else
+      __m128i product0 = _mm_maddubs_epi16(a0, b0);
+      __m128i product1 = _mm_maddubs_epi16(a1, b1);
+      product0 = _mm_adds_epi16(product0, product1);
+      product0 = _mm_madd_epi16(product0, _mm_set1_epi16(1));
+      acc = _mm_add_epi32(acc, product0);
+#   endif
+    }
+
+#endif
+
+#if defined (USE_NEON)
+
+    [[maybe_unused]] static int neon_m128_reduce_add_epi32(int32x4_t s) {
+#   if USE_NEON >= 8
+      return vaddvq_s32(s);
+#   else
+      return s[0] + s[1] + s[2] + s[3];
+#   endif
+    }
+
+    [[maybe_unused]] static int neon_m128_hadd(int32x4_t sum, int bias) {
+      return neon_m128_reduce_add_epi32(sum) + bias;
+    }
+
+    [[maybe_unused]] static int32x4_t neon_m128_haddx4(
+        int32x4_t sum0, int32x4_t sum1, int32x4_t sum2, int32x4_t sum3,
+        int32x4_t bias) {
+
+      int32x4_t hsums {
+        neon_m128_reduce_add_epi32(sum0),
+        neon_m128_reduce_add_epi32(sum1),
+        neon_m128_reduce_add_epi32(sum2),
+        neon_m128_reduce_add_epi32(sum3)
+      };
+      return vaddq_s32(hsums, bias);
+    }
+
+    [[maybe_unused]] static void neon_m128_add_dpbusd_epi32x2(
+        int32x4_t& acc,
+        int8x8_t a0, int8x8_t b0,
+        int8x8_t a1, int8x8_t b1) {
+
+      int16x8_t product = vmull_s8(a0, b0);
+      product = vmlal_s8(product, a1, b1);
+      acc = vpadalq_s16(acc, product);
+    }
+
+#endif
+
+}
+
+#endif // STOCKFISH_SIMD_H_INCLUDED
diff --git a/DroidFishApp/src/main/cpp/stockfish/syzygy/tbprobe.cpp b/DroidFishApp/src/main/cpp/stockfish/syzygy/tbprobe.cpp
index ff05a0c..a131524 100644
--- a/DroidFishApp/src/main/cpp/stockfish/syzygy/tbprobe.cpp
+++ b/DroidFishApp/src/main/cpp/stockfish/syzygy/tbprobe.cpp
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -472,8 +472,6 @@ TBTables TBTables;
 // If the corresponding file exists two new objects TBTable<WDL> and TBTable<DTZ>
 // are created and added to the lists and hash table. Called at init time.
 void TBTables::add(const std::vector<PieceType>& pieces) {
-    if (sizeof(char*) < 8 && pieces.size() >= 6)
-        return; // Not enough address space to support 6-men TB on 32-bit OS
 
     std::string code;
 
@@ -771,7 +769,7 @@ Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* resu
         goto encode_remaining; // With pawns we have finished special treatments
     }
 
-    // In positions withouth pawns, we further flip the squares to ensure leading
+    // In positions without pawns, we further flip the squares to ensure leading
     // piece is below RANK_5.
     if (rank_of(squares[0]) > RANK_4)
         for (int i = 0; i < size; ++i)
@@ -814,7 +812,7 @@ Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* resu
     // Rs "together" in 62 * 61 / 2 ways (we divide by 2 because rooks can be
     // swapped and still get the same position.)
     //
-    // In case we have at least 3 unique pieces (inlcuded kings) we encode them
+    // In case we have at least 3 unique pieces (kings included) we encode them
     // together.
     if (entry->hasUniquePieces) {
 
@@ -829,7 +827,7 @@ Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* resu
                    + (squares[1] - adjust1)) * 62
                    +  squares[2] - adjust2;
 
-        // First piece is on a1-h8 diagonal, second below: map this occurence to
+        // First piece is on a1-h8 diagonal, second below: map this occurrence to
         // 6 to differentiate from the above case, rank_of() maps a1-d4 diagonal
         // to 0...3 and finally MapB1H1H7[] maps the b1-h1-h7 triangle to 0..27.
         else if (off_A1H8(squares[1]))
@@ -859,7 +857,7 @@ encode_remaining:
     idx *= d->groupIdx[0];
     Square* groupSq = squares + d->groupLen[0];
 
-    // Encode remainig pawns then pieces according to square, in ascending order
+    // Encode remaining pawns then pieces according to square, in ascending order
     bool remainingPawns = entry->hasPawns && entry->pawnCount[1];
 
     while (d->groupLen[++next])
@@ -887,7 +885,7 @@ encode_remaining:
 
 // Group together pieces that will be encoded together. The general rule is that
 // a group contains pieces of same type and color. The exception is the leading
-// group that, in case of positions withouth pawns, can be formed by 3 different
+// group that, in case of positions without pawns, can be formed by 3 different
 // pieces (default) or by the king pair when there is not a unique piece apart
 // from the kings. When there are pawns, pawns are always first in pieces[].
 //
@@ -919,7 +917,7 @@ void set_groups(T& e, PairsData* d, int order[], File f) {
     //
     // This ensures unique encoding for the whole position. The order of the
     // groups is a per-table parameter and could not follow the canonical leading
-    // pawns/pieces -> remainig pawns -> remaining pieces. In particular the
+    // pawns/pieces -> remaining pawns -> remaining pieces. In particular the
     // first group is at order[0] position and the remaining pawns, when present,
     // are at order[1] position.
     bool pp = e.hasPawns && e.pawnCount[1]; // Pawns on both sides
@@ -939,7 +937,7 @@ void set_groups(T& e, PairsData* d, int order[], File f) {
             d->groupIdx[1] = idx;
             idx *= Binomial[d->groupLen[1]][48 - d->groupLen[0]];
         }
-        else // Remainig pieces
+        else // Remaining pieces
         {
             d->groupIdx[next] = idx;
             idx *= Binomial[d->groupLen[next]][freeSquares];
@@ -949,7 +947,7 @@ void set_groups(T& e, PairsData* d, int order[], File f) {
     d->groupIdx[n] = idx;
 }
 
-// In Recursive Pairing each symbol represents a pair of childern symbols. So
+// In Recursive Pairing each symbol represents a pair of child symbols. So
 // read d->btree[] symbols data and expand each one in his left and right child
 // symbol until reaching the leafs that represent the symbol value.
 uint8_t set_symlen(PairsData* d, Sym s, std::vector<bool>& visited) {
@@ -1319,7 +1317,7 @@ void Tablebases::init(const std::string& paths) {
     for (auto p : bothOnDiagonal)
         MapKK[p.first][p.second] = code++;
 
-    // Binomial[] stores the Binomial Coefficents using Pascal rule. There
+    // Binomial[] stores the Binomial Coefficients using Pascal rule. There
     // are Binomial[k][n] ways to choose k elements from a set of n elements.
     Binomial[0][0] = 1;
 
@@ -1339,7 +1337,7 @@ void Tablebases::init(const std::string& paths) {
     for (int leadPawnsCnt = 1; leadPawnsCnt <= 5; ++leadPawnsCnt)
         for (File f = FILE_A; f <= FILE_D; ++f)
         {
-            // Restart the index at every file because TB table is splitted
+            // Restart the index at every file because TB table is split
             // by file, so we can reuse the same index for different files.
             int idx = 0;
 
diff --git a/DroidFishApp/src/main/cpp/stockfish/syzygy/tbprobe.h b/DroidFishApp/src/main/cpp/stockfish/syzygy/tbprobe.h
index 56734af..c2917fe 100644
--- a/DroidFishApp/src/main/cpp/stockfish/syzygy/tbprobe.h
+++ b/DroidFishApp/src/main/cpp/stockfish/syzygy/tbprobe.h
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -38,7 +38,7 @@ enum WDLScore {
 // Possible states after a probing operation
 enum ProbeState {
     FAIL              =  0, // Probe failed (missing file table)
-    OK                =  1, // Probe succesful
+    OK                =  1, // Probe successful
     CHANGE_STM        = -1, // DTZ should check the other side
     ZEROING_BEST_MOVE =  2  // Best move zeroes DTZ (capture or pawn move)
 };
diff --git a/DroidFishApp/src/main/cpp/stockfish/thread.cpp b/DroidFishApp/src/main/cpp/stockfish/thread.cpp
index da8e1d0..30177a3 100644
--- a/DroidFishApp/src/main/cpp/stockfish/thread.cpp
+++ b/DroidFishApp/src/main/cpp/stockfish/thread.cpp
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -59,7 +59,6 @@ void Thread::clear() {
 
   counterMoves.fill(MOVE_NONE);
   mainHistory.fill(0);
-  lowPlyHistory.fill(0);
   captureHistory.fill(0);
 
   for (bool inCheck : { false, true })
@@ -67,7 +66,7 @@ void Thread::clear() {
       {
           for (auto& to : continuationHistory[inCheck][c])
                 for (auto& h : to)
-                      h->fill(0);
+                      h->fill(-71);
           continuationHistory[inCheck][c][NO_PIECE][0]->fill(Search::CounterMovePruneThreshold - 1);
       }
 }
@@ -162,6 +161,7 @@ void ThreadPool::clear() {
 
   main()->callsCnt = 0;
   main()->bestPreviousScore = VALUE_INFINITE;
+  main()->bestPreviousAverageScore = VALUE_INFINITE;
   main()->previousTimeReduction = 1.0;
 }
 
diff --git a/DroidFishApp/src/main/cpp/stockfish/thread.h b/DroidFishApp/src/main/cpp/stockfish/thread.h
index 5bfa235..8027855 100644
--- a/DroidFishApp/src/main/cpp/stockfish/thread.h
+++ b/DroidFishApp/src/main/cpp/stockfish/thread.h
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -60,18 +60,19 @@ public:
   Pawns::Table pawnsTable;
   Material::Table materialTable;
   size_t pvIdx, pvLast;
-  uint64_t ttHitAverage;
+  RunningAverage complexityAverage;
+  std::atomic<uint64_t> nodes, tbHits, bestMoveChanges;
   int selDepth, nmpMinPly;
   Color nmpColor;
-  std::atomic<uint64_t> nodes, tbHits, bestMoveChanges;
+  Value bestValue, optimism[COLOR_NB];
 
   Position rootPos;
   StateInfo rootState;
   Search::RootMoves rootMoves;
-  Depth rootDepth, completedDepth;
+  Depth rootDepth, completedDepth, depth;
+  Value rootDelta;
   CounterMoveHistory counterMoves;
   ButterflyHistory mainHistory;
-  LowPlyHistory lowPlyHistory;
   CapturePieceToHistory captureHistory;
   ContinuationHistory continuationHistory[2][2];
   Score trend;
@@ -89,6 +90,7 @@ struct MainThread : public Thread {
 
   double previousTimeReduction;
   Value bestPreviousScore;
+  Value bestPreviousAverageScore;
   Value iterValue[4];
   int callsCnt;
   bool stopOnPonderhit;
diff --git a/DroidFishApp/src/main/cpp/stockfish/thread_win32_osx.h b/DroidFishApp/src/main/cpp/stockfish/thread_win32_osx.h
index a21674c..77d1c3c 100644
--- a/DroidFishApp/src/main/cpp/stockfish/thread_win32_osx.h
+++ b/DroidFishApp/src/main/cpp/stockfish/thread_win32_osx.h
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/DroidFishApp/src/main/cpp/stockfish/timeman.cpp b/DroidFishApp/src/main/cpp/stockfish/timeman.cpp
index f742d1e..0400401 100644
--- a/DroidFishApp/src/main/cpp/stockfish/timeman.cpp
+++ b/DroidFishApp/src/main/cpp/stockfish/timeman.cpp
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -68,6 +68,9 @@ void TimeManagement::init(Search::LimitsType& limits, Color us, int ply) {
   TimePoint timeLeft =  std::max(TimePoint(1),
       limits.time[us] + limits.inc[us] * (mtg - 1) - moveOverhead * (2 + mtg));
 
+  // Use extra time with larger increments
+  double optExtra = std::clamp(1.0 + 12.0 * limits.inc[us] / limits.time[us], 1.0, 1.12);
+
   // A user may scale time usage by setting UCI option "Slow Mover"
   // Default is 100 and changing this value will probably lose elo.
   timeLeft = slowMover * timeLeft / 100;
@@ -78,15 +81,16 @@ void TimeManagement::init(Search::LimitsType& limits, Color us, int ply) {
   if (limits.movestogo == 0)
   {
       optScale = std::min(0.0084 + std::pow(ply + 3.0, 0.5) * 0.0042,
-                           0.2 * limits.time[us] / double(timeLeft));
+                           0.2 * limits.time[us] / double(timeLeft))
+                 * optExtra;
       maxScale = std::min(7.0, 4.0 + ply / 12.0);
   }
 
   // x moves in y seconds (+ z increment)
   else
   {
-      optScale = std::min((0.8 + ply / 128.0) / mtg,
-                            0.8 * limits.time[us] / double(timeLeft));
+      optScale = std::min((0.88 + ply / 116.4) / mtg,
+                            0.88 * limits.time[us] / double(timeLeft));
       maxScale = std::min(6.3, 1.5 + 0.11 * mtg);
   }
 
diff --git a/DroidFishApp/src/main/cpp/stockfish/timeman.h b/DroidFishApp/src/main/cpp/stockfish/timeman.h
index b1878d6..a86f076 100644
--- a/DroidFishApp/src/main/cpp/stockfish/timeman.h
+++ b/DroidFishApp/src/main/cpp/stockfish/timeman.h
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/DroidFishApp/src/main/cpp/stockfish/tt.cpp b/DroidFishApp/src/main/cpp/stockfish/tt.cpp
index 1f495ca..c7118ae 100644
--- a/DroidFishApp/src/main/cpp/stockfish/tt.cpp
+++ b/DroidFishApp/src/main/cpp/stockfish/tt.cpp
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -40,9 +40,9 @@ void TTEntry::save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev)
       move16 = (uint16_t)m;
 
   // Overwrite less valuable entries (cheapest checks first)
-  if (b == BOUND_EXACT
+  if (   b == BOUND_EXACT
       || (uint16_t)k != key16
-      || d - DEPTH_OFFSET > depth8 - 4)
+      || d - DEPTH_OFFSET + 2 * pv > depth8 - 4)
   {
       assert(d > DEPTH_OFFSET);
       assert(d < 256 + DEPTH_OFFSET);
diff --git a/DroidFishApp/src/main/cpp/stockfish/tt.h b/DroidFishApp/src/main/cpp/stockfish/tt.h
index d915d92..03fe3e1 100644
--- a/DroidFishApp/src/main/cpp/stockfish/tt.h
+++ b/DroidFishApp/src/main/cpp/stockfish/tt.h
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/DroidFishApp/src/main/cpp/stockfish/tune.cpp b/DroidFishApp/src/main/cpp/stockfish/tune.cpp
index ac91b60..a885845 100644
--- a/DroidFishApp/src/main/cpp/stockfish/tune.cpp
+++ b/DroidFishApp/src/main/cpp/stockfish/tune.cpp
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/DroidFishApp/src/main/cpp/stockfish/tune.h b/DroidFishApp/src/main/cpp/stockfish/tune.h
index b5c715b..75ab484 100644
--- a/DroidFishApp/src/main/cpp/stockfish/tune.h
+++ b/DroidFishApp/src/main/cpp/stockfish/tune.h
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -84,7 +84,7 @@ class Tune {
 
   static Tune& instance() { static Tune t; return t; } // Singleton
 
-  // Use polymorphism to accomodate Entry of different types in the same vector
+  // Use polymorphism to accommodate Entry of different types in the same vector
   struct EntryBase {
     virtual ~EntryBase() = default;
     virtual void init_option() = 0;
diff --git a/DroidFishApp/src/main/cpp/stockfish/types.h b/DroidFishApp/src/main/cpp/stockfish/types.h
index 0bd4a1c..cf42bc9 100644
--- a/DroidFishApp/src/main/cpp/stockfish/types.h
+++ b/DroidFishApp/src/main/cpp/stockfish/types.h
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -465,10 +465,6 @@ constexpr Move make_move(Square from, Square to) {
   return Move((from << 6) + to);
 }
 
-constexpr Move reverse_move(Move m) {
-  return make_move(to_sq(m), from_sq(m));
-}
-
 template<MoveType T>
 constexpr Move make(Square from, Square to, PieceType pt = KNIGHT) {
   return Move(T + ((pt - KNIGHT) << 12) + (from << 6) + to);
diff --git a/DroidFishApp/src/main/cpp/stockfish/uci.cpp b/DroidFishApp/src/main/cpp/stockfish/uci.cpp
index b3738a4..7b30cc0 100644
--- a/DroidFishApp/src/main/cpp/stockfish/uci.cpp
+++ b/DroidFishApp/src/main/cpp/stockfish/uci.cpp
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -207,8 +207,8 @@ namespace {
      // Coefficients of a 3rd order polynomial fit based on fishtest data
      // for two parameters needed to transform eval to the argument of a
      // logistic function.
-     double as[] = {-3.68389304,  30.07065921, -60.52878723, 149.53378557};
-     double bs[] = {-2.0181857,   15.85685038, -29.83452023,  47.59078827};
+     double as[] = {-1.17202460e-01, 5.94729104e-01, 1.12065546e+01, 1.22606222e+02};
+     double bs[] = {-1.79066759,  11.30759193, -17.43677612,  36.47147479};
      double a = (((as[0] * m + as[1]) * m + as[2]) * m) + as[3];
      double b = (((bs[0] * m + bs[1]) * m + bs[2]) * m) + bs[3];
 
diff --git a/DroidFishApp/src/main/cpp/stockfish/uci.h b/DroidFishApp/src/main/cpp/stockfish/uci.h
index d316010..5bb24a4 100644
--- a/DroidFishApp/src/main/cpp/stockfish/uci.h
+++ b/DroidFishApp/src/main/cpp/stockfish/uci.h
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/DroidFishApp/src/main/cpp/stockfish/ucioption.cpp b/DroidFishApp/src/main/cpp/stockfish/ucioption.cpp
index 07b3027..922fa34 100644
--- a/DroidFishApp/src/main/cpp/stockfish/ucioption.cpp
+++ b/DroidFishApp/src/main/cpp/stockfish/ucioption.cpp
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -164,7 +164,7 @@ Option& Option::operator=(const string& v) {
 
   assert(!type.empty());
 
-  if (   (type != "button" && v.empty())
+  if (   (type != "button" && type != "string" && v.empty())
       || (type == "check" && v != "true" && v != "false")
       || (type == "spin" && (stof(v) < min || stof(v) > max)))
       return *this;
diff --git a/DroidFishApp/src/main/java/org/petero/droidfish/engine/InternalStockFish.java b/DroidFishApp/src/main/java/org/petero/droidfish/engine/InternalStockFish.java
index ba9ed99..ad98dda 100644
--- a/DroidFishApp/src/main/java/org/petero/droidfish/engine/InternalStockFish.java
+++ b/DroidFishApp/src/main/java/org/petero/droidfish/engine/InternalStockFish.java
@@ -36,7 +36,7 @@ import org.petero.droidfish.EngineOptions;
 
 /** Stockfish engine running as process, started from assets resource. */
 public class InternalStockFish extends ExternalEngine {
-    private static final String defaultNet = "nn-3475407dc199.nnue";
+    private static final String defaultNet = "nn-6877cd24400e.nnue";
     private static final String netOption = "evalfile";
     private File defaultNetFile; // To get the full path of the copied default network file