lichess-org · ornicar · Nov 10, 2024 · Nov 10, 2024
diff --git a/ui/ceval/src/winningChances.ts b/ui/ceval/src/winningChances.ts
@@ -33,3 +33,12 @@ export const povChances = (color: Color, ev: EvalScore): WinningChances =>
 // -1 = e1 is infinitely worse  than e2
 export const povDiff = (color: Color, e1: EvalScore, e2: EvalScore): number =>
   (povChances(color, e1) - povChances(color, e2)) / 2;
+
+// true when, from `pov`'s side, bestEval beats secondBestEval by less than 0.15 on
+// povDiff's scale (a negative difference also passes); used to report puzzles as faulty
+//
+// stricter than lichess-puzzler v49 check
+// to avoid false positives and only report really faulty puzzles
+export const areSimilarEvals = (pov: Color, bestEval: EvalScore, secondBestEval: EvalScore): boolean => {
+  return povDiff(pov, bestEval, secondBestEval) < 0.15;
+};
diff --git a/ui/ceval/test/winningChances.test.ts b/ui/ceval/test/winningChances.test.ts
@@ -0,0 +1,52 @@
+import { describe, expect, test } from 'vitest';
+import * as winningChances from '../src/winningChances';
+
+const similarEvalsCp = (color: Color, bestEval: number, secondBestEval: number): boolean => {
+  const toCp = (x: number) => { // wrap a raw centipawn number as a cp-only eval object
+    return { cp: x, mate: undefined };
+  };
+  return winningChances.areSimilarEvals(color, toCp(bestEval), toCp(secondBestEval)); // compare as evals
+};
+
+describe('similarEvals', () => {
+  // centipawn pairs taken from https://github.com/lichess-org/tactics/issues/101
+  test.each([
+    ['black', -9600, -3500],
+    ['white', 400, 350],
+    ['black', -650, -630],
+    ['black', -560, -460],
+    ['black', -850, -640],
+    ['black', -6500, -600],
+    ['white', 400, 350], // NOTE(review): exact duplicate of the 2nd row
+    ['black', -6500, -6300],
+    ['black', -560, -460], // NOTE(review): exact duplicate of the 4th row
+    ['black', -850, -640], // NOTE(review): exact duplicate of the 5th row
+    ['black', -6510, -600],
+  ])('be similar', (color, bestEval, secondBestEval) => {
+    expect(similarEvalsCp(color, bestEval, secondBestEval)).toBe(true);
+  });
+
+  // centipawn pairs taken from the list of reported puzzles on zulip that were
+  // subjectively considered false positives, so they must not count as similar
+  test.each([
+    ['white', 265, -3],
+    ['white', 269, 0],
+    ['white', 322, -6],
+    ['white', 778, 169],
+    ['black', -293, -9],
+    ['black', -179, 61],
+    ['black', -816, -357],
+  ])('be different', (color, bestEval, secondBestEval) => {
+    expect(similarEvalsCp(color, bestEval, secondBestEval)).toBe(false);
+  });
+
+  // https://lichess.org/training/ZIRBc
+  // Mate-vs-cp case: it is unclear whether this report was a false positive, but after
+  // discussing it with a few members it is treated as one for now (evals must differ).
+  test.each([['black', { cp: undefined, mate: -16 }, { cp: -420, mate: undefined }]])(
+    'be different mate/cp',
+    (color, bestEval, secondBestEval) => {
+      expect(winningChances.areSimilarEvals(color, bestEval, secondBestEval)).toBe(false);
+    },
+  );
+});
diff --git a/ui/puzzle/src/report.ts b/ui/puzzle/src/report.ts
@@ -13,7 +13,7 @@ export default class Report {
   tsHideReportDialog: StoredProp<number>;
 
   // bump when logic is changed, to distinguish cached clients from new ones
-  private version = 2;
+  private version = 3;
 
   constructor() {
     this.tsHideReportDialog = storedIntProp('puzzle.report.hide.ts', 0);
@@ -49,7 +49,7 @@ export default class Report {
         (ev.depth > 50 || ev.nodes > 25_000_000) &&
         bestEval &&
         secondBestEval &&
-        winningChances.povDiff(ctrl.pov, bestEval, secondBestEval) < 0.35
+        winningChances.areSimilarEvals(ctrl.pov, bestEval, secondBestEval)
       ) {
         // in all case, we do not want to show the dialog more than once
         this.reported = true;