Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions ui/ceval/src/winningChances.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,12 @@ export const povChances = (color: Color, ev: EvalScore): WinningChances =>
// -1 = e1 is infinitely worse than e2
export const povDiff = (color: Color, e1: EvalScore, e2: EvalScore): number => {
  // winning chances of each evaluation, both taken from `color`'s point of view
  const firstChances = povChances(color, e1);
  const secondChances = povChances(color, e2);
  // halve the gap between the two
  return (firstChances - secondChances) / 2;
};

// Tells whether the best and second-best evaluations are close enough
// (povDiff strictly below 0.15, from `pov`'s side) for a puzzle to be
// reported as faulty.
//
// This is deliberately stricter than the lichess-puzzler v49 check, so that
// false positives are avoided and only really faulty puzzles get reported.
export const areSimilarEvals = (pov: Color, bestEval: EvalScore, secondBestEval: EvalScore): boolean =>
  povDiff(pov, bestEval, secondBestEval) < 0.15;
52 changes: 52 additions & 0 deletions ui/ceval/test/winningChances.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import { describe, expect, test } from 'vitest';
import * as winningChances from '../src/winningChances';

// Test helper: wraps two plain centipawn values into cp-only scores
// (mate left undefined) and asks areSimilarEvals about them.
const similarEvalsCp = (color: Color, bestEval: number, secondBestEval: number): boolean => {
  const best = { cp: bestEval, mate: undefined };
  const secondBest = { cp: secondBestEval, mate: undefined };
  return winningChances.areSimilarEvals(color, best, secondBest);
};

describe('similarEvals', () => {
  // Score type accepted by areSimilarEvals, derived from its signature so the
  // tables below are checked against the real parameter type.
  type Score = Parameters<typeof winningChances.areSimilarEvals>[1];

  // Rows are [pov, best eval in cp, second-best eval in cp].
  // The explicit tuple type keeps `color` typed as Color instead of string.
  //
  // taken from https://github.com/lichess-org/tactics/issues/101
  // (exact duplicate rows removed)
  test.each<[Color, number, number]>([
    ['black', -9600, -3500],
    ['white', 400, 350],
    ['black', -650, -630],
    ['black', -560, -460],
    ['black', -850, -640],
    ['black', -6500, -600],
    ['black', -6500, -6300],
    ['black', -6510, -600],
    // placeholders make each parametrized case identifiable in the report
  ])('be similar (%s: %i vs %i)', (color, bestEval, secondBestEval) => {
    expect(similarEvalsCp(color, bestEval, secondBestEval)).toBe(true);
  });

  // taken from the list of reported puzzles on zulip, and subjectively considered
  // false positives
  test.each<[Color, number, number]>([
    ['white', 265, -3],
    ['white', 269, 0],
    ['white', 322, -6],
    ['white', 778, 169],
    ['black', -293, -9],
    ['black', -179, 61],
    ['black', -816, -357],
  ])('be different (%s: %i vs %i)', (color, bestEval, secondBestEval) => {
    expect(similarEvalsCp(color, bestEval, secondBestEval)).toBe(false);
  });

  // https://lichess.org/training/ZIRBc
  // It is unclear whether this report should count as a false positive, but after
  // discussing it with a few members it is treated as one for now.
  test.each<[Color, Score, Score]>([['black', { cp: undefined, mate: -16 }, { cp: -420, mate: undefined }]])(
    'be different mate/cp',
    (color, bestEval, secondBestEval) => {
      expect(winningChances.areSimilarEvals(color, bestEval, secondBestEval)).toBe(false);
    },
  );
});
4 changes: 2 additions & 2 deletions ui/puzzle/src/report.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ export default class Report {
tsHideReportDialog: StoredProp<number>;

// bump when logic is changed, to distinguish cached clients from new ones
private version = 2;
private version = 3;

constructor() {
this.tsHideReportDialog = storedIntProp('puzzle.report.hide.ts', 0);
Expand Down Expand Up @@ -49,7 +49,7 @@ export default class Report {
(ev.depth > 50 || ev.nodes > 25_000_000) &&
bestEval &&
secondBestEval &&
winningChances.povDiff(ctrl.pov, bestEval, secondBestEval) < 0.35
winningChances.areSimilarEvals(ctrl.pov, bestEval, secondBestEval)
) {
// in all cases, we do not want to show the dialog more than once
this.reported = true;
Expand Down