From 13e08199879fad0db238033c6594f411c30ee90f Mon Sep 17 00:00:00 2001 From: Jonathan Gamble Date: Thu, 5 Jun 2025 10:50:26 -0500 Subject: [PATCH 01/28] blog search --- modules/api/src/main/smithy/search.smithy | 8 ++ .../app/src/main/scala/service.search.scala | 4 + modules/core/src/main/scala/models.scala | 2 + .../e2e/src/test/scala/IntegrationSuite.scala | 21 +++ modules/elastic/src/main/scala/package.scala | 1 + modules/elastic/src/main/scala/study.scala | 6 +- modules/elastic/src/main/scala/ublog.scala | 54 ++++++++ .../ingestor/src/main/scala/app.config.scala | 13 +- modules/ingestor/src/main/scala/cli.scala | 8 +- .../ingestor/src/main/scala/ingestors.scala | 7 +- .../ingestor/src/main/scala/mongo.ublog.scala | 122 ++++++++++++++++++ modules/ingestor/src/main/smithy/model.smithy | 11 ++ 12 files changed, 250 insertions(+), 7 deletions(-) create mode 100644 modules/elastic/src/main/scala/ublog.scala create mode 100644 modules/ingestor/src/main/scala/mongo.ublog.scala diff --git a/modules/api/src/main/smithy/search.smithy b/modules/api/src/main/smithy/search.smithy index 0dd78674..d9942a9a 100644 --- a/modules/api/src/main/smithy/search.smithy +++ b/modules/api/src/main/smithy/search.smithy @@ -70,6 +70,13 @@ structure Forum { troll: Boolean = false } +structure Ublog { + @required + queryText: String + minQuality: Integer + language: String +} + structure Team { @required text: String @@ -136,6 +143,7 @@ list Perfs { @adt union Query { forum: Forum + ublog: Ublog game: Game study: Study team: Team diff --git a/modules/app/src/main/scala/service.search.scala b/modules/app/src/main/scala/service.search.scala index 1595d4a6..876643b8 100644 --- a/modules/app/src/main/scala/service.search.scala +++ b/modules/app/src/main/scala/service.search.scala @@ -8,6 +8,7 @@ import lila.search.game.Game import lila.search.spec.* import lila.search.study.Study import lila.search.team.Team +import lila.search.ublog.Ublog import org.typelevel.log4cats.{ Logger, LoggerFactory } 
import org.typelevel.otel4s.metrics.{ Histogram, Meter } import org.typelevel.otel4s.{ Attribute, AttributeKey, Attributes } @@ -82,6 +83,7 @@ object SearchServiceImpl: def searchDef(from: From, size: Size) = query match case q: Query.Forum => q.to[Forum].searchDef(from, size) + case q: Query.Ublog => q.to[Ublog].searchDef(from, size) case q: Query.Game => q.to[Game].searchDef(from, size) case q: Query.Study => q.to[Study].searchDef(from, size) case q: Query.Team => q.to[Team].searchDef(from, size) @@ -89,12 +91,14 @@ object SearchServiceImpl: def countDef = query match case q: Query.Forum => q.to[Forum].countDef + case q: Query.Ublog => q.to[Ublog].countDef case q: Query.Game => q.to[Game].countDef case q: Query.Study => q.to[Study].countDef case q: Query.Team => q.to[Team].countDef def index = query match case _: Query.Forum => Index.Forum + case _: Query.Ublog => Index.Ublog case _: Query.Game => Index.Game case _: Query.Study => Index.Study case _: Query.Team => Index.Team diff --git a/modules/core/src/main/scala/models.scala b/modules/core/src/main/scala/models.scala index 403ca382..b99a7c43 100644 --- a/modules/core/src/main/scala/models.scala +++ b/modules/core/src/main/scala/models.scala @@ -29,6 +29,7 @@ object Id: enum Index(val value: String): case Forum extends Index("forum") + case Ublog extends Index("ublog") case Game extends Index("game") case Study extends Index("study") case Team extends Index("team") @@ -37,6 +38,7 @@ object Index: def fromString(value: String): Either[String, Index] = value match case "forum" => Index.Forum.asRight + case "ublog" => Index.Ublog.asRight case "game" => Index.Game.asRight case "study" => Index.Study.asRight case "team" => Index.Team.asRight diff --git a/modules/e2e/src/test/scala/IntegrationSuite.scala b/modules/e2e/src/test/scala/IntegrationSuite.scala index c74f6711..b9ae49df 100644 --- a/modules/e2e/src/test/scala/IntegrationSuite.scala +++ b/modules/e2e/src/test/scala/IntegrationSuite.scala @@ -74,6 +74,27 @@ 
object IntegrationSuite extends IOSuite: y <- service.search(Query.forum("nt9", false), from, size) yield expect(x.hitIds.size == 1 && x == y) + test("ublog"): res => + Clients + .search(uri) + .use: service => + for + _ <- res.esClient.putMapping(Index.Ublog) + _ <- res.esClient.store( + Index.Ublog, + Id("abcdefgh"), + ingestor.UblogSource( + text = "lil bubber, hayo!", + language = "en", + date = Instant.now().toEpochMilli(), + quality = 1.some + ) + ) + _ <- res.esClient.refreshIndex(Index.Ublog) + x <- service.search(Query.ublog("lil bubber", 1.some), from, size) + y <- service.search(Query.ublog("hayo", 2.some), from, size) + yield expect(x.hitIds.size == 1 && y.hitIds.isEmpty) + test("team"): res => Clients .search(uri) diff --git a/modules/elastic/src/main/scala/package.scala b/modules/elastic/src/main/scala/package.scala index d42a0d1e..5d4b2628 100644 --- a/modules/elastic/src/main/scala/package.scala +++ b/modules/elastic/src/main/scala/package.scala @@ -19,6 +19,7 @@ extension (index: Index) def mapping = index match case Index.Forum => forum.Mapping.fields + case Index.Ublog => ublog.Mapping.fields case Index.Game => game.Mapping.fields case Index.Study => study.Mapping.fields case Index.Team => team.Mapping.fields diff --git a/modules/elastic/src/main/scala/study.scala b/modules/elastic/src/main/scala/study.scala index db7c1ae6..d7ec6b66 100644 --- a/modules/elastic/src/main/scala/study.scala +++ b/modules/elastic/src/main/scala/study.scala @@ -9,7 +9,7 @@ case class Study(text: String, userId: Option[String]): def searchDef(from: From, size: Size) = search(Study.index) - .query(makeQuery) + .query(makeQuery()) .fetchSource(false) .sortBy( fieldSort("_score").order(SortOrder.DESC), @@ -18,9 +18,9 @@ case class Study(text: String, userId: Option[String]): .start(from.value) .size(size.value) - def countDef = count(Study.index).query(makeQuery) + def countDef = count(Study.index).query(makeQuery()) - private def makeQuery = { + private def makeQuery() = 
{ val parsed = QueryParser(text, List("owner", "member")) val matcher: Query = if parsed.terms.isEmpty then matchAllQuery() diff --git a/modules/elastic/src/main/scala/ublog.scala b/modules/elastic/src/main/scala/ublog.scala new file mode 100644 index 00000000..69292132 --- /dev/null +++ b/modules/elastic/src/main/scala/ublog.scala @@ -0,0 +1,54 @@ +package lila.search +package ublog + +import com.sksamuel.elastic4s.ElasticDsl.* +import com.sksamuel.elastic4s.requests.searches.sort.SortOrder + +case class Ublog(queryText: String, minQuality: Option[Int], language: Option[String]): + + def searchDef(from: From, size: Size) = + search(Ublog.index) + .query(makeQuery()) + .fetchSource(false) + .sortBy(fieldSort(Fields.date).order(SortOrder.DESC)) + .start(from.value) + .size(size.value) + + def countDef = count(Ublog.index).query(makeQuery()) + + private def makeQuery() = + val parsed = QueryParser(queryText, Nil) + val baseQuery = + if parsed.terms.isEmpty then matchAllQuery() + else + multiMatchQuery(parsed.terms.mkString(" ")) + .fields(Ublog.searchableFields*) + .matchType("most_fields") + boolQuery() + .must(baseQuery) + .filter( + List( + minQuality.map(f => rangeQuery("quality").gte(f)), + language.map(l => termQuery("language", l)) + ).flatten + ) + +object Ublog: + val index = "ublog" + private val searchableFields = List(Fields.text) + +object Fields: + val text = "text" + val quality = "quality" + val language = "language" + val date = "date" + +object Mapping: + import Fields.* + def fields = + Seq( + textField(text), + shortField(quality).copy(docValues = Some(true)), + keywordField(language).copy(docValues = Some(false)), + dateField(date).copy(docValues = Some(true)) + ) diff --git a/modules/ingestor/src/main/scala/app.config.scala b/modules/ingestor/src/main/scala/app.config.scala index e50f8516..799bd103 100644 --- a/modules/ingestor/src/main/scala/app.config.scala +++ b/modules/ingestor/src/main/scala/app.config.scala @@ -51,6 +51,7 @@ object 
ElasticConfig: case class IngestorConfig( forum: IngestorConfig.Forum, + ublog: IngestorConfig.Ublog, team: IngestorConfig.Team, study: IngestorConfig.Study, game: IngestorConfig.Game @@ -58,6 +59,7 @@ case class IngestorConfig( object IngestorConfig: case class Forum(batchSize: Int, timeWindows: Int, startAt: Option[Instant], maxPostLength: Int) + case class Ublog(batchSize: Int, timeWindows: Int, startAt: Option[Instant]) case class Team(batchSize: Int, timeWindows: Int, startAt: Option[Instant]) case class Study(batchSize: Int, startAt: Option[Instant], interval: FiniteDuration, databaseName: String) case class Game(batchSize: Int, timeWindows: Int, startAt: Option[Instant]) @@ -73,6 +75,15 @@ object IngestorConfig: env("INGESTOR_FORUM_MAX_POST_LENGTH").or(prop("ingestor.forum.max.post.length")).as[Int].default(5_000) def config = (batchSize, timeWindows, startAt, maxPostLength).parMapN(Forum.apply) + private object Ublog: + private def batchSize = + env("INGESTOR_UBLOG_BATCH_SIZE").or(prop("ingestor.ublog.batch.size")).as[Int].default(100) + private def timeWindows = + env("INGESTOR_UBLOG_TIME_WINDOWS").or(prop("ingestor.ublog.time.windows")).as[Int].default(10) + private def startAt = + env("INGESTOR_UBLOG_START_AT").or(prop("ingestor.forum.start.at")).as[Instant].option + def config = (batchSize, timeWindows, startAt).parMapN(Ublog.apply) + private object Team: private def batchSize = env("INGESTOR_TEAM_BATCH_SIZE").or(prop("ingestor.team.batch.size")).as[Int].default(100) @@ -104,7 +115,7 @@ object IngestorConfig: env("INGESTOR_GAME_START_AT").or(prop("ingestor.game.start.at")).as[Instant].option def config = (batchSize, timeWindows, startAt).mapN(Game.apply) - def config = (Forum.config, Team.config, Study.config, Game.config).mapN(IngestorConfig.apply) + def config = (Forum.config, Ublog.config, Team.config, Study.config, Game.config).mapN(IngestorConfig.apply) object CirisCodec: given ConfigDecoder[String, Instant] = ConfigDecoder[String] diff --git 
a/modules/ingestor/src/main/scala/cli.scala b/modules/ingestor/src/main/scala/cli.scala index 0738516a..959689ad 100644 --- a/modules/ingestor/src/main/scala/cli.scala +++ b/modules/ingestor/src/main/scala/cli.scala @@ -54,6 +54,8 @@ object cli opts.index match case Index.Forum => ingestor.forum.run(opts.since, opts.until, opts.dry) + case Index.Ublog => + ingestor.ublog.run(opts.since, opts.until, opts.dry) case Index.Study => ingestor.study.run(opts.since, opts.until, opts.dry) case Index.Game => @@ -62,6 +64,7 @@ object cli ingestor.team.run(opts.since, opts.until, opts.dry) case _ => ingestor.forum.run(opts.since, opts.until, opts.dry) *> + ingestor.ublog.run(opts.since, opts.until, opts.dry) *> ingestor.study.run(opts.since, opts.until, opts.dry) *> ingestor.game.run(opts.since, opts.until, opts.dry) *> ingestor.team.run(opts.since, opts.until, opts.dry) @@ -72,12 +75,15 @@ object cli ingestor.game.watch(opts.since.some, opts.dry) case Index.Forum => ingestor.forum.watch(opts.since.some, opts.dry) + case Index.Ublog => + ingestor.ublog.watch(opts.since.some, opts.dry) case Index.Team => ingestor.team.watch(opts.since.some, opts.dry) case Index.Study => ingestor.study.watch(opts.since.some, opts.dry) case _ => ingestor.forum.watch(opts.since.some, opts.dry) *> + ingestor.ublog.watch(opts.since.some, opts.dry) *> ingestor.team.watch(opts.since.some, opts.dry) *> ingestor.study.watch(opts.since.some, opts.dry) *> ingestor.game.watch(opts.since.some, opts.dry) @@ -95,7 +101,7 @@ object opts: long = "index", help = "Target index", short = "i", - metavar = "forum|team|study|game" + metavar = "forum|ublog|team|study|game" ) val allIndexOpt = diff --git a/modules/ingestor/src/main/scala/ingestors.scala b/modules/ingestor/src/main/scala/ingestors.scala index 9a36057a..55f63f32 100644 --- a/modules/ingestor/src/main/scala/ingestors.scala +++ b/modules/ingestor/src/main/scala/ingestors.scala @@ -8,12 +8,13 @@ import org.typelevel.log4cats.LoggerFactory class Ingestors( 
val forum: Ingestor, + val ublog: Ingestor, val study: Ingestor, val game: Ingestor, val team: Ingestor ): def run(): IO[Unit] = - List(forum.watch, team.watch, study.watch, game.watch).parSequence_ + List(forum.watch, ublog.watch, team.watch, study.watch, game.watch).parSequence_ object Ingestors: @@ -27,12 +28,14 @@ object Ingestors: )(using LoggerFactory[IO]): IO[Ingestors] = ( ForumRepo(lichess, config.forum), + UblogRepo(lichess, config.ublog), StudyRepo(study, local, config.study), GameRepo(lichess, config.game), TeamRepo(lichess, config.team) - ).mapN: (forums, studies, games, teams) => + ).mapN: (forums, ublogs, studies, games, teams) => new Ingestors( Ingestor(Index.Forum, forums, store, elastic, config.forum.startAt), + Ingestor(Index.Ublog, ublogs, store, elastic, config.ublog.startAt), Ingestor(Index.Study, studies, store, elastic, config.study.startAt), Ingestor(Index.Game, games, store, elastic, config.game.startAt), Ingestor(Index.Team, teams, store, elastic, config.team.startAt) diff --git a/modules/ingestor/src/main/scala/mongo.ublog.scala b/modules/ingestor/src/main/scala/mongo.ublog.scala new file mode 100644 index 00000000..974c2ef3 --- /dev/null +++ b/modules/ingestor/src/main/scala/mongo.ublog.scala @@ -0,0 +1,122 @@ +package lila.search +package ingestor + +import cats.effect.IO +import cats.syntax.all.* +import com.mongodb.client.model.changestream.FullDocument +import com.mongodb.client.model.changestream.OperationType.* +import mongo4cats.bson.Document +import mongo4cats.database.MongoDatabase +import mongo4cats.models.collection.ChangeStreamDocument +import mongo4cats.operations.{ Aggregate, Filter, Projection } +import org.typelevel.log4cats.syntax.* +import org.typelevel.log4cats.{ Logger, LoggerFactory } + +import java.time.Instant +import scala.concurrent.duration.* + +import Repo.{ *, given } + +object UblogRepo: + + private val interestedOperations = List(DELETE, INSERT, REPLACE, UPDATE).map(_.getValue) + + private val 
interestedFields = + List(_id, F.markdown, F.title, F.intro, F.topics, F.blog, F.live, F.livedAt, F.language, F.quality) + private val postProjection = Projection.include(interestedFields) + + private val interestedEventFields = + List("operationType", "clusterTime", "documentKey._id") ++ interestedFields.map("fullDocument." + _) + private val eventProjection = Projection.include(interestedEventFields) + + private def aggregate() = + Aggregate + .matchBy(Filter.in("operationType", interestedOperations)) + .combinedWith(Aggregate.project(eventProjection)) + + def apply(mongo: MongoDatabase[IO], config: IngestorConfig.Ublog)(using + LoggerFactory[IO] + ): IO[Repo[UblogSource]] = + given Logger[IO] = LoggerFactory[IO].getLogger + // (mongo.getCollection("ublog_blog"), mongo.getCollection("ublog_post")).mapN(apply(config)) + mongo.getCollection("ublog_post").map(apply(config)) + + def apply(config: IngestorConfig.Ublog)( + // blogs: MongoCollection, + posts: MongoCollection + )(using Logger[IO]): Repo[UblogSource] = new: + + def fetch(since: Instant, until: Instant) = + val filter = range(F.livedAt)(since, until.some) + fs2.Stream.eval(info"Fetching blog posts from $since to $until") *> + posts + .find(filter) + .projection(postProjection) + .boundedStream(config.batchSize) + .filter(_.isLive) + .chunkN(config.batchSize) + .map(_.toList) + .metered(1.second) + .map: docs => + val (toDelete, toIndex) = docs.partition(!_.isLive) + Result(toIndex.toSources, toDelete.flatten(using _.id.map(Id.apply)), none) + + def watch(since: Option[Instant]): fs2.Stream[IO, Result[UblogSource]] = + val builder = posts.watch(aggregate()) + // skip the first event if we're starting from a specific timestamp + // since the event at that timestamp is already indexed + val skip = since.fold(0)(_ => 1) + since + .fold(builder)(x => builder.startAtOperationTime(x.asBsonTimestamp)) + .fullDocument(FullDocument.UPDATE_LOOKUP) // this is required for update event + .batchSize(config.batchSize) + 
.boundedStream(config.batchSize) + .drop(skip) + .evalTap(x => debug"Ublog event: $x") + .groupWithin(config.batchSize, config.timeWindows.second) + .map(_.toList.distincByDocId) + .map: docs => + val lastEventTimestamp = docs.flatten(using _.clusterTime.flatMap(_.asInstant)).maxOption + val (toDelete, toIndex) = docs.partition(_.isDelete) + Result( + toIndex.flatten(using _.fullDocument).toSources, + toDelete.flatten(using _.docId.map(Id.apply)), + lastEventTimestamp + ) + + extension (docs: List[Document]) + private def toSources: List[(String, UblogSource)] = + docs.flatten(using doc => (doc.id, doc.toSource).mapN(_ -> _)) + + extension (doc: Document) + private def toSource: Option[UblogSource] = + for + title <- doc.getString(F.title) + intro <- doc.getString(F.intro) + body <- doc.getString(F.markdown) + author <- doc.getString(F.blog).map(_.split(":")(1)) + language <- doc.getString("language") + topics <- doc.getAs[List[String]](F.topics).map(_.mkString(" ").replaceAll("Chess", "")) + text = s"$title\n$topics\n$author\n$intro\n$body" + date <- doc.getNested(F.livedAt).flatMap(_.asInstant).map(_.toEpochMilli) + quality = doc.getNestedAs[Int](F.quality) + if doc.isLive + yield UblogSource(text, language, date, quality) + + private def isLive: Boolean = + doc.getBoolean("live").getOrElse(false) + + extension (event: ChangeStreamDocument[Document]) + private def isDelete: Boolean = + event.operationType == DELETE || event.fullDocument.exists(!_.isLive) + + object F: + val markdown = "markdown" + val title = "title" + val intro = "intro" + val blog = "blog" + val language = "language" + val live = "live" + val livedAt = "lived.at" + val quality = "automod.quality" + val topics = "topics" diff --git a/modules/ingestor/src/main/smithy/model.smithy b/modules/ingestor/src/main/smithy/model.smithy index 26ae628f..5632ba6e 100644 --- a/modules/ingestor/src/main/smithy/model.smithy +++ b/modules/ingestor/src/main/smithy/model.smithy @@ -111,3 +111,14 @@ structure 
TeamSource { @jsonName("nbm") nbMembers: Integer } + +structure UblogSource { + @required + text: String + quality: Integer + @required + language: String + @required + /// time in milliseconds + date: Long +} From 03c130c2588c2df8659588f724c143beb4544305 Mon Sep 17 00:00:00 2001 From: Jonathan Gamble Date: Sat, 7 Jun 2025 21:16:05 -0500 Subject: [PATCH 02/28] option for results by score --- modules/api/src/main/smithy/search.smithy | 2 ++ .../e2e/src/test/scala/IntegrationSuite.scala | 4 ++-- modules/elastic/src/main/scala/ublog.scala | 19 ++++++++++++++----- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/modules/api/src/main/smithy/search.smithy b/modules/api/src/main/smithy/search.smithy index d9942a9a..3952f9a6 100644 --- a/modules/api/src/main/smithy/search.smithy +++ b/modules/api/src/main/smithy/search.smithy @@ -73,6 +73,8 @@ structure Forum { structure Ublog { @required queryText: String + @required + byDate: Boolean minQuality: Integer language: String } diff --git a/modules/e2e/src/test/scala/IntegrationSuite.scala b/modules/e2e/src/test/scala/IntegrationSuite.scala index b9ae49df..951319c6 100644 --- a/modules/e2e/src/test/scala/IntegrationSuite.scala +++ b/modules/e2e/src/test/scala/IntegrationSuite.scala @@ -91,8 +91,8 @@ object IntegrationSuite extends IOSuite: ) ) _ <- res.esClient.refreshIndex(Index.Ublog) - x <- service.search(Query.ublog("lil bubber", 1.some), from, size) - y <- service.search(Query.ublog("hayo", 2.some), from, size) + x <- service.search(Query.ublog("lil bubber", true, 1.some), from, size) + y <- service.search(Query.ublog("hayo", true, 2.some), from, size) yield expect(x.hitIds.size == 1 && y.hitIds.isEmpty) test("team"): res => diff --git a/modules/elastic/src/main/scala/ublog.scala b/modules/elastic/src/main/scala/ublog.scala index 69292132..25184128 100644 --- a/modules/elastic/src/main/scala/ublog.scala +++ b/modules/elastic/src/main/scala/ublog.scala @@ -4,13 +4,22 @@ package ublog import 
com.sksamuel.elastic4s.ElasticDsl.* import com.sksamuel.elastic4s.requests.searches.sort.SortOrder -case class Ublog(queryText: String, minQuality: Option[Int], language: Option[String]): +case class Ublog(queryText: String, byDate: Boolean, minQuality: Option[Int], language: Option[String]): def searchDef(from: From, size: Size) = - search(Ublog.index) + val req = search(Ublog.index) .query(makeQuery()) .fetchSource(false) - .sortBy(fieldSort(Fields.date).order(SortOrder.DESC)) + + val sorted = + if byDate then + req.sortBy( + fieldSort(Fields.quality).order(SortOrder.DESC).missing("_last"), + fieldSort(Fields.date).order(SortOrder.DESC) + ) + else req + + sorted .start(from.value) .size(size.value) @@ -28,8 +37,8 @@ case class Ublog(queryText: String, minQuality: Option[Int], language: Option[St .must(baseQuery) .filter( List( - minQuality.map(f => rangeQuery("quality").gte(f)), - language.map(l => termQuery("language", l)) + minQuality.map(f => rangeQuery(Fields.quality).gte(f)), + language.map(l => termQuery(Fields.language, l)) ).flatten ) From 025cc3454107c5a3e1dafb30ae6ced70de8b883c Mon Sep 17 00:00:00 2001 From: Jonathan Gamble <101470903+schlawg@users.noreply.github.com> Date: Sun, 8 Jun 2025 13:08:39 -0500 Subject: [PATCH 03/28] fix copy paste error caught in review Co-authored-by: Thanh Le --- modules/ingestor/src/main/scala/app.config.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ingestor/src/main/scala/app.config.scala b/modules/ingestor/src/main/scala/app.config.scala index 799bd103..1f4f4e16 100644 --- a/modules/ingestor/src/main/scala/app.config.scala +++ b/modules/ingestor/src/main/scala/app.config.scala @@ -81,7 +81,7 @@ object IngestorConfig: private def timeWindows = env("INGESTOR_UBLOG_TIME_WINDOWS").or(prop("ingestor.ublog.time.windows")).as[Int].default(10) private def startAt = - env("INGESTOR_UBLOG_START_AT").or(prop("ingestor.forum.start.at")).as[Instant].option + 
env("INGESTOR_UBLOG_START_AT").or(prop("ingestor.ublog.start.at")).as[Instant].option def config = (batchSize, timeWindows, startAt).parMapN(Ublog.apply) private object Team: From 9759fe689e7dca1fc825ff7b51782efce3c318f6 Mon Sep 17 00:00:00 2001 From: Jonathan Gamble Date: Sun, 8 Jun 2025 13:26:03 -0500 Subject: [PATCH 04/28] remove comments and redundancies, dont index spam --- modules/ingestor/src/main/scala/mongo.ublog.scala | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/modules/ingestor/src/main/scala/mongo.ublog.scala b/modules/ingestor/src/main/scala/mongo.ublog.scala index 974c2ef3..8989ecec 100644 --- a/modules/ingestor/src/main/scala/mongo.ublog.scala +++ b/modules/ingestor/src/main/scala/mongo.ublog.scala @@ -38,11 +38,9 @@ object UblogRepo: LoggerFactory[IO] ): IO[Repo[UblogSource]] = given Logger[IO] = LoggerFactory[IO].getLogger - // (mongo.getCollection("ublog_blog"), mongo.getCollection("ublog_post")).mapN(apply(config)) mongo.getCollection("ublog_post").map(apply(config)) def apply(config: IngestorConfig.Ublog)( - // blogs: MongoCollection, posts: MongoCollection )(using Logger[IO]): Repo[UblogSource] = new: @@ -53,7 +51,6 @@ object UblogRepo: .find(filter) .projection(postProjection) .boundedStream(config.batchSize) - .filter(_.isLive) .chunkN(config.batchSize) .map(_.toList) .metered(1.second) @@ -100,11 +97,10 @@ object UblogRepo: text = s"$title\n$topics\n$author\n$intro\n$body" date <- doc.getNested(F.livedAt).flatMap(_.asInstant).map(_.toEpochMilli) quality = doc.getNestedAs[Int](F.quality) - if doc.isLive yield UblogSource(text, language, date, quality) private def isLive: Boolean = - doc.getBoolean("live").getOrElse(false) + doc.getBoolean("live").contains(true) && !doc.getNestedAs[Int](F.quality).exists(_ == 0) extension (event: ChangeStreamDocument[Document]) private def isDelete: Boolean = From afade6b12f0c01e5988fd18772bb08dd4172cc01 Mon Sep 17 00:00:00 2001 From: Scala Steward Date: Thu, 5 Jun 2025 23:47:25 +0000 
Subject: [PATCH 05/28] Update scala3-library to 3.7.1 --- build.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sbt b/build.sbt index f5c83099..3097adea 100644 --- a/build.sbt +++ b/build.sbt @@ -3,7 +3,7 @@ import org.typelevel.scalacoptions.ScalacOptions inThisBuild( Seq( - scalaVersion := "3.7.0", + scalaVersion := "3.7.1", versionScheme := Some("early-semver"), organization := "org.lichess.search", run / fork := true, From bae398260fe8382be75a513930513b1276523ca3 Mon Sep 17 00:00:00 2001 From: Scala Steward Date: Thu, 5 Jun 2025 23:47:10 +0000 Subject: [PATCH 06/28] Update smithy4s-core, smithy4s-http4s, ... to 0.18.37 --- project/plugins.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/plugins.sbt b/project/plugins.sbt index 2d6f2f45..cddf18f7 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -1,6 +1,6 @@ addSbtPlugin("ch.epfl.scala" % "sbt-scalafix" % "0.14.3") -addSbtPlugin("com.disneystreaming.smithy4s" % "smithy4s-sbt-codegen" % "0.18.36") +addSbtPlugin("com.disneystreaming.smithy4s" % "smithy4s-sbt-codegen" % "0.18.37") addSbtPlugin("com.eed3si9n" % "sbt-buildinfo" % "0.13.1") From ee2f4a04ed8720c93dc53ae3710b4a61fa559b17 Mon Sep 17 00:00:00 2001 From: Thanh Le Date: Fri, 6 Jun 2025 08:10:59 +0200 Subject: [PATCH 07/28] Add -Wall which warns everything --- build.sbt | 2 +- modules/app/src/main/scala/app.scala | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/build.sbt b/build.sbt index 3097adea..478c57db 100644 --- a/build.sbt +++ b/build.sbt @@ -21,7 +21,7 @@ val commonSettings = Seq( ScalacOptions.other("-indent"), ScalacOptions.explain, ScalacOptions.release("21"), - ScalacOptions.other("-Wsafe-init") // fix in: https://github.com/typelevel/scalac-options/pull/136 + ScalacOptions.other("-Wall"), ), resolvers += "jitpack".at("https://jitpack.io") ) diff --git a/modules/app/src/main/scala/app.scala b/modules/app/src/main/scala/app.scala index 7bb9b05f..4c5d7e29 
100644 --- a/modules/app/src/main/scala/app.scala +++ b/modules/app/src/main/scala/app.scala @@ -44,6 +44,6 @@ object App extends IOApp.Simple: for apiRoutes <- Routes(res, config.server) httpRoutes = apiRoutes <+> mkPrometheusRoutes - server <- MkHttpServer().newEmber(config.server, httpRoutes.orNotFound) - _ <- Logger[IO].info(s"BuildInfo: ${BuildInfo}").toResource + _ <- MkHttpServer().newEmber(config.server, httpRoutes.orNotFound) + _ <- Logger[IO].info(s"BuildInfo: ${BuildInfo}").toResource yield () From 29fb7e7297d76ee322b027fa0d69ccd746839896 Mon Sep 17 00:00:00 2001 From: Thanh Le Date: Fri, 6 Jun 2025 09:20:25 +0200 Subject: [PATCH 08/28] Fix warnings from Wtostring-interpolated https://github.com/scala/scala3/pull/20578 --- modules/app/src/main/scala/app.scala | 4 ++-- modules/app/src/main/scala/http.server.scala | 2 +- modules/app/src/main/scala/service.health.scala | 2 +- modules/app/src/main/scala/service.search.scala | 4 ++-- modules/ingestor/src/main/scala/app.scala | 8 ++++---- modules/ingestor/src/main/scala/cli.scala | 2 +- modules/ingestor/src/main/scala/ingestor.scala | 2 +- modules/ingestor/src/main/scala/mongo.chapter.scala | 4 ++-- 8 files changed, 14 insertions(+), 14 deletions(-) diff --git a/modules/app/src/main/scala/app.scala b/modules/app/src/main/scala/app.scala index 4c5d7e29..c46b6543 100644 --- a/modules/app/src/main/scala/app.scala +++ b/modules/app/src/main/scala/app.scala @@ -25,7 +25,7 @@ object App extends IOApp.Simple: given Meter[IO] <- mkMeter _ <- RuntimeMetrics.register[IO] config <- AppConfig.load.toResource - _ <- Logger[IO].info(s"Starting lila-search with config: $config").toResource + _ <- Logger[IO].info(s"Starting lila-search with config: ${config.toString}").toResource res <- AppResources.instance(config) _ <- mkServer(res, config) yield () @@ -45,5 +45,5 @@ object App extends IOApp.Simple: apiRoutes <- Routes(res, config.server) httpRoutes = apiRoutes <+> mkPrometheusRoutes _ <- 
MkHttpServer().newEmber(config.server, httpRoutes.orNotFound) - _ <- Logger[IO].info(s"BuildInfo: ${BuildInfo}").toResource + _ <- Logger[IO].info(s"BuildInfo: ${BuildInfo.toString}").toResource yield () diff --git a/modules/app/src/main/scala/http.server.scala b/modules/app/src/main/scala/http.server.scala index 11a90743..6c8791b6 100644 --- a/modules/app/src/main/scala/http.server.scala +++ b/modules/app/src/main/scala/http.server.scala @@ -29,4 +29,4 @@ object MkHttpServer: .evalTap(showBanner) private def showBanner(s: Server): IO[Unit] = - Logger[IO].info(s"lila-search started at ${s.address}") + Logger[IO].info(s"lila-search started at ${s.address.toString}") diff --git a/modules/app/src/main/scala/service.health.scala b/modules/app/src/main/scala/service.health.scala index 1ad76ab9..cea3ca1d 100644 --- a/modules/app/src/main/scala/service.health.scala +++ b/modules/app/src/main/scala/service.health.scala @@ -16,7 +16,7 @@ class HealthServiceImpl(esClient: ESClient[IO])(using LoggerFactory[IO]) extends .map(HealthCheckOutput(_)) .handleErrorWith: e => Logger[IO].error(e)("Error in health check") *> - IO.raiseError(InternalServerError(s"Internal server error $e")) + IO.raiseError(InternalServerError(s"Internal server error ${e.getMessage}")) private def transform(status: String): IO[ElasticStatus] = status match diff --git a/modules/app/src/main/scala/service.search.scala b/modules/app/src/main/scala/service.search.scala index 876643b8..f6e5db79 100644 --- a/modules/app/src/main/scala/service.search.scala +++ b/modules/app/src/main/scala/service.search.scala @@ -55,7 +55,7 @@ class SearchServiceImpl(esClient: ESClient[IO], metric: Histogram[IO, Double])(u .count(query) .map(CountOutput.apply) .handleErrorWith: e => - logger.error(e)(s"Error in count: query=$query") *> + logger.error(e)(s"Error in count: query=${query.toString}") *> IO.raiseError(InternalServerError("Internal server error")) override def search(query: Query, from: From, size: Size): 
IO[SearchOutput] = @@ -64,7 +64,7 @@ class SearchServiceImpl(esClient: ESClient[IO], metric: Histogram[IO, Double])(u .search(query, from, size) .map(SearchOutput.apply) .handleErrorWith: e => - logger.error(e)(s"Error in search: query=$query, from=$from, size=$size") *> + logger.error(e)(s"Error in search: query=${query.toString}, from=$from, size=$size") *> IO.raiseError(InternalServerError("Internal server error")) object SearchServiceImpl: diff --git a/modules/ingestor/src/main/scala/app.scala b/modules/ingestor/src/main/scala/app.scala index 178b59fe..c2c47130 100644 --- a/modules/ingestor/src/main/scala/app.scala +++ b/modules/ingestor/src/main/scala/app.scala @@ -23,10 +23,10 @@ object App extends IOApp.Simple: given Meter[IO] <- mkMeter _ <- RuntimeMetrics.register[IO] config <- AppConfig.load.toResource - _ <- Logger[IO].info(s"Starting lila-search ingestor with config: $config").toResource - _ <- Logger[IO].info(s"BuildInfo: ${BuildInfo}").toResource - res <- AppResources.instance(config) - _ <- IngestorApp(res, config).run() + _ <- Logger[IO].info(s"Starting lila-search ingestor with config: ${config.toString}").toResource + _ <- Logger[IO].info(s"BuildInfo: ${BuildInfo.toString}").toResource + res <- AppResources.instance(config) + _ <- IngestorApp(res, config).run() yield () def mkMeter = SdkMetrics diff --git a/modules/ingestor/src/main/scala/cli.scala b/modules/ingestor/src/main/scala/cli.scala index 959689ad..c512c2f2 100644 --- a/modules/ingestor/src/main/scala/cli.scala +++ b/modules/ingestor/src/main/scala/cli.scala @@ -135,7 +135,7 @@ object opts: ).mapN(IndexOpts.apply) .mapValidated(x => if x.until.isAfter(x.since) then Validated.valid(x) - else Validated.invalidNel(s"since: ${x.since} must be before until: ${x.until}") + else Validated.invalidNel(s"since: ${x.since.toString} must be before until: ${x.until.toString}") ) val watchOpt = ( diff --git a/modules/ingestor/src/main/scala/ingestor.scala 
b/modules/ingestor/src/main/scala/ingestor.scala index a9c76416..499bece5 100644 --- a/modules/ingestor/src/main/scala/ingestor.scala +++ b/modules/ingestor/src/main/scala/ingestor.scala @@ -90,5 +90,5 @@ object Ingestor: private val saveLastIndexedTimestamp: Option[Instant] => IO[Unit] = _.traverse_(time => store.put(index.value, time) - *> Logger[IO].info(s"Stored last indexed time ${time.getEpochSecond} for $index") + *> Logger[IO].info(s"Stored last indexed time ${time.getEpochSecond} for ${index.value}") ) diff --git a/modules/ingestor/src/main/scala/mongo.chapter.scala b/modules/ingestor/src/main/scala/mongo.chapter.scala index 1a05e786..60a0af11 100644 --- a/modules/ingestor/src/main/scala/mongo.chapter.scala +++ b/modules/ingestor/src/main/scala/mongo.chapter.scala @@ -48,7 +48,7 @@ object StudyData: case Array(name, value) => Tag(name, value).asRight case _ => "Invalid pgn tag $v".asLeft - given Encoder[Tag] = Encoder.encodeString.contramap(t => s"${t.name}:${t.value}") + given Encoder[Tag] = Encoder.encodeString.contramap(t => s"${t.name.toString}:${t.value}") private val relevantPgnTags: Set[chess.format.pgn.TagType] = Set( Tag.Variant, @@ -114,5 +114,5 @@ object ChapterRepo: .stream .compile .toList - .flatTap(docs => Logger[IO].debug(s"Received $docs chapters")) + .flatTap(docs => Logger[IO].debug(s"Received ${docs.toString} chapters")) .map(_.map(x => x._id -> x).toMap) From 76251cb4c6358e5a0bb0e1f3a91e579f52766771 Mon Sep 17 00:00:00 2001 From: Scala Steward Date: Wed, 28 May 2025 20:54:28 +0000 Subject: [PATCH 09/28] Update log4cats-slf4j to 2.7.1 --- project/Dependencies.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/Dependencies.scala b/project/Dependencies.scala index d64624fe..5fbf0a05 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -62,7 +62,7 @@ object Dependencies { val otel4sInstrumentationMetrics = "org.typelevel" %% "otel4s-instrumentation-metrics" % V.otel4s val otel4sMetrics = 
"org.typelevel" %% "otel4s-experimental-metrics" % "0.6.0" - val log4Cats = "org.typelevel" %% "log4cats-slf4j" % "2.7.0" + val log4Cats = "org.typelevel" %% "log4cats-slf4j" % "2.7.1" val logback = "ch.qos.logback" % "logback-classic" % "1.5.18" val ducktape = "io.github.arainko" %% "ducktape" % "0.2.8" From 1a68daca0ab520e895f22c07c9671900b7c5e5bd Mon Sep 17 00:00:00 2001 From: Scala Steward Date: Wed, 28 May 2025 20:54:14 +0000 Subject: [PATCH 10/28] Update weaver-cats, weaver-scalacheck to 0.9.0 --- project/Dependencies.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 5fbf0a05..31cfa4a9 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -71,8 +71,8 @@ object Dependencies { val declineCatsEffect = "com.monovore" %% "decline-effect" % V.decline val testContainers = "com.dimafeng" %% "testcontainers-scala-core" % "0.43.0" % Test - val weaver = "com.disneystreaming" %% "weaver-cats" % "0.8.4" % Test - val weaverScalaCheck = "com.disneystreaming" %% "weaver-scalacheck" % "0.8.4" % Test + val weaver = "org.typelevel" %% "weaver-cats" % "0.9.0" % Test + val weaverScalaCheck = "org.typelevel" %% "weaver-scalacheck" % "0.9.0" % Test val catsEffectTestKit = "org.typelevel" %% "cats-effect-testkit" % V.catsEffect % Test val scalacheck = "org.scalacheck" %% "scalacheck" % "1.17.0" % Test } From 30e01cacb735e046bf04a8f491119e782ec63639 Mon Sep 17 00:00:00 2001 From: Thanh Le Date: Fri, 6 Jun 2025 09:41:13 +0200 Subject: [PATCH 11/28] Use named tuple for SourceWithId --- modules/elastic/src/main/scala/ESClient.scala | 6 +++--- modules/elastic/src/main/scala/package.scala | 2 ++ modules/ingestor/src/main/scala/Repo.scala | 1 - modules/ingestor/src/main/scala/ingestor.scala | 4 ++-- modules/ingestor/src/main/scala/mongo.game.scala | 4 ++-- 5 files changed, 9 insertions(+), 8 deletions(-) diff --git a/modules/elastic/src/main/scala/ESClient.scala 
b/modules/elastic/src/main/scala/ESClient.scala index 69b92173..d5d80756 100644 --- a/modules/elastic/src/main/scala/ESClient.scala +++ b/modules/elastic/src/main/scala/ESClient.scala @@ -18,7 +18,7 @@ trait ESClient[F[_]]: def search[A](query: A, from: From, size: Size)(using Queryable[A]): F[List[Id]] def count[A](query: A)(using Queryable[A]): F[Long] def store[A](index: Index, id: Id, obj: A)(using Indexable[A]): F[Unit] - def storeBulk[A](index: Index, objs: Seq[(String, A)])(using Indexable[A]): F[Unit] + def storeBulk[A](index: Index, objs: Seq[SourceWithId[A]])(using Indexable[A]): F[Unit] def deleteOne(index: Index, id: Id): F[Unit] def deleteMany(index: Index, ids: List[Id]): F[Unit] def putMapping(index: Index): F[Unit] @@ -96,9 +96,9 @@ object ESClient: .execute(indexInto(index.value).source(obj).id(id.value)) .flatMap(_.unitOrFail) - def storeBulk[A](index: Index, objs: Seq[(String, A)])(using Indexable[A]): F[Unit] = + def storeBulk[A](index: Index, objs: Seq[SourceWithId[A]])(using Indexable[A]): F[Unit] = val request = indexInto(index.value) - val requests = bulk(objs.map((id, obj) => request.source(obj).id(id))) + val requests = bulk(objs.map { case (id, source) => request.source(source).id(id) }) metric .recordDuration( TimeUnit.MILLISECONDS, diff --git a/modules/elastic/src/main/scala/package.scala b/modules/elastic/src/main/scala/package.scala index 5d4b2628..e791c479 100644 --- a/modules/elastic/src/main/scala/package.scala +++ b/modules/elastic/src/main/scala/package.scala @@ -6,6 +6,8 @@ import com.sksamuel.elastic4s.ElasticDsl.* import com.sksamuel.elastic4s.requests.searches.queries.Query import com.sksamuel.elastic4s.{ Index as ESIndex, Response } +type SourceWithId[A] = (id: String, source: A) + extension (self: Boolean) def fold[A](t: => A, f: => A): A = if self then t else f extension (queries: List[Query]) diff --git a/modules/ingestor/src/main/scala/Repo.scala b/modules/ingestor/src/main/scala/Repo.scala index 0d782e1b..72d716fb 
100644 --- a/modules/ingestor/src/main/scala/Repo.scala +++ b/modules/ingestor/src/main/scala/Repo.scala @@ -10,7 +10,6 @@ trait Repo[A]: def fetch(since: Instant, until: Instant): fs2.Stream[IO, Repo.Result[A]] object Repo: - type SourceWithId[A] = (String, A) case class Result[A](toIndex: List[SourceWithId[A]], toDelete: List[Id], timestamp: Option[Instant]) import cats.effect.IO diff --git a/modules/ingestor/src/main/scala/ingestor.scala b/modules/ingestor/src/main/scala/ingestor.scala index 499bece5..6d92d6c5 100644 --- a/modules/ingestor/src/main/scala/ingestor.scala +++ b/modules/ingestor/src/main/scala/ingestor.scala @@ -78,12 +78,12 @@ object Ingestor: Logger[IO].error(e)(s"Failed to delete ${index.value}: ${ids.map(_.value).mkString(", ")}") .whenA(ids.nonEmpty) - private def storeBulk(index: Index, sources: List[(String, A)]): IO[Unit] = + private def storeBulk(index: Index, sources: List[SourceWithId[A]]): IO[Unit] = Logger[IO].info(s"Received ${sources.size} docs to ${index.value}") *> elastic .storeBulk(index, sources) .handleErrorWith: e => - Logger[IO].error(e)(s"Failed to ${index.value} index: ${sources.map(_._1).mkString(", ")}") + Logger[IO].error(e)(s"Failed to ${index.value} index: ${sources.map(_.id).mkString(", ")}") .whenA(sources.nonEmpty) *> Logger[IO].info(s"Indexed ${sources.size} ${index.value}s") diff --git a/modules/ingestor/src/main/scala/mongo.game.scala b/modules/ingestor/src/main/scala/mongo.game.scala index ae25d932..2f722686 100644 --- a/modules/ingestor/src/main/scala/mongo.game.scala +++ b/modules/ingestor/src/main/scala/mongo.game.scala @@ -162,7 +162,7 @@ case class DbGame( val seconds = (movedAt.toEpochMilli / 1000 - createdAt.toEpochMilli / 1000) Option.when(seconds < 60 * 60 * 12)(seconds.toInt) - def toSource: (String, GameSource) = + def toSource: SourceWithId[GameSource] = id -> GameSource( status = status, @@ -191,7 +191,7 @@ case class DbGame( def debug = import smithy4s.json.Json.given import 
com.github.plokhotnyuk.jsoniter_scala.core.* - id -> writeToString(toSource._2) + id -> writeToString(toSource.source) object DbGame: // format: off From 051db4a71147410c998f47846d6fda8fe43cbe2f Mon Sep 17 00:00:00 2001 From: Thanh Le Date: Fri, 6 Jun 2025 09:45:46 +0200 Subject: [PATCH 12/28] Remove a superfluous comment --- modules/ingestor/src/main/scala/mongo.game.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/ingestor/src/main/scala/mongo.game.scala b/modules/ingestor/src/main/scala/mongo.game.scala index 2f722686..203af199 100644 --- a/modules/ingestor/src/main/scala/mongo.game.scala +++ b/modules/ingestor/src/main/scala/mongo.game.scala @@ -89,7 +89,6 @@ object GameRepo: games .find(filter.and(gameFilter)) .hint("ca_-1") - // .projection(postProjection) .boundedStream(config.batchSize) .chunkN(config.batchSize) .map(_.toList) From b226d82c66e4979c65f55fa1dfe3b66278993619 Mon Sep 17 00:00:00 2001 From: Thanh Le Date: Fri, 6 Jun 2025 09:47:46 +0200 Subject: [PATCH 13/28] Fix test compilation and remove nowarns --- modules/e2e/src/test/scala/CompatSuite.scala | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/modules/e2e/src/test/scala/CompatSuite.scala b/modules/e2e/src/test/scala/CompatSuite.scala index 4a0a40be..68492e7d 100644 --- a/modules/e2e/src/test/scala/CompatSuite.scala +++ b/modules/e2e/src/test/scala/CompatSuite.scala @@ -16,7 +16,6 @@ import org.typelevel.otel4s.sdk.exporter.prometheus.PrometheusMetricExporter import org.typelevel.otel4s.sdk.metrics.exporter.MetricExporter import play.api.libs.ws.ahc.* -import scala.annotation.nowarn import scala.concurrent.ExecutionContext.Implicits.* object CompatSuite extends weaver.IOSuite: @@ -68,11 +67,10 @@ object CompatSuite extends weaver.IOSuite: def fakeClient: ESClient[IO] = new: - @nowarn("msg=unused implicit") override def store[A](index: Index, id: Id, obj: A)(using Indexable[A]): IO[Unit] = IO.unit - @nowarn("msg=unused implicit") - override def 
storeBulk[A](index: Index, objs: Seq[(String, A)])(using Indexable[A]): IO[Unit] = IO.unit + override def storeBulk[A](index: Index, objs: Seq[SourceWithId[A]])(using Indexable[A]): IO[Unit] = + IO.unit override def putMapping(index: Index): IO[Unit] = IO.unit @@ -82,11 +80,9 @@ object CompatSuite extends weaver.IOSuite: override def deleteMany(index: Index, ids: List[Id]): IO[Unit] = IO.unit - @nowarn("msg=unused implicit") override def count[A](query: A)(using Queryable[A]) = IO.pure(0) - @nowarn("msg=unused implicit") override def search[A](query: A, from: From, size: Size)(using Queryable[A]) = IO.pure(Nil) From 9954c65c158288f2ad7ec8c999462daed4f636d8 Mon Sep 17 00:00:00 2001 From: Thanh Le Date: Sat, 7 Jun 2025 08:49:12 +0200 Subject: [PATCH 14/28] Setting version to 3.1.9 --- version.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.sbt b/version.sbt index 95fe8be3..ae4e84d1 100644 --- a/version.sbt +++ b/version.sbt @@ -1 +1 @@ -ThisBuild / version := "3.1.9-SNAPSHOT" +ThisBuild / version := "3.1.9" From 07381f92af7537d920f207e28a4ecdf19a0af895 Mon Sep 17 00:00:00 2001 From: Thanh Le Date: Sat, 7 Jun 2025 08:49:13 +0200 Subject: [PATCH 15/28] Setting version to 3.1.10-SNAPSHOT --- version.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.sbt b/version.sbt index ae4e84d1..2cc91fda 100644 --- a/version.sbt +++ b/version.sbt @@ -1 +1 @@ -ThisBuild / version := "3.1.9" +ThisBuild / version := "3.1.10-SNAPSHOT" From e3a2b742f9ed7db43674882c90f7b2421f3b4c08 Mon Sep 17 00:00:00 2001 From: Jonathan Gamble Date: Mon, 9 Jun 2025 17:10:00 -0500 Subject: [PATCH 16/28] improve search quality, add boolean AND --- modules/elastic/src/main/scala/ublog.scala | 43 +++++++++++----------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/modules/elastic/src/main/scala/ublog.scala b/modules/elastic/src/main/scala/ublog.scala index 25184128..05eac296 100644 --- a/modules/elastic/src/main/scala/ublog.scala +++ 
b/modules/elastic/src/main/scala/ublog.scala @@ -6,35 +6,37 @@ import com.sksamuel.elastic4s.requests.searches.sort.SortOrder case class Ublog(queryText: String, byDate: Boolean, minQuality: Option[Int], language: Option[String]): + val sanitized = queryText + .trim() + .toLowerCase() + .replaceAll("""([\-=&|> s + case s => s.replace(":", " ") // devs can use the query string until we get a ui for lang/quality + .mkString(" ") + + println(sanitized) def searchDef(from: From, size: Size) = - val req = search(Ublog.index) + val sortFields = + (if !byDate then Seq(scoreSort().order(SortOrder.DESC)) else Nil) ++ Seq( + fieldSort("quality").order(SortOrder.DESC).missing("_last"), + fieldSort("date").order(SortOrder.DESC) + ) + search(Ublog.index) .query(makeQuery()) .fetchSource(false) - - val sorted = - if byDate then - req.sortBy( - fieldSort(Fields.quality).order(SortOrder.DESC).missing("_last"), - fieldSort(Fields.date).order(SortOrder.DESC) - ) - else req - - sorted + .sortBy(sortFields*) .start(from.value) .size(size.value) def countDef = count(Ublog.index).query(makeQuery()) private def makeQuery() = - val parsed = QueryParser(queryText, Nil) - val baseQuery = - if parsed.terms.isEmpty then matchAllQuery() - else - multiMatchQuery(parsed.terms.mkString(" ")) - .fields(Ublog.searchableFields*) - .matchType("most_fields") boolQuery() - .must(baseQuery) + .must(queryStringQuery(sanitized).defaultField(Fields.text)) .filter( List( minQuality.map(f => rangeQuery(Fields.quality).gte(f)), @@ -43,8 +45,7 @@ case class Ublog(queryText: String, byDate: Boolean, minQuality: Option[Int], la ) object Ublog: - val index = "ublog" - private val searchableFields = List(Fields.text) + val index = "ublog" object Fields: val text = "text" From 4a62dc26e7d5674192d319d104883a9430b54b5a Mon Sep 17 00:00:00 2001 From: Jonathan Gamble Date: Mon, 9 Jun 2025 17:12:56 -0500 Subject: [PATCH 17/28] use language string constant --- modules/ingestor/src/main/scala/mongo.ublog.scala | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ingestor/src/main/scala/mongo.ublog.scala b/modules/ingestor/src/main/scala/mongo.ublog.scala index 8989ecec..eda71a90 100644 --- a/modules/ingestor/src/main/scala/mongo.ublog.scala +++ b/modules/ingestor/src/main/scala/mongo.ublog.scala @@ -92,7 +92,7 @@ object UblogRepo: intro <- doc.getString(F.intro) body <- doc.getString(F.markdown) author <- doc.getString(F.blog).map(_.split(":")(1)) - language <- doc.getString("language") + language <- doc.getString(F.language) topics <- doc.getAs[List[String]](F.topics).map(_.mkString(" ").replaceAll("Chess", "")) text = s"$title\n$topics\n$author\n$intro\n$body" date <- doc.getNested(F.livedAt).flatMap(_.asInstant).map(_.toEpochMilli) From 8c0e5ffb67e3506c239ef9282aea55ed23591e2d Mon Sep 17 00:00:00 2001 From: Jonathan Gamble Date: Fri, 13 Jun 2025 18:32:36 -0500 Subject: [PATCH 18/28] add sort by likes --- build.sbt | 32 +++++++++---------- modules/api/src/main/smithy/search.smithy | 9 +++++- .../e2e/src/test/scala/IntegrationSuite.scala | 5 +-- modules/elastic/src/main/scala/ublog.scala | 18 +++++++++-- .../ingestor/src/main/scala/mongo.ublog.scala | 18 +++++++++-- modules/ingestor/src/main/smithy/model.smithy | 5 +-- 6 files changed, 61 insertions(+), 26 deletions(-) diff --git a/build.sbt b/build.sbt index 478c57db..befb00e9 100644 --- a/build.sbt +++ b/build.sbt @@ -21,7 +21,7 @@ val commonSettings = Seq( ScalacOptions.other("-indent"), ScalacOptions.explain, ScalacOptions.release("21"), - ScalacOptions.other("-Wall"), + ScalacOptions.other("-Wall") ), resolvers += "jitpack".at("https://jitpack.io") ) @@ -46,6 +46,20 @@ lazy val core = project ) ) +lazy val api = project + .in(file("modules/api")) + .enablePlugins(Smithy4sCodegenPlugin) + .settings( + name := "api", + commonSettings, + smithy4sWildcardArgument := "?", + libraryDependencies ++= Seq( + catsCore, + smithy4sCore + ) + ) + .dependsOn(core) + lazy val elastic = project 
.in(file("modules/elastic")) .settings( @@ -61,21 +75,7 @@ lazy val elastic = project otel4sCore ) ) - .dependsOn(core) - -lazy val api = project - .in(file("modules/api")) - .enablePlugins(Smithy4sCodegenPlugin) - .settings( - name := "api", - commonSettings, - smithy4sWildcardArgument := "?", - libraryDependencies ++= Seq( - catsCore, - smithy4sCore - ) - ) - .dependsOn(core) + .dependsOn(api, core) lazy val ingestor = project .in(file("modules/ingestor")) diff --git a/modules/api/src/main/smithy/search.smithy b/modules/api/src/main/smithy/search.smithy index 3952f9a6..8067e8ce 100644 --- a/modules/api/src/main/smithy/search.smithy +++ b/modules/api/src/main/smithy/search.smithy @@ -74,7 +74,7 @@ structure Ublog { @required queryText: String @required - byDate: Boolean + by: SortBlogsBy minQuality: Integer language: String } @@ -142,6 +142,13 @@ list Perfs { member: Integer } +enum SortBlogsBy { + Newest + Oldest + Score + Likes +} + @adt union Query { forum: Forum diff --git a/modules/e2e/src/test/scala/IntegrationSuite.scala b/modules/e2e/src/test/scala/IntegrationSuite.scala index 951319c6..a2e66ee2 100644 --- a/modules/e2e/src/test/scala/IntegrationSuite.scala +++ b/modules/e2e/src/test/scala/IntegrationSuite.scala @@ -86,13 +86,14 @@ object IntegrationSuite extends IOSuite: ingestor.UblogSource( text = "lil bubber, hayo!", language = "en", + likes = 0, date = Instant.now().toEpochMilli(), quality = 1.some ) ) _ <- res.esClient.refreshIndex(Index.Ublog) - x <- service.search(Query.ublog("lil bubber", true, 1.some), from, size) - y <- service.search(Query.ublog("hayo", true, 2.some), from, size) + x <- service.search(Query.ublog("lil bubber", SortBlogsBy.Score, 1.some), from, size) + y <- service.search(Query.ublog("hayo", SortBlogsBy.Newest, 2.some), from, size) yield expect(x.hitIds.size == 1 && y.hitIds.isEmpty) test("team"): res => diff --git a/modules/elastic/src/main/scala/ublog.scala b/modules/elastic/src/main/scala/ublog.scala index 05eac296..ecfc36d3 
100644 --- a/modules/elastic/src/main/scala/ublog.scala +++ b/modules/elastic/src/main/scala/ublog.scala @@ -3,8 +3,14 @@ package ublog import com.sksamuel.elastic4s.ElasticDsl.* import com.sksamuel.elastic4s.requests.searches.sort.SortOrder +import spec.SortBlogsBy -case class Ublog(queryText: String, byDate: Boolean, minQuality: Option[Int], language: Option[String]): +case class Ublog( + queryText: String, + by: SortBlogsBy, + minQuality: Option[Int], + language: Option[String] +): val sanitized = queryText .trim() @@ -21,9 +27,13 @@ case class Ublog(queryText: String, byDate: Boolean, minQuality: Option[Int], la println(sanitized) def searchDef(from: From, size: Size) = val sortFields = - (if !byDate then Seq(scoreSort().order(SortOrder.DESC)) else Nil) ++ Seq( + (if by == SortBlogsBy.Score then Seq(scoreSort().order(SortOrder.DESC)) + else if by == SortBlogsBy.Likes then Seq(fieldSort("likes").order(SortOrder.DESC)) + else Nil) ++ Seq( fieldSort("quality").order(SortOrder.DESC).missing("_last"), - fieldSort("date").order(SortOrder.DESC) + fieldSort("date") + .order(if by == SortBlogsBy.Oldest then SortOrder.ASC else SortOrder.DESC) + .missing("_last") ) search(Ublog.index) .query(makeQuery()) @@ -49,6 +59,7 @@ object Ublog: object Fields: val text = "text" + val likes = "likes" val quality = "quality" val language = "language" val date = "date" @@ -60,5 +71,6 @@ object Mapping: textField(text), shortField(quality).copy(docValues = Some(true)), keywordField(language).copy(docValues = Some(false)), + shortField(likes).copy(docValues = Some(true)), dateField(date).copy(docValues = Some(true)) ) diff --git a/modules/ingestor/src/main/scala/mongo.ublog.scala b/modules/ingestor/src/main/scala/mongo.ublog.scala index eda71a90..981d165e 100644 --- a/modules/ingestor/src/main/scala/mongo.ublog.scala +++ b/modules/ingestor/src/main/scala/mongo.ublog.scala @@ -22,7 +22,19 @@ object UblogRepo: private val interestedOperations = List(DELETE, INSERT, REPLACE, 
UPDATE).map(_.getValue) private val interestedFields = - List(_id, F.markdown, F.title, F.intro, F.topics, F.blog, F.live, F.livedAt, F.language, F.quality) + List( + _id, + F.markdown, + F.title, + F.intro, + F.topics, + F.blog, + F.live, + F.livedAt, + F.likes, + F.language, + F.quality + ) private val postProjection = Projection.include(interestedFields) private val interestedEventFields = @@ -93,11 +105,12 @@ object UblogRepo: body <- doc.getString(F.markdown) author <- doc.getString(F.blog).map(_.split(":")(1)) language <- doc.getString(F.language) + likes <- doc.getAs[Int](F.likes) topics <- doc.getAs[List[String]](F.topics).map(_.mkString(" ").replaceAll("Chess", "")) text = s"$title\n$topics\n$author\n$intro\n$body" date <- doc.getNested(F.livedAt).flatMap(_.asInstant).map(_.toEpochMilli) quality = doc.getNestedAs[Int](F.quality) - yield UblogSource(text, language, date, quality) + yield UblogSource(text, language, likes, date, quality) private def isLive: Boolean = doc.getBoolean("live").contains(true) && !doc.getNestedAs[Int](F.quality).exists(_ == 0) @@ -112,6 +125,7 @@ object UblogRepo: val intro = "intro" val blog = "blog" val language = "language" + val likes = "likes" val live = "live" val livedAt = "lived.at" val quality = "automod.quality" diff --git a/modules/ingestor/src/main/smithy/model.smithy b/modules/ingestor/src/main/smithy/model.smithy index 5632ba6e..1bf01669 100644 --- a/modules/ingestor/src/main/smithy/model.smithy +++ b/modules/ingestor/src/main/smithy/model.smithy @@ -115,10 +115,11 @@ structure TeamSource { structure UblogSource { @required text: String - quality: Integer @required language: String @required - /// time in milliseconds + likes: Integer + @required date: Long + quality: Integer } From 692deefd55a642becb4ba2b5e6fd2624829dfde7 Mon Sep 17 00:00:00 2001 From: Jonathan Gamble Date: Fri, 13 Jun 2025 21:53:17 -0500 Subject: [PATCH 19/28] scalafix --- modules/elastic/src/main/scala/ublog.scala | 1 + 1 file changed, 1 
insertion(+) diff --git a/modules/elastic/src/main/scala/ublog.scala b/modules/elastic/src/main/scala/ublog.scala index ecfc36d3..c6c08bb0 100644 --- a/modules/elastic/src/main/scala/ublog.scala +++ b/modules/elastic/src/main/scala/ublog.scala @@ -3,6 +3,7 @@ package ublog import com.sksamuel.elastic4s.ElasticDsl.* import com.sksamuel.elastic4s.requests.searches.sort.SortOrder + import spec.SortBlogsBy case class Ublog( From 5a7588c15ecf733eb9d587b158ccb05f2fd62205 Mon Sep 17 00:00:00 2001 From: Jonathan Gamble Date: Sat, 14 Jun 2025 03:08:37 -0500 Subject: [PATCH 20/28] been a long time since jdk 8 --- build.sbt | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/build.sbt b/build.sbt index befb00e9..b9fc22ac 100644 --- a/build.sbt +++ b/build.sbt @@ -84,8 +84,9 @@ lazy val ingestor = project name := "ingestor", commonSettings, buildInfoSettings, - publish := {}, - publish / skip := true, + dockerBaseImage := "docker.io/eclipse-temurin:21-jdk", + publish := {}, + publish / skip := true, libraryDependencies ++= Seq( chess, catsCore, @@ -141,8 +142,9 @@ lazy val app = project name := "lila-search", commonSettings, buildInfoSettings, - publish := {}, - publish / skip := true, + dockerBaseImage := "docker.io/eclipse-temurin:21-jdk", + publish := {}, + publish / skip := true, libraryDependencies ++= Seq( smithy4sHttp4s, jsoniterCore, From 01b6e2ab24144933cedf35973cbd29db8ffde283 Mon Sep 17 00:00:00 2001 From: Jonathan Gamble Date: Thu, 19 Jun 2025 10:32:10 -0500 Subject: [PATCH 21/28] reflect recent enum constant capitalization change in lila --- modules/api/src/main/smithy/search.smithy | 8 ++++---- modules/elastic/src/main/scala/ublog.scala | 7 +++---- version.sbt | 2 +- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/modules/api/src/main/smithy/search.smithy b/modules/api/src/main/smithy/search.smithy index 8067e8ce..bc72cad6 100644 --- a/modules/api/src/main/smithy/search.smithy +++ b/modules/api/src/main/smithy/search.smithy 
@@ -143,10 +143,10 @@ list Perfs { } enum SortBlogsBy { - Newest - Oldest - Score - Likes + newest + oldest + score + likes } @adt diff --git a/modules/elastic/src/main/scala/ublog.scala b/modules/elastic/src/main/scala/ublog.scala index c6c08bb0..0f0ee927 100644 --- a/modules/elastic/src/main/scala/ublog.scala +++ b/modules/elastic/src/main/scala/ublog.scala @@ -25,15 +25,14 @@ case class Ublog( case s => s.replace(":", " ") // devs can use the query string until we get a ui for lang/quality .mkString(" ") - println(sanitized) def searchDef(from: From, size: Size) = val sortFields = - (if by == SortBlogsBy.Score then Seq(scoreSort().order(SortOrder.DESC)) - else if by == SortBlogsBy.Likes then Seq(fieldSort("likes").order(SortOrder.DESC)) + (if by == SortBlogsBy.score then Seq(scoreSort().order(SortOrder.DESC)) + else if by == SortBlogsBy.likes then Seq(fieldSort("likes").order(SortOrder.DESC)) else Nil) ++ Seq( fieldSort("quality").order(SortOrder.DESC).missing("_last"), fieldSort("date") - .order(if by == SortBlogsBy.Oldest then SortOrder.ASC else SortOrder.DESC) + .order(if by == SortBlogsBy.oldest then SortOrder.ASC else SortOrder.DESC) .missing("_last") ) search(Ublog.index) diff --git a/version.sbt b/version.sbt index 2cc91fda..f34faa2e 100644 --- a/version.sbt +++ b/version.sbt @@ -1 +1 @@ -ThisBuild / version := "3.1.10-SNAPSHOT" +ThisBuild / version := "3.2.1-UBLOG" From 7730ca5e546712410e49cb1cd8733c50b53977b5 Mon Sep 17 00:00:00 2001 From: Jonathan Gamble Date: Thu, 19 Jun 2025 10:55:42 -0500 Subject: [PATCH 22/28] fix test --- build.sbt | 4 ++-- modules/e2e/src/test/scala/IntegrationSuite.scala | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/build.sbt b/build.sbt index b9fc22ac..7defaf08 100644 --- a/build.sbt +++ b/build.sbt @@ -84,7 +84,7 @@ lazy val ingestor = project name := "ingestor", commonSettings, buildInfoSettings, - dockerBaseImage := "docker.io/eclipse-temurin:21-jdk", + dockerBaseImage := 
"docker.io/library/eclipse-temurin:21-jdk", publish := {}, publish / skip := true, libraryDependencies ++= Seq( @@ -142,7 +142,7 @@ lazy val app = project name := "lila-search", commonSettings, buildInfoSettings, - dockerBaseImage := "docker.io/eclipse-temurin:21-jdk", + dockerBaseImage := "docker.io/library/eclipse-temurin:21-jdk", publish := {}, publish / skip := true, libraryDependencies ++= Seq( diff --git a/modules/e2e/src/test/scala/IntegrationSuite.scala b/modules/e2e/src/test/scala/IntegrationSuite.scala index a2e66ee2..337131d4 100644 --- a/modules/e2e/src/test/scala/IntegrationSuite.scala +++ b/modules/e2e/src/test/scala/IntegrationSuite.scala @@ -92,8 +92,8 @@ object IntegrationSuite extends IOSuite: ) ) _ <- res.esClient.refreshIndex(Index.Ublog) - x <- service.search(Query.ublog("lil bubber", SortBlogsBy.Score, 1.some), from, size) - y <- service.search(Query.ublog("hayo", SortBlogsBy.Newest, 2.some), from, size) + x <- service.search(Query.ublog("lil bubber", SortBlogsBy.score, 1.some), from, size) + y <- service.search(Query.ublog("hayo", SortBlogsBy.newest, 2.some), from, size) yield expect(x.hitIds.size == 1 && y.hitIds.isEmpty) test("team"): res => From 46f6dbfae92fc8aad0549823cc2b3af7ea4e663b Mon Sep 17 00:00:00 2001 From: Thanh Le Date: Sat, 5 Jul 2025 08:32:57 +0200 Subject: [PATCH 23/28] Setting version to 3.1.11-SNAPSHOT --- version.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.sbt b/version.sbt index 06b00f3c..9eccbd32 100644 --- a/version.sbt +++ b/version.sbt @@ -1 +1 @@ -ThisBuild / version := "3.1.10" +ThisBuild / version := "3.1.11-SNAPSHOT" From beb2ecf63ba13127a8e1dad36f0ee557a40a0a1e Mon Sep 17 00:00:00 2001 From: Scala Steward Date: Sun, 6 Jul 2025 06:17:05 +0000 Subject: [PATCH 24/28] Update jsoniter-scala-core, ... 
to 2.36.7 --- project/Dependencies.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 22002d49..cc911523 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -45,8 +45,8 @@ object Dependencies { lazy val smithy4sHttp4sSwagger = smithy4s("http4s-swagger") lazy val smithy4sJson = smithy4s("json") - val jsoniterCore = "com.github.plokhotnyuk.jsoniter-scala" %% "jsoniter-scala-core" % "2.36.6" - val jsoniterMacro = "com.github.plokhotnyuk.jsoniter-scala" %% "jsoniter-scala-macros" % "2.36.6" + val jsoniterCore = "com.github.plokhotnyuk.jsoniter-scala" %% "jsoniter-scala-core" % "2.36.7" + val jsoniterMacro = "com.github.plokhotnyuk.jsoniter-scala" %% "jsoniter-scala-macros" % "2.36.7" val playWS = "com.typesafe.play" %% "play-ahc-ws-standalone" % "2.2.11" From 48dcdf0dfd6c75d5dccd7e5180e48b1b3e1d854f Mon Sep 17 00:00:00 2001 From: Scala Steward Date: Sun, 6 Jul 2025 06:17:10 +0000 Subject: [PATCH 25/28] Update sbt, scripted-plugin to 1.11.3 --- project/build.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/build.properties b/project/build.properties index bbb0b608..c02c575f 100644 --- a/project/build.properties +++ b/project/build.properties @@ -1 +1 @@ -sbt.version=1.11.2 +sbt.version=1.11.3 From 4d04761777757ba9211dfeee86fc92bbfaaaf534 Mon Sep 17 00:00:00 2001 From: Scala Steward Date: Wed, 9 Jul 2025 19:28:35 +0000 Subject: [PATCH 26/28] Update cats-effect to 3.6.2 --- project/Dependencies.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/Dependencies.scala b/project/Dependencies.scala index cc911523..d1eed027 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -8,7 +8,7 @@ object Dependencies { val ourResolvers = Seq(lilaMaven, jitpack) object V { - val catsEffect = "3.6.1" + val catsEffect = "3.6.2" val chess = "17.8.5" val ciris = "3.9.0" val decline = "2.5.0" From 
fb4fe1c450cde1f2afc5e87f8af704f39588b694 Mon Sep 17 00:00:00 2001 From: Scala Steward Date: Wed, 9 Jul 2025 19:28:40 +0000 Subject: [PATCH 27/28] Update otel4s-core, ... to 0.13.1 --- project/Dependencies.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/Dependencies.scala b/project/Dependencies.scala index cc911523..70d1aa33 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -17,7 +17,7 @@ object Dependencies { val http4s = "0.23.30" val iron = "2.5.0" val mongo4cats = "0.7.13" - val otel4s = "0.13.0" + val otel4s = "0.13.1" } def http4s(artifact: String) = "org.http4s" %% s"http4s-$artifact" % V.http4s From 8a55dd60a0558af66879bc753e374122eb47318f Mon Sep 17 00:00:00 2001 From: Thanh Le Date: Wed, 16 Jul 2025 18:32:36 +0200 Subject: [PATCH 28/28] Bump version 3.2.0 --- version.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.sbt b/version.sbt index 42511079..e451539c 100644 --- a/version.sbt +++ b/version.sbt @@ -1 +1 @@ -ThisBuild / version := "3.2.1-UBLOG" \ No newline at end of file +ThisBuild / version := "3.2.0" \ No newline at end of file