Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
13e0819
blog search
schlawg Jun 5, 2025
03c130c
option for results by score
schlawg Jun 8, 2025
025cc34
fix copy paste error caught in review
schlawg Jun 8, 2025
9759fe6
remove comments and redundancies, dont index spam
schlawg Jun 8, 2025
afade6b
Update scala3-library to 3.7.1
scala-steward Jun 5, 2025
bae3982
Update smithy4s-core, smithy4s-http4s, ... to 0.18.37
scala-steward Jun 5, 2025
ee2f4a0
Add -Wall which warns everything
lenguyenthanh Jun 6, 2025
29fb7e7
Fix warnings from Wtostring-interpolated
lenguyenthanh Jun 6, 2025
76251cb
Update log4cats-slf4j to 2.7.1
scala-steward May 28, 2025
1a68dac
Update weaver-cats, weaver-scalacheck to 0.9.0
scala-steward May 28, 2025
30e01ca
Use named tuple for SourceWithId
lenguyenthanh Jun 6, 2025
051db4a
Remove a superfulous comment
lenguyenthanh Jun 6, 2025
b226d82
Fix test compilation and remove nowarns
lenguyenthanh Jun 6, 2025
9954c65
Setting version to 3.1.9
lenguyenthanh Jun 7, 2025
07381f9
Setting version to 3.1.10-SNAPSHOT
lenguyenthanh Jun 7, 2025
e3a2b74
improve search quality, add boolean AND
schlawg Jun 9, 2025
4a62dc2
use language string constant
schlawg Jun 9, 2025
8c0e5ff
add sort by likes
schlawg Jun 13, 2025
34dae2f
Merge branch 'master' into ublog-search
schlawg Jun 14, 2025
692deef
scalafix
schlawg Jun 14, 2025
5a7588c
been a long time since jdk 8
schlawg Jun 14, 2025
f4aee4e
Merge remote-tracking branch 'upstream/master' into ublog-search
schlawg Jun 19, 2025
01b6e2a
reflect recent enum constant capitalization change in lila
schlawg Jun 19, 2025
7730ca5
fix test
schlawg Jun 19, 2025
355eadb
Merge branch 'master' into ublog-search
ornicar Jul 16, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 21 additions & 19 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,20 @@ lazy val core = project
)
)

// API module located in modules/api. Its sources are produced with the
// Smithy4s code generation plugin, and it builds on top of `core`.
lazy val api = project
.in(file("modules/api"))
.enablePlugins(Smithy4sCodegenPlugin)
.settings(
name := "api",
commonSettings,
// NOTE(review): presumably makes the generated code use `?` as the type wildcard — confirm against the Smithy4s docs.
smithy4sWildcardArgument := "?",
libraryDependencies ++= Seq(
catsCore,
smithy4sCore
)
)
.dependsOn(core)

lazy val elastic = project
.in(file("modules/elastic"))
.settings(
Expand All @@ -61,21 +75,7 @@ lazy val elastic = project
otel4sCore
)
)
.dependsOn(core)

lazy val api = project
.in(file("modules/api"))
.enablePlugins(Smithy4sCodegenPlugin)
.settings(
name := "api",
commonSettings,
smithy4sWildcardArgument := "?",
libraryDependencies ++= Seq(
catsCore,
smithy4sCore
)
)
.dependsOn(core)
.dependsOn(api, core)

lazy val ingestor = project
.in(file("modules/ingestor"))
Expand All @@ -84,8 +84,9 @@ lazy val ingestor = project
name := "ingestor",
commonSettings,
buildInfoSettings,
publish := {},
publish / skip := true,
dockerBaseImage := "docker.io/library/eclipse-temurin:21-jdk",
publish := {},
publish / skip := true,
libraryDependencies ++= Seq(
chess,
catsCore,
Expand Down Expand Up @@ -141,8 +142,9 @@ lazy val app = project
name := "lila-search",
commonSettings,
buildInfoSettings,
publish := {},
publish / skip := true,
dockerBaseImage := "docker.io/library/eclipse-temurin:21-jdk",
publish := {},
publish / skip := true,
libraryDependencies ++= Seq(
smithy4sHttp4s,
jsoniterCore,
Expand Down
17 changes: 17 additions & 0 deletions modules/api/src/main/smithy/search.smithy
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,15 @@ structure Forum {
troll: Boolean = false
}

// Search request for user blog posts.
// `queryText` and `by` are mandatory; `minQuality` and `language` are optional.
structure Ublog {
@required
queryText: String
@required
by: SortBlogsBy
minQuality: Integer
language: String
}

structure Team {
@required
text: String
Expand Down Expand Up @@ -133,9 +142,17 @@ list Perfs {
member: Integer
}

// Orderings a client can request for blog search results.
enum SortBlogsBy {
newest
oldest
score
likes
}

@adt
union Query {
forum: Forum
ublog: Ublog
game: Game
study: Study
team: Team
Expand Down
4 changes: 4 additions & 0 deletions modules/app/src/main/scala/service.search.scala
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import lila.search.game.Game
import lila.search.spec.*
import lila.search.study.Study
import lila.search.team.Team
import lila.search.ublog.Ublog
import org.typelevel.log4cats.{ Logger, LoggerFactory }
import org.typelevel.otel4s.metrics.{ Histogram, Meter }
import org.typelevel.otel4s.{ Attribute, AttributeKey, Attributes }
Expand Down Expand Up @@ -82,19 +83,22 @@ object SearchServiceImpl:
// Builds the search request for `query`: each API query variant is converted with
// `to[...]` into its matching model, whose own `searchDef` is then called with the
// given paging values.
def searchDef(from: From, size: Size) =
query match
case q: Query.Forum => q.to[Forum].searchDef(from, size)
case q: Query.Ublog => q.to[Ublog].searchDef(from, size)
case q: Query.Game => q.to[Game].searchDef(from, size)
case q: Query.Study => q.to[Study].searchDef(from, size)
case q: Query.Team => q.to[Team].searchDef(from, size)

// Builds the count request for `query`: each API query variant is converted with
// `to[...]` into its matching model, whose own `countDef` is returned.
def countDef =
query match
case q: Query.Forum => q.to[Forum].countDef
case q: Query.Ublog => q.to[Ublog].countDef
case q: Query.Game => q.to[Game].countDef
case q: Query.Study => q.to[Study].countDef
case q: Query.Team => q.to[Team].countDef

// The index that corresponds to the variant of `query`.
def index = query match
case _: Query.Forum => Index.Forum
case _: Query.Ublog => Index.Ublog
case _: Query.Game => Index.Game
case _: Query.Study => Index.Study
case _: Query.Team => Index.Team
Expand Down
2 changes: 2 additions & 0 deletions modules/core/src/main/scala/models.scala
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ object Id:

// The searchable indexes; `value` is the lowercase string form of each case.
enum Index(val value: String):
case Forum extends Index("forum")
case Ublog extends Index("ublog")
case Game extends Index("game")
case Study extends Index("study")
case Team extends Index("team")
Expand All @@ -37,6 +38,7 @@ object Index:
def fromString(value: String): Either[String, Index] =
value match
case "forum" => Index.Forum.asRight
case "ublog" => Index.Ublog.asRight
case "game" => Index.Game.asRight
case "study" => Index.Study.asRight
case "team" => Index.Team.asRight
Expand Down
22 changes: 22 additions & 0 deletions modules/e2e/src/test/scala/IntegrationSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,28 @@ object IntegrationSuite extends IOSuite:
y <- service.search(Query.forum("nt9", false), from, size)
yield expect(x.hitIds.size == 1 && x == y)

// End-to-end check of ublog search: store one post, then query it through the search service.
test("ublog"): res =>
Clients
.search(uri)
.use: service =>
for
_ <- res.esClient.putMapping(Index.Ublog)
// Store a single post whose quality is 1.
_ <- res.esClient.store(
Index.Ublog,
Id("abcdefgh"),
ingestor.UblogSource(
text = "lil bubber, hayo!",
language = "en",
likes = 0,
date = Instant.now().toEpochMilli(),
quality = 1.some
)
)
// NOTE(review): presumably needed so the stored post is visible to the searches below — confirm.
_ <- res.esClient.refreshIndex(Index.Ublog)
// Words from the post, sorted by score, minimum quality 1.
x <- service.search(Query.ublog("lil bubber", SortBlogsBy.score, 1.some), from, size)
// A word from the same post, sorted by newest, but with minimum quality 2.
y <- service.search(Query.ublog("hayo", SortBlogsBy.newest, 2.some), from, size)
// Expect exactly one hit for the first query and none for the second.
yield expect(x.hitIds.size == 1 && y.hitIds.isEmpty)

test("team"): res =>
Clients
.search(uri)
Expand Down
1 change: 1 addition & 0 deletions modules/elastic/src/main/scala/package.scala
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ extension (index: Index)

// The field mapping definition that belongs to this index.
def mapping = index match
case Index.Forum => forum.Mapping.fields
case Index.Ublog => ublog.Mapping.fields
case Index.Game => game.Mapping.fields
case Index.Study => study.Mapping.fields
case Index.Team => team.Mapping.fields
Expand Down
6 changes: 3 additions & 3 deletions modules/elastic/src/main/scala/study.scala
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ case class Study(text: String, userId: Option[String]):

def searchDef(from: From, size: Size) =
search(Study.index)
.query(makeQuery)
.query(makeQuery())
.fetchSource(false)
.sortBy(
fieldSort("_score").order(SortOrder.DESC),
Expand All @@ -18,9 +18,9 @@ case class Study(text: String, userId: Option[String]):
.start(from.value)
.size(size.value)

def countDef = count(Study.index).query(makeQuery)
def countDef = count(Study.index).query(makeQuery())

private def makeQuery = {
private def makeQuery() = {
val parsed = QueryParser(text, List("owner", "member"))
val matcher: Query =
if parsed.terms.isEmpty then matchAllQuery()
Expand Down
76 changes: 76 additions & 0 deletions modules/elastic/src/main/scala/ublog.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
package lila.search
package ublog

import com.sksamuel.elastic4s.ElasticDsl.*
import com.sksamuel.elastic4s.requests.searches.sort.SortOrder

import spec.SortBlogsBy

/** Search query for user blog (ublog) posts.
  *
  * @param queryText  raw user input; it is normalised into [[sanitized]] before being queried
  * @param by         requested ordering of the results
  * @param minQuality when defined, only posts whose quality is at least this value match
  * @param language   when defined, only posts whose language equals this value match
  */
case class Ublog(
    queryText: String,
    by: SortBlogsBy,
    minQuality: Option[Int],
    language: Option[String]
):

  /** `queryText` rewritten for use in a `query_string` query.
    *
    *   - `<` and `>` are removed: `query_string` cannot escape them, so a backslash would not stop
    *     them from starting a range query.
    *   - The text is lowercased with `Locale.ROOT`, so the result does not depend on the JVM default
    *     locale (under a Turkish locale `LANGUAGE:IT` would otherwise stop matching the pattern below).
    *   - The remaining reserved characters are backslash-escaped.
    *   - ` and ` and `+` become the boolean operator `AND`.
    *   - `:` is only kept in `language:xx` and `quality:n` tokens; elsewhere it becomes a space.
    *
    * NOTE(review): an input that starts or ends with `+` still yields a dangling `AND`; confirm how
    * the query parser treats that before relying on it.
    */
  val sanitized: String = queryText
    .replaceAll("[<>]", " ")
    .trim()
    .toLowerCase(java.util.Locale.ROOT)
    .replaceAll("""([\-=&|!(){}\[\]^"~*?\\/])""", """\\$1""")
    .replaceAll(" and ", " AND ")
    .replaceAll("\\+", " AND ")
    .split("\\s+")
    .map:
      case s if s.matches("language:[a-z]{2}") || s.matches("quality:[1-3]") => s
      case s => s.replace(":", " ") // devs can use the query string until we get a ui for lang/quality
    .mkString(" ")

  /** Search request returning ids only (`fetchSource(false)`), paged by `from` and `size`.
    *
    * Sort order: relevance score or likes first when requested via `by`, then quality (highest
    * first), then date (ascending for `oldest`, descending otherwise). Documents missing quality or
    * date sort last.
    */
  def searchDef(from: From, size: Size) =
    // Only `score` and `likes` add a leading sort key; `newest` and `oldest` rely on the tie-breakers.
    val primarySort = by match
      case SortBlogsBy.score => Seq(scoreSort().order(SortOrder.DESC))
      case SortBlogsBy.likes => Seq(fieldSort(Fields.likes).order(SortOrder.DESC))
      case _                 => Nil
    val tieBreakers = Seq(
      fieldSort(Fields.quality).order(SortOrder.DESC).missing("_last"),
      fieldSort(Fields.date)
        .order(if by == SortBlogsBy.oldest then SortOrder.ASC else SortOrder.DESC)
        .missing("_last")
    )
    search(Ublog.index)
      .query(makeQuery())
      .fetchSource(false)
      .sortBy((primarySort ++ tieBreakers)*)
      .start(from.value)
      .size(size.value)

  /** Count request using the same query as [[searchDef]]. */
  def countDef = count(Ublog.index).query(makeQuery())

  /** The sanitized text must match the `text` field; the optional quality and language
    * constraints are applied as filters.
    */
  private def makeQuery() =
    boolQuery()
      .must(queryStringQuery(sanitized).defaultField(Fields.text))
      .filter(
        List(
          minQuality.map(f => rangeQuery(Fields.quality).gte(f)),
          language.map(l => termQuery(Fields.language, l))
        ).flatten
      )

// Holds the name of the index that ublog search and count requests target.
object Ublog:
val index = "ublog"

// Names of the document fields of the ublog index, shared by the query and the mapping.
object Fields:
val text = "text"
val likes = "likes"
val quality = "quality"
val language = "language"
val date = "date"

object Mapping:
import Fields.*
def fields =
Seq(
textField(text),
shortField(quality).copy(docValues = Some(true)),
keywordField(language).copy(docValues = Some(false)),
shortField(likes).copy(docValues = Some(true)),
dateField(date).copy(docValues = Some(true))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just out of curiosity: why do some fields keep doc values while others don't?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My understanding is that disabling docValues on fields you don’t sort or aggregate on keeps segment size smaller. `language` would only ever be filtered or searched; it would never be sorted on.

)
13 changes: 12 additions & 1 deletion modules/ingestor/src/main/scala/app.config.scala
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,15 @@ object ElasticConfig:

// Groups the ingestor settings of every index, one field per index.
case class IngestorConfig(
forum: IngestorConfig.Forum,
ublog: IngestorConfig.Ublog,
team: IngestorConfig.Team,
study: IngestorConfig.Study,
game: IngestorConfig.Game
)

object IngestorConfig:
case class Forum(batchSize: Int, timeWindows: Int, startAt: Option[Instant], maxPostLength: Int)
case class Ublog(batchSize: Int, timeWindows: Int, startAt: Option[Instant])
case class Team(batchSize: Int, timeWindows: Int, startAt: Option[Instant])
case class Study(batchSize: Int, startAt: Option[Instant], interval: FiniteDuration, databaseName: String)
case class Game(batchSize: Int, timeWindows: Int, startAt: Option[Instant])
Expand All @@ -73,6 +75,15 @@ object IngestorConfig:
env("INGESTOR_FORUM_MAX_POST_LENGTH").or(prop("ingestor.forum.max.post.length")).as[Int].default(5_000)
def config = (batchSize, timeWindows, startAt, maxPostLength).parMapN(Forum.apply)

// Loads the ublog ingestor settings. Each value is read from its `env` name first,
// then from the `prop` name.
private object Ublog:
// Falls back to 100 when neither source provides a value.
private def batchSize =
env("INGESTOR_UBLOG_BATCH_SIZE").or(prop("ingestor.ublog.batch.size")).as[Int].default(100)
// Falls back to 10 when neither source provides a value.
private def timeWindows =
env("INGESTOR_UBLOG_TIME_WINDOWS").or(prop("ingestor.ublog.time.windows")).as[Int].default(10)
// Optional: no default is supplied.
private def startAt =
env("INGESTOR_UBLOG_START_AT").or(prop("ingestor.ublog.start.at")).as[Instant].option
// Combines the three values into the `Ublog` settings.
def config = (batchSize, timeWindows, startAt).parMapN(Ublog.apply)

private object Team:
private def batchSize =
env("INGESTOR_TEAM_BATCH_SIZE").or(prop("ingestor.team.batch.size")).as[Int].default(100)
Expand Down Expand Up @@ -104,7 +115,7 @@ object IngestorConfig:
env("INGESTOR_GAME_START_AT").or(prop("ingestor.game.start.at")).as[Instant].option
def config = (batchSize, timeWindows, startAt).mapN(Game.apply)

def config = (Forum.config, Team.config, Study.config, Game.config).mapN(IngestorConfig.apply)
def config = (Forum.config, Ublog.config, Team.config, Study.config, Game.config).mapN(IngestorConfig.apply)

object CirisCodec:
given ConfigDecoder[String, Instant] = ConfigDecoder[String]
Expand Down
8 changes: 7 additions & 1 deletion modules/ingestor/src/main/scala/cli.scala
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ object cli
opts.index match
case Index.Forum =>
ingestor.forum.run(opts.since, opts.until, opts.dry)
case Index.Ublog =>
ingestor.ublog.run(opts.since, opts.until, opts.dry)
case Index.Study =>
ingestor.study.run(opts.since, opts.until, opts.dry)
case Index.Game =>
Expand All @@ -62,6 +64,7 @@ object cli
ingestor.team.run(opts.since, opts.until, opts.dry)
case _ =>
ingestor.forum.run(opts.since, opts.until, opts.dry) *>
ingestor.ublog.run(opts.since, opts.until, opts.dry) *>
ingestor.study.run(opts.since, opts.until, opts.dry) *>
ingestor.game.run(opts.since, opts.until, opts.dry) *>
ingestor.team.run(opts.since, opts.until, opts.dry)
Expand All @@ -72,12 +75,15 @@ object cli
ingestor.game.watch(opts.since.some, opts.dry)
case Index.Forum =>
ingestor.forum.watch(opts.since.some, opts.dry)
case Index.Ublog =>
ingestor.ublog.watch(opts.since.some, opts.dry)
case Index.Team =>
ingestor.team.watch(opts.since.some, opts.dry)
case Index.Study =>
ingestor.study.watch(opts.since.some, opts.dry)
case _ =>
ingestor.forum.watch(opts.since.some, opts.dry) *>
ingestor.ublog.watch(opts.since.some, opts.dry) *>
ingestor.team.watch(opts.since.some, opts.dry) *>
ingestor.study.watch(opts.since.some, opts.dry) *>
ingestor.game.watch(opts.since.some, opts.dry)
Expand All @@ -95,7 +101,7 @@ object opts:
long = "index",
help = "Target index",
short = "i",
metavar = "forum|team|study|game"
metavar = "forum|ublog|team|study|game"
)

val allIndexOpt =
Expand Down
7 changes: 5 additions & 2 deletions modules/ingestor/src/main/scala/ingestors.scala
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,13 @@ import org.typelevel.log4cats.LoggerFactory

class Ingestors(
val forum: Ingestor,
val ublog: Ingestor,
val study: Ingestor,
val game: Ingestor,
val team: Ingestor
):
def run(): IO[Unit] =
List(forum.watch, team.watch, study.watch, game.watch).parSequence_
List(forum.watch, ublog.watch, team.watch, study.watch, game.watch).parSequence_

object Ingestors:

Expand All @@ -27,12 +28,14 @@ object Ingestors:
)(using LoggerFactory[IO]): IO[Ingestors] =
(
ForumRepo(lichess, config.forum),
UblogRepo(lichess, config.ublog),
StudyRepo(study, local, config.study),
GameRepo(lichess, config.game),
TeamRepo(lichess, config.team)
).mapN: (forums, studies, games, teams) =>
).mapN: (forums, ublogs, studies, games, teams) =>
new Ingestors(
Ingestor(Index.Forum, forums, store, elastic, config.forum.startAt),
Ingestor(Index.Ublog, ublogs, store, elastic, config.ublog.startAt),
Ingestor(Index.Study, studies, store, elastic, config.study.startAt),
Ingestor(Index.Game, games, store, elastic, config.game.startAt),
Ingestor(Index.Team, teams, store, elastic, config.team.startAt)
Expand Down
Loading