Skip to content

Commit

Permalink
Now ML triggers on caption as well (#54)
Browse files Browse the repository at this point in the history
  • Loading branch information
Szer authored Sep 13, 2024
1 parent f194dfc commit 0c9fd91
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 8 deletions.
13 changes: 13 additions & 0 deletions src/VahterBanBot.Tests/MLBanTests.fs
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,19 @@ type MLBanTests(fixture: VahterTestContainers, _unused: MlAwaitFixture) =
let! msgBanned = fixture.MessageBanned spam.Message
Assert.False msgBanned
}

[<Fact>]
let ``Message with spam in photo caption also triggers auto-delete`` () = task {
    // "2222222" is a spam word in the training set; here it is delivered
    // through message.Caption while message.Text is left null.
    // ChatsToMonitor[0] has no stop words configured.
    let spamCaption = "2222222"
    let captionOnlyUpdate = Tg.quickMsg(chat = fixture.ChatsToMonitor[0], text = null, caption = spamCaption)
    let! _ = fixture.SendMessage captionOnlyUpdate

    // the caption-only message is expected to have been auto-deleted
    let! wasAutoDeleted = fixture.MessageIsAutoDeleted captionOnlyUpdate.Message
    Assert.True wasAutoDeleted
}

interface IAssemblyFixture<VahterTestContainers>
interface IClassFixture<MlAwaitFixture>
3 changes: 2 additions & 1 deletion src/VahterBanBot.Tests/TgMessageUtils.fs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ type Tg() =
)
)

static member quickMsg (?text: string, ?chat: Chat, ?from: User, ?date: DateTime, ?callback: CallbackQuery) =
static member quickMsg (?text: string, ?chat: Chat, ?from: User, ?date: DateTime, ?callback: CallbackQuery, ?caption: string) =
Update(
Id = next(),
Message =
Expand All @@ -42,6 +42,7 @@ type Tg() =
Chat = (chat |> Option.defaultValue (Tg.chat())),
From = (from |> Option.defaultValue (Tg.user())),
Date = (date |> Option.defaultValue DateTime.UtcNow),
Caption = (caption |> Option.defaultValue null),
ReplyToMessage = null
)
)
Expand Down
10 changes: 5 additions & 5 deletions src/VahterBanBot/Bot.fs
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,7 @@ let killSpammerAutomated
|> DB.banUserByBot

let msgType = if deleteMessage then "Deleted" else "Detected"
let logMsg = $"{msgType} spam (score: {score}) in {prependUsername message.Chat.Username} ({message.Chat.Id}) from {prependUsername message.From.Username} ({message.From.Id}) with text:\n{message.Text}"
let logMsg = $"{msgType} spam (score: {score}) in {prependUsername message.Chat.Username} ({message.Chat.Id}) from {prependUsername message.From.Username} ({message.From.Id}) with text:\n{message.TextOrCaption}"

let! replyMarkup = task {
if deleteMessage then
Expand Down Expand Up @@ -493,7 +493,7 @@ let justMessage
do! botClient.DeleteMessageAsync(ChatId(message.Chat.Id), message.MessageId)
|> safeTaskAwait (fun e -> logger.LogError ($"Failed to delete message {message.MessageId} from chat {message.Chat.Id}", e))

elif botConfig.MlEnabled && message.Text <> null then
elif botConfig.MlEnabled && message.TextOrCaption <> null then
use mlActivity = botActivity.StartActivity("mlPrediction")

let shouldBeSkipped =
Expand All @@ -506,14 +506,14 @@ let justMessage
match botConfig.MlStopWordsInChats.TryGetValue message.Chat.Id with
| true, stopWords ->
stopWords
|> Seq.exists (fun sw -> message.Text.Contains(sw, StringComparison.OrdinalIgnoreCase))
|> Seq.exists (fun sw -> message.TextOrCaption.Contains(sw, StringComparison.OrdinalIgnoreCase))
| _ -> false
%mlActivity.SetTag("skipPrediction", shouldBeSkipped)

if not shouldBeSkipped then
let! usrMsgCount = DB.countUniqueUserMsg message.From.Id

match ml.Predict(message.Text, usrMsgCount) with
match ml.Predict(message.TextOrCaption, usrMsgCount) with
| Some prediction ->
%mlActivity.SetTag("spamScoreMl", prediction.Score)

Expand Down Expand Up @@ -667,7 +667,7 @@ let vahterMarkedAsNotSpam

let vahterUsername = vahter.username |> Option.defaultValue null

let logMsg = $"Vahter {prependUsername vahterUsername} ({vahter.id}) marked message {msgId} in {prependUsername chatName}({chatId}) as false-positive (NOT A SPAM)\n{msg.message.Text}"
let logMsg = $"Vahter {prependUsername vahterUsername} ({vahter.id}) marked message {msgId} in {prependUsername chatName}({chatId}) as false-positive (NOT A SPAM)\n{msg.message.TextOrCaption}"
do! botClient.SendTextMessageAsync(ChatId(botConfig.LogsChannelId), logMsg) |> taskIgnore
logger.LogInformation logMsg
}
Expand Down
4 changes: 2 additions & 2 deletions src/VahterBanBot/Types.fs
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ module DbBanned =
if isNull message.From || isNull message.Chat then
failwith "Message should have a user and a chat"
{ message_id = Some message.MessageId
message_text = message.Text
message_text = message.TextOrCaption
banned_user_id = message.From.Id
banned_at = DateTime.UtcNow
banned_in_chat_id = Some message.Chat.Id
Expand All @@ -92,7 +92,7 @@ type DbMessage =
message_id = message.MessageId
user_id = message.From.Id
created_at = DateTime.UtcNow
text = message.Text
text = message.TextOrCaption
raw_message = JsonConvert.SerializeObject message }

[<CLIMutable>]
Expand Down
7 changes: 7 additions & 0 deletions src/VahterBanBot/Utils.fs
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,10 @@ type Task<'x> with
member this.Ignore() = task { let! _ = this in () }

/// Function form of the `Ignore()` extension member on `Task<'x>`:
/// awaits `t` and discards its result, so it can be used at the end of a `|>` pipeline.
let inline taskIgnore (t: Task<'x>) = t.Ignore()

/// Extension on the Telegram `Message` type giving a single accessor for
/// the message's textual content.
type Telegram.Bot.Types.Message with
    /// Returns `Text` when it is not null; otherwise falls back to `Caption`.
    /// No further null check is applied to `Caption`, so callers must still
    /// handle a null result.
    member message.TextOrCaption =
        match message.Text with
        | null -> message.Caption
        | text -> text

0 comments on commit 0c9fd91

Please sign in to comment.