Skip to content

Commit 70375f7

Browse files
committed
[feature/handle-similar-messages-as-scam]
1 parent e800cde commit 70375f7

File tree

5 files changed

+213
-12
lines changed

5 files changed

+213
-12
lines changed

application/config.json.template

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,11 @@
8989
],
9090
"isHostSimilarToKeywordDistanceThreshold": 2,
9191
"suspiciousAttachmentsThreshold": 3,
92-
"suspiciousAttachmentNamePattern": "(image|\\d{1,2})\\.[^.]{0,5}"
92+
"suspiciousAttachmentNamePattern": "(image|\\d{1,2})\\.[^.]{0,5}",
93+
"maxAllowedSimilarMessages": 2,
94+
"similarMessagesWindow": 1,
95+
"similarMessageLengthIgnore": 10,
96+
"similarMessagesWhitelist": []
9397
},
9498
"wolframAlphaAppId": "79J52T-6239TVXHR7",
9599
"helpSystem": {

application/src/main/java/org/togetherjava/tjbot/config/ScamBlockerConfig.java

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@ public final class ScamBlockerConfig {
2626
private final int isHostSimilarToKeywordDistanceThreshold;
2727
private final int suspiciousAttachmentsThreshold;
2828
private final String suspiciousAttachmentNamePattern;
29+
private final int maxAllowedSimilarMessages;
30+
private final int similarMessagesWindow;
31+
private final int similarMessageLengthIgnore;
32+
private final Set<String> similarMessagesWhitelist;
2933

3034
@JsonCreator(mode = JsonCreator.Mode.PROPERTIES)
3135
private ScamBlockerConfig(@JsonProperty(value = "mode", required = true) Mode mode,
@@ -46,7 +50,12 @@ private ScamBlockerConfig(@JsonProperty(value = "mode", required = true) Mode mo
4650
@JsonProperty(value = "suspiciousAttachmentsThreshold",
4751
required = true) int suspiciousAttachmentsThreshold,
4852
@JsonProperty(value = "suspiciousAttachmentNamePattern",
49-
required = true) String suspiciousAttachmentNamePattern) {
53+
required = true) String suspiciousAttachmentNamePattern,
54+
@JsonProperty(value = "maxAllowedSimilarMessages") int maxAllowedSimilarMessages,
55+
@JsonProperty(value = "similarMessagesWindow") int similarMessagesWindow,
56+
@JsonProperty(value = "similarMessageLengthIgnore") int similarMessageLengthIgnore,
57+
@JsonProperty(
58+
value = "similarMessagesWhitelist") Set<String> similarMessagesWhitelist) {
5059
this.mode = Objects.requireNonNull(mode);
5160
this.reportChannelPattern = Objects.requireNonNull(reportChannelPattern);
5261
this.botTrapChannelPattern = Objects.requireNonNull(botTrapChannelPattern);
@@ -59,6 +68,10 @@ private ScamBlockerConfig(@JsonProperty(value = "mode", required = true) Mode mo
5968
this.suspiciousAttachmentsThreshold = suspiciousAttachmentsThreshold;
6069
this.suspiciousAttachmentNamePattern =
6170
Objects.requireNonNull(suspiciousAttachmentNamePattern);
71+
this.maxAllowedSimilarMessages = maxAllowedSimilarMessages;
72+
this.similarMessagesWindow = similarMessagesWindow;
73+
this.similarMessageLengthIgnore = similarMessageLengthIgnore;
74+
this.similarMessagesWhitelist = similarMessagesWhitelist;
6275
}
6376

6477
/**
@@ -167,6 +180,43 @@ public String getSuspiciousAttachmentNamePattern() {
167180
return suspiciousAttachmentNamePattern;
168181
}
169182

183+
/**
184+
* Gets the maximum amount of allowed messages before it gets flagged by the scam detector.
185+
*
186+
* @return the maximum amount of allowed messages
187+
*/
188+
public int getMaxAllowedSimilarMessages() {
189+
return maxAllowedSimilarMessages;
190+
}
191+
192+
/**
193+
* Gets the window in minutes to which messages are kept in the similar messages feature.
194+
*
195+
* @return the window in minutes to keep the messages
196+
*/
197+
public int getSimilarMessagesWindow() {
198+
return similarMessagesWindow;
199+
}
200+
201+
/**
202+
* Gets the maximum length allowed before the message gets monitored by the similar message
203+
* feature.
204+
*
205+
* @return maximum length allowed
206+
*/
207+
public int getSimilarMessageLengthIgnore() {
208+
return similarMessageLengthIgnore;
209+
}
210+
211+
/**
212+
* Gets the set of messages that are allowed to be spammed in the similar messages feature.
213+
*
214+
* @return set of whitelisted messages
215+
*/
216+
public Set<String> getSimilarMessagesWhitelist() {
217+
return similarMessagesWhitelist;
218+
}
219+
170220
/**
171221
* Mode of a scam blocker. Controls which actions it takes when detecting scam.
172222
*/
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
package org.togetherjava.tjbot.features.moderation.scam;
2+
3+
4+
import java.time.Instant;
5+
import java.util.Objects;
6+
7+
/**
8+
* Information about a message, used to detect spam of the same message by the same user in
9+
* different channels.
10+
*
11+
* @param userId the id of the user
12+
* @param channelId the channel where the message was posted
13+
* @param messageHash the hash of the message
14+
* @param timestamp when the message was posted
15+
*/
16+
public record MessageInfo(long userId, long channelId, String messageHash, Instant timestamp) {
17+
18+
@Override
19+
public boolean equals(Object other) {
20+
return other instanceof MessageInfo message && this.userId == message.userId
21+
&& this.channelId == message.channelId;
22+
}
23+
24+
@Override
25+
public int hashCode() {
26+
return Objects.hash(userId, channelId);
27+
}
28+
}

application/src/main/java/org/togetherjava/tjbot/features/moderation/scam/ScamBlocker.java

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,7 @@
2525

2626
import org.togetherjava.tjbot.config.Config;
2727
import org.togetherjava.tjbot.config.ScamBlockerConfig;
28-
import org.togetherjava.tjbot.features.MessageReceiverAdapter;
29-
import org.togetherjava.tjbot.features.UserInteractionType;
30-
import org.togetherjava.tjbot.features.UserInteractor;
28+
import org.togetherjava.tjbot.features.*;
3129
import org.togetherjava.tjbot.features.componentids.ComponentIdGenerator;
3230
import org.togetherjava.tjbot.features.componentids.ComponentIdInteractor;
3331
import org.togetherjava.tjbot.features.moderation.ModerationAction;
@@ -38,11 +36,8 @@
3836
import org.togetherjava.tjbot.logging.LogMarkers;
3937

4038
import java.awt.Color;
41-
import java.util.Collection;
42-
import java.util.EnumSet;
43-
import java.util.List;
44-
import java.util.Optional;
45-
import java.util.Set;
39+
import java.util.*;
40+
import java.util.concurrent.TimeUnit;
4641
import java.util.function.Consumer;
4742
import java.util.function.Predicate;
4843
import java.util.function.UnaryOperator;
@@ -55,7 +50,7 @@
5550
* If scam is detected, depending on the configuration, the blockers actions range from deleting the
5651
* message and banning the author to just logging the message for auditing.
5752
*/
58-
public final class ScamBlocker extends MessageReceiverAdapter implements UserInteractor {
53+
public final class ScamBlocker extends MessageReceiverAdapter implements UserInteractor, Routine {
5954
private static final Logger logger = LoggerFactory.getLogger(ScamBlocker.class);
6055
private static final Color AMBIENT_COLOR = Color.decode("#CFBFF5");
6156
private static final Set<ScamBlockerConfig.Mode> MODES_WITH_IMMEDIATE_DELETION =
@@ -72,8 +67,8 @@ public final class ScamBlocker extends MessageReceiverAdapter implements UserInt
7267
private final ModerationActionsStore actionsStore;
7368
private final ScamHistoryStore scamHistoryStore;
7469
private final Predicate<String> hasRequiredRole;
75-
7670
private final ComponentIdInteractor componentIdInteractor;
71+
private final SimilarMessagesDetector similarMessagesDetector;
7772

7873
/**
7974
* Creates a new listener to receive all message sent in any channel.
@@ -103,6 +98,7 @@ public ScamBlocker(ModerationActionsStore actionsStore, ScamHistoryStore scamHis
10398
hasRequiredRole = Pattern.compile(config.getSoftModerationRolePattern()).asMatchPredicate();
10499

105100
componentIdInteractor = new ComponentIdInteractor(getInteractionType(), getName());
101+
similarMessagesDetector = new SimilarMessagesDetector(config.getScamBlocker());
106102
}
107103

108104
@Override
@@ -141,6 +137,10 @@ public void onMessageReceived(MessageReceivedEvent event) {
141137
isSafe = false;
142138
}
143139

140+
if (isSafe && similarMessagesDetector.doSimilarMessageCheck(event)) {
141+
isSafe = false;
142+
}
143+
144144
if (isSafe) {
145145
return;
146146
}
@@ -153,6 +153,16 @@ public void onMessageReceived(MessageReceivedEvent event) {
153153
takeAction(event);
154154
}
155155

156+
@Override
157+
public Schedule createSchedule() {
158+
return new Schedule(ScheduleMode.FIXED_RATE, 1, 1, TimeUnit.MINUTES);
159+
}
160+
161+
@Override
162+
public void runRoutine(JDA jda) {
163+
similarMessagesDetector.runRoutine();
164+
}
165+
156166
private void takeActionWasAlreadyReported(MessageReceivedEvent event) {
157167
// The user recently send the same scam already, and that was already reported and handled
158168
addScamToHistory(event);
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
package org.togetherjava.tjbot.features.moderation.scam;
2+
3+
import net.dv8tion.jda.api.entities.Message;
4+
import net.dv8tion.jda.api.events.message.MessageReceivedEvent;
5+
6+
import org.togetherjava.tjbot.config.ScamBlockerConfig;
7+
import org.togetherjava.tjbot.features.utils.Hashing;
8+
9+
import java.time.Instant;
10+
import java.time.temporal.ChronoUnit;
11+
import java.util.HashSet;
12+
import java.util.Set;
13+
import java.util.stream.Collectors;
14+
15+
/**
16+
* Class which tries to detect scams by monitoring similar messages.
17+
*/
18+
public class SimilarMessagesDetector {
19+
private static final String HASH_METHOD = "SHA";
20+
21+
private final ScamBlockerConfig scamBlockerConfig;
22+
private final Set<MessageInfo> messageCache;
23+
private final Set<Long> alreadyFlaggedUsers;
24+
25+
/**
26+
* Creates an instance of this class by using the given config.
27+
*
28+
* @param scamBlockerConfig the scam config
29+
*/
30+
public SimilarMessagesDetector(ScamBlockerConfig scamBlockerConfig) {
31+
this.scamBlockerConfig = scamBlockerConfig;
32+
this.messageCache = new HashSet<>();
33+
this.alreadyFlaggedUsers = new HashSet<>();
34+
}
35+
36+
private boolean shouldIgnore(Message message) {
37+
if (!message.getAttachments().isEmpty()) {
38+
return false;
39+
}
40+
if (message.getContentRaw().length() <= scamBlockerConfig.getSimilarMessageLengthIgnore()) {
41+
return true;
42+
}
43+
return scamBlockerConfig.getSimilarMessagesWhitelist()
44+
.contains(message.getContentRaw().toLowerCase());
45+
}
46+
47+
private MessageInfo addToMessageCache(MessageReceivedEvent event) {
48+
long userId = event.getAuthor().getIdLong();
49+
long channelId = event.getChannel().getIdLong();
50+
String messageHash = getHash(event.getMessage());
51+
Instant timestamp = event.getMessage().getTimeCreated().toInstant();
52+
MessageInfo messageInfo = new MessageInfo(userId, channelId, messageHash, timestamp);
53+
messageCache.add(messageInfo);
54+
return messageInfo;
55+
}
56+
57+
private String getHash(Message message) {
58+
String wholeText = message.getContentRaw() + message.getAttachments()
59+
.stream()
60+
.map(Message.Attachment::getFileName)
61+
.collect(Collectors.joining());
62+
return Hashing.bytesToHex(Hashing.hashUTF8(HASH_METHOD, wholeText));
63+
}
64+
65+
private boolean hasPostedTooManySimilarMessages(long userId, String messageHash) {
66+
long similarMessageCount = messageCache.stream()
67+
.filter(m -> m.userId() == userId && m.messageHash().equals(messageHash)
68+
&& !isObsolete(m))
69+
.count();
70+
return similarMessageCount > scamBlockerConfig.getMaxAllowedSimilarMessages();
71+
}
72+
73+
private boolean isObsolete(MessageInfo messageInfo) {
74+
return messageInfo.timestamp()
75+
.plus(scamBlockerConfig.getSimilarMessagesWindow(), ChronoUnit.MINUTES)
76+
.isBefore(Instant.now());
77+
}
78+
79+
/**
80+
* Stores message data and if many messages of same author, different channel and same content
81+
* is posted several times, returns true.
82+
*
83+
* @param event the message event
84+
* @return true if the user spammed the message in several channels, false otherwise
85+
*/
86+
public boolean doSimilarMessageCheck(MessageReceivedEvent event) {
87+
long userId = event.getAuthor().getIdLong();
88+
if (alreadyFlaggedUsers.contains(userId)) {
89+
return true;
90+
}
91+
if (shouldIgnore(event.getMessage())) {
92+
return false;
93+
}
94+
String hash = addToMessageCache(event).messageHash();
95+
if (hasPostedTooManySimilarMessages(userId, hash)) {
96+
alreadyFlaggedUsers.add(userId);
97+
return true;
98+
} else {
99+
return false;
100+
}
101+
}
102+
103+
/**
104+
* Has to be called often to clear the cache.
105+
*/
106+
public void runRoutine() {
107+
messageCache.removeIf(this::isObsolete);
108+
}
109+
}

0 commit comments

Comments
 (0)