perf: use slacc on check-word-mute (#10721)

* perf: use slacc on check-word-mute when all of the specified words are single words

* perf: use slacc wherever possible

* build: avoid tarball

* chore: update slacc

* build: update package name
This commit is contained in:
Acid Chicken (硫酸鶏) 2023-05-05 19:49:34 +09:00 committed by GitHub
parent 14e364a74a
commit 4a72941eda
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 236 additions and 6 deletions

View file

@ -34,7 +34,18 @@
"@swc/core-win32-ia32-msvc": "1.3.56",
"@swc/core-win32-x64-msvc": "1.3.56",
"@tensorflow/tfjs": "4.4.0",
"@tensorflow/tfjs-node": "4.4.0"
"@tensorflow/tfjs-node": "4.4.0",
"slacc-android-arm-eabi": "0.0.7",
"slacc-android-arm64": "0.0.7",
"slacc-darwin-arm64": "0.0.7",
"slacc-darwin-universal": "0.0.7",
"slacc-darwin-x64": "0.0.7",
"slacc-linux-arm-gnueabihf": "0.0.7",
"slacc-linux-arm64-gnu": "0.0.7",
"slacc-linux-arm64-musl": "0.0.7",
"slacc-linux-x64-gnu": "0.0.7",
"slacc-win32-arm64-msvc": "0.0.7",
"slacc-win32-x64-msvc": "0.0.7"
},
"dependencies": {
"@aws-sdk/client-s3": "3.321.1",
@ -128,6 +139,7 @@
"semver": "7.5.0",
"sharp": "0.32.1",
"sharp-read-bmp": "github:misskey-dev/sharp-read-bmp",
"slacc": "0.0.7",
"strict-event-emitter-types": "2.0.0",
"stringz": "2.1.0",
"summaly": "github:misskey-dev/summaly",

View file

@ -1,3 +1,4 @@
import { AhoCorasick } from 'slacc';
import RE2 from 're2';
import type { Note } from '@/models/entities/Note.js';
import type { User } from '@/models/entities/User.js';
@ -12,6 +13,8 @@ type UserLike = {
id: User['id'];
};
// Cache of built AhoCorasick matchers, keyed by the sorted single-keyword mute patterns joined with '\n'.
// checkWordMute deletes and re-inserts the entry it uses on every call, so Map iteration order goes from
// least to most recently used; keys are dropped from the front while the map holds more than 1000 entries.
const acCache = new Map<string, AhoCorasick>();
export async function checkWordMute(note: NoteLike, me: UserLike | null | undefined, mutedWords: Array<string | string[]>): Promise<boolean> {
// 自分自身
if (me && (note.userId === me.id)) return false;
@ -21,7 +24,22 @@ export async function checkWordMute(note: NoteLike, me: UserLike | null | undefi
if (text === '') return false;
const matched = mutedWords.some(filter => {
const acable = mutedWords.filter(filter => Array.isArray(filter) && filter.length === 1).map(filter => filter[0]).sort();
const unacable = mutedWords.filter(filter => !Array.isArray(filter) || filter.length !== 1);
const acCacheKey = acable.join('\n');
const ac = acCache.get(acCacheKey) ?? AhoCorasick.withPatterns(acable);
acCache.delete(acCacheKey);
for (const obsoleteKeys of acCache.keys()) {
if (acCache.size > 1000) {
acCache.delete(obsoleteKeys);
}
}
acCache.set(acCacheKey, ac);
if (ac.isMatch(text)) {
return true;
}
const matched = unacable.some(filter => {
if (Array.isArray(filter)) {
return filter.every(keyword => text.includes(keyword));
} else {

View file

@ -0,0 +1,49 @@
import { checkWordMute } from '@/misc/check-word-mute.js';
// Behavioral tests for checkWordMute, grouped by the shape of the mutedWords entries.
describe(checkWordMute, () => {
	// Entries that are single-element arrays (e.g. ['foo']) are matched through the slacc AhoCorasick matcher.
	describe('Slacc boost mode', () => {
		it('should return false if mutedWords is empty', async () => {
			expect(await checkWordMute({ userId: '1', text: 'foo' }, null, [])).toBe(false);
		});

		it('should return true if mutedWords is not empty and text contains muted word', async () => {
			expect(await checkWordMute({ userId: '1', text: 'foo' }, null, [['foo']])).toBe(true);
		});

		it('should return false if mutedWords is not empty and text does not contain muted word', async () => {
			expect(await checkWordMute({ userId: '1', text: 'foo' }, null, [['bar']])).toBe(false);
		});

		// A note authored by the viewer is never muted (note.userId === me.id).
		it('should return false when the note is written by me even if mutedWords is not empty and text contains muted word', async () => {
			expect(await checkWordMute({ userId: '1', text: 'foo' }, { id: '1' }, [['foo']])).toBe(false);
		});

		// NOTE(review): presumably the CW is matched together with the body text — confirm in check-word-mute.
		it('should return true if mutedWords is not empty and text contains muted word in CW', async () => {
			expect(await checkWordMute({ userId: '1', text: 'foo', cw: 'bar' }, null, [['bar']])).toBe(true);
		});

		it('should return true if mutedWords is not empty and text contains muted word in both CW and text', async () => {
			expect(await checkWordMute({ userId: '1', text: 'foo', cw: 'bar' }, null, [['foo'], ['bar']])).toBe(true);
		});

		// 'foo' matches the body text while 'baz' matches nothing; a single matching entry is enough to mute.
		it('should return true if mutedWords is not empty and only some of the muted words are contained in CW or text', async () => {
			expect(await checkWordMute({ userId: '1', text: 'foo', cw: 'bar' }, null, [['foo'], ['baz']])).toBe(true);
		});
	});

	// Entries that are multi-element arrays match only when every keyword is included in the text.
	describe('normal mode', () => {
		it('should return false if text does not contain muted words', async () => {
			expect(await checkWordMute({ userId: '1', text: 'foo' }, null, [['foo', 'bar']])).toBe(false);
		});

		it('should return true if text contains muted words', async () => {
			expect(await checkWordMute({ userId: '1', text: 'foobar' }, null, [['foo', 'bar']])).toBe(true);
		});

		it('should return false when the note is written by me even if text contains muted words', async () => {
			expect(await checkWordMute({ userId: '1', text: 'foo bar' }, { id: '1' }, [['foo', 'bar']])).toBe(false);
		});
	});

	// Plain string entries such as '/bar/' — presumably parsed as regular expressions; confirm in check-word-mute.
	describe('RegExp mode', () => {
		it('should return false if text does not contain muted words', async () => {
			expect(await checkWordMute({ userId: '1', text: 'foo' }, null, ['/bar/'])).toBe(false);
		});

		it('should return true if text contains muted words', async () => {
			expect(await checkWordMute({ userId: '1', text: 'foobar' }, null, ['/bar/'])).toBe(true);
		});

		it('should return false when the note is written by me even if text contains muted words', async () => {
			expect(await checkWordMute({ userId: '1', text: 'foo bar' }, { id: '1' }, ['/bar/'])).toBe(false);
		});
	});
});