feat: better profanity filtering

This commit is contained in:
Gerard Gascón 2025-04-23 16:24:22 +02:00
parent e97e204a5e
commit 3f5f2feff8
5 changed files with 1586 additions and 9 deletions

View file

@ -1,6 +1,5 @@
asgiref==3.8.1
Django==5.1.7
django-profanity-filter==0.2.1
inflection==0.5.1
python-dotenv==1.0.1
sqlparse==0.5.3

97
server/filters.py Normal file
View file

@ -0,0 +1,97 @@
import re
import os
import inflection
class ProfanityFilter:
def __init__(self, **kwargs):
# If defined, use this instead of _censor_list
self._custom_censor_list = kwargs.get('custom_censor_list', [])
# Words to be used in conjunction with _censor_list
self._extra_censor_list = kwargs.get('extra_censor_list', [])
# What to be censored -- should not be modified by user
self._censor_list = []
# What to censor the words with
self._censor_char = "*"
# Where to find the censored words
self._BASE_DIR = os.path.abspath(os.path.dirname(__file__))
self._words_file = os.path.join(self._BASE_DIR, 'wordlist.txt')
self._load_words()
def _load_words(self):
""" Loads the list of profane words from file. """
with open(self._words_file, 'r') as f:
self._censor_list = [line.strip() for line in f.readlines()]
def define_words(self, word_list):
""" Define a custom list of profane words. """
self._custom_censor_list = word_list
def append_words(self, word_list):
""" Extends the profane word list with word_list """
self._extra_censor_list.extend(word_list)
def set_censor(self, character):
""" Replaces the original censor character '*' with character """
if isinstance(character, int):
character = str(character)
self._censor_char = character
def has_bad_word(self, text):
""" Returns True if text contains profanity, False otherwise """
return self.censor(text) != text
def get_custom_censor_list(self):
""" Returns the list of custom profane words """
return self._custom_censor_list
def get_extra_censor_list(self):
""" Returns the list of custom, additional, profane words """
return self._extra_censor_list
def get_profane_words(self):
""" Gets all profane words """
profane_words = []
if self._custom_censor_list:
profane_words = [w for w in self._custom_censor_list] # Previous versions of Python don't have list.copy()
else:
profane_words = [w for w in self._censor_list]
profane_words.extend(self._extra_censor_list)
profane_words.extend([inflection.pluralize(word) for word in profane_words])
profane_words = list(set(profane_words))
return profane_words
def restore_words(self):
""" Clears all custom censor lists """
self._custom_censor_list = []
self._extra_censor_list = []
def censor(self, input_text):
""" Returns input_text with any profane words censored """
bad_words = self.get_profane_words()
res = input_text
for word in bad_words:
word = r'\b%s\b' % word # Apply word boundaries to the bad word
regex = re.compile(word, re.IGNORECASE)
res = regex.sub(self._censor_char * (len(word) - 4), res)
return res
def is_clean(self, input_text):
""" Returns True if input_text doesn't contain any profane words, False otherwise. """
return not self.has_bad_word(input_text)
def is_profane(self, input_text):
""" Returns True if input_text contains any profane words, False otherwise. """
return self.has_bad_word(input_text)

View file

@ -1,13 +1,20 @@
import json
import random
import profanity.templatetags.profanity
from django.db import IntegrityError
from django.http import HttpResponse, Http404
from server.filters import ProfanityFilter
from server.models import Dragon
pf = ProfanityFilter()
def is_profane(value):
return pf.is_profane(value)
def add(request, origin, name, color, shirt, hat, decor):
origin = origin.strip()
name = name.strip()
@ -16,11 +23,7 @@ def add(request, origin, name, color, shirt, hat, decor):
hat = int(hat.strip())
decor = int(decor.strip())
if profanity.templatetags.profanity.is_profane(name):
print("profanity detected")
return HttpResponse("profanity detected")
if profanity.templatetags.profanity.is_profane(origin):
if is_profane(name) or is_profane(origin):
print("profanity detected")
return HttpResponse("profanity detected")

1479
server/wordlist.txt Normal file

File diff suppressed because it is too large Load diff

View file

@ -46,8 +46,7 @@ INSTALLED_APPS = [
'django.contrib.messages',
'django.contrib.staticfiles',
'server',
'profanity'
'server'
]
MIDDLEWARE = [