feat: better profanity filtering

This commit is contained in:
Gerard Gascón 2025-04-23 16:24:22 +02:00
parent e97e204a5e
commit 3f5f2feff8
5 changed files with 1586 additions and 9 deletions

View file

@ -1,6 +1,5 @@
asgiref==3.8.1 asgiref==3.8.1
Django==5.1.7 Django==5.1.7
django-profanity-filter==0.2.1
inflection==0.5.1 inflection==0.5.1
python-dotenv==1.0.1 python-dotenv==1.0.1
sqlparse==0.5.3 sqlparse==0.5.3

97
server/filters.py Normal file
View file

@ -0,0 +1,97 @@
import re
import os
import inflection
class ProfanityFilter:
    """Detect and censor profane words in text.

    The default word list is read from ``wordlist.txt`` located next to this
    module. Callers may replace it (``custom_censor_list`` / ``define_words``)
    or add to it (``extra_censor_list`` / ``append_words``). Matching is
    case-insensitive, whole-word only, and also covers the plural form of
    every word as produced by ``inflection.pluralize``.
    """

    def __init__(self, **kwargs):
        """Create a filter and load the default word list from disk.

        Keyword Args:
            custom_censor_list: If non-empty, used instead of the default list.
            extra_censor_list: Words used in addition to the active list.

        Raises:
            OSError: If ``wordlist.txt`` cannot be opened.
        """
        # Copy the caller's lists so that append_words() never mutates an
        # object the caller still owns.
        # If defined, use this instead of _censor_list
        self._custom_censor_list = list(kwargs.get('custom_censor_list') or [])
        # Words to be used in conjunction with _censor_list
        self._extra_censor_list = list(kwargs.get('extra_censor_list') or [])
        # What to be censored -- should not be modified by user
        self._censor_list = []
        # What to censor the words with
        self._censor_char = "*"
        # (word-lists snapshot, compiled pattern) of the last build, or None
        self._pattern_cache = None
        # Where to find the censored words
        self._BASE_DIR = os.path.abspath(os.path.dirname(__file__))
        self._words_file = os.path.join(self._BASE_DIR, 'wordlist.txt')
        self._load_words()

    def _load_words(self):
        """ Loads the list of profane words from file, skipping blank lines. """
        with open(self._words_file, 'r', encoding='utf-8') as f:
            stripped = (line.strip() for line in f)
            # A blank entry would otherwise be pluralized into the word "s".
            self._censor_list = [word for word in stripped if word]

    def define_words(self, word_list):
        """ Define a custom list of profane words. """
        self._custom_censor_list = list(word_list)

    def append_words(self, word_list):
        """ Extends the profane word list with word_list """
        self._extra_censor_list.extend(word_list)

    def set_censor(self, character):
        """ Replaces the original censor character '*' with character """
        if isinstance(character, int):
            character = str(character)
        self._censor_char = character

    def has_bad_word(self, text):
        """ Returns True if text contains profanity, False otherwise """
        pattern = self._get_pattern()
        # Search directly instead of comparing censored output with the input:
        # the comparison gives a false negative whenever the censored text
        # happens to equal the original.
        return pattern is not None and pattern.search(text) is not None

    def get_custom_censor_list(self):
        """ Returns the list of custom profane words """
        return self._custom_censor_list

    def get_extra_censor_list(self):
        """ Returns the list of custom, additional, profane words """
        return self._extra_censor_list

    def get_profane_words(self):
        """ Gets all profane words (active list + extras + their plurals).

        Empty entries are dropped and duplicates removed; the order of the
        returned list is unspecified.
        """
        base_words = self._custom_censor_list or self._censor_list
        words = {w for w in base_words if w}
        words.update(w for w in self._extra_censor_list if w)
        # Materialize the plurals first: the set must not grow while iterated.
        words.update([inflection.pluralize(w) for w in words])
        return list(words)

    def restore_words(self):
        """ Clears all custom censor lists """
        self._custom_censor_list = []
        self._extra_censor_list = []

    def _get_pattern(self):
        """ Returns one compiled regex matching any profane word, or None.

        The pattern is rebuilt only when the content of the word lists has
        changed since the previous call.
        """
        snapshot = (
            tuple(self._custom_censor_list),
            tuple(self._censor_list),
            tuple(self._extra_censor_list),
        )
        cached = getattr(self, '_pattern_cache', None)
        if cached is not None and cached[0] == snapshot:
            return cached[1]

        # Longest first so multi-word entries win over their own prefixes.
        words = sorted(self.get_profane_words(), key=len, reverse=True)
        pattern = None
        if words:
            # re.escape keeps words literal ('a.b' must not match 'axb').
            # The look-arounds act like \b for ordinary words but also work
            # for entries that begin or end with a non-word character.
            alternation = '|'.join(re.escape(word) for word in words)
            pattern = re.compile(
                r'(?<!\w)(?:%s)(?!\w)' % alternation, re.IGNORECASE
            )
        self._pattern_cache = (snapshot, pattern)
        return pattern

    def censor(self, input_text):
        """ Returns input_text with any profane words censored.

        Every matched word is replaced by the censor character repeated once
        per matched character.
        """
        pattern = self._get_pattern()
        if pattern is None:
            return input_text
        censor_char = self._censor_char
        # A callable replacement is inserted verbatim, so a censor character
        # such as a backslash is not interpreted as a regex template escape.
        return pattern.sub(
            lambda match: censor_char * len(match.group()), input_text
        )

    def is_clean(self, input_text):
        """ Returns True if input_text doesn't contain any profane words, False otherwise. """
        return not self.has_bad_word(input_text)

    def is_profane(self, input_text):
        """ Returns True if input_text contains any profane words, False otherwise. """
        return self.has_bad_word(input_text)

View file

@ -1,13 +1,20 @@
import json import json
import random import random
import profanity.templatetags.profanity
from django.db import IntegrityError from django.db import IntegrityError
from django.http import HttpResponse, Http404 from django.http import HttpResponse, Http404
from server.filters import ProfanityFilter
from server.models import Dragon from server.models import Dragon
pf = ProfanityFilter()
def is_profane(value):
    """Return the module-level filter's verdict on whether ``value`` is profane."""
    verdict = pf.is_profane(value)
    return verdict
def add(request, origin, name, color, shirt, hat, decor): def add(request, origin, name, color, shirt, hat, decor):
origin = origin.strip() origin = origin.strip()
name = name.strip() name = name.strip()
@ -16,11 +23,7 @@ def add(request, origin, name, color, shirt, hat, decor):
hat = int(hat.strip()) hat = int(hat.strip())
decor = int(decor.strip()) decor = int(decor.strip())
if profanity.templatetags.profanity.is_profane(name): if is_profane(name) or is_profane(origin):
print("profanity detected")
return HttpResponse("profanity detected")
if profanity.templatetags.profanity.is_profane(origin):
print("profanity detected") print("profanity detected")
return HttpResponse("profanity detected") return HttpResponse("profanity detected")

1479
server/wordlist.txt Normal file

File diff suppressed because it is too large Load diff

View file

@ -46,8 +46,7 @@ INSTALLED_APPS = [
'django.contrib.messages', 'django.contrib.messages',
'django.contrib.staticfiles', 'django.contrib.staticfiles',
'server', 'server'
'profanity'
] ]
MIDDLEWARE = [ MIDDLEWARE = [