feat: better profanity filtering
This commit is contained in:
parent
e97e204a5e
commit
3f5f2feff8
5 changed files with 1586 additions and 9 deletions
97
server/filters.py
Normal file
97
server/filters.py
Normal file
|
@@ -0,0 +1,97 @@
|
|||
import re
|
||||
import os
|
||||
|
||||
import inflection
|
||||
|
||||
|
||||
class ProfanityFilter:
    """ Detects and censors profane words in text.

    The active word list is the custom list when one is defined, otherwise
    the words loaded from ``wordlist.txt`` located next to this module. The
    extra words, and the ``inflection.pluralize`` form of every word, are
    added on top of whichever list is active.

    Keyword arguments:
    custom_censor_list -- words to use instead of the loaded word list
    extra_censor_list -- words to use in addition to the active list
    """

    def __init__(self, **kwargs):
        # If defined, use this instead of _censor_list.
        # Copied so later changes never touch a list owned by the caller;
        # "or []" keeps an explicit None working like "not given".
        self._custom_censor_list = list(kwargs.get('custom_censor_list') or [])

        # Words to be used in conjunction with _censor_list
        self._extra_censor_list = list(kwargs.get('extra_censor_list') or [])

        # What to be censored -- should not be modified by user
        self._censor_list = []

        # What to censor the words with
        self._censor_char = "*"

        # Where to find the censored words
        self._BASE_DIR = os.path.abspath(os.path.dirname(__file__))
        self._words_file = os.path.join(self._BASE_DIR, 'wordlist.txt')

        self._load_words()

    def _load_words(self):
        """ Loads the list of profane words from file, one word per line. """
        with open(self._words_file, 'r') as f:
            stripped_lines = [line.strip() for line in f]
        # Blank lines would otherwise end up as empty "words".
        self._censor_list = [word for word in stripped_lines if word]

    def _compile_word(self, word):
        """ Builds the case-insensitive whole-word pattern for one word. """
        # re.escape keeps metacharacters inside a word (".", "+", "$", ...)
        # literal. The look-arounds act exactly like \b for words that start
        # and end with a word character, and additionally allow words with
        # punctuation at either edge to match.
        return re.compile(r'(?<!\w)%s(?!\w)' % re.escape(word), re.IGNORECASE)

    def define_words(self, word_list):
        """ Define a custom list of profane words (replaces the loaded list). """
        # Store a copy so the caller's list is never aliased.
        self._custom_censor_list = list(word_list or [])

    def append_words(self, word_list):
        """ Extends the profane word list with word_list """
        self._extra_censor_list.extend(word_list)

    def set_censor(self, character):
        """ Replaces the original censor character '*' with character """
        if isinstance(character, int):
            character = str(character)
        self._censor_char = character

    def has_bad_word(self, text):
        """ Returns True if text contains profanity, False otherwise """
        # Search directly instead of comparing censor() output with the
        # input: the comparison reports "clean" whenever the mask happens to
        # equal the matched text.
        for word in self.get_profane_words():
            if word and self._compile_word(word).search(text):
                return True
        return False

    def get_custom_censor_list(self):
        """ Returns the list of custom profane words """
        return self._custom_censor_list

    def get_extra_censor_list(self):
        """ Returns the list of custom, additional, profane words """
        return self._extra_censor_list

    def get_profane_words(self):
        """ Gets all profane words, without duplicates and in no particular order """
        # A non-empty custom list replaces the loaded word list entirely.
        base_words = self._custom_censor_list or self._censor_list

        words = set(base_words)
        words.update(self._extra_censor_list)
        # The list is fully built before update() runs, so the set is not
        # modified while it is being iterated.
        words.update([inflection.pluralize(word) for word in words])
        words.discard('')

        return list(words)

    def restore_words(self):
        """ Clears all custom censor lists """
        self._custom_censor_list = []
        self._extra_censor_list = []

    def censor(self, input_text):
        """ Returns input_text with any profane words censored

        Every match is replaced by the censor character repeated once per
        character of the matched word. Matching is case-insensitive and
        limited to whole words.
        """
        res = input_text
        censor_char = self._censor_char

        # Longest entries first, so a multi-word entry is masked as a whole
        # before a shorter entry it contains; ties are broken alphabetically
        # to keep the result deterministic.
        ordered = sorted(self.get_profane_words(), key=lambda w: (-len(w), w))

        for word in ordered:
            if not word:
                continue
            mask = censor_char * len(word)
            # A callable replacement inserts the mask verbatim; a plain
            # string would be parsed as a template and mangle backslashes.
            res = self._compile_word(word).sub(lambda match, mask=mask: mask, res)

        return res

    def is_clean(self, input_text):
        """ Returns True if input_text doesn't contain any profane words, False otherwise. """
        return not self.has_bad_word(input_text)

    def is_profane(self, input_text):
        """ Returns True if input_text contains any profane words, False otherwise. """
        return self.has_bad_word(input_text)
|
Loading…
Add table
Add a link
Reference in a new issue