97 lines
		
	
	
		
			No EOL
		
	
	
		
			3.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			97 lines
		
	
	
		
			No EOL
		
	
	
		
			3.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import re
 | |
| import os
 | |
| 
 | |
| import inflection
 | |
| 
 | |
| 
 | |
| class ProfanityFilter:
 | |
|     def __init__(self, **kwargs):
 | |
| 
 | |
|         # If defined, use this instead of _censor_list
 | |
|         self._custom_censor_list = kwargs.get('custom_censor_list', [])
 | |
| 
 | |
|         # Words to be used in conjunction with _censor_list
 | |
|         self._extra_censor_list = kwargs.get('extra_censor_list', [])
 | |
| 
 | |
|         # What to be censored -- should not be modified by user
 | |
|         self._censor_list = []
 | |
| 
 | |
|         # What to censor the words with
 | |
|         self._censor_char = "*"
 | |
| 
 | |
|         # Where to find the censored words
 | |
|         self._BASE_DIR = os.path.abspath(os.path.dirname(__file__))
 | |
|         self._words_file = os.path.join(self._BASE_DIR, 'wordlist.txt')
 | |
| 
 | |
|         self._load_words()
 | |
| 
 | |
|     def _load_words(self):
 | |
|         """ Loads the list of profane words from file. """
 | |
|         with open(self._words_file, 'r') as f:
 | |
|             self._censor_list = [line.strip() for line in f.readlines()]
 | |
| 
 | |
|     def define_words(self, word_list):
 | |
|         """ Define a custom list of profane words. """
 | |
|         self._custom_censor_list = word_list
 | |
| 
 | |
|     def append_words(self, word_list):
 | |
|         """ Extends the profane word list with word_list """
 | |
|         self._extra_censor_list.extend(word_list)
 | |
| 
 | |
|     def set_censor(self, character):
 | |
|         """ Replaces the original censor character '*' with character """
 | |
|         if isinstance(character, int):
 | |
|             character = str(character)
 | |
|         self._censor_char = character
 | |
| 
 | |
|     def has_bad_word(self, text):
 | |
|         """ Returns True if text contains profanity, False otherwise """
 | |
|         return self.censor(text) != text
 | |
| 
 | |
|     def get_custom_censor_list(self):
 | |
|         """ Returns the list of custom profane words """
 | |
|         return self._custom_censor_list
 | |
| 
 | |
|     def get_extra_censor_list(self):
 | |
|         """ Returns the list of custom, additional, profane words """
 | |
|         return self._extra_censor_list
 | |
| 
 | |
|     def get_profane_words(self):
 | |
|         """ Gets all profane words """
 | |
|         profane_words = []
 | |
| 
 | |
|         if self._custom_censor_list:
 | |
|             profane_words = [w for w in self._custom_censor_list]  # Previous versions of Python don't have list.copy()
 | |
|         else:
 | |
|             profane_words = [w for w in self._censor_list]
 | |
| 
 | |
|         profane_words.extend(self._extra_censor_list)
 | |
|         profane_words.extend([inflection.pluralize(word) for word in profane_words])
 | |
|         profane_words = list(set(profane_words))
 | |
| 
 | |
|         return profane_words
 | |
| 
 | |
|     def restore_words(self):
 | |
|         """ Clears all custom censor lists """
 | |
|         self._custom_censor_list = []
 | |
|         self._extra_censor_list = []
 | |
| 
 | |
|     def censor(self, input_text):
 | |
|         """ Returns input_text with any profane words censored """
 | |
|         bad_words = self.get_profane_words()
 | |
|         res = input_text
 | |
| 
 | |
|         for word in bad_words:
 | |
|             word = r'\b%s\b' % word  # Apply word boundaries to the bad word
 | |
|             regex = re.compile(word, re.IGNORECASE)
 | |
|             res = regex.sub(self._censor_char * (len(word) - 4), res)
 | |
| 
 | |
|         return res
 | |
| 
 | |
|     def is_clean(self, input_text):
 | |
|         """ Returns True if input_text doesn't contain any profane words, False otherwise. """
 | |
|         return not self.has_bad_word(input_text)
 | |
| 
 | |
|     def is_profane(self, input_text):
 | |
|         """ Returns True if input_text contains any profane words, False otherwise. """
 | |
|         return self.has_bad_word(input_text) | 
