diff --git a/wordview/preprocessing/cleaning.py b/wordview/preprocessing/cleaning.py index f3e4cf4..4984cbe 100644 --- a/wordview/preprocessing/cleaning.py +++ b/wordview/preprocessing/cleaning.py @@ -79,18 +79,18 @@ def remove_emojis(text: str) -> str: """ emoj = re.compile( "[" - "\U0001F600-\U0001F64F" # emoticons - "\U0001F300-\U0001F5FF" # symbols & pictographs - "\U0001F680-\U0001F6FF" # transport & map symbols - "\U0001F1E0-\U0001F1FF" # flags (iOS) - "\U00002500-\U00002BEF" # chinese char - "\U00002702-\U000027B0" - "\U00002702-\U000027B0" - "\U000024C2-\U0001F251" + "\U0001f600-\U0001f64f" # emoticons + "\U0001f300-\U0001f5ff" # symbols & pictographs + "\U0001f680-\U0001f6ff" # transport & map symbols + "\U0001f1e0-\U0001f1ff" # flags (iOS) + "\U00002500-\U00002bef" # chinese char + "\U00002702-\U000027b0" + "\U00002702-\U000027b0" + "\U000024c2-\U0001f251" "\U0001f926-\U0001f937" "\U00010000-\U0010ffff" "\u2640-\u2642" - "\u2600-\u2B55" + "\u2600-\u2b55" "\u200d" "\u23cf" "\u23e9"