2021-10-06 01:40:02 +02:00
|
|
|
import re
|
|
|
|
from typing import Optional, Pattern
|
|
|
|
|
|
|
|
|
|
|
|
class TokenEscaper:
    """
    Backslash-escape punctuation characters found in an input string.

    Every substring matched by the configured pattern is emitted again
    with a single backslash placed in front of it; all other text is
    passed through untouched.
    """

    # Characters that RediSearch requires us to escape during queries.
    # Source: https://oss.redis.com/redisearch/Escaping/#the_rules_of_text_field_tokenization
    DEFAULT_ESCAPED_CHARS = r"[,.<>{}\[\]\\\"\':;!@#$%^&*()\-+=~\ ]"

    def __init__(self, escape_chars_re: Optional[Pattern] = None):
        """
        Store the pattern used to locate text that must be escaped.

        Args:
            escape_chars_re: Compiled pattern whose matches get a leading
                backslash. When omitted (or otherwise falsy), a pattern
                compiled from ``DEFAULT_ESCAPED_CHARS`` is used instead.
        """
        self.escaped_chars_re = escape_chars_re or re.compile(
            self.DEFAULT_ESCAPED_CHARS
        )

    def escape(self, value: str) -> str:
        """
        Return ``value`` with every pattern match prefixed by a backslash.

        Args:
            value: The text to escape.

        Returns:
            A new string in which each match of ``self.escaped_chars_re``
            is preceded by one backslash.
        """
        # Replacement template: ``\\`` yields one literal backslash and
        # ``\g<0>`` re-inserts the entire matched text after it.
        return self.escaped_chars_re.sub(r"\\\g<0>", value)
|