2021-10-06 01:40:02 +02:00
|
|
|
import re
|
|
|
|
from typing import Optional, Pattern
|
|
|
|
|
|
|
|
|
|
|
|
class TokenEscaper:
    """
    Backslash-escape punctuation characters found in an input string.

    Every substring matched by ``escaped_chars_re`` is emitted again with a
    single backslash placed in front of it; all other text passes through
    unchanged.
    """

    # Characters that RediSearch requires us to escape during queries.
    # Source: https://redis.io/docs/stack/search/reference/escaping/#the-rules-of-text-field-tokenization
    DEFAULT_ESCAPED_CHARS = r"[,.<>{}\[\]\\\"\':;!@#$%^&*()\-+=~\/ ]"

    def __init__(self, escape_chars_re: Optional[Pattern] = None):
        """
        Store the pattern used to find characters that need escaping.

        Args:
            escape_chars_re: Compiled pattern to use. When omitted (or
                otherwise falsy), ``DEFAULT_ESCAPED_CHARS`` is compiled and
                used instead.
        """
        # ``or`` keeps the lazy fallback: the default is compiled only when
        # the caller did not supply a pattern.
        self.escaped_chars_re = escape_chars_re or re.compile(
            self.DEFAULT_ESCAPED_CHARS
        )

    def escape(self, value: str) -> str:
        """
        Return ``value`` with each match of ``escaped_chars_re`` prefixed by
        a backslash.
        """
        return self.escaped_chars_re.sub(
            lambda hit: "\\" + hit.group(0),
            value,
        )
|