Skip to content

Commit 9992004

Browse files
authored
Merge pull request #9 from UncoderIO/tokenizer-and-render-fixes
parser, tokenizer, render fixes
2 parents c026f1d + c34e622 commit 9992004

File tree

19 files changed

+71
-63
lines changed

19 files changed

+71
-63
lines changed

siem-converter/app/converter/backends/athena/renders/athena.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -67,6 +67,8 @@ class AthenaQueryRender(BaseQueryRender):
6767

6868
field_value_map = AthenaFieldValue(or_token=or_token)
6969
query_pattern = "{prefix} WHERE {query} {functions}"
70+
comment_symbol = "--"
71+
is_multi_line_comment = True
7072

7173
def generate_prefix(self, log_source_signature: LogSourceSignature) -> str:
7274
table = str(log_source_signature) if str(log_source_signature) else "eventlog"

siem-converter/app/converter/backends/athena/tokenizer.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@ class AthenaTokenizer(QueryTokenizer):
3030
match_operator_pattern = r"""(?:___field___\s?(?P<match_operator>like|in|=|>|<|>=|<=|<>|!=))\s?"""
3131
num_value_pattern = r"(?P<num_value>\d+(?:\.\d+)*)\s*"
3232
bool_value_pattern = r"(?P<bool_value>true|false)\s*"
33-
single_quotes_value_pattern = r"""'(?P<s_q_value>(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{\}\s]|'')+)'"""
33+
single_quotes_value_pattern = r"""'(?P<s_q_value>(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{\}\s]|'')*)'"""
3434
_value_pattern = fr"{num_value_pattern}|{bool_value_pattern}|{single_quotes_value_pattern}"
3535
multi_value_pattern = r"""\((?P<value>\d+(?:,\s*\d+)*|'(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{\}\s]|'')*'(?:,\s*'(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{\}\s]|'')*')*)\)"""
3636

@@ -49,13 +49,13 @@ def should_process_value_wildcard_symbols(operator: str) -> bool:
4949
return operator.lower() in ("like",)
5050

5151
def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> Tuple[str, Any]:
52-
if num_value := get_match_group(match, group_name='num_value'):
52+
if (num_value := get_match_group(match, group_name='num_value')) is not None:
5353
return operator, num_value
5454

55-
elif bool_value := get_match_group(match, group_name='bool_value'):
55+
elif (bool_value := get_match_group(match, group_name='bool_value')) is not None:
5656
return operator, bool_value
5757

58-
elif s_q_value := get_match_group(match, group_name='s_q_value'):
58+
elif (s_q_value := get_match_group(match, group_name='s_q_value')) is not None:
5959
return operator, s_q_value
6060

6161
return super().get_operator_and_value(match, operator)

siem-converter/app/converter/backends/chronicle/tokenizer.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -31,22 +31,22 @@ class ChronicleQueryTokenizer(QueryTokenizer):
3131
num_value_pattern = r"(?P<num_value>\d+(?:\.\d+)*)\s*"
3232
bool_value_pattern = r"(?P<bool_value>true|false)\s*"
3333
double_quotes_value_pattern = r'"(?P<d_q_value>(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{\}\s]|\\\"|\\\\)*)"\s*(?:nocase)?'
34-
re_value_pattern = r"/(?P<re_value>[:a-zA-Z\*0-9=+%#\\\-_\,\"\'\.$&^@!\(\)\{\}\s?]*)/\s*(?:nocase)?"
34+
re_value_pattern = r"/(?P<re_value>(?:\\\/|[:a-zA-Z\*0-9=+%#\\\-_\,\"\'\.$&^@!\(\)\{\}\s?])+)/\s*(?:nocase)?"
3535
_value_pattern = fr"{num_value_pattern}|{bool_value_pattern}|{double_quotes_value_pattern}|{re_value_pattern}"
3636

3737
wildcard_symbol = ".*"
3838

3939
def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> Tuple[str, Any]:
40-
if num_value := get_match_group(match, group_name='num_value'):
40+
if (num_value := get_match_group(match, group_name='num_value')) is not None:
4141
return operator, num_value
4242

43-
elif bool_value := get_match_group(match, group_name='bool_value'):
43+
elif (bool_value := get_match_group(match, group_name='bool_value')) is not None:
4444
return operator, bool_value
4545

46-
elif d_q_value := get_match_group(match, group_name='d_q_value'):
46+
elif (d_q_value := get_match_group(match, group_name='d_q_value')) is not None:
4747
return operator, d_q_value
4848

49-
elif re_value := get_match_group(match, group_name='re_value'):
49+
elif (re_value := get_match_group(match, group_name='re_value')) is not None:
5050
return OperatorType.REGEX, re_value
5151

5252
return super().get_operator_and_value(match, operator)
@@ -94,10 +94,10 @@ def search_field_value(self, query):
9494
return super().search_field_value(query=query)
9595

9696
def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> Tuple[str, Any]:
97-
if d_q_value := get_match_group(match, group_name='d_q_value'):
97+
if (d_q_value := get_match_group(match, group_name='d_q_value')) is not None:
9898
return operator, d_q_value
9999

100-
elif b_q_value := get_match_group(match, group_name='b_q_value'):
100+
elif (b_q_value := get_match_group(match, group_name='b_q_value')) is not None:
101101
return operator, b_q_value
102102

103103
return super().get_operator_and_value(match, operator)

siem-converter/app/converter/backends/elasticsearch/renders/elasticsearch.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -82,6 +82,8 @@ class ElasticSearchQueryRender(BaseQueryRender):
8282

8383
field_value_map = ElasticSearchFieldValue(or_token=or_token)
8484
query_pattern = "{query} {functions}"
85+
comment_symbol = "//"
86+
is_multi_line_comment = True
8587

8688
def generate_prefix(self, logsource: dict) -> str:
8789
return ""

siem-converter/app/converter/backends/elasticsearch/tokenizer.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -64,16 +64,16 @@ def clean_quotes(value: Union[str, int]):
6464
return value
6565

6666
def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> Tuple[str, Any]:
67-
if num_value := get_match_group(match, group_name='num_value'):
67+
if (num_value := get_match_group(match, group_name='num_value')) is not None:
6868
return operator, num_value
6969

70-
elif re_value := get_match_group(match, group_name='re_value'):
70+
elif (re_value := get_match_group(match, group_name='re_value')) is not None:
7171
return OperatorType.REGEX, re_value
7272

73-
elif n_q_value := get_match_group(match, group_name='n_q_value'):
73+
elif (n_q_value := get_match_group(match, group_name='n_q_value')) is not None:
7474
return operator, n_q_value
7575

76-
elif d_q_value := get_match_group(match, group_name='d_q_value'):
76+
elif (d_q_value := get_match_group(match, group_name='d_q_value')) is not None:
7777
return operator, d_q_value
7878

7979
return super().get_operator_and_value(match)

siem-converter/app/converter/backends/logscale/tokenizer.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -30,20 +30,20 @@ class LogScaleTokenizer(QueryTokenizer):
3030
match_operator_pattern = r"""(?:___field___\s?(?P<match_operator>=|!=))\s?"""
3131
num_value_pattern = r"(?P<num_value>\d+(?:\.\d+)*)\s*"
3232
double_quotes_value_pattern = r'"(?P<d_q_value>(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{\}\s]|\\\"|\\)*)"\s*'
33-
re_value_pattern = r"/(?P<re_value>[:a-zA-Z\*0-9=+%#\\\-_\,\"\'\.$&^@!\(\)\{\}\s?]*)/i?\s*"
33+
re_value_pattern = r"/(?P<re_value>[:a-zA-Z\*0-9=+%#\\\-_\,\"\'\.$&^@!\(\)\{\}\s?]+)/i?\s*"
3434
_value_pattern = fr"""{num_value_pattern}|{re_value_pattern}|{double_quotes_value_pattern}"""
3535
keyword_pattern = double_quotes_value_pattern
3636

3737
wildcard_symbol = "*"
3838

3939
def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> Tuple[str, Any]:
40-
if num_value := get_match_group(match, group_name='num_value'):
40+
if (num_value := get_match_group(match, group_name='num_value')) is not None:
4141
return operator, num_value
4242

43-
elif d_q_value := get_match_group(match, group_name='d_q_value'):
43+
elif (d_q_value := get_match_group(match, group_name='d_q_value')) is not None:
4444
return operator, d_q_value
4545

46-
elif re_value := get_match_group(match, group_name='re_value'):
46+
elif (re_value := get_match_group(match, group_name='re_value')) is not None:
4747
return OperatorType.REGEX, re_value
4848

4949
return super().get_operator_and_value(match, operator)

siem-converter/app/converter/backends/microsoft/renders/microsoft_sentinel.py

Lines changed: 13 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616
limitations under the License.
1717
-----------------------------------------------------------------
1818
"""
19+
from typing import Union
1920

2021
from app.converter.backends.microsoft.const import microsoft_sentinel_query_details
2122
from app.converter.backends.microsoft.mapping import MicrosoftSentinelMappings, microsoft_sentinel_mappings
@@ -28,32 +29,36 @@
2829
class MicrosoftSentinelFieldValue(BaseQueryFieldValue):
2930
details: PlatformDetails = microsoft_sentinel_query_details
3031

32+
@staticmethod
33+
def __escape_value(value: Union[int, str]) -> Union[int, str]:
34+
return value.replace("'", "''") if isinstance(value, str) else value
35+
3136
def equal_modifier(self, field, value):
3237
if isinstance(value, str):
33-
return f"{field} =~ @'{value}'"
38+
return f"{field} =~ @'{self.__escape_value(value)}'"
3439
elif isinstance(value, list):
35-
prepared_values = ", ".join(f"@'{v}'" for v in value)
40+
prepared_values = ", ".join(f"@'{self.__escape_value(v)}'" for v in value)
3641
operator = "in~" if all(isinstance(v, str) for v in value) else "in"
3742
return f'{field} {operator} ({prepared_values})'
3843
return f'{field} == {value}'
3944

4045
def contains_modifier(self, field, value):
4146
if isinstance(value, list):
4247
return f"({self.or_token.join(self.contains_modifier(field=field, value=v) for v in value)})"
43-
return f"{field} contains @'{value}'"
48+
return f"{field} contains @'{self.__escape_value(value)}'"
4449

4550
def endswith_modifier(self, field, value):
4651
if isinstance(value, list):
4752
return f"({self.or_token.join(self.endswith_modifier(field=field, value=v) for v in value)})"
48-
return f"{field} endswith @'{value}'"
53+
return f"{field} endswith @'{self.__escape_value(value)}'"
4954

5055
def startswith_modifier(self, field, value):
5156
if isinstance(value, list):
5257
return f"({self.or_token.join(self.startswith_modifier(field=field, value=v) for v in value)})"
53-
return f"{field} startswith @'{value}'"
58+
return f"{field} startswith @'{self.__escape_value(value)}'"
5459

5560
def __regex_modifier(self, field, value):
56-
return f"{field} matches regex @'(?i){value}'"
61+
return f"{field} matches regex @'(?i){self.__escape_value(value)}'"
5762

5863
def regex_modifier(self, field, value):
5964
if isinstance(value, list):
@@ -63,7 +68,7 @@ def regex_modifier(self, field, value):
6368
def keywords(self, field, value):
6469
if isinstance(value, list):
6570
return f"({self.or_token.join(self.keywords(field=field, value=v) for v in value)})"
66-
return f"* contains @'{value}'"
71+
return f"* contains @'{self.__escape_value(value)}'"
6772

6873

6974
class MicrosoftSentinelQueryRender(BaseQueryRender):
@@ -78,14 +83,11 @@ class MicrosoftSentinelQueryRender(BaseQueryRender):
7883

7984
mappings: MicrosoftSentinelMappings = microsoft_sentinel_mappings
8085
comment_symbol = "//"
86+
is_multi_line_comment = True
8187

8288
def generate_prefix(self, log_source_signature: LogSourceSignature) -> str:
8389
return str(log_source_signature)
8490

85-
def render_not_supported_functions(self, not_supported_functions: list) -> str:
86-
render_not_suported = "\n".join([f'// {i}' for i in not_supported_functions])
87-
return "\n\n" + f"// {self.unsupported_functions_text}" + render_not_suported
88-
8991
def generate_functions(self, functions: list) -> str:
9092
if not functions:
9193
return ""

siem-converter/app/converter/backends/microsoft/tokenizer.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ class MicrosoftSentinelTokenizer(QueryTokenizer, OperatorBasedMixin):
3434
single_quotes_value_pattern = r"@?'(?P<s_q_value>(?:[:a-zA-Z\*0-9=+%#\-_/,\"\.$&^@!\(\)\{\}\s]|\\\'|\\\\)*)'\s*"
3535
str_value_pattern = fr"""{double_quotes_value_pattern}|{single_quotes_value_pattern}"""
3636
_value_pattern = fr"""{bool_value_pattern}|{num_value_pattern}|{str_value_pattern}"""
37-
multi_value_pattern = r"""\((?P<value>[:a-zA-Z\"\*0-9=+%#\-_\/\\'\,.&^@!\(\s]*)\)"""
37+
multi_value_pattern = r"""\((?P<value>[:a-zA-Z\"\*0-9=+%#\-_\/\\'\,.&^@!\(\s]+)\)"""
3838
keyword_pattern = fr"\*\s+contains\s+(?:{str_value_pattern})"
3939

4040
multi_value_operators = ("in", "in~")
@@ -50,16 +50,16 @@ def __init__(self, *args, **kwargs):
5050
self.operators_map.update(super().operators_map)
5151

5252
def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> Tuple[str, Any]:
53-
if num_value := get_match_group(match, group_name='num_value'):
53+
if (num_value := get_match_group(match, group_name='num_value')) is not None:
5454
return operator, num_value
5555

56-
elif bool_value := get_match_group(match, group_name='bool_value'):
56+
elif (bool_value := get_match_group(match, group_name='bool_value')) is not None:
5757
return operator, bool_value
5858

59-
elif d_q_value := get_match_group(match, group_name='d_q_value'):
59+
elif (d_q_value := get_match_group(match, group_name='d_q_value')) is not None:
6060
return operator, d_q_value
6161

62-
elif s_q_value := get_match_group(match, group_name='s_q_value'):
62+
elif (s_q_value := get_match_group(match, group_name='s_q_value')) is not None:
6363
return operator, s_q_value
6464

6565
return super().get_operator_and_value(match, operator)

siem-converter/app/converter/backends/opensearch/renders/opensearch.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -71,6 +71,8 @@ class OpenSearchQueryRender(BaseQueryRender):
7171

7272
field_value_map = OpenSearchFieldValue(or_token=or_token)
7373
query_pattern = "{query} {functions}"
74+
comment_symbol = "//"
75+
is_multi_line_comment = True
7476

7577
def generate_prefix(self, logsource: dict) -> str:
7678
return ""

siem-converter/app/converter/backends/opensearch/tokenizer.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -64,16 +64,16 @@ def clean_quotes(value: Union[str, int]):
6464
return value
6565

6666
def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> Tuple[str, Any]:
67-
if num_value := get_match_group(match, group_name='num_value'):
67+
if (num_value := get_match_group(match, group_name='num_value')) is not None:
6868
return operator, num_value
6969

70-
elif re_value := get_match_group(match, group_name='re_value'):
70+
elif (re_value := get_match_group(match, group_name='re_value')) is not None:
7171
return OperatorType.REGEX, re_value
7272

73-
elif n_q_value := get_match_group(match, group_name='n_q_value'):
73+
elif (n_q_value := get_match_group(match, group_name='n_q_value')) is not None:
7474
return operator, n_q_value
7575

76-
elif d_q_value := get_match_group(match, group_name='d_q_value'):
76+
elif (d_q_value := get_match_group(match, group_name='d_q_value')) is not None:
7777
return operator, d_q_value
7878

7979
return super().get_operator_and_value(match)

0 commit comments

Comments
 (0)