Skip to content

Commit

Permalink
postgresql output: escape all null bytes
Browse files Browse the repository at this point in the history
While null bytes (`\0`, not SQL "NULL") in TEXT and JSON/JSONB fields are valid, data containing null bytes can cause trouble with some combinations of clients, servers and their settings.
To prevent unhandled errors and data that cannot be inserted into the database, all null bytes are escaped.

fixes #2203
  • Loading branch information
wagner-intevation committed Jul 25, 2022
1 parent 28ccf11 commit bbc3c91
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 8 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ CHANGELOG
- `intelmq.bots.outputs.templated_smtp.output`:
- Add new function `from_json()` (which just calls `json.loads()` in the standard Python environment), meaning the Templated SMTP output bot can take strings containing JSON documents and do the formatting itself (PR#2120 by Karl-Johan Karlsson).
- Lift restriction on requirement jinja2 < 3 (PR#2158 by Sebastian Wagner).
- `intelmq.bots.outputs.sql`:
- For PostgreSQL, escape null bytes in text to prevent "unsupported Unicode escape sequence" issues (PR#2223 by Sebastian Wagner, fixes #2203).

### Documentation
- Feeds: Add documentation for newly supported dataplane feeds, see above (PR#2102 by Mikk Margus Möll).
Expand Down
11 changes: 9 additions & 2 deletions docs/user/bots.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4026,7 +4026,8 @@ The parameters marked with 'PostgreSQL' will be sent to libpq via psycopg2. Chec
* `fields`: list of fields to read from the event. If None, read all fields
* `reconnect_delay`: number of seconds to wait before reconnecting in case of an error
**PostgreSQL**
PostgreSQL
~~~~~~~~~~
You have two basic choices to run PostgreSQL:
Expand Down Expand Up @@ -4081,7 +4082,13 @@ if the user `intelmq` can authenticate):
psql -h localhost intelmq-events intelmq </tmp/initdb.sql
**SQLite**
**PostgreSQL and null characters**
While null characters (`\0`, not SQL "NULL") in TEXT and JSON/JSONB fields are valid, data containing null characters can cause trouble with some combinations of clients, servers and their settings.
To prevent unhandled errors and data that cannot be inserted into the database, all null characters are escaped (`\\u0000`) before insertion.
SQLite
~~~~~~
Similarly to PostgreSQL, you can use `intelmq_psql_initdb` to create initial SQL statements
from `harmonization.conf`. The script will create the required table layout
Expand Down
15 changes: 12 additions & 3 deletions intelmq/bots/outputs/sql/output.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: 2019 Edvard Rejthar
# SPDX-FileCopyrightText: 2019 Edvard Rejthar, 2022 Intevation GmbH
#
# SPDX-License-Identifier: AGPL-3.0-or-later

Expand Down Expand Up @@ -48,14 +48,23 @@ def process(self):
key_names = event.keys()
valid_keys = [key for key in key_names if key in event]
keys = '", "'.join(valid_keys)
values = itemgetter_tuple(*valid_keys)(event)
fvalues = len(values) * '{0}, '.format(self.format_char)
values = self.prepare_values(itemgetter_tuple(*valid_keys)(event))
fvalues = len(values) * f'{self.format_char}, '
query = ('INSERT INTO {table} ("{keys}") VALUES ({values})'
''.format(table=self.table, keys=keys, values=fvalues[:-2]))

if self.execute(query, values, rollback=True):
self.con.commit()
self.acknowledge_message()

def prepare_values(self, values):
    """
    Build the list of query parameters for one event.

    For PostgreSQL, JSON-encoded null characters (the six characters
    ``\\u0000``) inside string values are escaped a second time, so that
    Postgres does not encounter a null character while decoding the JSON.
    https://github.com/certtools/intelmq/issues/2203

    Only sequences preceded by an even number of backslashes are touched.
    An odd number means the backslash in front of ``u0000`` is itself
    escaped, i.e. the data contains the harmless literal text ``\\u0000``.
    Escaping that again would turn it into an escaped backslash followed by
    a real null character and re-introduce the error.

    Parameters:
        values: iterable with the field values of the event

    Returns:
        list: the values in unchanged order; non-string values and all
            values for other database engines are passed through untouched
    """
    if self._engine_name != self.POSTGRESQL:
        return list(values)

    # function-level import keeps this method self-contained
    import re

    def escape_once_more(match):
        # group 1 holds the preceding pairs of backslashes, keep them as they are
        return match.group(1) + '\\\\u0000'

    return [re.sub(r'(?<!\\)((?:\\\\)*)\\u0000', escape_once_more, value)
            if isinstance(value, str) else value
            for value in values]


BOT = SQLOutputBot
5 changes: 3 additions & 2 deletions intelmq/lib/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ def log(name, *args, **kwargs):
return logger
return log

def prepare_bot(self, parameters={}, destination_queues=None):
def prepare_bot(self, parameters={}, destination_queues=None, prepare_source_queue: bool = True):
"""
Reconfigures the bot with the changed attributes.
Expand Down Expand Up @@ -238,7 +238,8 @@ def prepare_bot(self, parameters={}, destination_queues=None):
self.pipe.set_queues(parameters.source_queue, "source")
self.pipe.set_queues(parameters.destination_queues, "destination")

self.prepare_source_queue()
if prepare_source_queue:
self.prepare_source_queue()

def prepare_source_queue(self):
if self.input_message is not None:
Expand Down
26 changes: 25 additions & 1 deletion intelmq/tests/bots/outputs/sql/test_output_postgresql.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
# SPDX-FileCopyrightText: 2019 Sebastian Wagner
# SPDX-FileCopyrightText: 2019 Sebastian Wagner, 2022 Intevation GmbH
#
# SPDX-License-Identifier: AGPL-3.0-or-later

# -*- coding: utf-8 -*-
import json
import os
import unittest

Expand All @@ -28,6 +29,10 @@
"extra.asn": 64496,
"extra.ip": "192.0.2.1",
}
INPUT_NULL = {"__type": "Event",
"classification.type": "undetermined",
"extra.payload": '{"text": "M41\u0012)3U>\bxӾ6\u0000\u0013M6M6M4M4]4y]4ӭ4"}',
}


@test.skip_database()
Expand Down Expand Up @@ -77,6 +82,18 @@ def test_extra(self):
from_db = {k: v for k, v in self.cur.fetchone().items() if v is not None}
self.assertEqual(from_db['extra'], {"asn": 64496, "ip": "192.0.2.1"})

def test_extra_nullbyte(self):
    """
    Insert an event whose extra field contains a null character.

    Exactly one row has to be written, and the stored payload is expected
    to carry the null character in escaped form.
    https://github.com/certtools/intelmq/issues/2203
    """
    self.input_message = INPUT_NULL
    self.run_bot()

    query = 'SELECT "extra" FROM tests WHERE "classification.type" = \'undetermined\''
    self.cur.execute(query)
    self.assertEqual(self.cur.rowcount, 1)

    # drop all columns which are not set, as the other tests of this module do
    row = self.cur.fetchone()
    stored = {column: content for column, content in row.items() if content is not None}
    expected_extra = {"payload": '{"text": "M41\u0012)3U>\bxӾ6\\u0000\u0013M6M6M4M4]4y]4ӭ4"}'}
    self.assertEqual(stored['extra'], expected_extra)

@classmethod
def tearDownClass(cls):
if not os.environ.get('INTELMQ_TEST_DATABASES'):
Expand Down Expand Up @@ -122,6 +139,13 @@ def test_event(self):
from_db = {k: v for k, v in self.cur.fetchone().items() if v is not None}
self.assertDictEqual(from_db, OUTPUT1)

def test_prepare_null(self):
    """
    Test that a JSON-encoded null character is escaped once more (not removed) by prepare_values.
    https://github.com/certtools/intelmq/issues/2203
    """
    # only the bot instance is needed here, so preparing the source queue is skipped
    self.prepare_bot(prepare_source_queue=False)
    encoded = json.dumps({"special": "foo\x00bar"})
    prepared = self.bot.prepare_values([encoded])
    self.assertEqual(prepared, ['{"special": "foo\\\\u0000bar"}'])

@classmethod
def tearDownClass(cls):
if not os.environ.get('INTELMQ_TEST_DATABASES'):
Expand Down

0 comments on commit bbc3c91

Please sign in to comment.