Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

postgresql output: escape all null bytes #2223

Merged
merged 1 commit into from
Aug 1, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ CHANGELOG
- `intelmq.bots.outputs.templated_smtp.output`:
- Add new function `from_json()` (which just calls `json.loads()` in the standard Python environment), meaning the Templated SMTP output bot can take strings containing JSON documents and do the formatting itself (PR#2120 by Karl-Johan Karlsson).
- Lift restriction on requirement jinja2 < 3 (PR#2158 by Sebastian Wagner).
- `intelmq.bots.outputs.sql`:
- For PostgreSQL, escape null bytes in text to prevent "unsupported Unicode escape sequence" issues (PR#2223 by Sebastian Wagner, fixes #2203).

### Documentation
- Feeds: Add documentation for newly supported dataplane feeds, see above (PR#2102 by Mikk Margus Möll).
Expand Down
11 changes: 9 additions & 2 deletions docs/user/bots.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4026,7 +4026,8 @@ The parameters marked with 'PostgreSQL' will be sent to libpq via psycopg2. Chec
* `fields`: list of fields to read from the event. If None, read all fields
* `reconnect_delay`: number of seconds to wait before reconnecting in case of an error

**PostgreSQL**
PostgreSQL
~~~~~~~~~~

You have two basic choices to run PostgreSQL:

Expand Down Expand Up @@ -4081,7 +4082,13 @@ if the user `intelmq` can authenticate):

psql -h localhost intelmq-events intelmq </tmp/initdb.sql

**SQLite**
**PostgreSQL and null characters**

While null characters (`\0`, not SQL "NULL") in TEXT and JSON/JSONB fields are valid, data containing null characters can cause trouble with some combinations of clients, servers and their respective settings.
To prevent unhandled errors and data which can't be inserted into the database, all null characters are escaped (`\\u0000`) before insertion.

SQLite
~~~~~~

Similarly to PostgreSQL, you can use `intelmq_psql_initdb` to create initial SQL statements
from `harmonization.conf`. The script will create the required table layout
Expand Down
15 changes: 12 additions & 3 deletions intelmq/bots/outputs/sql/output.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: 2019 Edvard Rejthar
# SPDX-FileCopyrightText: 2019 Edvard Rejthar, 2022 Intevation GmbH
#
# SPDX-License-Identifier: AGPL-3.0-or-later

Expand Down Expand Up @@ -48,14 +48,23 @@ def process(self):
key_names = event.keys()
valid_keys = [key for key in key_names if key in event]
keys = '", "'.join(valid_keys)
values = itemgetter_tuple(*valid_keys)(event)
fvalues = len(values) * '{0}, '.format(self.format_char)
values = self.prepare_values(itemgetter_tuple(*valid_keys)(event))
fvalues = len(values) * f'{self.format_char}, '
query = ('INSERT INTO {table} ("{keys}") VALUES ({values})'
''.format(table=self.table, keys=keys, values=fvalues[:-2]))

if self.execute(query, values, rollback=True):
self.con.commit()
self.acknowledge_message()

def prepare_values(self, values):
    """
    Return the given values as a list which can be handed to the database.

    For the PostgreSQL engine, every JSON-encoded null character (a backslash
    followed by ``u0000``) found in a string value receives one more leading
    backslash. Values which are not strings are passed through untouched.
    For every other engine the values are only copied into a new list.
    """
    if self._engine_name != self.POSTGRESQL:
        return list(values)

    # JSON escapes the null character once, but it has to be escaped twice,
    # so that Postgres does not encounter a null character while decoding it
    # https://github.com/certtools/intelmq/issues/2203
    prepared = []
    for item in values:
        if isinstance(item, str):
            item = item.replace('\\u0000', '\\\\u0000')
        prepared.append(item)
    return prepared


BOT = SQLOutputBot
5 changes: 3 additions & 2 deletions intelmq/lib/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ def log(name, *args, **kwargs):
return logger
return log

def prepare_bot(self, parameters={}, destination_queues=None):
def prepare_bot(self, parameters={}, destination_queues=None, prepare_source_queue: bool = True):
"""
Reconfigures the bot with the changed attributes.

Expand Down Expand Up @@ -238,7 +238,8 @@ def prepare_bot(self, parameters={}, destination_queues=None):
self.pipe.set_queues(parameters.source_queue, "source")
self.pipe.set_queues(parameters.destination_queues, "destination")

self.prepare_source_queue()
if prepare_source_queue:
self.prepare_source_queue()

def prepare_source_queue(self):
if self.input_message is not None:
Expand Down
26 changes: 25 additions & 1 deletion intelmq/tests/bots/outputs/sql/test_output_postgresql.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
# SPDX-FileCopyrightText: 2019 Sebastian Wagner
# SPDX-FileCopyrightText: 2019 Sebastian Wagner, 2022 Intevation GmbH
#
# SPDX-License-Identifier: AGPL-3.0-or-later

# -*- coding: utf-8 -*-
import json
import os
import unittest

Expand All @@ -28,6 +29,10 @@
"extra.asn": 64496,
"extra.ip": "192.0.2.1",
}
# Event whose extra payload contains a null character (\u0000) in between
# other control characters; used by test_extra_nullbyte, see
# https://github.com/certtools/intelmq/issues/2203
INPUT_NULL = {"__type": "Event",
"classification.type": "undetermined",
"extra.payload": '{"text": "M41\u0012)3U>\bxӾ6\u0000\u0013M6M6M4M4]4y]4ӭ4"}',
}


@test.skip_database()
Expand Down Expand Up @@ -77,6 +82,18 @@ def test_extra(self):
from_db = {k: v for k, v in self.cur.fetchone().items() if v is not None}
self.assertEqual(from_db['extra'], {"asn": 64496, "ip": "192.0.2.1"})

def test_extra_nullbyte(self):
"""
Test a null byte in an extra field.

Runs the bot with INPUT_NULL and checks that exactly one matching row
exists and that the payload read back from the "extra" column contains the
escaped sequence (a literal backslash followed by u0000) in place of the
null character.
https://github.com/certtools/intelmq/issues/2203
"""
self.input_message = INPUT_NULL
self.run_bot()
self.cur.execute('SELECT "extra" FROM tests WHERE "classification.type" = \'undetermined\'')
self.assertEqual(self.cur.rowcount, 1)
# drop the columns which are not set, as done in the other tests
from_db = {k: v for k, v in self.cur.fetchone().items() if v is not None}
# only the null character differs from the input: it comes back as the literal text \u0000
self.assertEqual(from_db['extra'], {"payload": '{"text": "M41\u0012)3U>\bxӾ6\\u0000\u0013M6M6M4M4]4y]4ӭ4"}'})

@classmethod
def tearDownClass(cls):
if not os.environ.get('INTELMQ_TEST_DATABASES'):
Expand Down Expand Up @@ -122,6 +139,13 @@ def test_event(self):
from_db = {k: v for k, v in self.cur.fetchone().items() if v is not None}
self.assertDictEqual(from_db, OUTPUT1)

def test_prepare_null(self):
""" Test if a JSON-encoded null character is correctly escaped (not removed) by prepare_values. https://github.com/certtools/intelmq/issues/2203 """
# json.dumps encodes the null character as the escape sequence \u0000
values = [json.dumps({"special": "foo\x00bar"})]
# only set up the bot; the source queue is not needed as no message is processed
self.prepare_bot(prepare_source_queue=False)
output = self.bot.prepare_values(values)
# the escape sequence must now be preceded by two backslashes instead of one
self.assertEqual(output, ['{"special": "foo\\\\u0000bar"}'])

@classmethod
def tearDownClass(cls):
if not os.environ.get('INTELMQ_TEST_DATABASES'):
Expand Down