Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FIX: Updates Turris Greylist parser and feed URL. #2373

Merged
merged 1 commit into from
Jun 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ CHANGELOG
- Removed duplicate mappings from the 'Spam-URL' report. (PR#2348)
- `intelmq.bots.parsers.generic.parser_csv`: Changes `time_format` parameter to use new `TimeFormat` class (PR#2329 by Filip Pokorný).
- `intelmq.bots.parsers.html_table.parser`: Changes `time_format` parameter to use new `TimeFormat` class (PR#2329 by Filip Pokorný).
- `intelmq.bots.parsers.turris.parser`: Updated to the latest data format (issue #2167). (PR#2373 by Filip Pokorný).

#### Experts
- `intelmq.bots.experts.sieve`:
Expand Down
61 changes: 29 additions & 32 deletions intelmq/bots/parsers/turris/parser.py
Original file line number Diff line number Diff line change
@@ -1,50 +1,47 @@
# SPDX-FileCopyrightText: 2015 robcza
# SPDX-FileCopyrightText: 2023 Filip Pokorný
#
# SPDX-License-Identifier: AGPL-3.0-or-later

# -*- coding: utf-8 -*-
import csv
import io

from intelmq.lib import utils
from intelmq.lib.bot import ParserBot

PORTS = {
"ftp": 21,
"telnet": 23,
"http": 80
# smtp uses both 25 and 587, therefore we can't say for certain
gethvi marked this conversation as resolved.
Show resolved Hide resolved
}


class TurrisGreylistParserBot(ParserBot):
"""Parse the Turris Greylist feed"""

def process(self):
report = self.receive_message()

columns = [
"source.ip",
"source.geolocation.cc",
"event_description.text",
"source.asn"
]

headers = True
raw_report = utils.base64_decode(report.get("raw"))
raw_report = raw_report.translate({0: None})
for row in csv.reader(io.StringIO(raw_report)):
# ignore headers
if headers:
headers = False
continue
parse = ParserBot.parse_csv_dict
recover_line = ParserBot.recover_line_csv_dict
_ignore_lines_starting = ["#"]

def parse_line(self, line, report):

for tag in line.get("Tags").split(","):

event = self.new_event(report)

for key, value in zip(columns, row):
if key == "__IGNORE__":
continue
if tag in ["smtp", "http", "ftp", "telnet"]:
event.add("protocol.transport", "tcp")
event.add("protocol.application", tag)
event.add("classification.type", "brute-force")
event.add("destination.port", PORTS.get(tag))

event.add(key, value)
elif tag == "port_scan":
event.add("classification.type", "scanner")

event.add('classification.type', 'scanner')
event.add("raw", ",".join(row))
else:
# cases such as "haas", "haas_logged" and "haas_not_logged" come from CZ.NIC HaaS Feed (available in IntelMQ)
# it's better to use that feed for this data (it's data from SSH honeypot)
continue

self.send_message(event)
self.acknowledge_message()
event.add("raw", self.recover_line(line))
event.add("source.ip", line.get("Address"))
yield event


BOT = TurrisGreylistParserBot
4 changes: 2 additions & 2 deletions intelmq/etc/feeds.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -651,14 +651,14 @@ providers:
collector:
module: intelmq.bots.collectors.http.collector_http
parameters:
http_url: https://www.turris.cz/greylist-data/greylist-latest.csv
http_url: https://view.sentinel.turris.cz/greylist-data/greylist-latest.csv
gethvi marked this conversation as resolved.
Show resolved Hide resolved
rate_limit: 43200
name: __FEED__
provider: __PROVIDER__
parser:
module: intelmq.bots.parsers.turris.parser
parameters:
revision: 2018-01-20
revision: 2023-06-13
documentation: https://project.turris.cz/en/greylist
public: true
Greylist with PGP signature verification:
Expand Down
15 changes: 14 additions & 1 deletion intelmq/lib/upgrades.py
Original file line number Diff line number Diff line change
Expand Up @@ -862,6 +862,19 @@ def v310_feed_changes(configuration, harmonization, dry_run, **kwargs):
return messages + ' Remove affected bots yourself.' if messages else None, configuration, harmonization


def v320_update_turris_greylist_url(configuration, harmonization, dry_run, **kwargs):
    """
    Point HTTP collectors of the Turris Greylist feed at the feed's new URL.

    Every entry of ``configuration`` whose ``module`` is the HTTP collector and
    whose ``parameters.http_url`` starts with one of the former Greylist
    locations is rewritten in place to the ``view.sentinel.turris.cz`` URL.

    Parameters:
        configuration: mapping of bot id to bot configuration (dict).
        harmonization: harmonization configuration; returned untouched.
        dry_run: accepted for signature compatibility; not consulted here.
        **kwargs: ignored.

    Returns:
        A 3-tuple ``(messages, configuration, harmonization)`` where
        ``messages`` is a list with one entry per rewritten bot, or ``None``
        when nothing matched.
    """
    new_url = "https://view.sentinel.turris.cz/greylist-data/greylist-latest.csv"
    # The feed definition used the www.turris.cz host; project.turris.cz is
    # kept as well so configurations matched by the earlier check still are.
    old_urls = (
        "https://www.turris.cz/greylist-data/greylist-latest.csv",
        "https://project.turris.cz/greylist-data/greylist-latest.csv",
    )
    # The feed definition names the collector module "collector_http"; the
    # shorter spelling is retained for backward compatibility.
    collector_modules = (
        "intelmq.bots.collectors.http.collector_http",
        "intelmq.bots.collectors.http.collector",
    )

    messages = []

    for bot in configuration.values():
        # Skip entries that are not bot definitions of the HTTP collector.
        if not isinstance(bot, dict) or bot.get("module") not in collector_modules:
            continue
        parameters = bot.get("parameters")
        if not isinstance(parameters, dict):
            continue
        url = parameters.get("http_url")
        # str.startswith accepts a tuple and matches any of the prefixes.
        if isinstance(url, str) and url.startswith(old_urls):
            parameters["http_url"] = new_url
            messages.append("Turris Greylist feed URL updated.")

    return messages if messages else None, configuration, harmonization


UPGRADES = OrderedDict([
((1, 0, 0, 'dev7'), (v100_dev7_modify_syntax,)),
((1, 1, 0), (v110_shadowserver_feednames, v110_deprecations)),
Expand All @@ -887,7 +900,7 @@ def v310_feed_changes(configuration, harmonization, dry_run, **kwargs):
((3, 0, 1), (v301_deprecations,)),
((3, 0, 2), ()),
((3, 1, 0), (v310_feed_changes, v310_shadowserver_feednames)),
((3, 2, 0), ()),
((3, 2, 0), (v320_update_turris_greylist_url,)),
Copy link
Member

@sebix sebix Jun 13, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That line misses a trailing comma to make it a tuple:

    ((3, 2, 0), (v320_update_turris_greylist_url, )),
Upgrading to version 3.2.0.
  File "/opt/hostedtoolcache/Python/3.11.4/x64/lib/python3.11/site-packages/intelmq/bin/intelmqsetup.py", line 184, in intelmqsetup_core
    controller.upgrade_conf(state_file=state_file, no_backup=True)
  File "/opt/hostedtoolcache/Python/3.11.4/x64/lib/python3.11/site-packages/intelmq/bin/intelmqctl.py", line 1152, in upgrade_conf
    for function in bunch:
TypeError: 'function' object is not iterable

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fix of the fix incoming in few...

])

ALWAYS = (harmonization,)
3 changes: 0 additions & 3 deletions intelmq/tests/bots/parsers/turris/greylist-latest.csv

This file was deleted.

2 changes: 0 additions & 2 deletions intelmq/tests/bots/parsers/turris/greylist-latest.csv.license

This file was deleted.

147 changes: 123 additions & 24 deletions intelmq/tests/bots/parsers/turris/test_parser.py
Original file line number Diff line number Diff line change
@@ -1,32 +1,121 @@
# SPDX-FileCopyrightText: 2015 Sebastian Wagner
# SPDX-FileCopyrightText: 2023 Filip Pokorný
#
# SPDX-License-Identifier: AGPL-3.0-or-later

# -*- coding: utf-8 -*-
import base64
import os
import unittest

import intelmq.lib.test as test
from intelmq.bots.parsers.turris.parser import TurrisGreylistParserBot
from intelmq.lib import utils

with open(os.path.join(os.path.dirname(__file__), 'greylist-latest.csv'), 'rb') as fh:
RAW = base64.b64encode(fh.read()).decode()

OUTPUT1 = {'__type': 'Event',
'classification.type': 'scanner',
'event_description.text': 'dns',
'source.geolocation.cc': 'AU',
'source.asn': 15169,
'raw': 'MS4xLjEuMixBVSxkbnMsMTUxNjk=',
'source.ip': '1.1.1.2'}
OUTPUT2 = {'__type': 'Event',
'classification.type': 'scanner',
'event_description.text': 'telnet',
'raw': 'MS4yMC45Ni4xNDIsVEgsdGVsbmV0LDU2MTIw',
'source.geolocation.cc': 'TH',
'source.asn': 56120,
'source.ip': '1.20.96.142'}
INPUT = """\
# For the terms of use see https://view.sentinel.turris.cz/greylist-data/LICENSE.txt
Address,Tags
159.203.8.168,http
103.155.105.100,"ftp,http"
117.247.161.208,telnet
103.185.234.2,telnet
152.32.236.101,"ftp,http,port_scan,smtp,telnet"
61.219.175.42,telnet
"""

OUTPUT = [
{
"protocol.transport": "tcp",
"protocol.application": "http",
"classification.type": "brute-force",
"destination.port": 80,
"raw": "QWRkcmVzcyxUYWdzCjE1OS4yMDMuOC4xNjgsaHR0cA==",
"source.ip": "159.203.8.168",
"__type": "Event"
},
{
"protocol.transport": "tcp",
"protocol.application": "ftp",
"classification.type": "brute-force",
"destination.port": 21,
"raw": "QWRkcmVzcyxUYWdzCjEwMy4xNTUuMTA1LjEwMCwiZnRwLGh0dHAi",
"source.ip": "103.155.105.100",
"__type": "Event"
},
{
"protocol.transport": "tcp",
"protocol.application": "http",
"classification.type": "brute-force",
"destination.port": 80,
"raw": "QWRkcmVzcyxUYWdzCjEwMy4xNTUuMTA1LjEwMCwiZnRwLGh0dHAi",
"source.ip": "103.155.105.100",
"__type": "Event"
},
{
"protocol.transport": "tcp",
"protocol.application": "telnet",
"classification.type": "brute-force",
"destination.port": 23,
"raw": "QWRkcmVzcyxUYWdzCjExNy4yNDcuMTYxLjIwOCx0ZWxuZXQ=",
"source.ip": "117.247.161.208",
"__type": "Event"
},
{
"protocol.transport": "tcp",
"protocol.application": "telnet",
"classification.type": "brute-force",
"destination.port": 23,
"raw": "QWRkcmVzcyxUYWdzCjEwMy4xODUuMjM0LjIsdGVsbmV0",
"source.ip": "103.185.234.2",
"__type": "Event"
},
{
"protocol.transport": "tcp",
"protocol.application": "ftp",
"classification.type": "brute-force",
"destination.port": 21,
"raw": "QWRkcmVzcyxUYWdzCjE1Mi4zMi4yMzYuMTAxLCJmdHAsaHR0cCxwb3J0X3NjYW4sc210cCx0ZWxuZXQi",
"source.ip": "152.32.236.101",
"__type": "Event"
},
{
"protocol.transport": "tcp",
"protocol.application": "http",
"classification.type": "brute-force",
"destination.port": 80,
"raw": "QWRkcmVzcyxUYWdzCjE1Mi4zMi4yMzYuMTAxLCJmdHAsaHR0cCxwb3J0X3NjYW4sc210cCx0ZWxuZXQi",
"source.ip": "152.32.236.101",
"__type": "Event"
},
{
"classification.type": "scanner",
"raw": "QWRkcmVzcyxUYWdzCjE1Mi4zMi4yMzYuMTAxLCJmdHAsaHR0cCxwb3J0X3NjYW4sc210cCx0ZWxuZXQi",
"source.ip": "152.32.236.101",
"__type": "Event"
},
{
"protocol.transport": "tcp",
"protocol.application": "smtp",
"classification.type": "brute-force",
"raw": "QWRkcmVzcyxUYWdzCjE1Mi4zMi4yMzYuMTAxLCJmdHAsaHR0cCxwb3J0X3NjYW4sc210cCx0ZWxuZXQi",
"source.ip": "152.32.236.101",
"__type": "Event"
},
{
"protocol.transport": "tcp",
"protocol.application": "telnet",
"classification.type": "brute-force",
"destination.port": 23,
"raw": "QWRkcmVzcyxUYWdzCjE1Mi4zMi4yMzYuMTAxLCJmdHAsaHR0cCxwb3J0X3NjYW4sc210cCx0ZWxuZXQi",
"source.ip": "152.32.236.101",
"__type": "Event"
},
{
"protocol.transport": "tcp",
"protocol.application": "telnet",
"classification.type": "brute-force",
"destination.port": 23,
"raw": "QWRkcmVzcyxUYWdzCjYxLjIxOS4xNzUuNDIsdGVsbmV0",
"source.ip": "61.219.175.42",
"__type": "Event"
}
]


class TestTurrisGreylistParserBot(test.BotTestCase, unittest.TestCase):
Expand All @@ -37,12 +126,22 @@ class TestTurrisGreylistParserBot(test.BotTestCase, unittest.TestCase):
@classmethod
def set_bot(cls):
cls.bot_reference = TurrisGreylistParserBot
cls.default_input_message = {'__type': 'Report', 'raw': RAW}
cls.default_input_message = {'__type': 'Report', 'raw': utils.base64_encode(INPUT)}

def test_event(self):
self.run_bot()
self.assertMessageEqual(0, OUTPUT1)
self.assertMessageEqual(1, OUTPUT2)
self.assertMessageEqual(0, OUTPUT[0])
self.assertMessageEqual(1, OUTPUT[1])
self.assertMessageEqual(2, OUTPUT[2])
self.assertMessageEqual(3, OUTPUT[3])
self.assertMessageEqual(4, OUTPUT[4])
self.assertMessageEqual(5, OUTPUT[5])
self.assertMessageEqual(6, OUTPUT[6])
self.assertMessageEqual(7, OUTPUT[7])
self.assertMessageEqual(8, OUTPUT[8])
self.assertMessageEqual(9, OUTPUT[9])
self.assertMessageEqual(10, OUTPUT[10])


if __name__ == '__main__': # pragma: no cover
unittest.main()