From 2407730cccae426af1bd27bed6c163d585cdd5ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1s=20Lima?= Date: Wed, 2 Sep 2015 14:05:47 +0100 Subject: [PATCH 01/51] Update FAQ.md --- docs/FAQ.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/FAQ.md b/docs/FAQ.md index b41bed4de..9975eee60 100644 --- a/docs/FAQ.md +++ b/docs/FAQ.md @@ -50,4 +50,8 @@ Good Example: 2014-06-25 00:00:00 UTC ``` -Consult this example how ShadowServer SNMP Bot solve the problem. \ No newline at end of file +Consult this example how ShadowServer SNMP Bot solve the problem. + +## Git information + +https://docs.scipy.org/doc/numpy/dev/gitwash/development_workflow.html From 75f8a3efd803d686a0e9d4f61e257a4c37368a81 Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Wed, 2 Sep 2015 09:53:56 +0200 Subject: [PATCH 02/51] BUG: Message None handling by experts+ output bots closes #294 Signed-off-by: Sebastian Wagner --- intelmq/bots/experts/deduplicator/expert.py | 5 +++++ intelmq/bots/experts/filter/expert.py | 4 ++++ intelmq/bots/experts/maxmind_geoip/expert.py | 4 ++++ intelmq/bots/outputs/file/output.py | 4 ++++ intelmq/bots/outputs/mongodb/output.py | 5 +++++ intelmq/bots/outputs/postgresql/output.py | 1 + intelmq/bots/outputs/restapi/output.py | 5 +++++ intelmq/bots/outputs/tcp/output.py | 5 +++++ 8 files changed, 33 insertions(+) diff --git a/intelmq/bots/experts/deduplicator/expert.py b/intelmq/bots/experts/deduplicator/expert.py index f4cb7bf27..f3e76c8b7 100644 --- a/intelmq/bots/experts/deduplicator/expert.py +++ b/intelmq/bots/experts/deduplicator/expert.py @@ -18,6 +18,11 @@ def init(self): def process(self): message = self.receive_message() + + if message is None: + self.acknowledge_message() + return + auxiliar_message = copy.copy(message) ignore_keys = self.parameters.ignore_keys.split(',') diff --git a/intelmq/bots/experts/filter/expert.py b/intelmq/bots/experts/filter/expert.py index e08c0795b..36fab4895 100644 --- 
a/intelmq/bots/experts/filter/expert.py +++ b/intelmq/bots/experts/filter/expert.py @@ -22,6 +22,10 @@ def init(self): def process(self): event = self.receive_message() + if event is None: + self.acknowledge_message() + return + if self.parameters.filter_action == "drop": if (event.contains(self.parameters.filter_key) and event.value(self.parameters.filter_key) == diff --git a/intelmq/bots/experts/maxmind_geoip/expert.py b/intelmq/bots/experts/maxmind_geoip/expert.py index 0cbdecfcb..d16c7089e 100644 --- a/intelmq/bots/experts/maxmind_geoip/expert.py +++ b/intelmq/bots/experts/maxmind_geoip/expert.py @@ -21,6 +21,10 @@ def init(self): def process(self): event = self.receive_message() + if event is None: + self.acknowledge_message() + return + for key in ["source.%s", "destination.%s"]: geo_key = key % "geolocation.%s" diff --git a/intelmq/bots/outputs/file/output.py b/intelmq/bots/outputs/file/output.py index b2b3877bc..4d19ae1d2 100644 --- a/intelmq/bots/outputs/file/output.py +++ b/intelmq/bots/outputs/file/output.py @@ -15,6 +15,10 @@ def init(self): def process(self): event = self.receive_message() + if event is None: + self.acknowledge_message() + return + if event: event_data = event.to_json() self.file.write(event_data) diff --git a/intelmq/bots/outputs/mongodb/output.py b/intelmq/bots/outputs/mongodb/output.py index 791c0e33b..e1fd6e844 100644 --- a/intelmq/bots/outputs/mongodb/output.py +++ b/intelmq/bots/outputs/mongodb/output.py @@ -16,6 +16,11 @@ def init(self): def process(self): event = self.receive_message() + + if event is None: + self.acknowledge_message() + return + self.collection.insert(event.to_dict()) self.acknowledge_message() diff --git a/intelmq/bots/outputs/postgresql/output.py b/intelmq/bots/outputs/postgresql/output.py index d49ea8871..da3f8f555 100644 --- a/intelmq/bots/outputs/postgresql/output.py +++ b/intelmq/bots/outputs/postgresql/output.py @@ -37,6 +37,7 @@ def init(self): def process(self): event = self.receive_message() + if not 
event: self.acknowledge_message() return diff --git a/intelmq/bots/outputs/restapi/output.py b/intelmq/bots/outputs/restapi/output.py index 2735113b9..4545c8c5a 100644 --- a/intelmq/bots/outputs/restapi/output.py +++ b/intelmq/bots/outputs/restapi/output.py @@ -17,6 +17,11 @@ def init(self): def process(self): event = self.receive_message() + + if event is None: + self.acknowledge_message() + return + try: r = self.session.post(self.parameters.host, event.to_json()) r.raise_for_status() diff --git a/intelmq/bots/outputs/tcp/output.py b/intelmq/bots/outputs/tcp/output.py index 4bfc4cb72..cdf889c41 100644 --- a/intelmq/bots/outputs/tcp/output.py +++ b/intelmq/bots/outputs/tcp/output.py @@ -12,6 +12,11 @@ class TCPBot(Bot): def process(self): event = self.receive_message() + + if event is None: + self.acknowledge_message() + return + data = event.to_json() self.send_data(data) self.acknowledge_message() From cb4948bab7735fa81a7f76c1982a38cd35228403 Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Wed, 2 Sep 2015 10:21:33 +0200 Subject: [PATCH 03/51] BUG: Fix cymru whois encoding issue fixes #307 Signed-off-by: Sebastian Wagner --- intelmq/bots/experts/cymru_whois/lib.py | 2 +- .../bots/experts/cymru_whois/test_expert.py | 20 +++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/intelmq/bots/experts/cymru_whois/lib.py b/intelmq/bots/experts/cymru_whois/lib.py index 3730d8f6f..3b14c2c81 100755 --- a/intelmq/bots/experts/cymru_whois/lib.py +++ b/intelmq/bots/experts/cymru_whois/lib.py @@ -21,7 +21,7 @@ def query(ip): result = Cymru.__ip_query_parse(raw_result) if "asn" in result: - raw_result = Cymru.__asn_query(result['asn']) + raw_result = Cymru.__asn_query(result['asn']).decode('utf-8') extra_info = Cymru.__asn_query_parse(raw_result) result.update(extra_info) diff --git a/intelmq/tests/bots/experts/cymru_whois/test_expert.py b/intelmq/tests/bots/experts/cymru_whois/test_expert.py index 773cf3848..d19cbd559 100644 --- 
a/intelmq/tests/bots/experts/cymru_whois/test_expert.py +++ b/intelmq/tests/bots/experts/cymru_whois/test_expert.py @@ -35,6 +35,21 @@ "destination.asn": 16876, "destination.network": "2001:500:88::/48", } +UNICODE_INPUT = {"__type": "Event", + "destination.ip": "177.81.215.80", # some brazil IP + "time.observation": "2015-01-01T00:00:00+00:00", + } +UNICODE_OUTPUT = {"__type": "Event", + "destination.ip": "177.81.215.80", # some brazil IP + "time.observation": "2015-01-01T00:00:00+00:00", + "destination.registry": "lacnic", + "destination.allocated": "2011-08-30T00:00:00+00:00", + "destination.as_name": "NET Servi\xe7os de Comunica\xe7\xe3o" + " S.A.,BR", + "destination.geolocation.cc": "BR", + "destination.asn": 28573, + "destination.network": "177.81.0.0/16", + } class TestCymruExpertBot(test.BotTestCase, unittest.TestCase): @@ -57,6 +72,11 @@ def test_ipv6_lookup(self): self.run_bot() self.assertMessageEqual(0, EXAMPLE_OUTPUT6) + def test_unicode_as_name(self): + self.input_message = json.dumps(UNICODE_INPUT) + self.run_bot() + self.assertMessageEqual(0, UNICODE_OUTPUT) + if __name__ == '__main__': unittest.main() From abdf1a4513e8af1833aa7fbebdb34ef13c4d70a3 Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Wed, 2 Sep 2015 10:24:09 +0200 Subject: [PATCH 04/51] BUG TST: Fix Spamhaus tests, renamed class names class names of spamhaus parsers have been renamed, but not updated in tests: dde699831e72b7f39b5f80fe4c3d67ca75740f4b 2bc87e377052a5325dcc2c7e0bf9d9935b89b22c 22fd373b524a7ed75c2d5e108eab996265dd185e by @SYNchroACK This commit corrects the class names in the tests, succeeding again Signed-off-by: Sebastian Wagner --- intelmq/tests/bots/parsers/spamhaus/test_parser_cert.py | 8 ++++---- intelmq/tests/bots/parsers/spamhaus/test_parser_drop.py | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/intelmq/tests/bots/parsers/spamhaus/test_parser_cert.py b/intelmq/tests/bots/parsers/spamhaus/test_parser_cert.py index 91aef457b..d69d55296 
100644 --- a/intelmq/tests/bots/parsers/spamhaus/test_parser_cert.py +++ b/intelmq/tests/bots/parsers/spamhaus/test_parser_cert.py @@ -5,7 +5,7 @@ import unittest import intelmq.lib.test as test -from intelmq.bots.parsers.spamhaus.parser_cert import SpamHausCertParserBot +from intelmq.bots.parsers.spamhaus.parser_cert import SpamhausCERTParserBot EXAMPLE_REPORT = {"feed.url": "https://portal.spamhaus.org/cert/api.php?cert=" @@ -65,14 +65,14 @@ }] -class TestSpamHausCertParserBot(test.BotTestCase, unittest.TestCase): +class TestSpamhausCERTParserBot(test.BotTestCase, unittest.TestCase): """ - A TestCase for SpamHausCertParserBot. + A TestCase for SpamhausCERTParserBot. """ @classmethod def set_bot(cls): - cls.bot_reference = SpamHausCertParserBot + cls.bot_reference = SpamhausCERTParserBot cls.default_input_message = json.dumps(EXAMPLE_REPORT) def test_events(self): diff --git a/intelmq/tests/bots/parsers/spamhaus/test_parser_drop.py b/intelmq/tests/bots/parsers/spamhaus/test_parser_drop.py index d150ad463..78cdbe5e8 100644 --- a/intelmq/tests/bots/parsers/spamhaus/test_parser_drop.py +++ b/intelmq/tests/bots/parsers/spamhaus/test_parser_drop.py @@ -5,17 +5,17 @@ import unittest import intelmq.lib.test as test -from intelmq.bots.parsers.spamhaus.parser_drop import SpamHausDropParserBot +from intelmq.bots.parsers.spamhaus.parser_drop import SpamhausDropParserBot -class TestSpamHausDropParserBot(test.BotTestCase, unittest.TestCase): +class TestSpamhausDropParserBot(test.BotTestCase, unittest.TestCase): """ - A TestCase for SpamHausDropParserBot. + A TestCase for SpamhausDropParserBot. 
""" @classmethod def set_bot(self): - self.bot_reference = SpamHausDropParserBot + self.bot_reference = SpamhausDropParserBot self.default_input_message = json.dumps({'__type': 'Report'}) if __name__ == '__main__': From 7bc52de91031bf4e9b488e372af398c68b9cdd2a Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Wed, 2 Sep 2015 10:29:21 +0200 Subject: [PATCH 05/51] DOC: Update changelog for 1.0 (master) fix faq link in User-Guide Signed-off-by: Sebastian Wagner --- CHANGELOG.md | 50 +++++++++++++++++++++++++++++++++++++--------- docs/User-Guide.md | 2 +- 2 files changed, 42 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 22ae58a58..d88005757 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,17 +1,49 @@ CHANGELOG ========== -v1.0 (in developement) -- renamed bots.parsers.spamhaus.parser to bots.parsers.spamhaus.parser_drop -- added bots.parsers.spamhaus.parser_cert -- added bots.parsers.fraunhofer.parser_dga -- added bots.experts.certat_contact.expert +v1.0 (in development, master branch) +---- + +### Bot changes +- ENH: added bots.parsers.spamhaus.parser_cert +- ENH: added bots.parsers.fraunhofer.parser_dga +- ENH: added bots.experts.certat_contact.expert +- MAINT: renamed bots.parsers.spamhaus.parser to bots.parsers.spamhaus.parser_drop + +### Bug fixes +- FIX: all bots handle message which are None +- FIX: various encoding issues resolved in core and bots +- FIX: time.observation is generated in collectors, not in parsers + +### Other enhancements and changes +- TST: testing framework for core and tests. Newly introduced components should always come with proper unit tests. 
+- ENH: intelmqctl has shortcut parameters and can clear queues +- STY: code obeys PEP8, new code should always be properly formatted +- ENH: More code is Python 3 compatible +- DOC: Updated user and dev guide + +### Harmonization +- ENH: Additional data types: integer, float and Boolean +- ENH: Added descriptions and matching types to all fields +- DOC: harmonization documentation has same fields as configuration + +#### Most important changes: +- `(source|destination).bgp_prefix` is now `(source|destination).network` +- `(source|destination).cc` is now `(source|destination).geolocation.cc` +- `(source|destination).reverse_domain_name` is `(source|destination).reverse_dns` +- `misp_id` changed to `misp_uuid` +- `protocol.transport` added +- `webshot_url` removed + +----- + + ## 2015/06/03 (aaron) * fixed the license to AGPL in setup.py * moved back the docs/* files from the wiki repo to docs/. See #205. - * added python-zmq as a setup requirment in UserGuide . See #206 + * added python-zmq as a setup requirement in UserGuide . See #206 @@ -38,7 +70,7 @@ v1.0 (in developement) FILE: conf/harmonization.conf - in harmonization.conf is possible to define the fields of a specific message in json format. - - the harmonization.py has datatypes witch contains sanitize and validation methods that will make sure that the values are correct to be part of an event. + - the harmonization.py has data types witch contains sanitize and validation methods that will make sure that the values are correct to be part of an event. @@ -60,8 +92,8 @@ v1.0 (in developement) -* Defaults configrations - - new configuration file to specify the default parameters which will be apllied to all bots. Bots can overwrite the configurations. +* Defaults configurations + - new configuration file to specify the default parameters which will be applied to all bots. Bots can overwrite the configurations. 
diff --git a/docs/User-Guide.md b/docs/User-Guide.md index 7faf57b55..e7e13fd18 100644 --- a/docs/User-Guide.md +++ b/docs/User-Guide.md @@ -220,5 +220,5 @@ pip uninstall intelmq # Frequently Asked Questions -Consult the [FAQ](FAQ) +Consult the [FAQ.md](FAQ) if you encountered any problem. From 5e1960cdd31c99e4867128edca8569af4ad53025 Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Wed, 2 Sep 2015 12:21:36 +0200 Subject: [PATCH 06/51] BUG: coll/http Fix exception handling Signed-off-by: Sebastian Wagner --- intelmq/bots/collectors/http/collector_http.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/intelmq/bots/collectors/http/collector_http.py b/intelmq/bots/collectors/http/collector_http.py index 653d246e5..7097fe018 100644 --- a/intelmq/bots/collectors/http/collector_http.py +++ b/intelmq/bots/collectors/http/collector_http.py @@ -19,7 +19,6 @@ from intelmq.lib.bot import Bot from intelmq.lib.harmonization import DateTime from intelmq.lib.message import Report -from intelmq.lib.exceptions import ConfigurationError class HTTPCollectorBot(Bot): @@ -51,8 +50,8 @@ def process(self): verify=self.verify_cert) if resp.status_code // 100 != 2: - raise ConfigurationError('HTTP response status code was {}.' - ''.format(resp.status_code)) + raise ValueError('HTTP response status code was {}.' 
+ ''.format(resp.status_code)) self.logger.info("Report downloaded.") From b30e2b2238ccd25a0190b90140ed51d9c2225f6b Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Wed, 2 Sep 2015 16:52:52 +0200 Subject: [PATCH 07/51] BUG: Python 3 and encoding issues fixes #175 closes #178 closes #191 Signed-off-by: Sebastian Wagner --- intelmq/bots/outputs/tcp/output.py | 8 +-- intelmq/bots/parsers/autoshun/parser.py | 7 ++- intelmq/bots/parsers/blocklistde/parser.py | 7 ++- .../bots/parsers/malwaredomainlist/parser.py | 2 +- intelmq/bots/parsers/phishtank/parser.py | 4 +- intelmq/bots/parsers/turris/parser.py | 2 +- intelmq/bots/parsers/vxvault/parser.py | 7 ++- intelmq/lib/exceptions.py | 5 +- intelmq/lib/harmonization.py | 7 ++- intelmq/lib/message.py | 2 +- intelmq/lib/test.py | 20 ++++--- intelmq/lib/utils.py | 13 ++++- intelmq/tests/lib/test_harmonization.py | 2 - intelmq/tests/lib/test_message.py | 55 ++++++++++--------- intelmq/tests/lib/test_pipeline.py | 3 +- intelmq/tests/lib/test_utils.py | 8 +-- setup.py | 6 +- 17 files changed, 96 insertions(+), 62 deletions(-) diff --git a/intelmq/bots/outputs/tcp/output.py b/intelmq/bots/outputs/tcp/output.py index cdf889c41..ad54623fd 100644 --- a/intelmq/bots/outputs/tcp/output.py +++ b/intelmq/bots/outputs/tcp/output.py @@ -29,8 +29,8 @@ def connect(self): try: self.con.connect(address) break - except socket.error, e: - self.logger.error(e.args[1] + ". Retrying in 10 seconds.") + except socket.error as exc: + self.logger.error(exc.args[1] + ". Retrying in 10 seconds.") time.sleep(10) self.logger.info("Connected successfully to {!s}: {}" @@ -42,8 +42,8 @@ def send_data(self, data): self.con.send(utils.encode(data)) self.con.sendall("") break - except socket.error, e: - self.logger.error(e.args[1] + ". Reconnecting..") + except socket.error as exc: + self.logger.error(exc.args[1] + ". 
Reconnecting..") self.con.close() self.connect() except AttributeError: diff --git a/intelmq/bots/parsers/autoshun/parser.py b/intelmq/bots/parsers/autoshun/parser.py index 1fc664a30..582dee5e6 100644 --- a/intelmq/bots/parsers/autoshun/parser.py +++ b/intelmq/bots/parsers/autoshun/parser.py @@ -1,8 +1,11 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals import sys +try: + from HTMLParser import HTMLParser +except ImportError: + from html.parser import HTMLParser -import HTMLParser from intelmq.lib import utils from intelmq.lib.bot import Bot from intelmq.lib.harmonization import ClassificationType @@ -29,7 +32,7 @@ def process(self): raw_report = utils.base64_decode(report.value("raw")) raw_report_splitted = raw_report.split("")[2:] - parser = HTMLParser.HTMLParser() + parser = HTMLParser() for row in raw_report_splitted: event = Event() diff --git a/intelmq/bots/parsers/blocklistde/parser.py b/intelmq/bots/parsers/blocklistde/parser.py index a6012dc53..42c42d11a 100644 --- a/intelmq/bots/parsers/blocklistde/parser.py +++ b/intelmq/bots/parsers/blocklistde/parser.py @@ -2,7 +2,10 @@ from __future__ import unicode_literals import posixpath import sys -import urlparse +try: + from urlparse import urlparse +except ImportError: + from urllib.parse import urlparse from intelmq.lib import utils from intelmq.lib.bot import Bot @@ -82,7 +85,7 @@ def process(self): raw_report = raw_report.strip() url = report.value('feed.url') - path = urlparse.urlparse(url).path + path = urlparse(url).path filename = posixpath.basename(path) classification_type = 'blacklist' diff --git a/intelmq/bots/parsers/malwaredomainlist/parser.py b/intelmq/bots/parsers/malwaredomainlist/parser.py index 3150007d2..3d030cb3b 100644 --- a/intelmq/bots/parsers/malwaredomainlist/parser.py +++ b/intelmq/bots/parsers/malwaredomainlist/parser.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals import sys -from cStringIO import StringIO +from io import StringIO 
import unicodecsv diff --git a/intelmq/bots/parsers/phishtank/parser.py b/intelmq/bots/parsers/phishtank/parser.py index d85380339..bc7ef5630 100644 --- a/intelmq/bots/parsers/phishtank/parser.py +++ b/intelmq/bots/parsers/phishtank/parser.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals import sys -from cStringIO import StringIO +from io import StringIO import unicodecsv @@ -29,7 +29,7 @@ def process(self): "__IGNORE__", "__IGNORE__", "event_description.target" - ] + ] for row in unicodecsv.reader(StringIO(raw_report), encoding='utf-8'): diff --git a/intelmq/bots/parsers/turris/parser.py b/intelmq/bots/parsers/turris/parser.py index 675705832..dc3b7247e 100644 --- a/intelmq/bots/parsers/turris/parser.py +++ b/intelmq/bots/parsers/turris/parser.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals import sys -from cStringIO import StringIO +from io import StringIO import unicodecsv diff --git a/intelmq/bots/parsers/vxvault/parser.py b/intelmq/bots/parsers/vxvault/parser.py index 9bc6373d3..02b260e5d 100644 --- a/intelmq/bots/parsers/vxvault/parser.py +++ b/intelmq/bots/parsers/vxvault/parser.py @@ -1,7 +1,10 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals import sys -import urlparse +try: + from urlparse import urlparse +except ImportError: + from urllib.parse import urlparse from intelmq.lib import utils from intelmq.lib.bot import Bot @@ -28,7 +31,7 @@ def process(self): if len(row) == 0 or not row.startswith('http'): continue - url_object = urlparse.urlparse(row) + url_object = urlparse(row) if not url_object: continue diff --git a/intelmq/lib/exceptions.py b/intelmq/lib/exceptions.py index e5eec4e8d..cf4a1b37f 100644 --- a/intelmq/lib/exceptions.py +++ b/intelmq/lib/exceptions.py @@ -35,7 +35,10 @@ def __init__(self, argument, got=None, expected=None, docs=None): class PipelineError(IntelMQException): def __init__(self, argument): - message = "pipeline failed - %s" % 
traceback.format_exc(argument) + if type(argument) is type and issubclass(argument, Exception): + message = "pipeline failed - %s" % traceback.format_exc(argument) + else: + message = "pipeline failed - %s" % repr(argument) super(PipelineError, self).__init__(message) diff --git a/intelmq/lib/harmonization.py b/intelmq/lib/harmonization.py index 12c41d364..f9b199b21 100644 --- a/intelmq/lib/harmonization.py +++ b/intelmq/lib/harmonization.py @@ -207,7 +207,7 @@ def __parse(value): value = value.isoformat() except ValueError: return None - return value.decode("utf-8") + return utils.decode(value) @staticmethod def from_timestamp(tstamp, tzone='UTC'): @@ -224,7 +224,8 @@ def generate_datetime_now(): value = datetime.datetime.now(pytz.timezone('UTC')) value = value.replace(microsecond=0) value = value.isoformat() - return value.decode("utf-8") + # Is byte string in 2 and unicode string in 3, make unicode string + return utils.decode(value) class Float(GenericType): @@ -441,7 +442,7 @@ def is_valid(value, sanitize=False): if not GenericType().is_valid(value): return False - if type(value) is not unicode: + if type(value) is not six.text_type: return False if len(value) == 0: diff --git a/intelmq/lib/message.py b/intelmq/lib/message.py index b2856cee6..6f57478ea 100644 --- a/intelmq/lib/message.py +++ b/intelmq/lib/message.py @@ -211,7 +211,7 @@ def to_dict(self): def to_json(self): json_dict = self.to_dict() - return utils.encode(json.dumps(json_dict, ensure_ascii=False)) + return utils.decode(json.dumps(json_dict, ensure_ascii=False)) class Report(Message): diff --git a/intelmq/lib/test.py b/intelmq/lib/test.py index 1ad2e72b4..40131f52a 100644 --- a/intelmq/lib/test.py +++ b/intelmq/lib/test.py @@ -209,19 +209,19 @@ def test_log_end_dot(self): def test_log_not_error(self): """ Test if bot does not log errors. 
""" self.run_bot() - self.assertNotRegexpMatches(self.loglines_buffer, "ERROR") + self.assertNotRegexpMatchesLog("ERROR") def test_log_not_critical(self): """ Test if bot does not log critical errors. """ self.run_bot() - self.assertNotRegexpMatches(self.loglines_buffer, "CRITICAL") + self.assertNotRegexpMatchesLog("CRITICAL") def test_pipe_names(self): """ Test if all pipes are created with correct names. """ self.run_bot() pipenames = ["{}-input", "{}-input-internal", "{}-output"] - self.assertListEqual([x.format(self.bot_id) for x in pipenames], - list(self.pipe.state.keys())) + self.assertSetEqual({x.format(self.bot_id) for x in pipenames}, + set(self.pipe.state.keys())) def test_empty_message(self): """ @@ -236,7 +236,7 @@ def test_empty_message(self): self.input_message = '' self.run_bot() self.assertRegexpMatchesLog("WARNING - Empty message received.") - self.assertNotRegexpMatches(self.loglines_buffer, "ERROR") + self.assertNotRegexpMatchesLog("ERROR") def test_bot_name(self): """ @@ -309,13 +309,19 @@ def assertRegexpMatchesLog(self, pattern): """Asserts that pattern matches against log. 
""" self.assertIsNotNone(self.loglines_buffer) - self.assertRegexpMatches(self.loglines_buffer, pattern) + try: + self.assertRegexpMatches(self.loglines_buffer, pattern) + except AttributeError: + self.assertRegex(self.loglines_buffer, pattern) def assertNotRegexpMatchesLog(self, pattern): """Asserts that pattern doesn't match against log.""" self.assertIsNotNone(self.loglines_buffer) - self.assertNotRegexpMatches(self.loglines_buffer, pattern) + try: + self.assertNotRegexpMatches(self.loglines_buffer, pattern) + except AttributeError: + self.assertNotRegex(self.loglines_buffer, pattern) def assertMessageEqual(self, queue_pos, expected_message): """ diff --git a/intelmq/lib/utils.py b/intelmq/lib/utils.py index c465364f7..4dbacbef7 100644 --- a/intelmq/lib/utils.py +++ b/intelmq/lib/utils.py @@ -18,6 +18,7 @@ import logging import os import re +import six from intelmq import DEFAULT_LOGGING_PATH @@ -48,20 +49,26 @@ def decode(text, encodings=("utf-8", ), force=False): list/tuple of encodings to use, default ('utf-8') force : boolean Ignore invalid characters, default: False + + Returns + ------- + text : unicode string + unicode string is always returned, even when encoding is ascii + (Python 3 compat) """ - if type(text) is unicode: + if type(text) is six.text_type: return text for encoding in encodings: try: - return text.decode(encoding) + return six.text_type(text.decode(encoding)) except ValueError: pass if force: for encoding in encodings: try: - return text.decode(encoding, 'ignore') + return six.text_type(text.decode(encoding, 'ignore')) except ValueError: pass diff --git a/intelmq/tests/lib/test_harmonization.py b/intelmq/tests/lib/test_harmonization.py index 011eb318b..9877c96ee 100644 --- a/intelmq/tests/lib/test_harmonization.py +++ b/intelmq/tests/lib/test_harmonization.py @@ -49,7 +49,6 @@ def test_integer_valid_int(self): def test_integer_valid_other(self): """ Test Integer.is_valid with invalid values. 
""" - self.assertFalse(harmonization.Integer.is_valid(-4532L)) self.assertFalse(harmonization.Integer.is_valid('1337')) self.assertFalse(harmonization.Integer.is_valid(True)) @@ -79,7 +78,6 @@ def test_float_valid_flaot(self): def test_float_valid_other(self): """ Test Float.is_valid with invalid values. """ - self.assertFalse(harmonization.Float.is_valid(-4532L)) self.assertFalse(harmonization.Float.is_valid('1337.234')) self.assertFalse(harmonization.Float.is_valid(True)) diff --git a/intelmq/tests/lib/test_message.py b/intelmq/tests/lib/test_message.py index c4a412ab4..6adcf26c0 100644 --- a/intelmq/tests/lib/test_message.py +++ b/intelmq/tests/lib/test_message.py @@ -8,6 +8,7 @@ """ from __future__ import unicode_literals +import json import six import unittest @@ -204,7 +205,8 @@ def test_report_items(self): def test_report_add_byte(self): """ Test if report rejects a byte string. """ report = message.MessageFactory.unserialize('{"__type": "Report"}') - with self.assertRaises(exceptions.InvalidValue): + with self.assertRaises((exceptions.InvalidValue, + TypeError)): report.add('raw', bytes(LOREM_BASE64)) def test_report_sanitize_url(self): @@ -250,12 +252,13 @@ def test_factory_serialize(self): report.add('feed.name', 'Example') report.add('feed.url', URL_SANE) report.add('raw', LOREM_BASE64) - self.assertEqual('{"raw": "bG9yZW0gaXBzdW0=", "__type": "Report",' - ' "feed.url": "https://example.com/", "feed.name":' - ' "Example"}', - message.MessageFactory.serialize(report)) + actual = message.MessageFactory.serialize(report) + expected = ('{"raw": "bG9yZW0gaXBzdW0=", "__type": "Report", "feed.url' + '": "https://example.com/", "feed.name": "Example"}') + self.assertDictEqual(json.loads(expected), + json.loads(actual)) - def test_report_unicode(self): # TODO: Python 3 + def test_report_unicode(self): """ Test Message __unicode__ function, pointing to serialize. 
""" report = message.MessageFactory.unserialize('{"__type": "Report"}') report = self.add_report_examples(report) @@ -263,18 +266,18 @@ def test_report_unicode(self): # TODO: Python 3 six.text_type(report)) def test_deep_copy_content(self): - """ Test if depp_copy does not return the same object. """ + """ Test if deep_copy does not return the same object. """ report = message.MessageFactory.unserialize('{"__type": "Report"}') report = self.add_report_examples(report) - self.assertListEqual(list(report.deep_copy().items()), - list(report.items())) + self.assertSetEqual(set(report.deep_copy().items()), + set(report.items())) def test_deep_copy_items(self): # TODO: Sort by key - """ Test if depp_copy does not return the same object. """ + """ Test if deep_copy does not return the same object. """ report = message.MessageFactory.unserialize('{"__type": "Report"}') report = self.add_report_examples(report) - self.assertNotEqual(list(map(id, report.deep_copy())), - list(map(id, report))) + self.assertNotEqual(set(map(id, report.deep_copy())), + set(map(id, report))) def test_deep_copy_object(self): """ Test if depp_copy does not return the same object. """ @@ -283,21 +286,21 @@ def test_deep_copy_object(self): self.assertIsNot(report.deep_copy(), report) def test_copy_content(self): - """ Test if depp_copy does not return the same object. """ + """ Test if deep_copy does not return the same object. """ report = message.MessageFactory.unserialize('{"__type": "Report"}') report = self.add_report_examples(report) - self.assertListEqual(list(report.copy().items()), - list(report.items())) + self.assertSetEqual(set(report.copy().items()), + set(report.items())) - def test_copy_items(self): # TODO: Sort by key - """ Test if depp_copy does not return the same object. """ + def test_copy_items(self): + """ Test if deep_copy does not return the same object. 
""" report = message.MessageFactory.unserialize('{"__type": "Report"}') report = self.add_report_examples(report) - self.assertListEqual(list(map(id, report.copy())), - list(map(id, report))) + self.assertEqual(set(map(id, report.copy())), + set(map(id, report))) def test_copy_object(self): - """ Test if depp_copy does not return the same object. """ + """ Test if deep_copy does not return the same object. """ report = message.MessageFactory.unserialize('{"__type": "Report"}') report = self.add_report_examples(report) self.assertIsNot(report.copy(), report) @@ -323,10 +326,12 @@ def test_event_json(self): """ Test Event to_json. """ event = message.MessageFactory.unserialize('{"__type": "Event"}') event = self.add_event_examples(event) - self.assertEqual('{"feed": {"url": "https://example.com/", "name": ' - '"Example"}, "raw": "bG9yZW0gaXBzdW0=", "time": ' - '{"observation": "2015-01-01T13:37:00+00:00"}}', - event.to_json()) + actual = event.to_json() + self.assertIsInstance(actual, six.text_type) + expected = ('{"feed": {"url": "https://example.com/", "name": ' + '"Example"}, "raw": "bG9yZW0gaXBzdW0=", "time": ' + '{"observation": "2015-01-01T13:37:00+00:00"}}') + self.assertDictEqual(json.loads(expected), json.loads(actual)) def test_event_serialize(self): """ Test Event serialize. """ @@ -337,7 +342,7 @@ def test_event_serialize(self): def test_event_string(self): """ Test Event serialize. """ event = message.MessageFactory.unserialize('{"__type": "Event"}') - self.assertEqual(b'{"__type": "Event"}', + self.assertEqual('{"__type": "Event"}', event.serialize()) def test_event_unicode(self): diff --git a/intelmq/tests/lib/test_pipeline.py b/intelmq/tests/lib/test_pipeline.py index 9b1ad7bb7..a91da4dd0 100644 --- a/intelmq/tests/lib/test_pipeline.py +++ b/intelmq/tests/lib/test_pipeline.py @@ -69,9 +69,10 @@ def clear(self): self.pipe.clear_queue(self.pipe.source_queue) def test_send_receive(self): + """ Sending bytest and receiving unicode. 
""" self.clear() self.pipe.send(SAMPLES['normal'][0]) - self.assertEqual(SAMPLES['normal'][0], self.pipe.receive()) + self.assertEqual(SAMPLES['normal'][1], self.pipe.receive()) def test_send_receive_unicode(self): self.clear() diff --git a/intelmq/tests/lib/test_utils.py b/intelmq/tests/lib/test_utils.py index b8538a008..28a24f2d6 100644 --- a/intelmq/tests/lib/test_utils.py +++ b/intelmq/tests/lib/test_utils.py @@ -52,21 +52,21 @@ def test_encode_unicode(self): def test_decode_ascii(self): """ Test ASCII decoding enforcement. """ - self.assertEqual(b'fobar', + self.assertEqual('fobar', utils.decode(b'fo\xe4bar', encodings=('ascii', ), force=True)) def test_decode_unicode(self): """ Test decoding with unicode string. """ - self.assertEqual(u'foobar', utils.decode(u'foobar')) + self.assertEqual('foobar', utils.decode(u'foobar')) def test_encode_bytes(self): """ Test encoding with bytes string. """ - self.assertEqual(b'foobar', utils.decode(b'foobar')) + self.assertEqual('foobar', utils.decode(b'foobar')) def test_encode_force(self): """ Test ASCII encoding enforcement. """ - self.assertEqual('fobar', + self.assertEqual(b'fobar', utils.encode(u'fo\xe4bar', encodings=('ascii', ), force=True)) diff --git a/setup.py b/setup.py index cde9df222..bd18ad831 100644 --- a/setup.py +++ b/setup.py @@ -4,12 +4,16 @@ from setuptools import find_packages, setup + +if sys.version[0] == '2': + input = raw_input + if os.path.isdir("/opt/intelmq"): print() print("IntelMQ seems to be already installed due the existence of " "/opt/intelmq directory. If you continue the directory will be" " overwritten.") - answer = raw_input("Do you want to proceed? [y/N] ") + answer = input("Do you want to proceed? 
[y/N] ") if answer != "Y" and answer != "y": sys.exit(-1) From fbff52b21cc931e2500e2b0d0dbeb256e77eabc7 Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Wed, 2 Sep 2015 17:01:28 +0200 Subject: [PATCH 08/51] DEV: gitignore: Add coverage files Signed-off-by: Sebastian Wagner --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index a5b7bf610..2ad92b9d2 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,6 @@ dist *.old .vagrant/ *~ +.coverage +.idea/ +htmlcov/ From 24889e503a859bba024a10bc09b84a1ad34b1aef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1s=20Lima?= Date: Thu, 3 Sep 2015 01:20:48 +0100 Subject: [PATCH 09/51] fix #310 --- intelmq/bots/parsers/spamhaus/parser_drop.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intelmq/bots/parsers/spamhaus/parser_drop.py b/intelmq/bots/parsers/spamhaus/parser_drop.py index 65aec28c3..eb6a0b3e7 100644 --- a/intelmq/bots/parsers/spamhaus/parser_drop.py +++ b/intelmq/bots/parsers/spamhaus/parser_drop.py @@ -55,5 +55,5 @@ def process(self): self.acknowledge_message() if __name__ == "__main__": - bot = SpamhausParserBot(sys.argv[1]) + bot = SpamhausDropParserBot(sys.argv[1]) bot.start() From 807f3bbcb07f068d057ccc711ab1e898a7892a5b Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Thu, 3 Sep 2015 17:32:50 +0200 Subject: [PATCH 10/51] BUG: Parser and encoding fixes addresses parts of #307 Signed-off-by: Sebastian Wagner --- intelmq/bots/experts/asn_lookup/expert.py | 1 + intelmq/bots/experts/cymru_whois/expert.py | 7 ++--- intelmq/bots/experts/cymru_whois/lib.py | 5 ++-- intelmq/bots/outputs/file/output.py | 5 ++-- .../parsers/dragonresearchgroup/parser_ssh.py | 3 ++ intelmq/bots/parsers/spamhaus/parser_cert.py | 4 +-- intelmq/bots/parsers/spamhaus/parser_drop.py | 3 +- intelmq/lib/cache.py | 10 +++++-- intelmq/lib/harmonization.py | 9 ++++-- intelmq/lib/utils.py | 30 ++++++++++++++++--- intelmq/tests/lib/test_message.py | 9 ++++-- setup.py | 2 +- 12 
files changed, 65 insertions(+), 23 deletions(-) diff --git a/intelmq/bots/experts/asn_lookup/expert.py b/intelmq/bots/experts/asn_lookup/expert.py index 3931da4ee..4618fbe12 100644 --- a/intelmq/bots/experts/asn_lookup/expert.py +++ b/intelmq/bots/experts/asn_lookup/expert.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- """ TODO: IPv6 +TODO: Known bug: https://github.com/hadiasghari/pyasn/issues/14 """ from __future__ import unicode_literals import sys diff --git a/intelmq/bots/experts/cymru_whois/expert.py b/intelmq/bots/experts/cymru_whois/expert.py index 3e5325fbb..49d5aae44 100644 --- a/intelmq/bots/experts/cymru_whois/expert.py +++ b/intelmq/bots/experts/cymru_whois/expert.py @@ -46,10 +46,9 @@ def process(self): elif ip_version == 6: minimum = MINIMUM_BGP_PREFIX_IPV6 - else: # TODO: Should never happen as IP is validated, raise? - self.logger.error("Invalid IP version") - self.send_message(event) - self.acknowledge_message() + else: + raise ValueError('Unexpected IP version ' + '{!r}.'.format(ip_version)) cache_key = bin(ip_integer)[2: minimum + 2] result_json = self.cache.get(cache_key) diff --git a/intelmq/bots/experts/cymru_whois/lib.py b/intelmq/bots/experts/cymru_whois/lib.py index 3b14c2c81..82eb27c80 100755 --- a/intelmq/bots/experts/cymru_whois/lib.py +++ b/intelmq/bots/experts/cymru_whois/lib.py @@ -8,6 +8,7 @@ import dns.resolver from intelmq.lib.harmonization import IPAddress +import intelmq.lib.utils as utils IP_QUERY = "%s.origin%s.asn.cymru.com" ASN_QUERY = "AS%s.asn.cymru.com" @@ -21,7 +22,7 @@ def query(ip): result = Cymru.__ip_query_parse(raw_result) if "asn" in result: - raw_result = Cymru.__asn_query(result['asn']).decode('utf-8') + raw_result = Cymru.__asn_query(result['asn']) extra_info = Cymru.__asn_query_parse(raw_result) result.update(extra_info) @@ -35,7 +36,7 @@ def __query(query): query_result.to_wire(fp) value = fp.getvalue()[1:] # ignore first character fp.close() - return value + return utils.decode(value) except 
dns.exception.DNSException: return None diff --git a/intelmq/bots/outputs/file/output.py b/intelmq/bots/outputs/file/output.py index 4d19ae1d2..b02d9df8e 100644 --- a/intelmq/bots/outputs/file/output.py +++ b/intelmq/bots/outputs/file/output.py @@ -3,13 +3,14 @@ import sys from intelmq.lib.bot import Bot +import intelmq.lib.utils as utils class FileBot(Bot): def init(self): self.logger.debug("Opening %s file" % self.parameters.file) - self.file = open(self.parameters.file, 'a') + self.file = open(self.parameters.file, 'at') self.logger.info("File %s is open." % self.parameters.file) def process(self): @@ -21,7 +22,7 @@ def process(self): if event: event_data = event.to_json() - self.file.write(event_data) + self.file.write(utils.encode(event_data)) self.file.write("\n") self.file.flush() self.acknowledge_message() diff --git a/intelmq/bots/parsers/dragonresearchgroup/parser_ssh.py b/intelmq/bots/parsers/dragonresearchgroup/parser_ssh.py index a66a53e87..db4d057c8 100644 --- a/intelmq/bots/parsers/dragonresearchgroup/parser_ssh.py +++ b/intelmq/bots/parsers/dragonresearchgroup/parser_ssh.py @@ -36,6 +36,9 @@ def process(self): if key == "time.source": value += "T00:00:00+00:00" + if value == 'NA': + continue + event.add(key, value, sanitize=True) event.add('time.observation', report.value( diff --git a/intelmq/bots/parsers/spamhaus/parser_cert.py b/intelmq/bots/parsers/spamhaus/parser_cert.py index 01f1da177..2762fb790 100644 --- a/intelmq/bots/parsers/spamhaus/parser_cert.py +++ b/intelmq/bots/parsers/spamhaus/parser_cert.py @@ -52,7 +52,7 @@ def process(self): if not len(row) or row.startswith(';'): continue - row_splitted = row.split(',') + row_splitted = [field.strip() for field in row.split(',')] event = Event() event.add('source.ip', row_splitted[0], sanitize=True) @@ -69,7 +69,7 @@ def process(self): pass # otherwise the same ip, ignore event.add('destination.ip', row_splitted[6], sanitize=True) event.add('destination.port', row_splitted[7], sanitize=True) - 
if row_splitted[8]: + if row_splitted[8] and row_splitted[8] != '-': event.add('additional', json.dumps({'destination.local_port': int(row_splitted[8])}), diff --git a/intelmq/bots/parsers/spamhaus/parser_drop.py b/intelmq/bots/parsers/spamhaus/parser_drop.py index 65aec28c3..4ad218b4a 100644 --- a/intelmq/bots/parsers/spamhaus/parser_drop.py +++ b/intelmq/bots/parsers/spamhaus/parser_drop.py @@ -42,7 +42,8 @@ def process(self): event.add('source.network', network, sanitize=True) if self.event_date: - event.add('time.source', self.event_date, sanitize=True) + event.add('time.source', self.event_date.isoformat(), + sanitize=True) event.add('time.observation', report.value( 'time.observation'), sanitize=True) diff --git a/intelmq/lib/cache.py b/intelmq/lib/cache.py index 6a05bc61f..1df93ee9c 100644 --- a/intelmq/lib/cache.py +++ b/intelmq/lib/cache.py @@ -11,6 +11,9 @@ import redis +import intelmq.lib.utils as utils + + class Cache(): def __init__(self, host, port, db, ttl): @@ -25,9 +28,12 @@ def exists(self, key): return self.redis.exists(key) def get(self, key): - return self.redis.get(key) + retval = self.redis.get(key) + if isinstance(retval, basestring): + return utils.decode(retval) + return retval def set(self, key, value): # backward compatibility (Redis v2.2) - self.redis.setnx(key, value) + self.redis.setnx(key, utils.encode(value)) self.redis.expire(key, self.ttl) diff --git a/intelmq/lib/harmonization.py b/intelmq/lib/harmonization.py index f9b199b21..50a8fe77b 100644 --- a/intelmq/lib/harmonization.py +++ b/intelmq/lib/harmonization.py @@ -80,6 +80,11 @@ def sanitize(value): class Base64(GenericType): + """ + Base64 type. Always gives unicode strings. + + Sanitation encodes to base64 and accepts binary and unicode strings. 
+ """ @staticmethod def is_valid(value, sanitize=False): @@ -88,7 +93,7 @@ def is_valid(value, sanitize=False): value = Base64().sanitize(value) try: - base64.b64decode(value) + utils.base64_decode(value) except TypeError: return False @@ -100,7 +105,7 @@ def is_valid(value, sanitize=False): @staticmethod def sanitize(value): value = utils.base64_encode(value) - return GenericType().sanitize(value) + return value class Boolean(GenericType): diff --git a/intelmq/lib/utils.py b/intelmq/lib/utils.py index 4dbacbef7..8264acaea 100644 --- a/intelmq/lib/utils.py +++ b/intelmq/lib/utils.py @@ -56,7 +56,7 @@ def decode(text, encodings=("utf-8", ), force=False): unicode string is always returned, even when encoding is ascii (Python 3 compat) """ - if type(text) is six.text_type: + if isinstance(text, six.text_type): return text for encoding in encodings: @@ -72,7 +72,8 @@ def decode(text, encodings=("utf-8", ), force=False): except ValueError: pass - raise Exception("Could not decode string with given encodings.") + raise ValueError("Could not decode string with given encodings{!r}" + ".".format(encodings)) def encode(text, encodings=("utf-8", ), force=False): @@ -88,7 +89,7 @@ def encode(text, encodings=("utf-8", ), force=False): force : boolean Ignore invalid characters, default: False """ - if type(text) is bytes: + if isinstance(text, six.binary_type): return text for encoding in encodings: @@ -104,14 +105,35 @@ def encode(text, encodings=("utf-8", ), force=False): except ValueError: pass - raise Exception("Could not encode string with given encodings.") + raise ValueError("Could not encode string with given encodings{!r}" + ".".format(encodings)) def base64_decode(value): + """ + Parameters + ---------- + value : string + base 64, will be encoded to bytes if not already. 
+ + Returns + ------- + retval : unicode string + """ return decode(base64.b64decode(encode(value))) def base64_encode(value): + """ + Parameters + ---------- + value : string + Will be encoded to bytes if not already of type bytes. + + Returns + ------- + retval : unicode string + """ return decode(base64.b64encode(encode(value))) diff --git a/intelmq/tests/lib/test_message.py b/intelmq/tests/lib/test_message.py index 6adcf26c0..db41d073d 100644 --- a/intelmq/tests/lib/test_message.py +++ b/intelmq/tests/lib/test_message.py @@ -306,10 +306,13 @@ def test_copy_object(self): self.assertIsNot(report.copy(), report) def test_event_hash(self): - """ Test Event __hash_,_ 'time.observation should be ignored. """ + """ Test Event __hash__ 'time.observation should be ignored. """ event = message.MessageFactory.unserialize('{"__type": "Event"}') - event = self.add_event_examples(event) - self.assertEqual(-2488641590542048631, hash(event)) + event1 = self.add_event_examples(event) + event2 = event1.deep_copy() + event2.add('time.observation', u'2015-12-12T13:37:50+01:00', + force=True, sanitize=True) + self.assertEqual(hash(event1), hash(event2)) def test_event_dict(self): """ Test Event to_dict. """ diff --git a/setup.py b/setup.py index bd18ad831..fb6ce237f 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ "/opt/intelmq directory. If you continue the directory will be" " overwritten.") answer = input("Do you want to proceed? 
[y/N] ") - if answer != "Y" and answer != "y": + if answer.lower().strip() != "y": sys.exit(-1) dirs = ['/opt/intelmq', From b02c4cf0466ec41d68721aff2f9921b3550444b8 Mon Sep 17 00:00:00 2001 From: Aaron Kaplan Date: Sun, 6 Sep 2015 23:13:04 +0200 Subject: [PATCH 11/51] add travis auto-checker --- .travis.yml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 000000000..4277a1dbf --- /dev/null +++ b/.travis.yml @@ -0,0 +1,15 @@ +language: python +python: + - "2.7" + - "3.2" + - "3.3" + - "3.4" +# command to install dependencies +install: + - "pip install -r REQUIREMENTS" + - "python setup.py install" + - "useradd -d /opt/intelmq -U -s /bin/bash intelmq" + - "chmod -R 0770 /opt/intelmq" + - "chown -R intelmq.intelmq /opt/intelmq" +# command to run tests +script: nosetests From 0445e7911f823bbb83dcfddec61b2cf9306acde5 Mon Sep 17 00:00:00 2001 From: Aaron Kaplan Date: Sun, 6 Sep 2015 23:15:09 +0200 Subject: [PATCH 12/51] Add note on Travis --- docs/Developers-Guide.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/Developers-Guide.md b/docs/Developers-Guide.md index 8ece2fb04..1c223ea18 100644 --- a/docs/Developers-Guide.md +++ b/docs/Developers-Guide.md @@ -39,6 +39,8 @@ All changes have to be tested and new contributions must must be accompanied by It may be necessary to switch the user to `intelmq` if the run-path (`/opt/intelmq/var/run/`) is not writeable by the current user. Some bots need local databases to succeed. If you don't mind about those and only want to test one explicit test file, you can give the filepath as argument. +There is a [Travis-CI](https://travis-ci.org/certtools/intelmq/builds) setup for automatic testing. (-> thx sebix!) + ## Coding-Rules In general, we follow the [Style Guide for Python Code (PEP8)](https://www.python.org/dev/peps/pep-0008/). 
From 5562230ef32ca2d7c5aa130e7d7cc1cb4ed0d33d Mon Sep 17 00:00:00 2001 From: Aaron Kaplan Date: Sun, 6 Sep 2015 23:16:49 +0200 Subject: [PATCH 13/51] no /opt here --- .travis.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 4277a1dbf..ec9448ee6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,8 +8,5 @@ python: install: - "pip install -r REQUIREMENTS" - "python setup.py install" - - "useradd -d /opt/intelmq -U -s /bin/bash intelmq" - - "chmod -R 0770 /opt/intelmq" - - "chown -R intelmq.intelmq /opt/intelmq" # command to run tests script: nosetests From 1c05a0c18f6a1f9d05a01457adae7a6bfe16d98a Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Fri, 4 Sep 2015 10:00:48 +0200 Subject: [PATCH 14/51] BUG: redefine minimal versions in REQUIREMENTS Signed-off-by: Sebastian Wagner --- REQUIREMENTS | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/REQUIREMENTS b/REQUIREMENTS index bf5524d9d..ea6258c66 100644 --- a/REQUIREMENTS +++ b/REQUIREMENTS @@ -1,15 +1,16 @@ -python-dateutil==1.5 -geoip2==0.5.1 -dnspython==1.11.1 -redis==2.10.3 -pymongo==2.7.1 -xmpppy==0.5.0rc1 -imbox==0.5.5 +python-dateutil>=1.5 +geoip2>=0.5.1 +dnspython>=1.11.1 +redis>=2.10.3 +pymongo>=2.7.1 +xmpppy>=0.5.0rc1 +imbox>=0.5.5 ipaddress -unicodecsv==0.9.4 -pytz==2012d -psutil==2.1.1 -pyzmq==14.6.0 -pydns==2.3.6 -pycurl==7.19.0 -mock +unicodecsv>=0.9.4 +pytz>=2012d +psutil>=2.1.1 +pyzmq>=14.6.0 +pydns>=2.3.6 +pycurl>=7.19.0 +mock>=1.1.1 +six>=1.7 From e9880453dff8403fc16c79478d0301ba12a5eefa Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Fri, 4 Sep 2015 10:59:33 +0200 Subject: [PATCH 15/51] ENH: Change some collector URLs to HTTPS all tested others do not support http asn lookup expert: ignore IPv6 for now, fix coming Signed-off-by: Sebastian Wagner --- intelmq/bots/BOTS | 40 +++++++++++------------ intelmq/bots/experts/asn_lookup/expert.py | 6 +++- 2 files changed, 25 insertions(+), 21 deletions(-) diff --git 
a/intelmq/bots/BOTS b/intelmq/bots/BOTS index 8a0ab3ae5..7666d3354 100644 --- a/intelmq/bots/BOTS +++ b/intelmq/bots/BOTS @@ -78,7 +78,7 @@ "parameters": { "feed": "Autoshun", "rate_limit": "3600", - "url": "http://www.autoshun.org/files/shunlist.html" + "url": "https://www.autoshun.org/files/shunlist.html" } }, "BlockList.DE Apache": { @@ -87,7 +87,7 @@ "parameters": { "feed": "BlockList.de", "rate_limit": "86400", - "url": "http://lists.blocklist.de/lists/apache.txt" + "url": "https://lists.blocklist.de/lists/apache.txt" } }, "BlockList.DE Bots": { @@ -96,7 +96,7 @@ "parameters": { "feed": "BlockList.de", "rate_limit": "86400", - "url": "http://lists.blocklist.de/lists/bots.txt" + "url": "https://lists.blocklist.de/lists/bots.txt" } }, "BlockList.DE Brute-force Login": { @@ -105,7 +105,7 @@ "parameters": { "feed": "BlockList.de", "rate_limit": "86400", - "url": "http://lists.blocklist.de/lists/bruteforcelogin.txt" + "url": "https://lists.blocklist.de/lists/bruteforcelogin.txt" } }, "BlockList.DE FTP": { @@ -114,7 +114,7 @@ "parameters": { "feed": "BlockList.de", "rate_limit": "86400", - "url": "http://lists.blocklist.de/lists/ftp.txt" + "url": "https://lists.blocklist.de/lists/ftp.txt" } }, "BlockList.DE IMAP": { @@ -123,7 +123,7 @@ "parameters": { "feed": "BlockList.de", "rate_limit": "86400", - "url": "http://lists.blocklist.de/lists/imap.txt" + "url": "https://lists.blocklist.de/lists/imap.txt" } }, "BlockList.DE IRC Bot": { @@ -132,7 +132,7 @@ "parameters": { "feed": "BlockList.de", "rate_limit": "86400", - "url": "http://lists.blocklist.de/lists/ircbot.txt" + "url": "https://lists.blocklist.de/lists/ircbot.txt" } }, "BlockList.DE Mail": { @@ -141,7 +141,7 @@ "parameters": { "feed": "BlockList.de", "rate_limit": "86400", - "url": "http://lists.blocklist.de/lists/mail.txt" + "url": "https://lists.blocklist.de/lists/mail.txt" } }, "BlockList.DE SIP": { @@ -150,7 +150,7 @@ "parameters": { "feed": "BlockList.de", "rate_limit": "86400", - "url": 
"http://lists.blocklist.de/lists/sip.txt" + "url": "https://lists.blocklist.de/lists/sip.txt" } }, "BlockList.DE SSH": { @@ -159,7 +159,7 @@ "parameters": { "feed": "BlockList.de", "rate_limit": "86400", - "url": "http://lists.blocklist.de/lists/ssh.txt" + "url": "https://lists.blocklist.de/lists/ssh.txt" } }, "BlockList.DE Strong IPs": { @@ -168,7 +168,7 @@ "parameters": { "feed": "BlockList.de", "rate_limit": "86400", - "url": "http://lists.blocklist.de/lists/strongips.txt" + "url": "https://lists.blocklist.de/lists/strongips.txt" } }, "CI Army": { @@ -204,7 +204,7 @@ "parameters": { "feed": "Cymru", "rate_limit": "129600", - "url": "http://www.team-cymru.org/Services/Bogons/fullbogons-ipv4.txt" + "url": "https://www.team-cymru.org/Services/Bogons/fullbogons-ipv4.txt" } }, "DShield AS": { @@ -213,7 +213,7 @@ "parameters": { "feed": "DShield", "rate_limit": "129600", - "url": "http://dshield.org/asdetailsascii.html?as=" + "url": "https://dshield.org/asdetailsascii.html?as=" } }, "DShield Block": { @@ -249,7 +249,7 @@ "parameters": { "feed": "Dragon Research Group", "rate_limit": "3600", - "url": "http://dragonresearchgroup.org/insight/sshpwauth.txt" + "url": "https://dragonresearchgroup.org/insight/sshpwauth.txt" } }, "Dragon Research Group VNC": { @@ -296,7 +296,7 @@ "parameters": { "feed": "Malc0de", "rate_limit": "10800", - "url": "http://malc0de.com/bl/BOOT" + "url": "https://malc0de.com/bl/BOOT" } }, "Malc0de IP Blacklist": { @@ -305,7 +305,7 @@ "parameters": { "feed": "Malc0de", "rate_limit": "10800", - "url": "http://malc0de.com/bl/IP_Blacklist.txt" + "url": "https://malc0de.com/bl/IP_Blacklist.txt" } }, "Malware Domain List": { @@ -368,7 +368,7 @@ "parameters": { "feed": "OpenBL", "rate_limit": "43200", - "url": "http://www.openbl.org/lists/date_all.txt" + "url": "https://www.openbl.org/lists/date_all.txt" } }, "OpenPhish": { @@ -377,7 +377,7 @@ "parameters": { "feed": "OpenPhish", "rate_limit": "86400", - "url": "http://www.openphish.com/feed.txt" + 
"url": "https://www.openphish.com/feed.txt" } }, "PhishTank": { @@ -386,7 +386,7 @@ "parameters": { "feed": "Phishtank", "rate_limit": "28800", - "url": "http://data.phishtank.com/data/< API KEY >/online-valid.csv" + "url": "https://data.phishtank.com/data/< API KEY >/online-valid.csv" } }, "Taichung": { @@ -440,7 +440,7 @@ "parameters": { "feed": "Spamhaus Drop", "rate_limit": "3600", - "url": "http://www.spamhaus.org/drop/drop.lasso" + "url": "https://www.spamhaus.org/drop/drop.lasso" } }, "VXVault": { diff --git a/intelmq/bots/experts/asn_lookup/expert.py b/intelmq/bots/experts/asn_lookup/expert.py index 4618fbe12..138a5a9b7 100644 --- a/intelmq/bots/experts/asn_lookup/expert.py +++ b/intelmq/bots/experts/asn_lookup/expert.py @@ -9,7 +9,7 @@ import pyasn import six from intelmq.lib.bot import Bot - +from intelmq.lib.harmonization import IPAddress class ASNLookupExpertBot(Bot): @@ -41,6 +41,10 @@ def process(self): ip = event.value(ip_key) + if IPAddress.version(ip) == 6: + # Currently not supported by pyasn, fix will come soon + continue + info = self.database.lookup(ip) if info: From 5ae5009bef1637c64346679f35d01098214e1d1e Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Fri, 4 Sep 2015 11:09:58 +0200 Subject: [PATCH 16/51] ENH: outputs/postgres use autocommit feature Signed-off-by: Sebastian Wagner --- intelmq/bots/BOTS | 3 ++- intelmq/bots/outputs/postgresql/output.py | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/intelmq/bots/BOTS b/intelmq/bots/BOTS index 7666d3354..ee3471cdf 100644 --- a/intelmq/bots/BOTS +++ b/intelmq/bots/BOTS @@ -570,8 +570,9 @@ }, "PostgreSQL": { "module": "intelmq.bots.outputs.postgresql.output", - "description": "PostgreSQL is the bot responsible to send events to a PostgreSQL Database.", + "description": "PostgreSQL is the bot responsible to send events to a PostgreSQL Database. 
When activating autocommit, transactions are not used: http://initd.org/psycopg/docs/connection.html#connection.autocommit", "parameters": { + "autocommit": true, "host": "localhost", "port": "5432", "database": "intelmq-events", diff --git a/intelmq/bots/outputs/postgresql/output.py b/intelmq/bots/outputs/postgresql/output.py index da3f8f555..c842af3f7 100644 --- a/intelmq/bots/outputs/postgresql/output.py +++ b/intelmq/bots/outputs/postgresql/output.py @@ -30,6 +30,7 @@ def init(self): connect_timeout=connect_timeout, ) self.cur = self.con.cursor() + self.autocommit = getattr(self.parameters, 'autocommit', True) except: self.logger.exception('Failed to connect to database') self.stop() From 102a444e92e9919762fae59b19a2f640923b61c3 Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Fri, 4 Sep 2015 11:12:31 +0200 Subject: [PATCH 17/51] ENH: BOTS: rate_limit is integer Signed-off-by: Sebastian Wagner --- intelmq/bots/BOTS | 100 +++++++++++++++++++++++----------------------- 1 file changed, 50 insertions(+), 50 deletions(-) diff --git a/intelmq/bots/BOTS b/intelmq/bots/BOTS index ee3471cdf..7c86cdafe 100644 --- a/intelmq/bots/BOTS +++ b/intelmq/bots/BOTS @@ -5,7 +5,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "Abuse.ch", - "rate_limit": "129600", + "rate_limit": 129600, "url": "https://feodotracker.abuse.ch/blocklist/?download=domainblocklist" } }, @@ -14,7 +14,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "Abuse.ch", - "rate_limit": "129600", + "rate_limit": 129600, "url": "https://feodotracker.abuse.ch/blocklist/?download=ipblocklist" } }, @@ -23,7 +23,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "Abuse.ch", - "rate_limit": "129600", + "rate_limit": 129600, "url": "https://palevotracker.abuse.ch/blocklists.php?download=domainblocklist" } }, @@ -32,7 +32,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": 
"Abuse.ch", - "rate_limit": "129600", + "rate_limit": 129600, "url": "https://palevotracker.abuse.ch/blocklists.php?download=ipblocklist" } }, @@ -41,7 +41,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "Abuse.ch", - "rate_limit": "129600", + "rate_limit": 129600, "url": "https://zeustracker.abuse.ch/blocklist.php?download=baddomains" } }, @@ -50,7 +50,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "Abuse.ch", - "rate_limit": "129600", + "rate_limit": 129600, "url": "https://zeustracker.abuse.ch/blocklist.php?download=badips" } }, @@ -59,7 +59,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "AlienVault", - "rate_limit": "3600", + "rate_limit": 3600, "url": "https://reputation.alienvault.com/reputation.data" } }, @@ -68,7 +68,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "Arbor", - "rate_limit": "3600", + "rate_limit": 3600, "url": "http://atlas-public.ec2.arbor.net/public/ssh_attackers" } }, @@ -77,7 +77,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "Autoshun", - "rate_limit": "3600", + "rate_limit": 3600, "url": "https://www.autoshun.org/files/shunlist.html" } }, @@ -86,7 +86,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "BlockList.de", - "rate_limit": "86400", + "rate_limit": 86400, "url": "https://lists.blocklist.de/lists/apache.txt" } }, @@ -95,7 +95,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "BlockList.de", - "rate_limit": "86400", + "rate_limit": 86400, "url": "https://lists.blocklist.de/lists/bots.txt" } }, @@ -104,7 +104,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "BlockList.de", - "rate_limit": "86400", + "rate_limit": 86400, "url": "https://lists.blocklist.de/lists/bruteforcelogin.txt" } }, @@ -113,7 +113,7 @@ "module": 
"intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "BlockList.de", - "rate_limit": "86400", + "rate_limit": 86400, "url": "https://lists.blocklist.de/lists/ftp.txt" } }, @@ -122,7 +122,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "BlockList.de", - "rate_limit": "86400", + "rate_limit": 86400, "url": "https://lists.blocklist.de/lists/imap.txt" } }, @@ -131,7 +131,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "BlockList.de", - "rate_limit": "86400", + "rate_limit": 86400, "url": "https://lists.blocklist.de/lists/ircbot.txt" } }, @@ -140,7 +140,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "BlockList.de", - "rate_limit": "86400", + "rate_limit": 86400, "url": "https://lists.blocklist.de/lists/mail.txt" } }, @@ -149,7 +149,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "BlockList.de", - "rate_limit": "86400", + "rate_limit": 86400, "url": "https://lists.blocklist.de/lists/sip.txt" } }, @@ -158,7 +158,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "BlockList.de", - "rate_limit": "86400", + "rate_limit": 86400, "url": "https://lists.blocklist.de/lists/ssh.txt" } }, @@ -167,7 +167,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "BlockList.de", - "rate_limit": "86400", + "rate_limit": 86400, "url": "https://lists.blocklist.de/lists/strongips.txt" } }, @@ -176,7 +176,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "CI Army", - "rate_limit": "3600", + "rate_limit": 3600, "url": "http://cinsscore.com/list/ci-badguys.txt" } }, @@ -185,7 +185,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "CleanMX", - "rate_limit": "129600", + "rate_limit": 129600, "url": "http://support.clean-mx.de/clean-mx/xmlphishing?response=alive&format=csv&domain=" } }, @@ 
-194,7 +194,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "CleanMX", - "rate_limit": "129600", + "rate_limit": 129600, "url": "http://support.clean-mx.de/clean-mx/xmlviruses?response=alive&format=csv&domain=" } }, @@ -203,7 +203,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "Cymru", - "rate_limit": "129600", + "rate_limit": 129600, "url": "https://www.team-cymru.org/Services/Bogons/fullbogons-ipv4.txt" } }, @@ -212,7 +212,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "DShield", - "rate_limit": "129600", + "rate_limit": 129600, "url": "https://dshield.org/asdetailsascii.html?as=" } }, @@ -221,7 +221,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "DShield", - "rate_limit": "129600", + "rate_limit": 129600, "url": "https://www.dshield.org/block.txt" } }, @@ -230,7 +230,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "DShield", - "rate_limit": "129600", + "rate_limit": 129600, "url": "https://www.dshield.org/feeds/suspiciousdomains_High.txt" } }, @@ -239,7 +239,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "Danger Rulez", - "rate_limit": "3600", + "rate_limit": 3600, "url": "http://danger.rulez.sk/projects/bruteforceblocker/blist.php" } }, @@ -248,7 +248,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "Dragon Research Group", - "rate_limit": "3600", + "rate_limit": 3600, "url": "https://dragonresearchgroup.org/insight/sshpwauth.txt" } }, @@ -257,7 +257,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "Dragon Research Group", - "rate_limit": "3600", + "rate_limit": 3600, "url": "https://dragonresearchgroup.org/insight/vncprobe.txt" } }, @@ -266,7 +266,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "Dyn", - "rate_limit": 
"10800", + "rate_limit": 10800, "url": "http://security-research.dyndns.org/pub/botnet/ponmocup/ponmocup-finder/ponmocup-infected-domains-latest.txt" } }, @@ -276,7 +276,7 @@ "parameters": { "feed": "Fraunhofer DGA", "password": "", - "rate_limit": "10800", + "rate_limit": 10800, "url": "https://dgarchive.caad.fkie.fraunhofer.de/today", "username": "" } @@ -286,7 +286,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "HpHosts", - "rate_limit": "3600", + "rate_limit": 3600, "url": "http://hosts-file.net/download/hosts.txt" } }, @@ -295,7 +295,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "Malc0de", - "rate_limit": "10800", + "rate_limit": 10800, "url": "https://malc0de.com/bl/BOOT" } }, @@ -304,7 +304,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "Malc0de", - "rate_limit": "10800", + "rate_limit": 10800, "url": "https://malc0de.com/bl/IP_Blacklist.txt" } }, @@ -313,7 +313,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "Malware Domain List", - "rate_limit": "3600", + "rate_limit": 3600, "url": "http://www.malwaredomainlist.com/updatescsv.php" } }, @@ -322,7 +322,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "MalwareDomains", - "rate_limit": "172800", + "rate_limit": 172800, "url": "http://mirror2.malwaredomains.com/files/domains.txt" } }, @@ -331,7 +331,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "MalwareGroup", - "rate_limit": "129600", + "rate_limit": 129600, "url": "http://www.malwaregroup.com/domains" } }, @@ -340,7 +340,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "MalwareGroup", - "rate_limit": "129600", + "rate_limit": 129600, "url": "http://www.malwaregroup.com/ipaddresses" } }, @@ -349,7 +349,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": 
"MalwareGroup", - "rate_limit": "129600", + "rate_limit": 129600, "url": "http://www.malwaregroup.com/proxies" } }, @@ -358,7 +358,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "MalwarePatrol", - "rate_limit": "180000", + "rate_limit": 180000, "url": "https://lists.malwarepatrol.net/cgi/getfile?receipt=< API KEY >&product=8&list=dansguardian" } }, @@ -367,7 +367,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "OpenBL", - "rate_limit": "43200", + "rate_limit": 43200, "url": "https://www.openbl.org/lists/date_all.txt" } }, @@ -376,7 +376,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "OpenPhish", - "rate_limit": "86400", + "rate_limit": 86400, "url": "https://www.openphish.com/feed.txt" } }, @@ -385,7 +385,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "Phishtank", - "rate_limit": "28800", + "rate_limit": 28800, "url": "https://data.phishtank.com/data/< API KEY >/online-valid.csv" } }, @@ -394,7 +394,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "Taichung", - "rate_limit": "3600", + "rate_limit": 3600, "url": "https://www.tc.edu.tw/net/netflow/lkout/recent/30" } }, @@ -403,7 +403,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "Turris Greylist", - "rate_limit": "43200", + "rate_limit": 43200, "url": "https://www.turris.cz/greylist-data/greylist-latest.csv" } }, @@ -412,7 +412,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "URLVir", - "rate_limit": "129600", + "rate_limit": 129600, "url": "http://www.urlvir.com/export-hosts/" } }, @@ -421,7 +421,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "URLVir", - "rate_limit": "129600", + "rate_limit": 129600, "url": "http://www.urlvir.com/export-ip-addresses/" } }, @@ -430,7 +430,7 @@ "module": 
"intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "Spamhaus CERT", - "rate_limit": "3600", + "rate_limit": 3600, "url": "https://portal.spamhaus.org/cert/api.php?cert=&key=" } }, @@ -439,7 +439,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "Spamhaus Drop", - "rate_limit": "3600", + "rate_limit": 3600, "url": "https://www.spamhaus.org/drop/drop.lasso" } }, @@ -448,7 +448,7 @@ "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "VxVault", - "rate_limit": "3600", + "rate_limit": 3600, "url": "http://vxvault.siri-urz.net/URL_List.php" } } From 22f6428220b99cf3248a993c46da895e96b154ab Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Fri, 4 Sep 2015 11:17:11 +0200 Subject: [PATCH 18/51] BUG: mising __init__ for certat_contact test Signed-off-by: Sebastian Wagner --- intelmq/tests/bots/experts/certat_contact/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 intelmq/tests/bots/experts/certat_contact/__init__.py diff --git a/intelmq/tests/bots/experts/certat_contact/__init__.py b/intelmq/tests/bots/experts/certat_contact/__init__.py new file mode 100644 index 000000000..e69de29bb From 934c8e10f42afae93594d13a3e432aa044801d7a Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Fri, 4 Sep 2015 11:23:52 +0200 Subject: [PATCH 19/51] BUG: cache: text en/decoding fixes Signed-off-by: Sebastian Wagner --- intelmq/lib/cache.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/intelmq/lib/cache.py b/intelmq/lib/cache.py index 1df93ee9c..067f2e885 100644 --- a/intelmq/lib/cache.py +++ b/intelmq/lib/cache.py @@ -9,7 +9,7 @@ """ from __future__ import unicode_literals import redis - +import six import intelmq.lib.utils as utils @@ -29,11 +29,13 @@ def exists(self, key): def get(self, key): retval = self.redis.get(key) - if isinstance(retval, basestring): + if isinstance(retval, six.binary_type): return utils.decode(retval) return 
retval def set(self, key, value): + if isinstance(value, six.text_type): + value = utils.encode(value) # backward compatibility (Redis v2.2) - self.redis.setnx(key, utils.encode(value)) + self.redis.setnx(key, value) self.redis.expire(key, self.ttl) From c81eefcf47d4e6140b92279ab882403dbc5944da Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Fri, 4 Sep 2015 11:52:22 +0200 Subject: [PATCH 20/51] DOC: document expert bots Signed-off-by: Sebastian Wagner --- docs/Bots.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 docs/Bots.md diff --git a/docs/Bots.md b/docs/Bots.md new file mode 100644 index 000000000..62ef6ac18 --- /dev/null +++ b/docs/Bots.md @@ -0,0 +1,19 @@ +Bots documentation +================== + +Experts +------- + +| name | IPv6 | lookup | public | cache: redis db | information | comment | +|:-----|:-----|:---------|:--------| +| abusix | n | ? | y | 5 | ip to abuse contact | ipv6 implementation missing | +| asn-lookup | n | local db | y | - | ip to asn | [IPv6 bugreport](https://github.com/hadiasghari/pyasn/issues/14) +| certat-contact | n | https | y | - | asn to cert abuse contact, cc | +| cymru-whois | y | cymru dns | y | 6 | ip to geolocation, asn, network | +| deduplicator | y | redis cache | y | 7 | - | not tested | +| filter | y | - | y | - | drops event | not tested | +| maxmind-geoip | ? | local db | n | - | ip to geolocation ? | not stable | +| reverse-dns | n | dns | y | 8 | ip to domain | ipv6 implementation missing | +| ripencc | n | ? 
| y | 9 | ip to abuse contact | +| taxonomy | - | - | y | - | classification type to taxonomy | +| tor-nodes | n | local db | y | - | if ip is tor node | From 0d4ca74d49563bfc3acc1e79d96f04d44467eb21 Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Fri, 4 Sep 2015 12:09:15 +0200 Subject: [PATCH 21/51] TST/DOC: reipncc expert supports ipv6 Signed-off-by: Sebastian Wagner --- docs/Bots.md | 2 +- intelmq/bots/experts/ripencc_abuse_contact/expert.py | 1 - .../bots/experts/ripencc_abuse_contact/test_expert.py | 9 ++++----- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/docs/Bots.md b/docs/Bots.md index 62ef6ac18..cbf259e5c 100644 --- a/docs/Bots.md +++ b/docs/Bots.md @@ -14,6 +14,6 @@ Experts | filter | y | - | y | - | drops event | not tested | | maxmind-geoip | ? | local db | n | - | ip to geolocation ? | not stable | | reverse-dns | n | dns | y | 8 | ip to domain | ipv6 implementation missing | -| ripencc | n | ? | y | 9 | ip to abuse contact | +| ripencc-abuse-contact | y | ? | y | 9 | ip to abuse contact | | taxonomy | - | - | y | - | classification type to taxonomy | | tor-nodes | n | local db | y | - | if ip is tor node | diff --git a/intelmq/bots/experts/ripencc_abuse_contact/expert.py b/intelmq/bots/experts/ripencc_abuse_contact/expert.py index 95a9200df..18e50d692 100644 --- a/intelmq/bots/experts/ripencc_abuse_contact/expert.py +++ b/intelmq/bots/experts/ripencc_abuse_contact/expert.py @@ -7,7 +7,6 @@ Load RIPE networks prefixes into memory. Compare each IP with networks prefixes loaded. 
If ip matchs, query RIPE -IPv6 ''' from __future__ import unicode_literals import sys diff --git a/intelmq/tests/bots/experts/ripencc_abuse_contact/test_expert.py b/intelmq/tests/bots/experts/ripencc_abuse_contact/test_expert.py index 01a10a7e9..8126c4e5a 100644 --- a/intelmq/tests/bots/experts/ripencc_abuse_contact/test_expert.py +++ b/intelmq/tests/bots/experts/ripencc_abuse_contact/test_expert.py @@ -19,13 +19,13 @@ "destination.ip": "192.0.43.8", "time.observation": "2015-01-01T00:00:00+00:00", } -EXAMPLE_INPUT6 = {"__type": "Event", # example.com - "source.ip": "2606:2800:220:1:248:1893:25c8:1946", +EXAMPLE_INPUT6 = {"__type": "Event", + "source.ip": "2001:62a:4:100:80::8", # nic.at "time.observation": "2015-01-01T00:00:00+00:00", } EXAMPLE_OUTPUT6 = {"__type": "Event", - "source.ip": "2001:500:88:200::7", - "source.abuse_contact": "abuse@edgecast.com", + "source.ip": "2001:62a:4:100:80::8", + "source.abuse_contact": "security.zid@univie.ac.at", "time.observation": "2015-01-01T00:00:00+00:00", } @@ -45,7 +45,6 @@ def test_ipv4_lookup(self): self.run_bot() self.assertMessageEqual(0, EXAMPLE_OUTPUT) - @unittest.expectedFailure def test_ipv6_lookup(self): self.input_message = json.dumps(EXAMPLE_INPUT6) self.run_bot() From ab8c867bbe3a7f8aa9aa5ae9a652cc94de102949 Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Fri, 4 Sep 2015 14:45:02 +0200 Subject: [PATCH 22/51] ENH: ripe abuse contact bot queries also rest.db.ripe.net URL_DB_IP = 'http://rest.db.ripe.net/abuse-contact/{}.json' URL_DB_AS = 'http://rest.db.ripe.net/abuse-contact/as{}.json' Docs: https://github.com/RIPE-NCC/whois/wiki/WHOIS-REST-API-abuse-contact Signed-off-by: Sebastian Wagner --- .../experts/ripencc_abuse_contact/expert.py | 40 ++++++++++---- .../bots/experts/ripencc_abuse_contact/lib.py | 53 +++++++++++++------ .../ripencc_abuse_contact/test_expert.py | 11 +++- 3 files changed, 74 insertions(+), 30 deletions(-) diff --git a/intelmq/bots/experts/ripencc_abuse_contact/expert.py 
b/intelmq/bots/experts/ripencc_abuse_contact/expert.py index 18e50d692..8a4c0709b 100644 --- a/intelmq/bots/experts/ripencc_abuse_contact/expert.py +++ b/intelmq/bots/experts/ripencc_abuse_contact/expert.py @@ -1,22 +1,28 @@ # -*- coding: utf-8 -*- ''' Reference: -https://stat.ripe.net/data/abuse-contact-finder/data.json?resource=1.1.1.1 +https://stat.ripe.net/docs/data_api +https://github.com/RIPE-NCC/whois/wiki/WHOIS-REST-API-abuse-contact -TODO: -Load RIPE networks prefixes into memory. -Compare each IP with networks prefixes loaded. -If ip matchs, query RIPE +TODO: Load RIPE networks prefixes into memory. +TODO: Compare each IP with networks prefixes loaded. +TODO: If ip matches, query RIPE ''' from __future__ import unicode_literals import sys -from intelmq.bots.experts.ripencc_abuse_contact.lib import RIPENCC +from intelmq.bots.experts.ripencc_abuse_contact import lib from intelmq.lib.bot import Bot class RIPENCCExpertBot(Bot): + def init(self): + self.query_db_asn = getattr(self.parameters, 'query_ripe_db_asn', True) + self.query_db_ip = getattr(self.parameters, 'query_ripe_db_ip', True) + self.query_stat_asn = getattr(self.parameters, 'query_ripe_stat', True) + self.query_stat_ip = getattr(self.parameters, 'query_ripe_stat', True) + def process(self): event = self.receive_message() @@ -26,11 +32,23 @@ def process(self): for key in ['source.', 'destination.']: ip_key = key + "ip" - if event.contains(ip_key): - ip = event.value(ip_key) - email = RIPENCC.query(ip) - if email: - event.add(key + "abuse_contact", email, sanitize=True) + abuse_key = key + "abuse_contact" + asn_key = key + "asn" + + ip = event.get(ip_key, None) + abuse = event.get(abuse_key.split(',')) if abuse_key in event else [] + asn = event.get(asn_key, None) + if self.query_db_asn and asn: + abuse.extend(lib.query_asn(asn)) + if self.query_db_ip and ip: + abuse.extend(lib.query_ripedb(ip)) + if self.query_stat_asn and asn: + abuse.extend(lib.query_ripestat(asn)) + if self.query_stat_ip and 
ip: + abuse.extend(lib.query_ripestat(ip)) + + event.add(abuse_key, ','.join(set(abuse)), sanitize=True, + force=True) self.send_message(event) self.acknowledge_message() diff --git a/intelmq/bots/experts/ripencc_abuse_contact/lib.py b/intelmq/bots/experts/ripencc_abuse_contact/lib.py index bc5cf922f..a97f4f44c 100755 --- a/intelmq/bots/experts/ripencc_abuse_contact/lib.py +++ b/intelmq/bots/experts/ripencc_abuse_contact/lib.py @@ -3,20 +3,39 @@ import requests -class RIPENCC(): - - @staticmethod - def query(ip): - - url = ('https://stat.ripe.net/data/abuse-contact-finder/data.json?' - 'resource=' + ip) - response = requests.get(url, data="") - - try: - if (response.json()['data']['anti_abuse_contacts']['abuse_c']): - return (response.json()['data']['anti_abuse_contacts'] - ['abuse_c'][0]['email']) - else: - return None - except: - return None +URL_STAT = ('https://stat.ripe.net/data/abuse-contact-finder/' + 'data.json?resource={}') +URL_DB_IP = 'http://rest.db.ripe.net/abuse-contact/{}.json' +URL_DB_AS = 'http://rest.db.ripe.net/abuse-contact/as{}.json' + + +def query_ripestat(resource): + response = requests.get(URL_STAT.format(resource), data="") + if response.status_code != 200: + raise ValueError('HTTP response status code was {}.' 
+ ''.format(response.status_code)) + + try: + if (response.json()['data']['anti_abuse_contacts']['abuse_c']): + return [response.json()['data']['anti_abuse_contacts'] + ['abuse_c'][0]['email']] + else: + return [] + except: + return [] + + +def query_ripedb(ip=None, asn=None): + response = requests.get(URL_DB_IP.format(ip), data="") + if response.status_code != 200: + return [] + + return [response.json()['abuse-contacts']['email']] + + +def query_asn(asn): + response = requests.get(URL_DB_AS.format(asn), data="") + if response.status_code != 200: + return [] + + return [response.json()['abuse-contacts']['email']] diff --git a/intelmq/tests/bots/experts/ripencc_abuse_contact/test_expert.py b/intelmq/tests/bots/experts/ripencc_abuse_contact/test_expert.py index 8126c4e5a..9298af926 100644 --- a/intelmq/tests/bots/experts/ripencc_abuse_contact/test_expert.py +++ b/intelmq/tests/bots/experts/ripencc_abuse_contact/test_expert.py @@ -10,13 +10,16 @@ EXAMPLE_INPUT = {"__type": "Event", "source.ip": "93.184.216.34", # example.com - "destination.ip": "192.0.43.8", # iana.org, not in RIPENCC + "destination.ip": "193.238.157.5", # funkfeuer.at + "destination.asn": 35492, "time.observation": "2015-01-01T00:00:00+00:00", } EXAMPLE_OUTPUT = {"__type": "Event", "source.ip": "93.184.216.34", "source.abuse_contact": "abuse@edgecast.com", - "destination.ip": "192.0.43.8", + "destination.ip": "193.238.157.5", + "destination.abuse_contact": "abuse@funkfeuer.at", + "destination.asn": 35492, "time.observation": "2015-01-01T00:00:00+00:00", } EXAMPLE_INPUT6 = {"__type": "Event", @@ -39,6 +42,10 @@ class TestRIPENCCExpertBot(test.BotTestCase, unittest.TestCase): def set_bot(self): self.bot_reference = RIPENCCExpertBot self.default_input_message = json.dumps({'__type': 'Report'}) + self.sysconfig = {'query_ripe_db_asn': True, + 'query_ripe_db_ip': True, + 'query_ripe_stat': True, + } def test_ipv4_lookup(self): self.input_message = json.dumps(EXAMPLE_INPUT) From 
263c564ad7a5ca4e708ef681352441382ee767d1 Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Fri, 4 Sep 2015 14:53:42 +0200 Subject: [PATCH 23/51] DOC: documentation for abusech collectors and parsers Signed-off-by: Sebastian Wagner --- intelmq/bots/BOTS | 17 ++++++++++------- intelmq/bots/parsers/abusech/parser_domain.py | 8 ++++++++ intelmq/bots/parsers/abusech/parser_ip.py | 7 +++++++ 3 files changed, 25 insertions(+), 7 deletions(-) diff --git a/intelmq/bots/BOTS b/intelmq/bots/BOTS index 7c86cdafe..5c00eeb40 100644 --- a/intelmq/bots/BOTS +++ b/intelmq/bots/BOTS @@ -1,7 +1,7 @@ { "Collector": { "Abuse.ch Feodo Tracker Domains": { - "description": "Abuse.ch Feodo Tracker Domains is the bot responsible to get the report from source of information.", + "description": "The Feodo Tracker Feodo Domain Blocklist contains domain names (FQDN) used as C&C communication channel by the Feodo Trojan. These domains names are usually registered and operated by cybercriminals for the exclusive purpose of hosting a Feodo botnet controller. Hence you should expect no legit traffic to those domains. I highly recommend you to block/drop any traffic towards any Feodo C&C domain by using the Feodo Domain Blocklist. Please consider that domain names are usually only used by version B of the Feodo Trojan. C&C communication channels used by version A, version C and version D are not covered by this blocklist.", "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "Abuse.ch", @@ -10,7 +10,7 @@ } }, "Abuse.ch Feodo Tracker IPs": { - "description": "Abuse.ch Feodo Tracker IPs is the bot responsible to get the report from source of information.", + "description": "The Feodo Tracker Feodo IP Blocklist contains IP addresses (IPv4) used as C&C communication channel by the Feodo Trojan. 
This lists contains two types of IP address: Feodo C&C servers used by version A, version C and version D of the Feodo Trojan (these IP addresses are usually compromised servers running an nginx daemon on port 8080 TCP or 7779 TCP that is acting as proxy, forwarding all traffic to a tier 2 proxy node) and Feodo C&C servers used by version B which are usually used for the exclusive purpose of hosting a Feodo C&C server. Attention: Since Feodo C&C servers associated with version A, version C and version D are usually hosted on compromised servers, its likely that you also block/drop legit traffic e.g. towards websites hosted on a certain IP address acting as Feodo C&C for version A, version C and version D. If you only want to block/drop traffic to Feodo C&C servers hosted on bad IPs (version B), please use the blocklist BadIPs documented below.", "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "Abuse.ch", @@ -19,7 +19,7 @@ } }, "Abuse.ch Palevo Tracker Domains": { - "description": "Abuse.ch Palevo Tracker Domains is the bot responsible to get the report from source of information.", + "description": "Palevo C&C Domain Blocklists includes domain names which are being used as botnet C&C for the Palevo crimeware.", "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "Abuse.ch", @@ -28,7 +28,7 @@ } }, "Abuse.ch Palevo Tracker IPs": { - "description": "Abuse.ch Palevo Tracker IPs is the bot responsible to get the report from source of information.", + "description": "Palevo C&C IP Blocklist includes IP addresses which are being used as botnet C&C for the Palevo crimeware.", "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "Abuse.ch", @@ -37,7 +37,7 @@ } }, "Abuse.ch ZeuS Tracker Domains": { - "description": "Abuse.ch ZeuS Tracker Domains is the bot responsible to get the report from source of information.", + "description": "The ZeuS domain blocklist (BadDomains) is the 
recommended blocklist if you want to block only ZeuS domain names. It has domain names that ZeuS Tracker believes to be hijacked (level 2). Hence the false positive rate should be much lower compared to the standard ZeuS domain blocklist.", "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "Abuse.ch", @@ -46,7 +46,7 @@ } }, "Abuse.ch ZeuS Tracker IPs": { - "description": "Abuse.ch ZeuS Tracker IPs is the bot responsible to get the report from source of information.", + "description": "This list only includes IPv4 addresses that are used by the ZeuS trojan. It is the recommened list if you want to block only ZeuS IPs. It excludes IP addresses that ZeuS Tracker believes to be hijacked (level 2) or belong to a free web hosting provider (level 3). Hence the false postive rate should be much lower compared to the standard ZeuS IP blocklist.", "module": "intelmq.bots.collectors.http.collector_http", "parameters": { "feed": "Abuse.ch", @@ -518,9 +518,12 @@ } }, "RIPENCC": { - "description": "RIPENCC is the bot resposible to get the correspondent abuse contact from source IP and destination IP of the events", + "description": "RIPENCC is the bot resposible to get the correspondent abuse contact from source IP and destination IP of the events. RIPEstat documentation: https://stat.ripe.net/docs/data_api ", "module": "intelmq.bots.experts.ripencc_abuse_contact.expert", "parameters": { + "query_ripe_db_asn": true, + "query_ripe_db_ip": true, + "query_ripe_stat": true, "redis_cache_db": "5", "redis_cache_host": "127.0.0.1", "redis_cache_port": "6379", diff --git a/intelmq/bots/parsers/abusech/parser_domain.py b/intelmq/bots/parsers/abusech/parser_domain.py index 6f9d01e0e..bbeac366d 100644 --- a/intelmq/bots/parsers/abusech/parser_domain.py +++ b/intelmq/bots/parsers/abusech/parser_domain.py @@ -1,4 +1,12 @@ # -*- coding: utf-8 -*- +""" +Parsers simple newline separated list of domains. 
+ +Docs: + - https://feodotracker.abuse.ch/blocklist/ + - https://palevotracker.abuse.ch/blocklists.php + - https://zeustracker.abuse.ch/blocklist.php +""" from __future__ import unicode_literals import sys diff --git a/intelmq/bots/parsers/abusech/parser_ip.py b/intelmq/bots/parsers/abusech/parser_ip.py index 8f85f2f13..de7f1b062 100644 --- a/intelmq/bots/parsers/abusech/parser_ip.py +++ b/intelmq/bots/parsers/abusech/parser_ip.py @@ -1,4 +1,11 @@ # -*- coding: utf-8 -*- +""" +Parsers simple newline separated list of IPs. + +Docs: + - https://feodotracker.abuse.ch/blocklist/ + - https://palevotracker.abuse.ch/blocklists.php +""" from __future__ import unicode_literals import sys From a8a5ec1f3a041d5b12901ef00e9366dcf2302d50 Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Fri, 4 Sep 2015 14:55:01 +0200 Subject: [PATCH 24/51] DOC: Data-Harmo: remove ebnf, expand classification EBNF makes no sense here using dicts expand the classification table, still incomplete but better :) Signed-off-by: Sebastian Wagner --- docs/Data-Harmonization.md | 79 +++++++++----------------------------- 1 file changed, 18 insertions(+), 61 deletions(-) diff --git a/docs/Data-Harmonization.md b/docs/Data-Harmonization.md index 93c8f2fcd..c65012535 100644 --- a/docs/Data-Harmonization.md +++ b/docs/Data-Harmonization.md @@ -12,6 +12,8 @@ ## Overview +All messages (reports and events) are Python/JSON dictionaries. The key names and according types are defined by the so called *harmonization*. + The purpose of this document is to list and clearly define known **fields** in Abusehelper as well as Intelmq or similar systems. A field is a ```key=value``` pair. For a clear and unique definition of a field, we must define the **key** (field-name) as well as the possible **values**. A field belongs to an **event**. An event is basically a structured log record in the form ```key=value, key=value, key=value, …```. In the [List of known fields](#fields), each field is grouped by a **section**. 
We describe these sections briefly below. Every event **MUST** contain a timestamp field. @@ -22,53 +24,6 @@ Every event **MUST** contain a timestamp field. The keys can be grouped together in sub-fields, e.g. `source.ip` or `source.geolocation.latitude`. Thus, keys must match `[a-z_.]`. -## EBNF -To grasp the concept of fields, events, keys, values, etc. the following [EBNF](https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_Form) description might help. _Do not take this as a literal instruction for implementations_. The formatting of events and fields (and how fields are separated from each other) might vary depending on the encapsulating format (JSON, CSV , etc.) . This EBNF description is here to illustrate how these concepts work together (and are not complete): - - -``` -Events ::= Event - | Events '\n' Event - -Event ::= Field - | Event ', ' Field - - -Field ::= Key '=' Value - -Value ::= StringLiteral - | Number - -Key ::= [a-z0-9_-]+ -Number ::= [0-9]+ -StringLiteral - ::= '"' [^"]* '"' - | "'" [^']* "'" - -``` - -### Events -![Events EBNF](https://raw.githubusercontent.com/certtools/intelmq/master/docs/images/Events.png) - -### Event -![Event EBNF](https://raw.githubusercontent.com/certtools/intelmq/master/docs/images/Event.png) - -### Field -![Field EBNF](https://raw.githubusercontent.com/certtools/intelmq/master/docs/images/Field.png) - -### Key -![Key EBNF](https://raw.githubusercontent.com/certtools/intelmq/master/docs/images/Key.png) - -### Value -![Value EBNF](https://raw.githubusercontent.com/certtools/intelmq/master/docs/images/Value.png) - -### String Literal -![String Literal EBNF](https://raw.githubusercontent.com/certtools/intelmq/master/docs/images/StringLiteral.png) - -### Number -![Number EBNF](https://raw.githubusercontent.com/certtools/intelmq/master/docs/images/Number.png) - - ## Sections @@ -194,28 +149,30 @@ The following mapping is based on eCSIRT Taxonomy. 
Meaning of source, destination and local values for each classification type: -|Type|Source|Destination|Local| +|Type|Source|Destination|Local|Possible subtypes| |----|------|-----------|-----| |spam|*infected device*|targeted server|internal at source| -|malware|||| -|botnet drone|||| -|ransomware|||| -|malware configuration|||| +|malware|*infected device*||| +|botnet drone|*infected device*||| +|ransomware|*infected device*||| +|malware configuration|*infected device*||| |c&c|*connecting device*|sinkholed server|| |scanner|*scanning device*|scanned device|| -|exploit|||| +|exploit|*hosting server*||| |brute-force|*attacker*|target|| -|ids alert|||| -|defacement|||| -|compromised|||| -|backdoor|||| +|ids alert|*triggering device*||| +|defacement|*defaced website*||| +|compromised|*server*||| +|backdoor|*backdoored device*||| |ddos|*attacker*|target|| -|dropzone|||| -|phishing|||| -|vulnerable service|||| -|blacklist|||| +|dropzone|*server hosting stolen data*||| +|phishing|*phishing website*||| +|vulnerable service|*vulnerable device*||| +|blacklist|*blacklisted device*||| |unknown|||| +Field in italics is the interesting one for CERTs + ## Minimum requirements for events From 2cbfd87f20697602b263fa39f351e40b42c3fee4 Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Mon, 7 Sep 2015 11:23:02 +0200 Subject: [PATCH 25/51] DEV: travis: get rid of /opt dependency Signed-off-by: Sebastian Wagner --- .travis.yml | 5 ++--- intelmq/lib/message.py | 1 + intelmq/lib/test.py | 18 ++++++++++++++---- intelmq/tests/lib/test_bot.py | 23 ++++++++++++++--------- intelmq/tests/lib/test_message.py | 5 +++++ scripts/vagrant/bootstrap.sh | 4 ++-- setup.py | 3 ++- 7 files changed, 40 insertions(+), 19 deletions(-) diff --git a/.travis.yml b/.travis.yml index ec9448ee6..da29b44e4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,12 +1,11 @@ language: python python: - "2.7" - - "3.2" - "3.3" - "3.4" # command to install dependencies -install: +install: - "pip install -r REQUIREMENTS" - 
- "python setup.py install" + - "python setup_travis.py install" # command to run tests script: nosetests diff --git a/intelmq/lib/message.py b/intelmq/lib/message.py index 6f57478ea..c6338be74 100644 --- a/intelmq/lib/message.py +++ b/intelmq/lib/message.py @@ -13,6 +13,7 @@ from intelmq import HARMONIZATION_CONF_FILE from intelmq.lib import utils + harm_config = utils.load_configuration(HARMONIZATION_CONF_FILE) diff --git a/intelmq/lib/test.py b/intelmq/lib/test.py index 40131f52a..fa638342d 100644 --- a/intelmq/lib/test.py +++ b/intelmq/lib/test.py @@ -11,8 +11,9 @@ import io import json import logging +import os +import pkg_resources -import intelmq.lib.message as message import intelmq.lib.pipeline as pipeline import intelmq.lib.utils as utils import mock @@ -20,7 +21,7 @@ from intelmq import PIPELINE_CONF_FILE, RUNTIME_CONF_FILE, SYSTEM_CONF_FILE -def mocked_config(bot_id, src_name, dst_names, sysconfig): +def mocked_config(bot_id='test-bot', src_name='', dst_names=(), sysconfig={}): def mock(conf_file): if conf_file == PIPELINE_CONF_FILE: return {bot_id: {"source-queue": src_name, @@ -45,13 +46,22 @@ def mock(conf_file): } conf.update(sysconfig) return conf + elif conf_file.startswith('/opt/intelmq/etc/'): + confname = os.path.join('conf/', os.path.split(conf_file)[-1]) + fname = pkg_resources.resource_filename('intelmq', + confname) + with open(fname, 'rt') as fpconfig: + return json.load(fpconfig) else: with open(conf_file, 'r') as fpconfig: - config = json.loads(fpconfig.read()) - return config + return json.load(fpconfig) return mock +with mock.patch('intelmq.lib.utils.load_configuration', new=mocked_config()): + import intelmq.lib.message as message + + def mocked_logger(logger): def log(name, log_path=None, log_level=None): return logger diff --git a/intelmq/tests/lib/test_bot.py b/intelmq/tests/lib/test_bot.py index adbef2791..4fc5f1c36 100644 --- a/intelmq/tests/lib/test_bot.py +++ b/intelmq/tests/lib/test_bot.py @@ -7,16 +7,19 @@ import io import 
json import logging +import os +import pkg_resources import unittest import intelmq.lib.pipeline as pipeline import intelmq.lib.utils as utils import mock from intelmq import PIPELINE_CONF_FILE, RUNTIME_CONF_FILE, SYSTEM_CONF_FILE -from intelmq.tests.bots import test_dummy_bot +from intelmq.lib.test import mocked_logger -def mocked_config(bot_id, src_name, dst_names, raise_on_connect): +def mocked_config(bot_id='', src_name='', dst_names=(), + raise_on_connect=False): def load_conf(conf_file): if conf_file == PIPELINE_CONF_FILE: @@ -37,17 +40,19 @@ def load_conf(conf_file): "error_max_retries": 0, "exit_on_stop": False, } + elif conf_file.startswith('/opt/intelmq/etc/'): + confname = os.path.join('conf/', os.path.split(conf_file)[-1]) + fname = pkg_resources.resource_filename('intelmq', + confname) + with open(fname, 'rt') as fpconfig: + return json.load(fpconfig) else: with open(conf_file, 'r') as fpconfig: - config = json.loads(fpconfig.read()) - return config + return json.load(fpconfig) return load_conf - -def mocked_logger(logger): - def log(name, log_path=None, log_level=None): - return logger - return log +with mock.patch('intelmq.lib.utils.load_configuration', new=mocked_config()): + from intelmq.tests.bots import test_dummy_bot class TestBot(unittest.TestCase): diff --git a/intelmq/tests/lib/test_message.py b/intelmq/tests/lib/test_message.py index db41d073d..4c5a7d516 100644 --- a/intelmq/tests/lib/test_message.py +++ b/intelmq/tests/lib/test_message.py @@ -9,10 +9,15 @@ from __future__ import unicode_literals import json +import pkg_resources import six import unittest import intelmq.lib.exceptions as exceptions +import intelmq.lib.utils as utils +conf_filename = pkg_resources.resource_filename('intelmq', + 'conf/harmonization.conf') +harm_config = utils.load_configuration(conf_filename) import intelmq.lib.message as message LOREM_BASE64 = 'bG9yZW0gaXBzdW0=' diff --git a/scripts/vagrant/bootstrap.sh b/scripts/vagrant/bootstrap.sh index 
2210ee47d..3c77cd8f5 100755 --- a/scripts/vagrant/bootstrap.sh +++ b/scripts/vagrant/bootstrap.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -#Declare Variables +#Declare Variables #IntelMQ INTELMQ_REPO="https://github.com/certtools/intelmq.git" #BRANCH="master" @@ -26,7 +26,7 @@ function intelmq_install { pip install -r REQUIREMENTS; fi #Install - python setup.py install + python setup_auto.py install useradd -d /opt/intelmq -U -s /bin/bash intelmq chmod -R 0770 /opt/intelmq chown -R intelmq.intelmq /opt/intelmq diff --git a/setup.py b/setup.py index fb6ce237f..cc243c624 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,4 @@ -from __future__ import print_function, unicode_literals +from __future__ import print_function import os import sys @@ -40,6 +40,7 @@ maintainer='Tomas Lima', maintainer_email='synchroack@gmail.com', packages=find_packages(), + package_data={'intelmq': ['conf/*.conf']}, url='http://pypi.python.org/pypi/intelmq/', license='AGPLv3', description="IntelMQ Tool", From f1515a4cec5bc25c50ba42a994d66c1b4ec532b9 Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Mon, 7 Sep 2015 09:46:01 +0200 Subject: [PATCH 26/51] ENH: Event init takes report as template Signed-off-by: Sebastian Wagner --- docs/Developers-Guide.md | 2 +- intelmq/bots/parsers/abusech/parser_domain.py | 6 +----- intelmq/bots/parsers/abusech/parser_ip.py | 6 +----- intelmq/bots/parsers/alienvault/parser.py | 6 +----- intelmq/bots/parsers/arbor/parser.py | 6 +----- intelmq/bots/parsers/autoshun/parser.py | 6 +----- intelmq/bots/parsers/blocklistde/parser.py | 6 +----- intelmq/bots/parsers/ci_army/parser.py | 6 +----- .../bots/parsers/cleanmx/parser_phishing.py | 6 +----- intelmq/bots/parsers/cleanmx/parser_virus.py | 6 +----- .../bots/parsers/cymru_full_bogons/parser.py | 6 +----- intelmq/bots/parsers/danger_rulez/parser.py | 6 +----- .../parsers/dragonresearchgroup/parser_ssh.py | 6 +----- .../parsers/dragonresearchgroup/parser_vnc.py | 6 +----- intelmq/bots/parsers/dshield/parser_asn.py | 6 
+----- intelmq/bots/parsers/dshield/parser_block.py | 6 +----- intelmq/bots/parsers/dshield/parser_domain.py | 6 +----- intelmq/bots/parsers/dyn/parser.py | 13 ++---------- intelmq/bots/parsers/fraunhofer/parser_dga.py | 6 +----- intelmq/bots/parsers/hphosts/parser.py | 6 +----- .../malc0de/parser_domain_blacklist.py | 6 +----- .../parsers/malc0de/parser_ip_blacklist.py | 6 +----- .../bots/parsers/malwaredomainlist/parser.py | 6 +----- intelmq/bots/parsers/malwaredomains/parser.py | 6 +----- .../parsers/malwaregroup/parser_domains.py | 6 +----- .../bots/parsers/malwaregroup/parser_ips.py | 6 +----- .../parsers/malwaregroup/parser_proxies.py | 6 +----- .../malwarepatrol/parser_dansguardian.py | 6 +----- intelmq/bots/parsers/openbl/parser.py | 6 +----- intelmq/bots/parsers/openphish/parser.py | 6 +----- intelmq/bots/parsers/phishtank/parser.py | 6 +----- intelmq/bots/parsers/spamhaus/parser_cert.py | 7 +------ intelmq/bots/parsers/spamhaus/parser_drop.py | 6 +----- intelmq/bots/parsers/taichung/parser.py | 6 +----- intelmq/bots/parsers/turris/parser.py | 6 +----- intelmq/bots/parsers/urlvir/parser_hosts.py | 6 +----- intelmq/bots/parsers/urlvir/parser_ips.py | 6 +----- intelmq/bots/parsers/vxvault/parser.py | 6 +----- intelmq/lib/message.py | 20 +++++++++++++++++++ intelmq/lib/test.py | 1 - 40 files changed, 59 insertions(+), 194 deletions(-) diff --git a/docs/Developers-Guide.md b/docs/Developers-Guide.md index 1c223ea18..57271bbf3 100644 --- a/docs/Developers-Guide.md +++ b/docs/Developers-Guide.md @@ -261,7 +261,7 @@ class ExampleParserBot(Bot): self.acknowledge_message() return - event = Event() + event = Event(report) # copies feed.name, time.observation ... 
# implement the logic here event.add('additional_information', 'Nothing here') diff --git a/intelmq/bots/parsers/abusech/parser_domain.py b/intelmq/bots/parsers/abusech/parser_domain.py index bbeac366d..dd74bca65 100644 --- a/intelmq/bots/parsers/abusech/parser_domain.py +++ b/intelmq/bots/parsers/abusech/parser_domain.py @@ -32,14 +32,10 @@ def process(self): if row.startswith("#") or len(row) == 0: continue - event = Event() + event = Event(report) event.add('classification.type', u'c&c') event.add('source.fqdn', row, sanitize=True) - event.add('time.observation', report.value( - 'time.observation'), sanitize=True) - event.add('feed.name', report.value("feed.name")) - event.add('feed.url', report.value("feed.url")) event.add("raw", row, sanitize=True) self.send_message(event) diff --git a/intelmq/bots/parsers/abusech/parser_ip.py b/intelmq/bots/parsers/abusech/parser_ip.py index de7f1b062..000849191 100644 --- a/intelmq/bots/parsers/abusech/parser_ip.py +++ b/intelmq/bots/parsers/abusech/parser_ip.py @@ -31,14 +31,10 @@ def process(self): if row.startswith("#") or len(row) == 0: continue - event = Event() + event = Event(report) event.add('source.ip', row, sanitize=True) event.add('classification.type', u'c&c') - event.add('time.observation', report.value( - 'time.observation'), sanitize=True) - event.add('feed.name', report.value("feed.name")) - event.add('feed.url', report.value("feed.url")) event.add("raw", row, sanitize=True) self.send_message(event) diff --git a/intelmq/bots/parsers/alienvault/parser.py b/intelmq/bots/parsers/alienvault/parser.py index 9409d788f..eaead4a62 100644 --- a/intelmq/bots/parsers/alienvault/parser.py +++ b/intelmq/bots/parsers/alienvault/parser.py @@ -45,7 +45,7 @@ def process(self): for ctype in classification_types: - event = Event() + event = Event(report) if ctype.lower() in CLASSIFICATION: event.add('classification.type', @@ -69,10 +69,6 @@ def process(self): event.add('source.geolocation.longitude', geo_longitude.strip(), 
sanitize=True) - event.add('time.observation', report.value( - 'time.observation'), sanitize=True) - event.add('feed.name', report.value("feed.name")) - event.add('feed.url', report.value("feed.url")) event.add("raw", row, sanitize=True) self.send_message(event) diff --git a/intelmq/bots/parsers/arbor/parser.py b/intelmq/bots/parsers/arbor/parser.py index 9f111a1e9..c9b57a45f 100644 --- a/intelmq/bots/parsers/arbor/parser.py +++ b/intelmq/bots/parsers/arbor/parser.py @@ -23,12 +23,8 @@ def process(self): if len(row) == 0 or row.startswith('other'): continue - event = Event() + event = Event(report) - event.add('time.observation', report.value( - 'time.observation'), sanitize=True) - event.add('feed.name', report.value("feed.name")) - event.add('feed.url', report.value("feed.url")) event.add('classification.type', u'brute-force') event.add("raw", row, sanitize=True) diff --git a/intelmq/bots/parsers/autoshun/parser.py b/intelmq/bots/parsers/autoshun/parser.py index 582dee5e6..8f46d047e 100644 --- a/intelmq/bots/parsers/autoshun/parser.py +++ b/intelmq/bots/parsers/autoshun/parser.py @@ -35,7 +35,7 @@ def process(self): parser = HTMLParser() for row in raw_report_splitted: - event = Event() + event = Event(report) row = row.strip() @@ -65,11 +65,7 @@ def process(self): if not event.contains("classification.type"): event.add("classification.type", u'unknown') - event.add('time.observation', - report.value('time.observation'), sanitize=True) event.add("time.source", last_seen, sanitize=True) - event.add('feed.name', report.value("feed.name")) - event.add('feed.url', report.value("feed.url")) event.add("source.ip", ip, sanitize=True) event.add("event_description.text", description, sanitize=True) event.add("raw", row, sanitize=True) diff --git a/intelmq/bots/parsers/blocklistde/parser.py b/intelmq/bots/parsers/blocklistde/parser.py index 42c42d11a..a9348230d 100644 --- a/intelmq/bots/parsers/blocklistde/parser.py +++ b/intelmq/bots/parsers/blocklistde/parser.py @@
-94,15 +94,11 @@ def process(self): classification_type = value for row in raw_report.split('\n'): - event = Event() + event = Event(report) event.add('source.ip', row.strip(), sanitize=True) event.add(key, classification_type, sanitize=True) - event.add('time.observation', report.value( - 'time.observation'), sanitize=True) - event.add('feed.name', report.value("feed.name")) - event.add('feed.url', report.value("feed.url")) event.add("raw", row, sanitize=True) self.send_message(event) diff --git a/intelmq/bots/parsers/ci_army/parser.py b/intelmq/bots/parsers/ci_army/parser.py index 3f9fefbb2..74869685e 100644 --- a/intelmq/bots/parsers/ci_army/parser.py +++ b/intelmq/bots/parsers/ci_army/parser.py @@ -23,12 +23,8 @@ def process(self): if row.startswith('#') or row == "": continue - event = Event() + event = Event(report) - event.add('time.observation', report.value( - 'time.observation'), sanitize=True) - event.add('feed.name', report.value("feed.name")) - event.add('feed.url', report.value("feed.url")) event.add('source.ip', row, sanitize=True) event.add('classification.type', u'blacklist') event.add("raw", row, sanitize=True) diff --git a/intelmq/bots/parsers/cleanmx/parser_phishing.py b/intelmq/bots/parsers/cleanmx/parser_phishing.py index b8b00b56e..2d254962d 100644 --- a/intelmq/bots/parsers/cleanmx/parser_phishing.py +++ b/intelmq/bots/parsers/cleanmx/parser_phishing.py @@ -51,7 +51,7 @@ def process(self): rows = csv.DictReader(fp) for row in rows: - event = Event() + event = Event(report) for key, value in row.items(): if not value: @@ -71,10 +71,6 @@ def process(self): event.add(key, value, sanitize=True) - event.add('time.observation', report.value( - 'time.observation'), sanitize=True) - event.add('feed.name', report.value("feed.name")) - event.add('feed.url', report.value("feed.url")) event.add('classification.type', u'phishing') event.add("raw", ",".join(row), sanitize=True) diff --git a/intelmq/bots/parsers/cleanmx/parser_virus.py 
b/intelmq/bots/parsers/cleanmx/parser_virus.py index df38b30d0..999084bd1 100644 --- a/intelmq/bots/parsers/cleanmx/parser_virus.py +++ b/intelmq/bots/parsers/cleanmx/parser_virus.py @@ -53,7 +53,7 @@ def process(self): rows = csv.DictReader(fp) for row in rows: - event = Event() + event = Event(report) for key, value in row.items(): if not value: @@ -82,10 +82,6 @@ def process(self): event.add(key, value, sanitize=True) - event.add('time.observation', report.value( - 'time.observation'), sanitize=True) - event.add('feed.name', report.value("feed.name")) - event.add('feed.url', report.value("feed.url")) event.add('classification.type', u'malware') event.add("raw", ",".join(row), sanitize=True) diff --git a/intelmq/bots/parsers/cymru_full_bogons/parser.py b/intelmq/bots/parsers/cymru_full_bogons/parser.py index ba4b4bd79..ae3c645bb 100644 --- a/intelmq/bots/parsers/cymru_full_bogons/parser.py +++ b/intelmq/bots/parsers/cymru_full_bogons/parser.py @@ -25,18 +25,14 @@ def process(self): if not len(val) or val.startswith('#') or val.startswith('//'): continue - event = Event() + event = Event(report) if IPAddress.is_valid(val, sanitize=True): event.add('source.ip', val, sanitize=True) else: event.add('source.network', val, sanitize=True) - event.add('time.observation', - report.value('time.observation'), sanitize=True) event.add('classification.type', u'blacklist') - event.add('feed.name', report.value("feed.name")) - event.add('feed.url', report.value("feed.url")) event.add('raw', row, sanitize=True) self.send_message(event) diff --git a/intelmq/bots/parsers/danger_rulez/parser.py b/intelmq/bots/parsers/danger_rulez/parser.py index e707b63a1..612ee0767 100644 --- a/intelmq/bots/parsers/danger_rulez/parser.py +++ b/intelmq/bots/parsers/danger_rulez/parser.py @@ -26,7 +26,7 @@ def process(self): if row.startswith('#'): continue - event = Event() + event = Event(report) match = re.search(REGEX_IP, row) if match: @@ -36,12 +36,8 @@ def process(self): if match: timestamp = 
match.group(1) + " UTC" - event.add('time.observation', report.value( - 'time.observation'), sanitize=True) event.add('time.source', timestamp, sanitize=True) event.add('source.ip', ip, sanitize=True) - event.add('feed.name', report.value("feed.name")) - event.add('feed.url', report.value("feed.url")) event.add('classification.type', u'brute-force') event.add("raw", row, sanitize=True) diff --git a/intelmq/bots/parsers/dragonresearchgroup/parser_ssh.py b/intelmq/bots/parsers/dragonresearchgroup/parser_ssh.py index db4d057c8..69478f165 100644 --- a/intelmq/bots/parsers/dragonresearchgroup/parser_ssh.py +++ b/intelmq/bots/parsers/dragonresearchgroup/parser_ssh.py @@ -25,7 +25,7 @@ def process(self): continue splitted_row = row.split('|') - event = Event() + event = Event(report) columns = ["source.asn", "source.as_name", "source.ip", "time.source"] @@ -41,10 +41,6 @@ def process(self): event.add(key, value, sanitize=True) - event.add('time.observation', report.value( - 'time.observation'), sanitize=True) - event.add('feed.name', report.value("feed.name")) - event.add('feed.url', report.value("feed.url")) event.add('classification.type', u'brute-force') event.add('protocol.application', u'ssh') event.add("raw", row, sanitize=True) diff --git a/intelmq/bots/parsers/dragonresearchgroup/parser_vnc.py b/intelmq/bots/parsers/dragonresearchgroup/parser_vnc.py index 16129914f..c3385ae2f 100644 --- a/intelmq/bots/parsers/dragonresearchgroup/parser_vnc.py +++ b/intelmq/bots/parsers/dragonresearchgroup/parser_vnc.py @@ -25,7 +25,7 @@ def process(self): continue splitted_row = row.split('|') - event = Event() + event = Event(report) columns = ["source.asn", "source.as_name", "source.ip", "time.source"] @@ -38,10 +38,6 @@ def process(self): event.add(key, value, sanitize=True) - event.add('time.observation', report.value( - 'time.observation'), sanitize=True) - event.add('feed.name', report.value("feed.name")) - event.add('feed.url', report.value("feed.url")) 
event.add('classification.type', u'brute-force') event.add('protocol.application', u'vnc') event.add("raw", row, sanitize=True) diff --git a/intelmq/bots/parsers/dshield/parser_asn.py b/intelmq/bots/parsers/dshield/parser_asn.py index dad4cc976..ea1ce6308 100644 --- a/intelmq/bots/parsers/dshield/parser_asn.py +++ b/intelmq/bots/parsers/dshield/parser_asn.py @@ -42,15 +42,11 @@ def process(self): source_ip = ".".join(parts) - event = Event() + event = Event(report) event.add('source.ip', source_ip, sanitize=True) event.add('classification.type', u'brute-force') - event.add('time.observation', report.value( - 'time.observation'), sanitize=True) event.add("time.source", last_seen, sanitize=True) - event.add('feed.name', report.value("feed.name")) - event.add('feed.url', report.value("feed.url")) event.add("raw", row, sanitize=True) self.send_message(event) diff --git a/intelmq/bots/parsers/dshield/parser_block.py b/intelmq/bots/parsers/dshield/parser_block.py index 6b1a90d57..b9f0eb007 100644 --- a/intelmq/bots/parsers/dshield/parser_block.py +++ b/intelmq/bots/parsers/dshield/parser_block.py @@ -34,14 +34,10 @@ def process(self): network_mask = values[2] network = '%s/%s' % (network_ip, network_mask) - event = Event() + event = Event(report) event.add('source.network', network, sanitize=True) event.add('classification.type', u'blacklist') - event.add('time.observation', report.value( - 'time.observation'), sanitize=True) - event.add('feed.name', report.value("feed.name")) - event.add('feed.url', report.value("feed.url")) event.add("raw", row, sanitize=True) self.send_message(event) diff --git a/intelmq/bots/parsers/dshield/parser_domain.py b/intelmq/bots/parsers/dshield/parser_domain.py index 53f60c732..3d1a99bd2 100644 --- a/intelmq/bots/parsers/dshield/parser_domain.py +++ b/intelmq/bots/parsers/dshield/parser_domain.py @@ -25,14 +25,10 @@ def process(self): if row.startswith("#") or len(row) == 0 or row == "Site": continue - event = Event() + event = 
Event(report) event.add('classification.type', u'malware') event.add('source.fqdn', row, sanitize=True) - event.add('time.observation', report.value( - 'time.observation'), sanitize=True) - event.add('feed.name', report.value("feed.name")) - event.add('feed.url', report.value("feed.url")) event.add("raw", row, sanitize=True) self.send_message(event) diff --git a/intelmq/bots/parsers/dyn/parser.py b/intelmq/bots/parsers/dyn/parser.py index 20e7d13f4..c8f5038e0 100644 --- a/intelmq/bots/parsers/dyn/parser.py +++ b/intelmq/bots/parsers/dyn/parser.py @@ -34,12 +34,8 @@ def process(self): infected_fqdn = splitted_row[0].split("checking domain:")[1] compromised_url = splitted_row[1].split("seems to be INFECTED:")[1] - event_infected = Event() - event_infected.add('time.observation', - report.value('time.observation'), sanitize=True) + event_infected = Event(report) event_infected.add('classification.type', 'malware') - event_infected.add('feed.name', report.value("feed.name")) - event_infected.add('feed.url', report.value("feed.url")) event_infected.add('source.fqdn', infected_fqdn, sanitize=True) event_infected.add('destination.url', compromised_url, sanitize=True) @@ -50,13 +46,8 @@ def process(self): self.send_message(event_infected) - event_compromised = Event() - event_compromised.add('time.observation', - report.value('time.observation'), - sanitize=True) + event_compromised = Event(report) event_compromised.add('classification.type', 'compromised') - event_compromised.add('feed.name', report.value("feed.name")) - event_compromised.add('feed.url', report.value("feed.url")) event_compromised.add('source.url', compromised_url, sanitize=True) event_compromised.add('event_description.text', 'host has been compromised and has ' diff --git a/intelmq/bots/parsers/fraunhofer/parser_dga.py b/intelmq/bots/parsers/fraunhofer/parser_dga.py index ad31be606..ce9097c88 100644 --- a/intelmq/bots/parsers/fraunhofer/parser_dga.py +++ b/intelmq/bots/parsers/fraunhofer/parser_dga.py 
@@ -30,17 +30,13 @@ def process(self): # add all lists together, only one loop needed for row in sum(dict_report.values(), []): - event = Event() + event = Event(report) event.add('classification.type', u'c&c') try: event.add('destination.ip', row, sanitize=True) except InvalidValue: event.add('destination.fqdn', row, sanitize=True) - event.add('time.observation', - report.value('time.observation'), sanitize=True) - event.add('feed.name', report.value("feed.name")) - event.add('feed.url', report.value("feed.url")) event.add("raw", row, sanitize=True) self.send_message(event) diff --git a/intelmq/bots/parsers/hphosts/parser.py b/intelmq/bots/parsers/hphosts/parser.py index 2d871a45a..af473c467 100644 --- a/intelmq/bots/parsers/hphosts/parser.py +++ b/intelmq/bots/parsers/hphosts/parser.py @@ -37,7 +37,7 @@ def process(self): if values[1].lower().strip() == "localhost": continue - event = Event() + event = Event(report) if IPAddress.is_valid(values[1], sanitize=True): event.add("source.ip", values[1], sanitize=True) @@ -45,10 +45,6 @@ def process(self): event.add("source.fqdn", values[1], sanitize=True) event.add('classification.type', u'blacklist') - event.add('time.observation', report.value( - 'time.observation'), sanitize=True) - event.add('feed.name', report.value("feed.name")) - event.add('feed.url', report.value("feed.url")) event.add("raw", row, sanitize=True) self.send_message(event) diff --git a/intelmq/bots/parsers/malc0de/parser_domain_blacklist.py b/intelmq/bots/parsers/malc0de/parser_domain_blacklist.py index f9f3acb75..0a908eeb1 100644 --- a/intelmq/bots/parsers/malc0de/parser_domain_blacklist.py +++ b/intelmq/bots/parsers/malc0de/parser_domain_blacklist.py @@ -24,13 +24,9 @@ def process(self): if row == "" or row[:2] == "//": continue - event = Event() + event = Event(report) - event.add('time.observation', report.value( - 'time.observation'), sanitize=True) event.add('classification.type', u'malware') - event.add('feed.name', 
report.value("feed.name")) - event.add('feed.url', report.value("feed.url")) event.add('source.fqdn', row.split(" ")[1], sanitize=True) event.add('raw', row, sanitize=True) diff --git a/intelmq/bots/parsers/malc0de/parser_ip_blacklist.py b/intelmq/bots/parsers/malc0de/parser_ip_blacklist.py index b6de899c6..9a260a7c9 100644 --- a/intelmq/bots/parsers/malc0de/parser_ip_blacklist.py +++ b/intelmq/bots/parsers/malc0de/parser_ip_blacklist.py @@ -24,13 +24,9 @@ def process(self): if row == "" or row[:2] == "//": continue - event = Event() + event = Event(report) - event.add('time.observation', report.value( - 'time.observation'), sanitize=True) event.add('classification.type', u'malware') - event.add('feed.name', report.value("feed.name")) - event.add('feed.url', report.value("feed.url")) event.add('source.ip', row, sanitize=True) event.add('raw', row, sanitize=True) diff --git a/intelmq/bots/parsers/malwaredomainlist/parser.py b/intelmq/bots/parsers/malwaredomainlist/parser.py index 3d030cb3b..f07c45a43 100644 --- a/intelmq/bots/parsers/malwaredomainlist/parser.py +++ b/intelmq/bots/parsers/malwaredomainlist/parser.py @@ -32,7 +32,7 @@ def process(self): raw_report = utils.base64_decode(report.value("raw")) for row in unicodecsv.reader(StringIO(raw_report), encoding='utf-8'): - event = Event() + event = Event(report) for key, value in zip(columns, row): @@ -45,10 +45,6 @@ def process(self): event.add(key, value, sanitize=True) - event.add('time.observation', report.value( - 'time.observation'), sanitize=True) - event.add('feed.name', report.value("feed.name")) - event.add('feed.url', report.value("feed.url")) event.add('classification.type', u'malware') event.add("raw", ",".join(row), sanitize=True) diff --git a/intelmq/bots/parsers/malwaredomains/parser.py b/intelmq/bots/parsers/malwaredomains/parser.py index 413ce4f4e..2ad2715c9 100644 --- a/intelmq/bots/parsers/malwaredomains/parser.py +++ b/intelmq/bots/parsers/malwaredomains/parser.py @@ -36,7 +36,7 @@ def 
process(self): values = row.split('\t')[1:] - event = Event() + event = Event(report) event.add('source.fqdn', values[1], sanitize=True) event.add('event_description.text', values[2], sanitize=True) @@ -47,11 +47,7 @@ def process(self): values[i]+"T00:00:00+00:00", sanitize=True) break - event.add('time.observation', report.value( - 'time.observation'), sanitize=True) event.add('classification.type', u'malware') - event.add('feed.name', report.value("feed.name")) - event.add('feed.url', report.value("feed.url")) event.add('raw', row, sanitize=True) self.send_message(event) diff --git a/intelmq/bots/parsers/malwaregroup/parser_domains.py b/intelmq/bots/parsers/malwaregroup/parser_domains.py index ae0e656ba..5d6cd8917 100644 --- a/intelmq/bots/parsers/malwaregroup/parser_domains.py +++ b/intelmq/bots/parsers/malwaregroup/parser_domains.py @@ -36,14 +36,10 @@ def process(self): time_source = row_splitted[8].replace("", "").strip() time_source = time_source + " 00:00:00 UTC" - event = Event() + event = Event(report) - event.add('time.observation', report.value( - 'time.observation'), sanitize=True) event.add('time.source', time_source, sanitize=True) event.add('classification.type', u'malware') - event.add('feed.name', report.value("feed.name")) - event.add('feed.url', report.value("feed.url")) event.add('source.fqdn', fqdn, sanitize=True) event.add('source.ip', ip, sanitize=True) event.add('raw', row, sanitize=True) diff --git a/intelmq/bots/parsers/malwaregroup/parser_ips.py b/intelmq/bots/parsers/malwaregroup/parser_ips.py index 3e414a405..eb9d0a1a5 100644 --- a/intelmq/bots/parsers/malwaregroup/parser_ips.py +++ b/intelmq/bots/parsers/malwaregroup/parser_ips.py @@ -35,14 +35,10 @@ def process(self): time_source = row_splitted[6].replace("", "").strip() time_source = time_source + " 00:00:00 UTC" - event = Event() + event = Event(report) - event.add('time.observation', report.value( - 'time.observation'), sanitize=True) event.add('time.source', time_source, 
sanitize=True) event.add('classification.type', u'malware') - event.add('feed.name', report.value("feed.name")) - event.add('feed.url', report.value("feed.url")) event.add('source.ip', ip, sanitize=True) event.add('raw', row, sanitize=True) diff --git a/intelmq/bots/parsers/malwaregroup/parser_proxies.py b/intelmq/bots/parsers/malwaregroup/parser_proxies.py index 25147aff3..aaa433e5e 100644 --- a/intelmq/bots/parsers/malwaregroup/parser_proxies.py +++ b/intelmq/bots/parsers/malwaregroup/parser_proxies.py @@ -37,14 +37,10 @@ def process(self): time_source = time_source + " 00:00:00 UTC" # TODO: Last line ignores time from source - event = Event() + event = Event(report) - event.add('time.observation', report.value( - 'time.observation'), sanitize=True) event.add('time.source', time_source, sanitize=True) event.add('classification.type', u'unknown') - event.add('feed.name', report.value("feed.name")) - event.add('feed.url', report.value("feed.url")) event.add('source.ip', ip, sanitize=True) event.add('source.port', port, sanitize=True) event.add('event_description.text', u'Malicious proxy') diff --git a/intelmq/bots/parsers/malwarepatrol/parser_dansguardian.py b/intelmq/bots/parsers/malwarepatrol/parser_dansguardian.py index ecd173802..d06c17f8d 100644 --- a/intelmq/bots/parsers/malwarepatrol/parser_dansguardian.py +++ b/intelmq/bots/parsers/malwarepatrol/parser_dansguardian.py @@ -24,17 +24,13 @@ def process(self): if len(row) == 0 or row.startswith('#'): continue - event = Event() + event = Event(report) splitted_row = row.split() columns = ["source.url"] for key, value in zip(columns, splitted_row): event.add(key, value, sanitize=True) - event.add('time.observation', report.value( - 'time.observation'), sanitize=True) - event.add('feed.name', report.value("feed.name")) - event.add('feed.url', report.value("feed.url")) event.add('classification.type', u'malware') event.add("raw", row, sanitize=True) diff --git a/intelmq/bots/parsers/openbl/parser.py 
b/intelmq/bots/parsers/openbl/parser.py index e45e4c5ff..d444d0174 100644 --- a/intelmq/bots/parsers/openbl/parser.py +++ b/intelmq/bots/parsers/openbl/parser.py @@ -26,7 +26,7 @@ def process(self): continue splitted_row = row.split() - event = Event() + event = Event(report) columns = ["source.ip", "time.source"] @@ -37,10 +37,6 @@ def process(self): event.add(key, value.strip(), sanitize=True) - event.add('time.observation', report.value( - 'time.observation'), sanitize=True) - event.add('feed.name', report.value("feed.name")) - event.add('feed.url', report.value("feed.url")) event.add('classification.type', u'blacklist') event.add("raw", row, sanitize=True) diff --git a/intelmq/bots/parsers/openphish/parser.py b/intelmq/bots/parsers/openphish/parser.py index 61402b68f..c7bfa7477 100644 --- a/intelmq/bots/parsers/openphish/parser.py +++ b/intelmq/bots/parsers/openphish/parser.py @@ -24,13 +24,9 @@ def process(self): if row == "": continue - event = Event() + event = Event(report) - event.add('time.observation', report.value( - 'time.observation'), sanitize=True) event.add('classification.type', u'phishing') - event.add('feed.name', report.value("feed.name")) - event.add('feed.url', report.value("feed.url")) event.add('source.url', row, sanitize=True) event.add('raw', row, sanitize=True) diff --git a/intelmq/bots/parsers/phishtank/parser.py b/intelmq/bots/parsers/phishtank/parser.py index bc7ef5630..9f7121a67 100644 --- a/intelmq/bots/parsers/phishtank/parser.py +++ b/intelmq/bots/parsers/phishtank/parser.py @@ -37,7 +37,7 @@ def process(self): if "phish_id" in row: continue - event = Event() + event = Event(report) for key, value in zip(columns, row): @@ -46,10 +46,6 @@ def process(self): event.add(key, value, sanitize=True) - event.add('time.observation', report.value( - 'time.observation'), sanitize=True) - event.add('feed.name', report.value("feed.name")) - event.add('feed.url', report.value("feed.url")) event.add('classification.type', u'phishing') 
event.add("raw", ",".join(row), sanitize=True) diff --git a/intelmq/bots/parsers/spamhaus/parser_cert.py b/intelmq/bots/parsers/spamhaus/parser_cert.py index 2762fb790..b209fd420 100644 --- a/intelmq/bots/parsers/spamhaus/parser_cert.py +++ b/intelmq/bots/parsers/spamhaus/parser_cert.py @@ -53,7 +53,7 @@ def process(self): continue row_splitted = [field.strip() for field in row.split(',')] - event = Event() + event = Event(report) event.add('source.ip', row_splitted[0], sanitize=True) event.add('source.asn', row_splitted[1].replace('AS', ''), @@ -75,12 +75,7 @@ def process(self): int(row_splitted[8])}), sanitize=True) event.add('protocol.transport', row_splitted[9], sanitize=True) - - event.add('time.observation', - report['time.observation'], sanitize=True) event.add('classification.type', u'c&c') - event.add('feed.name', report["feed.name"]) - event.add('feed.url', report["feed.url"]) event.add('raw', row, sanitize=True) self.send_message(event) diff --git a/intelmq/bots/parsers/spamhaus/parser_drop.py b/intelmq/bots/parsers/spamhaus/parser_drop.py index 1c2d6b266..6630eb510 100644 --- a/intelmq/bots/parsers/spamhaus/parser_drop.py +++ b/intelmq/bots/parsers/spamhaus/parser_drop.py @@ -38,18 +38,14 @@ def process(self): row_splitted = row.split(';') network = row_splitted[0].strip() - event = Event() + event = Event(report) event.add('source.network', network, sanitize=True) if self.event_date: event.add('time.source', self.event_date.isoformat(), sanitize=True) - event.add('time.observation', report.value( - 'time.observation'), sanitize=True) event.add('classification.type', u'spam') - event.add('feed.name', report.value("feed.name")) - event.add('feed.url', report.value("feed.url")) event.add('raw', row, sanitize=True) self.send_message(event) diff --git a/intelmq/bots/parsers/taichung/parser.py b/intelmq/bots/parsers/taichung/parser.py index 678465aaf..1ac214355 100644 --- a/intelmq/bots/parsers/taichung/parser.py +++ b/intelmq/bots/parsers/taichung/parser.py 
@@ -50,7 +50,7 @@ def process(self): "[\ ]*(\d{4}-\d{2}-\d{2}\ \d{2}:\d{2}:\d{2})[\ ]*", row) - event = Event() + event = Event(report) description = info1.group(2) description = utils.decode(description) @@ -58,13 +58,9 @@ def process(self): time_source = info2.group(1) + " UTC-8" event.add("time.source", time_source, sanitize=True) - event.add('time.observation', report.value( - 'time.observation'), sanitize=True) event.add("source.ip", info1.group(1), sanitize=True) event.add('classification.type', event_type, sanitize=True) event.add('event_description.text', description, sanitize=True) - event.add('feed.name', report.value("feed.name")) - event.add('feed.url', report.value("feed.url")) event.add("raw", row, sanitize=True) self.send_message(event) diff --git a/intelmq/bots/parsers/turris/parser.py b/intelmq/bots/parsers/turris/parser.py index dc3b7247e..f00cfc49f 100644 --- a/intelmq/bots/parsers/turris/parser.py +++ b/intelmq/bots/parsers/turris/parser.py @@ -36,7 +36,7 @@ def process(self): headers = False continue - event = Event() + event = Event(report) for key, value in zip(columns, row): if key == "__IGNORE__": @@ -44,10 +44,6 @@ def process(self): event.add(key, value, sanitize=True) - event.add('time.observation', report.value( - 'time.observation'), sanitize=True) - event.add('feed.name', report.value("feed.name")) - event.add('feed.url', report.value("feed.url")) event.add('classification.type', u'scanner') event.add("raw", ",".join(row), sanitize=True) diff --git a/intelmq/bots/parsers/urlvir/parser_hosts.py b/intelmq/bots/parsers/urlvir/parser_hosts.py index 448f0878e..62bcb9816 100644 --- a/intelmq/bots/parsers/urlvir/parser_hosts.py +++ b/intelmq/bots/parsers/urlvir/parser_hosts.py @@ -27,18 +27,14 @@ def process(self): if row == "" or row.startswith("#"): continue - event = Event() + event = Event(report) if IPAddress.is_valid(row, sanitize=True): event.add('source.ip', row, sanitize=True) else: event.add('source.fqdn', row, sanitize=True) - 
event.add('time.observation', report.value( - 'time.observation'), sanitize=True) event.add('classification.type', u'malware') - event.add('feed.name', report.value("feed.name")) - event.add('feed.url', report.value("feed.url")) event.add('raw', row, sanitize=True) self.send_message(event) diff --git a/intelmq/bots/parsers/urlvir/parser_ips.py b/intelmq/bots/parsers/urlvir/parser_ips.py index 9a375ab41..1260ae446 100644 --- a/intelmq/bots/parsers/urlvir/parser_ips.py +++ b/intelmq/bots/parsers/urlvir/parser_ips.py @@ -26,15 +26,11 @@ def process(self): if row == "" or row.startswith("#"): continue - event = Event() + event = Event(report) event.add('source.ip', row, sanitize=True) - event.add('time.observation', report.value( - 'time.observation'), sanitize=True) event.add('classification.type', u'malware') - event.add('feed.name', report.value("feed.name")) - event.add('feed.url', report.value("feed.url")) event.add('raw', row, sanitize=True) self.send_message(event) diff --git a/intelmq/bots/parsers/vxvault/parser.py b/intelmq/bots/parsers/vxvault/parser.py index 02b260e5d..622dcd751 100644 --- a/intelmq/bots/parsers/vxvault/parser.py +++ b/intelmq/bots/parsers/vxvault/parser.py @@ -40,17 +40,13 @@ def process(self): hostname = url_object.hostname port = url_object.port - event = Event() + event = Event(report) if IPAddress.is_valid(hostname, sanitize=True): event.add("source.ip", hostname, sanitize=True) else: event.add("source.fqdn", hostname, sanitize=True) - event.add('time.observation', report.value( - 'time.observation'), sanitize=True) - event.add('feed.name', report.value("feed.name")) - event.add('feed.url', report.value("feed.url")) event.add('classification.type', u'malware') event.add("source.url", url, sanitize=True) if port: diff --git a/intelmq/lib/message.py b/intelmq/lib/message.py index c6338be74..66d109088 100644 --- a/intelmq/lib/message.py +++ b/intelmq/lib/message.py @@ -178,6 +178,26 @@ def __get_class_name_from_key_type(self, key): class 
Event(Message): + def __init__(self, message=()): + """ + Parameters + ---------- + message : dict + Give a report and feed.name, feed.url and + time.observation will be used to construct the Event if given. + """ + if isinstance(message, Report): + template = {} + if 'feed.name' in message: + template['feed.name'] = message['feed.name'] + if 'feed.url' in message: + template['feed.url'] = message['feed.url'] + if 'time.observation' in message: + template['time.observation'] = message['time.observation'] + else: + template = message + super(Event, self).__init__(template) + def __hash__(self): event_hash = hashlib.sha256() diff --git a/intelmq/lib/test.py b/intelmq/lib/test.py index fa638342d..018c96791 100644 --- a/intelmq/lib/test.py +++ b/intelmq/lib/test.py @@ -279,7 +279,6 @@ def test_report(self): report = message.MessageFactory.unserialize(report_json) self.assertIsInstance(report, message.Report) self.assertIn('feed.name', report) - self.assertIn('feed.url', report) self.assertIn('raw', report) self.assertIn('time.observation', report) From cd9e268b1a95152b4751dbb860490da51754578c Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Mon, 7 Sep 2015 09:42:18 +0200 Subject: [PATCH 27/51] DOC: data-harm: added explann, filled table source... Signed-off-by: Sebastian Wagner --- docs/Data-Harmonization.md | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/docs/Data-Harmonization.md b/docs/Data-Harmonization.md index c65012535..a4ba03248 100644 --- a/docs/Data-Harmonization.md +++ b/docs/Data-Harmonization.md @@ -66,6 +66,8 @@ Some sources report an internal (NATed) IP address. ### Reported Identity +Not used currently. + #### Reported Source Identity As stated above, each abuse handling organization should define a policy, which IOC to use as the primary element describing an abuse event. Often the sources have done their attribution, but you may choose to correlate their attributive elements with your own. 
In practice this means that your sanitation should prefix the elements with the '''reported''' keyword, to denote that you've decided the attribute these yourself. The list below is not comprehensive, rather than a list of common things you may want to attribute yourself. Moreover, if you choose to perform your own attribution, the observation time will become your authoritative point of reference related to these IOC. @@ -94,7 +96,7 @@ The elements listed below are additional keys used to describe abusive behavior, #### Classification -Having a functional ontology to work with, especially for the abuse types is important for you to be able to classify, prioritize and report relevant actionable intelligence to the parties who need to be informed. The driving idea for this ontology has been to use a minimal set of values with maximal usability. Below, is a list of harmonized values for the abuse types. +Having a functional ontology to work with, especially for the abuse types is important for you to be able to classify, prioritize and report relevant actionable intelligence to the parties who need to be informed. The driving idea for this ontology has been to use a minimal set of values with maximal usability. See the classification section below for explanations and examples. ## Data types @@ -115,15 +117,14 @@ Note that this section does not yet define error handling and failure mechanisms A list of allowed fields can be found in [Harmonization-fields.md](Harmonization-fields.md) -### Rules + +## Classification -All keys MUST be written in lowercase. +Intelmq classifies events using three labels: taxonomy, type and identifier. This tuple of three values can be used for deduplication of events and describes what happened. +TODO: examples from chat - -## Type/Taxonomy Mapping +The taxonomy can be automatically added by the taxonomy expert bot based on the given type. 
The following taxonomy-type mapping is based on eCSIRT Taxonomy: -The following mapping is based on eCSIRT Taxonomy. - |Type|Taxonomy|Description| |----|--------|-----------| |spam|Abusive Content|This IOC refers to resources, which make up a SPAM infrastructure, be it a harvester, dictionary attacker, URL etc.| @@ -147,16 +148,16 @@ The following mapping is based on eCSIRT Taxonomy. |unknown|Other|unknown events| |test|Test|This is a value for testing purposes.| -Meaning of source, destination and local values for each classification type: +Meaning of source, destination and local values for each classification type and possible identifiers. The identifier is often a normalized malware name, grouping many variants. -|Type|Source|Destination|Local|Possible subtypes| +|Type|Source|Destination|Local|Possible identifiers| |----|------|-----------|-----| |spam|*infected device*|targeted server|internal at source| -|malware|*infected device*||| +|malware|*infected device*||internal at source|zeus, palevo, feodo| |botnet drone|*infected device*||| |ransomware|*infected device*||| |malware configuration|*infected device*||| -|c&c|*connecting device*|sinkholed server|| +|c&c|*(sinkholed) c&c server*|||zeus, palevo, feodo| |scanner|*scanning device*|scanned device|| |exploit|*hosting server*||| |brute-force|*attacker*|target|| @@ -167,11 +168,15 @@ Meaning of source, destination and local values for each classification type: |ddos|*attacker*|target|| |dropzone|*server hosting stolen data*||| |phishing|*phishing website*||| -|vulnerable service|*vulnerable device*||| +|vulnerable service|*vulnerable device*||| heartbleed, openresolver, snmp | |blacklist|*blacklisted device*||| |unknown|||| -Field in italics is the interesting one for CERTs +Field in italics is the interesting one for CERTs. + +Example: + +If you know of an IP address that connects to a zeus c&c server, it's about the infected device, thus type malware and identifier zeus. 
If you want to complain about the c&c server, it's type c&c and identifier zeus. The `malware.name` can have the full name, eg. 'zeus_p2p'. ## Minimum requirements for events From 81d922d1ec764f4397a918321f32b2943e5bb16e Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Fri, 4 Sep 2015 14:59:00 +0200 Subject: [PATCH 28/51] ENH: harm: add classification.subtype Signed-off-by: Sebastian Wagner --- intelmq/conf/harmonization.conf | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/intelmq/conf/harmonization.conf b/intelmq/conf/harmonization.conf index aba2ba3d8..eb35be1d6 100644 --- a/intelmq/conf/harmonization.conf +++ b/intelmq/conf/harmonization.conf @@ -4,6 +4,10 @@ "description": "All anecdotal information, which cannot be parsed into the data harmonization elements. TODO: Must be JSON encoded for machine readability.", "type": "String" }, + "classification.identifier": { + "description": "The lowercase identifier defines the actual software or service (e.g. 'heartbleed' or 'ntp_version') or standardized malware name (e.g. 'zeus').", + "type": "String" + }, "classification.taxonomy": { "description": "We recognize the need for the CSIRT teams to apply a static (incident) taxonomy to abuse data. With this goal in mind the type IOC will serve as a basis for this activity. Each value of the dynamic type mapping translates to a an element in the static taxonomy. The European CSIRT teams for example have decided to apply the eCSIRT.net incident classification. The value of the taxonomy key is thus a derivative of the dynamic type above. 
For more information about check [ENISA taxonomies](http://www.enisa.europa.eu/activities/cert/support/incident-management/browsable/incident-handling-process/incident-taxonomy/existing-taxonomies).", "type": "String" From dd9a343ecc3e001d8ef096c56f4a319a84296f2a Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Mon, 7 Sep 2015 11:38:25 +0200 Subject: [PATCH 29/51] DEV: Fix travis setup and requirements Signed-off-by: Sebastian Wagner --- .travis.yml | 3 ++- REQUIREMENTS | 21 +++++++++++---------- REQUIREMENTS3 | 16 ++++++++++++++++ setup_travis.py | 18 ++++++++++++++++++ 4 files changed, 47 insertions(+), 11 deletions(-) create mode 100644 REQUIREMENTS3 create mode 100644 setup_travis.py diff --git a/.travis.yml b/.travis.yml index da29b44e4..0b2bfc683 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,7 +5,8 @@ python: - "3.4" # command to install dependencies install: - - "pip install -r REQUIREMENTS" + - if [[ $TRAVIS_PYTHON_VERSION == '2.7' ]]; then pip install -r REQUIREMENTS; fi + - if [[ ${TRAVIS_PYTHON_VERSION%.?} == 3 ]]; then pip install -r REQUIREMENTS3; fi - "python setup_travis.py install" # command to run tests script: nosetests diff --git a/REQUIREMENTS b/REQUIREMENTS index ea6258c66..6aba4ac32 100644 --- a/REQUIREMENTS +++ b/REQUIREMENTS @@ -1,16 +1,17 @@ -python-dateutil>=1.5 -geoip2>=0.5.1 dnspython>=1.11.1 -redis>=2.10.3 -pymongo>=2.7.1 -xmpppy>=0.5.0rc1 +geoip2>=0.5.1 imbox>=0.5.5 ipaddress -unicodecsv>=0.9.4 -pytz>=2012d +mock>=1.1.1 psutil>=2.1.1 -pyzmq>=14.6.0 -pydns>=2.3.6 +pyasn pycurl>=7.19.0 -mock>=1.1.1 +pydns>=2.3.6 +pymongo>=2.7.1 +python-dateutil>=1.5 +pytz>=2012d +pyzmq>=14.6.0 +redis>=2.10.3 six>=1.7 +unicodecsv>=0.9.4 +xmpppy>=0.5.0rc1 diff --git a/REQUIREMENTS3 b/REQUIREMENTS3 new file mode 100644 index 000000000..b18245c78 --- /dev/null +++ b/REQUIREMENTS3 @@ -0,0 +1,16 @@ +dnspython>=1.11.1 +geoip2>=0.5.1 +imbox>=0.5.5 +ipaddress +mock>=1.1.1 +psutil>=2.1.1 +pyasn +pycurl>=7.19.0 +pymongo>=2.7.1 +python-dateutil>=1.5 
+pytz>=2012d +pyzmq>=14.6.0 +redis>=2.10.3 +six>=1.7 +unicodecsv>=0.9.4 +xmpppy>=0.5.0rc1 diff --git a/setup_travis.py b/setup_travis.py new file mode 100644 index 000000000..8742be7a6 --- /dev/null +++ b/setup_travis.py @@ -0,0 +1,18 @@ +from __future__ import print_function + +from setuptools import find_packages, setup + + +setup( + name='intelmq', + version='1.0.0', + maintainer='Tomas Lima', + maintainer_email='synchroack@gmail.com', + packages=find_packages(), + package_data={'intelmq': ['conf/*.conf']}, + url='http://pypi.python.org/pypi/intelmq/', + license='AGPLv3', + description="IntelMQ Tool", + long_description='IntelMQ is a solution for CERTs to process data feeds, ' + 'pastebins, tweets throught a message queue.', +) From e662959c2a7ae40e9119d4d5cfff13a20c7e177b Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Mon, 7 Sep 2015 14:04:31 +0200 Subject: [PATCH 30/51] DEV: travis install redis, skip asn,tor tests Signed-off-by: Sebastian Wagner --- .travis.yml | 2 +- intelmq/tests/bots/experts/asn_lookup/test_expert.py | 7 +++++-- intelmq/tests/bots/experts/tor_nodes/test_expert.py | 7 +++++-- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index 0b2bfc683..0a34e40f0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,12 +1,12 @@ language: python python: - "2.7" - - "3.3" - "3.4" # command to install dependencies install: - if [[ $TRAVIS_PYTHON_VERSION == '2.7' ]]; then pip install -r REQUIREMENTS; fi - if [[ ${TRAVIS_PYTHON_VERSION%.?} == 3 ]]; then pip install -r REQUIREMENTS3; fi + - sudo apt-get install redis-server - "python setup_travis.py install" # command to run tests script: nosetests diff --git a/intelmq/tests/bots/experts/asn_lookup/test_expert.py b/intelmq/tests/bots/experts/asn_lookup/test_expert.py index 12307eddc..1504f67dc 100644 --- a/intelmq/tests/bots/experts/asn_lookup/test_expert.py +++ b/intelmq/tests/bots/experts/asn_lookup/test_expert.py @@ -8,12 +8,14 @@ from __future__ import 
unicode_literals import json +import os import unittest import intelmq.lib.test as test from intelmq.bots.experts.asn_lookup.expert import ASNLookupExpertBot +ASN_DB = '/opt/intelmq/var/lib/bots/asn_lookup/ipasn.dat' EXAMPLE_INPUT = {"__type": "Event", "source.ip": "93.184.216.34", # example.com "destination.ip": "192.0.43.8", # iana.org @@ -40,6 +42,8 @@ } +@unittest.skipUnless(os.path.exists(ASN_DB), 'asn-lookup database does not' + 'exist in {}.'.format(ASN_DB)) class TestASNLookupExpertBot(test.BotTestCase, unittest.TestCase): """ A TestCase for AbusixExpertBot. @@ -48,8 +52,7 @@ class TestASNLookupExpertBot(test.BotTestCase, unittest.TestCase): @classmethod def set_bot(self): self.bot_reference = ASNLookupExpertBot - self.sysconfig = {'database': '/opt/intelmq/var/lib/bots/asn_lookup/' - 'ipasn.dat'} + self.sysconfig = {'database': ASN_DB} self.default_input_message = json.dumps({'__type': 'Report'}) def test_ipv4_lookup(self): diff --git a/intelmq/tests/bots/experts/tor_nodes/test_expert.py b/intelmq/tests/bots/experts/tor_nodes/test_expert.py index b369abd0a..49a929621 100644 --- a/intelmq/tests/bots/experts/tor_nodes/test_expert.py +++ b/intelmq/tests/bots/experts/tor_nodes/test_expert.py @@ -7,12 +7,14 @@ from __future__ import unicode_literals import json +import os import unittest import intelmq.lib.test as test from intelmq.bots.experts.tor_nodes.expert import TorExpertBot +TOR_DB = '/opt/intelmq/var/lib/bots/tor_nodes/tor_nodes.dat' EXAMPLE_INPUT = {"__type": "Event", "source.ip": "37.130.227.133", "destination.ip": "192.0.43.8", # iana.org @@ -26,6 +28,8 @@ } +@unittest.skipUnless(os.path.exists(TOR_DB), 'tor-nodes database does not' + 'exist in {}.'.format(TOR_DB)) class TestTorExpertBot(test.BotTestCase, unittest.TestCase): """ A TestCase for AbusixExpertBot. 
@@ -34,8 +38,7 @@ class TestTorExpertBot(test.BotTestCase, unittest.TestCase): @classmethod def set_bot(self): self.bot_reference = TorExpertBot - self.sysconfig = {'database': '/opt/intelmq/var/lib/bots/tor_nodes/' - 'tor_nodes.dat'} + self.sysconfig = {'database': TOR_DB} self.default_input_message = json.dumps({'__type': 'Report'}) def test_ipv4_lookup(self): From 64095d99ac2c966bf41d120569f521a618af221b Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Mon, 7 Sep 2015 14:05:05 +0200 Subject: [PATCH 31/51] BUG: fix timestamp timezone handling Signed-off-by: Sebastian Wagner --- intelmq/lib/harmonization.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/intelmq/lib/harmonization.py b/intelmq/lib/harmonization.py index 50a8fe77b..77d5ec636 100644 --- a/intelmq/lib/harmonization.py +++ b/intelmq/lib/harmonization.py @@ -18,7 +18,6 @@ """ from __future__ import unicode_literals -import base64 import binascii import datetime import ipaddress @@ -220,7 +219,7 @@ def from_timestamp(tstamp, tzone='UTC'): Returns ISO formated datetime from given timestamp. You can give timezone for given timestamp, UTC by default. 
""" - dtime = datetime.datetime.fromtimestamp(tstamp) + dtime = datetime.datetime.utcfromtimestamp(tstamp) localized = pytz.timezone(tzone).localize(dtime) return six.text_type(localized.isoformat()) From 83f523429b266276cf52d5d0bbca20780b477e5f Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Mon, 7 Sep 2015 14:11:12 +0200 Subject: [PATCH 32/51] DEV: travis start redis Signed-off-by: Sebastian Wagner --- .travis.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 0a34e40f0..e169e9765 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,7 +6,8 @@ python: install: - if [[ $TRAVIS_PYTHON_VERSION == '2.7' ]]; then pip install -r REQUIREMENTS; fi - if [[ ${TRAVIS_PYTHON_VERSION%.?} == 3 ]]; then pip install -r REQUIREMENTS3; fi - - sudo apt-get install redis-server - "python setup_travis.py install" # command to run tests script: nosetests +services: + - redis-server From 1526fa749bfa7c33383010631e7161ce4533b0fd Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Mon, 7 Sep 2015 14:36:55 +0200 Subject: [PATCH 33/51] BUG: timestamp timezone again Signed-off-by: Sebastian Wagner --- intelmq/lib/harmonization.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intelmq/lib/harmonization.py b/intelmq/lib/harmonization.py index 77d5ec636..f26e76957 100644 --- a/intelmq/lib/harmonization.py +++ b/intelmq/lib/harmonization.py @@ -220,7 +220,7 @@ def from_timestamp(tstamp, tzone='UTC'): You can give timezone for given timestamp, UTC by default. 
""" dtime = datetime.datetime.utcfromtimestamp(tstamp) - localized = pytz.timezone(tzone).localize(dtime) + localized = dtime.replace(tzinfo=pytz.timezone(tzone)) return six.text_type(localized.isoformat()) @staticmethod From 174d19260a3e2e93d4976c5221e15cc05ea7f1e4 Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Mon, 7 Sep 2015 14:45:13 +0200 Subject: [PATCH 34/51] BUG: Require dnspython3 for 3 Signed-off-by: Sebastian Wagner --- REQUIREMENTS3 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/REQUIREMENTS3 b/REQUIREMENTS3 index b18245c78..358be9f08 100644 --- a/REQUIREMENTS3 +++ b/REQUIREMENTS3 @@ -1,4 +1,4 @@ -dnspython>=1.11.1 +dnspython3>=1.12.0 geoip2>=0.5.1 imbox>=0.5.5 ipaddress From 51cfbce2d76afabca75e448adcf105b1764b6e80 Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Tue, 8 Sep 2015 10:42:03 +0200 Subject: [PATCH 35/51] BUG: timestamp timezone third Signed-off-by: Sebastian Wagner --- intelmq/lib/harmonization.py | 5 +++-- .../bots/parsers/spamhaus/test_parser_cert.py | 6 +++--- intelmq/tests/lib/test_harmonization.py | 15 +++++++++------ 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/intelmq/lib/harmonization.py b/intelmq/lib/harmonization.py index f26e76957..13d4a5449 100644 --- a/intelmq/lib/harmonization.py +++ b/intelmq/lib/harmonization.py @@ -219,8 +219,9 @@ def from_timestamp(tstamp, tzone='UTC'): Returns ISO formated datetime from given timestamp. You can give timezone for given timestamp, UTC by default. 
""" - dtime = datetime.datetime.utcfromtimestamp(tstamp) - localized = dtime.replace(tzinfo=pytz.timezone(tzone)) + dtime = (datetime.datetime(1970, 1, 1, tzinfo=pytz.utc) + + datetime.timedelta(seconds=tstamp)) + localized = pytz.timezone(tzone).normalize(dtime) return six.text_type(localized.isoformat()) @staticmethod diff --git a/intelmq/tests/bots/parsers/spamhaus/test_parser_cert.py b/intelmq/tests/bots/parsers/spamhaus/test_parser_cert.py index d69d55296..4dbc9a7cd 100644 --- a/intelmq/tests/bots/parsers/spamhaus/test_parser_cert.py +++ b/intelmq/tests/bots/parsers/spamhaus/test_parser_cert.py @@ -36,7 +36,7 @@ 'Cxhc3Byb3gsLCwyNSwsdGNw', 'source.ip': '109.126.64.2', 'source.asn': 12635, - 'time.source': '2015-08-31T10:16:10+00:00', + 'time.source': '2015-08-31T08:16:10+00:00', 'malware.name': 'asprox', 'destination.port': 25, }, @@ -45,7 +45,7 @@ '4yMC4xOSw4MCwxMDM2LHRjcA==', 'source.ip': '109.90.233.19', 'source.asn': 6830, - 'time.source': '2015-08-31T10:05:51+00:00', + 'time.source': '2015-08-31T08:05:51+00:00', 'malware.name': 'patcher', 'destination.port': 80, 'destination.fqdn': 'dxxt.sinkhole.dk', @@ -57,7 +57,7 @@ '4xMDksODAsMTQzMCx0Y3A=', 'source.ip': '109.91.0.227', 'source.asn': 6830, - 'time.source': '2015-08-31T11:00:57+00:00', + 'time.source': '2015-08-31T09:00:57+00:00', 'malware.name': 'conficker', 'destination.port': 80, 'destination.ip': '216.66.15.109', diff --git a/intelmq/tests/lib/test_harmonization.py b/intelmq/tests/lib/test_harmonization.py index 9877c96ee..0016286cb 100644 --- a/intelmq/tests/lib/test_harmonization.py +++ b/intelmq/tests/lib/test_harmonization.py @@ -157,12 +157,15 @@ def test_ipnetwork_sanitize_invalid(self): def test_datetime_from_timestamp(self): """ Test DateTime.from_timestamp method. 
""" - self.assertEqual(harmonization.DateTime.from_timestamp(1441008970), - '2015-08-31T10:16:10+00:00') - self.assertEqual(harmonization.DateTime.from_timestamp(1441008970, - 'Europe/' - 'Vienna'), - '2015-08-31T10:16:10+02:00') + self.assertEqual('2015-08-31T08:16:10+00:00', + harmonization.DateTime.from_timestamp(1441008970)) + self.assertEqual('2015-08-31T07:16:10-01:00', + harmonization.DateTime.from_timestamp(1441008970, + 'Etc/GMT+1')) + self.assertEqual('2015-08-31T04:16:10-04:00', + harmonization.DateTime.from_timestamp(1441008970, + 'America/' + 'Guyana')) def test_datetime_from_timestamp_invalid(self): """ Test DateTime.from_timestamp method with invalid inputs. """ From 78b755e37c4bb042ff9d968475311facec153a27 Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Tue, 8 Sep 2015 13:27:59 +0200 Subject: [PATCH 36/51] BUG: Fix classification spamhaus-cert Signed-off-by: Sebastian Wagner --- intelmq/bots/parsers/spamhaus/parser_cert.py | 2 +- intelmq/tests/bots/parsers/spamhaus/test_parser_cert.py | 2 +- scripts/vagrant/bootstrap.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/intelmq/bots/parsers/spamhaus/parser_cert.py b/intelmq/bots/parsers/spamhaus/parser_cert.py index b209fd420..51fd26554 100644 --- a/intelmq/bots/parsers/spamhaus/parser_cert.py +++ b/intelmq/bots/parsers/spamhaus/parser_cert.py @@ -75,7 +75,7 @@ def process(self): int(row_splitted[8])}), sanitize=True) event.add('protocol.transport', row_splitted[9], sanitize=True) - event.add('classification.type', u'c&c') + event.add('classification.type', u'botnet drone') event.add('raw', row, sanitize=True) self.send_message(event) diff --git a/intelmq/tests/bots/parsers/spamhaus/test_parser_cert.py b/intelmq/tests/bots/parsers/spamhaus/test_parser_cert.py index 4dbc9a7cd..3c644900c 100644 --- a/intelmq/tests/bots/parsers/spamhaus/test_parser_cert.py +++ b/intelmq/tests/bots/parsers/spamhaus/test_parser_cert.py @@ -27,7 +27,7 @@ "&key=", "feed.name": "Spamhaus Cert", 
"__type": "Event", - "classification.type": "c&c", + "classification.type": "botnet drone", "time.observation": "2015-01-01T00:00:00+00:00", "protocol.transport": "tcp", "source.geolocation.cc": "AT", diff --git a/scripts/vagrant/bootstrap.sh b/scripts/vagrant/bootstrap.sh index 3c77cd8f5..dbba2016f 100755 --- a/scripts/vagrant/bootstrap.sh +++ b/scripts/vagrant/bootstrap.sh @@ -26,7 +26,7 @@ function intelmq_install { pip install -r REQUIREMENTS; fi #Install - python setup_auto.py install + python setup.py install useradd -d /opt/intelmq -U -s /bin/bash intelmq chmod -R 0770 /opt/intelmq chown -R intelmq.intelmq /opt/intelmq From 9a59a11686337491d0de45d851432b4785c182a0 Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Wed, 9 Sep 2015 11:26:22 +0200 Subject: [PATCH 37/51] ENH+TST+DOC: Modify expert added Signed-off-by: Sebastian Wagner --- docs/Bots.md | 38 ++++++++++ intelmq/__init__.py | 14 ++-- intelmq/bots/experts/modify/__init__.py | 0 intelmq/bots/experts/modify/expert.py | 69 +++++++++++++++++++ intelmq/conf/modify.conf | 24 +++++++ intelmq/tests/bots/experts/modify/__init__.py | 0 .../tests/bots/experts/modify/test_expert.py | 57 +++++++++++++++ setup.py | 15 ++-- 8 files changed, 204 insertions(+), 13 deletions(-) create mode 100644 intelmq/bots/experts/modify/__init__.py create mode 100644 intelmq/bots/experts/modify/expert.py create mode 100644 intelmq/conf/modify.conf create mode 100644 intelmq/tests/bots/experts/modify/__init__.py create mode 100644 intelmq/tests/bots/experts/modify/test_expert.py diff --git a/docs/Bots.md b/docs/Bots.md index cbf259e5c..3d5a5bd01 100644 --- a/docs/Bots.md +++ b/docs/Bots.md @@ -13,7 +13,45 @@ Experts | deduplicator | y | redis cache | y | 7 | - | not tested | | filter | y | - | y | - | drops event | not tested | | maxmind-geoip | ? | local db | n | - | ip to geolocation ? 
| not stable | +| modify | - | config | y | - | arbitrary | | reverse-dns | n | dns | y | 8 | ip to domain | ipv6 implementation missing | | ripencc-abuse-contact | y | ? | y | 9 | ip to abuse contact | | taxonomy | - | - | y | - | classification type to taxonomy | | tor-nodes | n | local db | y | - | if ip is tor node | + + +### Modify + +The modify expert bot allows you to change arbitrary field values of events just using a configuration file. Thus it is possible to adapt certain values or add new ones only by changing JSON files, without touching the code of many other bots. + +The configuration is called `modify.conf` and looks like this: + +```json +{ +"Spamhaus Cert": { + "__default": [{ + "feed.name": "^Spamhaus Cert$" + }, { + "classification.identifier": "{msg[malware.name]}" + }], + "conficker": [{ + "malware.name": "^conficker(ab)?$" + }, { + "classification.identifier": "conficker" + }], + "urlzone": [{ + "malware.name": "^urlzone2?$" + }, { + "classification.identifier": "urlzone" + }] + } +} +``` + +The dictionary in the first level holds sections, here called `Spamhaus Cert`, to group the rule sets and for easier navigation. Each section holds another dictionary of rules, each consisting of *conditions* and *actions*. The first matching rule is used. Conditions and actions are again dictionaries keyed by harmonization field names: a condition maps a field to a regular expression that is matched against the existing value, an action maps a field to the new value. The rule conditions are merged with the default condition and the default action is applied if no rule matches. + +#### Examples + +We have an event with `feed.name = Spamhaus Cert` and `malware.name = confickerab`. The expert loops over all sections in the file and enters section `Spamhaus Cert`. First, the default condition is checked, and it matches, so the expert goes on; otherwise it would have continued to the next section. Now it iterates through the rules, the first of which is rule `conficker`. 
We combine the conditions of this rule with the default conditions, and both conditions match! So we can apply the action: here `classification.identifier` is set to `conficker`, the trivial name. + +Assume we have an event with `feed.name = Spamhaus Cert` and `malware.name = feodo`. The default condition matches, but no others. So the default action is applied. The value for `classification.identifier` is `{msg[malware.name]}`; this is [standard Python string format syntax](https://docs.python.org/3/library/string.html#formatspec). Thus you can use any value from the processed event, which is available as `msg`. diff --git a/intelmq/__init__.py b/intelmq/__init__.py index 47789bc96..6ca5c4a0c 100644 --- a/intelmq/__init__.py +++ b/intelmq/__init__.py @@ -1,9 +1,11 @@ -SYSTEM_CONF_FILE = "/opt/intelmq/etc/system.conf" -PIPELINE_CONF_FILE = "/opt/intelmq/etc/pipeline.conf" -STARTUP_CONF_FILE = "/opt/intelmq/etc/startup.conf" -RUNTIME_CONF_FILE = "/opt/intelmq/etc/runtime.conf" +DEFAULT_LOGGING_LEVEL = "INFO" +DEFAULT_LOGGING_PATH = "/opt/intelmq/var/log/" DEFAULTS_CONF_FILE = "/opt/intelmq/etc/defaults.conf" HARMONIZATION_CONF_FILE = "/opt/intelmq/etc/harmonization.conf" -DEFAULT_LOGGING_PATH = "/opt/intelmq/var/log/" -DEFAULT_LOGGING_LEVEL = "INFO" +MODIFY_CONF_FILE = "/opt/intelmq/etc/modify.conf" +PIPELINE_CONF_FILE = "/opt/intelmq/etc/pipeline.conf" +RUNTIME_CONF_FILE = "/opt/intelmq/etc/runtime.conf" +STARTUP_CONF_FILE = "/opt/intelmq/etc/startup.conf" +SYSTEM_CONF_FILE = "/opt/intelmq/etc/system.conf" VAR_RUN_PATH = "/opt/intelmq/var/run/" + diff --git a/intelmq/bots/experts/modify/__init__.py b/intelmq/bots/experts/modify/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/intelmq/bots/experts/modify/expert.py b/intelmq/bots/experts/modify/expert.py new file mode 100644 index 000000000..dd9cc1f6f --- /dev/null +++ b/intelmq/bots/experts/modify/expert.py @@ -0,0 +1,69 @@ +# -*- coding: utf-8 -*- +""" +Modify Expert bot let's you manipulate all
fields with a config file. + + +""" +from __future__ import unicode_literals +import re + +from intelmq import MODIFY_CONF_FILE +from intelmq.lib.bot import Bot +import intelmq.lib.utils + + +def matches(event, *rules): + condition = {} + for rule in rules: + condition.update(rule) + + for name, rule in condition.items(): + if name not in event: + return False + if not re.search(rule, event[name]): + return False + + return True + + +def apply_action(event, action): + for name, value in action.items(): + event.add(name, value.format(msg=event), sanitize=True, force=True) + + +class ModifyExpertBot(Bot): + + def init(self): + self.config = intelmq.lib.utils.load_configuration(MODIFY_CONF_FILE) + + def process(self): + event = self.receive_message() + + if event is None: + self.acknowledge_message() + return + + for section_id, section in self.config.items(): + default_cond = section['__default'][0] + default_action = section['__default'][1] + if not matches(event, default_cond): + continue + + applied = False + for rule_id, (rule_cond, rule_action) in section.items(): + if rule_id == '__default': + continue + if matches(event, default_cond, rule_cond): + self.logger.debug('Apply rule {}/{}.'.format(section_id, + rule_id)) + apply_action(event, rule_action) + applied = True + continue + + if not applied: + self.logger.debug('Apply default rule {}/__default.' 
+ ''.format(section_id)) + apply_action(event, default_action) + + self.send_message(event) + self.acknowledge_message() diff --git a/intelmq/conf/modify.conf b/intelmq/conf/modify.conf new file mode 100644 index 000000000..de9c96edc --- /dev/null +++ b/intelmq/conf/modify.conf @@ -0,0 +1,24 @@ +{ +"Spamhaus Cert": { + "__default": [{ + "feed.name": "^Spamhaus Cert$" + }, { + "classification.identifier": "{msg[malware.name]}" + }], + "conficker": [{ + "malware.name": "^conficker(ab)?$" + }, { + "classification.identifier": "conficker" + }], + "urlzone": [{ + "malware.name": "^urlzone2?$" + }, { + "classification.identifier": "urlzone" + }], + "gozi": [{ + "malware.name": "^gozi2?$" + }, { + "classification.identifier": "gozi" + }] + } +} diff --git a/intelmq/tests/bots/experts/modify/__init__.py b/intelmq/tests/bots/experts/modify/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/intelmq/tests/bots/experts/modify/test_expert.py b/intelmq/tests/bots/experts/modify/test_expert.py new file mode 100644 index 000000000..4d8bbc37e --- /dev/null +++ b/intelmq/tests/bots/experts/modify/test_expert.py @@ -0,0 +1,57 @@ +# -*- coding: utf-8 -*- +""" +Testing modify expert bot. 
+""" +from __future__ import unicode_literals + +import unittest + +import intelmq.lib.test as test +from intelmq.bots.experts.modify.expert import ModifyExpertBot + +EVENT_TEMPL = {"__type": "Event", + "feed.name": "Spamhaus Cert", + "feed.url": "https://portal.spamhaus.org/cert/api.php?cert=" + "&key=", + "classification.type": "botnet drone", + "time.observation": "2015-01-01T00:00:00+00:00", + "raw": "", + } +INPUT = [{'malware.name': 'confickerab'}, + {'malware.name': 'gozi2'}, + {'malware.name': 'feodo'}, + ] +OUTPUT = [{'classification.identifier': 'conficker'}, + {'classification.identifier': 'gozi'}, + {'classification.identifier': 'feodo'}, + ] +for event_in, event_out in zip(INPUT, OUTPUT): + event_in.update(EVENT_TEMPL) + event_out.update(event_in) + event_out.update(EVENT_TEMPL) + + +class TestModifyExpertBot(test.BotTestCase, unittest.TestCase): + """ + A TestCase for ModifyExpertBot. + """ + + @classmethod + def set_bot(self): + self.bot_reference = ModifyExpertBot + self.sysconfig = {'filter': False, + 'overwrite_cc': False, + 'verify_cert': False, + } + + def test_events(self): + """ Test if correct Events have been produced. 
""" + self.input_message = INPUT + self.run_bot(iterations=len(INPUT)) + + for position, event_out in enumerate(OUTPUT): + self.assertMessageEqual(position, event_out) + + +if __name__ == '__main__': + unittest.main() diff --git a/setup.py b/setup.py index cc243c624..0b204f57d 100644 --- a/setup.py +++ b/setup.py @@ -18,15 +18,15 @@ sys.exit(-1) dirs = ['/opt/intelmq', + '/opt/intelmq/bin', + '/opt/intelmq/docs', '/opt/intelmq/etc', '/opt/intelmq/var', - '/opt/intelmq/var/log', - '/opt/intelmq/var/run', '/opt/intelmq/var/lib', '/opt/intelmq/var/lib/bots', '/opt/intelmq/var/lib/bots/file-output', - '/opt/intelmq/bin', - '/opt/intelmq/docs', + '/opt/intelmq/var/log', + '/opt/intelmq/var/run', ] for dir in dirs: @@ -49,12 +49,13 @@ data_files=[ ('/opt/intelmq/etc/', [ 'intelmq/bots/BOTS', - 'intelmq/conf/startup.conf', - 'intelmq/conf/runtime.conf', 'intelmq/conf/defaults.conf', + 'intelmq/conf/harmonization.conf', + 'intelmq/conf/modify.conf', 'intelmq/conf/pipeline.conf', + 'intelmq/conf/runtime.conf', + 'intelmq/conf/startup.conf', 'intelmq/conf/system.conf', - 'intelmq/conf/harmonization.conf' ], ), ('/opt/intelmq/bin/', [ From e1aa611aea329e69f559f486b7d0d8323c4cb1bb Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Wed, 9 Sep 2015 11:56:34 +0200 Subject: [PATCH 38/51] ENH: More Convinience for queue handle in tests Signed-off-by: Sebastian Wagner --- docs/Developers-Guide.md | 2 +- intelmq/lib/test.py | 32 ++++++++++++------- .../tests/bots/experts/abusix/test_expert.py | 6 ++-- .../bots/experts/asn_lookup/test_expert.py | 6 ++-- .../experts/certat_contact/test_expert.py | 6 ++-- .../bots/experts/cymru_whois/test_expert.py | 8 ++--- .../bots/experts/reverse_dns/test_expert.py | 6 ++-- .../ripencc_abuse_contact/test_expert.py | 6 ++-- .../bots/experts/taxonomy/test_expert.py | 4 +-- .../bots/experts/tor_nodes/test_expert.py | 4 +-- 10 files changed, 44 insertions(+), 36 deletions(-) diff --git a/docs/Developers-Guide.md b/docs/Developers-Guide.md index 
57271bbf3..85e6d11bc 100644 --- a/docs/Developers-Guide.md +++ b/docs/Developers-Guide.md @@ -338,7 +338,7 @@ class TestExampleParserBot(test.BotTestCase, unittest.TestCase): # adjust test @classmethod def set_bot(cls): cls.bot_reference = ExampleParserBot # adjust bot class name - cls.default_input_message = json.dumps(EXAMPLE_EVENT) # adjust source of the example event + cls.default_input_message = EXAMPLE_EVENT # adjust source of the example event (dict) # This is an example how to test the log output def test_log_test_line(self): diff --git a/intelmq/lib/test.py b/intelmq/lib/test.py index 018c96791..9f7b30440 100644 --- a/intelmq/lib/test.py +++ b/intelmq/lib/test.py @@ -53,8 +53,7 @@ def mock(conf_file): with open(fname, 'rt') as fpconfig: return json.load(fpconfig) else: - with open(conf_file, 'r') as fpconfig: - return json.load(fpconfig) + return utils.load_configuration(conf_file) return mock @@ -146,25 +145,35 @@ class Parameters(object): with mock.patch('intelmq.lib.utils.log', self.mocked_log): self.bot = self.bot_reference(self.bot_id) if self.input_message is not None: - if type(self.input_message) is dict: - self.input_message = \ - utils.decode(json.dumps(self.input_message)) - self.input_queue = [self.input_message] + if type(self.input_message) is not list: + self.input_message = [self.input_message] + self.input_queue = [] + for msg in self.input_message: + if type(msg) is dict: + self.input_queue.append(json.dumps(msg)) + else: + self.input_queue.append(msg) self.input_message = None else: self.input_queue = [self.default_input_message] - def run_bot(self): + def run_bot(self, iterations=1): """ Call this method for actually doing a test run for the specified bot. + + Parameters + ---------- + iterations : integer + Bot instance will be run the given times, defaults to 1. 
""" self.prepare_bot() with mock.patch('intelmq.lib.utils.load_configuration', new=self.mocked_config): with mock.patch('intelmq.lib.utils.log', self.mocked_log): - self.bot.start(error_on_pipeline=False, - source_pipeline=self.pipe, - destination_pipeline=self.pipe) + for run in range(iterations): + self.bot.start(error_on_pipeline=False, + source_pipeline=self.pipe, + destination_pipeline=self.pipe) self.loglines_buffer = self.log_stream.getvalue() self.loglines = self.loglines_buffer.splitlines() @@ -243,7 +252,7 @@ def test_empty_message(self): if self.bot_type == 'collector': return - self.input_message = '' + self.input_message = [''] self.run_bot() self.assertRegexpMatchesLog("WARNING - Empty message received.") self.assertNotRegexpMatchesLog("ERROR") @@ -338,7 +347,6 @@ def assertMessageEqual(self, queue_pos, expected_message): contained in the generated event with given queue position. """ - event = self.get_output_queue()[queue_pos] self.assertIsInstance(event, six.text_type) diff --git a/intelmq/tests/bots/experts/abusix/test_expert.py b/intelmq/tests/bots/experts/abusix/test_expert.py index a488f92b3..7f272e732 100644 --- a/intelmq/tests/bots/experts/abusix/test_expert.py +++ b/intelmq/tests/bots/experts/abusix/test_expert.py @@ -38,16 +38,16 @@ class TestAbusixExpertBot(test.BotTestCase, unittest.TestCase): @classmethod def set_bot(self): self.bot_reference = AbusixExpertBot - self.default_input_message = json.dumps({'__type': 'Report'}) + self.default_input_message = {'__type': 'Report'} def test_ipv4_lookup(self): - self.input_message = json.dumps(EXAMPLE_INPUT) + self.input_message = EXAMPLE_INPUT self.run_bot() self.assertMessageEqual(0, EXAMPLE_OUTPUT) @unittest.expectedFailure def test_ipv6_lookup(self): - self.input_message = json.dumps(EXAMPLE_INPUT6) + self.input_message = EXAMPLE_INPUT6 self.run_bot() self.assertMessageEqual(0, EXAMPLE_OUTPUT6) diff --git a/intelmq/tests/bots/experts/asn_lookup/test_expert.py 
b/intelmq/tests/bots/experts/asn_lookup/test_expert.py index 1504f67dc..c94a1bbf2 100644 --- a/intelmq/tests/bots/experts/asn_lookup/test_expert.py +++ b/intelmq/tests/bots/experts/asn_lookup/test_expert.py @@ -53,16 +53,16 @@ class TestASNLookupExpertBot(test.BotTestCase, unittest.TestCase): def set_bot(self): self.bot_reference = ASNLookupExpertBot self.sysconfig = {'database': ASN_DB} - self.default_input_message = json.dumps({'__type': 'Report'}) + self.default_input_message = {'__type': 'Report'} def test_ipv4_lookup(self): - self.input_message = json.dumps(EXAMPLE_INPUT) + self.input_message = EXAMPLE_INPUT self.run_bot() self.assertMessageEqual(0, EXAMPLE_OUTPUT) @unittest.expectedFailure def test_ipv6_lookup(self): - self.input_message = json.dumps(EXAMPLE_INPUT6) + self.input_message = EXAMPLE_INPUT6 self.run_bot() self.assertMessageEqual(0, EXAMPLE_OUTPUT6) diff --git a/intelmq/tests/bots/experts/certat_contact/test_expert.py b/intelmq/tests/bots/experts/certat_contact/test_expert.py index 95ceb8799..faf04e099 100644 --- a/intelmq/tests/bots/experts/certat_contact/test_expert.py +++ b/intelmq/tests/bots/experts/certat_contact/test_expert.py @@ -49,15 +49,15 @@ def set_bot(self): 'overwrite_cc': False, 'verify_cert': False, } - self.default_input_message = json.dumps({'__type': 'Report'}) + self.default_input_message = {'__type': 'Report'} def test_ipv4_lookup(self): - self.input_message = json.dumps(EXAMPLE_INPUT) + self.input_message = EXAMPLE_INPUT self.run_bot() self.assertMessageEqual(0, EXAMPLE_OUTPUT) def test_ipv6_lookup(self): - self.input_message = json.dumps(EXAMPLE_INPUT6) + self.input_message = EXAMPLE_INPUT6 self.run_bot() self.assertMessageEqual(0, EXAMPLE_OUTPUT6) diff --git a/intelmq/tests/bots/experts/cymru_whois/test_expert.py b/intelmq/tests/bots/experts/cymru_whois/test_expert.py index d19cbd559..1c87ebbfc 100644 --- a/intelmq/tests/bots/experts/cymru_whois/test_expert.py +++ b/intelmq/tests/bots/experts/cymru_whois/test_expert.py @@ 
-60,20 +60,20 @@ class TestCymruExpertBot(test.BotTestCase, unittest.TestCase): @classmethod def set_bot(self): self.bot_reference = CymruExpertBot - self.default_input_message = json.dumps({'__type': 'Report'}) + self.default_input_message = {'__type': 'Report'} def test_ipv4_lookup(self): - self.input_message = json.dumps(EXAMPLE_INPUT) + self.input_message = EXAMPLE_INPUT self.run_bot() self.assertMessageEqual(0, EXAMPLE_OUTPUT) def test_ipv6_lookup(self): - self.input_message = json.dumps(EXAMPLE_INPUT6) + self.input_message = EXAMPLE_INPUT6 self.run_bot() self.assertMessageEqual(0, EXAMPLE_OUTPUT6) def test_unicode_as_name(self): - self.input_message = json.dumps(UNICODE_INPUT) + self.input_message = UNICODE_INPUT self.run_bot() self.assertMessageEqual(0, UNICODE_OUTPUT) diff --git a/intelmq/tests/bots/experts/reverse_dns/test_expert.py b/intelmq/tests/bots/experts/reverse_dns/test_expert.py index 41811e12e..13ead2c02 100644 --- a/intelmq/tests/bots/experts/reverse_dns/test_expert.py +++ b/intelmq/tests/bots/experts/reverse_dns/test_expert.py @@ -42,16 +42,16 @@ class TestReverseDnsExpertBot(test.BotTestCase, unittest.TestCase): @classmethod def set_bot(self): self.bot_reference = ReverseDnsExpertBot - self.default_input_message = json.dumps({'__type': 'Report'}) + self.default_input_message = {'__type': 'Report'} def test_ipv4_lookup(self): - self.input_message = json.dumps(EXAMPLE_INPUT) + self.input_message = EXAMPLE_INPUT self.run_bot() self.assertMessageEqual(0, EXAMPLE_OUTPUT) @unittest.expectedFailure def test_ipv6_lookup(self): - self.input_message = json.dumps(EXAMPLE_INPUT6) + self.input_message = EXAMPLE_INPUT6 self.run_bot() self.assertMessageEqual(0, EXAMPLE_OUTPUT6) diff --git a/intelmq/tests/bots/experts/ripencc_abuse_contact/test_expert.py b/intelmq/tests/bots/experts/ripencc_abuse_contact/test_expert.py index 9298af926..e17086bbf 100644 --- a/intelmq/tests/bots/experts/ripencc_abuse_contact/test_expert.py +++ 
b/intelmq/tests/bots/experts/ripencc_abuse_contact/test_expert.py @@ -41,19 +41,19 @@ class TestRIPENCCExpertBot(test.BotTestCase, unittest.TestCase): @classmethod def set_bot(self): self.bot_reference = RIPENCCExpertBot - self.default_input_message = json.dumps({'__type': 'Report'}) + self.default_input_message = {'__type': 'Report'} self.sysconfig = {'query_ripe_db_asn': True, 'query_ripe_db_ip': True, 'query_ripe_stat': True, } def test_ipv4_lookup(self): - self.input_message = json.dumps(EXAMPLE_INPUT) + self.input_message = EXAMPLE_INPUT self.run_bot() self.assertMessageEqual(0, EXAMPLE_OUTPUT) def test_ipv6_lookup(self): - self.input_message = json.dumps(EXAMPLE_INPUT6) + self.input_message = EXAMPLE_INPUT6 self.run_bot() self.assertMessageEqual(0, EXAMPLE_OUTPUT6) diff --git a/intelmq/tests/bots/experts/taxonomy/test_expert.py b/intelmq/tests/bots/experts/taxonomy/test_expert.py index a814404e3..8fef93042 100644 --- a/intelmq/tests/bots/experts/taxonomy/test_expert.py +++ b/intelmq/tests/bots/experts/taxonomy/test_expert.py @@ -27,10 +27,10 @@ class TestTaxonomyExpertBot(test.BotTestCase, unittest.TestCase): @classmethod def set_bot(self): self.bot_reference = TaxonomyExpertBot - self.default_input_message = json.dumps({'__type': 'Report'}) + self.default_input_message = {'__type': 'Report'} def test_classification(self): - self.input_message = json.dumps(EXAMPLE_INPUT) + self.input_message = EXAMPLE_INPUT self.run_bot() self.assertMessageEqual(0, EXAMPLE_OUTPUT) diff --git a/intelmq/tests/bots/experts/tor_nodes/test_expert.py b/intelmq/tests/bots/experts/tor_nodes/test_expert.py index 49a929621..dfb4581c8 100644 --- a/intelmq/tests/bots/experts/tor_nodes/test_expert.py +++ b/intelmq/tests/bots/experts/tor_nodes/test_expert.py @@ -39,10 +39,10 @@ class TestTorExpertBot(test.BotTestCase, unittest.TestCase): def set_bot(self): self.bot_reference = TorExpertBot self.sysconfig = {'database': TOR_DB} - self.default_input_message = json.dumps({'__type': 'Report'}) 
+ self.default_input_message = {'__type': 'Report'} def test_ipv4_lookup(self): - self.input_message = json.dumps(EXAMPLE_INPUT) + self.input_message = EXAMPLE_INPUT self.run_bot() self.assertMessageEqual(0, EXAMPLE_OUTPUT) From 408c5c9068312b89178b79c65098c841688a821f Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Wed, 9 Sep 2015 12:15:29 +0200 Subject: [PATCH 39/51] ENH: discussed changes to harm, partial As discussed in #185: additional_information renamed to extra extra has to be JSON removed os.name, os.version, user_agent Signed-off-by: Sebastian Wagner --- CHANGELOG.md | 4 +++- docs/Developers-Guide.md | 3 ++- docs/Harmonization-fields.md | 23 ++++++++----------- intelmq/bots/parsers/spamhaus/parser_cert.py | 2 +- intelmq/conf/harmonization.conf | 22 ++++-------------- .../bots/parsers/spamhaus/test_parser_cert.py | 4 ++-- 6 files changed, 22 insertions(+), 36 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d88005757..a9a656d51 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,7 +25,7 @@ v1.0 (in development, master branch) ### Harmonization - ENH: Additional data types: integer, float and Boolean - ENH: Added descriptions and matching types to all fields -- DOC: harmonization documentation has same fields as configuration +- DOC: harmonization documentation has same fields as configuration, docs are generated from configuration #### Most important changes: - `(source|destination).bgp_prefix` is now `(source|destination).network` @@ -34,6 +34,8 @@ v1.0 (in development, master branch) - `misp_id` changed to `misp_uuid` - `protocol.transport` added - `webshot_url` removed +- `additional_information` renamed to `extra`, must be JSON +- `os.name`, `os.version`, `user_agent` removed in favor of `extra` ----- diff --git a/docs/Developers-Guide.md b/docs/Developers-Guide.md index 85e6d11bc..54867fecb 100644 --- a/docs/Developers-Guide.md +++ b/docs/Developers-Guide.md @@ -263,7 +263,8 @@ class ExampleParserBot(Bot): event = Event(report) # 
copies feed.name, time.observation ... # implement the logic here - event.add('additional_information', 'Nothing here') + event.add('source.ip', '127.0.0.1') + event.add('extra', '{"os.name": "Linux"}') self.send_message(event) self.acknowledge_message() diff --git a/docs/Harmonization-fields.md b/docs/Harmonization-fields.md index 50c3dbf2a..17fed0cba 100644 --- a/docs/Harmonization-fields.md +++ b/docs/Harmonization-fields.md @@ -1,19 +1,17 @@ - Harmonization field names ========================= |Section|Name|Type|Description| |:------|:---|:---|:----------| -||additional|String|All anecdotal information, which cannot be parsed into the data harmonization elements. TODO: Must be JSON encoded for machine readability.| +|Classification|classification.identifier|String|The lowercase identifier defines the actual software or service (e.g. 'heartbleed' or 'ntp_version') or standardized malware name (e.g. 'zeus').| |Classification|classification.taxonomy|String|We recognize the need for the CSIRT teams to apply a static (incident) taxonomy to abuse data. With this goal in mind the type IOC will serve as a basis for this activity. Each value of the dynamic type mapping translates to a an element in the static taxonomy. The European CSIRT teams for example have decided to apply the eCSIRT.net incident classification. The value of the taxonomy key is thus a derivative of the dynamic type above. For more information about check [ENISA taxonomies](http://www.enisa.europa.eu/activities/cert/support/incident-management/browsable/incident-handling-process/incident-taxonomy/existing-taxonomies).| |Classification|classification.type|ClassificationType|The abuse type IOC is one of the most crucial pieces of information for any given abuse event. The main idea of dynamic typing is to keep our ontology flexible, since we need to evolve with the evolving threatscape of abuse data. 
In contrast with the static taxonomy below, the dynamic typing is used to perform business decisions in the abuse handling pipeline. Furthermore, the value data set should be kept as minimal as possible to avoid “type explosion”, which in turn dilutes the business value of the dynamic typing. In general, we normally have two types of abuse type IOC: ones referring to a compromized resource or ones referring to pieces of the criminal infrastructure, such as a command and control servers for example.| ||comment|String|Free text commentary about the abuse event inserted by an analyst.| |Destination|destination.abuse_contact|String|Abuse contact for destination address. TODO: list| -|Destination|destination.account|String|TODO: Description| +|Destination|destination.account|String|An account name or email address, which has been identified to relate to the destination of an abuse event.| |Destination|destination.allocated|DateTime|Allocation date corresponding to bgp prefix.| |Destination|destination.as_name|String|The autonomous system name to which the connection headed.| |Destination|destination.asn|Integer|The autonomous system number from which originated the connection.| -|Destination|destination.email_address|String|An email address, which has been identified to relate to the destination of an abuse event.| |Destination|destination.fqdn|FQDN|A DNS name related to the host to which the connection headed.| |Destination Geolocation|destination.geolocation.cc|String|Country-Code accoriding to ISO3166 for the destination IP.| |Destination Geolocation|destination.geolocation.city|String|Some geolocation services refer to city-level geolocation.| @@ -35,26 +33,24 @@ Harmonization field names |Event_Description|event_description.text|String|A free-form textual description of an abuse event.| |Event_Description|event_description.url|URL|A description URL is a link to a further description of the the abuse event in question.| ||event_hash|String|Computed event hash with 
specific keys and values that identify a unique event. At present, the hash should default to using the SHA1 function. Please note that for an event hash to be able to match more than one event (deduplication) the receiver of an event should calculate it based on a minimal set of keys and values present in the event. Using for example the observation time in the calculation will most likely render the checksum useless for deduplication purposes.| +||extra|String|All anecdotal information, which cannot be parsed into the data harmonization elements. E.g. os.name, os.version, user_agent. TODOs: Must be JSON encoded for machine readability.| |Feed|feed.code|String|Code name for the feed, e.g. DFGS, HSDAG etc.| |Feed|feed.name|String|Name for the feed, usually found in collector bot configuration.| |Feed|feed.url|URL|The URL of a given abuse feed, where applicable| |Malware|malware.hash|String|A string depicting a checksum for a file, be it a malware sample for example.| |Malware|malware.name|MalwareName|A malware family name in lower case.| |Malware|malware.version|String|A version string for an identified artifact generation, e.g. a crime-ware kit.| -||misp_ip|Integer|MISP - Malware Information Sharing Platform & Threat Sharing id.| -|Os|os.name|String|Operating system name.| -|Os|os.version|String|Operating system version.| +||misp_uuid|Integer|MISP - Malware Information Sharing Platform & Threat Sharing UUID.| |Protocol|protocol.application|String|e.g. vnc, ssh, sip, irc, http or p2p.| |Protocol|protocol.transport|String|e.g. tcp, udp, icmp.| ||raw|Base64|The original line of the event from encoded in base64.| ||rtir_id|Integer|Request Tracker Incident Response incident id.| ||screenshot_url|URL|Some source may report URLs related to a an image generated of a resource without any metadata. Or an URL pointing to resource, which has been rendered into a webshot, e.g. 
a PNG image and the relevant metadata related to its retrieval/generation.| |Source|source.abuse_contact|String|Abuse contact for source address. TODO: list?| -|Source|source.account|String|TODO: Description| +|Source|source.account|String|An account name or email address, which has been identified to relate to the source of an abuse event.| |Source|source.allocated|DateTime|Allocation date corresponding to bgp prefix.| |Source|source.as_name|String|The autonomous system name from which the connection originated.| |Source|source.asn|Integer|The autonomous system number from which originated the connection.| -|Source|source.email_address|String|An email address, which has been identified to relate to the source of an abuse event.| |Source|source.fqdn|FQDN|A DNS name related to the host from which the connection originated.| |Source Geolocation|source.geolocation.cc|String|Country-Code accoriding to ISO3166 for the source IP.| |Source Geolocation|source.geolocation.city|String|Some geolocation services refer to city-level geolocation.| @@ -77,7 +73,6 @@ Harmonization field names ||status|String|Status of the malicious resource (phishing, dropzone, etc), e.g. online, offline.| |Time|time.observation|DateTime|The time a source bot saw the event. This timestamp becomes especially important should you perform your own attribution on a host DNS name for example. The mechanism to denote the attributed elements with reference to the source provided is detailed below in Reported Identity IOC.(ISO8660).| |Time|time.source|DateTime|Time reported by a source. Some sources only report a date, which may be used here if there is no better observation.| -||user_agent|String|Some feeds report the user agent string used by the host to access a malicious resource, such as a command and control server.| Harmonization types @@ -86,6 +81,10 @@ Harmonization types ### Base64 +Base64 type. Always gives unicode strings. + +Sanitation encodes to base64 and accepts binary and unicode strings. 
+ ### Boolean @@ -143,7 +142,3 @@ Sanitation accepts strings and everything int() accepts. ### URL - - - - diff --git a/intelmq/bots/parsers/spamhaus/parser_cert.py b/intelmq/bots/parsers/spamhaus/parser_cert.py index 51fd26554..2c3c3ec58 100644 --- a/intelmq/bots/parsers/spamhaus/parser_cert.py +++ b/intelmq/bots/parsers/spamhaus/parser_cert.py @@ -70,7 +70,7 @@ def process(self): event.add('destination.ip', row_splitted[6], sanitize=True) event.add('destination.port', row_splitted[7], sanitize=True) if row_splitted[8] and row_splitted[8] != '-': - event.add('additional', + event.add('extra', json.dumps({'destination.local_port': int(row_splitted[8])}), sanitize=True) diff --git a/intelmq/conf/harmonization.conf b/intelmq/conf/harmonization.conf index eb35be1d6..742e23d59 100644 --- a/intelmq/conf/harmonization.conf +++ b/intelmq/conf/harmonization.conf @@ -1,9 +1,5 @@ { "event": { - "additional": { - "description": "All anecdotal information, which cannot be parsed into the data harmonization elements. TODO: Must be JSON encoded for machine readability.", - "type": "String" - }, "classification.identifier": { "description": "The lowercase identifier defines the actual software or service (e.g. 'heartbleed' or 'ntp_version') or standardized malware name (e.g. 'zeus').", "type": "String" @@ -134,6 +130,10 @@ "regex": "^[a-fA-F0-9]+$", "type": "String" }, + "extra": { + "description": "All anecdotal information, which cannot be parsed into the data harmonization elements. E.g. os.name, os.version, user_agent. TODOs: Must be JSON encoded for machine readability.", + "type": "String" + }, "feed.code": { "description": "Code name for the feed, e.g. 
DFGS, HSDAG etc.", "length": 100, @@ -154,7 +154,7 @@ }, "malware.name": { "description": "A malware family name in lower case.", - "regex": "[a-z ]+", + "regex": "[-a-z0-9_ ]+", "type": "MalwareName" }, "malware.version": { @@ -166,14 +166,6 @@ "description": "MISP - Malware Information Sharing Platform & Threat Sharing UUID.", "type": "Integer" }, - "os.name": { - "description": "Operating system name.", - "type": "String" - }, - "os.version": { - "description": "Operating system version.", - "type": "String" - }, "protocol.application": { "ascii": true, "description": "e.g. vnc, ssh, sip, irc, http or p2p.", @@ -318,10 +310,6 @@ "time.source": { "description": "Time reported by a source. Some sources only report a date, which may be used here if there is no better observation.", "type": "DateTime" - }, - "user_agent": { - "description": "Some feeds report the user agent string used by the host to access a malicious resource, such as a command and control server.", - "type": "String" } }, "report": { diff --git a/intelmq/tests/bots/parsers/spamhaus/test_parser_cert.py b/intelmq/tests/bots/parsers/spamhaus/test_parser_cert.py index 3c644900c..f058ecf67 100644 --- a/intelmq/tests/bots/parsers/spamhaus/test_parser_cert.py +++ b/intelmq/tests/bots/parsers/spamhaus/test_parser_cert.py @@ -50,7 +50,7 @@ 'destination.port': 80, 'destination.fqdn': 'dxxt.sinkhole.dk', 'destination.ip': '212.227.20.19', - 'additional': '{"destination.local_port": 1036}', + 'extra': '{"destination.local_port": 1036}', }, {'raw': 'MTA5LjkxLjAuMjI3LEFTNjgzMCxBVCwxNDQxMDExNjU3L' 'GNvbmZpY2tlciwyMTYuNjYuMTUuMTA5LDIxNi42Ni4xNS' @@ -61,7 +61,7 @@ 'malware.name': 'conficker', 'destination.port': 80, 'destination.ip': '216.66.15.109', - 'additional': '{"destination.local_port": 1430}', + 'extra': '{"destination.local_port": 1430}', }] From 813d6349fe144467dde4dfb167f556a5751e511f Mon Sep 17 00:00:00 2001 From: robcza Date: Wed, 9 Sep 2015 23:14:56 +0200 Subject: [PATCH 40/51] encode to utf-8 --- 
intelmq/bots/outputs/restapi/output.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intelmq/bots/outputs/restapi/output.py b/intelmq/bots/outputs/restapi/output.py index 4545c8c5a..3c5af4784 100644 --- a/intelmq/bots/outputs/restapi/output.py +++ b/intelmq/bots/outputs/restapi/output.py @@ -23,7 +23,7 @@ def process(self): return try: - r = self.session.post(self.parameters.host, event.to_json()) + r = self.session.post(self.parameters.host, event.to_json().encode('utf-8')) r.raise_for_status() except requests.exceptions.RequestException as e: self.logger.error("Request exception: " + str(e)) From 69ed51032e5e8104ad1567f34db7d5bd47380de4 Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Thu, 10 Sep 2015 09:28:53 +0200 Subject: [PATCH 41/51] ENH: postgres table parameter corrected autocommit Signed-off-by: Sebastian Wagner --- intelmq/bots/BOTS | 3 ++- intelmq/bots/outputs/postgresql/output.py | 8 +++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/intelmq/bots/BOTS b/intelmq/bots/BOTS index 5c00eeb40..aa1e9d903 100644 --- a/intelmq/bots/BOTS +++ b/intelmq/bots/BOTS @@ -581,7 +581,8 @@ "database": "intelmq-events", "user": "intelmq", "password": "", - "sslmode": "require" + "sslmode": "require", + "table": "events" } }, "REST API": { diff --git a/intelmq/bots/outputs/postgresql/output.py b/intelmq/bots/outputs/postgresql/output.py index c842af3f7..cf415ac4d 100644 --- a/intelmq/bots/outputs/postgresql/output.py +++ b/intelmq/bots/outputs/postgresql/output.py @@ -30,7 +30,9 @@ def init(self): connect_timeout=connect_timeout, ) self.cur = self.con.cursor() - self.autocommit = getattr(self.parameters, 'autocommit', True) + self.con.autocommit = getattr(self.parameters, 'autocommit', True) + + self.table = self.parameters.table except: self.logger.exception('Failed to connect to database') self.stop() @@ -46,8 +48,8 @@ def process(self): keys = '", "'.join(event.keys()) values = event.values() fvalues = len(values) * '%s, ' - 
query = ('INSERT INTO events ("{keys}") VALUES ({values})' - ''.format(keys=keys, values=fvalues[:-2])) + query = ('INSERT INTO {table} ("{keys}") VALUES ({values})' + ''.format(table=self.table, keys=keys, values=fvalues[:-2])) self.logger.debug('Query: {!r} with values {!r}'.format(query, values)) try: From 5638ca2360dca48734abf2cff4c431b71c642268 Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Thu, 10 Sep 2015 12:17:54 +0200 Subject: [PATCH 42/51] BUG: lib/test required field check corrected event is copied before time.observation is deleted feed.name is required, not feed.url (does not exist for mail collectors) Signed-off-by: Sebastian Wagner --- intelmq/lib/test.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/intelmq/lib/test.py b/intelmq/lib/test.py index 9f7b30440..f43c75e73 100644 --- a/intelmq/lib/test.py +++ b/intelmq/lib/test.py @@ -301,7 +301,7 @@ def test_event(self): event = message.MessageFactory.unserialize(event_json) self.assertIsInstance(event, message.Event) self.assertIn('classification.type', event) - self.assertIn('feed.url', event) + self.assertIn('feed.name', event) self.assertIn('raw', event) self.assertIn('time.observation', event) @@ -352,7 +352,8 @@ def assertMessageEqual(self, queue_pos, expected_message): self.assertIsInstance(event, six.text_type) event_dict = json.loads(event) + expected = expected_message.copy() del event_dict['time.observation'] - del expected_message['time.observation'] + del expected['time.observation'] - self.assertDictEqual(expected_message, event_dict) + self.assertDictEqual(expected, event_dict) From 07f1b353d02f1d20eef81fdd8437198acc03396a Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Thu, 10 Sep 2015 12:35:38 +0200 Subject: [PATCH 43/51] ENH: import sorts in tests, removed json Signed-off-by: Sebastian Wagner --- intelmq/tests/bots/experts/abusix/test_expert.py | 3 +-- intelmq/tests/bots/experts/asn_lookup/test_expert.py | 2 -- 
intelmq/tests/bots/experts/certat_contact/test_expert.py | 2 -- intelmq/tests/bots/experts/cymru_whois/test_expert.py | 2 +- intelmq/tests/bots/experts/reverse_dns/test_expert.py | 2 -- .../tests/bots/experts/ripencc_abuse_contact/test_expert.py | 1 - intelmq/tests/bots/experts/taxonomy/test_expert.py | 1 - intelmq/tests/bots/experts/tor_nodes/test_expert.py | 1 - intelmq/tests/bots/parsers/fraunhofer/test_parser_dga.py | 1 - intelmq/tests/bots/parsers/spamhaus/test_parser_cert.py | 1 - intelmq/tests/lib/test_bot.py | 2 +- intelmq/tests/lib/test_message.py | 5 +++-- intelmq/tests/lib/test_pipeline.py | 1 + 13 files changed, 7 insertions(+), 17 deletions(-) diff --git a/intelmq/tests/bots/experts/abusix/test_expert.py b/intelmq/tests/bots/experts/abusix/test_expert.py index 7f272e732..1a3f5be29 100644 --- a/intelmq/tests/bots/experts/abusix/test_expert.py +++ b/intelmq/tests/bots/experts/abusix/test_expert.py @@ -1,12 +1,11 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals -import json + import unittest import intelmq.lib.test as test from intelmq.bots.experts.abusix.expert import AbusixExpertBot - EXAMPLE_INPUT = {"__type": "Event", "source.ip": "93.184.216.34", # example.com "destination.ip": "192.0.43.8", # iana.org diff --git a/intelmq/tests/bots/experts/asn_lookup/test_expert.py b/intelmq/tests/bots/experts/asn_lookup/test_expert.py index c94a1bbf2..c757bd451 100644 --- a/intelmq/tests/bots/experts/asn_lookup/test_expert.py +++ b/intelmq/tests/bots/experts/asn_lookup/test_expert.py @@ -7,14 +7,12 @@ """ from __future__ import unicode_literals -import json import os import unittest import intelmq.lib.test as test from intelmq.bots.experts.asn_lookup.expert import ASNLookupExpertBot - ASN_DB = '/opt/intelmq/var/lib/bots/asn_lookup/ipasn.dat' EXAMPLE_INPUT = {"__type": "Event", "source.ip": "93.184.216.34", # example.com diff --git a/intelmq/tests/bots/experts/certat_contact/test_expert.py b/intelmq/tests/bots/experts/certat_contact/test_expert.py 
index faf04e099..93d7f1b35 100644 --- a/intelmq/tests/bots/experts/certat_contact/test_expert.py +++ b/intelmq/tests/bots/experts/certat_contact/test_expert.py @@ -4,13 +4,11 @@ """ from __future__ import unicode_literals -import json import unittest import intelmq.lib.test as test from intelmq.bots.experts.certat_contact.expert import CERTatContactExpertBot - EXAMPLE_INPUT = {"__type": "Event", "source.ip": "93.184.216.34", # example.com "destination.ip": "83.136.38.146", # cert.at diff --git a/intelmq/tests/bots/experts/cymru_whois/test_expert.py b/intelmq/tests/bots/experts/cymru_whois/test_expert.py index 1c87ebbfc..dd99156a3 100644 --- a/intelmq/tests/bots/experts/cymru_whois/test_expert.py +++ b/intelmq/tests/bots/experts/cymru_whois/test_expert.py @@ -1,12 +1,12 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals + import json import unittest import intelmq.lib.test as test from intelmq.bots.experts.cymru_whois.expert import CymruExpertBot - EXAMPLE_INPUT = {"__type": "Event", "source.ip": "93.184.216.34", # example.com "time.observation": "2015-01-01T00:00:00+00:00", diff --git a/intelmq/tests/bots/experts/reverse_dns/test_expert.py b/intelmq/tests/bots/experts/reverse_dns/test_expert.py index 13ead2c02..72bbce9a0 100644 --- a/intelmq/tests/bots/experts/reverse_dns/test_expert.py +++ b/intelmq/tests/bots/experts/reverse_dns/test_expert.py @@ -1,13 +1,11 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals -import json import unittest import intelmq.lib.test as test from intelmq.bots.experts.reverse_dns.expert import ReverseDnsExpertBot - EXAMPLE_INPUT = {"__type": "Event", "source.ip": "192.0.43.7", # icann.org "destination.ip": "192.0.43.8", # iana.org diff --git a/intelmq/tests/bots/experts/ripencc_abuse_contact/test_expert.py b/intelmq/tests/bots/experts/ripencc_abuse_contact/test_expert.py index e17086bbf..ccdd19dbe 100644 --- a/intelmq/tests/bots/experts/ripencc_abuse_contact/test_expert.py +++ 
b/intelmq/tests/bots/experts/ripencc_abuse_contact/test_expert.py @@ -7,7 +7,6 @@ import intelmq.lib.test as test from intelmq.bots.experts.ripencc_abuse_contact.expert import RIPENCCExpertBot - EXAMPLE_INPUT = {"__type": "Event", "source.ip": "93.184.216.34", # example.com "destination.ip": "193.238.157.5", # funkfeuer.at diff --git a/intelmq/tests/bots/experts/taxonomy/test_expert.py b/intelmq/tests/bots/experts/taxonomy/test_expert.py index 8fef93042..83e2174a6 100644 --- a/intelmq/tests/bots/experts/taxonomy/test_expert.py +++ b/intelmq/tests/bots/experts/taxonomy/test_expert.py @@ -7,7 +7,6 @@ import intelmq.lib.test as test from intelmq.bots.experts.taxonomy.expert import TaxonomyExpertBot - EXAMPLE_INPUT = {"__type": "Event", "classification.type": "defacement", "time.observation": "2015-01-01T00:00:00+00:00", diff --git a/intelmq/tests/bots/experts/tor_nodes/test_expert.py b/intelmq/tests/bots/experts/tor_nodes/test_expert.py index dfb4581c8..24dea2dde 100644 --- a/intelmq/tests/bots/experts/tor_nodes/test_expert.py +++ b/intelmq/tests/bots/experts/tor_nodes/test_expert.py @@ -13,7 +13,6 @@ import intelmq.lib.test as test from intelmq.bots.experts.tor_nodes.expert import TorExpertBot - TOR_DB = '/opt/intelmq/var/lib/bots/tor_nodes/tor_nodes.dat' EXAMPLE_INPUT = {"__type": "Event", "source.ip": "37.130.227.133", diff --git a/intelmq/tests/bots/parsers/fraunhofer/test_parser_dga.py b/intelmq/tests/bots/parsers/fraunhofer/test_parser_dga.py index d6fb880d0..362c2b32e 100644 --- a/intelmq/tests/bots/parsers/fraunhofer/test_parser_dga.py +++ b/intelmq/tests/bots/parsers/fraunhofer/test_parser_dga.py @@ -7,7 +7,6 @@ import intelmq.lib.test as test from intelmq.bots.parsers.fraunhofer.parser_dga import FraunhoferDGAParserBot - EXAMPLE_REPORT = {"feed.url": "https://dgarchive.caad.fkie.fraunhofer.de/today", "raw": "ewogICJiYW5qb3JpX2RnYV9hbmRlcnNlbnNpbmFpeC5jb21fMHgz" "YzAzIjogWwogICAgImFuZGVyc2Vuc2luYWl4LmNvbSIsCiAgICAi" diff --git 
a/intelmq/tests/bots/parsers/spamhaus/test_parser_cert.py b/intelmq/tests/bots/parsers/spamhaus/test_parser_cert.py index f058ecf67..b2436ed24 100644 --- a/intelmq/tests/bots/parsers/spamhaus/test_parser_cert.py +++ b/intelmq/tests/bots/parsers/spamhaus/test_parser_cert.py @@ -7,7 +7,6 @@ import intelmq.lib.test as test from intelmq.bots.parsers.spamhaus.parser_cert import SpamhausCERTParserBot - EXAMPLE_REPORT = {"feed.url": "https://portal.spamhaus.org/cert/api.php?cert=" "&key=", "raw": "OyBpcCwgYXNuLCBjb3VudHJ5LCBsYXN0c2VlbiwgYm90bmFtZSwg" diff --git a/intelmq/tests/lib/test_bot.py b/intelmq/tests/lib/test_bot.py index 4fc5f1c36..fe75f1d14 100644 --- a/intelmq/tests/lib/test_bot.py +++ b/intelmq/tests/lib/test_bot.py @@ -8,12 +8,12 @@ import json import logging import os -import pkg_resources import unittest import intelmq.lib.pipeline as pipeline import intelmq.lib.utils as utils import mock +import pkg_resources from intelmq import PIPELINE_CONF_FILE, RUNTIME_CONF_FILE, SYSTEM_CONF_FILE from intelmq.lib.test import mocked_logger diff --git a/intelmq/tests/lib/test_message.py b/intelmq/tests/lib/test_message.py index 4c5a7d516..ac33e3c07 100644 --- a/intelmq/tests/lib/test_message.py +++ b/intelmq/tests/lib/test_message.py @@ -9,12 +9,13 @@ from __future__ import unicode_literals import json -import pkg_resources -import six import unittest import intelmq.lib.exceptions as exceptions import intelmq.lib.utils as utils +import pkg_resources +import six + conf_filename = pkg_resources.resource_filename('intelmq', 'conf/harmonization.conf') harm_config = utils.load_configuration(conf_filename) diff --git a/intelmq/tests/lib/test_pipeline.py b/intelmq/tests/lib/test_pipeline.py index a91da4dd0..b09d654e2 100644 --- a/intelmq/tests/lib/test_pipeline.py +++ b/intelmq/tests/lib/test_pipeline.py @@ -12,6 +12,7 @@ TODO: check internal representation of data in redis (like with Pythonlist) """ from __future__ import unicode_literals + import unittest import 
intelmq.lib.pipeline as pipeline From c2fdbf384785904c4fd87a8e28edc0d34c3482d3 Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Thu, 10 Sep 2015 13:04:16 +0200 Subject: [PATCH 44/51] ENH: Move utils to bin/ Signed-off-by: Sebastian Wagner --- .../docs/gen_harm_docs.py => bin/intelmq_gen_harm_docs.py} | 0 .../psql_initdb_generator.py => bin/intelmq_psql_initdb.py} | 2 +- intelmq/lib/docs/__init__.py | 0 setup.py | 4 +++- 4 files changed, 4 insertions(+), 2 deletions(-) rename intelmq/{lib/docs/gen_harm_docs.py => bin/intelmq_gen_harm_docs.py} (100%) rename intelmq/{bots/outputs/postgresql/psql_initdb_generator.py => bin/intelmq_psql_initdb.py} (98%) delete mode 100644 intelmq/lib/docs/__init__.py diff --git a/intelmq/lib/docs/gen_harm_docs.py b/intelmq/bin/intelmq_gen_harm_docs.py similarity index 100% rename from intelmq/lib/docs/gen_harm_docs.py rename to intelmq/bin/intelmq_gen_harm_docs.py diff --git a/intelmq/bots/outputs/postgresql/psql_initdb_generator.py b/intelmq/bin/intelmq_psql_initdb.py similarity index 98% rename from intelmq/bots/outputs/postgresql/psql_initdb_generator.py rename to intelmq/bin/intelmq_psql_initdb.py index 103bca8fc..900d3343a 100755 --- a/intelmq/bots/outputs/postgresql/psql_initdb_generator.py +++ b/intelmq/bin/intelmq_psql_initdb.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python # -*- coding: utf-8 -*- """ Generates a SQL command file with commands to create the events table. 
diff --git a/intelmq/lib/docs/__init__.py b/intelmq/lib/docs/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/setup.py b/setup.py index 0b204f57d..0086210e5 100644 --- a/setup.py +++ b/setup.py @@ -59,7 +59,9 @@ ], ), ('/opt/intelmq/bin/', [ - 'intelmq/bin/intelmqctl' + 'intelmq/bin/intelmqctl', + 'intelmq/bin/intelmq_gen_harm_docs.py', + 'intelmq/bin/intelmq_psql_initdb.py', ], ), ], From e0c7dd3d9c2e9279ca914bec11b980494c9c642d Mon Sep 17 00:00:00 2001 From: robcza Date: Thu, 10 Sep 2015 14:58:12 +0200 Subject: [PATCH 45/51] detailed logging of exceptions --- intelmq/bots/outputs/restapi/output.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/intelmq/bots/outputs/restapi/output.py b/intelmq/bots/outputs/restapi/output.py index 3c5af4784..e80a01a0f 100644 --- a/intelmq/bots/outputs/restapi/output.py +++ b/intelmq/bots/outputs/restapi/output.py @@ -26,7 +26,9 @@ def process(self): r = self.session.post(self.parameters.host, event.to_json().encode('utf-8')) r.raise_for_status() except requests.exceptions.RequestException as e: - self.logger.error("Request exception: " + str(e)) + self.logger.error( + 'Event: {0}\nResponse code: {1}\nHeaders: {2}\nResponse body: {3}'.format( + event.to_json(), r, r.headers, r.text)) self.acknowledge_message() From 61bb5ee22b63246507a458df084567556589abba Mon Sep 17 00:00:00 2001 From: Thanat0s Date: Sat, 30 May 2015 20:41:23 +0200 Subject: [PATCH 46/51] rfc1918 plugin initial Conflicts: intelmq/bots/utils.py --- intelmq/bots/experts/rfc1918/__init__.py | 1 + intelmq/bots/experts/rfc1918/rfc1918.py | 75 ++++++++++++++++++++++++ 2 files changed, 76 insertions(+) create mode 100644 intelmq/bots/experts/rfc1918/__init__.py create mode 100644 intelmq/bots/experts/rfc1918/rfc1918.py diff --git a/intelmq/bots/experts/rfc1918/__init__.py b/intelmq/bots/experts/rfc1918/__init__.py new file mode 100644 index 000000000..8d1c8b69c --- /dev/null +++ b/intelmq/bots/experts/rfc1918/__init__.py @@ 
-0,0 +1 @@ + diff --git a/intelmq/bots/experts/rfc1918/rfc1918.py b/intelmq/bots/experts/rfc1918/rfc1918.py new file mode 100644 index 000000000..fd8a2976e --- /dev/null +++ b/intelmq/bots/experts/rfc1918/rfc1918.py @@ -0,0 +1,75 @@ +from intelmq.lib.bot import Bot, sys +from intelmq.lib.message import Event +from intelmq.bots import utils + +# Source link : http://en.wikipedia.org/wiki/IPv4 +# http://www.ietf.org/rfc/rfc3849.txt +# http://www.ietf.org/rfc/rfc4291.txt + +ipranges = ("0.0.0.0/8", "10.0.0.0/8", "100.64.0.0/10", "127.0.0.0/8", + "169.254.0.0/16", "172.16.0.0/12", "192.0.0.0/24", "192.0.2.0/24", + "192.88.99.0/24", "192.168.0.0/16", "198.18.0.0/15", "198.51.100.0/24", + "203.0.113.0/24", "224.0.0.0/4", "240.0.0.0/4", "255.255.255.255/32", + "fe80::/64", "2001:0db8::/32") + + +class RFC1918Bot(Bot): +# +# RFC 1918 Will Drop Local IP from a given record and a bit more. +# It Check for RFC1918 IPv4 Host +# It Check for Localhosts, multicast, test lans +# It Check for Link Local and Documentation Lan in IPv6 +# +# Need only to feed the parameter "fields" to set the name of the field +# parameter designed to be filtered out. 
+# +# Several parameters could be used, separated by "," +# +# It could sanitize the whole records with the "drop" parameter set to "yes" +# + + def process(self): + report = self.receive_message() + + banned = [] + + # Read the config to see if we should drop or clean + if self.parameters.drop.upper() == "YES": + drop = True + dtext = "drop" + else: + drop = False + dtext = "apply cleanup" + + fields = self.parameters.fields + self.logger.debug("Will %s on parameter %s" % (dtext, fields)) + + if report: + for field in self.parameters.fields.split(","): + field = field.strip() # If not cleanly inputed in parameter + value = report.value(field) + if value: + found = False + for iprange in ipranges: # for All ranges do the test + if utils.is_in_net(value, iprange): + self.logger.debug("Found %s in %s in record %s" + % (value, iprange, field)) + found = True + if not drop: # if drop is not required we will + # drop only the field + self.logger.debug("Value removed from %s" + % (field)) + report.discard(field, value) + break # We found it exit loop + else: + self.logger.warning("Field %s is non existant" % (field)) + if found: # If the IP was found + if not drop: # and if we don't want the record at all + self.send_message(report) # If we have sanitized, save msg + else: # If the IP has not been found + self.send_message(report) + self.acknowledge_message() + +if __name__ == "__main__": + bot = RFC1918Bot(sys.argv[1]) + bot.start() From 1b234c312059688b40aaa7b90ebd442636093673 Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Thu, 10 Sep 2015 15:14:50 +0200 Subject: [PATCH 47/51] ENH+TST: Integrate RFC1918, add tests and config closes #192 Signed-off-by: Sebastian Wagner --- intelmq/bots/BOTS | 8 ++ intelmq/bots/experts/rfc1918/expert.py | 74 ++++++++++++++++++ intelmq/bots/experts/rfc1918/rfc1918.py | 75 ------------------- intelmq/lib/test.py | 6 ++ .../tests/bots/experts/rfc1918/test_expert.py | 52 +++++++++++++ 5 files changed, 140 insertions(+), 75 
deletions(-) create mode 100644 intelmq/bots/experts/rfc1918/expert.py delete mode 100644 intelmq/bots/experts/rfc1918/rfc1918.py create mode 100644 intelmq/tests/bots/experts/rfc1918/test_expert.py diff --git a/intelmq/bots/BOTS b/intelmq/bots/BOTS index aa1e9d903..a68a95c18 100644 --- a/intelmq/bots/BOTS +++ b/intelmq/bots/BOTS @@ -517,6 +517,14 @@ "database": "/opt/intelmq/var/lib/bots/maxmind_geoip/GeoLite2-City.mmdb" } }, + "RFC 1918": { + "description": "RFC 1918 removes fields or discards events if an ip is invalid (local, reserved, documentation).", + "module": "intelmq.bots.experts.rfc1918.expert", + "parameters": { + "fields": "destination.ip,source.ip", + "policy": "del,drop" + } + }, "RIPENCC": { "description": "RIPENCC is the bot resposible to get the correspondent abuse contact from source IP and destination IP of the events. RIPEstat documentation: https://stat.ripe.net/docs/data_api ", "module": "intelmq.bots.experts.ripencc_abuse_contact.expert", diff --git a/intelmq/bots/experts/rfc1918/expert.py b/intelmq/bots/experts/rfc1918/expert.py new file mode 100644 index 000000000..2d055b7c8 --- /dev/null +++ b/intelmq/bots/experts/rfc1918/expert.py @@ -0,0 +1,74 @@ +# *- coding: utf-8 -*- +""" +RFC 1918 Will Drop Local IP from a given record and a bit more. + It Check for RFC1918 IPv4 Host + It Check for Localhosts, multicast, test lans + It Check for Link Local and Documentation Lan in IPv6 + +Need only to feed the parameter "fields" to set the name of the field +parameter designed to be filtered out. 
+Several parameters could be used, separated by "," +It could sanitize the whole records with the "drop" parameter set to "yes" + +Sources: +https://tools.ietf.org/html/rfc1918 +https://tools.ietf.org/html/rfc3849 +https://tools.ietf.org/html/rfc4291 +https://tools.ietf.org/html/rfc5737 +https://en.wikipedia.org/wiki/IPv4 + +TODO: Extend for example domains +""" +from __future__ import unicode_literals + +import ipaddress +import sys + +from intelmq.lib.bot import Bot + +NETWORKS = ("10.0.0.0/8", "100.64.0.0/10", "127.0.0.0/8", + "169.254.0.0/16", "172.16.0.0/12", "192.0.0.0/24", "192.0.2.0/24", + "192.88.99.0/24", "192.168.0.0/16", "198.18.0.0/15", + "198.51.100.0/24", "203.0.113.0/24", "224.0.0.0/4", "240.0.0.0/4", + "255.255.255.255/32", "fe80::/64", "2001:0db8::/32") + + +def is_in_net(ip, iprange): + if ipaddress.ip_address(ip) in ipaddress.ip_network(iprange): + return True + else: + return False + + +class RFC1918ExpertBot(Bot): + + def init(self): + self.fields = self.parameters.fields.lower().strip().split(",") + self.policy = self.parameters.policy.lower().strip().split(",") + + def process(self): + event = self.receive_message() + + if event is None: + self.acknowledge_message() + return + + for field, policy in zip(self.fields, self.policy): + if field not in event: + continue + value = event.value(field) + for iprange in NETWORKS: + if is_in_net(value, iprange): + if policy == 'del': + self.logger.debug("Value removed from %s." 
% (field)) + del event[field] + elif policy == 'drop': + self.acknowledge_message() + return + break + self.send_message(event) + self.acknowledge_message() + +if __name__ == "__main__": + bot = RFC1918ExpertBot(sys.argv[1]) + bot.start() diff --git a/intelmq/bots/experts/rfc1918/rfc1918.py b/intelmq/bots/experts/rfc1918/rfc1918.py deleted file mode 100644 index fd8a2976e..000000000 --- a/intelmq/bots/experts/rfc1918/rfc1918.py +++ /dev/null @@ -1,75 +0,0 @@ -from intelmq.lib.bot import Bot, sys -from intelmq.lib.message import Event -from intelmq.bots import utils - -# Source link : http://en.wikipedia.org/wiki/IPv4 -# http://www.ietf.org/rfc/rfc3849.txt -# http://www.ietf.org/rfc/rfc4291.txt - -ipranges = ("0.0.0.0/8", "10.0.0.0/8", "100.64.0.0/10", "127.0.0.0/8", - "169.254.0.0/16", "172.16.0.0/12", "192.0.0.0/24", "192.0.2.0/24", - "192.88.99.0/24", "192.168.0.0/16", "198.18.0.0/15", "198.51.100.0/24", - "203.0.113.0/24", "224.0.0.0/4", "240.0.0.0/4", "255.255.255.255/32", - "fe80::/64", "2001:0db8::/32") - - -class RFC1918Bot(Bot): -# -# RFC 1918 Will Drop Local IP from a given record and a bit more. -# It Check for RFC1918 IPv4 Host -# It Check for Localhosts, multicast, test lans -# It Check for Link Local and Documentation Lan in IPv6 -# -# Need only to feed the parameter "fields" to set the name of the field -# parameter designed to be filtered out. 
-# -# Several parameters could be used, separated by "," -# -# It could sanitize the whole records with the "drop" parameter set to "yes" -# - - def process(self): - report = self.receive_message() - - banned = [] - - # Read the config to see if we should drop or clean - if self.parameters.drop.upper() == "YES": - drop = True - dtext = "drop" - else: - drop = False - dtext = "apply cleanup" - - fields = self.parameters.fields - self.logger.debug("Will %s on parameter %s" % (dtext, fields)) - - if report: - for field in self.parameters.fields.split(","): - field = field.strip() # If not cleanly inputed in parameter - value = report.value(field) - if value: - found = False - for iprange in ipranges: # for All ranges do the test - if utils.is_in_net(value, iprange): - self.logger.debug("Found %s in %s in record %s" - % (value, iprange, field)) - found = True - if not drop: # if drop is not required we will - # drop only the field - self.logger.debug("Value removed from %s" - % (field)) - report.discard(field, value) - break # We found it exit loop - else: - self.logger.warning("Field %s is non existant" % (field)) - if found: # If the IP was found - if not drop: # and if we don't want the record at all - self.send_message(report) # If we have sanitized, save msg - else: # If the IP has not been found - self.send_message(report) - self.acknowledge_message() - -if __name__ == "__main__": - bot = RFC1918Bot(sys.argv[1]) - bot.start() diff --git a/intelmq/lib/test.py b/intelmq/lib/test.py index f43c75e73..2ef98d538 100644 --- a/intelmq/lib/test.py +++ b/intelmq/lib/test.py @@ -341,6 +341,12 @@ def assertNotRegexpMatchesLog(self, pattern): except AttributeError: self.assertNotRegex(self.loglines_buffer, pattern) + def assertOutputQueueLen(self, queue_len=0): + """ + Asserts that the output queue has the expected length. 
+ """ + self.assertEqual(len(self.get_output_queue()), queue_len) + def assertMessageEqual(self, queue_pos, expected_message): """ Asserts that the given expected_message is diff --git a/intelmq/tests/bots/experts/rfc1918/test_expert.py b/intelmq/tests/bots/experts/rfc1918/test_expert.py new file mode 100644 index 000000000..85d0bd277 --- /dev/null +++ b/intelmq/tests/bots/experts/rfc1918/test_expert.py @@ -0,0 +1,52 @@ +# -*- coding: utf-8 -*- +""" +Testing rfc 1918 expert bot. +""" +from __future__ import unicode_literals + +import unittest + +import intelmq.lib.test as test +from intelmq.bots.experts.rfc1918.expert import RFC1918ExpertBot + +INPUT1 = {"__type": "Event", + "source.ip": "93.184.216.34", # example.com + "destination.ip": "192.0.2.9", # TEST-NET-1 + "time.observation": "2015-01-01T00:00:00+00:00", + } +OUTPUT1 = {"__type": "Event", + "source.ip": "93.184.216.34", # example.com + "time.observation": "2015-01-01T00:00:00+00:00", + } +INPUT2 = {"__type": "Event", + "source.ip": "192.168.0.1", # + "time.observation": "2015-01-01T00:00:00+00:00", + } + + +class TestRFC1918ExpertBot(test.BotTestCase, unittest.TestCase): + """ + A TestCase for RFC1918ExpertBot. 
+ """ + + @classmethod + def set_bot(self): + self.bot_reference = RFC1918ExpertBot + self.sysconfig = {'fields': 'destination.ip,source.ip', + 'policy': 'del,drop', + } + self.default_input_message = {'__type': 'Event'} + + def test_del(self): + self.input_message = INPUT1 + self.run_bot() + self.assertMessageEqual(0, OUTPUT1) + + def test_drop(self): + self.input_message = INPUT2 + self.run_bot() + self.assertOutputQueueLen(0) + + +if __name__ == '__main__': + unittest.main() From 2b335b07df9ee14560c2b3b5e4a4703776fcd57b Mon Sep 17 00:00:00 2001 From: robcza Date: Fri, 11 Sep 2015 11:02:40 +0200 Subject: [PATCH 48/51] requests REQUIREMENTS because of restapi output bot --- REQUIREMENTS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/REQUIREMENTS b/REQUIREMENTS index 6aba4ac32..5fa5063d4 100644 --- a/REQUIREMENTS +++ b/REQUIREMENTS @@ -15,3 +15,5 @@ redis>=2.10.3 six>=1.7 unicodecsv>=0.9.4 xmpppy>=0.5.0rc1 +requests>=2.4.2 + From ba40ab0a17d75d375bb08e1da53de27b98e0f087 Mon Sep 17 00:00:00 2001 From: robcza Date: Fri, 11 Sep 2015 11:08:17 +0200 Subject: [PATCH 49/51] sorted --- REQUIREMENTS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/REQUIREMENTS b/REQUIREMENTS index 5fa5063d4..b126d9f74 100644 --- a/REQUIREMENTS +++ b/REQUIREMENTS @@ -12,8 +12,8 @@ python-dateutil>=1.5 pytz>=2012d pyzmq>=14.6.0 redis>=2.10.3 +requests>=2.4.2 six>=1.7 unicodecsv>=0.9.4 xmpppy>=0.5.0rc1 -requests>=2.4.2 From 9dc8b51900c79cf1f7975dc05c51979dd8ca46c5 Mon Sep 17 00:00:00 2001 From: robcza Date: Fri, 11 Sep 2015 11:16:58 +0200 Subject: [PATCH 50/51] requests>=2.4.2 to the REQUIREMENTS3 as well --- REQUIREMENTS3 | 1 + 1 file changed, 1 insertion(+) diff --git a/REQUIREMENTS3 b/REQUIREMENTS3 index 358be9f08..72d6fde40 100644 --- a/REQUIREMENTS3 +++ b/REQUIREMENTS3 @@ -11,6 +11,7 @@ python-dateutil>=1.5 pytz>=2012d pyzmq>=14.6.0 redis>=2.10.3 +requests>=2.4.2 six>=1.7 unicodecsv>=0.9.4 xmpppy>=0.5.0rc1 From 178c4c0b8d6d4648bb4fa35fb8b644349b9bc930 Mon Sep 17 
00:00:00 2001 From: Sebastian Wagner Date: Fri, 11 Sep 2015 12:48:05 +0200 Subject: [PATCH 51/51] BUG: move modify config to var Signed-off-by: Sebastian Wagner --- intelmq/__init__.py | 2 -- intelmq/bots/BOTS | 7 +++++++ intelmq/bots/experts/modify/expert.py | 5 ++--- intelmq/{conf => bots/experts/modify}/modify.conf | 0 intelmq/tests/bots/experts/modify/test_expert.py | 7 ++++--- setup.py | 8 ++++++-- setup_travis.py | 2 +- 7 files changed, 20 insertions(+), 11 deletions(-) rename intelmq/{conf => bots/experts/modify}/modify.conf (100%) diff --git a/intelmq/__init__.py b/intelmq/__init__.py index 6ca5c4a0c..40bfa7822 100644 --- a/intelmq/__init__.py +++ b/intelmq/__init__.py @@ -2,10 +2,8 @@ DEFAULT_LOGGING_PATH = "/opt/intelmq/var/log/" DEFAULTS_CONF_FILE = "/opt/intelmq/etc/defaults.conf" HARMONIZATION_CONF_FILE = "/opt/intelmq/etc/harmonization.conf" -MODIFY_CONF_FILE = "/opt/intelmq/etc/modify.conf" PIPELINE_CONF_FILE = "/opt/intelmq/etc/pipeline.conf" RUNTIME_CONF_FILE = "/opt/intelmq/etc/runtime.conf" STARTUP_CONF_FILE = "/opt/intelmq/etc/startup.conf" SYSTEM_CONF_FILE = "/opt/intelmq/etc/system.conf" VAR_RUN_PATH = "/opt/intelmq/var/run/" - diff --git a/intelmq/bots/BOTS b/intelmq/bots/BOTS index a68a95c18..e6d53cb52 100644 --- a/intelmq/bots/BOTS +++ b/intelmq/bots/BOTS @@ -517,6 +517,13 @@ "database": "/opt/intelmq/var/lib/bots/maxmind_geoip/GeoLite2-City.mmdb" } }, + "Modify": { + "description": "Modify bot can make nearly arbitrary changes to event's fields based on regex-rules on different values. 
See docs/Bots.md for some examples.", + "module": "intelmq.bots.experts.modify.expert", + "parameters": { + "configuration_path": "/opt/intelmq/var/lib/bots/modify/modify.conf" + } + }, "RFC 1918": { "description": "RFC 1918 removes fields or discards events if an ip is invalid (local, reserved, documentation).", "module": "intelmq.bots.experts.rfc1918.expert", diff --git a/intelmq/bots/experts/modify/expert.py b/intelmq/bots/experts/modify/expert.py index dd9cc1f6f..317daae64 100644 --- a/intelmq/bots/experts/modify/expert.py +++ b/intelmq/bots/experts/modify/expert.py @@ -7,9 +7,8 @@ from __future__ import unicode_literals import re -from intelmq import MODIFY_CONF_FILE from intelmq.lib.bot import Bot -import intelmq.lib.utils +from intelmq.lib.utils import load_configuration def matches(event, *rules): @@ -34,7 +33,7 @@ def apply_action(event, action): class ModifyExpertBot(Bot): def init(self): - self.config = intelmq.lib.utils.load_configuration(MODIFY_CONF_FILE) + self.config = load_configuration(self.parameters.configuration_path) def process(self): event = self.receive_message() diff --git a/intelmq/conf/modify.conf b/intelmq/bots/experts/modify/modify.conf similarity index 100% rename from intelmq/conf/modify.conf rename to intelmq/bots/experts/modify/modify.conf diff --git a/intelmq/tests/bots/experts/modify/test_expert.py b/intelmq/tests/bots/experts/modify/test_expert.py index 4d8bbc37e..2db740dcf 100644 --- a/intelmq/tests/bots/experts/modify/test_expert.py +++ b/intelmq/tests/bots/experts/modify/test_expert.py @@ -5,6 +5,7 @@ from __future__ import unicode_literals import unittest +from pkg_resources import resource_filename import intelmq.lib.test as test from intelmq.bots.experts.modify.expert import ModifyExpertBot @@ -39,9 +40,9 @@ class TestModifyExpertBot(test.BotTestCase, unittest.TestCase): @classmethod def set_bot(self): self.bot_reference = ModifyExpertBot - self.sysconfig = {'filter': False, - 'overwrite_cc': False, - 'verify_cert': False, 
+ config_path = resource_filename('intelmq', + 'bots/experts/modify/modify.conf') + self.sysconfig = {'configuration_path': config_path } def test_events(self): diff --git a/setup.py b/setup.py index 0086210e5..4655ce422 100644 --- a/setup.py +++ b/setup.py @@ -25,6 +25,7 @@ '/opt/intelmq/var/lib', '/opt/intelmq/var/lib/bots', '/opt/intelmq/var/lib/bots/file-output', + '/opt/intelmq/var/lib/bots/modify', '/opt/intelmq/var/log', '/opt/intelmq/var/run', ] @@ -40,7 +41,7 @@ maintainer='Tomas Lima', maintainer_email='synchroack@gmail.com', packages=find_packages(), - package_data={'intelmq': ['conf/*.conf']}, + package_data={'intelmq': ['conf/*.conf', 'bots/experts/modify/*.conf']}, url='http://pypi.python.org/pypi/intelmq/', license='AGPLv3', description="IntelMQ Tool", @@ -51,7 +52,6 @@ 'intelmq/bots/BOTS', 'intelmq/conf/defaults.conf', 'intelmq/conf/harmonization.conf', - 'intelmq/conf/modify.conf', 'intelmq/conf/pipeline.conf', 'intelmq/conf/runtime.conf', 'intelmq/conf/startup.conf', @@ -64,5 +64,9 @@ 'intelmq/bin/intelmq_psql_initdb.py', ], ), + ('/opt/intelmq/var/lib/bots/modify/', [ + 'intelmq/bots/experts/modify/modify.conf', + ], + ), ], ) diff --git a/setup_travis.py b/setup_travis.py index 8742be7a6..40f4433fa 100644 --- a/setup_travis.py +++ b/setup_travis.py @@ -9,7 +9,7 @@ maintainer='Tomas Lima', maintainer_email='synchroack@gmail.com', packages=find_packages(), - package_data={'intelmq': ['conf/*.conf']}, + package_data={'intelmq': ['conf/*.conf', 'bots/experts/modify/*.conf']}, url='http://pypi.python.org/pypi/intelmq/', license='AGPLv3', description="IntelMQ Tool",