diff --git a/CHANGELOG.md b/CHANGELOG.md index fadcb4fcb..c325ac421 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -67,6 +67,7 @@ CHANGELOG - `bots.collectors.microsoft.collector_interlow`: added for MS interflow API - Automatic ungzipping for .gz files. - added `intelmq.bots.collectors.calidog.collector_certstream` for collecting certstream data (#1120). +- added `intelmq.bots.collectors.shodan.collector_stream` for collecting shodan stream data (#1096). #### Parsers - changed feednames in `bots.parsers.shadowserver`. Please refer to it's README for the exact changes. @@ -94,6 +95,7 @@ CHANGELOG - added `intelmq.bots.parsers.openphish.parser_commercial` - added `intelmq.bots.parsers.microsoft.parser_bingmurls` - added `intelmq.bots.parsers.calidog.parser_certstream` for parsing certstream data (#1120). +- added `intelmq.bots.parsers.shodan.parser` for parsing shodan data (#1096). #### Experts - Added sieve expert for filtering and modifying events (#1083) diff --git a/intelmq/bots/BOTS b/intelmq/bots/BOTS index 073506904..0cab3b268 100644 --- a/intelmq/bots/BOTS +++ b/intelmq/bots/BOTS @@ -201,6 +201,16 @@ "redis_cache_ttl": 604800 } }, + "Shodan Stream": { + "description": "Collects the shodan stream from the shodan API.", + "module": "intelmq.bots.collectors.shodan.collector_stream", + "parameters": { + "api_key": "", + "countries": [], + "feed": "shodan-stream", + "provider": "Shodan" + } + }, "Stomp": { "description": "Stomp Collector - e.g. for CERT.pl's N6 feed via STOMP. 
Note that rate_limit does not apply for this bot as it is waiting for messages on a stream.", "module": "intelmq.bots.collectors.stomp.collector", @@ -460,6 +470,11 @@ "overwrite": true } }, + "Shodan": { + "description": "Parses Shodan data collected via the API.", + "module": "intelmq.bots.parsers.shodan.parser", + "parameters": {} + }, "Spamhaus CERT": { "description": "Spamhaus CERT Parser is the bot responsible to parse the report and sanitize the information.", "module": "intelmq.bots.parsers.spamhaus.parser_cert", diff --git a/intelmq/bots/collectors/shodan/collector_stream.py b/intelmq/bots/collectors/shodan/collector_stream.py new file mode 100644 index 000000000..4b8401291 --- /dev/null +++ b/intelmq/bots/collectors/shodan/collector_stream.py @@ -0,0 +1,28 @@ +# -*- coding: utf-8 -*- +""" +Parameter: +* api_key: The API key +""" +import shodan + +from intelmq.lib.bot import CollectorBot + +URL_LIST = 'https://interflow.azure-api.net/file/api/file/listsharedfiles' +URL_DOWNLOAD = 'https://interflow.azure-api.net/file/api/file/download?fileName=%s' + + +class ShodanStreamCollectorBot(CollectorBot): + def init(self): + self.set_request_parameters() + self.api = shodan.Shodan(self.parameters.api_key) + + def process(self): + for line in self.api.stream.countries(timeout=self.http_timeout_sec, raw=True, countries=self.parameters.countries): + report = self.new_report() + report.add('raw', line) +# proxies=self.proxy, +# verify=self.http_verify_cert, + self.send_message(report) + + +BOT = ShodanStreamCollectorBot diff --git a/intelmq/bots/parsers/shodan/parser.py b/intelmq/bots/parsers/shodan/parser.py new file mode 100644 index 000000000..e85966c92 --- /dev/null +++ b/intelmq/bots/parsers/shodan/parser.py @@ -0,0 +1,136 @@ +""" +Shodan Stream Parser + +Copyright (C) 2018 by nic.at GmbH +""" +import json + +from intelmq.lib.bot import Bot +from intelmq.lib.utils import base64_decode + + +MAPPING = { + 'hash': 'extra.shodan.event_hash', + 'ip': '__IGNORE__', # 
using ip_str + 'hostnames': 'source.reverse_dns', # TODO: multiple hostname + 'org': 'event_description.target', + 'data': 'extra.data', + 'port': 'source.port', + 'transport': 'protocol.transport', + 'isp': 'extra.isp', + "ftp": { + "features": { + "MLST": { + "parameters": 'extra.ftp.features.mlst', + }, + "UTF8": { + "parameters": 'extra.ftp.utf8.parameters', + }, + "REST": { + "parameters": 'extra.ftp.rest.parameters', + }, + "CLNT": { + "parameters": 'extra.ftp.clnt.parameters', + }, + "MLSD": { + "parameters": 'extra.ftp.mlsd.parameters', + }, + "MFMT": { + "parameters": 'extra.ftp.mfmt.parameters', + }, + "MDTM": { + "parameters": 'extra.ftp.mdtm.parameters', + }, + "SIZE": { + "parameters": 'extra.ftp.size.parameters', + } + }, + "anonymous": 'extra.ftp.anonymous', + "features_hash": '__IGNORE__', + }, + 'http': { + 'robots_hash': '__IGNORE__', + # 'redirects': unknown, + # 'securitytxt': unknown, + 'title': 'extra.http.html.title', + 'sitemap_hash': '__IGNORE__', + 'robots': '__IGNORE__', + 'favicon': '__IGNORE__', + 'host': '__IGNORE__', + 'html': 'extra.http.html.data', + 'location': 'extra.http.location', + # 'components': unknown, + # 'securitytxt_hash': unknown, + 'server': 'extra.http.server', + # 'sitemap': unknown, + }, + 'asn': 'source.asn', + 'html': '__IGNORE__', # use http.html + 'location': { + 'country_code3': '__IGNORE__', # using country_code + 'city': 'source.geolocation.city', + 'region_code': 'extra.region_code', + 'postal_code': 'extra.postal_code', + 'longitude': 'extra.geolocation.longitude', + 'country_code': 'source.geolocation.cc', + 'latitude': 'source.geolocation.latitude', + 'country_name': '__IGNORE__', # using country_code + 'area_code': 'extra.area_code', + 'dma_code': 'extra.dma_code', + }, + 'timestamp': 'time.source', + 'domains': 'source.fqdn', # TODO: multiple domains + 'ip_str': 'source.ip', + 'os': 'extra.os_name', + '_shodan': '__IGNORE__', # for now + # 'opts': unknown + 'tags': 'extra.tags', + } + + +PROTOCOLS = 
['ftp', 'http', 'isakmp']  # right-hand side of the ``PROTOCOLS = `` assignment begun on the previous line


class ShodanParserBot(Bot):
    """
    Parser for Shodan data.

    Each incoming report is expected to carry one JSON object in its ``raw``
    field; the object is flattened into event fields by means of ``MAPPING``.
    """

    def init(self):
        """
        Read the optional ``ignore_errors`` parameter.

        When it is set to a true value, keys of the Shodan data that have no
        entry in the mapping are skipped instead of raising ``KeyError``.
        """
        self.ignore_errors = bool(getattr(self.parameters, 'ignore_errors', False))

    def apply_mapping(self, mapping, data):
        """
        Translate a (possibly nested) dictionary into a flat dictionary.

        Parameters:
            mapping: dictionary whose values are either a target field name,
                the marker ``'__IGNORE__'`` or a nested mapping dictionary
            data: dictionary holding the decoded data for this mapping level

        Returns:
            dictionary of target field name -> value

        Raises:
            KeyError: if a key of ``data`` with a non-empty value is missing
                in ``mapping`` and ``ignore_errors`` is not enabled
        """
        self.logger.debug('Applying mapping %r to data %r', mapping, data)
        event = {}
        for key, value in data.items():
            # Empty values (None, '', 0, [], {}, False) are dropped before
            # the mapping is consulted, so they never trigger a KeyError.
            if not value:
                continue
            try:
                target = mapping[key]
            except KeyError:
                if not self.ignore_errors:
                    raise
                continue
            if target == '__IGNORE__':
                continue
            if isinstance(target, dict):
                # Nested structure: flatten it recursively. Unknown keys in
                # the nested data propagate as KeyError unless ignored.
                event.update(self.apply_mapping(target, value))
            else:
                event[target] = value
        return event

    def process(self):
        """
        Convert one received report into one event and send it.

        The report's ``raw`` field is base64-decoded and parsed as JSON, the
        mapping is applied, and the classification is set to the fixed values
        ``other`` / ``shodan-scan``.
        """
        report = self.receive_message()
        raw = base64_decode(report['raw'])
        decoded = json.loads(raw)

        event = self.new_event(report)
        event['raw'] = raw
        event.update(self.apply_mapping(MAPPING, decoded))
        event.add('classification.type', 'other')
        event.add('classification.identifier', 'shodan-scan')
        for protocol in PROTOCOLS:
            if protocol in decoded:
                event.add('protocol.application', protocol)
                # Only one application protocol fits into the event; a second
                # add() for the same key would be rejected by IntelMQ
                # (KeyExists), so stop at the first match in PROTOCOLS order.
                break
        self.send_message(event)
        self.acknowledge_message()


BOT = ShodanParserBot