#!/usr/bin/env python3
"""ASN lookup, IP normalization and scapy traceroute helpers.

This module is the readable form of the patch "Add required utility scripts",
which added three stand-alone scripts:

* ip2asn.py - takes ip addresses provided on stdin (one per line) and outputs
  info about the ASNs that contain them.
* normalize_ip.py - converts ip addresses (ipv4 or ipv6) on stdin to fully
  exploded ip addresses.
* scapy_traceroute.py - traceroute to a remote host and return the list of IPs
  transited (specific entries in that list may be None if no reply from that
  hop).  In the case of an error such as an unresolvable target, a list of
  (30, by default) Nones will come back.

Copy or symlink this file to one of those three names to run that tool.
"""
#Provenance: commit 0276387bf4d31b281e46e9983b4e1986c1d9042f
#  From: William Stearns, Date: Mon, 26 Oct 2020 23:50:46 -0400
#  Subject: [PATCH] Add required utility scripts
#  New files (all mode 100755): ip2asn.py (172 lines, blob da812f6),
#  normalize_ip.py (91 lines, blob a19507f), scapy_traceroute.py (324 lines, blob 2c54628).
#Appears to work just fine under both python2 and python3.
#For ip2asn: download https://iptoasn.com/data/ip2asn-combined.tsv.gz to current directory, gunzip it.
#Many thanks to https://jvns.ca/blog/2013/10/31/day-20-scapy-and-traceroute/ for the initial traceroute idea.

import csv
import errno
import fileinput
import ipaddress
import json
import os
import random
import socket
import sys
from ipaddress import summarize_address_range, IPv4Address, IPv6Address	#Not needed: ip_address, AddressValueError

try:
    from scapy.all import ICMP, ICMPv6TimeExceeded, IP, IPv6, Raw, Scapy_Exception, UDP, sr1	# pylint: disable=no-name-in-module
except ImportError:
    #scapy is only needed by traceroute_hop_list; the ASN and normalize helpers must keep working without it.
    ICMP = ICMPv6TimeExceeded = IP = IPv6 = Raw = Scapy_Exception = UDP = sr1 = None


#Note: netaddr removed as standard python library does not include it, but appears to include ipaddress.
#See v0.1 for comparison between the two - both provide equal results for all ip ranges.
ip2asn_version = '0.6.1'

Devel = False

scapy_traceroute_version = '0.2.5'
per_packet_timeout_default = 1
forced_interface_default = None
ttl_default = 30
#expanduser honours $HOME when it is set and does not raise KeyError when it is not.
traceroute_cache_dir_default = os.path.expanduser('~') + '/.cache/scapy_traceroute/'


def _to_text(value):
    """Return value as a text string (str on python3, unicode on python2)."""

    if sys.version_info >= (3, 0):
        return str(value)
    return unicode(value)	# pylint: disable=undefined-variable


def Debug(DebugStr):
    """Prints a note to stderr (only when the module-level Devel flag is set)."""

    if Devel:
        sys.stderr.write(DebugStr + '\n')


#======== ip2asn ========

def _bucket_keys(network):
    """Return every first-level index key ('4_<first octet>' or '6_<first hex byte>') that network covers."""

    leading_byte = int(network.network_address) >> (network.max_prefixlen - 8)
    #A prefix shorter than /8 spans several leading bytes; it must be findable from each of them.
    byte_count = 1 << max(0, 8 - network.prefixlen)
    covered = range(leading_byte, leading_byte + byte_count)
    if network.version == 4:
        return ['4_' + str(one_byte) for one_byte in covered]
    return ['6_' + format(one_byte, '02x') for one_byte in covered]


def load_asn_table(source_file, paranoid=False):
    """Loads the subnets from ip2asn-combined.tsv.

    source_file: path to the tab separated file with the columns
        range_start, range_end, AS_number, country_code, AS_description.
    paranoid: when True, report (on stderr) ASNs whose country or description
        change between rows and subnets that are assigned to two different ASNs.

    Returns (as_info_struct, asn_country, asn_description):
        as_info_struct: dictionary of lists of dictionaries.  Top level keys are
            4_firstoctet for ipv4 and 6_firsthexbyte for ipv6.  Each value is a
            list indexed by prefix length (0-128) whose entries are dictionaries
            with key = network object, value = as_num.
        asn_country: key = as_num, value = 2 letter country code.
        asn_description: key = as_num, value = asn description.
    All three are empty when source_file does not exist (a note goes to stderr).
    Rows that are "AS 0 / Not routed", have the wrong number of fields or hold
    an unparseable range are not added to as_info_struct.
    """

    as_info_struct = {}
    asn_country = {}
    asn_description = {}

    if not os.path.exists(source_file):
        sys.stderr.write("ASN Source file " + source_file + " does not exist, unable to lookup ASNs.\n")
        return as_info_struct, asn_country, asn_description

    if sys.version_info >= (3, 0):
        #Descriptions contain non-ascii text; do not depend on the locale's default encoding.
        aih = open(source_file, 'r', encoding='utf-8', newline='')	# pylint: disable=unexpected-keyword-arg
    else:
        aih = open(source_file, 'rb')

    with aih:
        for one_row in csv.reader(aih, delimiter='\t'):
            if len(one_row) != 5:
                continue						#Blank or malformed line
            first_ip, last_ip, as_num, country, as_description = one_row

            if sys.version_info < (3, 0):
                first_ip = _to_text(first_ip)
                last_ip = _to_text(last_ip)
                country = _to_text(country)
                try:
                    as_description = as_description.decode('utf-8')
                except UnicodeDecodeError:
                    sys.stderr.write("Unable to convert: " + as_description + "\n")

            #Load country and description values into dictionaries for later use.
            if as_num not in asn_country:
                asn_country[as_num] = country
            elif paranoid and asn_country[as_num] != country:
                sys.stderr.write("country mismatch: for asnum: " + str(as_num) + ", " + asn_country[as_num] + " != " + country + "\n")

            if as_num not in asn_description:
                asn_description[as_num] = as_description
            elif paranoid and asn_description[as_num] != as_description:
                sys.stderr.write("description mismatch: for asnum: " + str(as_num) + ", " + asn_description[as_num] + " != " + as_description + "\n")

            if as_num == '0' and as_description == 'Not routed':
                continue

            address_class = IPv6Address if ':' in first_ip else IPv4Address
            try:
                sar_cidrs = list(summarize_address_range(address_class(first_ip), address_class(last_ip)))
            except (ValueError, TypeError):
                sys.stderr.write("Unable to parse range: " + first_ip + " - " + last_ip + "\n")
                continue

            for one_cidr in sar_cidrs:
                for bucket_key in _bucket_keys(one_cidr):
                    if bucket_key not in as_info_struct:
                        as_info_struct[bucket_key] = [{} for _ in range(129)]	#Prefix lengths 0 to 128

                    same_length_nets = as_info_struct[bucket_key][one_cidr.prefixlen]
                    if one_cidr not in same_length_nets:
                        same_length_nets[one_cidr] = as_num
                    elif paranoid and same_length_nets[one_cidr] != as_num:
                        sys.stderr.write("For subnet " + str(one_cidr) + ", " + same_length_nets[one_cidr] + " != " + str(as_num) + "\n")

    return as_info_struct, asn_country, asn_description


def ip_asn_lookup(ip_string, as_num_d):
    """Find the ASN for the given IP address or None if no match found.

    ip_string: ipv4 or ipv6 address as text.
    as_num_d: the first structure returned by load_asn_table.

    This returns the most specific subnet in case there are multiple matching
    cidr blocks.  An unparseable ip_string gives None.
    """

    if sys.version_info < (3, 0):
        ip_string = _to_text(ip_string)

    try:
        if ip_string.find(':') > -1:
            lookup_obj = IPv6Address(ip_string)
            first_octet = '6_' + lookup_obj.exploded[0:2]
            network_class = ipaddress.IPv6Network
        else:
            lookup_obj = IPv4Address(ip_string)
            first_octet = '4_' + lookup_obj.exploded.split('.')[0]
            network_class = ipaddress.IPv4Network
    except ValueError:
        return None

    if first_octet not in as_num_d:
        return None

    #Rebuild from the integer so any ipv6 zone suffix is dropped before forming networks.
    bare_address = _to_text(type(lookup_obj)(int(lookup_obj)))

    #Walk from the longest prefix to the shortest; one dictionary probe per
    #prefix length instead of testing every stored network.
    for search_netmask in range(lookup_obj.max_prefixlen, -1, -1):
        same_length_nets = as_num_d[first_octet][search_netmask]
        if not same_length_nets:
            continue
        enclosing_net = network_class(bare_address + u'/' + _to_text(search_netmask), strict=False)
        if enclosing_net in same_length_nets:
            return same_length_nets[enclosing_net]

    return None


def formatted_asn_output(orig_ip_string, out_format, as_num_d, as_country_d, as_descriptions_d):
    """Take supplied ip string, look up its ASN, and return a formatted output string.

    out_format 'passer' gives comma separated strings, 'json' gives
    dictionaries, anything else gives one human readable line.  For 'passer'
    and 'json' the list is empty when the address is in no known ASN, and the
    country record is left out when the country is '' or 'Unknown'.
    Returns a list of output strings/dictionaries.
    """

    formatted_output = []

    clean_ip_string = orig_ip_string.rstrip()
    found_as_num = ip_asn_lookup(clean_ip_string, as_num_d)

    if not found_as_num:
        if out_format not in ('passer', 'json'):
            formatted_output.append('IP: ' + clean_ip_string + ' is not in any asn')
        return formatted_output

    asn_text = str(found_as_num)
    country = as_country_d[found_as_num]
    description = as_descriptions_d[found_as_num]
    country_known = country not in ('', 'Unknown')

    if out_format == 'passer':
        formatted_output.append('AS,' + clean_ip_string + ',AS,' + asn_text + ',' + description.replace(',', ' '))
        if country_known:
            formatted_output.append('GE,' + clean_ip_string + ',CC,' + country + ',')
    elif out_format == 'json':
        formatted_output.append({'Type': 'AS', 'IPAddr': clean_ip_string, 'Proto': 'AS', 'State': asn_text, 'Description': description.replace(',', ' ')})
        if country_known:
            formatted_output.append({'Type': 'GE', 'IPAddr': clean_ip_string, 'Proto': 'CC', 'State': country, 'Description': ''})
    else:
        formatted_output.append('IP: ' + clean_ip_string + ' ASN: ' + asn_text + '  Country: ' + country + ' Description: ' + description)

    return formatted_output


def _ip2asn_main():
    """ip2asn.py entry point: print 'passer' format ASN records for each address read from stdin/named files."""

    asn_info_file = './ip2asn-combined.tsv'
    requested_format = 'passer'

    as_nums, asn_countries, asn_descriptions = load_asn_table(asn_info_file)

    for line in fileinput.input():
        for one_out in formatted_asn_output(line, requested_format, as_nums, asn_countries, asn_descriptions):
            print(one_out)


#======== normalize_ip ========

def ip_addr_obj(raw_addr):
    """Returns an ip obj for the input string, or None if it is not an address.

    The raw_addr string should already have leading and trailing whitespace
    removed before being handed to this function.  Besides the usual ipv4 and
    ipv6 notations, the 32 hex digit forms
    2.6.0.0.9.0.0.0.5.3.0.1.B.7.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.1 and
    260090005301B7000000000000000001 are accepted as ipv6 addresses.
    When the module-level Devel flag is set, an unrecognized address raises
    ValueError instead of returning None.
    """

    try:
        raw_addr_string = _to_text(raw_addr)
    except UnicodeDecodeError:
        return None

    if raw_addr_string == '' or raw_addr_string.endswith(('.256', '.257', '.258', '.259', '.260')):
        return None

    try:
        return ipaddress.ip_address(raw_addr_string)
    except ValueError:
        pass

    hex_string = raw_addr_string.replace('.', '')
    if len(hex_string) == 32:						#Anything longer or shorter is not a complete address
        colon_hex_string = ':'.join(hex_string[start:start + 4] for start in range(0, 32, 4))
        try:
            return ipaddress.ip_address(colon_hex_string)
        except ValueError:
            pass

    if Devel:
        Debug(raw_addr_string)
        raise ValueError('Not an IP address: ' + repr(raw_addr_string))

    return None


def explode_ip(ip_obj):
    """Converts the input IP object to its exploded form (type "unicode" in python2) ready for printing.

    If the IP/IP object is invalid (None), returns an empty string.
    """

    return '' if ip_obj is None else ip_obj.exploded


def _normalize_ip_main():
    """normalize_ip.py entry point: print the exploded form of each stdin line; returns the exit status (0 = all lines valid)."""

    all_succeeded = True

    for in_line in sys.stdin:
        in_line = in_line.replace('\n', '').replace('\r', '')
        user_ip_obj = ip_addr_obj(in_line)

        if user_ip_obj is None:
            all_succeeded = False
            if Devel:
                print('Invalid: ' + in_line)
            else:
                print('')						#Keep output lines aligned with input lines
        else:
            print(explode_ip(user_ip_obj))

    if all_succeeded:
        return 0

    Debug('One or more input lines were not recognized as cidr networks or hosts')
    return 1


#======== scapy_traceroute ========

def is_valid_ipv4_address(address):
    """Returns True or False based on whether the address is a valid IPv4 address."""

    try:
        socket.inet_pton(socket.AF_INET, address)
    except AttributeError:						#Platform without inet_pton
        try:
            socket.inet_aton(address)
        except (socket.error, ValueError):
            return False
        return address.count('.') == 3				#inet_aton also accepts short forms such as "127.1"
    except (socket.error, ValueError):				#Not a valid address
        return False

    return True


def is_valid_ipv6_address(address):
    """Returns True or False based on whether the address is a valid IPv6 address."""

    try:
        socket.inet_pton(socket.AF_INET6, address)
    except (socket.error, ValueError):				#Not a valid address
        return False
    return True


def load_json_from_file(json_filename):
    """Bring in json content from a file and return it as a python data structure (or None if not successful for any reason)."""

    if not (os.path.exists(json_filename) and os.access(json_filename, os.R_OK)):
        return None

    try:
        with open(json_filename) as json_h:
            return json.load(json_h)
    except (IOError, OSError, ValueError):				#Unreadable file or invalid json
        return None


def write_object(filename, generic_object):
    """Write out an object (a text string, stored as utf-8) to a file.

    Any failure is noted on stderr and then re-raised to the caller.
    """

    try:
        encoded_object = generic_object.encode('utf-8')		#Encode first so a failure cannot truncate an existing file
        with open(filename, "wb") as write_h:
            write_h.write(encoded_object)
    except Exception:
        sys.stderr.write("Problem writing " + filename + ", skipping.\n")
        raise


def mkdir_p(path):
    """Create an entire directory branch.  Will not complain if the directory already exists."""

    if os.path.isdir(path):
        return

    try:
        os.makedirs(path)
    except OSError as exc:						#FileExistsError is the EEXIST case of OSError and does not exist on python2
        if exc.errno != errno.EEXIST:
            raise


def cache_file(parent_cache_dir, ip_addr):
    """Returns the correct filename that would hold the path to that IP.

    Does not care if the file exists or not, but does create the directory
    that would hold it (one directory level per address component).
    """

    if ':' in ip_addr:							#ipv6 address
        cache_obj_path = parent_cache_dir + '/ipv6/' + ip_addr.replace(':', '/') + '/'
    else:								#ipv4 address
        cache_obj_path = parent_cache_dir + '/ipv4/' + ip_addr.replace('.', '/') + '/'

    mkdir_p(cache_obj_path)

    return cache_obj_path + ip_addr + '.traceroute.json'


def ips_of(one_target):
    """Finds a set of IP addresses of the given target, which could be a hostname, an IPv4 address, or an IPv6 address.

    IPv6 addresses are returned in exploded form.  A hostname that cannot be
    resolved gives an empty set.
    """

    ip_list = set()

    if is_valid_ipv4_address(one_target):
        ip_list.add(one_target)
        return ip_list
    if is_valid_ipv6_address(one_target):
        ip_list.add(explode_ip(ip_addr_obj(one_target)))
        return ip_list

    if not one_target.endswith("."):
        one_target += '.'						#Fully qualified, so resolver search domains are not appended

    for wanted_family in (socket.AF_INET, socket.AF_INET6):
        try:
            lookup_results = socket.getaddrinfo(one_target, None, wanted_family)
        except (socket.gaierror, UnicodeError):
            continue							#No records in this family; the other family may still have some
        except KeyboardInterrupt:
            return ip_list

        for af, _, _, _, sa in lookup_results:			#Don't need socktype, proto, canonname
            if af == socket.AF_INET:
                ip_list.add(sa[0])
            elif af == socket.AF_INET6:
                ip_list.add(explode_ip(ip_addr_obj(sa[0])))
            else:
                sys.stderr.write(str(af) + '\n')

    return ip_list


def _cache_hop_lists(tr_cache_dir, target, hop_list):
    """Best-effort save of hop_list under target, and of each leading sub-path under the router that ends it."""

    try:
        write_object(cache_file(tr_cache_dir, target), json.dumps(hop_list))
    except (IOError, OSError):
        pass

    #Working back through the list of routers, save any that are actually routers
    #for which we don't already have a path to that router.
    truncated_path_to_ip = list(hop_list)				#Shallow copy so we don't affect the original
    if truncated_path_to_ip:
        del truncated_path_to_ip[-1]

    while truncated_path_to_ip:
        router_ip = truncated_path_to_ip[-1]
        if router_ip:							#If not null, i.e., we have an actual router IP address
            try:
                router_cache_file = cache_file(tr_cache_dir, router_ip)
                if not os.path.exists(router_cache_file):
                    write_object(router_cache_file, json.dumps(truncated_path_to_ip))
            except (IOError, OSError):
                pass

        del truncated_path_to_ip[-1]				#Drop the rightmost entry each time


def traceroute_hop_list(compressed_target, required_interface, max_packet_wait, max_hops, tr_cache_dir):	# pylint: disable=too-many-branches,too-many-statements
    """Traceroute to the target IP address (NOT hostname) and return a list of all hops with their IPs (or None if no response).

    compressed_target: ipv4 or ipv6 address as text.  If you have a hostname,
        use "for one_ip in ips_of(target_host):" around this function.
    required_interface: interface name to force packets through, or None.
    max_packet_wait: seconds to wait for the reply to a single probe.
    max_hops: number of probes to send (and length of the list of Nones that
        comes back when nothing answers).
    tr_cache_dir: if None, do not cache.  If "", use traceroute_cache_dir_default.

    A usable cached list is returned without sending packets.  A trace that
    had to be abandoned (no usable address, unable to write to the interface)
    is never cached.  Raises ImportError when scapy is not installed.
    """

    if sr1 is None:
        raise ImportError("scapy is required for traceroute_hop_list.")

    target = explode_ip(ip_addr_obj(compressed_target))
    if target == '':
        sys.stderr.write("No IP found for " + str(compressed_target) + ", exiting.\n")
        return [None for _ in range(max_hops)]

    if tr_cache_dir == "":
        tr_cache_dir = traceroute_cache_dir_default
    if tr_cache_dir:
        mkdir_p(tr_cache_dir)
        cached_hop_list = load_json_from_file(cache_file(tr_cache_dir, target))
        if isinstance(cached_hop_list, list):			#A missing or corrupt cache file gives None; trace again
            return cached_hop_list

    hop_list = [None for _ in range(max_hops)]
    trace_aborted = False
    flowlabel_value = random.randrange(1, 2**20)
    #payload_string = r"abcdefghijklmnopqrstuvwabcdefghi"		#Windows ICMP traceroute
    payload_string = r"@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_"		#Linux UDP traceroute

    send_options = {'verbose': 0, 'timeout': max_packet_wait}
    if required_interface:
        send_options['iface'] = required_interface

    #NOTE(review): the first probe goes out with ttl/hlim 0 (classic traceroute starts at 1); kept as-is
    #because cached lists depend on this indexing - confirm whether that is intended.
    for i in range(0, max_hops):
        try:
            pkt = IP(dst=target, ttl=i)/UDP(sport=random.randrange(32768, 65534), dport=33434+i)/Raw(load=payload_string)
            address_layer = IP
            del pkt[IP].chksum
        except socket.gaierror:					#Not usable as an IPv4 destination, retry with IPv6
            try:
                pkt = IPv6(dst=target, hlim=i, fl=flowlabel_value)/UDP(sport=random.randrange(32768, 65534), dport=33434+i)/Raw(load=payload_string)
                address_layer = IPv6
                del pkt[IPv6].chksum
            except socket.gaierror:				#Couldn't find IPv6 either
                sys.stderr.write("No IP found for " + str(target) + ", exiting.\n")
                trace_aborted = True
                break
        del pkt[UDP].chksum						#Removed so scapy recalculates it when sending

        reply = None
        try:
            reply = sr1(pkt, **send_options)
        except Scapy_Exception:
            if not required_interface:
                raise
            sys.stderr.write("Unable to write to " + str(required_interface) + ". Are you running as root?  Exiting.\n")
            trace_aborted = True
            break
        except (KeyError, IndexError):				#Working around a bug in scapy's sendrecv.py/supersocket.py which gives an IndexError: pop from empty list
            pass

        if reply is None:						#No response received
            pass							#No need to fill in, we already have None's there.
        elif reply.haslayer(ICMPv6TimeExceeded) or (reply.haslayer(ICMP) and reply[ICMP].type == 11):	#Intermediate host (Type is time-exceeded)
            hop_list[i] = explode_ip(ip_addr_obj(reply[address_layer].src))
        elif reply.haslayer('ICMPv6 Destination Unreachable') or (reply.haslayer(ICMP) and reply[ICMP].type == 3):	#Reached target (Type is dest-unreach)
            hop_list[i] = explode_ip(ip_addr_obj(reply[address_layer].src))
            del hop_list[i+1:]					#Truncate any following nulls
            break
        else:							#Unknown
            sys.stderr.write("Unknown reply type:\n")
            reply.show()
            break

    if tr_cache_dir and not trace_aborted:
        _cache_hop_lists(tr_cache_dir, target, hop_list)

    return hop_list


def _scapy_traceroute_main():
    """scapy_traceroute.py entry point: traceroute to every address of every target named on the command line."""

    import argparse

    parser = argparse.ArgumentParser(description='scapy_traceroute version ' + str(scapy_traceroute_version))
    parser.add_argument('-p', '--per_packet_timeout', help='Time to wait for a reply for a single packet, can be fractional (default: ' + str(per_packet_timeout_default) + ' ).', required=False, default=per_packet_timeout_default, type=float)
    parser.add_argument('-f', '--forced_interface', help='Force packets through this interface (needed on macos, default: ' + str(forced_interface_default) + ' ).', required=False, default=forced_interface_default)
    parser.add_argument('-t', '--ttl', help='Maximum number of hops to try (default: ' + str(ttl_default) + ')', required=False, default=ttl_default, type=int)
    parser.add_argument('-c', '--cache_dir', help='Directory tree to hold cached traceroutes (default: ' + str(traceroute_cache_dir_default) + ' ).  Use None to not cache results.', required=False, default=traceroute_cache_dir_default)
    (parsed, unparsed) = parser.parse_known_args()
    cl_args = vars(parsed)

    requested_cache_dir = cl_args['cache_dir']
    if isinstance(requested_cache_dir, str) and requested_cache_dir.strip().lower() == 'none':
        requested_cache_dir = None					#The literal word None on the command line turns caching off

    for target_host in unparsed:
        for one_ip in ips_of(target_host):
            sys.stderr.write("==== Traceroute to: " + one_ip + '\n')
            sys.stderr.flush()
            print(traceroute_hop_list(one_ip, cl_args['forced_interface'], cl_args['per_packet_timeout'], int(cl_args['ttl']), requested_cache_dir))


if __name__ == "__main__":
    #The patch shipped three separate scripts; pick the tool from the name this file was started under.
    invoked_as = os.path.splitext(os.path.basename(sys.argv[0]))[0]
    entry_point = {'ip2asn': _ip2asn_main, 'normalize_ip': _normalize_ip_main, 'scapy_traceroute': _scapy_traceroute_main}.get(invoked_as)
    if entry_point is None:
        sys.stderr.write("Copy or symlink this file to ip2asn.py, normalize_ip.py or scapy_traceroute.py to choose the tool to run.\n")
        sys.exit(2)
    sys.exit(entry_point())