Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BB2-3486: Mask mbi in logs #1252

Merged
merged 4 commits into from
Oct 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions apps/logging/sensitive_logging_filters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import re
import logging
import logging.config

# Regular-expression sources that recognise an MBI (Medicare Beneficiary
# Identifier) inside free text.
#
# Layout enforced by both patterns (11 characters, optionally written as
# 4-3-4 groups separated by hyphens):
#   position 1          - digit 1-9
#   positions 2, 5, 8, 9 - letter, excluding S, L, O, I, B, Z
#   positions 3, 6      - letter (same exclusions) or digit
#   positions 4, 7, 10, 11 - digit 0-9
# Letters are accepted in either case.  Each excluded letter is rejected with
# a negative lookahead placed directly in front of the character it guards.
#
# The sources contain layout whitespace/newlines, so they MUST be compiled
# with re.VERBOSE.
MBI_WITH_HYPHEN_PATTERN = r"""\b
[1-9](?![SLOIBZsloibz])[A-Za-z](?![SLOIBZsloibz])[A-Za-z\d]\d
-(?![SLOIBZsloibz])[A-Za-z](?![SLOIBZsloibz])[A-Za-z\d]\d
-((?![SLOIBZsloibz])[A-Za-z]){2}\d{2}
\b
"""

# Same layout without the separating hyphens.  The exclusion set for
# positions 8-9 is deliberately identical to the hyphenated form: "d" is a
# legal MBI letter and must not be rejected here.
MBI_WITHOUT_HYPHEN_PATTERN = r"""\b
[1-9](?![SLOIBZsloibz])[A-Za-z](?![SLOIBZsloibz])[A-Za-z\d]\d
(?![SLOIBZsloibz])[A-Za-z](?![SLOIBZsloibz])[A-Za-z\d]\d
((?![SLOIBZsloibz])[A-Za-z]){2}\d{2}
\b"""

# Either spelling of an MBI; the hyphenated form is tried first.
MBI_PATTERN = f'({MBI_WITH_HYPHEN_PATTERN}|{MBI_WITHOUT_HYPHEN_PATTERN})'

# Key under which the masking filter is registered in the logging config.
SENSITIVE_DATA_FILTER = "sensitive_data_filter"


def mask_if_has_mbi(text):
    """Return ``str(text)`` with every MBI-shaped substring masked.

    The argument is converted with ``str()`` first, so the result is always a
    string.  Each match of ``MBI_PATTERN`` is replaced by ``'***MBI***'``.
    """
    # MBI_PATTERN is written with layout whitespace, hence re.VERBOSE.
    mbi_regex = re.compile(MBI_PATTERN, re.VERBOSE)
    as_text = str(text)
    return mbi_regex.sub('***MBI***', as_text)


def mask_mbi(value_to_mask):
    """Return a copy of ``value_to_mask`` with MBIs replaced by a mask.

    Handling by type:

    * ``str`` - masked via ``mask_if_has_mbi``.
    * ``dict`` - a new dict with the same keys and recursively masked values.
      The input dict is never modified, so logging a dict cannot alter the
      caller's data.  Keys are left as they are.
    * ``tuple`` / ``list`` - a new tuple / list with recursively masked items.
    * ``None``, ``bool``, ``int``, ``float`` - returned unchanged; they cannot
      contain an MBI and must keep their type so that ``%d`` / ``%f`` style
      log format specifiers still work.
    * anything else - returned unchanged unless its ``str()`` form contains
      an MBI, in which case the masked string is returned instead.
    """
    if isinstance(value_to_mask, str):
        return mask_if_has_mbi(value_to_mask)

    # Numbers and None can never match the MBI pattern; return them as-is so
    # their type (and therefore numeric log formatting) is preserved.
    if value_to_mask is None or isinstance(value_to_mask, (bool, int, float)):
        return value_to_mask

    if isinstance(value_to_mask, dict):
        return {key: mask_mbi(value) for key, value in value_to_mask.items()}

    if isinstance(value_to_mask, tuple):
        return tuple(mask_mbi(item) for item in value_to_mask)

    if isinstance(value_to_mask, list):
        return [mask_mbi(item) for item in value_to_mask]

    # Arbitrary object: only swap it for a string when that is required to
    # hide an MBI that would otherwise appear once the object is formatted.
    original_text = str(value_to_mask)
    masked_text = mask_if_has_mbi(original_text)
    if masked_text != original_text:
        return masked_text
    return value_to_mask


class SensitiveDataFilter(logging.Filter):
    """Logging filter that masks MBIs in a record's message and arguments.

    Only ``record.msg`` and ``record.args`` are rewritten; other record
    attributes are left untouched.
    """

    # Emitted in place of a record whose contents could not be masked.
    MASKING_FAILED_MESSAGE = "[log record redacted: MBI masking failed]"

    def filter(self, record):
        """Mask ``record`` in place and always allow it to be emitted.

        If masking raises, the record's content is replaced by
        ``MASKING_FAILED_MESSAGE`` rather than being logged unmasked or
        silently discarded: nothing sensitive is written, but the failure
        stays visible in the log stream.

        Returns:
            ``True`` in every case, so the handler emits the record.
        """
        try:
            # Compute both values before assigning so a failure half-way
            # through cannot leave the record partially masked.
            masked_args = mask_mbi(record.args)
            masked_msg = mask_mbi(record.msg)
        except Exception:
            # Fail closed: drop the unmasked content, keep a marker.
            record.msg = self.MASKING_FAILED_MESSAGE
            record.args = ()
        else:
            record.args = masked_args
            record.msg = masked_msg
        return True
11 changes: 11 additions & 0 deletions hhs_oauth_server/settings/base.py
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would you mind explaining some of these changes? I'm not too familiar with how some of this is set up, but curious for instance about why the django section needed to be added. I'm doing some reading on this to learn too, but if you can provide some starting context, that might help!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider these log lines:

web-1  | 2024-10-17 14:59:44,183 WARNING [35] django.request line:241 Unauthorized: /v2/fhir/Patient/identifier=***MBI***
web-1  | 2024-10-17 14:59:44,187 WARNING [35] django.server line:212 "GET //v2/fhir/Patient/identifier=***MBI*** HTTP/1.1"

These log lines come from the
django.request and django.server loggers.

Adding django to the list of loggers makes sure that the output of all of these loggers passes through the filter that hides sensitive information.
If we remove this entry, some log lines can still contain an MBI without going through the filter. In this case:

web-1  | 2024-10-17 14:59:44,187 WARNING [35] django.server line:212 "GET //v2/fhir/Patient/identifier=***MBI*** HTTP/1.1"

The MBI would be exposed if we did not have that entry in the list of loggers.

Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
from apps.logging.sensitive_logging_filters import SENSITIVE_DATA_FILTER, SensitiveDataFilter
import dj_database_url
import socket
import datetime
Expand Down Expand Up @@ -377,6 +378,12 @@
"console": {
"class": "logging.StreamHandler",
"formatter": "verbose",
"filters": [SENSITIVE_DATA_FILTER],
}
},
"filters": {
"sensitive_data_filter": {
"()": SensitiveDataFilter,
}
},
"loggers": {
Expand Down Expand Up @@ -421,6 +428,10 @@
"handlers": ["console"],
"level": "INFO",
},
'django': {
'handlers': ['console'],
'level': 'INFO',
},
},
},
)
Expand Down
3 changes: 2 additions & 1 deletion hhs_oauth_server/settings/logging_it.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@
raise ValueError("Bad settings, expecting handlers defined in settings.LOGGING")

logging_handlers['file'] = {'class': 'logging.FileHandler',
'filename': logfile_path, }
'filename': logfile_path,
"filters": [SENSITIVE_DATA_FILTER]}

loggers = LOGGING.get('loggers')

Expand Down
269 changes: 269 additions & 0 deletions hhs_oauth_server/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
"""

from django.test import TestCase

from apps.logging.sensitive_logging_filters import mask_mbi


from .utils import bool_env, TRUE_LIST, FALSE_LIST, int_env


Expand Down Expand Up @@ -45,3 +49,268 @@ def test_int_values(self):
for x, y in int_list:
result = int_env(x)
self.assertEqual(result, y)


class MBI_tests(TestCase):
    """Tests for MBI masking of strings, dicts, lists and tuples."""

    MASK = '***MBI***'

    # (hyphenated candidate, must it be masked?).
    # Every entry is also checked with its hyphens removed and in lowercase,
    # so the hyphen-less cases do not need to be listed separately.
    MBI_CASES = [
        # Valid MBI
        ("1EG4-TE5-MK74", True),

        # Valid MBI, position 3 as 0
        # Position 3 - alphanumeric values 0 thru 9 and A thru Z (minus S, L, O, I, B, Z)
        ("1E04-TE5-MK74", True),

        # Valid MBI, position 4 as 0
        # Position 4 - numeric values 0 thru 9
        ("1EG0-TE5-MK74", True),

        # Valid MBI, position 6 as 0
        # Position 6 - alphanumeric values 0 thru 9 and A thru Z (minus S, L, O, I, B, Z)
        ("1EG4-T05-MK74", True),

        # Valid MBI, position 7 as 0
        # Position 7 - numeric values 0 thru 9
        ("1EG4-TE0-MK74", True),

        # Valid MBI, position 10 as 0
        # Position 10 - numeric values 0 thru 9
        ("1EG4-TE5-MK04", True),

        # Valid MBI, position 11 as 0
        # Position 11 - numeric values 0 thru 9
        ("1EG4-TE5-MK70", True),

        # Position 1 is invalid
        # Position 1 - numeric values 1 thru 9
        ("AEG4-TE5-MK74", False),

        # Position 2 is invalid
        # Position 2 - alphabetic values A thru Z (minus S, L, O, I, B, Z)
        ("1SG4-TE5-MK74", False),
        ("1LG4-TE5-MK74", False),
        ("1OG4-TE5-MK74", False),
        ("1IG4-TE5-MK74", False),
        ("1BG4-TE5-MK74", False),
        ("1ZG4-TE5-MK74", False),
        ("11G4-TE5-MK74", False),

        # Position 3 is invalid
        # Position 3 - alphanumeric values 0 thru 9 and A thru Z (minus S, L, O, I, B, Z)
        ("1ES4-TE5-MK74", False),
        ("1EL4-TE5-MK74", False),
        ("1EO4-TE5-MK74", False),
        ("1EI4-TE5-MK74", False),
        ("1EB4-TE5-MK74", False),
        ("1EZ4-TE5-MK74", False),

        # Position 4 is invalid
        # Position 4 - numeric values 0 thru 9
        ("1EGA-TE5-MK74", False),

        # Position 5 is invalid
        # Position 5 - alphabetic values A thru Z (minus S, L, O, I, B, Z)
        ("1EG4-1E5-MK74", False),
        ("1EG4-SE5-MK74", False),
        ("1EG4-LE5-MK74", False),
        ("1EG4-OE5-MK74", False),
        ("1EG4-IE5-MK74", False),
        ("1EG4-BE5-MK74", False),
        ("1EG4-ZE5-MK74", False),

        # Position 6 is invalid
        # Position 6 - alphanumeric values 0 thru 9 and A thru Z (minus S, L, O, I, B, Z)
        ("1EG4-TS5-MK74", False),
        ("1EG4-TL5-MK74", False),
        ("1EG4-TO5-MK74", False),
        ("1EG4-TI5-MK74", False),
        ("1EG4-TB5-MK74", False),
        ("1EG4-TZ5-MK74", False),

        # Position 7 is invalid
        # Position 7 - numeric values 0 thru 9
        ("1EG4-TEA-MK74", False),

        # Position 8 is invalid
        # Position 8 - alphabetic values A thru Z (minus S, L, O, I, B, Z)
        ("1EG4-TE5-1K74", False),
        ("1EG4-TE5-SK74", False),
        ("1EG4-TE5-LK74", False),
        ("1EG4-TE5-OK74", False),
        ("1EG4-TE5-IK74", False),
        ("1EG4-TE5-BK74", False),
        ("1EG4-TE5-ZK74", False),

        # Position 9 is invalid
        # Position 9 - alphabetic values A thru Z (minus S, L, O, I, B, Z)
        ("1EG4-TE5-M174", False),
        ("1EG4-TE5-MS74", False),
        ("1EG4-TE5-ML74", False),
        ("1EG4-TE5-MO74", False),
        ("1EG4-TE5-MI74", False),
        ("1EG4-TE5-MB74", False),
        ("1EG4-TE5-MZ74", False),

        # Last group is one character too long
        ("1EG4-TE5-MOI74", False),

        # Position 10 is invalid
        # Position 10 - numeric values 0 thru 9
        ("1EG4-TE5-MKA4", False),
        ("1EG4-TE5-MKB4", False),
        ("1EG4-TE5-MKZ4", False),

        # Position 11 is invalid
        # Position 11 - numeric values 0 thru 9
        ("1EG4-TE5-MK7A", False),
    ]

    def test_mbi_match_dict(self):
        """MBIs nested in dicts, lists and tuples are all masked."""
        valid_mbi = "1EG4-TE5-MK74"

        my_dict = {
            'key1': valid_mbi,
            'key2': {
                'key4': valid_mbi
            },
            'key3': (valid_mbi, valid_mbi),
            'key5': [valid_mbi, valid_mbi]
        }

        masked_mbi_dict = mask_mbi(my_dict)
        masked_mbi_string = str(masked_mbi_dict)
        self.assertIn(self.MASK, masked_mbi_string)
        self.assertNotIn(valid_mbi, masked_mbi_string)

        mbi_list = [valid_mbi, valid_mbi]
        masked_mbi_list = mask_mbi(mbi_list)
        self.assertIn(self.MASK, masked_mbi_list)
        self.assertNotIn(valid_mbi, masked_mbi_list)

        mbi_tuple = (valid_mbi, valid_mbi)
        masked_mbi_tuple = mask_mbi(mbi_tuple)
        self.assertIn(self.MASK, masked_mbi_tuple)
        self.assertNotIn(valid_mbi, masked_mbi_tuple)

    def test_mbi_match(self):
        """Each candidate is masked exactly when it is a well-formed MBI."""
        for hyphenated_mbi, expected in self.MBI_CASES:
            # Exercise both spellings: with and without hyphens.
            for mbi_value in (hyphenated_mbi, hyphenated_mbi.replace("-", "")):
                # subTest makes a failure report name the offending input.
                with self.subTest(mbi=mbi_value, expected=expected):
                    self._check_masking(mbi_value, expected)

    def _check_masking(self, mbi_value, expected):
        """Assert masking of ``mbi_value`` in upper- and lowercase text."""
        # Create a text that contains the MBI
        uppercase_mbi_text = f"This is a test string with MBI: {mbi_value}, expected: {expected}."
        lowercase_mbi_text = uppercase_mbi_text.lower()
        masked_uppercase_text = mask_mbi(uppercase_mbi_text)
        masked_mbi_lowercase_text = mask_mbi(lowercase_mbi_text)

        # Check if the MBI was masked
        if expected:
            self.assertIn(self.MASK, masked_uppercase_text)
            self.assertIn(self.MASK, masked_mbi_lowercase_text)
            self.assertNotIn(mbi_value, masked_uppercase_text)
            self.assertNotIn(mbi_value.lower(), masked_mbi_lowercase_text)
        else:
            self.assertNotIn(self.MASK, masked_uppercase_text)
            self.assertNotIn(self.MASK, masked_mbi_lowercase_text)
            self.assertIn(mbi_value, masked_uppercase_text)
            self.assertIn(mbi_value.lower(), masked_mbi_lowercase_text)
Loading