feat(#3336): VPC integration tests (#3699)
* feat(#3336): Add basic test for aws vpc integration

* feat(#3336): Add data generator for vpcflow type

* feat(#3336): Add remove_from_bucket test for aws vpc integration

* feat(#3336): Add only_logs_after tests for aws vpc integration

* feat(#3336): Add path tests for aws vpc integration

* feat(#3336): Add path_suffix tests for aws vpc integration

* feat(#3336): Add regions tests for aws vpc integration

* feat(#3336): Add discard_regex tests for aws vpc integration

* feat(#3336): Add only_logs_after tier_1 tests for aws vpc integration

* style(#3336): Fix linter issues

* feat(#3336): Use join in path builds

* feat(#3336): Fix cloudtrail cases

* feat(#3336): Detect found and skipped logs

* feat(#3336): Improve delete_file_from_s3 fixture

* fix(#3336): Add minor fixes

---------

Co-authored-by: David Jose Iglesias Lopez <[email protected]>
nico-stefani and davidjiglesias committed Mar 13, 2023
1 parent 775b47b commit a8160da
Showing 22 changed files with 325 additions and 37 deletions.
@@ -35,9 +35,10 @@ def check_api_event(file_monitor=None, callback='', error_message=None, update_p
         callback (str): log regex to check in the file
         error_message (str): error message to show in case of expected event does not occur
         update_position (boolean): filter configuration parameter to search in the file
-        timeout (str): timeout to check the event in the file
+        timeout (int): timeout to check the event in the file
         prefix (str): log pattern regex
         accum_results (int): Accumulation of matches.
+        file_to_monitor (str): File to be monitored.
     """
     file_monitor = FileMonitor(file_to_monitor) if file_monitor is None else file_monitor
     error_message = f"Could not find this event in {file_to_monitor}: {callback}" if error_message is None else \
4 changes: 4 additions & 0 deletions deps/wazuh_testing/wazuh_testing/modules/aws/constants.py
@@ -8,6 +8,8 @@
AWS_LOGS = 'AWSLogs'
RANDOM_ACCOUNT_ID = '819751203818'
CLOUDTRAIL = 'CloudTrail'
+VPC_FLOW_LOGS = "vpcflowlogs"
+FLOW_LOG_ID = "fl-0754d951c16f517fa"

EVENT_TIME_FORMAT = '%Y-%m-%dT%H:%M:%SZ'
PATH_DATE_FORMAT = '%Y/%m/%d'
@@ -16,6 +18,8 @@
US_EAST_1_REGION = 'us-east-1'

JSON_EXT = '.json'
+LOG_EXT = '.log'

# Bucket types
CLOUD_TRAIL_TYPE = 'cloudtrail'
VPC_FLOW_TYPE = 'vpcflow'
75 changes: 67 additions & 8 deletions deps/wazuh_testing/wazuh_testing/modules/aws/data_generator.py
@@ -1,11 +1,21 @@
 """Utils to generate sample data to AWS"""
+import csv
+import json
 from datetime import datetime
+from io import StringIO
 from os.path import join
 from uuid import uuid4

+from wazuh_testing.tools.utils import get_random_ip, get_random_port, get_random_string
+
 from . import constants as cons


+def get_random_interface_id() -> str:
+    """Return a random interface ID."""
+    return f"eni-{get_random_string(17)}"
+
+
 class DataGenerator:
     BASE_PATH = ''
     BASE_FILE_NAME = ''
@@ -14,15 +24,15 @@ def get_filename(self, *args, **kwargs) -> str:
         """Return the filename according to the integration format.

         Returns:
-            str: Syntetic filename.
+            str: Synthetic filename.
         """
         raise NotImplementedError()

     def get_data_sample(self, *args, **kwargs) -> dict:
         """Return a sample of data according to the integration format.

         Returns:
-            dict: Syntetic data.
+            dict: Synthetic data.
         """
         raise NotImplementedError()

@@ -38,21 +48,21 @@ def get_filename(self, *args, **kwargs) -> str:
             <prefix>/AWSLogs/<suffix>/<organization_id>/<account_id>/CloudTrail/<region>/<year>/<month>/<day>

         Returns:
-            str: Syntetic filename.
+            str: Synthetic filename.
         """
         now = datetime.now()
         path = join(self.BASE_PATH, now.strftime(cons.PATH_DATE_FORMAT))
         name = f"{self.BASE_FILE_NAME}{now.strftime(cons.FILENAME_DATE_FORMAT)}_{abs(hash(now))}{cons.JSON_EXT}"

         return join(path, name)

-    def get_data_sample(self) -> dict:
+    def get_data_sample(self) -> str:
         """Return a sample of data according to the cloudtrail format.

         Returns:
-            dict: Syntetic data.
+            str: Synthetic data.
         """
-        return {
+        return json.dumps({
             'Records': [
                 {
                     'eventVersion': '1.08',
@@ -94,12 +104,61 @@ def get_data_sample(self) -> dict:
                     'sharedEventID': str(uuid4())
                 }
             ]
-        }
+        })


+class VPCDataGenerator(DataGenerator):
+    BASE_PATH = join(cons.AWS_LOGS, cons.RANDOM_ACCOUNT_ID, cons.VPC_FLOW_LOGS, cons.US_EAST_1_REGION)
+    BASE_FILE_NAME = f'{cons.RANDOM_ACCOUNT_ID}_{cons.VPC_FLOW_LOGS}_{cons.US_EAST_1_REGION}_'
+
+    def get_filename(self) -> str:
+        """Return the filename in the VPC format.
+
+        Example:
+            <prefix>/AWSLogs/<suffix>/<organization_id>/<account_id>/vpcflowlogs/<region>/<year>/<month>/<day>
+
+        Returns:
+            str: Synthetic filename.
+        """
+        now = datetime.now()
+        path = join(self.BASE_PATH, now.strftime(cons.PATH_DATE_FORMAT))
+        name = (
+            f'{self.BASE_FILE_NAME}{cons.FLOW_LOG_ID}_{now.strftime(cons.FILENAME_DATE_FORMAT)}_{abs(hash(now))}'
+            f'{cons.LOG_EXT}'
+        )
+
+        return join(path, name)
+
+    def get_data_sample(self) -> str:
+        """Return a sample of data according to the VPC format.
+
+        Returns:
+            str: Synthetic data.
+        """
+        data = [
+            [
+                "version", "account-id", "interface-id", "srcaddr", "dstaddr", "srcport", "dstport", "protocol",
+                "packets", "bytes", "start", "end", "action", "log-status"
+            ]
+        ]
+
+        for _ in range(5):
+            data.append(
+                [
+                    "2", cons.RANDOM_ACCOUNT_ID, get_random_interface_id(), get_random_ip(), get_random_ip(),
+                    get_random_port(), get_random_port(), "6", "39", "4698", "1622505433", "1622505730", "ACCEPT", "OK"
+                ]
+            )
+
+        buffer = StringIO()
+        csv.writer(buffer, delimiter=" ").writerows(data)
+
+        return buffer.getvalue()


 # Maps bucket type with corresponding data generator
 buckets_data_mapping = {
-    cons.CLOUD_TRAIL_TYPE: CloudTrailDataGenerator
+    cons.CLOUD_TRAIL_TYPE: CloudTrailDataGenerator,
+    cons.VPC_FLOW_TYPE: VPCDataGenerator
 }


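For context, a minimal sketch of how a test can drive the new generator through `buckets_data_mapping` (assuming the `wazuh_testing` package from this repo is installed; the key and sample shapes follow the code above):

```python
from wazuh_testing.modules.aws.data_generator import buckets_data_mapping

# Pick the generator registered for the bucket type under test.
generator = buckets_data_mapping['vpcflow']()

# Synthetic S3 key, e.g. AWSLogs/<account>/vpcflowlogs/us-east-1/<Y>/<m>/<d>/<name>.log
key = generator.get_filename()

# Space-delimited flow records: one header row plus five data rows,
# each carrying the 14 default VPC Flow Logs v2 fields.
sample = generator.get_data_sample()
header, *rows = sample.splitlines()
assert len(header.split(' ')) == 14 and len(rows) == 5
```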
25 changes: 20 additions & 5 deletions deps/wazuh_testing/wazuh_testing/modules/aws/db_utils.py
@@ -1,16 +1,29 @@
 import sqlite3
 from collections import namedtuple
 from pathlib import Path
-from typing import Iterator
+from typing import Iterator, Type

-from .constants import S3_CLOUDTRAIL_DB_PATH
+from .constants import S3_CLOUDTRAIL_DB_PATH, CLOUD_TRAIL_TYPE, VPC_FLOW_TYPE

 SELECT_QUERY_TEMPLATE = 'SELECT * FROM {table_name}'

 S3CloudTrailRow = namedtuple(
     'S3CloudTrailRow', 'bucket_path aws_account_id aws_region log_key processed_date created_date'
 )

+S3VPCFlowRow = namedtuple(
+    'S3VPCFlowRow', 'bucket_path aws_account_id aws_region flowlog_id log_key processed_date created_date'
+)
+
+s3_rows_map = {
+    CLOUD_TRAIL_TYPE: S3CloudTrailRow,
+    VPC_FLOW_TYPE: S3VPCFlowRow
+}
+
+
+def _get_s3_row_type(bucket_type: str) -> Type[S3CloudTrailRow]:
+    return s3_rows_map.get(bucket_type, S3CloudTrailRow)


 def get_db_connection(path: Path) -> sqlite3.Connection:
     return sqlite3.connect(path)
@@ -45,8 +58,9 @@ def get_s3_db_row(table_name: str) -> S3CloudTrailRow:
     connection = get_db_connection(S3_CLOUDTRAIL_DB_PATH)
     cursor = connection.cursor()
     result = cursor.execute(SELECT_QUERY_TEMPLATE.format(table_name=table_name)).fetchone()
+    row_type = _get_s3_row_type(table_name)

-    return S3CloudTrailRow(*result)
+    return row_type(*result)


def get_multiple_s3_db_row(table_name: str) -> Iterator[S3CloudTrailRow]:
@@ -60,9 +74,10 @@ def get_multiple_s3_db_row(table_name: str) -> Iterator[S3CloudTrailRow]:
"""
connection = get_db_connection(S3_CLOUDTRAIL_DB_PATH)
cursor = connection.cursor()
row_type = _get_s3_row_type(table_name)

for row in cursor.execute(SELECT_QUERY_TEMPLATE.format(table_name=table_name)):
yield S3CloudTrailRow(*row)
yield row_type(*row)


def table_exists(table_name: str) -> bool:
@@ -86,4 +101,4 @@ def table_exists(table_name: str) -> bool:
         name NOT LIKE 'sqlite_%';
     """

-    return table_name in cursor.execute(query).fetchone()
+    return table_name in [result[0] for result in cursor.execute(query).fetchall()]
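The `table_exists` change is a real bug fix: `fetchone()` returns only the first row of the `sqlite_master` query, so membership was only ever tested against a single table name. A standalone sketch of the difference, using a toy in-memory database rather than the module's real schema:

```python
import sqlite3

con = sqlite3.connect(':memory:')
con.executescript('CREATE TABLE cloudtrail (c); CREATE TABLE vpcflow (c);')
cursor = con.cursor()
query = "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%';"

# Old check: fetchone() yields just ('cloudtrail',), so 'vpcflow' is reported missing.
print('vpcflow' in cursor.execute(query).fetchone())                      # False
# New check: collect the first column of every row before testing membership.
print('vpcflow' in [row[0] for row in cursor.execute(query).fetchall()])  # True
```

Note also that `_get_s3_row_type` falls back to `S3CloudTrailRow` for unrecognized bucket types, so existing CloudTrail callers are unaffected.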
16 changes: 14 additions & 2 deletions deps/wazuh_testing/wazuh_testing/modules/aws/event_monitor.py
@@ -2,6 +2,7 @@
 from typing import Callable

 from .cli_utils import analyze_command_output
+from .constants import CLOUD_TRAIL_TYPE, VPC_FLOW_TYPE

PARSER_ERROR = r'.*wm_aws_read\(\): ERROR:.*'
MODULE_ERROR = r'.*wm_aws_run_s3\(\): ERROR: .*'
@@ -62,6 +63,11 @@ def callback_detect_event_processed(line):
return line


+def callback_detect_event_processed_or_skipped(pattern):
+    pattern_regex = re.compile(pattern)
+    return lambda line: pattern_regex.match(line) or callback_detect_event_processed(line)


 def callback_event_sent_to_analysisd(line):
     if line.startswith(AWS_EVENT_HEADER):
         return line
@@ -76,8 +82,14 @@ def check_processed_logs_from_output(command_output: str, expected_results: int
     )


-def check_non_processed_logs_from_output(command_output: str, expected_results: int = 1):
-    pattern = r'.*DEBUG: \+\+\+ No logs to process in bucket: '
+def check_non_processed_logs_from_output(command_output: str, bucket_type: str, expected_results: int = 1):
+
+    if bucket_type == CLOUD_TRAIL_TYPE:
+        pattern = r'.*DEBUG: \+\+\+ No logs to process in bucket: '
+    elif bucket_type == VPC_FLOW_TYPE:
+        pattern = r'.*DEBUG: \+\+\+ No logs to process for .*'
+    else:
+        pattern = ''

     analyze_command_output(
         command_output,
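A quick sketch of how the composed callback behaves; the log strings and patterns here are placeholders, not the module's real output:

```python
import re

def callback_detect_event_processed(line):
    # Stand-in for the real callback defined earlier in event_monitor.py.
    if 'events processed' in line:
        return line

def callback_detect_event_processed_or_skipped(pattern):
    pattern_regex = re.compile(pattern)
    return lambda line: pattern_regex.match(line) or callback_detect_event_processed(line)

callback = callback_detect_event_processed_or_skipped(r'.*skipping event.*')
assert callback('DEBUG: skipping event due to discard_regex')  # skip pattern matched
assert callback('DEBUG: 5 events processed')                   # falls through to the processed callback
assert not callback('DEBUG: an unrelated line')
```

This is presumably the piece that lets the discard_regex tests count both found and skipped logs from a single monitor.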
2 changes: 1 addition & 1 deletion deps/wazuh_testing/wazuh_testing/modules/aws/s3_utils.py
@@ -31,7 +31,7 @@ def upload_file(bucket_type: str, bucket_name: str) -> str:

     # Upload the file
     try:
-        obj.put(Body=json.dumps(data).encode())
+        obj.put(Body=data.encode())
     except ClientError as e:
         logger.error(e)
         filename = ''
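This follows from the generator change above: `get_data_sample` now returns an already-serialized string (JSON for CloudTrail, space-delimited records for VPC flow logs), so `upload_file` encodes it directly instead of re-serializing with `json.dumps`.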
14 changes: 12 additions & 2 deletions deps/wazuh_testing/wazuh_testing/tools/utils.py
@@ -115,6 +115,15 @@ def get_random_ip():
     return fr"{randint(0,255)}.{randint(0,255)}.{randint(0,255)}.{randint(0,255)}"


+def get_random_port():
+    """Create a port number.
+
+    Return:
+        String: Random port number.
+    """
+    return f"{randint(0, 10000)}"


 def get_random_string(string_length, digits=True):
     """Create a random string with specified length.
@@ -162,12 +171,13 @@ def get_host_name():
 def validate_interval_format(interval):
     """Validate that the interval passed has the format in which the last digit is a letter from those passed and
     the other characters are between 0-9"""
-    if interval=='':
+    if interval == '':
         return False
-    if interval[-1] not in ['s','m', 'h','d','w','y'] or not isinstance(int(interval[0:-1]), numbers.Number):
+    if interval[-1] not in ['s', 'm', 'h', 'd', 'w', 'y'] or not isinstance(int(interval[0:-1]), numbers.Number):
         return False
     return True


 def format_ipv6_long(ipv6_address):
     """Return the long form of the address representation in uppercase.
8 changes: 4 additions & 4 deletions tests/integration/test_aws/conftest.py
@@ -80,10 +80,10 @@ def delete_file_from_s3(metadata: dict):
     yield

     bucket_name = metadata['bucket_name']
-    filename = metadata['filename']
-
-    delete_file(filename=filename, bucket_name=bucket_name)
-    logger.debug('Deleted file: %s from bucket %s', filename, bucket_name)
+    filename = metadata.get('filename')
+    if filename is not None:
+        delete_file(filename=filename, bucket_name=bucket_name)
+        logger.debug('Deleted file: %s from bucket %s', filename, bucket_name)

# DB fixtures

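The teardown now tolerates metadata without a `filename` key instead of raising `KeyError`, which matters for test cases that never upload a file to the bucket.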
@@ -6,3 +6,12 @@
   metadata:
     bucket_type: cloudtrail
     bucket_name: wazuh-cloudtrail-integration-tests
+
+- name: vpc_defaults
+  description: VPC default configurations
+  configuration_parameters:
+    BUCKET_TYPE: vpcflow
+    BUCKET_NAME: wazuh-vpcflow-integration-tests
+  metadata:
+    bucket_type: vpcflow
+    bucket_name: wazuh-vpcflow-integration-tests
@@ -13,3 +13,19 @@
     discard_regex: .*ec2.amazonaws.com.*
     found_logs: 5
     skipped_logs: 1
+
+- name: vpc_discard_regex
+  description: VPC discard regex configurations
+  configuration_parameters:
+    BUCKET_TYPE: vpcflow
+    BUCKET_NAME: wazuh-vpcflow-integration-tests
+    DISCARD_FIELD: srcport
+    DISCARD_REGEX: "5319"
+  metadata:
+    bucket_type: vpcflow
+    bucket_name: wazuh-vpcflow-integration-tests
+    only_logs_after: 2022-NOV-20
+    discard_field: srcport
+    discard_regex: "5319"
+    found_logs: 5
+    skipped_logs: 1
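Conceptually, the `vpc_discard_regex` case expects the module to skip any flow record whose `srcport` matches `5319` and to report it as skipped rather than found. A toy filter over a VPC-style sample (illustrative only; the module's real matching semantics may differ):

```python
import re

sample = (
    'version account-id interface-id srcaddr dstaddr srcport dstport protocol packets bytes start end action log-status\n'
    '2 819751203818 eni-a 10.0.0.1 10.0.0.2 5319 443 6 39 4698 1622505433 1622505730 ACCEPT OK\n'
    '2 819751203818 eni-b 10.0.0.3 10.0.0.4 4000 443 6 39 4698 1622505433 1622505730 ACCEPT OK\n'
)

header, *rows = sample.splitlines()
fields = header.split(' ')
records = [dict(zip(fields, row.split(' '))) for row in rows]

discard_field, discard_regex = 'srcport', re.compile('5319')
kept = [r for r in records if not discard_regex.match(r[discard_field])]
skipped = len(records) - len(kept)
assert skipped == 1 and kept[0]['srcport'] == '4000'
```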
@@ -4,3 +4,10 @@
   metadata:
     bucket_type: cloudtrail
     bucket_name: wazuh-cloudtrail-integration-tests
+
+- name: vpc_only_logs_after_multiple_calls
+  description: VPC only_logs_after multiple calls configurations
+  configuration_parameters:
+  metadata:
+    bucket_type: vpcflow
+    bucket_name: wazuh-vpcflow-integration-tests
@@ -9,3 +9,15 @@
     bucket_name: wazuh-cloudtrail-integration-tests
     only_logs_after: 2022-NOV-20
     expected_results: 5
+
+- name: vpc_with_only_logs_after
+  description: VPC only logs after configurations
+  configuration_parameters:
+    BUCKET_TYPE: vpcflow
+    BUCKET_NAME: wazuh-vpcflow-integration-tests
+    ONLY_LOGS_AFTER: 2022-NOV-20
+  metadata:
+    bucket_type: vpcflow
+    bucket_name: wazuh-vpcflow-integration-tests
+    only_logs_after: 2022-NOV-20
+    expected_results: 3
@@ -7,3 +7,13 @@
     bucket_type: cloudtrail
     bucket_name: wazuh-cloudtrail-integration-tests
     expected_results: 1
+
+- name: vpc_without_only_logs_after
+  description: VPC only logs after configurations
+  configuration_parameters:
+    BUCKET_TYPE: vpcflow
+    BUCKET_NAME: wazuh-vpcflow-integration-tests
+  metadata:
+    bucket_type: vpcflow
+    bucket_name: wazuh-vpcflow-integration-tests
+    expected_results: 1