Remove GCP dependencies (google#1440)
* Remove gcp dependencies

* Update dockerfiles

* Update dockerfiles

* Update gcp error reporting

* Updates to formatting

* Add unit test

* Update unit test

* Clean up

* Update unit test

* Update error reporting

* Update file

* Update config template

* Catch exception

* Updates

* fix lint
jleaniz committed Mar 18, 2024
1 parent 5b583d5 commit 5c8c638
Showing 27 changed files with 1,021 additions and 2,186 deletions.
4 changes: 2 additions & 2 deletions docker/api_server/Dockerfile
@@ -33,7 +33,7 @@ WORKDIR /home/turbinia

# Copy requirements and install dependencies to cache them in docker layer
COPY --chown=turbinia:turbinia ./pyproject.toml ./poetry.toml ./poetry.lock /home/turbinia/
-RUN poetry install --no-interaction --no-ansi --no-root -E gcp
+RUN poetry install --no-interaction --no-ansi --no-root

ENV PATH="/home/turbinia/.venv/bin:$PATH" \
VIRTUAL_ENV=/home/turbinia/.venv
@@ -44,7 +44,7 @@ COPY --chown=turbinia:turbinia docker/api_server/start.sh /home/turbinia/start.sh
RUN chmod +rwx /home/turbinia/start.sh

# Install Turbinia package -- will skip dependencies if installed
-RUN poetry install --no-interaction --no-ansi -E gcp
+RUN poetry install --no-interaction --no-ansi

CMD ["/home/turbinia/start.sh"]
# Expose Prometheus and API endpoints.
4 changes: 2 additions & 2 deletions docker/server/Dockerfile
@@ -32,7 +32,7 @@ WORKDIR /home/turbinia

# Copy requirements and install dependencies to cache them in docker layer
COPY --chown=turbinia:turbinia ./pyproject.toml ./poetry.toml ./poetry.lock /home/turbinia/
-RUN poetry install --no-interaction --no-ansi --no-root -E gcp
+RUN poetry install --no-interaction --no-ansi --no-root

ENV PATH="/home/turbinia/.venv/bin:$PATH" \
VIRTUAL_ENV=/home/turbinia/.venv
@@ -43,7 +43,7 @@ COPY --chown=turbinia:turbinia docker/server/start.sh /home/turbinia/start.sh
RUN chmod +rwx /home/turbinia/start.sh

# Install Turbinia package -- will skip dependencies if installed
-RUN poetry install --no-interaction --no-ansi -E gcp
+RUN poetry install --no-interaction --no-ansi

CMD ["/home/turbinia/start.sh"]
# Expose Prometheus endpoint.
4 changes: 2 additions & 2 deletions docker/tests/Dockerfile
@@ -96,7 +96,7 @@ WORKDIR /home/turbinia
COPY --chown=turbinia:turbinia ./pyproject.toml ./poetry.toml ./poetry.lock /home/turbinia/

# Install dependencies using Poetry
-RUN poetry install --no-interaction --no-ansi -E worker -E gcp --with test --no-root
+RUN poetry install --no-interaction --no-ansi -E worker --with test --no-root
RUN poetry run python3 -m pip install impacket --no-deps

# Activate the virtualenv
@@ -106,7 +106,7 @@ ENV PATH="/home/turbinia/.venv/bin:$PATH" \
# Install Turbinia and dependencies in /home/turbinia/.venv using Poetry
ADD . /home/turbinia/
# RUN if $(cd /tmp/ && git rev-parse --is-shallow-repository); then cd /tmp/ && git fetch --prune --unshallow && git fetch --depth=1 origin +refs/tags/*:refs/tags/*; fi
-RUN poetry install --no-interaction --no-ansi -E worker -E gcp --with test
+RUN poetry install --no-interaction --no-ansi -E worker --with test
# We need to install the current dev version of turbinia-api-lib for the cli tool test in case the API client changes
RUN cd turbinia/api/client && poetry install
CMD ["/bin/bash"]
4 changes: 2 additions & 2 deletions docker/worker/Dockerfile
@@ -105,7 +105,7 @@ WORKDIR /home/turbinia

# Copy requirements and install dependencies to cache them in docker layer
COPY --chown=turbinia:turbinia ./pyproject.toml ./poetry.toml ./poetry.lock /home/turbinia/
-RUN poetry install --no-interaction --no-ansi -E worker -E gcp --no-root
+RUN poetry install --no-interaction --no-ansi -E worker --no-root
RUN poetry run pip3 install impacket --no-deps
ENV PATH="/home/turbinia/.venv/bin:$PATH" \
VIRTUAL_ENV=/home/turbinia/.venv
@@ -116,7 +116,7 @@ COPY --chown=turbinia:turbinia docker/worker/start.sh /home/turbinia/start.sh
RUN chmod +rwx /home/turbinia/start.sh

# Install Turbinia package -- will skip dependencies if installed
-RUN poetry install --no-interaction --no-ansi -E worker -E gcp
+RUN poetry install --no-interaction --no-ansi -E worker

CMD ["/home/turbinia/start.sh"]
# Expose Prometheus endpoint.
1,856 changes: 860 additions & 996 deletions poetry.lock

Large diffs are not rendered by default.

32 changes: 3 additions & 29 deletions pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "turbinia"
-version = "20231116.2"
+version = "20240219"
description = "Automation and Scaling of Digital Forensics Tools"
authors = ["Turbinia Developers <[email protected]>"]
maintainers = ["Turbinia Developers <[email protected]>"]
@@ -20,23 +20,11 @@ dfimagetools = { version = "^20230806", optional = true }
docker = { version = "^6.1.3" }
fastapi = {extras = ["all"], version = ">=0.75.0,<0.99.0"}
filelock = { version = "*" }
-# TODO: Cleanup GCP libs and their dependencies when psq/pubsub is deprecated
-google-api-core = { version = "<2.0.0dev", optional = true }
-google-api-python-client = { version = "*", optional = true }
-google-auth = { version = ">=2.15.0", optional = true }
-google-cloud-core = { version = "<2.0dev", optional = true }
-google-cloud-datastore = { version = "<=2.0.0", optional = true }
-google-cloud-error-reporting = { version = "*", optional = true }
-google-cloud-pubsub = { version = "1.7.0", optional = true }
-google-cloud-storage = { version = "<=2.2.1", optional = true }
-grpcio-status = { version = "<1.49.0,>=1.33.2", optional = true }
-libcloudforensics = { version = "20230601" }
+libcloudforensics = { version = "20240214" }
pandas = { version = "^2.1.0" }
plaso = { version = "20231224", optional = true }
prometheus_client = { version = "^0.17.1" }
-protobuf = { version = ">=3.19.0,<4.0.0dev", optional = true }
-proto-plus = { version = "<2.0.0dev,>=1.22.0", optional = true }
-psq = { version = "*", optional = true }
pydantic = { version = "^1.10.5,<2"}
pyhindsight = { version = "^20230327.0", optional = true }
redis = { version = "^4.4.4" }
@@ -51,6 +39,7 @@ fakeredis = "^1.8.1"
google-auth-oauthlib = "^1.1.0"
mock = "*"
pytest = "*"
+turbinia-api-lib = "^1.0.2"
yapf = "*"

[tool.poetry.extras]
@@ -61,21 +50,6 @@ worker = [
"pyhindsight",
]

-gcp = [
-  "google-api-core",
-  "google-api-python-client",
-  "google-auth",
-  "google-cloud-core",
-  "google-cloud-datastore",
-  "google-cloud-error-reporting",
-  "google-cloud-pubsub",
-  "google-cloud-storage",
-  "grpcio-status",
-  "protobuf",
-  "proto-plus",
-  "psq",
-]

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
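
Note the new turbinia-api-lib entry in the test dependencies: task status queries that previously went through Google Cloud Functions are now exercised against the Turbinia API server instead. A rough sketch of what such a query could look like with the generated OpenAPI client; the host and the method name are assumptions, not code from this commit:

import turbinia_api_lib
from turbinia_api_lib.api import turbinia_tasks_api

# Assumed local API server endpoint; adjust for your deployment.
configuration = turbinia_api_lib.Configuration(host='http://localhost:8000')

with turbinia_api_lib.ApiClient(configuration) as api_client:
  tasks_api = turbinia_tasks_api.TurbiniaTasksApi(api_client)
  # Assumed wrapper around GET /api/task/{task_id}; replace TASK_ID with a
  # real task identifier.
  task = tasks_api.get_task_status('TASK_ID')
  print(task)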
5 changes: 3 additions & 2 deletions turbinia/__init__.py
@@ -43,8 +43,9 @@ def log_and_report(message, trace):
  if config.CLOUD_PROVIDER.lower() == 'gcp' and config.STACKDRIVER_TRACEBACK:
    # Only load google_cloud if needed
    from turbinia.lib import google_cloud
-    client = google_cloud.setup_stackdriver_traceback(config.TURBINIA_PROJECT)
-    client.report_exception()
+    error_client = google_cloud.GCPErrorReporting()
+    message = f'{message}:{trace}'
+    error_client.report(message)


class TurbiniaException(Exception):
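
The new google_cloud.GCPErrorReporting class referenced above is not shown in this diff. A minimal sketch of how such a wrapper could report events through the Error Reporting REST API without the removed google-cloud-error-reporting client library; the class layout and field choices here are assumptions, not the code from this commit:

import google.auth
from google.auth.transport.requests import AuthorizedSession


class GCPErrorReporting:
  """Sends error events to GCP Error Reporting over REST."""

  def __init__(self):
    # Application Default Credentials; assumes a GCP runtime environment or
    # that `gcloud auth application-default login` has been run.
    credentials, project = google.auth.default(
        scopes=['https://www.googleapis.com/auth/cloud-platform'])
    self.session = AuthorizedSession(credentials)
    self.url = ('https://clouderrorreporting.googleapis.com/v1beta1/'
                f'projects/{project}/events:report')

  def report(self, message):
    body = {
        'serviceContext': {'service': 'turbinia'},
        'message': message,
        # reportLocation is required when the message is not a stack trace.
        'context': {'reportLocation': {'functionName': 'log_and_report'}},
    }
    self.session.post(self.url, json=body).raise_for_status()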
155 changes: 2 additions & 153 deletions turbinia/client.py
@@ -20,15 +20,12 @@
from datetime import datetime
from datetime import timedelta

-import httplib2
import json
import logging
from operator import itemgetter
from operator import attrgetter
import os
import time

-from google import auth
from turbinia import config
from turbinia.config import logger
from turbinia.config import DATETIME_FORMAT
@@ -67,9 +64,7 @@ def get_turbinia_client():
"""
# pylint: disable=no-else-return
setup(is_client=True)
if config.TASK_MANAGER.lower() == 'psq':
return BaseTurbiniaClient()
elif config.TASK_MANAGER.lower() == 'celery':
if config.TASK_MANAGER.lower() == 'celery':
return TurbiniaCeleryClient()
else:
msg = f'Task Manager type "{config.TASK_MANAGER:s}" not implemented'
@@ -340,123 +335,6 @@ def wait_for_request(

log.info(f'All {len(task_results):d} Tasks completed')

-  def get_task_data(
-      self, instance, project, region, days=0, task_id=None, request_id=None,
-      group_id=None, user=None, function_name='gettasks', output_json=False):
-    """Gets task data from Google Cloud Functions.
-
-    Args:
-      instance (string): The Turbinia instance name (by default the same as the
-          INSTANCE_ID in the config).
-      project (string): The name of the project.
-      region (string): The name of the region to execute in.
-      days (int): The number of days we want history for.
-      task_id (string): The Id of the task.
-      group_id (string): The group Id of the requests.
-      request_id (string): The Id of the request we want tasks for.
-      user (string): The user of the request we want tasks for.
-      function_name (string): The GCF function we want to call.
-      output_json (bool): Whether to return JSON output.
-
-    Returns:
-      (List|JSON string) of Task dict objects
-    """
-    cloud_function = gcp_function.GoogleCloudFunction(project)
-    func_args = {'instance': instance, 'kind': 'TurbiniaTask'}
-
-    if days:
-      start_time = datetime.now() - timedelta(days=days)
-      # Format this like '1990-01-01T00:00:00z' so we can cast it directly to a
-      # javascript Date() object in the cloud function.
-      start_string = start_time.strftime(DATETIME_FORMAT)
-      func_args.update({'start_time': start_string})
-    elif task_id:
-      func_args.update({'task_id': task_id})
-    elif group_id:
-      func_args.update({'group_id': group_id})
-    elif request_id:
-      func_args.update({'request_id': request_id})
-
-    if user:
-      func_args.update({'user': user})
-
-    response = {}
-    retry_count = 0
-    credential_error_count = 0
-    while not response and retry_count < MAX_RETRIES:
-      try:
-        response = cloud_function.ExecuteFunction(
-            function_name, region, func_args)
-      except auth.exceptions.RefreshError as exception:
-        if credential_error_count == 0:
-          log.info(
-              'GCP Credentials need to be refreshed by running gcloud auth '
-              'application-default login, please refresh in another terminal '
-              'and run turbiniactl -w status -r {0!s} and this process will '
-              'resume. Error: {1!s}'.format(request_id, exception))
-        else:
-          log.debug(
-              'GCP Credentials need to be refreshed by running gcloud auth '
-              'application-default login, please refresh in another terminal '
-              'and run turbiniactl -w status -r {0!s} and this process will '
-              'resume. Attempt {1:d}. Error: '
-              '{2!s}'.format(request_id, credential_error_count + 1, exception))
-        # Note, we are intentionally not incrementing the retry_count here because
-        # we will retry indefinitely while we wait for the user to reauth.
-        credential_error_count += 1
-      except httplib2.ServerNotFoundError as exception:
-        log.info(
-            'Error connecting to server, will retry [{0:d} of {1:d} retries]: '
-            '{2!s}'.format(retry_count, MAX_RETRIES, exception))
-        retry_count += 1
-
-      if not response:
-        retry_count += 1
-        time.sleep(RETRY_SLEEP)
-      elif response.get('error', {}).get('code') == 503:
-        log.warning(
-            'Retriable error response from cloud functions: [{0!s}]'.format(
-                response.get('error')))
-        retry_count += 1
-        response = {}
-        time.sleep(RETRY_SLEEP)
-
-    if not response or 'result' not in response:
-      log.error('No results found')
-      if response.get('error'):
-        msg = f"Error executing Cloud Function: [{response.get('error')!s}]."
-        log.error(msg)
-      log.debug(f'Invalid or empty GCF response: {response!s}')
-      raise TurbiniaException(
-          f'Cloud Function {function_name:s} returned no results.')
-
-    try:
-      results = json.loads(response.get('result'))
-    except (TypeError, ValueError) as exception:
-      raise TurbiniaException(
-          'Could not deserialize result [{0!s}] from GCF: [{1!s}]'.format(
-              response.get('result'), exception))
-
-    task_data = results[0]
-    if output_json:
-      try:
-        json_data = json.dumps(task_data)
-      except (TypeError, ValueError) as exception:
-        raise TurbiniaException(
-            'Could not re-serialize result [{0!s}] from GCF: [{1!s}]'.format(
-                str(task_data), exception))
-      return json_data
-
-    # Convert run_time/last_update back into datetime objects
-    for task in task_data:
-      if task.get('run_time'):
-        task['run_time'] = timedelta(seconds=task['run_time'])
-      if task.get('last_update'):
-        task['last_update'] = datetime.strptime(
-            task['last_update'], DATETIME_FORMAT)
-
-    return task_data

  def format_task_detail(self, task, show_files=False):
    """Formats a single task in detail.
@@ -1019,36 +897,7 @@ def send_request(self, request):
    Args:
      request: A TurbiniaRequest object.
    """
-    self.task_manager.server_pubsub.send_request(request)
-
-  def close_tasks(
-      self, instance, project, region, request_id=None, task_id=None, user=None,
-      requester=None):
-    """Close Turbinia Tasks based on Request ID.
-
-    Args:
-      instance (string): The Turbinia instance name (by default the same as the
-          INSTANCE_ID in the config).
-      project (string): The name of the project.
-      region (string): The name of the zone to execute in.
-      request_id (string): The Id of the request we want tasks for.
-      task_id (string): The Id of the request we want task for.
-      user (string): The user of the request we want tasks for.
-      requester (string): The user making the request to close tasks.
-
-    Returns: String of closed Task IDs.
-    """
-    cloud_function = gcp_function.GoogleCloudFunction(project)
-    func_args = {
-        'instance': instance,
-        'kind': 'TurbiniaTask',
-        'request_id': request_id,
-        'task_id': task_id,
-        'user': user,
-        'requester': requester
-    }
-    response = cloud_function.ExecuteFunction('closetasks', region, func_args)
-    return f"Closed Task IDs: {response.get('result')}"
+    pass


class TurbiniaCeleryClient(BaseTurbiniaClient):
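
With the PSQ branch gone, client construction is Celery-only. A minimal usage sketch, assuming the Turbinia config is already in place:

from turbinia import client as turbinia_client

# Requires TASK_MANAGER = 'celery' in the Turbinia config; any other value
# now raises TurbiniaException, since the PSQ/pubsub path was removed.
client = turbinia_client.get_turbinia_client()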