From 4ddc2a6130dda1d49bd5cd1074a18afec9e32c1f Mon Sep 17 00:00:00 2001
From: Mark Roberts
Date: Wed, 6 Sep 2023 21:50:54 +0000
Subject: [PATCH 01/26] WIP: Add dev container, but permission issues remain when running as user.

---
 .devcontainer/Dockerfile        | 105 ++++++++++++++++++++++
 .devcontainer/devcontainer.json |  51 +++++++++++
 .dockerignore                   | 151 ++++++++++++++++++++++++++++++++
 3 files changed, 307 insertions(+)
 create mode 100644 .devcontainer/Dockerfile
 create mode 100644 .devcontainer/devcontainer.json
 create mode 100644 .dockerignore

diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
new file mode 100644
index 00000000..3fffb953
--- /dev/null
+++ b/.devcontainer/Dockerfile
@@ -0,0 +1,105 @@
+# This is a multi-stage build to minimize image size
+# You can specify python version and linux distro as build args
+# Stage 1: Build python venv + dependencies
+# Stage 2: Install some system level developer tools
+# Stage 3: Combine Stage 2 + Venv
+
+ARG PYTHON_VERSION=3.11
+ARG LINUX_DISTRO=slim-bookworm
+
+FROM python:${PYTHON_VERSION}-${LINUX_DISTRO} as venv_base
+# Stage 1: Create venv and install MDIO dependencies only
+
+ENV PYTHONFAULTHANDLER=1 \
+    PYTHONUNBUFFERED=1
+
+ENV PIP_DISABLE_PIP_VERSION_CHECK=1 \
+    PIP_NO_CACHE_DIR=1 \
+    PIP_ROOT_USER_ACTION=ignore
+
+RUN python -m venv /opt/venv
+ENV PATH="/opt/venv/bin:$PATH"
+WORKDIR mdio-python
+
+# Install all of MDIO dependencies
+# 1. Get latest release from MDIO
+# 2. Unpack it to WORKDIR
+# 3. Install poetry and nox (dev tools)
+# 4. Install all of MDIO dependencies with poetry
+#    && curl -s https://api.github.com/repos/tgsai/mdio-python/releases/latest \
+#    | python -c 'import sys, json; print(json.load(sys.stdin)["tarball_url"])' \
+#    | xargs curl -LJ \
+#    | tar zx --strip=1 \
+COPY . /mdio-python
+
+#    --with dev \
+#    --all-extras \
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+    curl \
+    && rm -rf /var/lib/apt/lists/* \
+    && python3 -m pip install --upgrade pip setuptools wheel \
+    && python3 -m pip install \
+    msgpack \
+    nox \
+    nox-poetry \
+    "poetry>=1.6.1" \
+    && poetry config virtualenvs.create false \
+    && poetry install --no-root --no-ansi --with dev
+
+
+FROM python:${PYTHON_VERSION}-${LINUX_DISTRO} as system_tools
+# STAGE 2
+# Install `git` and graphviz
+# - git for pre-commit stuff
+# - graphviz for debugging dask graphs
+
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+    git \
+    graphviz \
+    && rm -rf /var/lib/apt/lists/*
+
+FROM system_tools
+# STAGE 3
+# Based on STAGE 2
+# 1. Set Python interpreter
+# 2. Add expected source dir to PYTHONPATH
+# 3. Copy the venv from STAGE 1
+# 4. Set user
+
+ENV PYTHONFAULTHANDLER=1 \
+    PYTHONUNBUFFERED=1 \
+    PIP_NO_CACHE_DIR=1 \
+    PATH="/opt/venv/bin:$PATH" \
+    SHELL=/bin/bash \
+    PYTHONPATH=/mdio-python/src
+
+COPY --from=venv_base --chmod=777 /opt/venv /opt/venv
+
+RUN python3 -m pip install \
+    msgpack
+
+ARG USERNAME=vscode
+ARG USER_UID=1000
+ARG USER_GID=$USER_UID
+
+# Create the user
+RUN groupadd --gid $USER_GID $USERNAME \
+    && useradd --uid $USER_UID --gid $USER_GID -m $USERNAME \
+    #
+    # [Optional] Add sudo support. Omit if you don't need to install software after connecting.
+ && apt-get update \ + && apt-get install -y sudo \ + && echo $USERNAME ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/$USERNAME \ + && chmod 0440 /etc/sudoers.d/$USERNAME \ + && chmod -r 777 /opt/venv + +# ******************************************************** +# * Anything else you want to do like clean up goes here * +# ******************************************************** + +# [Optional] Set the default user. Omit if you want to keep the default as root. +USER $USERNAME + +WORKDIR /mdio-python diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 00000000..b11b67d1 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,51 @@ +// For format details, see https://aka.ms/devcontainer.json. For config options, see the +// README at: https://github.com/devcontainers/templates/tree/main/src/python +{ + "build": { + "dockerfile": "Dockerfile", + "context": ".." + }, + //"name": "Python 3", + // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile + // "image": "mcr.microsoft.com/devcontainers/python:1-3.11-bookworm", + //"features": { + // "ghcr.io/devcontainers-contrib/features/poetry:2": {}, + // "ghcr.io/devcontainers-contrib/features/nox:2": {} + // }, + // Features to add to the dev container. More info: https://containers.dev/features. + // "features": {}, + // Use 'forwardPorts' to make a list of ports inside the container available locally. + // "forwardPorts": [], + // "onCreateCommand": { + // "python3 -m pip install venv && python -m venv /opt/venv" + // }, + // "remoteEnv": { + // "PATH": "/opt/venv/bin:${containerEnv:PATH}" + // }, + // Use 'postCreateCommand' to run commands after the container is created. + "postCreateCommand": { + // "pip_install": [ + // "pip", + // "install", + // "msgpack", + // "nox", + // "nox-poetry", + // "'poetry>=1.6.1'" + // ], + "poetry_config": "poetry config virtualenvs.create false", + "poetry install": "poetry install --no-root --with dev --no-ansi" + }, + // Configure tool-specific properties. + "customizations": { + // Configure properties specific to VS Code. + "vscode": { + "settings": {}, + "extensions": [ + "streetsidesoftware.code-spell-checker" + ] + } + }, + // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root. + // "remoteUser": "root" + "updateRemoteUserUID": true +} \ No newline at end of file diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000..a770eaa8 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,151 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ +debugging/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# ruff +.ruff_cache/ + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# IDE settings +.vscode/ +.idea/ + +# tests +mdio1/* +*/mdio1/* +pytest-of-* From 743cbda8a90955cf1f23714e8077fffb5e93d828 Mon Sep 17 00:00:00 2001 From: Mark Roberts Date: Fri, 8 Sep 2023 09:56:07 -0500 Subject: [PATCH 02/26] Updates --- .devcontainer/Dockerfile | 110 +++++--------------------------- .devcontainer/devcontainer.json | 27 ++++---- .devcontainer/post-install.sh | 3 + 3 files changed, 34 insertions(+), 106 deletions(-) create mode 100644 .devcontainer/post-install.sh diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 3fffb953..dd7db66b 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -1,105 +1,29 @@ -# This is a multi-stage build to minimize image size -# You can specify python version and linux distro as build args -# Stage 1: Build python venv + dependencies -# Stage 2: Install some system level developer tools -# Stage 3: Combine Stage 2 + Venv -ARG PYTHON_VERSION=3.11 -ARG LINUX_DISTRO=slim-bookworm - -FROM python:${PYTHON_VERSION}-${LINUX_DISTRO} as venv_base -# Stage 1: Create venv and install MDIO dependencies only - -ENV PYTHONFAULTHANDLER=1 \ - PYTHONUNBUFFERED=1 - -ENV PIP_DISABLE_PIP_VERSION_CHECK=1 \ - PIP_NO_CACHE_DIR=1 \ - PIP_ROOT_USER_ACTION=ignore -RUN python -m venv /opt/venv -ENV PATH="/opt/venv/bin:$PATH" -WORKDIR mdio-python - -# Install all of MDIO dependencies -# 1. Get latest release from MDIO -# 2. Unpack it to WORKDIR -# 3. Install poetry and nox (dev tools) -# 4. Install all of MDIO dependencies with poetry -# && curl -s https://api.github.com/repos/tgsai/mdio-python/releases/latest \ -# | python -c 'import sys, json; print(json.load(sys.stdin)["tarball_url"])' \ -# | xargs curl -LJ \ -# | tar zx --strip=1 \ -COPY . 
/mdio-python - -# --with dev \ -# --all-extras \ -RUN apt-get update \ - && apt-get install -y --no-install-recommends \ - curl \ - && rm -rf /var/lib/apt/lists/* \ - && python3 -m pip install --upgrade pip setuptools wheel \ - && python3 -m pip install \ - msgpack \ - nox \ - nox-poetry \ - "poetry>=1.6.1" \ - && poetry config virtualenvs.create false \ - && poetry install --no-root --no-ansi --with dev +ARG PYTHON_VERSION=3.11 +ARG LINUX_DISTRO=bookworm -FROM python:${PYTHON_VERSION}-${LINUX_DISTRO} as system_tools -# STAGE 2 -# Install `git` and graphviz -# - git for pre-commit stuff -# - graphviz for debugging dask graphs +# FROM python:${PYTHON_VERSION}-${LINUX_DISTRO} +FROM mcr.microsoft.com/devcontainers/python:1-${PYTHON_VERSION}-${LINUX_DISTRO} +# Install git for nox pre-commit RUN apt-get update \ && apt-get install -y --no-install-recommends \ git \ - graphviz \ && rm -rf /var/lib/apt/lists/* -FROM system_tools -# STAGE 3 -# Based on STAGE 2 -# 1. Set Python interpreter -# 2. Add expected source dir to PYTHONPATH -# 3. Copy the venv from STAGE 1 -# 4. Set user - -ENV PYTHONFAULTHANDLER=1 \ - PYTHONUNBUFFERED=1 \ - PIP_NO_CACHE_DIR=1 \ - PATH="/opt/venv/bin:$PATH" \ - SHELL=/bin/bash \ - PYTHONPATH=/mdio-python/src - -COPY --from=venv_base --chmod=777 /opt/venv /opt/venv - -RUN python3 -m pip install \ - msgpack - -ARG USERNAME=vscode -ARG USER_UID=1000 -ARG USER_GID=$USER_UID - -# Create the user -RUN groupadd --gid $USER_GID $USERNAME \ - && useradd --uid $USER_UID --gid $USER_GID -m $USERNAME \ - # - # [Optional] Add sudo support. Omit if you don't need to install software after connecting. - && apt-get update \ - && apt-get install -y sudo \ - && echo $USERNAME ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/$USERNAME \ - && chmod 0440 /etc/sudoers.d/$USERNAME \ - && chmod -r 777 /opt/venv - -# ******************************************************** -# * Anything else you want to do like clean up goes here * -# ******************************************************** +# Poetry +ARG POETRY_VERSION="1.6.1" +# RUN if [ "${POETRY_VERSION}" != "none" ]; then su vscode -c "umask 0002 && pip3 install poetry==${POETRY_VERSION}"; fi +RUN if [ "${POETRY_VERSION}" != "none" ]; then bash -c "umask 0002 && pip3 install poetry==${POETRY_VERSION}"; fi -# [Optional] Set the default user. Omit if you want to keep the default as root. -USER $USERNAME +# Nox +ARG NOX_VERSION="2023.4.22" +# RUN if [ "${NOX_VERSION}" != "none" ]; then su vscode -c "umask 0002 && pip3 install nox-poetry nox==${NOX_VERSION}"; fi +RUN if [ "${NOX_VERSION}" != "none" ]; then bash -c "umask 0002 && pip3 install nox-poetry nox==${NOX_VERSION}"; fi -WORKDIR /mdio-python +# Nox poetry +ARG NOX_POETRY_VERSION="1.0.3" +# RUN if [ "${NOX_POETRY_VERSION}" != "none" ]; then su vscode -c "umask 0002 && pip3 install nox-poetry nox-poetry==${NOX_POETRY_VERSION}"; fi +RUN if [ "${NOX_POETRY_VERSION}" != "none" ]; then bash -c "umask 0002 && pip3 install nox-poetry nox-poetry==${NOX_POETRY_VERSION}"; fi diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index b11b67d1..15e77624 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -3,7 +3,12 @@ { "build": { "dockerfile": "Dockerfile", - "context": ".." + "context": "..", + "args": { + "USERNAME": "${localEnv:USER}", + "USER_ID": "${localEnv:UID}", + "USER_GID": "${localEnv:GID}" + } }, //"name": "Python 3", // Or use a Dockerfile or Docker Compose file. 
More info: https://containers.dev/guide/dockerfile
@@ -24,16 +29,7 @@
   //   },
   // Use 'postCreateCommand' to run commands after the container is created.
   "postCreateCommand": {
-    // "pip_install": [
-    //   "pip",
-    //   "install",
-    //   "msgpack",
-    //   "nox",
-    //   "nox-poetry",
-    //   "'poetry>=1.6.1'"
-    // ],
-    "poetry_config": "poetry config virtualenvs.create false",
-    "poetry install": "poetry install --no-root --with dev --no-ansi"
+    "post_create_script": "bash ./.devcontainer/post-install.sh"
   },
   // Configure tool-specific properties.
   "customizations": {
@@ -46,6 +42,11 @@
     }
   },
   // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
-  // "remoteUser": "root"
-  "updateRemoteUserUID": true
+  "remoteUser": "root",
+  // "updateRemoteUserUID": true,
+  "mounts": [
+    // Re-use local Git configuration
+    "source=${localEnv:HOME}/.gitconfig,target=/home/vscode/.gitconfig,type=bind,consistency=cached",
+    "source=${localEnv:HOME}/.gitconfig,target=/home/root/.gitconfig,type=bind,consistency=cached"
+  ]
 }
\ No newline at end of file
diff --git a/.devcontainer/post-install.sh b/.devcontainer/post-install.sh
new file mode 100644
index 00000000..540061af
--- /dev/null
+++ b/.devcontainer/post-install.sh
@@ -0,0 +1,3 @@
+git config --global --add safe.directory `pwd`
+poetry install --with dev --no-ansi
+poetry shell
\ No newline at end of file

From a2f2187ed0661b7cfefb5dd4974f4ca911a72798 Mon Sep 17 00:00:00 2001
From: Mark Roberts
Date: Fri, 8 Sep 2023 11:07:40 -0500
Subject: [PATCH 03/26] Update docs for dev environment.

---
 .devcontainer/devcontainer.json |  2 +-
 CONTRIBUTING.md                 |  5 ++++-
 docs/development_env.md         | 10 ++++++++++
 docs/index.md                   |  1 +
 4 files changed, 16 insertions(+), 2 deletions(-)
 create mode 100644 docs/development_env.md

diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index 15e77624..76b7345d 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -47,6 +47,6 @@
   "mounts": [
     // Re-use local Git configuration
     "source=${localEnv:HOME}/.gitconfig,target=/home/vscode/.gitconfig,type=bind,consistency=cached",
-    "source=${localEnv:HOME}/.gitconfig,target=/home/root/.gitconfig,type=bind,consistency=cached"
+    "source=${localEnv:HOME}/.gitconfig,target=/root/.gitconfig,type=bind,consistency=cached"
   ]
 }
\ No newline at end of file
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index b1c6a9b6..b84e5f93 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -37,12 +37,15 @@ Request features on the [Issue Tracker].
 
 ## How to set up your development environment
 
-You need Python 3.7+ and the following tools:
+You need Python 3.9+ and the following tools:
 
 - [Poetry]
 - [Nox]
 - [nox-poetry]
 
+A [dev container](./docs/development_env.md) has been set up to provide a clean environment with the required dependencies.
+
+## How to install and run MDIO
 Install the package with development requirements:
 
 ```console
diff --git a/docs/development_env.md b/docs/development_env.md
new file mode 100644
index 00000000..e175f2d6
--- /dev/null
+++ b/docs/development_env.md
@@ -0,0 +1,10 @@
+# Development Environment
+
+To facilitate development on different systems, a [dev container](https://containers.dev/) has been added. This should seamlessly enable development for users of [VSCode](https://code.visualstudio.com/docs/devcontainers/containers) on systems with docker installed.
+
+For contributing guidelines, please look here: [link](../CONTRIBUTING.md)
+
+### Known issues:
+
+* Some effort was taken to run without using root inside the container. However, nox always seemed to have permission issues which I've been unable to fix.
+* `git config --global --add safe.directory \`pwd\` ` might be needed inside the container.
\ No newline at end of file
diff --git a/docs/index.md b/docs/index.md
index 918984f2..5987e6ba 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -19,6 +19,7 @@ notebooks/compression
 usage
 reference
 contributing
+Development Environment
 Code of Conduct
 License
 Changelog

From d4ecebc0d58afdbb25afb5d2df0d19bbc44fee95 Mon Sep 17 00:00:00 2001
From: Mark Roberts
Date: Fri, 8 Sep 2023 20:18:32 +0000
Subject: [PATCH 04/26] Fix linting.

---
 .devcontainer/devcontainer.json | 6 +++---
 .devcontainer/post-install.sh   | 7 +++++--
 docs/development_env.md         | 6 +++---
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index 76b7345d..3f94e61d 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -46,7 +46,7 @@
   // "updateRemoteUserUID": true,
   "mounts": [
     // Re-use local Git configuration
-    "source=${localEnv:HOME}/.gitconfig,target=/home/vscode/.gitconfig,type=bind,consistency=cached",
-    "source=${localEnv:HOME}/.gitconfig,target=/root/.gitconfig,type=bind,consistency=cached"
+    "source=${localEnv:HOME}/.gitconfig,target=/home/vscode/.gitconfig_tmp,type=bind,consistency=cached",
+    "source=${localEnv:HOME}/.gitconfig,target=/root/.gitconfig_tmp,type=bind,consistency=cached"
   ]
-}
\ No newline at end of file
+}
diff --git a/.devcontainer/post-install.sh b/.devcontainer/post-install.sh
index 540061af..c61fb324 100644
--- a/.devcontainer/post-install.sh
+++ b/.devcontainer/post-install.sh
@@ -1,3 +1,6 @@
+cp -f /root/.gitconfig_tmp /root/.gitconfig
+cp -f /home/vscode/.gitconfig_tmp /home/vscode/.gitconfig
+chmod 777 /home/vscode/.gitconfig
 git config --global --add safe.directory `pwd`
-poetry install --with dev --no-ansi
-poetry shell
\ No newline at end of file
+# poetry install --with dev --no-ansi
+# poetry shell
diff --git a/docs/development_env.md b/docs/development_env.md
index e175f2d6..1e4fdd92 100644
--- a/docs/development_env.md
+++ b/docs/development_env.md
@@ -1,10 +1,10 @@
 # Development Environment
 
-To facilitate development on different systems, a [dev container](https://containers.dev/) has been added. This should seamlessly enable development for users of [VSCode](https://code.visualstudio.com/docs/devcontainers/containers) on systems with docker installed.
+To facilitate development on different systems, a [dev container](https://containers.dev/) has been added. This should seamlessly enable development for users of [VSCode](https://code.visualstudio.com/docs/devcontainers/containers) on systems with docker installed.
 
-For contributing guidelines, please look here: [link](../CONTRIBUTING.md)
+For contributing guidelines, please look here: [link](../CONTRIBUTING.md)
 
 ### Known issues:
 
 * Some effort was taken to run without using root inside the container. However, nox always seemed to have permission issues which I've been unable to fix.
 * `git config --global --add safe.directory \`pwd\` ` might be needed inside the container.
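The known issues above nearly always trace back to a UID/GID mismatch between the host user, the container user, and the shared `/opt/venv`. A minimal diagnostic sketch, run from a shell inside the container; it assumes the venv path used by the Dockerfile in this series and that the bind-mounted checkout is the current directory, so paths may differ on other setups:

```bash
#!/usr/bin/env bash
# Effective user inside the container.
id

# Owner of the bind-mounted checkout; if this UID differs from `id -u`,
# git treats the repository as having dubious ownership.
stat -c '%u:%g %A %n' "$(pwd)"

# Permissions on the shared venv the Dockerfile opens up for all users.
ls -ld /opt/venv /opt/venv/bin

# The workaround the post-install script applies for the git case:
git config --global --add safe.directory "$(pwd)"
```

When the checkout's owner and `id -u` disagree, tools that write into the workspace (nox, poetry, pre-commit) hit the same permission failures these commit messages describe.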
From 264351d67fe90ceb6d488f498c5e0c5656be5da0 Mon Sep 17 00:00:00 2001 From: Mark Roberts Date: Fri, 8 Sep 2023 20:30:05 +0000 Subject: [PATCH 05/26] More linting. --- .devcontainer/devcontainer.json | 77 +++++++++++++-------------------- CONTRIBUTING.md | 1 + docs/development_env.md | 6 +-- 3 files changed, 33 insertions(+), 51 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 3f94e61d..6901d020 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,52 +1,33 @@ // For format details, see https://aka.ms/devcontainer.json. For config options, see the // README at: https://github.com/devcontainers/templates/tree/main/src/python { - "build": { - "dockerfile": "Dockerfile", - "context": "..", - "args": { - "USERNAME": "${localEnv:USER}", - "USER_ID": "${localEnv:UID}", - "USER_GID": "${localEnv:GID}" - } - }, - //"name": "Python 3", - // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile - // "image": "mcr.microsoft.com/devcontainers/python:1-3.11-bookworm", - //"features": { - // "ghcr.io/devcontainers-contrib/features/poetry:2": {}, - // "ghcr.io/devcontainers-contrib/features/nox:2": {} - // }, - // Features to add to the dev container. More info: https://containers.dev/features. - // "features": {}, - // Use 'forwardPorts' to make a list of ports inside the container available locally. - // "forwardPorts": [], - // "onCreateCommand": { - // "python3 -m pip install venv && python -m venv /opt/venv" - // }, - // "remoteEnv": { - // "PATH": "/opt/venv/bin:${containerEnv:PATH}" - // }, - // Use 'postCreateCommand' to run commands after the container is created. - "postCreateCommand": { - "post_create_script": "bash ./.devcontainer/post-install.sh" - }, - // Configure tool-specific properties. - "customizations": { - // Configure properties specific to VS Code. - "vscode": { - "settings": {}, - "extensions": [ - "streetsidesoftware.code-spell-checker" - ] - } - }, - // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root. - "remoteUser": "root", - // "updateRemoteUserUID": true, - "mounts": [ - // Re-use local Git configuration - "source=${localEnv:HOME}/.gitconfig,target=/home/vscode/.gitconfig_tmp,type=bind,consistency=cached", - "source=${localEnv:HOME}/.gitconfig,target=/root/.gitconfig_tmp,type=bind,consistency=cached" - ] + "build": { + "dockerfile": "Dockerfile", + "context": "..", + "args": { + "USERNAME": "${localEnv:USER}", + "USER_ID": "${localEnv:UID}", + "USER_GID": "${localEnv:GID}" + } + }, + // Use 'postCreateCommand' to run commands after the container is created. + "postCreateCommand": { + "post_create_script": "bash ./.devcontainer/post-install.sh" + }, + // Configure tool-specific properties. + "customizations": { + // Configure properties specific to VS Code. + "vscode": { + "settings": {}, + "extensions": ["streetsidesoftware.code-spell-checker"] + } + }, + // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root. 
+ "remoteUser": "root", + // "updateRemoteUserUID": true, + "mounts": [ + // Re-use local Git configuration + "source=${localEnv:HOME}/.gitconfig,target=/home/vscode/.gitconfig_tmp,type=bind,consistency=cached", + "source=${localEnv:HOME}/.gitconfig,target=/root/.gitconfig_tmp,type=bind,consistency=cached" + ] } diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index b84e5f93..4b0ac9be 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -46,6 +46,7 @@ You need Python 3.9+ and the following tools: A [dev container](./docs/development_env.md) has been setup to provide a clean environment with the required dependencies. ## How to install and run MDIO + Install the package with development requirements: ```console diff --git a/docs/development_env.md b/docs/development_env.md index 1e4fdd92..3656565e 100644 --- a/docs/development_env.md +++ b/docs/development_env.md @@ -1,10 +1,10 @@ # Development Environment -To facilitate development on different systems a [dev container](https://containers.dev/) has been added. This should seamlessly enable development for users of [VSCode](https://code.visualstudio.com/docs/devcontainers/containers) on systems with docker installed. +To facilitate development on different systems a [dev container](https://containers.dev/) has been added. This should seamlessly enable development for users of [VSCode](https://code.visualstudio.com/docs/devcontainers/containers) on systems with docker installed. For contributing guidelines please look here [link](../CONTRIBUTING.md) ### known issues: -* Some effort was take to run without using root inside the container. However nox always seemed to have permissions issues which I've been unable to fix. -* `git config --global --add safe.directory \`pwd\` ` Might be needed inside the container. +- Some effort was take to run without using root inside the container. However nox always seemed to have permissions issues which I've been unable to fix. +- `git config --global --add safe.directory \`pwd\` ` Might be needed inside the container. From 19c180c6aef4a7f9a0032f058f6a1bb428271dc7 Mon Sep 17 00:00:00 2001 From: Mark Roberts Date: Fri, 8 Sep 2023 20:36:47 +0000 Subject: [PATCH 06/26] Fix ssh issue with pushing from container. --- .devcontainer/post-install.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.devcontainer/post-install.sh b/.devcontainer/post-install.sh index c61fb324..1f727909 100644 --- a/.devcontainer/post-install.sh +++ b/.devcontainer/post-install.sh @@ -2,5 +2,7 @@ cp -f /root/.gitconfig_tmp /root/.gitconfig cp -f /home/vscode/.gitconfig_tmp /home/vscode/.gitconfig chmod 777 /home/vscode/.gitconfig git config --global --add safe.directory `pwd` +# Enable ssh with github for git push +ssh-keygen -f "/root/.ssh/known_hosts" -R "github.com" # poetry install --with dev --no-ansi # poetry shell From 89794140e38095b7e0b855aed6d22f221ce03fec Mon Sep 17 00:00:00 2001 From: Mark Roberts Date: Wed, 6 Sep 2023 21:50:54 +0000 Subject: [PATCH 07/26] WIP: Add dev container, but permission issue remain when running as user. 
--- .devcontainer/Dockerfile | 105 ++++++++++++++++++++++ .devcontainer/devcontainer.json | 51 +++++++++++ .dockerignore | 151 ++++++++++++++++++++++++++++++++ 3 files changed, 307 insertions(+) create mode 100644 .devcontainer/Dockerfile create mode 100644 .devcontainer/devcontainer.json create mode 100644 .dockerignore diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile new file mode 100644 index 00000000..3fffb953 --- /dev/null +++ b/.devcontainer/Dockerfile @@ -0,0 +1,105 @@ +# This is a multi-stage build to minimize image size +# You can specify python version and linux distro as build args +# Stage 1: Build python venv + dependencies +# Stage 2: Install some system level developer tools +# Stage 3: Combine Stage 2 + Venv + +ARG PYTHON_VERSION=3.11 +ARG LINUX_DISTRO=slim-bookworm + +FROM python:${PYTHON_VERSION}-${LINUX_DISTRO} as venv_base +# Stage 1: Create venv and install MDIO dependencies only + +ENV PYTHONFAULTHANDLER=1 \ + PYTHONUNBUFFERED=1 + +ENV PIP_DISABLE_PIP_VERSION_CHECK=1 \ + PIP_NO_CACHE_DIR=1 \ + PIP_ROOT_USER_ACTION=ignore + +RUN python -m venv /opt/venv +ENV PATH="/opt/venv/bin:$PATH" +WORKDIR mdio-python + +# Install all of MDIO dependencies +# 1. Get latest release from MDIO +# 2. Unpack it to WORKDIR +# 3. Install poetry and nox (dev tools) +# 4. Install all of MDIO dependencies with poetry +# && curl -s https://api.github.com/repos/tgsai/mdio-python/releases/latest \ +# | python -c 'import sys, json; print(json.load(sys.stdin)["tarball_url"])' \ +# | xargs curl -LJ \ +# | tar zx --strip=1 \ +COPY . /mdio-python + +# --with dev \ +# --all-extras \ +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + curl \ + && rm -rf /var/lib/apt/lists/* \ + && python3 -m pip install --upgrade pip setuptools wheel \ + && python3 -m pip install \ + msgpack \ + nox \ + nox-poetry \ + "poetry>=1.6.1" \ + && poetry config virtualenvs.create false \ + && poetry install --no-root --no-ansi --with dev + + +FROM python:${PYTHON_VERSION}-${LINUX_DISTRO} as system_tools +# STAGE 2 +# Install `git` and graphviz +# - git for pre-commit stuff +# - graphviz for debugging dask graphs + +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + git \ + graphviz \ + && rm -rf /var/lib/apt/lists/* + +FROM system_tools +# STAGE 3 +# Based on STAGE 2 +# 1. Set Python interpreter +# 2. Add expected source dir to PYTHONPATH +# 3. Copy the venv from STAGE 1 +# 4. Set user + +ENV PYTHONFAULTHANDLER=1 \ + PYTHONUNBUFFERED=1 \ + PIP_NO_CACHE_DIR=1 \ + PATH="/opt/venv/bin:$PATH" \ + SHELL=/bin/bash \ + PYTHONPATH=/mdio-python/src + +COPY --from=venv_base --chmod=777 /opt/venv /opt/venv + +RUN python3 -m pip install \ + msgpack + +ARG USERNAME=vscode +ARG USER_UID=1000 +ARG USER_GID=$USER_UID + +# Create the user +RUN groupadd --gid $USER_GID $USERNAME \ + && useradd --uid $USER_UID --gid $USER_GID -m $USERNAME \ + # + # [Optional] Add sudo support. Omit if you don't need to install software after connecting. + && apt-get update \ + && apt-get install -y sudo \ + && echo $USERNAME ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/$USERNAME \ + && chmod 0440 /etc/sudoers.d/$USERNAME \ + && chmod -r 777 /opt/venv + +# ******************************************************** +# * Anything else you want to do like clean up goes here * +# ******************************************************** + +# [Optional] Set the default user. Omit if you want to keep the default as root. 
+USER $USERNAME + +WORKDIR /mdio-python diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 00000000..b11b67d1 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,51 @@ +// For format details, see https://aka.ms/devcontainer.json. For config options, see the +// README at: https://github.com/devcontainers/templates/tree/main/src/python +{ + "build": { + "dockerfile": "Dockerfile", + "context": ".." + }, + //"name": "Python 3", + // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile + // "image": "mcr.microsoft.com/devcontainers/python:1-3.11-bookworm", + //"features": { + // "ghcr.io/devcontainers-contrib/features/poetry:2": {}, + // "ghcr.io/devcontainers-contrib/features/nox:2": {} + // }, + // Features to add to the dev container. More info: https://containers.dev/features. + // "features": {}, + // Use 'forwardPorts' to make a list of ports inside the container available locally. + // "forwardPorts": [], + // "onCreateCommand": { + // "python3 -m pip install venv && python -m venv /opt/venv" + // }, + // "remoteEnv": { + // "PATH": "/opt/venv/bin:${containerEnv:PATH}" + // }, + // Use 'postCreateCommand' to run commands after the container is created. + "postCreateCommand": { + // "pip_install": [ + // "pip", + // "install", + // "msgpack", + // "nox", + // "nox-poetry", + // "'poetry>=1.6.1'" + // ], + "poetry_config": "poetry config virtualenvs.create false", + "poetry install": "poetry install --no-root --with dev --no-ansi" + }, + // Configure tool-specific properties. + "customizations": { + // Configure properties specific to VS Code. + "vscode": { + "settings": {}, + "extensions": [ + "streetsidesoftware.code-spell-checker" + ] + } + }, + // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root. + // "remoteUser": "root" + "updateRemoteUserUID": true +} \ No newline at end of file diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000..a770eaa8 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,151 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
+# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ +debugging/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# ruff +.ruff_cache/ + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# IDE settings +.vscode/ +.idea/ + +# tests +mdio1/* +*/mdio1/* +pytest-of-* From 47267c00e402a349061b55c1547bf4ff1e9eb26c Mon Sep 17 00:00:00 2001 From: Mark Roberts Date: Fri, 8 Sep 2023 09:56:07 -0500 Subject: [PATCH 08/26] Updates --- .devcontainer/Dockerfile | 110 +++++--------------------------- .devcontainer/devcontainer.json | 27 ++++---- .devcontainer/post-install.sh | 3 + 3 files changed, 34 insertions(+), 106 deletions(-) create mode 100644 .devcontainer/post-install.sh diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 3fffb953..dd7db66b 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -1,105 +1,29 @@ -# This is a multi-stage build to minimize image size -# You can specify python version and linux distro as build args -# Stage 1: Build python venv + dependencies -# Stage 2: Install some system level developer tools -# Stage 3: Combine Stage 2 + Venv -ARG PYTHON_VERSION=3.11 -ARG LINUX_DISTRO=slim-bookworm - -FROM python:${PYTHON_VERSION}-${LINUX_DISTRO} as venv_base -# Stage 1: Create venv and install MDIO dependencies only - -ENV PYTHONFAULTHANDLER=1 \ - PYTHONUNBUFFERED=1 - -ENV PIP_DISABLE_PIP_VERSION_CHECK=1 \ - PIP_NO_CACHE_DIR=1 \ - PIP_ROOT_USER_ACTION=ignore -RUN python -m venv /opt/venv -ENV PATH="/opt/venv/bin:$PATH" -WORKDIR mdio-python - -# Install all of MDIO dependencies -# 1. Get latest release from MDIO -# 2. Unpack it to WORKDIR -# 3. Install poetry and nox (dev tools) -# 4. Install all of MDIO dependencies with poetry -# && curl -s https://api.github.com/repos/tgsai/mdio-python/releases/latest \ -# | python -c 'import sys, json; print(json.load(sys.stdin)["tarball_url"])' \ -# | xargs curl -LJ \ -# | tar zx --strip=1 \ -COPY . /mdio-python - -# --with dev \ -# --all-extras \ -RUN apt-get update \ - && apt-get install -y --no-install-recommends \ - curl \ - && rm -rf /var/lib/apt/lists/* \ - && python3 -m pip install --upgrade pip setuptools wheel \ - && python3 -m pip install \ - msgpack \ - nox \ - nox-poetry \ - "poetry>=1.6.1" \ - && poetry config virtualenvs.create false \ - && poetry install --no-root --no-ansi --with dev +ARG PYTHON_VERSION=3.11 +ARG LINUX_DISTRO=bookworm -FROM python:${PYTHON_VERSION}-${LINUX_DISTRO} as system_tools -# STAGE 2 -# Install `git` and graphviz -# - git for pre-commit stuff -# - graphviz for debugging dask graphs +# FROM python:${PYTHON_VERSION}-${LINUX_DISTRO} +FROM mcr.microsoft.com/devcontainers/python:1-${PYTHON_VERSION}-${LINUX_DISTRO} +# Install git for nox pre-commit RUN apt-get update \ && apt-get install -y --no-install-recommends \ git \ - graphviz \ && rm -rf /var/lib/apt/lists/* -FROM system_tools -# STAGE 3 -# Based on STAGE 2 -# 1. Set Python interpreter -# 2. 
Add expected source dir to PYTHONPATH -# 3. Copy the venv from STAGE 1 -# 4. Set user - -ENV PYTHONFAULTHANDLER=1 \ - PYTHONUNBUFFERED=1 \ - PIP_NO_CACHE_DIR=1 \ - PATH="/opt/venv/bin:$PATH" \ - SHELL=/bin/bash \ - PYTHONPATH=/mdio-python/src - -COPY --from=venv_base --chmod=777 /opt/venv /opt/venv - -RUN python3 -m pip install \ - msgpack - -ARG USERNAME=vscode -ARG USER_UID=1000 -ARG USER_GID=$USER_UID - -# Create the user -RUN groupadd --gid $USER_GID $USERNAME \ - && useradd --uid $USER_UID --gid $USER_GID -m $USERNAME \ - # - # [Optional] Add sudo support. Omit if you don't need to install software after connecting. - && apt-get update \ - && apt-get install -y sudo \ - && echo $USERNAME ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/$USERNAME \ - && chmod 0440 /etc/sudoers.d/$USERNAME \ - && chmod -r 777 /opt/venv - -# ******************************************************** -# * Anything else you want to do like clean up goes here * -# ******************************************************** +# Poetry +ARG POETRY_VERSION="1.6.1" +# RUN if [ "${POETRY_VERSION}" != "none" ]; then su vscode -c "umask 0002 && pip3 install poetry==${POETRY_VERSION}"; fi +RUN if [ "${POETRY_VERSION}" != "none" ]; then bash -c "umask 0002 && pip3 install poetry==${POETRY_VERSION}"; fi -# [Optional] Set the default user. Omit if you want to keep the default as root. -USER $USERNAME +# Nox +ARG NOX_VERSION="2023.4.22" +# RUN if [ "${NOX_VERSION}" != "none" ]; then su vscode -c "umask 0002 && pip3 install nox-poetry nox==${NOX_VERSION}"; fi +RUN if [ "${NOX_VERSION}" != "none" ]; then bash -c "umask 0002 && pip3 install nox-poetry nox==${NOX_VERSION}"; fi -WORKDIR /mdio-python +# Nox poetry +ARG NOX_POETRY_VERSION="1.0.3" +# RUN if [ "${NOX_POETRY_VERSION}" != "none" ]; then su vscode -c "umask 0002 && pip3 install nox-poetry nox-poetry==${NOX_POETRY_VERSION}"; fi +RUN if [ "${NOX_POETRY_VERSION}" != "none" ]; then bash -c "umask 0002 && pip3 install nox-poetry nox-poetry==${NOX_POETRY_VERSION}"; fi diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index b11b67d1..15e77624 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -3,7 +3,12 @@ { "build": { "dockerfile": "Dockerfile", - "context": ".." + "context": "..", + "args": { + "USERNAME": "${localEnv:USER}", + "USER_ID": "${localEnv:UID}", + "USER_GID": "${localEnv:GID}" + } }, //"name": "Python 3", // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile @@ -24,16 +29,7 @@ // }, // Use 'postCreateCommand' to run commands after the container is created. "postCreateCommand": { - // "pip_install": [ - // "pip", - // "install", - // "msgpack", - // "nox", - // "nox-poetry", - // "'poetry>=1.6.1'" - // ], - "poetry_config": "poetry config virtualenvs.create false", - "poetry install": "poetry install --no-root --with dev --no-ansi" + "post_create_script": "bash ./.devcontainer/post-install.sh" }, // Configure tool-specific properties. "customizations": { @@ -46,6 +42,11 @@ } }, // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root. 
- // "remoteUser": "root" - "updateRemoteUserUID": true + "remoteUser": "root", + // "updateRemoteUserUID": true, + "mounts": [ + // Re-use local Git configuration + "source=${localEnv:HOME}/.gitconfig,target=/home/vscode/.gitconfig,type=bind,consistency=cached", + "source=${localEnv:HOME}/.gitconfig,target=/home/root/.gitconfig,type=bind,consistency=cached" + ] } \ No newline at end of file diff --git a/.devcontainer/post-install.sh b/.devcontainer/post-install.sh new file mode 100644 index 00000000..540061af --- /dev/null +++ b/.devcontainer/post-install.sh @@ -0,0 +1,3 @@ +git config --global --add safe.directory `pwd` +poetry install --with dev --no-ansi +poetry shell \ No newline at end of file From 13b77e4a106e9762db6d79d2acccfffe63644ead Mon Sep 17 00:00:00 2001 From: Mark Roberts Date: Fri, 8 Sep 2023 11:07:40 -0500 Subject: [PATCH 09/26] Update docs for dev environment. --- .devcontainer/devcontainer.json | 2 +- CONTRIBUTING.md | 5 ++++- docs/development_env.md | 10 ++++++++++ docs/index.md | 1 + 4 files changed, 16 insertions(+), 2 deletions(-) create mode 100644 docs/development_env.md diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 15e77624..76b7345d 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -47,6 +47,6 @@ "mounts": [ // Re-use local Git configuration "source=${localEnv:HOME}/.gitconfig,target=/home/vscode/.gitconfig,type=bind,consistency=cached", - "source=${localEnv:HOME}/.gitconfig,target=/home/root/.gitconfig,type=bind,consistency=cached" + "source=${localEnv:HOME}/.gitconfig,target=/root/.gitconfig,type=bind,consistency=cached" ] } \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index b1c6a9b6..b84e5f93 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -37,12 +37,15 @@ Request features on the [Issue Tracker]. ## How to set up your development environment -You need Python 3.7+ and the following tools: +You need Python 3.9+ and the following tools: - [Poetry] - [Nox] - [nox-poetry] +A [dev container](./docs/development_env.md) has been setup to provide a clean environment with the required dependencies. + +## How to install and run MDIO Install the package with development requirements: ```console diff --git a/docs/development_env.md b/docs/development_env.md new file mode 100644 index 00000000..e175f2d6 --- /dev/null +++ b/docs/development_env.md @@ -0,0 +1,10 @@ +# Development Environment + +To facilitate development on different systems a [dev container](https://containers.dev/) has been added. This should seamlessly enable development for users of [VSCode](https://code.visualstudio.com/docs/devcontainers/containers) on systems with docker installed. + +For contributing guidelines please look here [link](../CONTRIBUTING.md) + +### known issues: + +* Some effort was take to run without using root inside the container. However nox always seemed to have permissions issues which I've been unable to fix. +* `git config --global --add safe.directory \`pwd\` ` Might be needed inside the container. \ No newline at end of file diff --git a/docs/index.md b/docs/index.md index 918984f2..5987e6ba 100644 --- a/docs/index.md +++ b/docs/index.md @@ -19,6 +19,7 @@ notebooks/compression usage reference contributing +Development Environment Code of Conduct License Changelog From 5211c51dfe5fa99ff1ee96ff83e6f8bfea8c6025 Mon Sep 17 00:00:00 2001 From: Mark Roberts Date: Fri, 8 Sep 2023 20:18:32 +0000 Subject: [PATCH 10/26] Fix linting. 
--- .devcontainer/devcontainer.json | 6 +++--- .devcontainer/post-install.sh | 7 +++++-- docs/development_env.md | 6 +++--- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 76b7345d..3f94e61d 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -46,7 +46,7 @@ // "updateRemoteUserUID": true, "mounts": [ // Re-use local Git configuration - "source=${localEnv:HOME}/.gitconfig,target=/home/vscode/.gitconfig,type=bind,consistency=cached", - "source=${localEnv:HOME}/.gitconfig,target=/root/.gitconfig,type=bind,consistency=cached" + "source=${localEnv:HOME}/.gitconfig,target=/home/vscode/.gitconfig_tmp,type=bind,consistency=cached", + "source=${localEnv:HOME}/.gitconfig,target=/root/.gitconfig_tmp,type=bind,consistency=cached" ] -} \ No newline at end of file +} diff --git a/.devcontainer/post-install.sh b/.devcontainer/post-install.sh index 540061af..c61fb324 100644 --- a/.devcontainer/post-install.sh +++ b/.devcontainer/post-install.sh @@ -1,3 +1,6 @@ +cp -f /root/.gitconfig_tmp /root/.gitconfig +cp -f /home/vscode/.gitconfig_tmp /home/vscode/.gitconfig +chmod 777 /home/vscode/.gitconfig git config --global --add safe.directory `pwd` -poetry install --with dev --no-ansi -poetry shell \ No newline at end of file +# poetry install --with dev --no-ansi +# poetry shell diff --git a/docs/development_env.md b/docs/development_env.md index e175f2d6..1e4fdd92 100644 --- a/docs/development_env.md +++ b/docs/development_env.md @@ -1,10 +1,10 @@ # Development Environment -To facilitate development on different systems a [dev container](https://containers.dev/) has been added. This should seamlessly enable development for users of [VSCode](https://code.visualstudio.com/docs/devcontainers/containers) on systems with docker installed. +To facilitate development on different systems a [dev container](https://containers.dev/) has been added. This should seamlessly enable development for users of [VSCode](https://code.visualstudio.com/docs/devcontainers/containers) on systems with docker installed. -For contributing guidelines please look here [link](../CONTRIBUTING.md) +For contributing guidelines please look here [link](../CONTRIBUTING.md) ### known issues: * Some effort was take to run without using root inside the container. However nox always seemed to have permissions issues which I've been unable to fix. -* `git config --global --add safe.directory \`pwd\` ` Might be needed inside the container. \ No newline at end of file +* `git config --global --add safe.directory \`pwd\` ` Might be needed inside the container. From a510871e3a7a44221d3199522b5636ab2cdae9c6 Mon Sep 17 00:00:00 2001 From: Mark Roberts Date: Fri, 8 Sep 2023 20:30:05 +0000 Subject: [PATCH 11/26] More linting. --- .devcontainer/devcontainer.json | 77 +++++++++++++-------------------- CONTRIBUTING.md | 1 + docs/development_env.md | 6 +-- 3 files changed, 33 insertions(+), 51 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 3f94e61d..6901d020 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,52 +1,33 @@ // For format details, see https://aka.ms/devcontainer.json. 
For config options, see the // README at: https://github.com/devcontainers/templates/tree/main/src/python { - "build": { - "dockerfile": "Dockerfile", - "context": "..", - "args": { - "USERNAME": "${localEnv:USER}", - "USER_ID": "${localEnv:UID}", - "USER_GID": "${localEnv:GID}" - } - }, - //"name": "Python 3", - // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile - // "image": "mcr.microsoft.com/devcontainers/python:1-3.11-bookworm", - //"features": { - // "ghcr.io/devcontainers-contrib/features/poetry:2": {}, - // "ghcr.io/devcontainers-contrib/features/nox:2": {} - // }, - // Features to add to the dev container. More info: https://containers.dev/features. - // "features": {}, - // Use 'forwardPorts' to make a list of ports inside the container available locally. - // "forwardPorts": [], - // "onCreateCommand": { - // "python3 -m pip install venv && python -m venv /opt/venv" - // }, - // "remoteEnv": { - // "PATH": "/opt/venv/bin:${containerEnv:PATH}" - // }, - // Use 'postCreateCommand' to run commands after the container is created. - "postCreateCommand": { - "post_create_script": "bash ./.devcontainer/post-install.sh" - }, - // Configure tool-specific properties. - "customizations": { - // Configure properties specific to VS Code. - "vscode": { - "settings": {}, - "extensions": [ - "streetsidesoftware.code-spell-checker" - ] - } - }, - // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root. - "remoteUser": "root", - // "updateRemoteUserUID": true, - "mounts": [ - // Re-use local Git configuration - "source=${localEnv:HOME}/.gitconfig,target=/home/vscode/.gitconfig_tmp,type=bind,consistency=cached", - "source=${localEnv:HOME}/.gitconfig,target=/root/.gitconfig_tmp,type=bind,consistency=cached" - ] + "build": { + "dockerfile": "Dockerfile", + "context": "..", + "args": { + "USERNAME": "${localEnv:USER}", + "USER_ID": "${localEnv:UID}", + "USER_GID": "${localEnv:GID}" + } + }, + // Use 'postCreateCommand' to run commands after the container is created. + "postCreateCommand": { + "post_create_script": "bash ./.devcontainer/post-install.sh" + }, + // Configure tool-specific properties. + "customizations": { + // Configure properties specific to VS Code. + "vscode": { + "settings": {}, + "extensions": ["streetsidesoftware.code-spell-checker"] + } + }, + // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root. + "remoteUser": "root", + // "updateRemoteUserUID": true, + "mounts": [ + // Re-use local Git configuration + "source=${localEnv:HOME}/.gitconfig,target=/home/vscode/.gitconfig_tmp,type=bind,consistency=cached", + "source=${localEnv:HOME}/.gitconfig,target=/root/.gitconfig_tmp,type=bind,consistency=cached" + ] } diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index b84e5f93..4b0ac9be 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -46,6 +46,7 @@ You need Python 3.9+ and the following tools: A [dev container](./docs/development_env.md) has been setup to provide a clean environment with the required dependencies. ## How to install and run MDIO + Install the package with development requirements: ```console diff --git a/docs/development_env.md b/docs/development_env.md index 1e4fdd92..3656565e 100644 --- a/docs/development_env.md +++ b/docs/development_env.md @@ -1,10 +1,10 @@ # Development Environment -To facilitate development on different systems a [dev container](https://containers.dev/) has been added. 
This should seamlessly enable development for users of [VSCode](https://code.visualstudio.com/docs/devcontainers/containers) on systems with docker installed. +To facilitate development on different systems a [dev container](https://containers.dev/) has been added. This should seamlessly enable development for users of [VSCode](https://code.visualstudio.com/docs/devcontainers/containers) on systems with docker installed. For contributing guidelines please look here [link](../CONTRIBUTING.md) ### known issues: -* Some effort was take to run without using root inside the container. However nox always seemed to have permissions issues which I've been unable to fix. -* `git config --global --add safe.directory \`pwd\` ` Might be needed inside the container. +- Some effort was take to run without using root inside the container. However nox always seemed to have permissions issues which I've been unable to fix. +- `git config --global --add safe.directory \`pwd\` ` Might be needed inside the container. From eca9b68f584a5727c7d6f4a855ebf3b4bc5164ef Mon Sep 17 00:00:00 2001 From: Mark Roberts Date: Fri, 8 Sep 2023 20:36:47 +0000 Subject: [PATCH 12/26] Fix ssh issue with pushing from container. --- .devcontainer/post-install.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.devcontainer/post-install.sh b/.devcontainer/post-install.sh index c61fb324..1f727909 100644 --- a/.devcontainer/post-install.sh +++ b/.devcontainer/post-install.sh @@ -2,5 +2,7 @@ cp -f /root/.gitconfig_tmp /root/.gitconfig cp -f /home/vscode/.gitconfig_tmp /home/vscode/.gitconfig chmod 777 /home/vscode/.gitconfig git config --global --add safe.directory `pwd` +# Enable ssh with github for git push +ssh-keygen -f "/root/.ssh/known_hosts" -R "github.com" # poetry install --with dev --no-ansi # poetry shell From f79028e88fd3cc81245608b1c91545c83797c3ef Mon Sep 17 00:00:00 2001 From: Mark Roberts Date: Thu, 21 Sep 2023 15:38:16 +0000 Subject: [PATCH 13/26] Updates to the devenvonment for jupyter integration. --- .devcontainer/Dockerfile | 14 ++++++ .devcontainer/devcontainer.json | 77 ++++++++++++++++++++------------- .devcontainer/post-install.sh | 4 ++ .gitignore | 8 ++++ 4 files changed, 73 insertions(+), 30 deletions(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index dd7db66b..a613cf47 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -27,3 +27,17 @@ RUN if [ "${NOX_VERSION}" != "none" ]; then bash -c "umask 0002 && pip3 install ARG NOX_POETRY_VERSION="1.0.3" # RUN if [ "${NOX_POETRY_VERSION}" != "none" ]; then su vscode -c "umask 0002 && pip3 install nox-poetry nox-poetry==${NOX_POETRY_VERSION}"; fi RUN if [ "${NOX_POETRY_VERSION}" != "none" ]; then bash -c "umask 0002 && pip3 install nox-poetry nox-poetry==${NOX_POETRY_VERSION}"; fi + +# Extra python packages for dev +RUN bash -c "umask 0002 && pip3 install msgpack ipykernel" + +# Create venv +RUN python -m venv /opt/venv +RUN poetry config virtualenvs.create false +ENV PATH="/opt/venv/bin:$PATH" + +# Extra python packages for dev +RUN bash -c "umask 0002 && pip3 install msgpack ipykernel" + +# Allow users to update venv +RUN chmod -R 777 /opt/venv diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 6901d020..0324bdf3 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,33 +1,50 @@ // For format details, see https://aka.ms/devcontainer.json. 
For config options, see the // README at: https://github.com/devcontainers/templates/tree/main/src/python { - "build": { - "dockerfile": "Dockerfile", - "context": "..", - "args": { - "USERNAME": "${localEnv:USER}", - "USER_ID": "${localEnv:UID}", - "USER_GID": "${localEnv:GID}" - } - }, - // Use 'postCreateCommand' to run commands after the container is created. - "postCreateCommand": { - "post_create_script": "bash ./.devcontainer/post-install.sh" - }, - // Configure tool-specific properties. - "customizations": { - // Configure properties specific to VS Code. - "vscode": { - "settings": {}, - "extensions": ["streetsidesoftware.code-spell-checker"] - } - }, - // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root. - "remoteUser": "root", - // "updateRemoteUserUID": true, - "mounts": [ - // Re-use local Git configuration - "source=${localEnv:HOME}/.gitconfig,target=/home/vscode/.gitconfig_tmp,type=bind,consistency=cached", - "source=${localEnv:HOME}/.gitconfig,target=/root/.gitconfig_tmp,type=bind,consistency=cached" - ] -} + "build": { + "dockerfile": "Dockerfile", + "context": "..", + "args": { + "USERNAME": "${localEnv:USER}", + "USER_ID": "${localEnv:UID}", + "USER_GID": "${localEnv:GID}" + } + }, + // Use 'postCreateCommand' to run commands after the container is created. + "postCreateCommand": { + "post_create_script": "bash ./.devcontainer/post-install.sh" + }, + // Forward 8787 to enable us to view dask dashboard + "forwardPorts": [ + 8787 + ], + // Configure tool-specific properties. + "customizations": { + // Configure properties specific to VS Code. + "vscode": { + "settings": { + "python.terminal.activateEnvInCurrentTerminal": true, + "python.defaultInterpreterPath": "/opt/venv/bin/python", + }, + "extensions": [ + "ms-python.python", + "ms-python.vscode-pylance", + "ms-toolsai.jupyter", + "ms-toolsai.jupyter-keymap", + "ms-toolsai.jupyter-renderers", + "vscode-icons-team.vscode-icons", + "wayou.vscode-todo-highlight", + "streetsidesoftware.code-spell-checker" + ] + } + }, + // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root. 
+ // "remoteUser": "root", + "updateRemoteUserUID": true, + "mounts": [ + // Re-use local Git configuration + "source=${localEnv:HOME}/.gitconfig,target=/home/vscode/.gitconfig_tmp,type=bind,consistency=cached", + "source=${localEnv:HOME}/.gitconfig,target=/root/.gitconfig_tmp,type=bind,consistency=cached", + "source=${localEnv:SCRATCH_DIR}/${localEnv:USER},target=/scratch/,type=bind,consistency=cached" + ] +} \ No newline at end of file diff --git a/.devcontainer/post-install.sh b/.devcontainer/post-install.sh index 1f727909..1c622863 100644 --- a/.devcontainer/post-install.sh +++ b/.devcontainer/post-install.sh @@ -4,5 +4,9 @@ chmod 777 /home/vscode/.gitconfig git config --global --add safe.directory `pwd` # Enable ssh with github for git push ssh-keygen -f "/root/.ssh/known_hosts" -R "github.com" +# Setup venv +source /opt/venv/bin/activate +# Poetry cmds +poetry config virtualenvs.create false # poetry install --with dev --no-ansi # poetry shell diff --git a/.gitignore b/.gitignore index dd4d32ae..f67d2adb 100644 --- a/.gitignore +++ b/.gitignore @@ -70,6 +70,7 @@ instance/ # Sphinx documentation docs/_build/ +docs/jupyter_execute/ # PyBuilder .pybuilder/ @@ -112,6 +113,7 @@ venv/ ENV/ env.bak/ venv.bak/ +venv*/ # Spyder project settings .spyderproject @@ -143,3 +145,9 @@ cython_debug/ # IDE settings .vscode/ .idea/ + +# tests +mdio1/* +*/mdio1/* +pytest-of-* +tmp/ From d028ee5e426811ae646d5b1936f7d2dfe61461d0 Mon Sep 17 00:00:00 2001 From: Mark Roberts Date: Mon, 25 Sep 2023 20:43:09 +0000 Subject: [PATCH 14/26] Add notebook with segy ingestion and export. --- .devcontainer/post-install.sh | 2 +- notebooks/ingestion_and_export.ipynb | 1020 ++++++++++++++++++++++++++ 2 files changed, 1021 insertions(+), 1 deletion(-) create mode 100755 notebooks/ingestion_and_export.ipynb diff --git a/.devcontainer/post-install.sh b/.devcontainer/post-install.sh index 1c622863..3d00766d 100644 --- a/.devcontainer/post-install.sh +++ b/.devcontainer/post-install.sh @@ -8,5 +8,5 @@ ssh-keygen -f "/root/.ssh/known_hosts" -R "github.com" source /opt/venv/bin/activate # Poetry cmds poetry config virtualenvs.create false -# poetry install --with dev --no-ansi +# poetry install --with dev --no-ansi --all-extras # poetry shell diff --git a/notebooks/ingestion_and_export.ipynb b/notebooks/ingestion_and_export.ipynb new file mode 100755 index 00000000..b6b4a4d4 --- /dev/null +++ b/notebooks/ingestion_and_export.ipynb @@ -0,0 +1,1020 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "15742287", + "metadata": {}, + "source": [ + "# Debugging mdio\n", + "\n", + "In this notebook we will configure an environment that is useful for profiling and debugging mdio segy ingestion and export:\n", + "\n", + "- environment\n", + "- SEGY generation\n", + "- segy to mdio\n", + "- mdio to segy\n", + "\n", + "Memory issues:\n", + "https://distributed.dask.org/en/stable/worker-memory.html#memory-not-released-back-to-the-os\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "4bc09589", + "metadata": {}, + "source": [ + "## Environment\n", + "\n", + "First configure environment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a3dd0987", + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "# Make sure ipykernel is installed\n", + "!{sys.executable} -m pip install ipykernel\n", + "# Install QC tools\n", + "!{sys.executable} -m pip install matplotlib pandas dask_memusage memray\n", + "# Make sure mdio is installed\n", + "!poetry install --extras \"distributed\"\n", + 
"\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "928405ed", + "metadata": {}, + "source": [ + "After the previous cell is run the kernel needs to be restarted so the module gets picked up. Failure to do so will result in the following cell to fail with the error: `ModuleNotFoundError: No module named 'mdio'`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a964af42", + "metadata": {}, + "outputs": [], + "source": [ + "from mdio import mdio_to_segy, MDIOReader\n", + "#import dask.array as dask\n", + "import dask\n", + "from tqdm import tqdm\n", + "import warnings\n", + "warnings.filterwarnings(\"ignore\")\n", + "from dask.diagnostics import ProgressBar\n", + "import time\n", + "import os\n", + "from dask.distributed import LocalCluster, Client" + ] + }, + { + "cell_type": "markdown", + "id": "b6bc1c7e", + "metadata": {}, + "source": [ + "### Setup dask cluster\n", + "\n", + "\n", + "For dask applications the flow can use dask_memusage which is a much simpler profiler based on polling. memray seems to be a recent and significant improvement." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "64f384d6", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "import dask_memusage\n", + "import pandas as pd\n", + "\n", + "\n", + "tmp_path = \"/scratch/tmp2/\"\n", + "MY_TEMP = tmp_path\n", + "\n", + "dask.config.set({\"temporary_directory\": os.path.join(MY_TEMP, \"temp\")})\n", + "\n", + "dask.config.set({\"distributed.comm.timeouts.tcp\": \"90s\"})\n", + "dask.config.set({\"distributed.comm.timeouts.connect\": \"60s\"})\n", + "\n", + "num_cut_dask_workers = 2 \n", + "memory_cut_dask_worker = 60 \n", + "\n", + "gb = 1024**3\n", + "\n", + "use_dask = True\n", + "single_process = False\n", + "\n", + "if use_dask:\n", + " print(\n", + " f\"New local cluster. n_workers {num_cut_dask_workers} mem_limit = {memory_cut_dask_worker} Gb\"\n", + " )\n", + " with dask.config.set({\"distributed.scheduler.worker-saturation\": 1.0}):\n", + " if single_process:\n", + " client = Client(processes=False) \n", + " else:\n", + " cluster = LocalCluster(\n", + " n_workers=num_cut_dask_workers,\n", + " threads_per_worker=1,\n", + " memory_limit=memory_cut_dask_worker * gb,\n", + " )\n", + "\n", + " client = Client(cluster)\n", + "else:\n", + " client = None" + ] + }, + { + "cell_type": "markdown", + "id": "a6abf00f", + "metadata": {}, + "source": [ + "### Setup monitoring dashboard for dask\n", + "\n", + "The dask dashboard should automatically be setup on http://127.0.0.1:8787/status. The [configuration for the dev container](../.devcontainer/devcontainer.json) should have the port forwarding setup for this port enabling this to be viewed. 
The following cell will also give a summary of the client.\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9b959f19", + "metadata": {}, + "outputs": [], + "source": [ + "client\n" + ] + }, + { + "cell_type": "markdown", + "id": "f5194907", + "metadata": {}, + "source": [ + "#### Check python and mdio versions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7e89ed8d", + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "print(f\"Python version: {sys.version}\")\n", + "print(f\"Python path: {sys.executable}\")\n", + "import mdio\n", + "print(f\"mdio version: {mdio.__version__}\")" + ] + }, + { + "cell_type": "markdown", + "id": "a8a4ee0e", + "metadata": {}, + "source": [ + "## SEGY generation" + ] + }, + { + "cell_type": "markdown", + "id": "0cfa333a", + "metadata": {}, + "source": [ + "#### Functions to generate segy files based on tests" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6826ddcc", + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"Test configuration before everything runs.\"\"\"\n", + "\n", + "\n", + "from __future__ import annotations\n", + "\n", + "import os\n", + "\n", + "import numpy as np\n", + "import pytest\n", + "import segyio\n", + "\n", + "from mdio.segy.geometry import StreamerShotGeometryType\n", + "def create_segy_mock_6d(\n", + " fake_segy_tmp: str,\n", + " num_samples: int,\n", + " shots: list,\n", + " cables: list,\n", + " receivers_per_cable: list,\n", + " shot_lines: list = [ # noqa: B006\n", + " 1,\n", + " ],\n", + " comp_types: list = [ # noqa: B006\n", + " 1,\n", + " ],\n", + " chan_header_type: StreamerShotGeometryType = StreamerShotGeometryType.A,\n", + " index_receivers: bool = True,\n", + ") -> str:\n", + " \"\"\"Dummy 6D SEG-Y file for use in tests.\n", + "\n", + " Data will be created with:\n", + "\n", + " offset is byte location 37 - offset 4 bytes\n", + " fldr is byte location 9 - shot 4 byte\n", + " ep is byte location 17 - shot 4 byte\n", + " stae is byte location 137 - cable 2 byte\n", + " tracf is byte location 13 - channel 4 byte\n", + " styp is byte location 133 - shot_line 2 byte\n", + " afilf is byte location 141 - comptype 2 byte\n", + "\n", + " \"\"\"\n", + " spec = segyio.spec()\n", + " segy_file = fake_segy_tmp\n", + "\n", + " shot_count = len(shots)\n", + " total_chan = np.sum(receivers_per_cable)\n", + " trace_count_per_line = shot_count * total_chan\n", + " sline_count = len(shot_lines)\n", + " comp_trace_count = trace_count_per_line * sline_count\n", + " comp_count = len(comp_types)\n", + " trace_count = comp_trace_count * comp_count\n", + "\n", + " spec.format = 1\n", + " spec.samples = range(num_samples)\n", + " spec.tracecount = trace_count\n", + " spec.endian = \"big\"\n", + "\n", + " # Calculate shot, cable, channel/receiver numbers and header values\n", + " cable_headers = []\n", + " channel_headers = []\n", + "\n", + " # TODO: Add strict=True and remove noqa when minimum Python is 3.10\n", + " for cable, num_rec in zip(cables, receivers_per_cable): # noqa: B905\n", + " cable_headers.append(np.repeat(cable, num_rec))\n", + "\n", + " channel_headers.append(np.arange(num_rec) + 1)\n", + "\n", + " cable_headers = np.hstack(cable_headers)\n", + " channel_headers = np.hstack(channel_headers)\n", + "\n", + " if chan_header_type == StreamerShotGeometryType.B:\n", + " channel_headers = np.arange(total_chan) + 1\n", + "\n", + " index_receivers = True\n", + " if chan_header_type == StreamerShotGeometryType.C:\n", + " 
index_receivers = False\n", + "\n", + " shot_headers = np.hstack([np.repeat(shot, total_chan) for shot in shots])\n", + " cable_headers = np.tile(cable_headers, shot_count)\n", + " channel_headers = np.tile(channel_headers, shot_count)\n", + "\n", + " # Add shot lines\n", + " shot_line_headers = np.hstack(\n", + " [np.repeat(shot_line, trace_count_per_line) for shot_line in shot_lines]\n", + " )\n", + "\n", + " shot_headers = np.tile(shot_headers, sline_count)\n", + " cable_headers = np.tile(cable_headers, sline_count)\n", + " channel_headers = np.tile(channel_headers, sline_count)\n", + "\n", + " # Add multiple components\n", + " comptype_headers = np.hstack(\n", + " [np.repeat(comp, comp_trace_count) for comp in comp_types]\n", + " )\n", + "\n", + " shot_line_headers = np.tile(shot_line_headers, comp_count)\n", + " shot_headers = np.tile(shot_headers, comp_count)\n", + " cable_headers = np.tile(cable_headers, comp_count)\n", + " channel_headers = np.tile(channel_headers, comp_count)\n", + "\n", + " with segyio.create(segy_file, spec) as f:\n", + " for trc_idx in range(trace_count):\n", + " shot = shot_headers[trc_idx]\n", + " cable = cable_headers[trc_idx]\n", + " channel = channel_headers[trc_idx]\n", + " shot_line = shot_line_headers[trc_idx]\n", + " comptype = comptype_headers[trc_idx]\n", + "\n", + " # offset is byte location 37 - offset 4 bytes\n", + " # fldr is byte location 9 - shot 4 byte\n", + " # ep is byte location 17 - shot 4 byte\n", + " # stae is byte location 137 - cable 2 byte\n", + " # tracf is byte location 13 - channel 4 byte\n", + " # styp is byte location 133 - shot_line 2 byte\n", + " # afilf is byte location 141 - comptype 2 byte\n", + "\n", + " if index_receivers:\n", + " f.header[trc_idx].update(\n", + " offset=0,\n", + " fldr=shot,\n", + " ep=shot,\n", + " stae=cable,\n", + " tracf=channel,\n", + " styp=shot_line,\n", + " afilf=comptype,\n", + " )\n", + " else:\n", + " f.header[trc_idx].update(\n", + " offset=0,\n", + " fldr=shot,\n", + " ep=shot,\n", + " stae=cable,\n", + " styp=shot_line,\n", + " afilf=comptype,\n", + " )\n", + "\n", + " samples = np.linspace(start=shot, stop=shot + 1, num=num_samples)\n", + " f.trace[trc_idx] = samples.astype(\"float32\")\n", + "\n", + " f.bin.update()\n", + "\n", + " return segy_file\n", + "\n", + "def segy_mock_6d_shots(segy_path: str) -> dict[str, str]:\n", + " \"\"\"Generate mock 6D shot SEG-Y files.\"\"\"\n", + " num_samples = 25\n", + " shots = [2, 3, 5]\n", + " cables = [0, 101, 201, 301]\n", + " receivers_per_cable = [1, 5, 7, 5]\n", + " shot_lines = [1, 2, 4, 5, 99]\n", + " comp_types = [1, 2, 3, 4]\n", + "\n", + " \n", + " chan_header_type = StreamerShotGeometryType.A,\n", + " \n", + " segy_path = create_segy_mock_6d(\n", + " segy_path,\n", + " num_samples=num_samples,\n", + " shots=shots,\n", + " cables=cables,\n", + " receivers_per_cable=receivers_per_cable,\n", + " chan_header_type=chan_header_type,\n", + " shot_lines=shot_lines,\n", + " comp_types=comp_types,\n", + " )\n", + " return segy_path\n", + "\n", + "def segy_mock_4d_shots(segy_path: str) -> dict[str, str]:\n", + " \"\"\"Generate mock 4D shot SEG-Y files.\"\"\"\n", + " num_samples = 25\n", + " shots = [2, 3, 5]\n", + " cables = [0, 101, 201, 301]\n", + " receivers_per_cable = [1, 5, 7, 5]\n", + " shot_lines = [1,]\n", + " comp_types = [1,]\n", + "\n", + " \n", + " chan_header_type = StreamerShotGeometryType.A,\n", + " \n", + " segy_path = create_segy_mock_6d(\n", + " segy_path,\n", + " num_samples=num_samples,\n", + " shots=shots,\n", + " 
cables=cables,\n", + " receivers_per_cable=receivers_per_cable,\n", + " chan_header_type=chan_header_type,\n", + " shot_lines=shot_lines,\n", + " comp_types=comp_types,\n", + " )\n", + " return segy_path\n", + "\n", + "def segy_mock_4d_shots_large(segy_path: str, num_shots:int=100) -> dict[str, str]:\n", + " \"\"\"Generate mock 4D shot SEG-Y files at a reasonable scale.\"\"\"\n", + " num_samples = 4000\n", + " num_cables = 12\n", + " num_receivers_per_cable = 250 \n", + " shots = range(num_shots)\n", + " cables = range(num_cables)\n", + " receivers_per_cable = [num_receivers_per_cable,]*num_cables\n", + " shot_lines = [1,]\n", + " comp_types = [1,]\n", + "\n", + " \n", + " chan_header_type = StreamerShotGeometryType.A,\n", + " \n", + " segy_path = create_segy_mock_6d(\n", + " segy_path,\n", + " num_samples=num_samples,\n", + " shots=shots,\n", + " cables=cables,\n", + " receivers_per_cable=receivers_per_cable,\n", + " chan_header_type=chan_header_type,\n", + " shot_lines=shot_lines,\n", + " comp_types=comp_types,\n", + " )\n", + " return segy_path" + ] + }, + { + "cell_type": "markdown", + "id": "75b02400", + "metadata": {}, + "source": [ + "#### segy config" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "55229136", + "metadata": {}, + "outputs": [], + "source": [ + "dims = 4\n", + "large_segy = True\n", + "num_shots = 1000\n", + "\n", + "if dims == 6:\n", + " index_header_names = (\"comptype\", \"shot_line\",\"shot_point\", \"cable\", \"channel\")\n", + " index_types = (\"int16\", \"int16\", \"int32\", \"int16\", \"int32\")\n", + " index_bytes= (141, 133, 17, 137, 13)\n", + " chunksize = (1, 2, 4, 2, 128, 1024)\n", + " grid_overrides = {\"AutoChannelWrap\": True}\n", + " num_shots = 3\n", + " segy_path = os.path.join(tmp_path, f\"segy_{dims}d_{num_shots}.sgy\")\n", + " print(segy_path)\n", + " access_pattern=\"012345\"\n", + "elif dims == 4:\n", + " index_header_names = (\"shot_point\", \"cable\", \"channel\")\n", + " index_types = (\"int32\", \"int16\", \"int32\")\n", + " index_bytes= ( 17, 137, 13)\n", + " chunksize = (4, 2, 128, 1024)\n", + " grid_overrides = {\"AutoChannelWrap\": True}\n", + " if large_segy:\n", + " segy_path = os.path.join(tmp_path, f\"segy_{dims}d_{num_shots}.sgy\")\n", + " \n", + " else:\n", + " num_shots = 3\n", + " segy_path = os.path.join(tmp_path, f\"segy_{dims}d_{num_shots}.sgy\")\n", + " access_pattern=\"0123\"\n", + " \n", + "print(segy_path)\n" + ] + }, + { + "cell_type": "markdown", + "id": "5baf43f7", + "metadata": {}, + "source": [ + "#### Create SEGY" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "07a40484", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "if dims == 6:\n", + " segy_path = segy_mock_6d_shots(segy_path)\n", + "elif dims == 4:\n", + " if large_segy:\n", + " segy_path = segy_mock_4d_shots_large(segy_path, num_shots=num_shots)\n", + " else:\n", + " segy_path = segy_mock_4d_shots(segy_path)\n", + " \n", + "print(segy_path)" + ] + }, + { + "cell_type": "markdown", + "id": "45aa8c7f", + "metadata": {}, + "source": [ + "## Ingest segy to mdio\n", + "\n", + "#### Config" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fe6f5e95", + "metadata": {}, + "outputs": [], + "source": [ + "mdio_path = os.path.join(tmp_path, f\"segy_{dims}d_import_{num_shots}.mdio\")\n", + "kwargs = {\n", + " 'segy_path': segy_path,\n", + " 'mdio_path_or_buffer': mdio_path,\n", + " 'index_names': index_header_names,\n", + " 'index_bytes': index_bytes,\n", + " 'index_types': 
index_types,\n", + " 'chunksize': chunksize, # (1, chunksize_2d, -1),\n", + " 'overwrite': True\n", + "}\n", + "if grid_overrides is not None:\n", + " kwargs['grid_overrides'] = grid_overrides\n", + "kwargs" + ] + }, + { + "cell_type": "markdown", + "id": "6ab5982f", + "metadata": {}, + "source": [ + "#### Actual segy to mdio conversion based on config" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a622fadc", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "mdio.segy_to_mdio(**kwargs)" + ] + }, + { + "cell_type": "markdown", + "id": "e43e4fdf", + "metadata": {}, + "source": [ + "#### QC of generated mdio file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "caa55b34", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "def info(\n", + " input_mdio_file,\n", + " output_format=\"plain\",\n", + " access_pattern=\"012\",\n", + "):\n", + " \"\"\"Provide information on MDIO dataset.\n", + " By default this returns human readable information about the grid and stats for\n", + " the dataset. If output-format is set to json then a json is returned to\n", + " facilitate parsing.\n", + " \"\"\"\n", + " reader = mdio.MDIOReader(\n", + " input_mdio_file, access_pattern=access_pattern, return_metadata=True\n", + " )\n", + " mdio_dict = {}\n", + " mdio_dict[\"grid\"] = {}\n", + " for axis in reader.grid.dim_names:\n", + " dim = reader.grid.select_dim(axis)\n", + " min = dim.coords[0]\n", + " max = dim.coords[-1]\n", + " size = dim.coords.shape[0]\n", + " axis_dict = {\"name\": axis, \"min\": min, \"max\": max, \"size\": size}\n", + " mdio_dict[\"grid\"][axis] = axis_dict\n", + "\n", + " if output_format == \"plain\":\n", + " print(\"{:<10} {:<10} {:<10} {:<10}\".format(\"NAME\", \"MIN\", \"MAX\", \"SIZE\"))\n", + " print(\"=\" * 40)\n", + "\n", + " for _, axis_dict in mdio_dict[\"grid\"].items():\n", + " print(\n", + " \"{:<10} {:<10} {:<10} {:<10}\".format(\n", + " axis_dict[\"name\"],\n", + " axis_dict[\"min\"],\n", + " axis_dict[\"max\"],\n", + " axis_dict[\"size\"],\n", + " )\n", + " )\n", + "\n", + " print(\"\\n\\n{:<10} {:<10}\".format(\"STAT\", \"VALUE\"))\n", + " print(\"=\" * 20)\n", + " for name, stat in reader.stats.items():\n", + " print(f\"{name:<10} {stat:<10}\")\n", + " if output_format == \"json\":\n", + " mdio_dict[\"stats\"] = reader.stats\n", + " print(mdio_dict)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1fc7838d", + "metadata": {}, + "outputs": [], + "source": [ + "info(\n", + " mdio_path,\n", + " output_format=\"plain\",\n", + " access_pattern=access_pattern,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8feef694", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "reader = mdio.MDIOReader(\n", + " mdio_path, access_pattern=access_pattern, return_metadata=True\n", + ")\n", + "comp_dim = reader.grid.select_dim(index_header_names[0])\n", + "\n", + "print(f\"comp_dim: {comp_dim} for {reader}\")" + ] + }, + { + "cell_type": "markdown", + "id": "b6fffdea", + "metadata": {}, + "source": [ + "## SEGY export (cut)\n", + "\n", + "#### First declare functions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "40a44354", + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext memray\n", + "\n", + "import csv\n", + "import psutil\n", + "import matplotlib.pyplot as plt\n", + "\n", + "from distributed.diagnostics import MemorySampler\n", + "from distributed.diagnostics.memray import memray_workers\n", + "\n", + "\n", + 
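"# Helper routines for the export (cut) benchmark below:\n",
+    "# - processing_time() converts a perf_counter interval to minutes\n",
+    "# - create_segy() wraps mdio_to_segy and samples per-CPU load and RAM\n",
+    "#   with psutil around the conversion\n",
+    "# - the *_from_csv() helpers parse dask_memusage profiler output\n",
+    "\n",
+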
"def processing_time(end, start):\n", + " return (end - start) / 60\n", + "\n", + "\n", + "def file_size(file):\n", + " import os\n", + "\n", + " filesize = os.path.getsize(file)\n", + " return filesize\n", + "\n", + "\n", + "def make_folders(folder_path):\n", + " import os\n", + "\n", + " msg = \"Folder already exists\"\n", + " if not os.path.exists(folder_path):\n", + " os.makedirs(folder_path)\n", + " msg = \"Folders created\"\n", + " return msg\n", + "\n", + "def create_segy(mdio_source, temp_local_destination, client, selection_mask=None, access_pattern=\"0123\"):\n", + " start = time.perf_counter()\n", + " access_pattern = \"0123\"\n", + " print(\"Started_conv\")\n", + "\n", + " _ = psutil.cpu_percent(interval=None, percpu=True)\n", + " mdio_to_segy(\n", + " mdio_source,\n", + " temp_local_destination,\n", + " selection_mask=selection_mask,\n", + " access_pattern=access_pattern,\n", + " client=client,\n", + " )\n", + " mdio_to_segy_time = time.perf_counter()\n", + " cpu_mdio_to_segy = psutil.cpu_percent(interval=None, percpu=True)\n", + " max_cpu_mdio_to_sgy = max(cpu_mdio_to_segy)\n", + " min_cpu_usage = min(cpu_mdio_to_segy)\n", + " cpu_usage_avg = np.mean(np.array(cpu_mdio_to_segy))\n", + " print(\"cpu_usage_mdio_to_segy_max\", max_cpu_mdio_to_sgy)\n", + " mem_usage_mdio_to_sgy = int(\n", + " psutil.virtual_memory().total - psutil.virtual_memory().available\n", + " )\n", + " return (\n", + " max_cpu_mdio_to_sgy,\n", + " min_cpu_usage,\n", + " cpu_usage_avg,\n", + " mem_usage_mdio_to_sgy,\n", + " processing_time(mdio_to_segy_time, start),\n", + " )\n", + "\n", + "\n", + "def get_max_mem_from_csv(filename: str):\n", + " \"\"\"Find maximum memory usage from a dask_memusage memory sampler profiler.\"\"\"\n", + " print(f\"mem_file = {filename}\")\n", + " try:\n", + " mem_df = pd.read_csv(filename)\n", + " max_dask_task_memory = int(mem_df[\"max_memory_mb\"].max() * (1024**2))\n", + " except:\n", + " max_mem_array = []\n", + " task_name_array = []\n", + " with open(filename) as fp:\n", + " Lines = fp.readlines()\n", + " for line in Lines:\n", + " csv = line.split(',')\n", + " if len(csv) > 4:\n", + " max_mem = csv[-1]\n", + " task_name = csv[0]\n", + " try:\n", + " my_mm = float(max_mem)\n", + " max_mem_array.append(my_mm)\n", + " task_name_array.append(task_name)\n", + " except:\n", + " continue\n", + " max_index = max_mem_array.index(max(max_mem_array))\n", + " print(f\"max_index={max_index} max_mem={max_mem_array[max_index]}MB max_task_name = {task_name_array[max_index]}\")\n", + "\n", + " max_dask_task_memory = int(max_mem_array[max_index] * (1024**2))\n", + " return max_dask_task_memory\n", + " \n", + "def get_large_mem_fns_from_csv(filename: str, thresh=50.):\n", + " \"\"\"Find functions with a large from a dask_memusage memory sampler profiler.\"\"\"\n", + " print(f\"mem_file = {filename}\")\n", + " task_name_array = []\n", + " with open(filename) as fp:\n", + " Lines = fp.readlines()\n", + " for line in Lines:\n", + " csv = line.split(',')\n", + " if len(csv) > 2:\n", + " max_mem = csv[-1]\n", + " task_name = csv[0]\n", + " try:\n", + " my_mm = float(max_mem)\n", + " if my_mm > thresh:\n", + " task_name_array.append(task_name)\n", + " except:\n", + " continue\n", + " return list(set(task_name_array))\n", + "\n", + " \n", + " \n", + "def plot_function_mem_from_csv(filename: str, fn_name: str):\n", + " \"\"\"Plot memory usage for a single function\n", + " \n", + " Inputs\n", + " ------\n", + " filename: str csv from dask_memusage memory sampler profiler\n", + " fn_name: str name 
of function to track and plot\"\"\"\n", + " print(f\"mem_file = {filename}\")\n", + " mem_array_1 = []\n", + " mem_array_2 = []\n", + " with open(filename) as fp:\n", + " Lines = fp.readlines()\n", + " for line in Lines:\n", + " csv = line.split(',')\n", + " if len(csv) > 4:\n", + " max_mem = csv[-1]\n", + " max_mem_2 = csv[-2]\n", + " task_name = csv[0]\n", + " try:\n", + " if fn_name in task_name:\n", + " my_mm = float(max_mem)\n", + " mem_array_1.append(my_mm)\n", + " my_mm = float(max_mem_2)\n", + " mem_array_2.append(my_mm)\n", + " except:\n", + " continue\n", + " if len(mem_array_1) > 1:\n", + " plt.figure()\n", + " plt.plot(mem_array_1, label=\"Total\")\n", + " plt.plot(mem_array_2, label=\"Proc\")\n", + " plt.title(f\"{fn_name}\")\n", + " plt.xlabel(\"Occurrence\")\n", + " plt.ylabel(\"Memory\")\n", + " plt.show\n", + " plt.savefig(f'{filename}_{fn_name}.png')\n", + " elif len(mem_array_1) == 1:\n", + " print(f\"{fn_name} used {mem_array_1[0]}mb memory\")\n", + " else:\n", + " print(f\"Had issue reading {fn_name} memory usage\")\n", + " return mem_array_1\n", + "\n", + "def cut(input_mdio: str, run=0, access_pattern=\"0123\", client=None, test_name=\"6372\"):\n", + " \"\"\"Cuts segy from mdio with memory QC\"\"\"\n", + " with open(\n", + " os.path.join(MY_TEMP, test_name+\"_metrics_export.csv\"), \"a+\", newline=\"\"\n", + " ) as write_obj:\n", + " csv_writer = csv.writer(write_obj)\n", + " csv_writer.writerow(\n", + " [\n", + " \"chunk_case\",\n", + " \"file_size\",\n", + " \"reader_shape\",\n", + " \"time\",\n", + " \"cpu_usage_max\",\n", + " \"cpu_usage_min\",\n", + " \"cpu_usage_avg\",\n", + " \"mem_usage\",\n", + " \"run\",\n", + " ]\n", + " )\n", + "\n", + "\n", + " print(\"Converting Multidimio to Segy via Local Dask\")\n", + "\n", + " TEMP_DESTINATION = os.path.join(MY_TEMP, test_name+\".sgy\")\n", + "\n", + " print(\"TEMP_DESTINATION is:\", TEMP_DESTINATION)\n", + " # Set to true to use dask_memusage to track memory usage\n", + " track_memusage = False\n", + " # Set to true to use memray to track memory usage\n", + " use_memray = False\n", + " # Flag to create dask cluster\n", + " use_dask = False\n", + " if client is not None:\n", + " use_dask = True\n", + "\n", + " if use_dask:\n", + " print(\n", + " f\"New local cluster. 
n_workers {num_cut_dask_workers} mem_limit = {memory_cut_dask_worker} Gb\"\n", + " )\n", + " with dask.config.set({\"distributed.scheduler.worker-saturation\": 1.0}):\n", + " cluster = LocalCluster(\n", + " n_workers=num_cut_dask_workers,\n", + " threads_per_worker=1,\n", + " memory_limit=memory_cut_dask_worker * gb,\n", + " )\n", + "\n", + " client = Client(cluster)\n", + " if track_memusage:\n", + " mem_file = os.path.join(os.getcwd(), f\"{test_name}_ram_usage.csv\")\n", + " dask_memusage.install(client.cluster.scheduler, mem_file)\n", + " else:\n", + " track_memusage = False\n", + " client = None\n", + "\n", + " if track_memusage:\n", + " \n", + " if use_memray:\n", + " with memray_workers(f\"memray_{test_name}\", report_args=('flamegraph', '--temporal')):\n", + " ms = MemorySampler()\n", + "\n", + " with ms.sample(test_name):\n", + " (\n", + " cpu_usage_max,\n", + " cpu_usage_min,\n", + " cpu_usage_avg,\n", + " mem_usage,\n", + " time_taken,\n", + " ) = create_segy(input_mdio, TEMP_DESTINATION, client)\n", + " else:\n", + " with ms.sample(test_name):\n", + " (\n", + " cpu_usage_max,\n", + " cpu_usage_min,\n", + " cpu_usage_avg,\n", + " mem_usage,\n", + " time_taken,\n", + " ) = create_segy(input_mdio, TEMP_DESTINATION, client)\n", + " else:\n", + " (\n", + " cpu_usage_max,\n", + " cpu_usage_min,\n", + " cpu_usage_avg,\n", + " mem_usage,\n", + " time_taken,\n", + " ) = create_segy(input_mdio, TEMP_DESTINATION, client)\n", + "\n", + " if client is not None:\n", + " # mem_file = os.path.join(os.getcwd(), test_name + \"_ram_usage.csv\")\n", + " mem_file = os.path.join(MY_TEMP, test_name + \"_ram_usage.csv\")\n", + " \n", + " dask_memusage.install(client.cluster.scheduler, mem_file)\n", + " ms = MemorySampler()\n", + "\n", + " with ms.sample(test_name):\n", + " (\n", + " cpu_usage_max,\n", + " cpu_usage_min,\n", + " cpu_usage_avg,\n", + " mem_usage,\n", + " time_taken,\n", + " ) = create_segy(input_mdio, TEMP_DESTINATION, client, access_pattern=access_pattern)\n", + " fig = plt.figure()\n", + " memory_plot = ms.plot(align=True)\n", + " fig = memory_plot.get_figure()\n", + " fig.savefig(test_name + \"_.jpg\", bbox_inches=\"tight\")\n", + "\n", + " else:\n", + " (\n", + " cpu_usage_max,\n", + " cpu_usage_min,\n", + " cpu_usage_avg,\n", + " mem_usage,\n", + " time_taken,\n", + " ) = create_segy(input_mdio, TEMP_DESTINATION, client, access_pattern=access_pattern)\n", + "\n", + " print(\"cut completed\")\n", + "\n", + " file_size = os.path.getsize(TEMP_DESTINATION) / (1024**3)\n", + " print(f\"mdio to segy completed in {time_taken}\")\n", + "\n", + " reader = MDIOReader(\n", + " mdio_path_or_buffer=input_mdio, backend=\"dask\", access_pattern=access_pattern,\n", + " )\n", + " \n", + " if client is not None:\n", + " # mem_df = pd.read_csv(mem_file)\n", + " # max_dask_task_memory = int(mem_df[\"max_memory_mb\"].max() * (1024**2))\n", + "\n", + " max_dask_task_memory = get_max_mem_from_csv(mem_file)\n", + " # Find all functions that use a significant amount of memory\n", + " large_mem_fns = get_large_mem_fns_from_csv(mem_file)\n", + " # Make plots of function memory over time\n", + " for fn_name in large_mem_fns:\n", + " plot_function_mem_from_csv(mem_file, fn_name)\n", + " plot_function_mem_from_csv(mem_file, \"write_to_segy_stack-segy_concat\")\n", + " metrics = [\n", + " chunksize,\n", + " file_size,\n", + " reader.shape,\n", + " time_taken,\n", + " cpu_usage_max,\n", + " cpu_usage_min,\n", + " cpu_usage_avg,\n", + " mem_usage,\n", + " max_dask_task_memory,\n", + " run,\n", + " ]\n", + "\n", + " 
with open(\n", + " os.path.join(MY_TEMP, test_name + \"_metrics_export.csv\"), \"a+\", newline=\"\"\n", + " ) as write_obj:\n", + " csv_writer = csv.writer(write_obj)\n", + " csv_writer.writerow(metrics)\n", + "\n", + " print(f\"{metrics=}\")\n", + " os.remove(TEMP_DESTINATION)\n", + " time.sleep(30)\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "bb1d5240", + "metadata": {}, + "source": [ + "### Run cut" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5882a46a", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "file_list = [os.path.join(MY_TEMP,'segy_4d_import_100.mdio'),os.path.join(MY_TEMP,'segy_4d_import_1000.mdio') ]\n", + "test_name = [\"test_100shots\",\"test_1000shots\"]\n", + "\n", + "file_list = [os.path.join(MY_TEMP,'segy_4d_import_1000.mdio'), ]\n", + "test_name = [\"test_1000shots\",]\n", + "for mdio_file, test_name in zip(file_list, test_name):\n", + " cut(mdio_file, client=client, test_name=test_name)" + ] + }, + { + "cell_type": "markdown", + "id": "0d2c6d09", + "metadata": {}, + "source": [ + "Viewing html from memray using HTML Preview plugin fails. Open in an external web-browser such as chrome. " + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From c82edf73ed80a1c52711958281cf8ca453aadfd8 Mon Sep 17 00:00:00 2001 From: Mark Roberts Date: Mon, 25 Sep 2023 21:20:48 +0000 Subject: [PATCH 15/26] Prettier updates. --- .devcontainer/devcontainer.json | 82 ++++++++++++++++----------------- 1 file changed, 40 insertions(+), 42 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 93567e60..b618a526 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,45 +1,43 @@ // For format details, see https://aka.ms/devcontainer.json. For config options, see the // README at: https://github.com/devcontainers/templates/tree/main/src/python { - "build": { - "dockerfile": "Dockerfile", - "context": ".." - }, - // Use 'postCreateCommand' to run commands after the container is created. - "postCreateCommand": { - "post_create_script": "bash ./.devcontainer/post-install.sh" - }, - // Forward 8787 to enable us to view dask dashboard - "forwardPorts": [ - 8787 - ], - // Configure tool-specific properties. - "customizations": { - // Configure properties specific to VS Code. - "vscode": { - "settings": { - "python.terminal.activateEnvInCurrentTerminal": true, - "python.defaultInterpreterPath": "/opt/venv/bin/python" - }, - "extensions": [ - "ms-python.python", - "ms-python.vscode-pylance", - "ms-toolsai.jupyter", - "ms-toolsai.jupyter-keymap", - "ms-toolsai.jupyter-renderers", - "vscode-icons-team.vscode-icons", - "wayou.vscode-todo-highlight", - "streetsidesoftware.code-spell-checker" - ] - } - }, - // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root. 
- // "remoteUser": "root", - "updateRemoteUserUID": true, - "mounts": [ - // Re-use local Git configuration - "source=${localEnv:HOME}/.gitconfig,target=/home/vscode/.gitconfig_tmp,type=bind,consistency=cached", - "source=${localEnv:HOME}/.gitconfig,target=/root/.gitconfig_tmp,type=bind,consistency=cached", - "source=${localEnv:SCRATCH_DIR}/${localEnv:USER},target=/scratch/,type=bind,consistency=cached" - ] -} \ No newline at end of file + "build": { + "dockerfile": "Dockerfile", + "context": ".." + }, + // Use 'postCreateCommand' to run commands after the container is created. + "postCreateCommand": { + "post_create_script": "bash ./.devcontainer/post-install.sh" + }, + // Forward 8787 to enable us to view dask dashboard + "forwardPorts": [8787], + // Configure tool-specific properties. + "customizations": { + // Configure properties specific to VS Code. + "vscode": { + "settings": { + "python.terminal.activateEnvInCurrentTerminal": true, + "python.defaultInterpreterPath": "/opt/venv/bin/python" + }, + "extensions": [ + "ms-python.python", + "ms-python.vscode-pylance", + "ms-toolsai.jupyter", + "ms-toolsai.jupyter-keymap", + "ms-toolsai.jupyter-renderers", + "vscode-icons-team.vscode-icons", + "wayou.vscode-todo-highlight", + "streetsidesoftware.code-spell-checker" + ] + } + }, + // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root. + // "remoteUser": "root", + "updateRemoteUserUID": true, + "mounts": [ + // Re-use local Git configuration + "source=${localEnv:HOME}/.gitconfig,target=/home/vscode/.gitconfig_tmp,type=bind,consistency=cached", + "source=${localEnv:HOME}/.gitconfig,target=/root/.gitconfig_tmp,type=bind,consistency=cached", + "source=${localEnv:SCRATCH_DIR}/${localEnv:USER},target=/scratch/,type=bind,consistency=cached" + ] +} From 2de3c5db65ee399fdd8a5bb214ae7e6b6e924e62 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 1 Nov 2023 14:17:19 +0000 Subject: [PATCH 16/26] Bump release-drafter/release-drafter from 5.24.0 to 5.25.0 Bumps [release-drafter/release-drafter](https://github.com/release-drafter/release-drafter) from 5.24.0 to 5.25.0. - [Release notes](https://github.com/release-drafter/release-drafter/releases) - [Commits](https://github.com/release-drafter/release-drafter/compare/v5.24.0...v5.25.0) --- updated-dependencies: - dependency-name: release-drafter/release-drafter dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index d807d674..4a121135 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -71,7 +71,7 @@ jobs: repository_url: https://test.pypi.org/legacy/ - name: Publish the release notes - uses: release-drafter/release-drafter@v5.24.0 + uses: release-drafter/release-drafter@v5.25.0 with: publish: ${{ steps.check-version.outputs.tag != '' }} tag: ${{ steps.check-version.outputs.tag }} From 8f1581f99d7079d0452bef0ebbca1ab7178396b7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 2 Nov 2023 13:54:49 +0000 Subject: [PATCH 17/26] Bump virtualenv from 20.24.5 to 20.24.6 in /.github/workflows Bumps [virtualenv](https://github.com/pypa/virtualenv) from 20.24.5 to 20.24.6. 
- [Release notes](https://github.com/pypa/virtualenv/releases)
- [Changelog](https://github.com/pypa/virtualenv/blob/main/docs/changelog.rst)
- [Commits](https://github.com/pypa/virtualenv/compare/20.24.5...20.24.6)

---
updated-dependencies:
- dependency-name: virtualenv
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot]
---
 .github/workflows/constraints.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/constraints.txt b/.github/workflows/constraints.txt
index 4fc723e4..faaf781f 100644
--- a/.github/workflows/constraints.txt
+++ b/.github/workflows/constraints.txt
@@ -1,4 +1,4 @@
 pip==23.2.1
 nox==2023.4.22
 nox-poetry==1.0.3
-virtualenv==20.24.5
+virtualenv==20.24.6

From 1a82626c2477c6057d9311c76a1140b078bdafbb Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 2 Nov 2023 14:06:42 +0000
Subject: [PATCH 18/26] Bump pip from 23.2.1 to 23.3.1 in /.github/workflows

Bumps [pip](https://github.com/pypa/pip) from 23.2.1 to 23.3.1.
- [Changelog](https://github.com/pypa/pip/blob/main/NEWS.rst)
- [Commits](https://github.com/pypa/pip/compare/23.2.1...23.3.1)

---
updated-dependencies:
- dependency-name: pip
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot]
---
 .github/workflows/constraints.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/constraints.txt b/.github/workflows/constraints.txt
index faaf781f..96f43acb 100644
--- a/.github/workflows/constraints.txt
+++ b/.github/workflows/constraints.txt
@@ -1,4 +1,4 @@
-pip==23.2.1
+pip==23.3.1
 nox==2023.4.22
 nox-poetry==1.0.3
 virtualenv==20.24.6

From 40b9f4b053cc7f99d4d34091a0430eaa6596f194 Mon Sep 17 00:00:00 2001
From: Mark Roberts
Date: Thu, 2 Nov 2023 15:07:15 +0000
Subject: [PATCH 19/26] Tidy up files based on PR feedback.
---
 .devcontainer/Dockerfile        | 4 ----
 .devcontainer/devcontainer.json | 0
 .devcontainer/post-install.sh   | 5 +++--
 docs/development_env.md         | 5 ++---
 4 files changed, 5 insertions(+), 9 deletions(-)
 mode change 100644 => 100755 .devcontainer/Dockerfile
 mode change 100644 => 100755 .devcontainer/devcontainer.json
 mode change 100644 => 100755 .devcontainer/post-install.sh
 mode change 100644 => 100755 docs/development_env.md

diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
old mode 100644
new mode 100755
index 7eba8452..3dd31294
--- a/.devcontainer/Dockerfile
+++ b/.devcontainer/Dockerfile
@@ -1,7 +1,6 @@
 ARG PYTHON_VERSION=3.11
 ARG LINUX_DISTRO=bookworm

-# FROM python:${PYTHON_VERSION}-${LINUX_DISTRO}
 FROM mcr.microsoft.com/devcontainers/python:1-${PYTHON_VERSION}-${LINUX_DISTRO}

 # Install git for nox pre-commit
@@ -12,17 +11,14 @@ RUN apt-get update \

 # Poetry
 ARG POETRY_VERSION="1.6.1"
-# RUN if [ "${POETRY_VERSION}" != "none" ]; then su vscode -c "umask 0002 && pip3 install poetry==${POETRY_VERSION}"; fi
 RUN if [ "${POETRY_VERSION}" != "none" ]; then bash -c "umask 0002 && pip3 install poetry==${POETRY_VERSION}"; fi

 # Nox
 ARG NOX_VERSION="2023.4.22"
-# RUN if [ "${NOX_VERSION}" != "none" ]; then su vscode -c "umask 0002 && pip3 install nox-poetry nox==${NOX_VERSION}"; fi
 RUN if [ "${NOX_VERSION}" != "none" ]; then bash -c "umask 0002 && pip3 install nox-poetry nox==${NOX_VERSION}"; fi

 # Nox poetry
 ARG NOX_POETRY_VERSION="1.0.3"
-# RUN if [ "${NOX_POETRY_VERSION}" != "none" ]; then su vscode -c "umask 0002 && pip3 install nox-poetry nox-poetry==${NOX_POETRY_VERSION}"; fi
 RUN if [ "${NOX_POETRY_VERSION}" != "none" ]; then bash -c "umask 0002 && pip3 install nox-poetry nox-poetry==${NOX_POETRY_VERSION}"; fi

 # Extra python packages for dev
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
old mode 100644
new mode 100755
diff --git a/.devcontainer/post-install.sh b/.devcontainer/post-install.sh
old mode 100644
new mode 100755
index 3d00766d..4ce7d63a
--- a/.devcontainer/post-install.sh
+++ b/.devcontainer/post-install.sh
@@ -2,11 +2,12 @@ cp -f /root/.gitconfig_tmp /root/.gitconfig
 cp -f /home/vscode/.gitconfig_tmp /home/vscode/.gitconfig
 chmod 777 /home/vscode/.gitconfig
 git config --global --add safe.directory `pwd`
+
 # Enable ssh with github for git push
 ssh-keygen -f "/root/.ssh/known_hosts" -R "github.com"
+
 # Setup venv
 source /opt/venv/bin/activate
+
 # Poetry cmds
 poetry config virtualenvs.create false
-# poetry install --with dev --no-ansi --all-extras
-# poetry shell
diff --git a/docs/development_env.md b/docs/development_env.md
old mode 100644
new mode 100755
index 3656565e..f28aaece
--- a/docs/development_env.md
+++ b/docs/development_env.md
@@ -4,7 +4,6 @@ To facilitate development on different systems a [dev container](https://contain

 For contributing guidelines please look here [link](../CONTRIBUTING.md)

-### known issues:
+### Known issues:

-- Some effort was take to run without using root inside the container. However nox always seemed to have permissions issues which I've been unable to fix.
-- `git config --global --add safe.directory \`pwd\` ` Might be needed inside the container.
+- `git config --global --add safe.directory \`pwd\` ` might be needed inside the container.
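
Note on the `safe.directory` change above: git refuses to operate on a
repository whose on-disk owner differs from the current user (a "dubious
ownership" error), which is common when the source tree is bind-mounted into
the container. A minimal defensive sketch of that post-install step, assuming
the repository is mounted at the current working directory; the `git -C`
probe is illustrative and not part of the repository's script:

    # Only whitelist the workspace when git actually rejects it,
    # leaving the user's global config untouched otherwise.
    if ! git -C "$PWD" status >/dev/null 2>&1; then
        git config --global --add safe.directory "$PWD"
    fi

Scoping the entry to a single path is safer than `safe.directory *`, which
disables the ownership check everywhere.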
From bd4d74953c7bb139d563df70dc14167f7a6c0e3b Mon Sep 17 00:00:00 2001
From: Altay Sansal
Date: Fri, 3 Nov 2023 10:31:04 -0500
Subject: [PATCH 20/26] Update docs/development_env.md

---
 docs/development_env.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/development_env.md b/docs/development_env.md
index f28aaece..feedd771 100755
--- a/docs/development_env.md
+++ b/docs/development_env.md
@@ -4,6 +4,6 @@ To facilitate development on different systems a [dev container](https://contain

 For contributing guidelines please look here [link](../CONTRIBUTING.md)

-### Known issues:
+### Known Issues:

 - `git config --global --add safe.directory \`pwd\` ` might be needed inside the container.

From 5198678b259691862224c31f082f6504e90830b8 Mon Sep 17 00:00:00 2001
From: Altay Sansal
Date: Fri, 3 Nov 2023 10:37:06 -0500
Subject: [PATCH 21/26] Update CONTRIBUTING.md

---
 CONTRIBUTING.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 4b0ac9be..40da71cd 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -45,7 +45,7 @@ You need Python 3.9+ and the following tools:

 A [dev container](./docs/development_env.md) has been setup to provide a clean environment with the required dependencies.

-## How to install and run MDIO
+## How to Install and Run MDIO

 Install the package with development requirements:

From eb7cadac8368215ee4c507fe56b1f70ca54ddf94 Mon Sep 17 00:00:00 2001
From: Altay Sansal
Date: Fri, 3 Nov 2023 10:37:18 -0500
Subject: [PATCH 22/26] Update docs/development_env.md

---
 docs/development_env.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/development_env.md b/docs/development_env.md
index feedd771..1856c68e 100755
--- a/docs/development_env.md
+++ b/docs/development_env.md
@@ -6,4 +6,4 @@ For contributing guidelines please look here [link](../CONTRIBUTING.md)

 ### Known Issues:

-- `git config --global --add safe.directory \`pwd\` ` might be needed inside the container.
+- `git config --global --add safe.directory $(pwd)` might be needed inside the container.

From ba51a122130b3ab615b4070502673237de5b17da Mon Sep 17 00:00:00 2001
From: Mark Roberts
Date: Fri, 3 Nov 2023 15:41:08 +0000
Subject: [PATCH 23/26] Removed example notebook for segy ingestion and export.
--- notebooks/ingestion_and_export.ipynb | 1020 -------------------------- 1 file changed, 1020 deletions(-) delete mode 100755 notebooks/ingestion_and_export.ipynb diff --git a/notebooks/ingestion_and_export.ipynb b/notebooks/ingestion_and_export.ipynb deleted file mode 100755 index b6b4a4d4..00000000 --- a/notebooks/ingestion_and_export.ipynb +++ /dev/null @@ -1,1020 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "15742287", - "metadata": {}, - "source": [ - "# Debugging mdio\n", - "\n", - "In this notebook we will configure an environment that is useful for profiling and debugging mdio segy ingestion and export:\n", - "\n", - "- environment\n", - "- SEGY generation\n", - "- segy to mdio\n", - "- mdio to segy\n", - "\n", - "Memory issues:\n", - "https://distributed.dask.org/en/stable/worker-memory.html#memory-not-released-back-to-the-os\n", - "\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "id": "4bc09589", - "metadata": {}, - "source": [ - "## Environment\n", - "\n", - "First configure environment" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a3dd0987", - "metadata": {}, - "outputs": [], - "source": [ - "import sys\n", - "# Make sure ipykernel is installed\n", - "!{sys.executable} -m pip install ipykernel\n", - "# Install QC tools\n", - "!{sys.executable} -m pip install matplotlib pandas dask_memusage memray\n", - "# Make sure mdio is installed\n", - "!poetry install --extras \"distributed\"\n", - "\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "id": "928405ed", - "metadata": {}, - "source": [ - "After the previous cell is run the kernel needs to be restarted so the module gets picked up. Failure to do so will result in the following cell to fail with the error: `ModuleNotFoundError: No module named 'mdio'`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a964af42", - "metadata": {}, - "outputs": [], - "source": [ - "from mdio import mdio_to_segy, MDIOReader\n", - "#import dask.array as dask\n", - "import dask\n", - "from tqdm import tqdm\n", - "import warnings\n", - "warnings.filterwarnings(\"ignore\")\n", - "from dask.diagnostics import ProgressBar\n", - "import time\n", - "import os\n", - "from dask.distributed import LocalCluster, Client" - ] - }, - { - "cell_type": "markdown", - "id": "b6bc1c7e", - "metadata": {}, - "source": [ - "### Setup dask cluster\n", - "\n", - "\n", - "For dask applications the flow can use dask_memusage which is a much simpler profiler based on polling. memray seems to be a recent and significant improvement." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "64f384d6", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "import dask_memusage\n", - "import pandas as pd\n", - "\n", - "\n", - "tmp_path = \"/scratch/tmp2/\"\n", - "MY_TEMP = tmp_path\n", - "\n", - "dask.config.set({\"temporary_directory\": os.path.join(MY_TEMP, \"temp\")})\n", - "\n", - "dask.config.set({\"distributed.comm.timeouts.tcp\": \"90s\"})\n", - "dask.config.set({\"distributed.comm.timeouts.connect\": \"60s\"})\n", - "\n", - "num_cut_dask_workers = 2 \n", - "memory_cut_dask_worker = 60 \n", - "\n", - "gb = 1024**3\n", - "\n", - "use_dask = True\n", - "single_process = False\n", - "\n", - "if use_dask:\n", - " print(\n", - " f\"New local cluster. 
n_workers {num_cut_dask_workers} mem_limit = {memory_cut_dask_worker} Gb\"\n", - " )\n", - " with dask.config.set({\"distributed.scheduler.worker-saturation\": 1.0}):\n", - " if single_process:\n", - " client = Client(processes=False) \n", - " else:\n", - " cluster = LocalCluster(\n", - " n_workers=num_cut_dask_workers,\n", - " threads_per_worker=1,\n", - " memory_limit=memory_cut_dask_worker * gb,\n", - " )\n", - "\n", - " client = Client(cluster)\n", - "else:\n", - " client = None" - ] - }, - { - "cell_type": "markdown", - "id": "a6abf00f", - "metadata": {}, - "source": [ - "### Setup monitoring dashboard for dask\n", - "\n", - "The dask dashboard should automatically be setup on http://127.0.0.1:8787/status. The [configuration for the dev container](../.devcontainer/devcontainer.json) should have the port forwarding setup for this port enabling this to be viewed. The following cell will also give a summary of the client.\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9b959f19", - "metadata": {}, - "outputs": [], - "source": [ - "client\n" - ] - }, - { - "cell_type": "markdown", - "id": "f5194907", - "metadata": {}, - "source": [ - "#### Check python and mdio versions" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7e89ed8d", - "metadata": {}, - "outputs": [], - "source": [ - "import sys\n", - "print(f\"Python version: {sys.version}\")\n", - "print(f\"Python path: {sys.executable}\")\n", - "import mdio\n", - "print(f\"mdio version: {mdio.__version__}\")" - ] - }, - { - "cell_type": "markdown", - "id": "a8a4ee0e", - "metadata": {}, - "source": [ - "## SEGY generation" - ] - }, - { - "cell_type": "markdown", - "id": "0cfa333a", - "metadata": {}, - "source": [ - "#### Functions to generate segy files based on tests" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6826ddcc", - "metadata": {}, - "outputs": [], - "source": [ - "\"\"\"Test configuration before everything runs.\"\"\"\n", - "\n", - "\n", - "from __future__ import annotations\n", - "\n", - "import os\n", - "\n", - "import numpy as np\n", - "import pytest\n", - "import segyio\n", - "\n", - "from mdio.segy.geometry import StreamerShotGeometryType\n", - "def create_segy_mock_6d(\n", - " fake_segy_tmp: str,\n", - " num_samples: int,\n", - " shots: list,\n", - " cables: list,\n", - " receivers_per_cable: list,\n", - " shot_lines: list = [ # noqa: B006\n", - " 1,\n", - " ],\n", - " comp_types: list = [ # noqa: B006\n", - " 1,\n", - " ],\n", - " chan_header_type: StreamerShotGeometryType = StreamerShotGeometryType.A,\n", - " index_receivers: bool = True,\n", - ") -> str:\n", - " \"\"\"Dummy 6D SEG-Y file for use in tests.\n", - "\n", - " Data will be created with:\n", - "\n", - " offset is byte location 37 - offset 4 bytes\n", - " fldr is byte location 9 - shot 4 byte\n", - " ep is byte location 17 - shot 4 byte\n", - " stae is byte location 137 - cable 2 byte\n", - " tracf is byte location 13 - channel 4 byte\n", - " styp is byte location 133 - shot_line 2 byte\n", - " afilf is byte location 141 - comptype 2 byte\n", - "\n", - " \"\"\"\n", - " spec = segyio.spec()\n", - " segy_file = fake_segy_tmp\n", - "\n", - " shot_count = len(shots)\n", - " total_chan = np.sum(receivers_per_cable)\n", - " trace_count_per_line = shot_count * total_chan\n", - " sline_count = len(shot_lines)\n", - " comp_trace_count = trace_count_per_line * sline_count\n", - " comp_count = len(comp_types)\n", - " trace_count = comp_trace_count * comp_count\n", - "\n", - " 
spec.format = 1\n", - " spec.samples = range(num_samples)\n", - " spec.tracecount = trace_count\n", - " spec.endian = \"big\"\n", - "\n", - " # Calculate shot, cable, channel/receiver numbers and header values\n", - " cable_headers = []\n", - " channel_headers = []\n", - "\n", - " # TODO: Add strict=True and remove noqa when minimum Python is 3.10\n", - " for cable, num_rec in zip(cables, receivers_per_cable): # noqa: B905\n", - " cable_headers.append(np.repeat(cable, num_rec))\n", - "\n", - " channel_headers.append(np.arange(num_rec) + 1)\n", - "\n", - " cable_headers = np.hstack(cable_headers)\n", - " channel_headers = np.hstack(channel_headers)\n", - "\n", - " if chan_header_type == StreamerShotGeometryType.B:\n", - " channel_headers = np.arange(total_chan) + 1\n", - "\n", - " index_receivers = True\n", - " if chan_header_type == StreamerShotGeometryType.C:\n", - " index_receivers = False\n", - "\n", - " shot_headers = np.hstack([np.repeat(shot, total_chan) for shot in shots])\n", - " cable_headers = np.tile(cable_headers, shot_count)\n", - " channel_headers = np.tile(channel_headers, shot_count)\n", - "\n", - " # Add shot lines\n", - " shot_line_headers = np.hstack(\n", - " [np.repeat(shot_line, trace_count_per_line) for shot_line in shot_lines]\n", - " )\n", - "\n", - " shot_headers = np.tile(shot_headers, sline_count)\n", - " cable_headers = np.tile(cable_headers, sline_count)\n", - " channel_headers = np.tile(channel_headers, sline_count)\n", - "\n", - " # Add multiple components\n", - " comptype_headers = np.hstack(\n", - " [np.repeat(comp, comp_trace_count) for comp in comp_types]\n", - " )\n", - "\n", - " shot_line_headers = np.tile(shot_line_headers, comp_count)\n", - " shot_headers = np.tile(shot_headers, comp_count)\n", - " cable_headers = np.tile(cable_headers, comp_count)\n", - " channel_headers = np.tile(channel_headers, comp_count)\n", - "\n", - " with segyio.create(segy_file, spec) as f:\n", - " for trc_idx in range(trace_count):\n", - " shot = shot_headers[trc_idx]\n", - " cable = cable_headers[trc_idx]\n", - " channel = channel_headers[trc_idx]\n", - " shot_line = shot_line_headers[trc_idx]\n", - " comptype = comptype_headers[trc_idx]\n", - "\n", - " # offset is byte location 37 - offset 4 bytes\n", - " # fldr is byte location 9 - shot 4 byte\n", - " # ep is byte location 17 - shot 4 byte\n", - " # stae is byte location 137 - cable 2 byte\n", - " # tracf is byte location 13 - channel 4 byte\n", - " # styp is byte location 133 - shot_line 2 byte\n", - " # afilf is byte location 141 - comptype 2 byte\n", - "\n", - " if index_receivers:\n", - " f.header[trc_idx].update(\n", - " offset=0,\n", - " fldr=shot,\n", - " ep=shot,\n", - " stae=cable,\n", - " tracf=channel,\n", - " styp=shot_line,\n", - " afilf=comptype,\n", - " )\n", - " else:\n", - " f.header[trc_idx].update(\n", - " offset=0,\n", - " fldr=shot,\n", - " ep=shot,\n", - " stae=cable,\n", - " styp=shot_line,\n", - " afilf=comptype,\n", - " )\n", - "\n", - " samples = np.linspace(start=shot, stop=shot + 1, num=num_samples)\n", - " f.trace[trc_idx] = samples.astype(\"float32\")\n", - "\n", - " f.bin.update()\n", - "\n", - " return segy_file\n", - "\n", - "def segy_mock_6d_shots(segy_path: str) -> dict[str, str]:\n", - " \"\"\"Generate mock 6D shot SEG-Y files.\"\"\"\n", - " num_samples = 25\n", - " shots = [2, 3, 5]\n", - " cables = [0, 101, 201, 301]\n", - " receivers_per_cable = [1, 5, 7, 5]\n", - " shot_lines = [1, 2, 4, 5, 99]\n", - " comp_types = [1, 2, 3, 4]\n", - "\n", - " \n", - " chan_header_type = 
StreamerShotGeometryType.A,\n", - " \n", - " segy_path = create_segy_mock_6d(\n", - " segy_path,\n", - " num_samples=num_samples,\n", - " shots=shots,\n", - " cables=cables,\n", - " receivers_per_cable=receivers_per_cable,\n", - " chan_header_type=chan_header_type,\n", - " shot_lines=shot_lines,\n", - " comp_types=comp_types,\n", - " )\n", - " return segy_path\n", - "\n", - "def segy_mock_4d_shots(segy_path: str) -> dict[str, str]:\n", - " \"\"\"Generate mock 4D shot SEG-Y files.\"\"\"\n", - " num_samples = 25\n", - " shots = [2, 3, 5]\n", - " cables = [0, 101, 201, 301]\n", - " receivers_per_cable = [1, 5, 7, 5]\n", - " shot_lines = [1,]\n", - " comp_types = [1,]\n", - "\n", - " \n", - " chan_header_type = StreamerShotGeometryType.A,\n", - " \n", - " segy_path = create_segy_mock_6d(\n", - " segy_path,\n", - " num_samples=num_samples,\n", - " shots=shots,\n", - " cables=cables,\n", - " receivers_per_cable=receivers_per_cable,\n", - " chan_header_type=chan_header_type,\n", - " shot_lines=shot_lines,\n", - " comp_types=comp_types,\n", - " )\n", - " return segy_path\n", - "\n", - "def segy_mock_4d_shots_large(segy_path: str, num_shots:int=100) -> dict[str, str]:\n", - " \"\"\"Generate mock 4D shot SEG-Y files at a reasonable scale.\"\"\"\n", - " num_samples = 4000\n", - " num_cables = 12\n", - " num_receivers_per_cable = 250 \n", - " shots = range(num_shots)\n", - " cables = range(num_cables)\n", - " receivers_per_cable = [num_receivers_per_cable,]*num_cables\n", - " shot_lines = [1,]\n", - " comp_types = [1,]\n", - "\n", - " \n", - " chan_header_type = StreamerShotGeometryType.A,\n", - " \n", - " segy_path = create_segy_mock_6d(\n", - " segy_path,\n", - " num_samples=num_samples,\n", - " shots=shots,\n", - " cables=cables,\n", - " receivers_per_cable=receivers_per_cable,\n", - " chan_header_type=chan_header_type,\n", - " shot_lines=shot_lines,\n", - " comp_types=comp_types,\n", - " )\n", - " return segy_path" - ] - }, - { - "cell_type": "markdown", - "id": "75b02400", - "metadata": {}, - "source": [ - "#### segy config" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "55229136", - "metadata": {}, - "outputs": [], - "source": [ - "dims = 4\n", - "large_segy = True\n", - "num_shots = 1000\n", - "\n", - "if dims == 6:\n", - " index_header_names = (\"comptype\", \"shot_line\",\"shot_point\", \"cable\", \"channel\")\n", - " index_types = (\"int16\", \"int16\", \"int32\", \"int16\", \"int32\")\n", - " index_bytes= (141, 133, 17, 137, 13)\n", - " chunksize = (1, 2, 4, 2, 128, 1024)\n", - " grid_overrides = {\"AutoChannelWrap\": True}\n", - " num_shots = 3\n", - " segy_path = os.path.join(tmp_path, f\"segy_{dims}d_{num_shots}.sgy\")\n", - " print(segy_path)\n", - " access_pattern=\"012345\"\n", - "elif dims == 4:\n", - " index_header_names = (\"shot_point\", \"cable\", \"channel\")\n", - " index_types = (\"int32\", \"int16\", \"int32\")\n", - " index_bytes= ( 17, 137, 13)\n", - " chunksize = (4, 2, 128, 1024)\n", - " grid_overrides = {\"AutoChannelWrap\": True}\n", - " if large_segy:\n", - " segy_path = os.path.join(tmp_path, f\"segy_{dims}d_{num_shots}.sgy\")\n", - " \n", - " else:\n", - " num_shots = 3\n", - " segy_path = os.path.join(tmp_path, f\"segy_{dims}d_{num_shots}.sgy\")\n", - " access_pattern=\"0123\"\n", - " \n", - "print(segy_path)\n" - ] - }, - { - "cell_type": "markdown", - "id": "5baf43f7", - "metadata": {}, - "source": [ - "#### Create SEGY" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "07a40484", - "metadata": {}, - "outputs": [], - 
"source": [ - "\n", - "if dims == 6:\n", - " segy_path = segy_mock_6d_shots(segy_path)\n", - "elif dims == 4:\n", - " if large_segy:\n", - " segy_path = segy_mock_4d_shots_large(segy_path, num_shots=num_shots)\n", - " else:\n", - " segy_path = segy_mock_4d_shots(segy_path)\n", - " \n", - "print(segy_path)" - ] - }, - { - "cell_type": "markdown", - "id": "45aa8c7f", - "metadata": {}, - "source": [ - "## Ingest segy to mdio\n", - "\n", - "#### Config" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fe6f5e95", - "metadata": {}, - "outputs": [], - "source": [ - "mdio_path = os.path.join(tmp_path, f\"segy_{dims}d_import_{num_shots}.mdio\")\n", - "kwargs = {\n", - " 'segy_path': segy_path,\n", - " 'mdio_path_or_buffer': mdio_path,\n", - " 'index_names': index_header_names,\n", - " 'index_bytes': index_bytes,\n", - " 'index_types': index_types,\n", - " 'chunksize': chunksize, # (1, chunksize_2d, -1),\n", - " 'overwrite': True\n", - "}\n", - "if grid_overrides is not None:\n", - " kwargs['grid_overrides'] = grid_overrides\n", - "kwargs" - ] - }, - { - "cell_type": "markdown", - "id": "6ab5982f", - "metadata": {}, - "source": [ - "#### Actual segy to mdio conversion based on config" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a622fadc", - "metadata": {}, - "outputs": [], - "source": [ - "%%time\n", - "mdio.segy_to_mdio(**kwargs)" - ] - }, - { - "cell_type": "markdown", - "id": "e43e4fdf", - "metadata": {}, - "source": [ - "#### QC of generated mdio file" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "caa55b34", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "def info(\n", - " input_mdio_file,\n", - " output_format=\"plain\",\n", - " access_pattern=\"012\",\n", - "):\n", - " \"\"\"Provide information on MDIO dataset.\n", - " By default this returns human readable information about the grid and stats for\n", - " the dataset. 
If output-format is set to json then a json is returned to\n", - " facilitate parsing.\n", - " \"\"\"\n", - " reader = mdio.MDIOReader(\n", - " input_mdio_file, access_pattern=access_pattern, return_metadata=True\n", - " )\n", - " mdio_dict = {}\n", - " mdio_dict[\"grid\"] = {}\n", - " for axis in reader.grid.dim_names:\n", - " dim = reader.grid.select_dim(axis)\n", - " min = dim.coords[0]\n", - " max = dim.coords[-1]\n", - " size = dim.coords.shape[0]\n", - " axis_dict = {\"name\": axis, \"min\": min, \"max\": max, \"size\": size}\n", - " mdio_dict[\"grid\"][axis] = axis_dict\n", - "\n", - " if output_format == \"plain\":\n", - " print(\"{:<10} {:<10} {:<10} {:<10}\".format(\"NAME\", \"MIN\", \"MAX\", \"SIZE\"))\n", - " print(\"=\" * 40)\n", - "\n", - " for _, axis_dict in mdio_dict[\"grid\"].items():\n", - " print(\n", - " \"{:<10} {:<10} {:<10} {:<10}\".format(\n", - " axis_dict[\"name\"],\n", - " axis_dict[\"min\"],\n", - " axis_dict[\"max\"],\n", - " axis_dict[\"size\"],\n", - " )\n", - " )\n", - "\n", - " print(\"\\n\\n{:<10} {:<10}\".format(\"STAT\", \"VALUE\"))\n", - " print(\"=\" * 20)\n", - " for name, stat in reader.stats.items():\n", - " print(f\"{name:<10} {stat:<10}\")\n", - " if output_format == \"json\":\n", - " mdio_dict[\"stats\"] = reader.stats\n", - " print(mdio_dict)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1fc7838d", - "metadata": {}, - "outputs": [], - "source": [ - "info(\n", - " mdio_path,\n", - " output_format=\"plain\",\n", - " access_pattern=access_pattern,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8feef694", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "reader = mdio.MDIOReader(\n", - " mdio_path, access_pattern=access_pattern, return_metadata=True\n", - ")\n", - "comp_dim = reader.grid.select_dim(index_header_names[0])\n", - "\n", - "print(f\"comp_dim: {comp_dim} for {reader}\")" - ] - }, - { - "cell_type": "markdown", - "id": "b6fffdea", - "metadata": {}, - "source": [ - "## SEGY export (cut)\n", - "\n", - "#### First declare functions" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "40a44354", - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext memray\n", - "\n", - "import csv\n", - "import psutil\n", - "import matplotlib.pyplot as plt\n", - "\n", - "from distributed.diagnostics import MemorySampler\n", - "from distributed.diagnostics.memray import memray_workers\n", - "\n", - "\n", - "def processing_time(end, start):\n", - " return (end - start) / 60\n", - "\n", - "\n", - "def file_size(file):\n", - " import os\n", - "\n", - " filesize = os.path.getsize(file)\n", - " return filesize\n", - "\n", - "\n", - "def make_folders(folder_path):\n", - " import os\n", - "\n", - " msg = \"Folder already exists\"\n", - " if not os.path.exists(folder_path):\n", - " os.makedirs(folder_path)\n", - " msg = \"Folders created\"\n", - " return msg\n", - "\n", - "def create_segy(mdio_source, temp_local_destination, client, selection_mask=None, access_pattern=\"0123\"):\n", - " start = time.perf_counter()\n", - " access_pattern = \"0123\"\n", - " print(\"Started_conv\")\n", - "\n", - " _ = psutil.cpu_percent(interval=None, percpu=True)\n", - " mdio_to_segy(\n", - " mdio_source,\n", - " temp_local_destination,\n", - " selection_mask=selection_mask,\n", - " access_pattern=access_pattern,\n", - " client=client,\n", - " )\n", - " mdio_to_segy_time = time.perf_counter()\n", - " cpu_mdio_to_segy = psutil.cpu_percent(interval=None, percpu=True)\n", - " 
-    "    _ = psutil.cpu_percent(interval=None, percpu=True)\n",
-    "    mdio_to_segy(\n",
-    "        mdio_source,\n",
-    "        temp_local_destination,\n",
-    "        selection_mask=selection_mask,\n",
-    "        access_pattern=access_pattern,\n",
-    "        client=client,\n",
-    "    )\n",
-    "    mdio_to_segy_time = time.perf_counter()\n",
-    "    cpu_mdio_to_segy = psutil.cpu_percent(interval=None, percpu=True)\n",
-    "    max_cpu_mdio_to_sgy = max(cpu_mdio_to_segy)\n",
-    "    min_cpu_usage = min(cpu_mdio_to_segy)\n",
-    "    cpu_usage_avg = np.mean(np.array(cpu_mdio_to_segy))\n",
-    "    print(\"cpu_usage_mdio_to_segy_max\", max_cpu_mdio_to_sgy)\n",
-    "    mem_usage_mdio_to_sgy = int(\n",
-    "        psutil.virtual_memory().total - psutil.virtual_memory().available\n",
-    "    )\n",
-    "    return (\n",
-    "        max_cpu_mdio_to_sgy,\n",
-    "        min_cpu_usage,\n",
-    "        cpu_usage_avg,\n",
-    "        mem_usage_mdio_to_sgy,\n",
-    "        processing_time(mdio_to_segy_time, start),\n",
-    "    )\n",
-    "\n",
-    "\n",
-    "def get_max_mem_from_csv(filename: str):\n",
-    "    \"\"\"Find maximum memory usage from a dask_memusage profiler CSV.\"\"\"\n",
-    "    print(f\"mem_file = {filename}\")\n",
-    "    try:\n",
-    "        mem_df = pd.read_csv(filename)\n",
-    "        max_dask_task_memory = int(mem_df[\"max_memory_mb\"].max() * (1024**2))\n",
-    "    except Exception:\n",
-    "        # Fall back to manual parsing when pandas cannot read the CSV.\n",
-    "        max_mem_array = []\n",
-    "        task_name_array = []\n",
-    "        with open(filename) as fp:\n",
-    "            lines = fp.readlines()\n",
-    "        for line in lines:\n",
-    "            fields = line.split(',')\n",
-    "            if len(fields) > 4:\n",
-    "                try:\n",
-    "                    max_mem_array.append(float(fields[-1]))\n",
-    "                    task_name_array.append(fields[0])\n",
-    "                except ValueError:\n",
-    "                    continue\n",
-    "        max_index = max_mem_array.index(max(max_mem_array))\n",
-    "        print(f\"max_index={max_index} max_mem={max_mem_array[max_index]}MB max_task_name={task_name_array[max_index]}\")\n",
-    "        max_dask_task_memory = int(max_mem_array[max_index] * (1024**2))\n",
-    "    return max_dask_task_memory\n",
-    "\n",
-    "\n",
-    "def get_large_mem_fns_from_csv(filename: str, thresh=50.0):\n",
-    "    \"\"\"Find functions with a large memory footprint in a dask_memusage profiler CSV.\"\"\"\n",
-    "    print(f\"mem_file = {filename}\")\n",
-    "    task_name_array = []\n",
-    "    with open(filename) as fp:\n",
-    "        lines = fp.readlines()\n",
-    "    for line in lines:\n",
-    "        fields = line.split(',')\n",
-    "        if len(fields) > 2:\n",
-    "            try:\n",
-    "                if float(fields[-1]) > thresh:\n",
-    "                    task_name_array.append(fields[0])\n",
-    "            except ValueError:\n",
-    "                continue\n",
-    "    return list(set(task_name_array))\n",
-    "\n",
-    "\n",
-    "def plot_function_mem_from_csv(filename: str, fn_name: str):\n",
-    "    \"\"\"Plot memory usage over time for a single function.\n",
-    "\n",
-    "    Args:\n",
-    "        filename: CSV from the dask_memusage profiler.\n",
-    "        fn_name: Name of the function to track and plot.\n",
-    "    \"\"\"\n",
-    "    print(f\"mem_file = {filename}\")\n",
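-    "    # The last two columns of each matching row are plotted as the \"Total\" and\n",
-    "    # \"Proc\" memory series (assumed layout of the dask_memusage CSV).\n",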
-    "    mem_array_1 = []\n",
-    "    mem_array_2 = []\n",
-    "    with open(filename) as fp:\n",
-    "        lines = fp.readlines()\n",
-    "    for line in lines:\n",
-    "        fields = line.split(',')\n",
-    "        if len(fields) > 4 and fn_name in fields[0]:\n",
-    "            try:\n",
-    "                mem_array_1.append(float(fields[-1]))\n",
-    "                mem_array_2.append(float(fields[-2]))\n",
-    "            except ValueError:\n",
-    "                continue\n",
-    "    if len(mem_array_1) > 1:\n",
-    "        plt.figure()\n",
-    "        plt.plot(mem_array_1, label=\"Total\")\n",
-    "        plt.plot(mem_array_2, label=\"Proc\")\n",
-    "        plt.title(f\"{fn_name}\")\n",
-    "        plt.xlabel(\"Occurrence\")\n",
-    "        plt.ylabel(\"Memory (MB)\")\n",
-    "        plt.legend()\n",
-    "        plt.savefig(f'{filename}_{fn_name}.png')\n",
-    "        plt.show()\n",
-    "    elif len(mem_array_1) == 1:\n",
-    "        print(f\"{fn_name} used {mem_array_1[0]}mb memory\")\n",
-    "    else:\n",
-    "        print(f\"Had issue reading {fn_name} memory usage\")\n",
-    "    return mem_array_1\n",
-    "\n",
-    "\n",
-    "def cut(input_mdio: str, run=0, access_pattern=\"0123\", client=None, test_name=\"6372\"):\n",
-    "    \"\"\"Cut a SEG-Y file from an MDIO file, with memory QC.\"\"\"\n",
-    "    with open(\n",
-    "        os.path.join(MY_TEMP, test_name + \"_metrics_export.csv\"), \"a+\", newline=\"\"\n",
-    "    ) as write_obj:\n",
-    "        csv_writer = csv.writer(write_obj)\n",
-    "        csv_writer.writerow(\n",
-    "            [\n",
-    "                \"chunk_case\",\n",
-    "                \"file_size\",\n",
-    "                \"reader_shape\",\n",
-    "                \"time\",\n",
-    "                \"cpu_usage_max\",\n",
-    "                \"cpu_usage_min\",\n",
-    "                \"cpu_usage_avg\",\n",
-    "                \"mem_usage\",\n",
-    "                \"max_dask_task_memory\",\n",
-    "                \"run\",\n",
-    "            ]\n",
-    "        )\n",
-    "\n",
-    "    print(\"Converting MDIO to SEG-Y via local Dask\")\n",
-    "\n",
-    "    TEMP_DESTINATION = os.path.join(MY_TEMP, test_name + \".sgy\")\n",
-    "    print(\"TEMP_DESTINATION is:\", TEMP_DESTINATION)\n",
-    "\n",
-    "    # Set to True to use dask_memusage to track memory usage\n",
-    "    track_memusage = False\n",
-    "    # Set to True to use memray to track memory usage\n",
-    "    use_memray = False\n",
-    "    # Create a local Dask cluster only when a client was passed in\n",
-    "    use_dask = client is not None\n",
-    "\n",
-    "    if use_dask:\n",
-    "        print(\n",
-    "            f\"New local cluster. n_workers {num_cut_dask_workers} mem_limit = {memory_cut_dask_worker} Gb\"\n",
-    "        )\n",
-    "        with dask.config.set({\"distributed.scheduler.worker-saturation\": 1.0}):\n",
-    "            cluster = LocalCluster(\n",
-    "                n_workers=num_cut_dask_workers,\n",
-    "                threads_per_worker=1,\n",
-    "                memory_limit=memory_cut_dask_worker * gb,\n",
-    "            )\n",
-    "        client = Client(cluster)\n",
-    "        if track_memusage:\n",
-    "            mem_file = os.path.join(os.getcwd(), f\"{test_name}_ram_usage.csv\")\n",
-    "            dask_memusage.install(client.cluster.scheduler, mem_file)\n",
-    "\n",
-    "    if track_memusage:\n",
-    "        ms = MemorySampler()\n",
-    "        if use_memray:\n",
-    "            with memray_workers(f\"memray_{test_name}\", report_args=('flamegraph', '--temporal')):\n",
-    "                with ms.sample(test_name):\n",
-    "                    (\n",
-    "                        cpu_usage_max,\n",
-    "                        cpu_usage_min,\n",
-    "                        cpu_usage_avg,\n",
-    "                        mem_usage,\n",
-    "                        time_taken,\n",
-    "                    ) = create_segy(input_mdio, TEMP_DESTINATION, client)\n",
-    "        else:\n",
-    "            with ms.sample(test_name):\n",
-    "                (\n",
-    "                    cpu_usage_max,\n",
-    "                    cpu_usage_min,\n",
-    "                    cpu_usage_avg,\n",
-    "                    mem_usage,\n",
-    "                    time_taken,\n",
-    "                ) = create_segy(input_mdio, TEMP_DESTINATION, client)\n",
-    "    else:\n",
-    "        (\n",
-    "            cpu_usage_max,\n",
-    "            cpu_usage_min,\n",
-    "            cpu_usage_avg,\n",
-    "            mem_usage,\n",
-    "            time_taken,\n",
-    "        ) = create_segy(input_mdio, TEMP_DESTINATION, client)\n",
-    "\n",
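-    "    # NOTE: when a client is available the export above runs untracked; the block\n",
-    "    # below then runs it a second time with dask_memusage and MemorySampler\n",
-    "    # attached so per-task memory can be profiled.\n",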
-    "    if client is not None:\n",
-    "        mem_file = os.path.join(MY_TEMP, test_name + \"_ram_usage.csv\")\n",
-    "        dask_memusage.install(client.cluster.scheduler, mem_file)\n",
-    "        ms = MemorySampler()\n",
-    "\n",
-    "        with ms.sample(test_name):\n",
-    "            (\n",
-    "                cpu_usage_max,\n",
-    "                cpu_usage_min,\n",
-    "                cpu_usage_avg,\n",
-    "                mem_usage,\n",
-    "                time_taken,\n",
-    "            ) = create_segy(input_mdio, TEMP_DESTINATION, client, access_pattern=access_pattern)\n",
-    "        memory_plot = ms.plot(align=True)\n",
-    "        fig = memory_plot.get_figure()\n",
-    "        fig.savefig(test_name + \"_.jpg\", bbox_inches=\"tight\")\n",
-    "    else:\n",
-    "        (\n",
-    "            cpu_usage_max,\n",
-    "            cpu_usage_min,\n",
-    "            cpu_usage_avg,\n",
-    "            mem_usage,\n",
-    "            time_taken,\n",
-    "        ) = create_segy(input_mdio, TEMP_DESTINATION, client, access_pattern=access_pattern)\n",
-    "\n",
-    "    print(\"cut completed\")\n",
-    "\n",
-    "    segy_size_gb = os.path.getsize(TEMP_DESTINATION) / (1024**3)\n",
-    "    print(f\"mdio to segy completed in {time_taken} minutes\")\n",
-    "\n",
-    "    reader = MDIOReader(\n",
-    "        mdio_path_or_buffer=input_mdio, backend=\"dask\", access_pattern=access_pattern,\n",
-    "    )\n",
-    "\n",
-    "    # Defaults to 0 when no profiling CSV was produced (no client).\n",
-    "    max_dask_task_memory = 0\n",
-    "    if client is not None:\n",
-    "        max_dask_task_memory = get_max_mem_from_csv(mem_file)\n",
-    "        # Find all functions that use a significant amount of memory\n",
-    "        large_mem_fns = get_large_mem_fns_from_csv(mem_file)\n",
-    "        # Make plots of function memory over time\n",
-    "        for fn_name in large_mem_fns:\n",
-    "            plot_function_mem_from_csv(mem_file, fn_name)\n",
-    "        plot_function_mem_from_csv(mem_file, \"write_to_segy_stack-segy_concat\")\n",
-    "\n",
-    "    metrics = [\n",
-    "        chunksize,\n",
-    "        segy_size_gb,\n",
-    "        reader.shape,\n",
-    "        time_taken,\n",
-    "        cpu_usage_max,\n",
-    "        cpu_usage_min,\n",
-    "        cpu_usage_avg,\n",
-    "        mem_usage,\n",
-    "        max_dask_task_memory,\n",
-    "        run,\n",
-    "    ]\n",
-    "\n",
-    "    with open(\n",
-    "        os.path.join(MY_TEMP, test_name + \"_metrics_export.csv\"), \"a+\", newline=\"\"\n",
-    "    ) as write_obj:\n",
-    "        csv_writer = csv.writer(write_obj)\n",
-    "        csv_writer.writerow(metrics)\n",
-    "\n",
-    "    print(f\"{metrics=}\")\n",
-    "    os.remove(TEMP_DESTINATION)\n",
-    "    time.sleep(30)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "bb1d5240",
-   "metadata": {},
-   "source": [
-    "### Run cut"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "5882a46a",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Uncomment to benchmark both the 100-shot and 1000-shot cases:\n",
-    "# file_list = [os.path.join(MY_TEMP, 'segy_4d_import_100.mdio'), os.path.join(MY_TEMP, 'segy_4d_import_1000.mdio')]\n",
-    "# test_names = [\"test_100shots\", \"test_1000shots\"]\n",
-    "\n",
-    "file_list = [os.path.join(MY_TEMP, 'segy_4d_import_1000.mdio')]\n",
-    "test_names = [\"test_1000shots\"]\n",
-    "for mdio_file, test_name in zip(file_list, test_names):\n",
-    "    cut(mdio_file, client=client, test_name=test_name)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "0d2c6d09",
-   "metadata": {},
-   "source": [
-    "Viewing the HTML reports from memray with the HTML Preview plugin fails. Open them in an external web browser such as Chrome."
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.4"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
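
A workaround for the HTML Preview limitation noted in the notebook's last cell is to serve the memray reports over plain HTTP and open them in an external browser. A minimal sketch, assuming the reports were written to a `memray_test_1000shots` directory by the `memray_workers` call above (substitute the actual `memray_<test_name>` path):

```console
$ python -m http.server 8000 --directory memray_test_1000shots
$ # then browse to http://localhost:8000 in Chrome
```
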
From f33b1240c36f7d7ee0435f1bf785a2571e3771cc Mon Sep 17 00:00:00 2001
From: Altay Sansal
Date: Fri, 3 Nov 2023 10:45:19 -0500
Subject: [PATCH 24/26] consolidate dev container to contributing.md

---
 CONTRIBUTING.md         | 13 ++++++++++++-
 docs/development_env.md |  9 ---------
 2 files changed, 12 insertions(+), 10 deletions(-)
 delete mode 100755 docs/development_env.md

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 40da71cd..3cd5a84b 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -43,7 +43,16 @@ You need Python 3.9+ and the following tools:
 - [Nox]
 - [nox-poetry]
 
-A [dev container](./docs/development_env.md) has been setup to provide a clean environment with the required dependencies.
+An alternative is to use a [Development Container], which has been set up to
+provide an environment with the required dependencies. This facilitates
+development on different systems.
+
+This should seamlessly enable development for users of [VS Code] on systems with Docker installed.
+
+### Known Issues:
+
+- `git config --global --add safe.directory $(pwd)` might be needed inside the container.
+
 
 ## How to Install and Run MDIO
 
@@ -64,6 +73,8 @@ $ poetry run mdio
 
 [poetry]: https://python-poetry.org/
 [nox]: https://nox.thea.codes/
 [nox-poetry]: https://nox-poetry.readthedocs.io/
+[development container]: https://containers.dev/
+[vs code]: https://code.visualstudio.com/docs/devcontainers/containers/
 
 ## How to test the project

diff --git a/docs/development_env.md b/docs/development_env.md
deleted file mode 100755
index 1856c68e..00000000
--- a/docs/development_env.md
+++ /dev/null
@@ -1,9 +0,0 @@
-# Development Environment
-
-To facilitate development on different systems a [dev container](https://containers.dev/) has been added. This should seamlessly enable development for users of [VSCode](https://code.visualstudio.com/docs/devcontainers/containers) on systems with docker installed.
-
-For contributing guidelines please look here [link](../CONTRIBUTING.md)
-
-### Known Issues:
-
-- `git config --global --add safe.directory $(pwd)` might be needed inside the container.

From 4f467cecd75f7e3faac8a5cc7125c0963c7249c0 Mon Sep 17 00:00:00 2001
From: Altay Sansal
Date: Fri, 3 Nov 2023 10:46:28 -0500
Subject: [PATCH 25/26] remove redundant link

---
 docs/index.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/docs/index.md b/docs/index.md
index f8196a80..ae51a65b 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -21,7 +21,6 @@ notebooks/compression
 usage
 reference
 contributing
-Development Environment
 Code of Conduct
 License
 Changelog

From f0198dd33ff17f0a1548b28818d61800ba126c48 Mon Sep 17 00:00:00 2001
From: Altay Sansal
Date: Fri, 3 Nov 2023 10:49:26 -0500
Subject: [PATCH 26/26] lint

---
 CONTRIBUTING.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 3cd5a84b..24ef01d3 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -53,7 +53,6 @@ This should seamlessly enable development for users of [VS Code] on systems with
 
 - `git config --global --add safe.directory $(pwd)` might be needed inside the container.
 
-
 ## How to Install and Run MDIO
 
 Install the package with development requirements: