Skip to content

Commit

Permalink
Bug fix and log statements.
Browse files Browse the repository at this point in the history
  • Loading branch information
markspec authored and tasansal committed May 18, 2023
1 parent 7db1c77 commit 9197e4e
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 24 deletions.
30 changes: 21 additions & 9 deletions src/mdio/segy/utilities.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
"""More utilities for reading SEG-Ys."""


from __future__ import annotations

import logging
from enum import Enum
from typing import Sequence

Expand All @@ -16,6 +15,9 @@
from mdio.segy.parsers import parse_trace_headers


logger = logging.getLogger(__name__)


class GeometryTemplateType(Enum):
"""Geometry template types as enum."""

Expand Down Expand Up @@ -107,16 +109,26 @@ def get_grid_plan( # noqa: C901
chan_idx = index_names.index("channel")
if "AutoChannelTraceQC" in grid_overrides:
trace_qc_count = int(grid_overrides["AutoChannelTraceQC"])
unique_cables, cable_chan_min, cable_chan_max, geom_type = qc_index_headers(
unique_cables, cable_chan_min, _cable_chan_max, geom_type = qc_index_headers(
index_headers, index_names, trace_qc_count
)

logger.info(f"Ingesting dataset as {geom_type.name}")
# TODO: Add strict=True and remove noqa when min Python is 3.10
for cable, chan_min, chan_max in zip( # noqa: B905
unique_cables, cable_chan_min, _cable_chan_max
):
logger.info(
f"Cable: {cable} has min chan: {chan_min} and max chan: {chan_max}"
)

# This might be slow and potentially could be improved with a rewrite
# to prevent so many lookups
if geom_type == GeometryTemplateType.STREAMER_B:
for idx, cable in enumerate(unique_cables):
cable_idxs = np.where(index_headers[:, cable_idx] == cable)
cc_min = cable_chan_min[idx]
# print(f"idx = {idx} cable = {cable} cc_min={cc_min}")
index_headers[cable_idxs, chan_idx] = (
index_headers[cable_idxs, chan_idx] - cc_min + 1
)
Expand Down Expand Up @@ -202,13 +214,13 @@ def qc_index_headers(
geom_type = GeometryTemplateType.STREAMER_B
for idx, cable in enumerate(unique_cables):
min_val = cable_chan_min[idx]
max_val = cable_chan_max[cable_idx]
max_val = cable_chan_max[idx]
for idx2, cable2 in enumerate(unique_cables):
if cable2 == cable:
# Don't compare with itself
pass

if cable_chan_min[idx2] < max_val and cable_chan_max[idx2] > min_val:
if (
cable_chan_min[idx2] < max_val
and cable_chan_max[idx2] > min_val
and (cable2 != cable)
):
geom_type = GeometryTemplateType.STREAMER_A

# Return cable_chan_min values
Expand Down
74 changes: 59 additions & 15 deletions tests/integration/test_segy_import_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import os
from os.path import getsize
from typing import List

import dask
import numpy as np
Expand All @@ -18,20 +19,24 @@
dask.config.set(scheduler="synchronous")


def create_4d_segy(file_path, chan_header_type="a", **args):
def create_4d_segy(
file_path: str,
num_samples: int,
fldrs: List,
cables: List,
num_traces: List,
chan_header_type: str = "a",
**args,
):
"""Dummy 4D segy file for use in tests."""
import segyio

fldrs = [2, 3, 5]
cables = [0, 101, 201, 301]
num_traces = [1, 5, 7, 5]

spec = segyio.spec()
d = os.path.join(file_path, "data")
os.makedirs(d, exist_ok=True)
segy_file = os.path.join(d, f"4d_type_{chan_header_type}.sgy")
spec.format = 1
spec.samples = range(25)
spec.samples = range(num_samples)

trace_count = len(fldrs) * np.sum(num_traces)
spec.tracecount = trace_count
Expand All @@ -43,18 +48,18 @@ def create_4d_segy(file_path, chan_header_type="a", **args):
tracf = 0
for fldr in fldrs:
if chan_header_type == "b":
tracf = 0
tracf = 1
# TODO: Add strict=True and remove noqa when min supported Python is 3.10
for cable, length in zip(cables, num_traces): # noqa: B905
if chan_header_type == "a":
tracf = 0
tracf = 1
for _i in range(length):
# segyio names and byte locations for headers can be found at:
# https://segyio.readthedocs.io/en/latest/segyio.html
# fldr is byte location 9 - shot
# ep is byte location 17 - shot
# stae is byte location 137 - cable
# tracf is byte location 13 - channel
# https://segyio.readthedocs.io/en/latest/segyio.html
# fldr is byte location 9 - shot 4 byte
# ep is byte location 17 - shot 4 byte
# stae is byte location 137 - cable 2 byte
# tracf is byte location 13 - channel 4 byte

f.header[trno].update(
offset=1,
Expand All @@ -77,9 +82,10 @@ def create_4d_segy(file_path, chan_header_type="a", **args):

@pytest.mark.parametrize("header_locations", [(17, 137, 13)])
@pytest.mark.parametrize("header_names", [("shot", "cable", "channel")])
@pytest.mark.parametrize("header_lengths", [(4, 2, 4)])
@pytest.mark.parametrize("endian", ["big"])
@pytest.mark.parametrize(
"grid_overrides", [{"AutoChannelWrap": True, "AutoChannelTraceQC": 1000000}]
"grid_overrides", [{"AutoChannelWrap": True, "AutoChannelTraceQC": 100000}, None]
)
@pytest.mark.parametrize("chan_header_type", ["a", "b"])
class TestImport4D:
Expand All @@ -91,24 +97,62 @@ def test_import_4d_segy(
zarr_tmp,
header_locations,
header_names,
header_lengths,
endian,
grid_overrides,
chan_header_type,
):
"""Test importing a SEG-Y file to MDIO."""
segy_path = create_4d_segy(tmp_path, chan_header_type=chan_header_type)
num_samples = 25
fldrs = [2, 3, 5]
cables = [0, 101, 201, 301]
num_traces = [1, 5, 7, 5]
segy_path = create_4d_segy(
tmp_path,
num_samples=num_samples,
fldrs=fldrs,
cables=cables,
num_traces=num_traces,
chan_header_type=chan_header_type,
)

segy_to_mdio(
segy_path=segy_path,
mdio_path_or_buffer=zarr_tmp.__str__(),
index_bytes=header_locations,
index_names=header_names,
index_lengths=header_lengths,
chunksize=(8, 2, 128, 1024),
overwrite=True,
endian=endian,
grid_overrides=grid_overrides,
)

# QC mdio output
mdio = MDIOReader(zarr_tmp.__str__(), access_pattern="0123")
assert mdio.binary_header["Samples"] == num_samples
grid = mdio.grid

print(f"chan_header_type = {chan_header_type}")
print(f"grid_overrides = {grid_overrides}")
print(f"grid.select_dim(header_names[0]) = {grid.select_dim(header_names[0])}")
print(f"grid.select_dim(header_names[1]) = {grid.select_dim(header_names[1])}")
print(f"grid.select_dim(header_names[2]) = {grid.select_dim(header_names[2])}")
assert grid.select_dim(header_names[0]) == Dimension(fldrs, header_names[0])
assert grid.select_dim(header_names[1]) == Dimension(cables, header_names[1])

if "b" in chan_header_type and grid_overrides is None:
assert grid.select_dim(header_names[2]) == Dimension(
range(1, np.sum(num_traces) + 1), header_names[2]
)
else:
assert grid.select_dim(header_names[2]) == Dimension(
range(1, np.amax(num_traces) + 1), header_names[2]
)
assert grid.select_dim("sample") == Dimension(
range(0, num_samples, 1), "sample"
)


@pytest.mark.parametrize("header_locations", [(17, 13)])
@pytest.mark.parametrize("header_names", [("inline", "crossline")])
Expand Down

0 comments on commit 9197e4e

Please sign in to comment.