Skip to content

Commit

Permalink
Merge branch 'oracle-samples:main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
richl9 authored Sep 18, 2024
2 parents da9d6d5 + 3afc561 commit 68765a7
Show file tree
Hide file tree
Showing 4 changed files with 128 additions and 10 deletions.
54 changes: 54 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,65 @@
Changelog
=========

Release Cycle
-------------

Prior to v1, the version numbers loosely followed the rule that new features
would bump the minor version level, and bug fix releases would bump the patch
version level.

Beginning with v1, a new scheme is adopted which allows for a "development"
version, and a stable version. Using the version numbers `x.y.z`, we have:

1. The **development** version is of the form `x.0.z`, where `x` represents the
major version under development. Each release is performed by incrementing
`z`, the patch level, regardless of the type of changes. The development
version ends with the release of the "stable" version of `x.1.0`. The
development version is maintained on the `main` branch.
2. The **stable** version is of the form `x.y.z`, where `y >= 1`, and `x` is of
course the major version. The "stable" versions are the only ones which are
released to Oracle Linux as RPMs. Releases will generally increment `z`, the
patch version, for bug fix releases. In rare cases, we may instead increment
`y` when we want to backport a module to the stable release. The stable
version is maintained in a branch named
`stable/vX`, where `X` is replaced with the major version number (e.g.
`stable/v1`).

The stable version is maintained in parallel as the development version is
developed. Fixes in the stable release must first be present in the development
release (and all newer stable releases, if applicable).

For the most part, regular maintenance of the stable version will end with the
release of the next stable version, but maintenance may continue at our
discretion.

Examples:

- `1.1.0` - the initial public release of the `1.x` stable series.
- `1.1.1` - the first bugfix release of the `1.x` stable series.
- `2.0.0` - the initial development version of the `2.x` series.
- `2.0.1` - an incremental development release in `2.x` development. It may
contain bug fixes or new features.
- `2.1.0` - the initial public release of the `2.x` stable series.


Unreleased
----------

Changes which are committed to git, but not yet released, may appear here.

1.1.0 - Tue, Aug 27, 2024
-------------------------

This is the first public release of drgn-tools!

* Fixes for the irq, workqueue, bt, & nfs_tools modules.
* Add ls and fsnotify modules.
* Added new helpers for tasks & task states.
* Basic functionality for running on UEK-NEXT (unsupported).
* RPM name updated to "drgn-tools".
* Support for CTF debuginfo added for the locks module.

0.9.1 - Mon, Apr 22, 2024
-------------------------

Expand Down
5 changes: 4 additions & 1 deletion buildrpm/python-drgn-tools.spec
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright (c) 2024, Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
Name: python-drgn-tools
Version: 0.9.1
Version: 1.1.0
Release: 1%{?dist}
Summary: Helper scripts for drgn, containing the corelens utility

Expand Down Expand Up @@ -61,6 +61,9 @@ rm %{buildroot}/usr/bin/DRGN
%{_mandir}/man1/corelens.1.gz

%changelog
* Tue Aug 27 2024 Stephen Brennan <[email protected]> - 1.1.0-1
- Update to 1.1.0

* Mon Apr 22 2024 Stephen Brennan <[email protected]> - 0.9.1-1
- Update to 0.9.1

Expand Down
77 changes: 69 additions & 8 deletions drgn_tools/slabinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,16 @@
Helper to view slabinfo data
"""
import argparse
from typing import List
from typing import NamedTuple
from typing import Set
from typing import Tuple

from drgn import cast
from drgn import FaultError
from drgn import Object
from drgn import Program
from drgn import ProgramFlags
from drgn import Type
from drgn.helpers.linux.cpumask import for_each_present_cpu
from drgn.helpers.linux.list import list_for_each_entry
Expand Down Expand Up @@ -40,6 +43,8 @@ class SlabCacheInfo(NamedTuple):
"""Slab size"""
name: str
"""Name of the slab cache"""
freelist_corrupt_cpus: List[int]
"""A list of CPUs for which the freelist was found to be corrupt"""


def _slab_type(prog: Program) -> Type:
Expand Down Expand Up @@ -204,19 +209,41 @@ def slub_per_cpu_partial_free(cpu_partial: Object) -> int:
return partial_free


def kmem_cache_slub_info(cache: Object) -> Tuple[int, int]:
class _CpuSlubWrapper:
    """
    Proxy around an object which hides that object's ``cpu_slab`` attribute.

    Every attribute other than ``cpu_slab`` is forwarded to the wrapped
    object. Looking up ``cpu_slab`` always raises :class:`AttributeError`, so
    code which is handed the wrapper sees an object without that attribute.

    :param obj: the object whose attributes are forwarded
    """

    def __init__(self, obj):
        # Stored in the instance dict, so normal lookup finds it and it is
        # never routed through __getattr__ below.
        self._obj = obj

    def __getattr__(self, key):
        # __getattr__ is only consulted when normal lookup on the wrapper
        # fails, which is the case for every attribute of the wrapped object.
        if key == "cpu_slab":
            raise AttributeError("CpuSlubWrapper!")
        # Use getattr() rather than calling __getattribute__ directly: it runs
        # the complete lookup protocol on the wrapped object, including any
        # __getattr__ fallback that object defines.
        return getattr(self._obj, key)


def kmem_cache_slub_info(cache: Object) -> Tuple[int, int, List[int]]:
"""
For given kmem_cache object, parse through each cpu
and get number of total slabs and free objects
If the CPU freelist was corrupt, then we do our best effort to count free
objects, but we may undercount them. We set the corruption flag when this
happens.
:param: ``struct kmem_cache`` drgn object
:returns: total slabs, free objects
:returns: total slabs, free objects, corruption instances
"""
prog = cache.prog_
use_slab = _has_struct_slab(prog)

total_slabs = objects = free_objects = 0
slub_helper = _get_slab_cache_helper(cache)

# The "cpu_slab" variable is used by the slab helper to preload the percpu
# freelists. Not only does this duplicate work we're about to do, but also
# corrupt slab caches will crash this function before we can detect which
# CPU is corrupt. Pretend we have no "cpu_slab" variable when getting the
# helper. This depends on implementation details: we will improve the helper
# upstream to avoid this for the future.
slub_helper = _get_slab_cache_helper(_CpuSlubWrapper(cache))
corrupt = []

for cpuid in for_each_present_cpu(prog):
per_cpu_slab = per_cpu_ptr(cache.cpu_slab, cpuid)
Expand All @@ -237,15 +264,25 @@ def kmem_cache_slub_info(cache: Object) -> Tuple[int, int]:
objects = 0

free_objects += objects - page_inuse
cpu_free_objects = slub_get_cpu_freelist_cnt(cpu_freelist, slub_helper)
free_objects += cpu_free_objects

# Easily the most common form of corruption in the slab allocator comes
# from use after free, which overwrites the freelist pointer and causes
# a fault error. Catch this and report it for later.
try:
cpu_free_objects = slub_get_cpu_freelist_cnt(
cpu_freelist, slub_helper
)
except FaultError:
corrupt.append(cpuid)
else:
free_objects += cpu_free_objects

partial_frees = slub_per_cpu_partial_free(cpu_partial)
free_objects += partial_frees

total_slabs += 1

return total_slabs, free_objects
return total_slabs, free_objects, corrupt


def get_kmem_cache_slub_info(cache: Object) -> SlabCacheInfo:
Expand All @@ -255,7 +292,7 @@ def get_kmem_cache_slub_info(cache: Object) -> SlabCacheInfo:
:param cache: ``struct kmem_cache`` drgn object
:returns: a :class:`SlabCacheInfo` with statistics about the cache
"""
total_slabs, free_objects = kmem_cache_slub_info(cache)
total_slabs, free_objects, corrupt = kmem_cache_slub_info(cache)
(
nr_slabs,
nr_total_objs,
Expand All @@ -280,6 +317,7 @@ def get_kmem_cache_slub_info(cache: Object) -> SlabCacheInfo:
total_slabs,
ssize,
cache.name.string_().decode("utf-8"),
corrupt,
)


Expand All @@ -296,19 +334,42 @@ def print_slab_info(prog: Program) -> None:
"NAME",
]
)
corruption = []
for cache in for_each_slab_cache(prog):
slabinfo = get_kmem_cache_slub_info(cache)
maybe_asterisk = ""
if slabinfo.freelist_corrupt_cpus:
maybe_asterisk = "*"
corruption.append(slabinfo)
table.row(
slabinfo.cache.value_(),
slabinfo.objsize,
slabinfo.allocated,
f"{slabinfo.allocated}{maybe_asterisk}",
slabinfo.total,
slabinfo.nr_slabs,
f"{int(slabinfo.ssize / 1024)}k",
slabinfo.name,
)
table.write()

if corruption:
if prog.flags & ProgramFlags.IS_LIVE:
print(
"NOTE: freelist corruption was detected. This is not "
"necessarily an error, as live systems may encounter race "
"conditions."
)
else:
print(
"WARNING: freelist corruption was detected. It is likely that "
"a use-after-free bug occurred."
)
table = FixedTable(["CACHE:<24s", "CORRUPT CPUS"])
for slabinfo in corruption:
cpus = ", ".join(map(str, slabinfo.freelist_corrupt_cpus))
table.row(slabinfo.name, cpus)
table.write()


class SlabInfo(CorelensModule):
"""Print info about each slab cache"""
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

long_description = "drgn helper script repository"

RELEASE_VERSION = "0.9.1"
RELEASE_VERSION = "1.1.0"
PACKAGES = ["drgn_tools"]
if not os.environ.get("DRGN_TOOLS_V2_OMIT"):
PACKAGES.append("drgn_tools.v2")
Expand Down

0 comments on commit 68765a7

Please sign in to comment.