Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

release gil in pixelize_cylinder #5018

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 66 additions & 59 deletions yt/utilities/lib/pixelization_routines.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ cdef extern from "pixelization_constants.hpp":
int WEDGE_NF
np.uint8_t wedge_face_defs[MAX_NUM_FACES][2][2]

cdef extern from "numpy/npy_math.h":
double NPY_PI

@cython.cdivision(True)
@cython.boundscheck(False)
Expand Down Expand Up @@ -566,6 +568,7 @@ def pixelize_cylinder(np.float64_t[:,:] buff,
cdef np.float64_t r_inc, theta_inc
cdef np.float64_t costheta, sintheta
cdef int i, i1, pi, pj
cdef np.float64_t twoPI = 2 * NPY_PI

cdef int imin, imax
imin = np.asarray(radius).argmin()
Expand All @@ -577,7 +580,6 @@ def pixelize_cylinder(np.float64_t[:,:] buff,
imax = np.asarray(theta).argmax()
tmin = theta[imin] - dtheta[imin]
tmax = theta[imax] + dtheta[imax]

cdef np.ndarray[np.uint8_t, ndim=2] mask_arr = np.zeros_like(buff, dtype="uint8")
cdef np.uint8_t[:, :] mask = mask_arr

Expand Down Expand Up @@ -611,65 +613,70 @@ def pixelize_cylinder(np.float64_t[:,:] buff,
rbounds[0] = 0.0
r_inc = 0.5 * fmin(dx, dy)

for i in range(radius.shape[0]):
r0 = radius[i]
theta0 = theta[i]
dr_i = dradius[i]
dtheta_i = dtheta[i]
# Skip out early if we're offsides, for zoomed in plots
if r0 + dr_i < rbounds[0] or r0 - dr_i > rbounds[1]:
continue
theta_i = theta0 - dtheta_i
theta_inc = r_inc / (r0 + dr_i)

while theta_i < theta0 + dtheta_i:
r_i = r0 - dr_i
costheta = math.cos(theta_i)
sintheta = math.sin(theta_i)
while r_i < r0 + dr_i:
if rmax <= r_i:
with nogil:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this a case where we'd want to use

for i in prange(radius.shape[0], nogil=True):
   pass

Copy link
Contributor Author

@chrishavlin chrishavlin Oct 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure! I think prange should work here and could give an extra performance boost. but i'll wait until the rest of the PR is actually working before I change it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok well using prange might be a bit more work than I want to put in right now -- lots of compilation errors along the lines of :

      yt/utilities/lib/pixelization_routines.pyx:635:20: Cannot read reduction variable in loop body
      
      Error compiling Cython file:
      ------------------------------------------------------------
      ...
                  while r_i < r0 + dr_i:
                      if rmax <= r_i:
                          r_i += r_inc
                          continue
                      y = r_i * costheta
                      x = r_i * sintheta
                          ^
      ------------------------------------------------------------
      
      yt/utilities/lib/pixelization_routines.pyx:636:20: Cannot read reduction variable in loop body

for i in range(radius.shape[0]):
r0 = radius[i]
theta0 = theta[i]
dr_i = dradius[i]
dtheta_i = dtheta[i]
# Skip out early if we're offsides, for zoomed in plots
if r0 + dr_i < rbounds[0] or r0 - dr_i > rbounds[1]:
continue
theta_i = theta0 - dtheta_i
theta_inc = r_inc / (r0 + dr_i)

while theta_i < theta0 + dtheta_i:
r_i = r0 - dr_i
costheta = math.cos(theta_i)
sintheta = math.sin(theta_i)
while r_i < r0 + dr_i:
if rmax <= r_i:
r_i += r_inc
continue
y = r_i * costheta
x = r_i * sintheta
pi = <int>((x - x0)/dx)
pj = <int>((y - y0)/dy)
if pi >= 0 and pi < buff.shape[0] and \
pj >= 0 and pj < buff.shape[1]:
# we got a pixel that intersects the grid cell
# now check that this pixel doesn't go beyond the data domain
xp = x0 + pi*dx
yp = y0 + pj*dy
corners[0] = xp*xp + yp*yp
corners[1] = xp*xp + (yp+dy)**2
corners[2] = (xp+dx)**2 + yp*yp
corners[3] = (xp+dx)**2 + (yp+dy)**2
prbounds[0] = prbounds[1] = corners[3]
for i1 in range(3):
prbounds[0] = fmin(prbounds[0], corners[i1])
prbounds[1] = fmax(prbounds[1], corners[i1])
prbounds[0] = math.sqrt(prbounds[0])
prbounds[1] = math.sqrt(prbounds[1])

corners[0] = math.atan2(xp, yp)
corners[1] = math.atan2(xp, yp+dy)
corners[2] = math.atan2(xp+dx, yp)
corners[3] = math.atan2(xp+dx, yp+dy)
ptbounds[0] = ptbounds[1] = corners[3]
for i1 in range(3):
ptbounds[0] = fmin(ptbounds[0], corners[i1])
ptbounds[1] = fmax(ptbounds[1], corners[i1])

# shift to a [0, PI] interval
ptbounds[0] = ptbounds[0] % twoPI
if ptbounds[0] < 0:
ptbounds[0] += NPY_PI
ptbounds[1] = ptbounds[1] % twoPI
if ptbounds[1] < 0:
ptbounds[1] += NPY_PI
Comment on lines +668 to +672
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so I'm not sure why I needed to add the check for negative numbers here, but after switching from ptbounds[1] % (2*np.pi) to ptbounds[1] % twoPI, the result returns numbers in the range (-pi,pi).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we need to understand this bit, and possibly come up with a way to perform the computation that doesn't involve branching; hard-wiring a somewhat expensive calculation generally hurts performance less than ruining branch prediction.
I'm writing this comment early in my review but I'll try to come up with more insight.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm taking a bet that the reason why you're seeing a change in behavior is that the function is decorated with @cython.cdivision(True): on main the Python % is used, while with twoPI you're getting a C operator instead.

http://docs.cython.org/en/latest/src/tutorial/caveats.html

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you try this patch

diff --git a/yt/utilities/lib/pixelization_routines.pyx b/yt/utilities/lib/pixelization_routines.pyx
index 194ce7af6..6728375d0 100644
--- a/yt/utilities/lib/pixelization_routines.pyx
+++ b/yt/utilities/lib/pixelization_routines.pyx
@@ -664,12 +664,8 @@ def pixelize_cylinder(np.float64_t[:,:] buff,
                             ptbounds[1] = fmax(ptbounds[1], corners[i1])
 
                         # shift to a [0, PI] interval
-                        ptbounds[0] = ptbounds[0] % twoPI
-                        if ptbounds[0] < 0:
-                            ptbounds[0] += NPY_PI
-                        ptbounds[1] = ptbounds[1] % twoPI
-                        if ptbounds[1] < 0:
-                            ptbounds[1] += NPY_PI
+                        ptbounds[0] = (ptbounds[0]+twoPI) % twoPI
+                        ptbounds[1] = (ptbounds[1]+twoPI) % twoPI
 
                         if prbounds[0] >= rmin and prbounds[1] <= rmax and \
                            ptbounds[0] >= tmin and ptbounds[1] <= tmax:

the result should be the same in all cases, but it avoids branching, so I expect it to be faster.
If this works, we'll want to add a comment for why it's done this way.


if prbounds[0] >= rmin and prbounds[1] <= rmax and \
ptbounds[0] >= tmin and ptbounds[1] <= tmax:
buff[pi, pj] = field[i]
mask[pi, pj] = 1
r_i += r_inc
continue
y = r_i * costheta
x = r_i * sintheta
pi = <int>((x - x0)/dx)
pj = <int>((y - y0)/dy)
if pi >= 0 and pi < buff.shape[0] and \
pj >= 0 and pj < buff.shape[1]:
# we got a pixel that intersects the grid cell
# now check that this pixel doesn't go beyond the data domain
xp = x0 + pi*dx
yp = y0 + pj*dy
corners[0] = xp*xp + yp*yp
corners[1] = xp*xp + (yp+dy)**2
corners[2] = (xp+dx)**2 + yp*yp
corners[3] = (xp+dx)**2 + (yp+dy)**2
prbounds[0] = prbounds[1] = corners[3]
for i1 in range(3):
prbounds[0] = fmin(prbounds[0], corners[i1])
prbounds[1] = fmax(prbounds[1], corners[i1])
prbounds[0] = math.sqrt(prbounds[0])
prbounds[1] = math.sqrt(prbounds[1])

corners[0] = math.atan2(xp, yp)
corners[1] = math.atan2(xp, yp+dy)
corners[2] = math.atan2(xp+dx, yp)
corners[3] = math.atan2(xp+dx, yp+dy)
ptbounds[0] = ptbounds[1] = corners[3]
for i1 in range(3):
ptbounds[0] = fmin(ptbounds[0], corners[i1])
ptbounds[1] = fmax(ptbounds[1], corners[i1])

# shift to a [0, PI] interval
ptbounds[0] = ptbounds[0] % (2*np.pi)
ptbounds[1] = ptbounds[1] % (2*np.pi)

if prbounds[0] >= rmin and prbounds[1] <= rmax and \
ptbounds[0] >= tmin and ptbounds[1] <= tmax:
buff[pi, pj] = field[i]
mask[pi, pj] = 1
r_i += r_inc
theta_i += theta_inc
theta_i += theta_inc

if return_mask:
return mask_arr.astype("bool")
Expand Down
Loading