Update thinc.ai to v8.1.1 #762

Merged Sep 9, 2022 (22 commits)

Commits
2da00a3
Fix typo
cclauss Jul 13, 2022
b4b37ce
Merge pull request #727 from cclauss/patch-1
polm Jul 14, 2022
40c129f
Update build constraints for arm64 and aarch64 wheels (#716)
adrianeboyd Jul 18, 2022
5a4f868
Ops: replace FloatsType by constrained typevar (#720)
danieldk Jul 28, 2022
8e5c743
Unroll `argmax` in `maxout` for small sizes of `P` (#702)
danieldk Jul 28, 2022
42b73c9
Change Docker image tag to thinc-ai (#732)
danieldk Aug 3, 2022
69a280f
Add `with_signpost_interval` layer (#711)
danieldk Aug 3, 2022
1846855
Docs: Fix/update `label_smoothing` description, run prettier (#733)
shadeMe Aug 4, 2022
af0e3de
Add Dish activation (#719)
danieldk Aug 4, 2022
7fcdd0f
Auto-format code with black (#737)
github-actions[bot] Aug 5, 2022
d95b5fc
Increment `blis` version upper-bound to `0.10.0` (#736)
shadeMe Aug 5, 2022
01eb6b7
asarrayDf: take `Sequence[float]`, not `Sequence[int]` (#739)
danieldk Aug 5, 2022
a43635e
Use confection for configurations (#745)
rmitsch Aug 26, 2022
eda4c75
`PyTorchGradScaler`: Cache `_found_inf` on the CPU (#746)
shadeMe Aug 29, 2022
a7bbc48
More general remap_ids (#726)
kadarakos Sep 2, 2022
102d654
Auto-format code with black (#753)
github-actions[bot] Sep 5, 2022
fba3bf0
Switch to macos-latest (#755)
adrianeboyd Sep 6, 2022
fc323e1
`util`: Explicitly call `__dlpack__` built-in method in `xp2tensorflo…
shadeMe Sep 7, 2022
9836e9e
Set version to 8.1.1 (#758)
danieldk Sep 9, 2022
cda32eb
Merge remote-tracking branch 'upstream/master' into thinc.ai-v8.1.1
danieldk Sep 9, 2022
97a1a04
Remove references to FastAPI being an Explosion product (#761)
rmitsch Sep 9, 2022
2a4985b
Merge remote-tracking branch 'upstream/master' into thinc.ai-v8.1.1
danieldk Sep 9, 2022
Changes from all commits
2 changes: 1 addition & 1 deletion README.md
@@ -2,7 +2,7 @@

# Thinc: A refreshing functional take on deep learning, compatible with your favorite libraries

### From the makers of [spaCy](https://spacy.io), [Prodigy](https://prodi.gy) and [FastAPI](https://fastapi.tiangolo.com)
### From the makers of [spaCy](https://spacy.io) and [Prodigy](https://prodi.gy)

[Thinc](https://thinc.ai) is a **lightweight deep learning library** that offers an elegant,
type-checked, functional-programming API for **composing models**, with support
2 changes: 1 addition & 1 deletion azure-pipelines.yml
@@ -23,7 +23,7 @@ jobs:
imageName: 'windows-2019'
python.version: '3.6'
Python37Mac:
imageName: 'macos-10.15'
imageName: 'macos-latest'
python.version: '3.7'
Python38Linux:
imageName: 'ubuntu-latest'
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -5,7 +5,7 @@ requires = [
"murmurhash>=1.0.2,<1.1.0",
"cymem>=2.0.2,<2.1.0",
"preshed>=3.0.2,<3.1.0",
"blis>=0.7.8,<0.8.0",
"blis>=0.7.8,<0.10.0",
"numpy>=1.15.0",
]
build-backend = "setuptools.build_meta"
3 changes: 2 additions & 1 deletion requirements.txt
@@ -2,7 +2,7 @@
murmurhash>=1.0.2,<1.1.0
cymem>=2.0.2,<2.1.0
preshed>=3.0.2,<3.1.0
blis>=0.7.8,<0.8.0
blis>=0.7.8,<0.10.0
srsly>=2.4.0,<3.0.0
wasabi>=0.8.1,<1.1.0
catalogue>=2.0.4,<2.1.0
@@ -34,3 +34,4 @@ nbformat>=5.0.4,<5.2.0
# Test to_disk/from_disk against pathlib.Path subclasses
pathy>=0.3.5
black>=22.0,<23.0
confection>=0.0.1,<1.0.0
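
The new `confection` pin comes from the "Use confection for configurations (#745)" commit: Thinc's config parsing now lives in the confection package. A minimal sketch of what this dependency is used for, assuming the familiar `thinc.api.Config`/`registry` interface that confection now backs (the config contents are illustrative):

```python
from thinc.api import Config, registry

CONFIG_STR = """
[model]
@layers = "Linear.v1"
nO = 10
nI = 4
"""

# Parsing and validation of the config string are delegated to confection.
config = Config().from_str(CONFIG_STR)
# Resolving builds the registered objects, here a Linear layer with nO=10, nI=4.
model = registry.resolve(config)["model"]
```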
5 changes: 3 additions & 2 deletions setup.cfg
@@ -35,16 +35,17 @@ setup_requires =
cymem>=2.0.2,<2.1.0
preshed>=3.0.2,<3.1.0
murmurhash>=1.0.2,<1.1.0
blis>=0.7.8,<0.8.0
blis>=0.7.8,<0.10.0
install_requires =
# Explosion-provided dependencies
blis>=0.7.8,<0.8.0
blis>=0.7.8,<0.10.0
murmurhash>=1.0.2,<1.1.0
cymem>=2.0.2,<2.1.0
preshed>=3.0.2,<3.1.0
wasabi>=0.8.1,<1.1.0
srsly>=2.4.0,<3.0.0
catalogue>=2.0.4,<2.1.0
confection>=0.0.1,<1.0.0
# Third-party dependencies
setuptools
numpy>=1.15.0
2 changes: 1 addition & 1 deletion setup.py
@@ -25,7 +25,7 @@
]
COMPILE_OPTIONS = {
"msvc": ["/Ox", "/EHsc"],
"other": ["-O3", "-Wno-strict-prototypes", "-Wno-unused-function"],
"other": ["-O3", "-Wno-strict-prototypes", "-Wno-unused-function", "-std=c++11"],
}
COMPILER_DIRECTIVES = {
"language_level": -3,
2 changes: 1 addition & 1 deletion thinc/about.py
@@ -1,2 +1,2 @@
__version__ = "8.1.0"
__version__ = "8.1.1"
__release__ = True
3 changes: 2 additions & 1 deletion thinc/api.py
@@ -27,7 +27,7 @@
from .layers import CauchySimilarity, ParametricAttention, Logistic
from .layers import resizable, sigmoid_activation, Sigmoid, SparseLinear
from .layers import ClippedLinear, ReluK, HardTanh, HardSigmoid
from .layers import HardSwish, HardSwishMobilenet, Swish, Gelu
from .layers import Dish, HardSwish, HardSwishMobilenet, Swish, Gelu
from .layers import PyTorchWrapper, PyTorchRNNWrapper, PyTorchLSTM
from .layers import TensorFlowWrapper, keras_subclass, MXNetWrapper
from .layers import PyTorchWrapper_v2, Softmax_v2
@@ -40,6 +40,7 @@
from .layers import with_reshape, with_getitem, strings2arrays, list2array
from .layers import list2ragged, ragged2list, list2padded, padded2list, remap_ids
from .layers import array_getitem, with_cpu, with_debug, with_nvtx_range
from .layers import with_signpost_interval
from .layers import tuplify

from .layers import reduce_first, reduce_last, reduce_max, reduce_mean, reduce_sum
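
The two import changes above correspond to the "Add Dish activation (#719)" and "Add `with_signpost_interval` layer (#711)" commits, which add `Dish` (a dense layer with the Dish activation) and `with_signpost_interval` (wrapping a layer to emit macOS signpost intervals) to the public API. A hypothetical sketch of composing a model with the new `Dish` layer; the layer sizes are illustrative, not from this PR:

```python
from thinc.api import Dish, Softmax, chain

# Compose a small feed-forward model: Dish hidden layer, softmax output.
model = chain(Dish(nO=64, nI=32), Softmax(nO=10, nI=64))
model.initialize()
```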
31 changes: 31 additions & 0 deletions thinc/backends/_custom_kernels.cu
@@ -161,6 +161,20 @@ __global__ void clipped_linear(T* Y, const T* X, double slope, double offset, do
}


template <typename T>
__global__ void dish(T* Y, const T* X, int N)
{
int _loop_start = blockIdx.x * blockDim.x + threadIdx.x;
int _loop_stride = blockDim.x * gridDim.x;

for (int i = _loop_start; i < N; i += _loop_stride)
{
T x = X[i];
Y[i] = 0.5 * x * (x / sqrt(1 + x * x) + 1);
}
}
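
For reference, the `dish` kernel above computes the Dish activation

```latex
\operatorname{dish}(x) = \frac{x}{2}\left(\frac{x}{\sqrt{1 + x^2}} + 1\right)
```

which has a Swish/GELU-like shape but needs no `exp` or `erf` call.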


template <typename T>
__global__ void gelu(T* Y, const T* X, double threshold, int N)
{
@@ -414,6 +428,23 @@ __global__ void backprop_hard_swish_mobilenet(T* dX, const T* dY, const T* X, in
}


template <typename T>
__global__ void backprop_dish(T* dX, const T* dY, const T* X, int N)
{

int _loop_start = blockIdx.x * blockDim.x + threadIdx.x;
int _loop_stride = blockDim.x * gridDim.x;

for (int i = _loop_start; i < N; i += _loop_stride)
{
T x = X[i];
T x_sq = x * x;
T x_sq_plus_one = x_sq + 1.0;
dX[i] = dY[i] * (x/sqrt(x_sq_plus_one) - (0.5 * x * x_sq)
/ pow(x_sq_plus_one, static_cast<T>(1.5)) + 0.5);
}
}
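
The backward kernel applies the corresponding derivative of Dish to the incoming gradient:

```latex
\operatorname{dish}'(x) = \frac{x}{\sqrt{x^2 + 1}} - \frac{x^3}{2\,(x^2 + 1)^{3/2}} + \frac{1}{2},
\qquad dX_i = dY_i \cdot \operatorname{dish}'(X_i)
```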


template <typename T>
__global__ void backprop_gelu(T* dX, const T* dY, const T* X,
48 changes: 48 additions & 0 deletions thinc/backends/_custom_kernels.py
@@ -10,6 +10,8 @@
KERNELS_LIST = [
"backprop_clipped_linear<double>",
"backprop_clipped_linear<float>",
"backprop_dish<double>",
"backprop_dish<float>",
"backprop_gelu<double>",
"backprop_gelu<float>",
"backprop_hard_swish<double>",
@@ -32,6 +34,8 @@
"backprop_swish<float>",
"clipped_linear<double>",
"clipped_linear<float>",
"dish<double>",
"dish<float>",
"gather_add<double>",
"gather_add<float>",
"gelu<double>",
@@ -78,6 +82,8 @@ def compile_mmh(src):

clipped_linear_kernel_float = _get_kernel("clipped_linear<float>")
clipped_linear_kernel_double = _get_kernel("clipped_linear<double>")
dish_kernel_float = _get_kernel("dish<float>")
dish_kernel_double = _get_kernel("dish<double>")
gather_add_kernel_float = _get_kernel("gather_add<float>")
gather_add_kernel_double = _get_kernel("gather_add<double>")
gelu_kernel_float = _get_kernel("gelu<float>")
@@ -98,6 +104,8 @@ def compile_mmh(src):

backprop_clipped_linear_kernel_double = _get_kernel("backprop_clipped_linear<double>")
backprop_clipped_linear_kernel_float = _get_kernel("backprop_clipped_linear<float>")
backprop_dish_kernel_double = _get_kernel("backprop_dish<double>")
backprop_dish_kernel_float = _get_kernel("backprop_dish<float>")
backprop_gelu_kernel_double = _get_kernel("backprop_gelu<double>")
backprop_gelu_kernel_float = _get_kernel("backprop_gelu<float>")
backprop_hard_swish_kernel_double = _get_kernel("backprop_hard_swish<double>")
@@ -199,6 +207,19 @@ def gather_add(table, indices, *, threads_per_block=128, num_blocks=128):
return out


def dish(X, *, inplace=False, threads_per_block=128, num_blocks=128):
_is_float_array(X)

out = X
if not inplace:
out = _alloc_like(X, zeros=False)
if X.dtype == "float32":
dish_kernel_float((num_blocks,), (threads_per_block,), (out, X, X.size))
else:
dish_kernel_double((num_blocks,), (threads_per_block,), (out, X, X.size))
return out


def gelu(X, *, inplace=False, threshold=6.0, threads_per_block=128, num_blocks=128):
_is_float_array(X)

@@ -483,6 +504,33 @@ def backprop_hard_swish_mobilenet(
return out


def backprop_dish(
dY,
X,
*,
inplace: bool = False,
threads_per_block=128,
num_blocks=128,
):
_is_float_array(dY)
_is_float_array(X, shape=dY.shape)

out = dY
if not inplace:
out = _alloc_like(dY, zeros=False)

if dY.dtype == "float32":
backprop_dish_kernel_float(
(num_blocks,), (threads_per_block,), (out, dY, X, out.size)
)
else:
backprop_dish_kernel_double(
(num_blocks,), (threads_per_block,), (out, dY, X, out.size)
)

return out
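
As a sanity check for the two wrappers above, here is a hypothetical NumPy reference for the same forward and backward computations (illustrative only, not part of this PR):

```python
import numpy as np

def dish_ref(x):
    # Forward pass: dish(x) = 0.5 * x * (x / sqrt(1 + x^2) + 1)
    return 0.5 * x * (x / np.sqrt(1.0 + x * x) + 1.0)

def backprop_dish_ref(d_y, x):
    # Backward pass: dX = dY * dish'(x)
    x_sq = x * x
    x_sq_p1 = x_sq + 1.0
    return d_y * (x / np.sqrt(x_sq_p1) - 0.5 * x * x_sq / x_sq_p1 ** 1.5 + 0.5)
```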


def backprop_gelu(
dY,
X,
74 changes: 64 additions & 10 deletions thinc/backends/cpu_kernels.hh
@@ -27,23 +27,58 @@ struct axpy {

// All elementwise functions, such as most activations, work in-place.

template <typename A, typename L>
L argmax(A* arr, L len)

template <typename T, typename L>
struct argmax_result {
T max;
L max_idx;
};

template <typename T, typename L>
argmax_result<T, L> argmax(T const *arr, L len)
{
static_assert(std::is_floating_point<A>::value,
static_assert(std::is_floating_point<T>::value,
"Array should be floating point");
static_assert(std::is_integral<L>::value, "Array length should be integral");

L max = 0;
argmax_result<T, L> r { arr[0], 0 };

for (L i = 1; i < len; ++i) {
if (arr[i] > arr[max]) {
max = i;
if (arr[i] > r.max) {
r.max = arr[i];
r.max_idx = i;
}
}

return max;
return r;
}

// The next two templates define argmax for a fixed number of elements.
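// For example, argmax<float, int>(0.1f, 0.7f, 0.3f) evaluates to
// {0.7f, 1} (max, max_idx); ties resolve to the earliest argument,
// matching the pointer-based version above.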

template <typename T, typename L>
argmax_result<T, L> argmax(T a) {
static_assert(std::is_floating_point<T>::value, "Argument should be floating point");
argmax_result<T, L> acc { a, 0 };
return acc;
}

template<typename T, typename L, typename... Args>
argmax_result<T, L> argmax(T a, Args... args) {
static_assert(std::is_floating_point<T>::value, "Arguments should be floating point");

auto acc = argmax<T, L>(args...);

if (acc.max > a) {
acc.max_idx += 1;
} else {
acc.max_idx = 0;
acc.max = a;
}

return acc;
}


template <typename A, typename L>
void vec_add(A* X, const A* Y, A scale, L N)
{
@@ -62,12 +97,31 @@ void cpu_maxout(A* best__bo, L* which__bo, const A* cands__bop, L B, L O, L P)
"Array should be floating point");
static_assert(std::is_integral<L>::value, "Array length should be integral");

for (int i = 0; i < B * O; ++i) {
which__bo[i] = argmax(cands__bop + i * P, P);
best__bo[i] = cands__bop[i * P + which__bo[i]];
// For small inputs, we use an unrolled argmax.
if (P == 2) {
for (int i = 0; i < B * O; ++i) {
A const *input = cands__bop + i * P;
auto r = argmax<A, L>(input[0], input[1]);
which__bo[i] = r.max_idx;
best__bo[i] = r.max;
}
} else if (P == 3) {
for (int i = 0; i < B * O; ++i) {
A const *input = cands__bop + i * P;
auto r = argmax<A, L>(input[0], input[1], input[2]);
which__bo[i] = r.max_idx;
best__bo[i] = r.max;
}
} else {
for (int i = 0; i < B * O; ++i) {
auto r = argmax<A, L>(cands__bop + i * P, P);
which__bo[i] = r.max_idx;
best__bo[i] = r.max;
}
}
}
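
To make the `__bo`/`__bop` naming concrete: the candidates have shape (B, O, P) and `cpu_maxout` reduces over the pool dimension P, writing the maximum value and its pool index per (batch, output) position. A rough NumPy equivalent (illustrative only):

```python
import numpy as np

def maxout_ref(cands_bop):
    # cands_bop: array of shape (B, O, P); reduce over the last (pool) axis.
    which_bo = cands_bop.argmax(axis=-1)                            # (B, O) indices
    best_bo = np.take_along_axis(cands_bop, which_bo[..., None], axis=-1)[..., 0]
    return best_bo, which_bo
```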


template <typename A, typename L>
void cpu_backprop_maxout(A* dX__bop, const A* dX__bo, const L* which__bo,
L B, L O, L P)
12 changes: 12 additions & 0 deletions thinc/backends/cupy_ops.py
@@ -36,6 +36,18 @@ def gather_add(self, table, indices):
else:
return super().gather_add(table, indices)

def dish(self, X, inplace=False):
if X.dtype in ("float32", "float64"):
return _custom_kernels.dish(X, inplace=inplace)
else:
return super().dish(X, inplace=inplace)

def backprop_dish(self, dY, X, inplace=False):
if X.dtype == dY.dtype and X.dtype in ("float32", "float64"):
return _custom_kernels.backprop_dish(dY, X, inplace=inplace)
else:
return super().backprop_dish(dY, X, inplace=inplace)

def gelu(self, X, inplace=False):
if X.dtype in ("float32", "float64"):
return _custom_kernels.gelu(X, inplace=inplace, threshold=6.0)
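
With these overrides, `CupyOps.dish` and `CupyOps.backprop_dish` dispatch to the custom CUDA kernels for float32/float64 arrays and fall back to the base `Ops` implementation otherwise. A minimal usage sketch through the current-ops API, assuming the base `Ops` gained `dish`/`backprop_dish` in the same release (as the `super()` fallbacks above imply); shapes and values are arbitrary:

```python
import numpy
from thinc.api import get_current_ops

ops = get_current_ops()                  # CupyOps on GPU, NumpyOps on CPU
X = ops.asarray2f(numpy.random.uniform(-3.0, 3.0, (2, 4)))

Y = ops.dish(X)                          # forward activation
dY = ops.alloc2f(2, 4) + 1.0             # stand-in upstream gradient
dX = ops.backprop_dish(dY, X)            # gradient w.r.t. X
```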