From b8d4b357d5b359daf9e7fce627ce68637a36d636 Mon Sep 17 00:00:00 2001 From: Difers <707065510@qq.com> Date: Fri, 31 Mar 2023 01:48:41 +0000 Subject: [PATCH 1/8] add test+conv3d_transpose_part2 --- .../gpudnn/conv_transpose_grad_kernel.cu | 3 +- .../kernels/gpudnn/conv_transpose_kernel.cu | 6 +- .../fluid/tests/unittests/test_conv3d_op.py | 101 ++++++++++-- .../unittests/test_conv3d_transpose_op.py | 154 +++++++++++++++++- .../test_conv3d_transpose_part2_op.py | 18 +- 5 files changed, 261 insertions(+), 21 deletions(-) diff --git a/paddle/phi/kernels/gpudnn/conv_transpose_grad_kernel.cu b/paddle/phi/kernels/gpudnn/conv_transpose_grad_kernel.cu index 50bae0a8bca3e..ea25b795beb65 100644 --- a/paddle/phi/kernels/gpudnn/conv_transpose_grad_kernel.cu +++ b/paddle/phi/kernels/gpudnn/conv_transpose_grad_kernel.cu @@ -1070,7 +1070,8 @@ PD_REGISTER_KERNEL(conv3d_transpose_grad, ALL_LAYOUT, phi::Conv3dTransposeGradGPUDNNKernel, float, - float16) {} + float16, + phi::dtype::bfloat16) {} #else #if CUDNN_VERSION_MIN(8, 1, 0) PD_REGISTER_KERNEL(conv2d_transpose_grad, diff --git a/paddle/phi/kernels/gpudnn/conv_transpose_kernel.cu b/paddle/phi/kernels/gpudnn/conv_transpose_kernel.cu index df360ab388a6d..56e9212f0b60d 100644 --- a/paddle/phi/kernels/gpudnn/conv_transpose_kernel.cu +++ b/paddle/phi/kernels/gpudnn/conv_transpose_kernel.cu @@ -366,7 +366,8 @@ PD_REGISTER_KERNEL(conv3d_transpose, ALL_LAYOUT, phi::Conv3dTransposeGPUDNNKernel, float, - float16) {} + float16, + phi::dtype::bfloat16) {} #else #if CUDNN_VERSION_MIN(8, 1, 0) PD_REGISTER_KERNEL(conv2d_transpose, @@ -375,8 +376,7 @@ PD_REGISTER_KERNEL(conv2d_transpose, phi::Conv2dTransposeGPUDNNKernel, float, double, - float16, - phi::dtype::bfloat16) {} + float16) {} PD_REGISTER_KERNEL(conv3d_transpose, GPUDNN, ALL_LAYOUT, diff --git a/python/paddle/fluid/tests/unittests/test_conv3d_op.py b/python/paddle/fluid/tests/unittests/test_conv3d_op.py index 0b843663827c1..d11c6dea7b6ad 100644 --- a/python/paddle/fluid/tests/unittests/test_conv3d_op.py +++ b/python/paddle/fluid/tests/unittests/test_conv3d_op.py @@ -15,7 +15,7 @@ import unittest import numpy as np -from eager_op_test import OpTest, paddle_static_guard +from eager_op_test import OpTest, convert_float_to_uint16, paddle_static_guard import paddle from paddle.fluid import core @@ -179,6 +179,70 @@ def init_kernel_type(self): globals()[cls_name] = TestCUDNNCase +def create_test_cudnn_bf16_class(parent): + @unittest.skipIf( + not core.is_compiled_with_cuda() + or not core.is_bfloat16_supported(core.CUDAPlace(0)), + "core is not compiled with CUDA and do not support bfloat16", + ) + class TestConv3DCUDNNBF16(parent): + def init_kernel_type(self): + self.use_cudnn = True + self.no_need_check_grad = False + self.dtype = np.uint16 + + def test_check_output(self): + # TODO(wangzhongpu): support mkldnn op in dygraph mode + place = core.CUDAPlace(0) if self.has_cudnn() else core.CPUPlace() + self.check_output_with_place( + place, check_dygraph=(not self.use_mkldnn) + ) + + def test_check_grad(self): + if hasattr(self, "no_need_check_grad") and self.no_need_check_grad: + return + place = core.CUDAPlace(0) if self.has_cudnn() else core.CPUPlace() + # TODO(wangzhongpu): support mkldnn op in dygraph mode + self.check_grad_with_place( + place, + {'Input', 'Filter'}, + 'Output', + check_dygraph=(not self.use_mkldnn), + ) + + def test_check_grad_no_filter(self): + if hasattr(self, "no_need_check_grad") and self.no_need_check_grad: + return + place = core.CUDAPlace(0) if self.has_cudnn() else core.CPUPlace() + # 
TODO(wangzhongpu): support mkldnn op in dygraph mode + self.check_grad_with_place( + place, + ['Input'], + 'Output', + # max_relative_error=0.03, + no_grad_set={'Filter'}, + check_dygraph=(not self.use_mkldnn), + ) + + def test_check_grad_no_input(self): + if hasattr(self, "no_need_check_grad") and self.no_need_check_grad: + return + place = core.CUDAPlace(0) if self.has_cudnn() else core.CPUPlace() + # TODO(wangzhongpu): support mkldnn op in dygraph mode + self.check_grad_with_place( + place, + ['Filter'], + 'Output', + # max_relative_error=0.03, + no_grad_set={'Input'}, + check_dygraph=(not self.use_mkldnn), + ) + + cls_name = "{0}_{1}".format(parent.__name__, "CUDNNBF16OP") + TestConv3DCUDNNBF16.__name__ = cls_name + globals()[cls_name] = TestConv3DCUDNNBF16 + + def create_test_padding_SAME_class(parent): class TestPaddingSMAECase(parent): def init_paddings(self): @@ -332,10 +396,20 @@ def setUp(self): conv3d_param, ).astype(self.dtype) - self.inputs = { - 'Input': OpTest.np_dtype_to_fluid_dtype(input), - 'Filter': OpTest.np_dtype_to_fluid_dtype(filter), - } + if self.is_bfloat16_op(): + output = output.astype(np.float32) + self.inputs = { + 'Input': convert_float_to_uint16(input), + 'Filter': convert_float_to_uint16(filter), + } + + else: + output = output.astype(self.dtype) + self.inputs = { + 'Input': OpTest.np_dtype_to_fluid_dtype(input), + 'Filter': OpTest.np_dtype_to_fluid_dtype(filter), + } + self.attrs = { 'strides': self.stride, 'paddings': self.pad, @@ -358,8 +432,6 @@ def test_check_output(self): ) def test_check_grad(self): - if self.dtype == np.float16: - return place = core.CUDAPlace(0) if self.has_cudnn() else core.CPUPlace() # TODO(wangzhongpu): support mkldnn op in dygraph mode self.check_grad_with_place( @@ -371,8 +443,7 @@ def test_check_grad(self): ) def test_check_grad_no_filter(self): - if self.dtype == np.float16: - return + place = core.CUDAPlace(0) if self.has_cudnn() else core.CPUPlace() # TODO(wangzhongpu): support mkldnn op in dygraph mode self.check_grad_with_place( @@ -385,8 +456,7 @@ def test_check_grad_no_filter(self): ) def test_check_grad_no_input(self): - if self.dtype == np.float16: - return + place = core.CUDAPlace(0) if self.has_cudnn() else core.CPUPlace() # TODO(wangzhongpu): support mkldnn op in dygraph mode self.check_grad_with_place( @@ -617,6 +687,14 @@ def init_kernel_type(self): self.dtype = np.float32 if core.is_compiled_with_rocm() else np.float64 +# ----------------Conv3DCUDNN bf16---------------- +create_test_cudnn_bf16_class(TestConv3DOp) +create_test_cudnn_bf16_class(TestWithGroup1) +create_test_cudnn_bf16_class(TestWithGroup2) +create_test_cudnn_bf16_class(TestWith1x1) +create_test_cudnn_bf16_class(TestWithInput1x1Filter1x1) + + # ---- test asymmetric padding ---- @@ -1114,4 +1192,5 @@ def run_8(): if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_conv3d_transpose_op.py b/python/paddle/fluid/tests/unittests/test_conv3d_transpose_op.py index 22e22d9b2f66a..dd7b8283be5c3 100644 --- a/python/paddle/fluid/tests/unittests/test_conv3d_transpose_op.py +++ b/python/paddle/fluid/tests/unittests/test_conv3d_transpose_op.py @@ -19,9 +19,10 @@ import paddle paddle.enable_static() -from eager_op_test import OpTest +from eager_op_test import OpTest, convert_float_to_uint16, get_numeric_gradient from paddle.fluid import core +from paddle.fluid.tests.unittests.testsuite import create_op def conv3dtranspose_forward_naive(input_, filter_, attrs): @@ -134,6 +135,101 @@ def 
_get_padding_with_SAME(input_shape, kernel_size, kernel_stride): return out +def create_test_cudnn_fp16_class(parent, grad_check=True): + @unittest.skipIf( + not core.is_compiled_with_cuda(), "core is not compiled with CUDA" + ) + class TestConv3DCUDNNFP16(parent): + def init_kernel_type(self): + self.use_cudnn = True + self.dtype = np.float16 + + def test_check_output(self): + if core.is_compiled_with_cuda(): + place = core.CUDAPlace(0) + if core.is_float16_supported(place): + self.check_output_with_place(place, atol=2e-2) + + def test_check_grad_no_filter(self): + place = core.CUDAPlace(0) + if core.is_float16_supported(place) and grad_check: + self.check_grad_with_place( + place, ['Input'], 'Output', no_grad_set={'Filter'} + ) + + def test_check_grad_no_input(self): + place = core.CUDAPlace(0) + if core.is_float16_supported(place) and grad_check: + self.check_grad_with_place( + place, ['Filter'], 'Output', no_grad_set={'Input'} + ) + + cls_name = "{0}_{1}".format(parent.__name__, "CUDNNFP16OP") + TestConv3DCUDNNFP16.__name__ = cls_name + globals()[cls_name] = TestConv3DCUDNNFP16 + + +def create_test_cudnn_bf16_class(parent): + @unittest.skipIf( + not core.is_compiled_with_cuda() + or not core.is_bfloat16_supported(core.CUDAPlace(0)), + "core is not compiled with CUDA and do not support bfloat16", + ) + class TestConv3DCUDNNBF16(parent): + def get_numeric_grad(self, place, check_name): + scope = core.Scope() + self._check_grad_helper() + op = create_op( + scope, self.op_type, self.inputs, self.outputs, self.attrs + ) + return get_numeric_gradient( + place, scope, op, self.inputs_fp32, check_name, ['Output'] + ) + + def init_kernel_type(self): + self.use_cudnn = True + self.dtype = np.uint16 + + def test_check_output(self): + place = core.CUDAPlace(0) + self.check_output_with_place(place) + + def test_check_grad_no_filter(self): + place = core.CUDAPlace(0) + numeric_grads = self.get_numeric_grad(place, 'Input') + self.check_grad_with_place( + place, + ['Input'], + 'Output', + no_grad_set={'Filter'}, + user_defined_grads=[numeric_grads], + ) + + def test_check_grad(self): + if self.use_cudnn: + place = core.CUDAPlace(0) + self.check_grad_with_place( + place, + {'Input', 'Filter'}, + 'Output', + ) + + def test_check_grad_no_input(self): + place = core.CUDAPlace(0) + numeric_grads = self.get_numeric_grad(place, 'Filter') + self.check_grad_with_place( + place, + ['Filter'], + 'Output', + no_grad_set={'Input'}, + user_defined_grads=[numeric_grads], + ) + + cls_name = "{0}_{1}".format(parent.__name__, "CUDNNBF16OP") + TestConv3DCUDNNBF16.__name__ = cls_name + globals()[cls_name] = TestConv3DCUDNNBF16 + + def conv3d_transpose_wrapper( x, weight, @@ -172,12 +268,16 @@ def setUp(self): self.pad = [0, 0, 0] self.padding_algorithm = "EXPLICIT" self.init_op_type() + self.init_kernel_type() self.init_test_case() - input_ = np.random.random(self.input_size).astype("float32") - filter_ = np.random.random(self.filter_size).astype("float32") + if self.is_bfloat16_op(): + input = np.random.random(self.input_size).astype(np.float32) + filter = np.random.random(self.filter_size).astype(np.float32) + else: + input = np.random.random(self.input_size).astype(self.dtype) + filter = np.random.random(self.filter_size).astype(self.dtype) - self.inputs = {'Input': input_, 'Filter': filter_} self.attrs = { 'strides': self.stride, 'paddings': self.pad, @@ -189,9 +289,26 @@ def setUp(self): } output = conv3dtranspose_forward_naive( - input_, filter_, self.attrs + input, filter, self.attrs ).astype("float32") + if 
self.is_bfloat16_op(): + output = output.astype(np.float32) + self.inputs = { + 'Input': convert_float_to_uint16(input), + 'Filter': convert_float_to_uint16(filter), + } + self.inputs_fp32 = { + 'Input': input, + 'Filter': filter, + } + else: + output = output.astype(self.dtype) + self.inputs = { + 'Input': input, + 'Filter': filter, + } + self.outputs = {'Output': output} def test_check_output(self): @@ -264,6 +381,9 @@ def init_op_type(self): self.op_type = "conv3d_transpose" self.python_api = conv3d_transpose_wrapper + def init_kernel_type(self): + self.dtype = np.float32 + class TestWithSymmetricPad(TestConv3DTransposeOp): def init_test_case(self): @@ -596,6 +716,30 @@ def init_op_type(self): self.python_api = conv3d_transpose_wrapper +# ----------------Conv3DTransposeCUDNN fp16---------------- +create_test_cudnn_fp16_class(TestConv3DTransposeOp) +create_test_cudnn_fp16_class(TestWithSymmetricPad) +create_test_cudnn_fp16_class(TestWithAsymmetricPad) +create_test_cudnn_fp16_class(TestWithSAMEPad) +create_test_cudnn_fp16_class(TestWithVALIDPad) +create_test_cudnn_fp16_class(TestWithStride) +create_test_cudnn_fp16_class(TestWithGroups) +create_test_cudnn_fp16_class(TestWithDilation) +create_test_cudnn_fp16_class(Test_NHWC) + + +# ----------------Conv3DTransposeCUDNN bf16---------------- +create_test_cudnn_bf16_class(TestConv3DTransposeOp) +create_test_cudnn_bf16_class(TestWithSymmetricPad) +create_test_cudnn_bf16_class(TestWithAsymmetricPad) +create_test_cudnn_bf16_class(TestWithSAMEPad) +create_test_cudnn_bf16_class(TestWithVALIDPad) +create_test_cudnn_bf16_class(TestWithStride) +create_test_cudnn_bf16_class(TestWithGroups) +create_test_cudnn_bf16_class(TestWithDilation) +create_test_cudnn_bf16_class(Test_NHWC) + + class TestConv3dTranspose(unittest.TestCase): def error_weight_input(self): array = np.array([1], dtype=np.float32) diff --git a/python/paddle/fluid/tests/unittests/test_conv3d_transpose_part2_op.py b/python/paddle/fluid/tests/unittests/test_conv3d_transpose_part2_op.py index b51cdd9b1087a..ac638867338fc 100644 --- a/python/paddle/fluid/tests/unittests/test_conv3d_transpose_part2_op.py +++ b/python/paddle/fluid/tests/unittests/test_conv3d_transpose_part2_op.py @@ -15,7 +15,7 @@ import unittest import numpy as np -from test_conv3d_transpose_op import TestConv3DTransposeOp +from test_conv3d_transpose_op import TestConv3DTransposeOp, create_test_cudnn_fp16_class, create_test_cudnn_bf16_class import paddle from paddle import fluid @@ -84,6 +84,22 @@ def init_test_case(self): self.data_format = 'NHWC' +# ----------------Conv3DTransposeCUDNN fp16---------------- +create_test_cudnn_fp16_class(TestWithSymmetricPad_NHWC) +create_test_cudnn_fp16_class(TestWithAsymmetricPad_NHWC) +create_test_cudnn_fp16_class(TestWithGroups_NHWC) +create_test_cudnn_fp16_class(TestWithStride_NHWC) +create_test_cudnn_fp16_class(TestWithDilation_NHWC) + + +# ----------------Conv3DTransposeCUDNN bf16---------------- +create_test_cudnn_bf16_class(TestWithSymmetricPad_NHWC) +create_test_cudnn_bf16_class(TestWithAsymmetricPad_NHWC) +create_test_cudnn_bf16_class(TestWithGroups_NHWC) +create_test_cudnn_bf16_class(TestWithStride_NHWC) +create_test_cudnn_bf16_class(TestWithDilation_NHWC) + + class TestConv3DTransposeAPI(unittest.TestCase): def test_case1(self): data1 = paddle.static.data( From 5172c6c8919ac6044fb53684dfe005c6160d4bb1 Mon Sep 17 00:00:00 2001 From: Difers <707065510@qq.com> Date: Fri, 31 Mar 2023 02:13:02 +0000 Subject: [PATCH 2/8] fix some merge error --- 
paddle/phi/kernels/gpudnn/conv_transpose_grad_kernel.cu | 3 +-- paddle/phi/kernels/gpudnn/conv_transpose_kernel.cu | 6 +++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/paddle/phi/kernels/gpudnn/conv_transpose_grad_kernel.cu b/paddle/phi/kernels/gpudnn/conv_transpose_grad_kernel.cu index ea25b795beb65..50bae0a8bca3e 100644 --- a/paddle/phi/kernels/gpudnn/conv_transpose_grad_kernel.cu +++ b/paddle/phi/kernels/gpudnn/conv_transpose_grad_kernel.cu @@ -1070,8 +1070,7 @@ PD_REGISTER_KERNEL(conv3d_transpose_grad, ALL_LAYOUT, phi::Conv3dTransposeGradGPUDNNKernel, float, - float16, - phi::dtype::bfloat16) {} + float16) {} #else #if CUDNN_VERSION_MIN(8, 1, 0) PD_REGISTER_KERNEL(conv2d_transpose_grad, diff --git a/paddle/phi/kernels/gpudnn/conv_transpose_kernel.cu b/paddle/phi/kernels/gpudnn/conv_transpose_kernel.cu index 56e9212f0b60d..df360ab388a6d 100644 --- a/paddle/phi/kernels/gpudnn/conv_transpose_kernel.cu +++ b/paddle/phi/kernels/gpudnn/conv_transpose_kernel.cu @@ -366,8 +366,7 @@ PD_REGISTER_KERNEL(conv3d_transpose, ALL_LAYOUT, phi::Conv3dTransposeGPUDNNKernel, float, - float16, - phi::dtype::bfloat16) {} + float16) {} #else #if CUDNN_VERSION_MIN(8, 1, 0) PD_REGISTER_KERNEL(conv2d_transpose, @@ -376,7 +375,8 @@ PD_REGISTER_KERNEL(conv2d_transpose, phi::Conv2dTransposeGPUDNNKernel, float, double, - float16) {} + float16, + phi::dtype::bfloat16) {} PD_REGISTER_KERNEL(conv3d_transpose, GPUDNN, ALL_LAYOUT, From 2bd3be2530ce7a91955697cf33f13b1ade9f5d88 Mon Sep 17 00:00:00 2001 From: Difers <707065510@qq.com> Date: Fri, 31 Mar 2023 14:10:12 +0000 Subject: [PATCH 3/8] fix codestyle --- .../fluid/tests/unittests/test_conv3d_transpose_part2_op.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/python/paddle/fluid/tests/unittests/test_conv3d_transpose_part2_op.py b/python/paddle/fluid/tests/unittests/test_conv3d_transpose_part2_op.py index ac638867338fc..54d31a7bbd22b 100644 --- a/python/paddle/fluid/tests/unittests/test_conv3d_transpose_part2_op.py +++ b/python/paddle/fluid/tests/unittests/test_conv3d_transpose_part2_op.py @@ -15,7 +15,11 @@ import unittest import numpy as np -from test_conv3d_transpose_op import TestConv3DTransposeOp, create_test_cudnn_fp16_class, create_test_cudnn_bf16_class +from test_conv3d_transpose_op import ( + TestConv3DTransposeOp, + create_test_cudnn_bf16_class, + create_test_cudnn_fp16_class, +) import paddle from paddle import fluid From 6f661ced2d98b2e2aadc87adc796832c0d057bd2 Mon Sep 17 00:00:00 2001 From: Difers <707065510@qq.com> Date: Fri, 31 Mar 2023 14:14:05 +0000 Subject: [PATCH 4/8] fix typo --- python/paddle/fluid/tests/unittests/test_conv3d_op.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_conv3d_op.py b/python/paddle/fluid/tests/unittests/test_conv3d_op.py index d11c6dea7b6ad..b835922e9af76 100644 --- a/python/paddle/fluid/tests/unittests/test_conv3d_op.py +++ b/python/paddle/fluid/tests/unittests/test_conv3d_op.py @@ -219,7 +219,6 @@ def test_check_grad_no_filter(self): place, ['Input'], 'Output', - # max_relative_error=0.03, no_grad_set={'Filter'}, check_dygraph=(not self.use_mkldnn), ) @@ -233,12 +232,11 @@ def test_check_grad_no_input(self): place, ['Filter'], 'Output', - # max_relative_error=0.03, no_grad_set={'Input'}, check_dygraph=(not self.use_mkldnn), ) - cls_name = "{0}_{1}".format(parent.__name__, "CUDNNBF16OP") + cls_name = "{}_{}".format(parent.__name__, "CUDNNBF16OP") TestConv3DCUDNNBF16.__name__ = cls_name globals()[cls_name] = 
TestConv3DCUDNNBF16 From 6d68270933acc6248ee10c4d015f05cc3305ca7e Mon Sep 17 00:00:00 2001 From: Difers <707065510@qq.com> Date: Sat, 1 Apr 2023 01:22:24 +0000 Subject: [PATCH 5/8] fix codestyle --- .../paddle/fluid/tests/unittests/test_conv3d_transpose_op.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_conv3d_transpose_op.py b/python/paddle/fluid/tests/unittests/test_conv3d_transpose_op.py index dd7b8283be5c3..375fa481e7169 100644 --- a/python/paddle/fluid/tests/unittests/test_conv3d_transpose_op.py +++ b/python/paddle/fluid/tests/unittests/test_conv3d_transpose_op.py @@ -164,7 +164,7 @@ def test_check_grad_no_input(self): place, ['Filter'], 'Output', no_grad_set={'Input'} ) - cls_name = "{0}_{1}".format(parent.__name__, "CUDNNFP16OP") + cls_name = "{}_{}".format(parent.__name__, "CUDNNFP16OP") TestConv3DCUDNNFP16.__name__ = cls_name globals()[cls_name] = TestConv3DCUDNNFP16 @@ -225,7 +225,7 @@ def test_check_grad_no_input(self): user_defined_grads=[numeric_grads], ) - cls_name = "{0}_{1}".format(parent.__name__, "CUDNNBF16OP") + cls_name = "{}_{}".format(parent.__name__, "CUDNNBF16OP") TestConv3DCUDNNBF16.__name__ = cls_name globals()[cls_name] = TestConv3DCUDNNBF16 From 574ffba12f569b7b046cd482c44a070b434df626 Mon Sep 17 00:00:00 2001 From: Difers <707065510@qq.com> Date: Tue, 4 Apr 2023 11:54:19 +0000 Subject: [PATCH 6/8] fix some error --- .../fluid/tests/unittests/test_conv3d_op.py | 2 +- .../unittests/test_conv3d_transpose_op.py | 37 ++++++------------- 2 files changed, 12 insertions(+), 27 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_conv3d_op.py b/python/paddle/fluid/tests/unittests/test_conv3d_op.py index b835922e9af76..b4f0c24d134d3 100644 --- a/python/paddle/fluid/tests/unittests/test_conv3d_op.py +++ b/python/paddle/fluid/tests/unittests/test_conv3d_op.py @@ -395,7 +395,7 @@ def setUp(self): ).astype(self.dtype) if self.is_bfloat16_op(): - output = output.astype(np.float32) + output = convert_float_to_uint16(output) self.inputs = { 'Input': convert_float_to_uint16(input), 'Filter': convert_float_to_uint16(filter), diff --git a/python/paddle/fluid/tests/unittests/test_conv3d_transpose_op.py b/python/paddle/fluid/tests/unittests/test_conv3d_transpose_op.py index 375fa481e7169..a040e11d6ee9c 100644 --- a/python/paddle/fluid/tests/unittests/test_conv3d_transpose_op.py +++ b/python/paddle/fluid/tests/unittests/test_conv3d_transpose_op.py @@ -19,10 +19,9 @@ import paddle paddle.enable_static() -from eager_op_test import OpTest, convert_float_to_uint16, get_numeric_gradient +from eager_op_test import OpTest, convert_float_to_uint16 from paddle.fluid import core -from paddle.fluid.tests.unittests.testsuite import create_op def conv3dtranspose_forward_naive(input_, filter_, attrs): @@ -176,16 +175,6 @@ def create_test_cudnn_bf16_class(parent): "core is not compiled with CUDA and do not support bfloat16", ) class TestConv3DCUDNNBF16(parent): - def get_numeric_grad(self, place, check_name): - scope = core.Scope() - self._check_grad_helper() - op = create_op( - scope, self.op_type, self.inputs, self.outputs, self.attrs - ) - return get_numeric_gradient( - place, scope, op, self.inputs_fp32, check_name, ['Output'] - ) - def init_kernel_type(self): self.use_cudnn = True self.dtype = np.uint16 @@ -194,17 +183,6 @@ def test_check_output(self): place = core.CUDAPlace(0) self.check_output_with_place(place) - def test_check_grad_no_filter(self): - place = core.CUDAPlace(0) - numeric_grads = 
self.get_numeric_grad(place, 'Input') - self.check_grad_with_place( - place, - ['Input'], - 'Output', - no_grad_set={'Filter'}, - user_defined_grads=[numeric_grads], - ) - def test_check_grad(self): if self.use_cudnn: place = core.CUDAPlace(0) @@ -214,15 +192,22 @@ def test_check_grad(self): 'Output', ) + def test_check_grad_no_filter(self): + place = core.CUDAPlace(0) + self.check_grad_with_place( + place, + ['Input'], + 'Output', + no_grad_set={'Filter'}, + ) + def test_check_grad_no_input(self): place = core.CUDAPlace(0) - numeric_grads = self.get_numeric_grad(place, 'Filter') self.check_grad_with_place( place, ['Filter'], 'Output', no_grad_set={'Input'}, - user_defined_grads=[numeric_grads], ) cls_name = "{}_{}".format(parent.__name__, "CUDNNBF16OP") @@ -293,7 +278,6 @@ def setUp(self): ).astype("float32") if self.is_bfloat16_op(): - output = output.astype(np.float32) self.inputs = { 'Input': convert_float_to_uint16(input), 'Filter': convert_float_to_uint16(filter), @@ -302,6 +286,7 @@ def setUp(self): 'Input': input, 'Filter': filter, } + else: output = output.astype(self.dtype) self.inputs = { From 43451f8aac27c3e478f2d3d2a78acf57b5315f2f Mon Sep 17 00:00:00 2001 From: Difers <707065510@qq.com> Date: Tue, 11 Apr 2023 06:30:45 +0000 Subject: [PATCH 7/8] add redef float2uint --- .../unittests/test_conv3d_transpose_op.py | 25 ++++++++++++++----- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_conv3d_transpose_op.py b/python/paddle/fluid/tests/unittests/test_conv3d_transpose_op.py index a040e11d6ee9c..cd2bb65d71245 100644 --- a/python/paddle/fluid/tests/unittests/test_conv3d_transpose_op.py +++ b/python/paddle/fluid/tests/unittests/test_conv3d_transpose_op.py @@ -19,11 +19,25 @@ import paddle paddle.enable_static() -from eager_op_test import OpTest, convert_float_to_uint16 +from eager_op_test import OpTest, copy_bits_from_float_to_uint16 from paddle.fluid import core +def convert_float_to_uint16(float_list, data_format="NCHW"): + if data_format == "NHWC": + float_list = np.transpose(float_list, [0, 4, 1, 2, 3]) + + new_output = [] + for x in np.nditer(float_list): + new_output.append(np.uint16(copy_bits_from_float_to_uint16(x))) + new_output = np.reshape(new_output, float_list.shape).view(np.uint16) + + if data_format == "NHWC": + new_output = np.transpose(new_output, [0, 2, 3, 4, 1]) + return new_output + + def conv3dtranspose_forward_naive(input_, filter_, attrs): padding_algorithm = attrs['padding_algorithm'] if padding_algorithm not in ["SAME", "VALID", "EXPLICIT"]: @@ -282,17 +296,16 @@ def setUp(self): 'Input': convert_float_to_uint16(input), 'Filter': convert_float_to_uint16(filter), } - self.inputs_fp32 = { - 'Input': input, - 'Filter': filter, - } + output = convert_float_to_uint16( + output, data_format=self.data_format + ) else: - output = output.astype(self.dtype) self.inputs = { 'Input': input, 'Filter': filter, } + output = output.astype(self.dtype) self.outputs = {'Output': output} From 4d11f4da4571738a33aabc40a573a5e4df5ab9c5 Mon Sep 17 00:00:00 2001 From: Difers <707065510@qq.com> Date: Tue, 18 Apr 2023 13:51:28 +0000 Subject: [PATCH 8/8] fix conv3d and conv3d_transpose --- .../fluid/tests/unittests/test_conv3d_op.py | 79 ++++++++++++------- .../unittests/test_conv3d_transpose_op.py | 29 +++---- 2 files changed, 63 insertions(+), 45 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_conv3d_op.py b/python/paddle/fluid/tests/unittests/test_conv3d_op.py index b4f0c24d134d3..d2b6545a81a4c 100644 --- 
a/python/paddle/fluid/tests/unittests/test_conv3d_op.py +++ b/python/paddle/fluid/tests/unittests/test_conv3d_op.py @@ -15,10 +15,16 @@ import unittest import numpy as np -from eager_op_test import OpTest, convert_float_to_uint16, paddle_static_guard +from eager_op_test import ( + OpTest, + convert_float_to_uint16, + get_numeric_gradient, + paddle_static_guard, +) import paddle from paddle.fluid import core +from paddle.fluid.tests.unittests.testsuite import create_op def conv3d_forward_naive( @@ -186,54 +192,63 @@ def create_test_cudnn_bf16_class(parent): "core is not compiled with CUDA and do not support bfloat16", ) class TestConv3DCUDNNBF16(parent): + def get_numeric_grad(self, place, check_name): + scope = core.Scope() + self._check_grad_helper() + op = create_op( + scope, self.op_type, self.inputs, self.outputs, self.attrs + ) + return get_numeric_gradient( + place, scope, op, self.inputs_fp32, check_name, ['Output'] + ) + def init_kernel_type(self): self.use_cudnn = True - self.no_need_check_grad = False self.dtype = np.uint16 def test_check_output(self): - # TODO(wangzhongpu): support mkldnn op in dygraph mode - place = core.CUDAPlace(0) if self.has_cudnn() else core.CPUPlace() + place = core.CUDAPlace(0) self.check_output_with_place( place, check_dygraph=(not self.use_mkldnn) ) - def test_check_grad(self): - if hasattr(self, "no_need_check_grad") and self.no_need_check_grad: - return - place = core.CUDAPlace(0) if self.has_cudnn() else core.CPUPlace() - # TODO(wangzhongpu): support mkldnn op in dygraph mode - self.check_grad_with_place( - place, - {'Input', 'Filter'}, - 'Output', - check_dygraph=(not self.use_mkldnn), - ) - def test_check_grad_no_filter(self): - if hasattr(self, "no_need_check_grad") and self.no_need_check_grad: - return - place = core.CUDAPlace(0) if self.has_cudnn() else core.CPUPlace() - # TODO(wangzhongpu): support mkldnn op in dygraph mode + place = core.CUDAPlace(0) + numeric_grads = self.get_numeric_grad(place, 'Input') + self.check_grad_with_place( place, ['Input'], 'Output', no_grad_set={'Filter'}, check_dygraph=(not self.use_mkldnn), + user_defined_grads=[numeric_grads], ) def test_check_grad_no_input(self): - if hasattr(self, "no_need_check_grad") and self.no_need_check_grad: - return - place = core.CUDAPlace(0) if self.has_cudnn() else core.CPUPlace() - # TODO(wangzhongpu): support mkldnn op in dygraph mode + place = core.CUDAPlace(0) + numeric_grads = self.get_numeric_grad(place, 'Filter') + self.check_grad_with_place( place, ['Filter'], 'Output', no_grad_set={'Input'}, check_dygraph=(not self.use_mkldnn), + user_defined_grads=[numeric_grads], + ) + + def test_check_grad(self): + place = core.CUDAPlace(0) + numeric_input_grads = self.get_numeric_grad(place, 'Input') + numeric_fliter_grads = self.get_numeric_grad(place, 'Filter') + + self.check_grad_with_place( + place, + {'Input', 'Filter'}, + 'Output', + user_defined_grads=[numeric_input_grads, numeric_fliter_grads], + check_dygraph=(not self.use_mkldnn), ) cls_name = "{}_{}".format(parent.__name__, "CUDNNBF16OP") @@ -385,14 +400,19 @@ def setUp(self): 'dilations': self.dilations, } - input = np.random.random(self.input_size).astype(self.dtype) - filter = np.random.random(self.filter_size).astype(self.dtype) + if self.is_bfloat16_op(): + input = np.random.random(self.input_size).astype(np.float32) + filter = np.random.random(self.filter_size).astype(np.float32) + else: + input = np.random.random(self.input_size).astype(self.dtype) + filter = np.random.random(self.filter_size).astype(self.dtype) + 
output = conv3d_forward_naive( input, filter, self.groups, conv3d_param, - ).astype(self.dtype) + ) if self.is_bfloat16_op(): output = convert_float_to_uint16(output) @@ -400,7 +420,10 @@ def setUp(self): 'Input': convert_float_to_uint16(input), 'Filter': convert_float_to_uint16(filter), } - + self.inputs_fp32 = { + 'Input': OpTest.np_dtype_to_fluid_dtype(input), + 'Filter': OpTest.np_dtype_to_fluid_dtype(filter), + } else: output = output.astype(self.dtype) self.inputs = { diff --git a/python/paddle/fluid/tests/unittests/test_conv3d_transpose_op.py b/python/paddle/fluid/tests/unittests/test_conv3d_transpose_op.py index cd2bb65d71245..c0814754cc231 100644 --- a/python/paddle/fluid/tests/unittests/test_conv3d_transpose_op.py +++ b/python/paddle/fluid/tests/unittests/test_conv3d_transpose_op.py @@ -152,7 +152,7 @@ def create_test_cudnn_fp16_class(parent, grad_check=True): @unittest.skipIf( not core.is_compiled_with_cuda(), "core is not compiled with CUDA" ) - class TestConv3DCUDNNFP16(parent): + class TestConv3DTransposeCUDNNFP16(parent): def init_kernel_type(self): self.use_cudnn = True self.dtype = np.float16 @@ -178,8 +178,8 @@ def test_check_grad_no_input(self): ) cls_name = "{}_{}".format(parent.__name__, "CUDNNFP16OP") - TestConv3DCUDNNFP16.__name__ = cls_name - globals()[cls_name] = TestConv3DCUDNNFP16 + TestConv3DTransposeCUDNNFP16.__name__ = cls_name + globals()[cls_name] = TestConv3DTransposeCUDNNFP16 def create_test_cudnn_bf16_class(parent): @@ -188,7 +188,7 @@ def create_test_cudnn_bf16_class(parent): or not core.is_bfloat16_supported(core.CUDAPlace(0)), "core is not compiled with CUDA and do not support bfloat16", ) - class TestConv3DCUDNNBF16(parent): + class TestConv3DTransposeCUDNNBF16(parent): def init_kernel_type(self): self.use_cudnn = True self.dtype = np.uint16 @@ -198,13 +198,12 @@ def test_check_output(self): self.check_output_with_place(place) def test_check_grad(self): - if self.use_cudnn: - place = core.CUDAPlace(0) - self.check_grad_with_place( - place, - {'Input', 'Filter'}, - 'Output', - ) + place = core.CUDAPlace(0) + self.check_grad_with_place( + place, + {'Input', 'Filter'}, + 'Output', + ) def test_check_grad_no_filter(self): place = core.CUDAPlace(0) @@ -225,8 +224,8 @@ def test_check_grad_no_input(self): ) cls_name = "{}_{}".format(parent.__name__, "CUDNNBF16OP") - TestConv3DCUDNNBF16.__name__ = cls_name - globals()[cls_name] = TestConv3DCUDNNBF16 + TestConv3DTransposeCUDNNBF16.__name__ = cls_name + globals()[cls_name] = TestConv3DTransposeCUDNNBF16 def conv3d_transpose_wrapper( @@ -296,10 +295,6 @@ def setUp(self): 'Input': convert_float_to_uint16(input), 'Filter': convert_float_to_uint16(filter), } - output = convert_float_to_uint16( - output, data_format=self.data_format - ) - else: self.inputs = { 'Input': input,
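
A note on the central idiom in this series: every test file touched above grows its fp16/bf16 coverage through factory functions (create_test_cudnn_fp16_class, create_test_cudnn_bf16_class) that subclass an existing OpTest case, override only init_kernel_type to switch the dtype, rename the subclass, and register it in module globals() so unittest discovery picks it up. The following is a minimal, Paddle-free sketch of that pattern; the class and factory names here are illustrative stand-ins, not the PR's actual helpers.

import unittest

import numpy as np


class TestAddOp(unittest.TestCase):
    # Stand-in for an OpTest case such as TestConv3DTransposeOp.
    def init_kernel_type(self):
        self.dtype = np.float32

    def test_forward(self):
        self.init_kernel_type()
        x = np.ones((2, 2), dtype=self.dtype)
        np.testing.assert_array_equal(
            x + x, np.full((2, 2), 2, dtype=self.dtype)
        )


def create_test_fp16_class(parent):
    # Subclass the parent, override only the kernel setup, give the new
    # class a derived name, and publish it in globals() so that test
    # discovery instantiates both the float32 and float16 variants.
    class TestFP16(parent):
        def init_kernel_type(self):
            self.dtype = np.float16

    cls_name = "{}_{}".format(parent.__name__, "FP16OP")
    TestFP16.__name__ = cls_name
    globals()[cls_name] = TestFP16


create_test_fp16_class(TestAddOp)

if __name__ == '__main__':
    unittest.main()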
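
The bfloat16 cases also share one data convention, visible in PATCH 7/8's redefined convert_float_to_uint16: NumPy has no native bfloat16, so inputs are generated as float32 and converted to np.uint16 arrays holding each value's high 16 bits, with NDHWC data transposed to channel-first before the conversion and back afterwards. Below is a vectorized sketch of that round trip, assuming plain bit truncation rather than whatever rounding Paddle's copy_bits_from_float_to_uint16 applies; both helper names are hypothetical.

import numpy as np


def to_bfloat16_bits(x, data_format="NCDHW"):
    # Hypothetical helper, not Paddle's convert_float_to_uint16: bfloat16
    # is the top 16 bits of an IEEE float32, so truncating the low half of
    # each 32-bit word yields the uint16 representation the tests feed in.
    x = np.asarray(x, dtype=np.float32)
    if data_format == "NDHWC":
        # Mirror the transpose dance in PATCH 7/8: convert in channel-first
        # order, then restore the original layout afterwards.
        x = np.ascontiguousarray(np.transpose(x, [0, 4, 1, 2, 3]))
    bits = (x.view(np.uint32) >> 16).astype(np.uint16)
    if data_format == "NDHWC":
        bits = np.transpose(bits, [0, 2, 3, 4, 1])
    return bits


def from_bfloat16_bits(bits):
    # Widen back to float32 by zero-filling the discarded low 16 bits.
    return (bits.astype(np.uint32) << 16).view(np.float32)


x = np.random.random((2, 3, 4, 4, 4)).astype(np.float32)
roundtrip = from_bfloat16_bits(to_bfloat16_bits(x, data_format="NDHWC"))
# bfloat16 keeps only 7 explicit mantissa bits, so values in [0, 1)
# survive the round trip to within about 2**-8 -- the same reason the
# fp16/bf16 tests above check outputs with loosened tolerances.
np.testing.assert_allclose(roundtrip, x, atol=1e-2)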