Skip to content

Commit

Permalink
push for val
Browse files Browse the repository at this point in the history
  • Loading branch information
Ishaan-Datta committed Sep 12, 2024
1 parent 6a7d199 commit 57563ad
Show file tree
Hide file tree
Showing 4 changed files with 132 additions and 106 deletions.
94 changes: 53 additions & 41 deletions conversion_tools/ONNX_TRT.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,30 @@ def get_max_memory():
print(f"Max memory to use: {max_mem / (1024**2)} MB")
return max_mem

def convert_onnx_to_trt(model_path, output_path, batch_size):
def convert_onnx_to_trt(model_path, output_path, FP16, INT8, strip_weights, batch_size, verbose):
# # Simplify the ONNX model (optional)
# print("Loading the ONNX model")
# onnx_model = onnx.load(model_path)
# graph = gs.import_onnx(onnx_model)
# graph.toposort()
# onnx_model = gs.export_onnx(graph)

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
if verbose:
TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE)
else:
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

builder = trt.Builder(TRT_LOGGER)
config = builder.create_builder_config()

# Set cache
cache = config.create_timing_cache(b"")
config.set_timing_cache(cache, ignore_mismatch=False)

# Set max workspace
max_workspace = (1 << 30) # 15
config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, max_workspace)

network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
parser = trt.OnnxParser(network, TRT_LOGGER)

Expand All @@ -32,55 +46,53 @@ def convert_onnx_to_trt(model_path, output_path, batch_size):
if not parser.parse(model_file.read()):
for error in range(parser.num_errors):
print(parser.get_error(error))
return

inputs = [network.get_input(i) for i in range(network.num_inputs)]
outputs = [network.get_output(i) for i in range(network.num_outputs)]

for input in inputs:
print(f"Model {input.name} shape: {input.shape} {input.dtype}")
for output in outputs:
print(f"Model {output.name} shape: {output.shape} {output.dtype}")

config = builder.create_builder_config()
# config.fp16_mode = FP16_mode
config.max_batch_size = batch_size
config.max_workspace_size = 15
if FP16:
config.set_flag(trt.BuilderFlag.FP16)
elif INT8:
config.set_flag(trt.BuilderFlag.INT8)

if strip_weights:
config.set_flag(trt.BuilderFlag.STRIP_PLAN)

# if batch_size > 1:
# # https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#opt_profiles
# profile = builder.create_optimization_profile()
# min_shape = [1] + shape_input_model[-3:]
# opt_shape = [int(max_batch_size/2)] + shape_input_model[-3:]
# max_shape = shape_input_model
# for input in inputs:
# profile.set_shape(input.name, min_shape, opt_shape, max_shape)
# config.add_optimization_profile(profile)

print("Building TensorRT engine. This may take a few minutes.")
engine = builder.build_cuda_engine(network, config)

# Serialize the TensorRT engine to a file
with open(output_path, 'wb') as f:
f.write(engine.serialize())
engine_bytes = builder.build_serialized_network(network, config)
with open(output_path, "wb") as f:
f.write(engine_bytes)

print("Engine built successfully")
print(f"Converted TensorRT engine saved at {output_path}")
return engine, TRT_LOGGER

# def build_engine(self, onnx_file_path):
# TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
# with trt.Builder(TRT_LOGGER) as builder, \
# builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)) as network, \
# trt.OnnxParser(network, TRT_LOGGER) as parser:

# with open(onnx_file_path, 'rb') as model:
# if not parser.parse(model.read()):
# for error in range(parser.num_errors):
# self.get_logger().error(parser.get_error(error))
# return None

# config = builder.create_builder_config()
# config.max_workspace_size = 1 << 30 # 1GB
# builder.max_batch_size = 1

# engine = builder.build_engine(network, config)
# with open("model.trt", "wb") as f:
# f.write(engine.serialize())
# return engine


if __name__ == "__main__":
print("Usage: python3 Onnx_TensorRT.py <model_path> <output_path> FP16_mode batch_size input_shape")
print("Example: python3 Onnx_TensorRT.py ./model.onnx ./model_trt.trt True 1 (1, 3, 224, 224)")
print("Usage: python3 ONNX_TRT.py --model_path=/home/user/Downloads/model.onnx --output_path=/home/user/Downloads/model.engine --FP16=False --INT8=False --strip_weights=False --batch_size=1 --verbose=False ")

parser = argparse.ArgumentParser(description='Convert Onnx model to TensorRT')
parser.add_argument('--modelpath', type=str, default="./model.onnx", required=False, help='Path to the PyTorch model file (.pt)')
parser.add_argument('--outputpath', type=str, default="model_trt.trt", required=False, help='Path to save the converted TensorRT model file (.trt)')
# parser.add_argument('--FP16_mode', type=bool, default=True, help='FP16 mode for TensorRT')
parser.add_argument('--batch_size', type=int, default=1, help='Batch size for TensorRT')
parser.add_argument('--model_path', type=str, default="/home/user/Downloads/model.onnx", required=False, help='Path to the PyTorch model file (.pt)')
parser.add_argument('--output_path', type=str, default="/home/user/Downloads/model.engine", required=False, help='Path to save the converted TensorRT model file (.trt)')
parser.add_argument('--FP16', type=bool, default=False, help="FP16 precision mode")
parser.add_argument('--INT8', type=bool, default=False, help="INT8 precision mode")
parser.add_argument('--strip_weights', type=bool, default=False, help="Strip unnecessary weights")
parser.add_argument('--batch_size', type=int, default=1, help='Batch size')
parser.add_argument('--verbose', type=bool, default=False, help="Verbose TensorRT logging")
args = parser.parse_args()

convert_onnx_to_trt(args.modelpath, args.outputpath, args.batch_size)
convert_onnx_to_trt(args.model_path, args.output_path, args.FP16, args.INT8, args.strip_weights, args.batch_size, args.verbose)
41 changes: 11 additions & 30 deletions ros2_ws/src/python_workspace/python_workspace/camera_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def __init__(self):

# width is 448?
if self.get_parameter('model_type').get_parameter_value().string_value == 'maize':
self.dimensions = (1024, 448)
self.dimensions = (640, 640) # 448, 1014
elif self.get_parameter('model_type').get_parameter_value().string_value == 'weed':
self.dimensions = (1024, 448)
else:
Expand Down Expand Up @@ -101,7 +101,6 @@ def publish_video_frames(self): # replace with decord later
def publish_zed_frames(self):
# Create a ZED camera object
zed = sl.Camera()

# Set configuration parameters
init = sl.InitParameters()
init.camera_resolution = sl.RESOLUTION.HD1080 # HD720
Expand All @@ -117,12 +116,7 @@ def publish_zed_frames(self):

# Set runtime parameters after opening the camera
runtime = sl.RuntimeParameters()

# Prepare new image size to retrieve half-resolution images
image_size = zed.get_camera_information().camera_configuration.resolution
# image_size.width = image_size.width /2 # can we set any arbitary resolution here, instead of resizing later?
# image_size.height = image_size.height /2
image_zed = sl.Mat(image_size.width, image_size.height, sl.MAT_TYPE.U8_C3)
image_zed = sl.Mat()

sensors_data = sl.SensorsData()
previous_velocity = np.array([0.0, 0.0, 0.0])
Expand All @@ -133,7 +127,7 @@ def publish_zed_frames(self):
if err == sl.ERROR_CODE.SUCCESS:
self.index += 1
tic = time.perf_counter()
zed.retrieve_image(image_zed, sl.VIEW.LEFT_UNRECTIFIED, sl.MEM.CPU, image_size)
zed.retrieve_image(image_zed, sl.VIEW.LEFT_UNRECTIFIED)
zed.get_sensors_data(sensors_data, sl.TIME_REFERENCE.IMAGE)
accel_data = sensors_data.get_imu_data().get_linear_acceleration()

Expand All @@ -145,15 +139,15 @@ def publish_zed_frames(self):
previous_velocity = velocity
previous_time = current_time

cv_image = image_zed.get_data()

cv_image = image_zed.get_data()
# Upload the ZED image to CUDA
gpu_image = cv2.cuda_GpuMat()
gpu_image.upload(cv_image)

# Transform to BGR8 format and resize using CUDA
cv2.cuda.cvtColor(gpu_image, cv2.COLOR_BGRA2RGB)
cv2.cuda.resize(gpu_image, self.dimensions)
gpu_image = cv2.cuda.cvtColor(gpu_image, cv2.COLOR_RGBA2RGB)
# crop to ROI and resize
gpu_image = cv2.cuda.resize(gpu_image, self.dimensions)

# Convert the image to float32
# gpu_image = gpu_image.transpose((2, 0, 1)).astype(np.float32)
Expand All @@ -166,10 +160,10 @@ def publish_zed_frames(self):
# cv2.cuda.copyMakeBorder(image_gpu, 0, 1, 0, 0, cv2.BORDER_CONSTANT, image_gpu)

# Download the processed image from GPU to CPU memory
image_bgr = gpu_image.download()
rgb_image = gpu_image.download()
toc = time.perf_counter_ns()
# self.preprocessing_time = (toc - tic)/1e6
self.publish_image(image_bgr)
self.publish_image(rgb_image)
else:
self.get_logger().error("Failed to grab ZED camera frame: {str(err)}")

Expand All @@ -178,23 +172,10 @@ def publish_zed_frames(self):

def publish_image(self, image):
header = Header()
header.stamp = self.get_clock().now().to_msg() # maybe use ros time
header.stamp = self.get_clock().now().to_msg()
header.frame_id = str(self.index)
# header.side = 'left' if self.index % 2 == 0 else 'right'

if self.source_type != 'zed':
# Use CUDA for loading and processing the image
gpu_image = cv2.cuda_GpuMat()
gpu_image.upload(image)

# Convert to BGR8 format and resize using CUDA
cv2.cuda.cvtColor(image, cv2.COLOR_BGR2RGB)
cv2.cuda.resize(gpu_image, self.dimensions)

image = gpu_image.download() # does this update outside scope?

try:
image_msg = self.bridge.cv2_to_imgmsg(image, encoding='8UC4') # rgb8
image_msg = self.bridge.cv2_to_imgmsg(image, encoding="rgb8")
except CvBridgeError as e:
print(e)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import cv2
import numpy as np
# from tracker import *
# add object counter

import rclpy
from rclpy.node import Node
Expand Down
Loading

0 comments on commit 57563ad

Please sign in to comment.