import numpy as np
import pycuda.driver as cuda
from collections import defaultdict

# Output dtypes keyed by binding name; anything unlisted defaults to float32.
DTYPES = defaultdict(lambda: np.float32)
DTYPES["num_detections"] = np.int32


def do_inference(
    context, bindings, inputs, input_data, outputs, output_data, stream, batch_size=1
):
    # Dynamic shapes: tell the execution context the actual input shape for
    # this call. Binding 0 is assumed to be the (single) input binding.
    context.set_binding_shape(0, input_data.shape)

    # Copy the flattened input to the device. Note this assumes exactly one
    # input binding, since flattened_input_data holds a single array.
    flattened_input_data = [input_data.ravel()]
    for i, input_ in enumerate(inputs):
        cuda.memcpy_htod_async(input_, flattened_input_data[i], stream)

    context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)

    # Copy each output back into freshly allocated page-locked host memory,
    # which is required for truly asynchronous device-to-host transfers.
    data = []
    for i, device_output in enumerate(outputs):
        size, dtype = output_data[i]
        host_output = cuda.pagelocked_zeros(batch_size * size, dtype=dtype)
        cuda.memcpy_dtoh_async(host_output, device_output, stream)
        data.append(host_output)

    # Block until all queued copies and the kernel have finished.
    stream.synchronize()
    return data


def allocate_buffers(engine):
    inputs = []
    outputs = []
    data = []
    bindings = []
    for binding in engine:
        shape = engine.get_binding_shape(binding)
        size = calculate_volume(shape)
        dtype = DTYPES[str(binding)]
        # Device buffers are sized for the largest supported batch.
        device_mem = cuda.mem_alloc(
            size * engine.max_batch_size * np.dtype(dtype).itemsize
        )
        bindings.append(int(device_mem))
        if engine.binding_is_input(binding):
            inputs.append(device_mem)
        else:
            outputs.append(device_mem)
            # Record (size, dtype) so do_inference can allocate matching
            # host buffers; kept parallel to `outputs`.
            data.append((size, dtype))
    return inputs, outputs, data, bindings


def calculate_volume(shape):
    # Number of elements per batch item; -1 marks the dynamic batch dimension
    # and is skipped.
    volume = 1
    for dim in shape:
        if dim == -1:
            continue
        volume *= dim
    return volume
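
# ---------------------------------------------------------------------------
# A minimal usage sketch (an assumption, not part of the original source):
# it wires the helpers above to a serialized TensorRT engine. The file path
# "model.trt" and the input shape (1, 3, 224, 224) are hypothetical
# placeholders; substitute your own engine and input binding shape.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import pycuda.autoinit  # noqa: F401 -- creates a CUDA context on import
    import tensorrt as trt

    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    with open("model.trt", "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
        engine = runtime.deserialize_cuda_engine(f.read())

    context = engine.create_execution_context()
    inputs, outputs, output_data, bindings = allocate_buffers(engine)
    stream = cuda.Stream()

    # A dummy batch; the real shape must match the engine's input binding.
    input_data = np.random.rand(1, 3, 224, 224).astype(np.float32)
    results = do_inference(
        context, bindings, inputs, input_data, outputs, output_data, stream
    )
    print([r.shape for r in results])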