# NOTE: stripped file-listing metadata residue ("67 lines / 1.7 KiB / Python")
# left over from extraction; it was not valid Python source.
import numpy as np
import pycuda.driver as cuda
from collections import defaultdict
DTYPES = defaultdict(lambda: np.float32)
|
|
DTYPES["num_detections"] = np.int32
def do_inference(
|
|
context, bindings, inputs, input_data, outputs, output_data, stream, batch_size=1
|
|
):
|
|
flattened_input_data = [input_data.ravel()]
|
|
for i, input_ in enumerate(inputs):
|
|
cuda.memcpy_htod_async(input_, flattened_input_data[i], stream)
|
|
|
|
context.set_binding_shape(0, input_data.shape)
|
|
context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)
|
|
|
|
data = []
|
|
|
|
for i, device_output in enumerate(outputs):
|
|
size, dtype = output_data[i]
|
|
host_output = cuda.pagelocked_zeros(batch_size * size, dtype=dtype)
|
|
|
|
cuda.memcpy_dtoh_async(host_output, device_output, stream)
|
|
data.append(host_output)
|
|
|
|
stream.synchronize()
|
|
|
|
return data
def allocate_buffers(engine):
|
|
inputs = []
|
|
outputs = []
|
|
data = []
|
|
bindings = []
|
|
|
|
for binding in engine:
|
|
shape = engine.get_binding_shape(binding)
|
|
size = calculate_volume(shape)
|
|
dtype = DTYPES[str(binding)]
|
|
host_mem = (size, dtype)
|
|
device_mem = cuda.mem_alloc(size * engine.max_batch_size * dtype().itemsize)
|
|
|
|
bindings.append(int(device_mem))
|
|
|
|
if engine.binding_is_input(binding):
|
|
inputs.append(device_mem)
|
|
else:
|
|
outputs.append(device_mem)
|
|
data.append(host_mem)
|
|
|
|
return inputs, outputs, data, bindings
def calculate_volume(shape):
|
|
volume = 1
|
|
for dim in shape:
|
|
# -1 indicates dynamic batching
|
|
if dim == -1:
|
|
continue
|
|
volume *= dim
|
|
|
|
return volume