# YOLOv5 single-image detection script.
# Import the required libraries
import os
import sys
from pathlib import Path
import numpy as np
import cv2
import torch
import torch.backends.cudnn as cudnn
# Locate this script on disk and make its directory importable.
FILE = Path(__file__).resolve()
ROOT = FILE.parent  # directory containing this file; used as the YOLOv5 root
if str(ROOT) not in sys.path:
    # Only affects the current process; lets packages next to this file be imported.
    sys.path.append(str(ROOT))
ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # re-express ROOT relative to the cwd
from models.common import DetectMultiBackend
from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadStreams
from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr,
increment_path, non_max_suppression, print_args, scale_segments, strip_optimizer, xyxy2xywh)
from utils.plots import Annotator, colors, save_one_box
from utils.torch_utils import select_device, time_sync
# 导入letterbox
from utils.augmentations import Albumentations, augment_hsv, copy_paste, letterbox, mixup, random_perspective
# ---------------------------------------------------------------------------
# Inference configuration
# ---------------------------------------------------------------------------
weights = ROOT / 'weights/best.pt'  # trained weights (.pt file)
source = ROOT / 'img'  # directory holding the test images / videos
# NOTE(review): the original note calls this a .yaml label file, but the value
# is a directory path -- confirm what DetectMultiBackend expects here.
data = ROOT / 'ANNOTATIONS/'
imgsz = (224, 224)  # inference size in pixels (validated against the stride below)
conf_thres = 0.25  # confidence threshold passed to NMS
iou_thres = 0.45  # IoU threshold passed to NMS
max_det = 1000  # maximum detections per image passed to NMS
classes = None  # optional class filter for NMS; None keeps every class
agnostic_nms = False  # class-agnostic NMS switch
augment = False  # forwarded to the model call as `augment`
visualize = False  # forwarded to the model call as `visualize`
half = False  # request FP16 inference (only honoured off-CPU, see below)
dnn = False  # forwarded to DetectMultiBackend as `dnn`

# Inference is pinned to the CPU. The previous `device = '0'` setting was
# overwritten before it was ever read, so it has been dropped.
device = torch.device('cpu')

# Load the model and read back the backend properties used during inference.
model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data)
stride, names, pt, jit, onnx, engine = model.stride, model.names, model.pt, model.jit, model.onnx, model.engine
imgsz = check_img_size(imgsz, s=stride)  # validate the image size against the stride

# FP16 is kept only for the listed backends and only when not running on the CPU.
half &= (pt or jit or onnx or engine) and device.type != 'cpu'
if pt or jit:
    if half:
        model.model.half()
    else:
        model.model.float()
def detect(img):
    """Run the module-level model on a single image and return its detections.

    Args:
        img: Image array such as the one returned by ``cv2.imread``. It is
            indexed as height x width x channels and its channel order is
            reversed (BGR -> RGB) before inference.

    Returns:
        list[dict]: One entry per detection with the keys ``'class'`` (name
        looked up in ``names``), ``'conf'`` (float confidence) and
        ``'position'`` (``[x1, y1, x2, y2]`` as ints).

    Raises:
        ValueError: If ``img`` is ``None``, e.g. because ``cv2.imread`` could
            not read the file.
    """
    if img is None:
        raise ValueError('detect() received None instead of an image; check the image path')

    model.warmup(imgsz=(1, 3, *imgsz))  # warmup

    # Pre-process: padded resize, HWC -> CHW, BGR -> RGB, contiguous memory.
    im0 = img
    im = letterbox(im0, imgsz, stride, auto=pt)[0]
    im = im.transpose((2, 0, 1))[::-1]
    im = np.ascontiguousarray(im)

    im = torch.from_numpy(im).to(device)
    im = im.half() if half else im.float()  # uint8 to fp16/32
    im /= 255  # 0 - 255 to 0.0 - 1.0
    if len(im.shape) == 3:
        im = im[None]  # add the batch dimension

    # Inference + NMS. Gradients are never needed here, so autograd is disabled.
    with torch.no_grad():
        t2 = time_sync()
        pred = model(im, augment=augment, visualize=visualize)
        t3 = time_sync()
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)

    detections = []
    for det in pred:  # one entry per image in the batch
        if not len(det):
            continue
        # Rescale coordinates from the network input size back to the original image.
        # NOTE(review): scale_segments is the helper this file imports; upstream
        # YOLOv5 rescales box coordinates with scale_boxes -- confirm this is the
        # intended helper for the installed version.
        det[:, :4] = scale_segments(im.shape[2:], det[:, :4], im0.shape).round()
        for *xyxy, conf, cls in reversed(det):
            detections.append({
                'class': names[int(cls)],
                'conf': float(conf),
                # The corner coordinates; the original referenced an undefined `xywh`.
                'position': [int(coord) for coord in xyxy],
            })

    # Echo the results
    for i in detections:
        print(i)
    # Inference time
    LOGGER.info(f'({t3 - t2:.3f}s)')
    return detections
if __name__ == '__main__':
    # Demo: run detection on one sample image, resolved relative to the cwd.
    # Built with pathlib instead of a backslash literal, which contained invalid
    # escape sequences and only worked on Windows.
    path = str(Path('img') / 'crazing_2.jpg')
    img = cv2.imread(path)
    if img is None:  # cv2.imread returns None instead of raising on failure
        raise FileNotFoundError(f'Could not read image: {path}')
    # Pass a single image; a separate name avoids overwriting the module-level `data`.
    results = detect(img)
    # Example entry:
    # {'class': 'crazing', 'conf': 0.31800347566604614, 'position': [91, 22, 108, 154]}
    print(type(results))