diff --git a/.gitignore b/.gitignore
index d8b9c068bf0a..a6f0a8a0a416 100755
--- a/.gitignore
+++ b/.gitignore
@@ -37,6 +37,8 @@ data/images/*
 results*.csv
 
+output.txt
+
 # Datasets -------------------------------------------------------------------------------------------------------------
 coco/
 coco128/
diff --git a/detect.py b/detect.py
index 24724a35f618..dca8851f097d 100644
--- a/detect.py
+++ b/detect.py
@@ -1,33 +1,3 @@
-# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
-"""
-Run YOLOv5 detection inference on images, videos, directories, globs, YouTube, webcam, streams, etc.
-
-Usage - sources:
-    $ python detect.py --weights yolov5s.pt --source 0                               # webcam
-                                                     img.jpg                         # image
-                                                     vid.mp4                         # video
-                                                     screen                          # screenshot
-                                                     path/                           # directory
-                                                     list.txt                        # list of images
-                                                     list.streams                    # list of streams
-                                                     'path/*.jpg'                    # glob
-                                                     'https://youtu.be/LNwODJXcvt4'  # YouTube
-                                                     'rtsp://example.com/media.mp4'  # RTSP, RTMP, HTTP stream
-
-Usage - formats:
-    $ python detect.py --weights yolov5s.pt                 # PyTorch
-                                 yolov5s.torchscript        # TorchScript
-                                 yolov5s.onnx               # ONNX Runtime or OpenCV DNN with --dnn
-                                 yolov5s_openvino_model     # OpenVINO
-                                 yolov5s.engine             # TensorRT
-                                 yolov5s.mlpackage          # CoreML (macOS-only)
-                                 yolov5s_saved_model        # TensorFlow SavedModel
-                                 yolov5s.pb                 # TensorFlow GraphDef
-                                 yolov5s.tflite             # TensorFlow Lite
-                                 yolov5s_edgetpu.tflite     # TensorFlow Edge TPU
-                                 yolov5s_paddle_model       # PaddlePaddle
-"""
-
 import argparse
 import csv
 import os
@@ -35,6 +5,7 @@
 import sys
 from pathlib import Path
 
+import paho.mqtt.client as mqtt
 import torch
 
 FILE = Path(__file__).resolve()
@@ -65,6 +36,85 @@
 )
 from utils.torch_utils import select_device, smart_inference_mode
 
+MQTT_BROKER = "broker.hivemq.com"
+MQTT_PORT = 1883
+MQTT_TOPIC = "Automation001"
+
+
+def on_connect(client, userdata, flags, rc):
+    if rc == 0:
+        print("Connected to MQTT broker successfully.")
+    else:
+        print(f"Failed to connect to MQTT broker, return code {rc}")
+
+
+def on_publish(client, userdata, mid):
+    print(f"Message {mid} published.")
+
+
+# Initialize MQTT client (paho-mqtt 2.x requires an explicit callback API version; VERSION1 matches the signatures above)
+client = mqtt.Client(mqtt.CallbackAPIVersion.VERSION1)
+client.on_connect = on_connect
+client.on_publish = on_publish
+client.connect(MQTT_BROKER, MQTT_PORT, 60)  # Connect to the broker
+client.loop_start()  # Run the network loop in a background thread so callbacks fire and publishes are flushed
+
+# Store detected objects over multiple frames (sliding window)
+from collections import deque
+
+WINDOW_SIZE = 30
+RECOGNITION_THRESHOLD = 0.7
+
+# Buffer to store objects detected over the last WINDOW_SIZE frames
+focus_buffer = deque(maxlen=WINDOW_SIZE)  # one entry per processed frame
+
+
+def get_focused_object(detections, img_shape):
+    """
+    Determines the object the user is focusing on by finding the detection closest to the center of the frame and
+    ensuring persistence over frames.
+
+    :param detections: Tensor of shape (N, 6) containing [x1, y1, x2, y2, confidence, class]
+    :param img_shape: Shape of the original image (height, width, channels)
+    :return: Class ID of the most consistently focused object, or 80 ("none") if there is no stable focus.
+ """ + if len(detections) == 0: + return 80 # No objects detected + + # Step 1: Filter out low-confidence detections + detections = detections[detections[:, 4] > 0.5] # Keep only confidence > 50% + + if len(detections) == 0: + return 80 # No confident detections + + # Step 2: Calculate object centroids + image_center = torch.tensor([img_shape[1] / 2, img_shape[0] / 2]) # (x_center, y_center) + centroids = torch.stack( + [(detections[:, 0] + detections[:, 2]) / 2, (detections[:, 1] + detections[:, 3]) / 2], dim=1 + ) + + # Step 3: Find the object closest to the center + distances = torch.norm(centroids - image_center, dim=1) # Euclidean distance + min_distance_index = torch.argmin(distances) # Index of the closest object + + focused_object = int(detections[min_distance_index, 5]) # Get class ID of the focused object + + # Step 4: Maintain a sliding window of detected objects + focus_buffer.append(focused_object) + + # Step 5: Determine the most frequently appearing object in buffer + focus_counts = {obj: focus_buffer.count(obj) for obj in set(focus_buffer)} + most_frequent_object = max(focus_counts, key=focus_counts.get) # Object appearing most in buffer + + # Only return if it appears in at least 60% of frames in the buffer + if focus_counts[most_frequent_object] >= RECOGNITION_THRESHOLD * len(focus_buffer): + return most_frequent_object + else: + return 80 # No stable focus object + + +# 80 class corresponds + @smart_inference_mode() def run( @@ -98,56 +147,6 @@ def run( dnn=False, # use OpenCV DNN for ONNX inference vid_stride=1, # video frame-rate stride ): - """ - Runs YOLOv5 detection inference on various sources like images, videos, directories, streams, etc. - - Args: - weights (str | Path): Path to the model weights file or a Triton URL. Default is 'yolov5s.pt'. - source (str | Path): Input source, which can be a file, directory, URL, glob pattern, screen capture, or webcam - index. Default is 'data/images'. - data (str | Path): Path to the dataset YAML file. Default is 'data/coco128.yaml'. - imgsz (tuple[int, int]): Inference image size as a tuple (height, width). Default is (640, 640). - conf_thres (float): Confidence threshold for detections. Default is 0.25. - iou_thres (float): Intersection Over Union (IOU) threshold for non-max suppression. Default is 0.45. - max_det (int): Maximum number of detections per image. Default is 1000. - device (str): CUDA device identifier (e.g., '0' or '0,1,2,3') or 'cpu'. Default is an empty string, which uses the - best available device. - view_img (bool): If True, display inference results using OpenCV. Default is False. - save_txt (bool): If True, save results in a text file. Default is False. - save_csv (bool): If True, save results in a CSV file. Default is False. - save_conf (bool): If True, include confidence scores in the saved results. Default is False. - save_crop (bool): If True, save cropped prediction boxes. Default is False. - nosave (bool): If True, do not save inference images or videos. Default is False. - classes (list[int]): List of class indices to filter detections by. Default is None. - agnostic_nms (bool): If True, perform class-agnostic non-max suppression. Default is False. - augment (bool): If True, use augmented inference. Default is False. - visualize (bool): If True, visualize feature maps. Default is False. - update (bool): If True, update all models' weights. Default is False. - project (str | Path): Directory to save results. Default is 'runs/detect'. 
-        name (str): Name of the current experiment; used to create a subdirectory within 'project'. Default is 'exp'.
-        exist_ok (bool): If True, existing directories with the same name are reused instead of being incremented. Default is
-            False.
-        line_thickness (int): Thickness of bounding box lines in pixels. Default is 3.
-        hide_labels (bool): If True, do not display labels on bounding boxes. Default is False.
-        hide_conf (bool): If True, do not display confidence scores on bounding boxes. Default is False.
-        half (bool): If True, use FP16 half-precision inference. Default is False.
-        dnn (bool): If True, use OpenCV DNN backend for ONNX inference. Default is False.
-        vid_stride (int): Stride for processing video frames, to skip frames between processing. Default is 1.
-
-    Returns:
-        None
-
-    Examples:
-        ```python
-        from ultralytics import run
-
-        # Run inference on an image
-        run(source='data/images/example.jpg', weights='yolov5s.pt', device='0')
-
-        # Run inference on a video with specific confidence threshold
-        run(source='data/videos/example.mp4', weights='yolov5s.pt', conf_thres=0.4, device='0')
-        ```
-    """
     source = str(source)
     save_img = not nosave and not source.endswith(".txt")  # save inference images
     is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)
@@ -165,6 +165,7 @@ def run(
     device = select_device(device)
     model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
     stride, names, pt = model.stride, model.names, model.pt
+    names[80] = "none"  # placeholder class returned by get_focused_object() when no stable focus exists
     imgsz = check_img_size(imgsz, s=stride)  # check image size
 
     # Dataloader
@@ -253,11 +254,17 @@ def write_to_csv(image_name, prediction, confidence):
 
+                focused_c = get_focused_object(det, im0.shape)  # evaluate once per frame so the buffer gains one entry per frame
                 # Write results
                 for *xyxy, conf, cls in reversed(det):
-                    c = int(cls)  # integer class
+                    c = focused_c  # class ID of the frame's focused object (80 = "none")
                     label = names[c] if hide_conf else f"{names[c]}"
                     confidence = float(conf)
                     confidence_str = f"{confidence:.2f}"
 
+                    if confidence > 0.50:
+                        msg = f"{label}"
+                        client.publish(MQTT_TOPIC, msg, retain=True)
+                        print(f"Published to MQTT: {msg}")
+
                     if save_csv:
                         write_to_csv(p.name, label, confidence_str)
 
@@ -311,6 +318,10 @@ def write_to_csv(image_name, prediction, confidence):
         # Print time (inference-only)
         LOGGER.info(f"{s}{'' if len(det) else '(no detections), '}{dt[1].dt * 1e3:.1f}ms")
 
+        if cv2.waitKey(1) == ord("q"):  # press 'q' to quit early
+            cv2.destroyAllWindows()
+            break
+
     # Print results
     t = tuple(x.t / seen * 1e3 for x in dt)  # speeds per image
     LOGGER.info(f"Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}" % t)
@@ -322,49 +333,6 @@ def write_to_csv(image_name, prediction, confidence):
 
 
 def parse_opt():
-    """
-    Parse command-line arguments for YOLOv5 detection, allowing custom inference options and model configurations.
-
-    Args:
-        --weights (str | list[str], optional): Model path or Triton URL. Defaults to ROOT / 'yolov5s.pt'.
-        --source (str, optional): File/dir/URL/glob/screen/0(webcam). Defaults to ROOT / 'data/images'.
-        --data (str, optional): Dataset YAML path. Provides dataset configuration information.
-        --imgsz (list[int], optional): Inference size (height, width). Defaults to [640].
-        --conf-thres (float, optional): Confidence threshold. Defaults to 0.25.
-        --iou-thres (float, optional): NMS IoU threshold. Defaults to 0.45.
-        --max-det (int, optional): Maximum number of detections per image. Defaults to 1000.
-        --device (str, optional): CUDA device, i.e., '0' or '0,1,2,3' or 'cpu'. Defaults to "".
-        --view-img (bool, optional): Flag to display results. Defaults to False.
-        --save-txt (bool, optional): Flag to save results to *.txt files. Defaults to False.
-        --save-csv (bool, optional): Flag to save results in CSV format. Defaults to False.
-        --save-conf (bool, optional): Flag to save confidences in labels saved via --save-txt. Defaults to False.
-        --save-crop (bool, optional): Flag to save cropped prediction boxes. Defaults to False.
-        --nosave (bool, optional): Flag to prevent saving images/videos. Defaults to False.
-        --classes (list[int], optional): List of classes to filter results by, e.g., '--classes 0 2 3'. Defaults to None.
-        --agnostic-nms (bool, optional): Flag for class-agnostic NMS. Defaults to False.
-        --augment (bool, optional): Flag for augmented inference. Defaults to False.
-        --visualize (bool, optional): Flag for visualizing features. Defaults to False.
-        --update (bool, optional): Flag to update all models in the model directory. Defaults to False.
-        --project (str, optional): Directory to save results. Defaults to ROOT / 'runs/detect'.
-        --name (str, optional): Sub-directory name for saving results within --project. Defaults to 'exp'.
-        --exist-ok (bool, optional): Flag to allow overwriting if the project/name already exists. Defaults to False.
-        --line-thickness (int, optional): Thickness (in pixels) of bounding boxes. Defaults to 3.
-        --hide-labels (bool, optional): Flag to hide labels in the output. Defaults to False.
-        --hide-conf (bool, optional): Flag to hide confidences in the output. Defaults to False.
-        --half (bool, optional): Flag to use FP16 half-precision inference. Defaults to False.
-        --dnn (bool, optional): Flag to use OpenCV DNN for ONNX inference. Defaults to False.
-        --vid-stride (int, optional): Video frame-rate stride, determining the number of frames to skip in between
-            consecutive frames. Defaults to 1.
-
-    Returns:
-        argparse.Namespace: Parsed command-line arguments as an argparse.Namespace object.
-
-    Example:
-        ```python
-        from ultralytics import YOLOv5
-        args = YOLOv5.parse_opt()
-        ```
-    """
     parser = argparse.ArgumentParser()
     parser.add_argument("--weights", nargs="+", type=str, default=ROOT / "yolov5s.pt", help="model path or triton URL")
     parser.add_argument("--source", type=str, default=ROOT / "data/images", help="file/dir/URL/glob/screen/0(webcam)")
@@ -407,28 +375,6 @@ def parse_opt():
 
 
 def main(opt):
-    """
-    Executes YOLOv5 model inference based on provided command-line arguments, validating dependencies before running.
-
-    Args:
-        opt (argparse.Namespace): Command-line arguments for YOLOv5 detection. See function `parse_opt` for details.
-
-    Returns:
-        None
-
-    Note:
-        This function performs essential pre-execution checks and initiates the YOLOv5 detection process based on user-specified
-        options. Refer to the usage guide and examples for more information about different sources and formats at:
-        https://github.com/ultralytics/ultralytics
-
-    Example usage:
-
-    ```python
-    if __name__ == "__main__":
-        opt = parse_opt()
-        main(opt)
-    ```
-    """
     check_requirements(ROOT / "requirements.txt", exclude=("tensorboard", "thop"))
     run(**vars(opt))
 
diff --git a/pyproject.toml b/pyproject.toml
index 9680857af1d7..540d4b07f018 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -145,3 +145,6 @@ close-quotes-on-newline = true
 [tool.codespell]
 ignore-words-list = "crate,nd,strack,dota,ane,segway,fo,gool,winn,commend"
 skip = '*.csv,*venv*,docs/??/,docs/mkdocs_??.yml'
+
+[tool.setuptools.packages]
+find = { include = ["models", "data", "segment", "classify"] }
diff --git a/requirements.txt b/requirements.txt
index dcd23bf5310c..40ae047c0bdf 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -47,3 +47,4 @@ setuptools>=70.0.0 # Snyk vulnerability fix
 # mss  # screenshots
 # albumentations>=1.0.3
 # pycocotools>=2.0.6  # COCO mAP
+paho-mqtt>=2.1.0
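
For end-to-end testing of the MQTT side of this patch, a minimal companion subscriber can listen on the same topic. This is a sketch, not part of the patch: it assumes the broker, port, and topic constants from detect.py above and paho-mqtt 2.x's VERSION1 callback API, and it simply prints each focus label ("none" when no stable focus) as it arrives.

```python
import paho.mqtt.client as mqtt

MQTT_BROKER = "broker.hivemq.com"  # same public broker as detect.py
MQTT_PORT = 1883
MQTT_TOPIC = "Automation001"  # topic the detector publishes focus labels to


def on_connect(client, userdata, flags, rc):
    # Subscribe after CONNACK so a reconnect automatically re-subscribes.
    if rc == 0:
        client.subscribe(MQTT_TOPIC)
    else:
        print(f"Connection failed, return code {rc}")


def on_message(client, userdata, msg):
    # Payload is the focused object's class label, e.g. "person" or "none".
    print(f"Focused object: {msg.payload.decode()}")


client = mqtt.Client(mqtt.CallbackAPIVersion.VERSION1)
client.on_connect = on_connect
client.on_message = on_message
client.connect(MQTT_BROKER, MQTT_PORT, 60)
client.loop_forever()  # block and dispatch callbacks until interrupted
```

Because the publisher sets retain=True, this subscriber immediately receives the last stable focus label on connect, which makes it easy to verify the pipeline without keeping both processes in lockstep.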