Reference for `ultralytics/models/yolo/model.py`

Note

This file is available at https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/model.py. If you spot a problem please help fix it by contributing a Pull Request 🛠️. Thank you 🙏!

ultralytics.models.yolo.model.YOLO

YOLO(model='yolo11n.pt', task=None, verbose=False)

Bases: Model

YOLO (You Only Look Once) object detection model.

This constructor initializes a YOLO model, automatically switching to specialized model types (YOLOWorld or YOLOE) based on the model filename.

Parameters:

Name	Type	Description	Default
`model`	`str \| Path`	Model name or path to model file, i.e. 'yolo11n.pt', 'yolov8n.yaml'.	`'yolo11n.pt'`
`task`	`str \| None`	YOLO task specification, i.e. 'detect', 'segment', 'classify', 'pose', 'obb'. Defaults to auto-detection based on model.	`None`
`verbose`	`bool`	Display model info on load.	`False`

Examples:

>>> from ultralytics import YOLO
>>> model = YOLO("yolov8n.pt")  # load a pretrained YOLOv8n detection model
>>> model = YOLO("yolov8n-seg.pt")  # load a pretrained YOLOv8n segmentation model
>>> model = YOLO("yolo11n.pt")  # load a pretrained YOLOv11n detection model

Source code in ultralytics/models/yolo/model.py

def __init__(self, model="yolo11n.pt", task=None, verbose=False):
    """
    Initialize a YOLO model.

    This constructor initializes a YOLO model, automatically switching to specialized model types
    (YOLOWorld or YOLOE) based on the model filename.

    Args:
        model (str | Path): Model name or path to model file, i.e. 'yolo11n.pt', 'yolov8n.yaml'.
        task (str | None): YOLO task specification, i.e. 'detect', 'segment', 'classify', 'pose', 'obb'.
            Defaults to auto-detection based on model.
        verbose (bool): Display model info on load.

    Examples:
        >>> from ultralytics import YOLO
        >>> model = YOLO("yolov8n.pt")  # load a pretrained YOLOv8n detection model
        >>> model = YOLO("yolov8n-seg.pt")  # load a pretrained YOLOv8n segmentation model
        >>> model = YOLO("yolo11n.pt")  # load a pretrained YOLOv11n detection model
    """
    path = Path(model)
    if "-world" in path.stem and path.suffix in {".pt", ".yaml", ".yml"}:  # if YOLOWorld PyTorch model
        new_instance = YOLOWorld(path, verbose=verbose)
        self.__class__ = type(new_instance)
        self.__dict__ = new_instance.__dict__
    elif "yoloe" in path.stem and path.suffix in {".pt", ".yaml", ".yml"}:  # if YOLOE PyTorch model
        new_instance = YOLOE(path, task=task, verbose=verbose)
        self.__class__ = type(new_instance)
        self.__dict__ = new_instance.__dict__
    else:
        # Continue with default YOLO initialization
        super().__init__(model=model, task=task, verbose=verbose)

task_map `property`

task_map

Map head to model, trainer, validator, and predictor classes.

ultralytics.models.yolo.model.YOLOWorld

YOLOWorld(model='yolov8s-world.pt', verbose=False)

Bases: Model

YOLO-World object detection model.

Loads a YOLOv8-World model for object detection. If no custom class names are provided, it assigns default COCO class names.

Parameters:

Name	Type	Description	Default
`model`	`str \| Path`	Path to the pre-trained model file. Supports .pt and .yaml formats.	`'yolov8s-world.pt'`
`verbose`	`bool`	If True, prints additional information during initialization.	`False`

Source code in ultralytics/models/yolo/model.py

def __init__(self, model="yolov8s-world.pt", verbose=False) -> None:
    """
    Initialize YOLOv8-World model with a pre-trained model file.

    Loads a YOLOv8-World model for object detection. If no custom class names are provided, it assigns default
    COCO class names.

    Args:
        model (str | Path): Path to the pre-trained model file. Supports *.pt and *.yaml formats.
        verbose (bool): If True, prints additional information during initialization.
    """
    super().__init__(model=model, task="detect", verbose=verbose)

    # Assign default COCO class names when there are no custom names
    if not hasattr(self.model, "names"):
        self.model.names = yaml_load(ROOT / "cfg/datasets/coco8.yaml").get("names")

task_map `property`

task_map

Map head to model, validator, and predictor classes.

set_classes

set_classes(classes)

Set the model's class names for detection.

Parameters:

Name	Type	Description	Default
`classes`	`list[str]`	A list of categories i.e. ["person"].	required

Source code in ultralytics/models/yolo/model.py

def set_classes(self, classes):
    """
    Set the model's class names for detection.

    Args:
        classes (list[str]): A list of categories i.e. ["person"].
    """
    self.model.set_classes(classes)
    # Remove background if it's given
    background = " "
    if background in classes:
        classes.remove(background)
    self.model.names = classes

    # Reset method class names
    if self.predictor:
        self.predictor.model.names = classes

ultralytics.models.yolo.model.YOLOE

YOLOE(model='yoloe-v8s-seg.pt', task=None, verbose=False)

Bases: Model

YOLOE object detection and segmentation model.

Parameters:

Name	Type	Description	Default
`model`	`str \| Path`	Path to the pre-trained model file. Supports .pt and .yaml formats.	`'yoloe-v8s-seg.pt'`
`task`	`str`	Task type for the model. Auto-detected if None.	`None`
`verbose`	`bool`	If True, prints additional information during initialization.	`False`

Source code in ultralytics/models/yolo/model.py

def __init__(self, model="yoloe-v8s-seg.pt", task=None, verbose=False) -> None:
    """
    Initialize YOLOE model with a pre-trained model file.

    Args:
        model (str | Path): Path to the pre-trained model file. Supports *.pt and *.yaml formats.
        task (str, optional): Task type for the model. Auto-detected if None.
        verbose (bool): If True, prints additional information during initialization.
    """
    super().__init__(model=model, task=task, verbose=verbose)

    # Assign default COCO class names when there are no custom names
    if not hasattr(self.model, "names"):
        self.model.names = yaml_load(ROOT / "cfg/datasets/coco8.yaml").get("names")

task_map `property`

task_map

Map head to model, validator, and predictor classes.

get_text_pe

get_text_pe(texts)

Get text positional embeddings for the given texts.

Source code in ultralytics/models/yolo/model.py

def get_text_pe(self, texts):
    """Get text positional embeddings for the given texts."""
    assert isinstance(self.model, YOLOEModel)
    return self.model.get_text_pe(texts)

get_visual_pe

get_visual_pe(img, visual)

Get visual positional embeddings for the given image and visual features.

This method extracts positional embeddings from visual features based on the input image. It requires that the model is an instance of YOLOEModel.

Parameters:

Name	Type	Description	Default
`img`	`Tensor`	Input image tensor.	required
`visual`	`Tensor`	Visual features extracted from the image.	required

Returns:

Type	Description
`Tensor`	Visual positional embeddings.

Examples:

>>> model = YOLOE("yoloe-v8s.pt")
>>> img = torch.rand(1, 3, 640, 640)
>>> visual_features = model.model.backbone(img)
>>> pe = model.get_visual_pe(img, visual_features)

Source code in ultralytics/models/yolo/model.py

def get_visual_pe(self, img, visual):
    """
    Get visual positional embeddings for the given image and visual features.

    This method extracts positional embeddings from visual features based on the input image. It requires
    that the model is an instance of YOLOEModel.

    Args:
        img (torch.Tensor): Input image tensor.
        visual (torch.Tensor): Visual features extracted from the image.

    Returns:
        (torch.Tensor): Visual positional embeddings.

    Examples:
        >>> model = YOLOE("yoloe-v8s.pt")
        >>> img = torch.rand(1, 3, 640, 640)
        >>> visual_features = model.model.backbone(img)
        >>> pe = model.get_visual_pe(img, visual_features)
    """
    assert isinstance(self.model, YOLOEModel)
    return self.model.get_visual_pe(img, visual)

get_vocab

get_vocab(names)

Get vocabulary for the given class names.

Source code in ultralytics/models/yolo/model.py

def get_vocab(self, names):
    """Get vocabulary for the given class names."""
    assert isinstance(self.model, YOLOEModel)
    return self.model.get_vocab(names)

predict

predict(
    source=None,
    stream: bool = False,
    visual_prompts: dict = {},
    refer_image=None,
    predictor=None,
    **kwargs
)

Run prediction on images, videos, directories, streams, etc.

Parameters:

Name	Type	Description	Default
`source`	`str \| int \| Image \| ndarray`	Source for prediction. Accepts image paths, directory paths, URL/YouTube streams, PIL images, numpy arrays, or webcam indices.	`None`
`stream`	`bool`	Whether to stream the prediction results. If True, results are yielded as a generator as they are computed.	`False`
`visual_prompts`	`dict`	Dictionary containing visual prompts for the model. Must include 'bboxes' and 'cls' keys when non-empty.	`{}`
`refer_image`	`str \| Image \| ndarray`	Reference image for visual prompts.	`None`
`predictor`	`callable`	Custom predictor function. If None, a predictor is automatically loaded based on the task.	`None`
`**kwargs`	`Any`	Additional keyword arguments passed to the predictor.	`{}`

Returns:

Type	Description
`List \| generator`	List of Results objects or generator of Results objects if stream=True.

Examples:

>>> model = YOLOE("yoloe-v8s-seg.pt")
>>> results = model.predict("path/to/image.jpg")
>>> # With visual prompts
>>> prompts = {"bboxes": [[10, 20, 100, 200]], "cls": ["person"]}
>>> results = model.predict("path/to/image.jpg", visual_prompts=prompts)

Source code in ultralytics/models/yolo/model.py

def predict(
    self,
    source=None,
    stream: bool = False,
    visual_prompts: dict = {},
    refer_image=None,
    predictor=None,
    **kwargs,
):
    """
    Run prediction on images, videos, directories, streams, etc.

    Args:
        source (str | int | PIL.Image | np.ndarray, optional): Source for prediction. Accepts image paths,
            directory paths, URL/YouTube streams, PIL images, numpy arrays, or webcam indices.
        stream (bool): Whether to stream the prediction results. If True, results are yielded as a
            generator as they are computed.
        visual_prompts (dict): Dictionary containing visual prompts for the model. Must include 'bboxes' and
            'cls' keys when non-empty.
        refer_image (str | PIL.Image | np.ndarray, optional): Reference image for visual prompts.
        predictor (callable, optional): Custom predictor function. If None, a predictor is automatically
            loaded based on the task.
        **kwargs (Any): Additional keyword arguments passed to the predictor.

    Returns:
        (List | generator): List of Results objects or generator of Results objects if stream=True.

    Examples:
        >>> model = YOLOE("yoloe-v8s-seg.pt")
        >>> results = model.predict("path/to/image.jpg")
        >>> # With visual prompts
        >>> prompts = {"bboxes": [[10, 20, 100, 200]], "cls": ["person"]}
        >>> results = model.predict("path/to/image.jpg", visual_prompts=prompts)
    """
    if len(visual_prompts):
        assert "bboxes" in visual_prompts and "cls" in visual_prompts, (
            f"Expected 'bboxes' and 'cls' in visual prompts, but got {visual_prompts.keys()}"
        )
        assert len(visual_prompts["bboxes"]) == len(visual_prompts["cls"]), (
            f"Expected equal number of bounding boxes and classes, but got {len(visual_prompts['bboxes'])} and "
            f"{len(visual_prompts['cls'])} respectively"
        )
    self.predictor = (predictor or self._smart_load("predictor"))(
        overrides={
            "task": self.model.task,
            "mode": "predict",
            "save": False,
            "verbose": refer_image is None,
            "batch": 1,
        },
        _callbacks=self.callbacks,
    )

    if len(visual_prompts):
        num_cls = (
            max(len(set(c)) for c in visual_prompts["cls"])
            if isinstance(source, list)  # means multiple images
            else len(set(visual_prompts["cls"]))
        )
        self.model.model[-1].nc = num_cls
        self.model.names = [f"object{i}" for i in range(num_cls)]
        self.predictor.set_prompts(visual_prompts.copy())

    self.predictor.setup_model(model=self.model)

    if refer_image is None and source is not None:
        dataset = load_inference_source(source)
        if dataset.mode in {"video", "stream"}:
            # NOTE: set the first frame as refer image for videos/streams inference
            refer_image = next(iter(dataset))[1][0]
    if refer_image is not None and len(visual_prompts):
        vpe = self.predictor.get_vpe(refer_image)
        self.model.set_classes(self.model.names, vpe)
        self.task = "segment" if isinstance(self.predictor, yolo.segment.SegmentationPredictor) else "detect"
        self.predictor = None  # reset predictor

    return super().predict(source, stream, **kwargs)

set_classes

set_classes(classes, embeddings)

Set the model's class names and embeddings for detection.

Parameters:

Name	Type	Description	Default
`classes`	`list[str]`	A list of categories i.e. ["person"].	required
`embeddings`	`Tensor`	Embeddings corresponding to the classes.	required

Source code in ultralytics/models/yolo/model.py

def set_classes(self, classes, embeddings):
    """
    Set the model's class names and embeddings for detection.

    Args:
        classes (list[str]): A list of categories i.e. ["person"].
        embeddings (torch.Tensor): Embeddings corresponding to the classes.
    """
    assert isinstance(self.model, YOLOEModel)
    self.model.set_classes(classes, embeddings)
    # Verify no background class is present
    assert " " not in classes
    self.model.names = classes

    # Reset method class names
    if self.predictor:
        self.predictor.model.names = classes

set_vocab

set_vocab(vocab, names)

Set vocabulary and class names for the YOLOE model.

This method configures the vocabulary and class names used by the model for text processing and classification tasks. The model must be an instance of YOLOEModel.

Parameters:

Name	Type	Description	Default
`vocab`	`list`	Vocabulary list containing tokens or words used by the model for text processing.	required
`names`	`list`	List of class names that the model can detect or classify.	required

Raises:

Type	Description
`AssertionError`	If the model is not an instance of YOLOEModel.

Examples:

>>> model = YOLOE("yoloe-v8s.pt")
>>> model.set_vocab(["person", "car", "dog"], ["person", "car", "dog"])

Source code in ultralytics/models/yolo/model.py

def set_vocab(self, vocab, names):
    """
    Set vocabulary and class names for the YOLOE model.

    This method configures the vocabulary and class names used by the model for text processing and
    classification tasks. The model must be an instance of YOLOEModel.

    Args:
        vocab (list): Vocabulary list containing tokens or words used by the model for text processing.
        names (list): List of class names that the model can detect or classify.

    Raises:
        AssertionError: If the model is not an instance of YOLOEModel.

    Examples:
        >>> model = YOLOE("yoloe-v8s.pt")
        >>> model.set_vocab(["person", "car", "dog"], ["person", "car", "dog"])
    """
    assert isinstance(self.model, YOLOEModel)
    self.model.set_vocab(vocab, names=names)

val

val(validator=None, load_vp=False, refer_data=None, **kwargs)

Validate the model using text or visual prompts.

Parameters:

Name	Type	Description	Default
`validator`	`callable`	A callable validator function. If None, a default validator is loaded.	`None`
`load_vp`	`bool`	Whether to load visual prompts. If False, text prompts are used.	`False`
`refer_data`	`str`	Path to the reference data for visual prompts.	`None`
`**kwargs`	`Any`	Additional keyword arguments to override default settings.	`{}`

Returns:

Type	Description
`dict`	Validation statistics containing metrics computed during validation.

Source code in ultralytics/models/yolo/model.py

def val(
    self,
    validator=None,
    load_vp=False,
    refer_data=None,
    **kwargs,
):
    """
    Validate the model using text or visual prompts.

    Args:
        validator (callable, optional): A callable validator function. If None, a default validator is loaded.
        load_vp (bool): Whether to load visual prompts. If False, text prompts are used.
        refer_data (str, optional): Path to the reference data for visual prompts.
        **kwargs (Any): Additional keyword arguments to override default settings.

    Returns:
        (dict): Validation statistics containing metrics computed during validation.
    """
    custom = {"rect": not load_vp}  # method defaults
    args = {**self.overrides, **custom, **kwargs, "mode": "val"}  # highest priority args on the right

    validator = (validator or self._smart_load("validator"))(args=args, _callbacks=self.callbacks)
    validator(model=self.model, load_vp=load_vp, refer_data=refer_data)
    self.metrics = validator.metrics
    return validator.metrics

📅 Created 1 year ago ✏️ Updated 13 days ago

Reference for ultralytics/models/yolo/model.py

ultralytics.models.yolo.model.YOLO

task_map property

ultralytics.models.yolo.model.YOLOWorld

task_map property

set_classes

ultralytics.models.yolo.model.YOLOE

task_map property

get_text_pe

get_visual_pe

get_vocab

predict

set_classes

set_vocab

val

Reference for `ultralytics/models/yolo/model.py`

task_map `property`

task_map `property`

task_map `property`