# Source code for pytorchvideo.data.encoded_video

# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.

import io
import logging
import pathlib
from typing import BinaryIO, Dict, Optional, Type

import torch
from iopath.common.file_io import g_pathmgr
from pytorchvideo.data.decoder import DecoderType

from .encoded_video_pyav import EncodedVideoPyAV
from .encoded_video_torchvision import EncodedVideoTorchVision
from .video import Video


logger = logging.getLogger(__name__)


def select_video_class(decoder: str) -> Type[Video]:
    """
    Select the class for accessing clips based on the provided decoder string.

    Args:
        decoder (str): Defines what type of decoder is used to decode a video.
            It is converted with ``DecoderType(decoder)`` before being compared
            against the supported decoder types.

    Returns:
        The video class (not an instance) matching ``decoder``:
        ``EncodedVideoPyAV`` for ``DecoderType.PYAV`` and
        ``EncodedVideoTorchVision`` for ``DecoderType.TORCHVISION``.

    Raises:
        NotImplementedError: if ``decoder`` converts to a ``DecoderType`` that
            has no matching video class here.

    NOTE(review): ``DecoderType`` is defined elsewhere; presumably it is an
    ``Enum``, in which case a string that is not one of its values raises
    ``ValueError`` from the conversion itself, before the
    ``NotImplementedError`` branch is reached -- confirm against its definition.
    """
    # Convert once and reuse the result for every comparison.
    decoder_type = DecoderType(decoder)

    if decoder_type == DecoderType.PYAV:
        return EncodedVideoPyAV
    if decoder_type == DecoderType.TORCHVISION:
        return EncodedVideoTorchVision
    raise NotImplementedError(f"Unknown decoder type {decoder}")


class EncodedVideo(Video):
    """
    EncodedVideo is an abstraction for accessing clips from an encoded video.
    It supports selective decoding when header information is available.

    All work is delegated to a backend video object, stored on
    ``self.encoded_video``, whose class is chosen by ``select_video_class``
    from the ``decoder`` string.
    """

    @classmethod
    def from_path(
        cls, file_path: str, decode_audio: bool = True, decoder: str = "pyav"
    ) -> "EncodedVideo":
        """
        Fetches the given video path using PathManager (allowing remote uris to be
        fetched) and constructs the EncodedVideo object.

        Args:
            file_path (str): a PathManager file-path.
            decode_audio (bool): forwarded unchanged to the constructor.
            decoder (str): Defines what type of decoder used to decode a video;
                forwarded unchanged to the constructor.

        Returns:
            An instance of ``cls`` built from the file's bytes, named after the
            final component of ``file_path``.
        """
        # We read the file with PathManager so that we can read from remote uris.
        # The whole file is copied into memory so the handle can be closed
        # before the video object is constructed.
        with g_pathmgr.open(file_path, "rb") as fh:
            video_file = io.BytesIO(fh.read())

        video_name = pathlib.Path(file_path).name
        return cls(video_file, video_name, decode_audio, decoder)

    def __init__(
        self,
        file: BinaryIO,
        video_name: Optional[str] = None,
        decode_audio: bool = True,
        decoder: str = "pyav",
    ) -> None:
        """
        Args:
            file (BinaryIO): a binary file-like object (e.g. io.BytesIO) that
                contains the encoded video.

            video_name (Optional[str]): passed through to the backend video
                class as the video's name.

            decode_audio (bool): passed through to the backend video class.

            decoder (str): Defines what type of decoder used to decode a video.
        """
        video_cls = select_video_class(decoder)
        # The backend instance does the actual decoding; this class only
        # forwards calls to it.
        self.encoded_video = video_cls(file, video_name, decode_audio)

    @property
    def name(self) -> Optional[str]:
        """
        Returns:
            name: the name of the stored video if set.
        """
        return self.encoded_video.name

    @property
    def duration(self) -> float:
        """
        Returns:
            duration: the video's duration/end-time in seconds.
        """
        return self.encoded_video.duration

    def get_clip(
        self, start_sec: float, end_sec: float
    ) -> Dict[str, Optional[torch.Tensor]]:
        """
        Retrieves frames from the encoded video at the specified start and end times
        in seconds (the video always starts at 0 seconds).

        Args:
            start_sec (float): the clip start time in seconds
            end_sec (float): the clip end time in seconds
        Returns:
            clip_data:
                A dictionary mapping the entries at "video" and "audio" to tensors.

                "video": A tensor of the clip's RGB frames with shape:
                (channel, time, height, width). The frames are of type torch.float32 and
                in the range [0 - 255].

                "audio": A tensor of the clip's audio samples with shape:
                (samples). The samples are of type torch.float32 and
                in the range [0 - 255].

            Returns None if no video or audio found within time range.

        NOTE(review): the result comes straight from the backend's ``get_clip``;
        the shapes, dtypes and ranges above are not enforced here -- confirm
        them (in particular the audio range) against the backend classes.
        """
        return self.encoded_video.get_clip(start_sec, end_sec)

    def close(self) -> None:
        """
        Closes the internal video container.
        """
        self.encoded_video.close()