Source code for pytorchvideo.data.labeled_video_paths
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
from __future__ import annotations
import os
import pathlib
from typing import List, Optional, Tuple
from iopath.common.file_io import g_pathmgr
from torchvision.datasets.folder import make_dataset
[docs]class LabeledVideoPaths:
"""
LabeledVideoPaths contains pairs of video path and integer index label.
"""
[docs] @classmethod
def from_path(cls, data_path: str) -> LabeledVideoPaths:
"""
Factory function that creates a LabeledVideoPaths object depending on the path
type.
- If it is a directory path it uses the LabeledVideoPaths.from_directory function.
- If it's a file it uses the LabeledVideoPaths.from_csv file.
Args:
file_path (str): The path to the file to be read.
"""
if g_pathmgr.isfile(data_path):
return LabeledVideoPaths.from_csv(data_path)
elif g_pathmgr.isdir(data_path):
return LabeledVideoPaths.from_directory(data_path)
else:
raise FileNotFoundError(f"{data_path} not found.")
[docs] @classmethod
def from_csv(cls, file_path: str) -> LabeledVideoPaths:
"""
Factory function that creates a LabeledVideoPaths object by reading a file with the
following format:
<path> <integer_label>
...
<path> <integer_label>
Args:
file_path (str): The path to the file to be read.
"""
assert g_pathmgr.exists(file_path), f"{file_path} not found."
video_paths_and_label = []
with g_pathmgr.open(file_path, "r") as f:
for path_label in f.read().splitlines():
line_split = path_label.rsplit(None, 1)
# The video path file may not contain labels (e.g. for a test split). We
# assume this is the case if only 1 path is found and set the label to
# -1 if so.
if len(line_split) == 1:
file_path = line_split[0]
label = -1
else:
file_path, label = line_split
video_paths_and_label.append((file_path, int(label)))
assert (
len(video_paths_and_label) > 0
), f"Failed to load dataset from {file_path}."
return cls(video_paths_and_label)
[docs] @classmethod
def from_directory(cls, dir_path: str) -> LabeledVideoPaths:
"""
Factory function that creates a LabeledVideoPaths object by parsing the structure
of the given directory's subdirectories into the classification labels. It
expects the directory format to be the following:
dir_path/<class_name>/<video_name>.mp4
Classes are indexed from 0 to the number of classes, alphabetically.
E.g.
dir_path/class_x/xxx.ext
dir_path/class_x/xxy.ext
dir_path/class_x/xxz.ext
dir_path/class_y/123.ext
dir_path/class_y/nsdf3.ext
dir_path/class_y/asd932_.ext
Would produce two classes labeled 0 and 1 with 3 videos paths associated with each.
Args:
dir_path (str): Root directory to the video class directories .
"""
assert g_pathmgr.exists(dir_path), f"{dir_path} not found."
# Find all classes based on directory names. These classes are then sorted and indexed
# from 0 to the number of classes.
classes = sorted((f for f in pathlib.Path(dir_path).iterdir() if f.is_dir()))
class_to_idx = {classes[i]: i for i in range(len(classes))}
video_paths_and_label = make_dataset(
dir_path, class_to_idx, extensions=("mp4", "avi")
)
assert (
len(video_paths_and_label) > 0
), f"Failed to load dataset from {dir_path}."
return cls(video_paths_and_label)
[docs] def __init__(
self, paths_and_labels: List[Tuple[str, Optional[int]]], path_prefix=""
) -> None:
"""
Args:
paths_and_labels [(str, int)]: a list of tuples containing the video
path and integer label.
"""
self._paths_and_labels = paths_and_labels
self._path_prefix = path_prefix
def path_prefix(self, prefix):
self._path_prefix = prefix
path_prefix = property(None, path_prefix)
[docs] def __getitem__(self, index: int) -> Tuple[str, int]:
"""
Args:
index (int): the path and label index.
Returns:
The path and label tuple for the given index.
"""
path, label = self._paths_and_labels[index]
return (os.path.join(self._path_prefix, path), {"label": label})
[docs] def __len__(self) -> int:
"""
Returns:
The number of video paths and label pairs.
"""
return len(self._paths_and_labels)