This results in completely wrong color values when the source video is bt709 and the user expects the same color space after the pixel format conversion.
where it skips lib.sws_setColorspaceDetails entirely when the color range and color space are the same, which is the wrong way to use sws_scale.
One should always configure it.
import warnings
import av
import numpy as np
import subprocess
from av.video.reformatter import ColorRange, Colorspace, Interpolation
def compare_frame_to_ndarray(raw_ndarray, av_frame):
    """
    Compare a raw ndarray (YUV444p or RGB24) to a PyAV VideoFrame pixel by pixel.

    Args:
        raw_ndarray (np.ndarray):
            - YUV444p: shape (3, H, W)
            - RGB24: shape (H, W, 3)
        av_frame:
            VideoFrame in matching format: 'yuv444p' or 'rgb24'. Only
            ``format.name``, ``planes``, each plane's buffer and its
            ``line_size`` are accessed, so any object exposing that
            interface works.

    Returns:
        dict: summary with 'mean_diff', 'max_diff', 'num_mismatches', 'total_pixels'

    Raises:
        ValueError: if the frame format is unsupported or the ndarray
            shape does not match it.
    """
    fmt = av_frame.format.name
    H, W = raw_ndarray.shape[-2:] if fmt.startswith("yuv") else raw_ndarray.shape[:2]
    total_diff = 0
    max_diff = 0
    num_mismatch = 0
    if fmt == "yuv444p":
        if raw_ndarray.shape[0] != 3:
            raise ValueError("YUV ndarray must have shape (3, H, W)")
        # Compare plane by plane; slice off any per-row stride padding.
        for i, plane in enumerate(av_frame.planes):
            stride = plane.line_size
            # count=H*stride guards against plane buffers that are larger
            # than H rows (trailing allocation padding would otherwise make
            # the reshape fail).
            plane_array = np.frombuffer(
                memoryview(plane), dtype=np.uint8, count=H * stride
            ).reshape(H, stride)[:, :W]
            diff = np.abs(plane_array.astype(int) - raw_ndarray[i].astype(int))
            total_diff += diff.sum()
            max_diff = max(max_diff, diff.max())
            num_mismatch += np.count_nonzero(diff)
    elif fmt == "rgb24":
        if raw_ndarray.shape != (H, W, 3):
            raise ValueError("RGB ndarray must have shape (H, W, 3)")
        # RGB24 is packed into a single plane; rows may be padded to line_size.
        plane = av_frame.planes[0]
        stride = plane.line_size
        plane_array = np.frombuffer(
            memoryview(plane), dtype=np.uint8, count=H * stride
        ).reshape(H, stride)[:, :W * 3]
        plane_array = plane_array.reshape(H, W, 3)
        diff = np.abs(plane_array.astype(int) - raw_ndarray.astype(int))
        total_diff += diff.sum()
        # Use max(...) for consistency with the YUV branch.
        max_diff = max(max_diff, diff.max())
        num_mismatch += np.count_nonzero(diff)
    else:
        raise ValueError(f"Unsupported PyAV frame format: {fmt}")
    total_pixels = raw_ndarray.size
    mean_diff = total_diff / total_pixels
    return {
        "mean_diff": mean_diff,
        "max_diff": int(max_diff),
        "num_mismatches": int(num_mismatch),
        "total_pixels": int(total_pixels)
    }
import numpy as np
def load_yuv_raw(file_path, width, height, format="yuv444p"):
    """
    Read one raw planar YUV444p frame from disk into a NumPy array.

    Args:
        file_path (str): path to the raw YUV file
        width (int): frame width in pixels
        height (int): frame height in pixels
        format (str): pixel format; only 'yuv444p' is accepted

    Returns:
        np.ndarray: uint8 array of shape (3, height, width), planes ordered
        (Y, U, V).

    Raises:
        ValueError: for an unsupported format or a file of the wrong size.
    """
    if format != "yuv444p":
        raise ValueError("Currently only 'yuv444p' is supported")
    data = np.fromfile(file_path, dtype=np.uint8)
    expected_size = 3 * width * height
    if data.size != expected_size:
        raise ValueError(f"Expected {expected_size} bytes, got {data.size}")
    # yuv444p stores three full-resolution planes back to back, so a single
    # reshape produces the (plane, row, col) layout directly.
    return data.reshape(3, height, width)
def numpy_to_av_frame_rgb(
    rgb_np,
    # RGB has no colorspace/range concept, so those knobs are deliberately
    # not exposed here.
    # colorspace=Colorspace.DEFAULT,
    # color_range=ColorRange.JPEG
):
    """
    Convert a NumPy RGB array to a PyAV VideoFrame (rgb24).

    Args:
        rgb_np (np.ndarray): shape (H, W, 3), dtype=uint8

    Returns:
        av.video.frame.VideoFrame: rgb24 frame filled with the array data

    Raises:
        ValueError: if the input does not have exactly 3 channels.
    """
    H, W, C = rgb_np.shape
    if C != 3:
        raise ValueError("Input NumPy array must have 3 channels (H, W, 3)")
    # Allocate the destination frame; its single plane holds packed RGB.
    frame_rgb = av.VideoFrame(width=W, height=H, format="rgb24")
    plane = frame_rgb.planes[0]
    buf = memoryview(plane)
    stride = plane.line_size
    row_bytes = W * 3
    # Copy row by row: the plane may be padded, i.e. stride >= W * 3.
    for row, offset in zip(rgb_np, range(0, H * stride, stride)):
        buf[offset:offset + row_bytes] = row.tobytes()
    return frame_rgb
def check_color_metrics(frame, range: ColorRange, space: Colorspace):
    """Warn (without raising) when the frame's color metadata differs from
    the expected range/colorspace."""
    # Data-driven form of the original two if-statements; the messages are
    # byte-identical to f"{frame.color_range=} ..." / f"{frame.colorspace=} ...".
    for attr, expected in (("color_range", range), ("colorspace", space)):
        actual = getattr(frame, attr)
        if actual != expected:
            warnings.warn(f"frame.{attr}={actual!r} should be {expected}")
# Reproduction: build a synthetic RGB gradient, convert it to yuv444p
# (bt709, full range) with ffmpeg as ground truth, then compare PyAV's
# reformat() output against it.
W, H = 256, 256
# 1. Create RGB gradient (uint8, packed RGB)
x = np.linspace(0, 255, W, dtype=np.uint8)
y = np.linspace(0, 255, H, dtype=np.uint8)
R = np.tile(x, (H, 1))  # red ramps left -> right
G = np.tile(y[:, None], (1, W))  # green ramps top -> bottom
B = 255 - R  # blue is the inverse of red
rgb_np = np.stack([R, G, B], axis=2) # (H, W, 3)
# 2. Produce the ffmpeg reference conversion (nearest-neighbor scaler,
# bt709 matrix, full/pc range) as a raw yuv444p dump.
rgb_np.tofile("/tmp/gradient.rgb")
subprocess.check_call([
"ffmpeg",
"-hide_banner",
"-loglevel", "error",
"-y", # overwrite output
"-f", "rawvideo",
"-pix_fmt", "rgb24",
"-s", f"{W}x{H}",
"-i", "/tmp/gradient.rgb",
"-vf", "scale=sws_flags=neighbor:out_color_matrix=bt709:out_range=pc,format=yuv444p",
"-f", "rawvideo",
"/tmp/gradient.yuv"
])
yuv_planes = load_yuv_raw("/tmp/gradient.yuv", W, H)
print(f"{yuv_planes.shape=}")
# 3. Sanity check: the ndarray -> VideoFrame copy itself must be lossless.
frame_rgb = numpy_to_av_frame_rgb(
rgb_np=rgb_np
)
print(">>>rgb raw vs rgb frame")
print(compare_frame_to_ndarray(rgb_np, frame_rgb))
print("<<<<<<<<<<")
# 4. PyAV conversion with explicit destination colorspace/range; compare
# against the ffmpeg reference.
frame_yuv = frame_rgb.reformat(
format="yuv444p",
interpolation=Interpolation.POINT,
dst_colorspace=Colorspace.ITU709,
dst_color_range=ColorRange.JPEG
)
# Expected to warn: reformat() does not tag the output frame's metadata.
check_color_metrics(
frame_yuv,
range=ColorRange.JPEG,
space=Colorspace.ITU709
)
print(">>>yuv raw vs converted yuv frame")
print(compare_frame_to_ndarray(yuv_planes, frame_yuv))
print("<<<<<<<<<<")
# fix the color metrics manually so the round trip below has correct tags
frame_yuv.color_range = ColorRange.JPEG
frame_yuv.colorspace = Colorspace.ITU709
# 5a. Round trip relying on the frame's (now fixed) metadata — this path
# exhibits the bug: large error vs the original RGB.
frame_rgb_rt = frame_yuv.reformat(
format="rgb24"
)
print(">>>rgb raw vs yuv->rgb converted frame")
print(compare_frame_to_ndarray(rgb_np, frame_rgb_rt))
print("<<<<<<<<<<")
# 5b. Workaround: spell out src/dst colorspace and src range explicitly so
# sws_setColorspaceDetails is actually configured.
frame_rgb_rt2 = frame_yuv.reformat(
format="rgb24",
interpolation=Interpolation.POINT,
src_colorspace=Colorspace.ITU709,
dst_colorspace=Colorspace.DEFAULT,
src_color_range=ColorRange.JPEG
)
print(">>>rgb raw vs yuv->rgb converted frame")
print(compare_frame_to_ndarray(rgb_np, frame_rgb_rt2))
print("<<<<<<<<<<")
yuv_planes.shape=(3, 256, 256)
>>>rgb raw vs rgb frame
{'mean_diff': np.float64(0.0), 'max_diff': 0, 'num_mismatches': 0, 'total_pixels': 196608}
<<<<<<<<<<
>>>yuv raw vs converted yuv frame
{'mean_diff': np.float64(0.3345896402994792), 'max_diff': 1, 'num_mismatches': 65783, 'total_pixels': 196608}
<<<<<<<<<<
>>>rgb raw vs yuv->rgb converted frame
{'mean_diff': np.float64(9.679051717122396), 'max_diff': 39, 'num_mismatches': 188812, 'total_pixels': 196608}
<<<<<<<<<<
>>>rgb raw vs yuv->rgb converted frame
{'mean_diff': np.float64(0.6810099283854166), 'max_diff': 2, 'num_mismatches': 114061, 'total_pixels': 196608}
<<<<<<<<<<
/tmp/ipykernel_1203989/4113189771.py:146: UserWarning: frame.color_range=0 should be 2
warnings.warn(f"{frame.color_range=} should be {range}")
/tmp/ipykernel_1203989/4113189771.py:148: UserWarning: frame.colorspace=2 should be 1
warnings.warn(f"{frame.colorspace=} should be {space}")
The workaround only exists for rgb<->yuv since rgb does not care about the color matrix and color range.
This does not work for yuv->yuv because we usually want exactly the same range and colorspace; setColorspaceDetails is then skipped, which results in the default context being used
wrong behavior
when convert from yuv to rgb or rgb to yuv, or yuv to yuv:
PyAV/av/video/reformatter.py
Line 119 in 5f0e312
This results in completely wrong color values when the source video is bt709 and the user expects the same color space after the pixel format conversion.
The bug is on
PyAV/av/video/reformatter.py
Line 196 in 5f0e312
where it skips lib.sws_setColorspaceDetails entirely when the color range and color space are the same, which is the wrong way to use sws_scale.
One should always configure it.
The bug was apparently introduced by commit 72621f2
reproduce
run following code
the output
workaround
The workaround only exists for rgb<->yuv since rgb does not care about the color matrix and color range.
This does not work for yuv->yuv because we usually want exactly the same range and colorspace; setColorspaceDetails is then skipped, which results in the default context being used