Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 111 additions & 0 deletions av/codec/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,112 @@
# Module-level unique object, declared to Cython as a plain Python `object`.
# NOTE(review): presumably passed as a private sentinel to guard direct
# construction (`__cinit__`) — its users are not visible here; confirm.
_cinit_sentinel = cython.declare(object, object())


@cython.cfunc
def _with_suffix_sei_moved(packet: Packet, is_hevc: cython.bint) -> Packet:
    """Hoist SEI NAL units that trail the first slice to just before it.

    Motivation (as stated by the original author): SEI NAL units that follow
    the VCL (slice) NAL units of an access unit are not attached to the decoded
    frame's ``side_data`` by FFmpeg, whereas SEI placed ahead of the slices is.

    The packet payload is inspected in place through its C data pointer.
    Whenever no reordering is required the *same* ``packet`` object is
    returned and no replacement packet is built; note that Python lists
    describing the NAL units are still created for every multi-NALU packet.

    :param packet: Packet whose payload is examined; never modified.
    :param is_hevc: Selects HEVC NAL header parsing (6-bit type taken from
        bits 1-6 of the first header byte, SEI type 40, VCL types 0-31);
        otherwise H.264 parsing is used (low 5 bits, SEI type 6, VCL types
        1-5).
    :return: ``packet`` itself when it is empty, too short, does not begin
        with a start code, holds fewer than two NAL units, ends in a bare
        start code, or has no SEI after its first slice; otherwise a new
        ``Packet`` holding the reordered payload.

    NOTE(review): the Annex B test below is a pure byte-pattern heuristic. A
    4-byte big-endian length prefix with a value of 256-511 also begins with
    ``00 00 01`` — confirm length-prefixed (avcC/hvcC) packets cannot be
    mistaken for Annex B here.
    """
    data: cython.pointer[uint8_t] = packet.ptr.data
    size: cython.size_t = packet.ptr.size
    cursor: cython.size_t = 0
    header_at: cython.size_t
    unit_type: cython.int
    vcl_index: cython.int = -1
    sei_after_slice: cython.bint = False

    # Nothing to inspect: missing buffer, or too short to hold a start code
    # followed by a NAL header.
    if data == cython.NULL or size < 5:
        return packet

    # The payload must open with 00 00 01 or 00 00 00 01.
    if data[0] != 0 or data[1] != 0:
        return packet
    if data[2] != 1 and not (data[2] == 0 and data[3] == 1):
        return packet

    # Record (offset, start_code_length) for every start code in the buffer.
    # The 3- and 4-byte patterns differ in their third byte, so at most one of
    # them can match at any given offset.
    start_codes: list = []
    while cursor < size:
        if cursor + 3 <= size and data[cursor] == 0 and data[cursor + 1] == 0:
            if data[cursor + 2] == 1:
                start_codes.append((cursor, 3))
                cursor += 3
                continue
            if cursor + 4 <= size and data[cursor + 2] == 0 and data[cursor + 3] == 1:
                start_codes.append((cursor, 4))
                cursor += 4
                continue
        cursor += 1

    total: cython.Py_ssize_t = len(start_codes)
    if total < 2:
        return packet

    # Describe each NAL unit as (type, offset, stop). The span includes the
    # unit's own start code and runs up to the next start code (or the end of
    # the buffer for the last unit).
    units: list = []
    k: cython.Py_ssize_t
    for k in range(total):
        offset, prefix_len = start_codes[k]
        header_at = offset + prefix_len
        if header_at >= size:
            # A start code with no header byte after it: leave packet alone.
            return packet
        if is_hevc:
            unit_type = (data[header_at] >> 1) & 0x3F
        else:
            unit_type = data[header_at] & 0x1F
        if k + 1 < total:
            stop = start_codes[k + 1][0]
        else:
            stop = size
        units.append((unit_type, offset, stop))

    # Find the first slice, then stop at the first SEI that comes after it.
    sei_nut: cython.int = 40 if is_hevc else 6
    for k in range(total):
        kind = units[k][0]
        if is_hevc:
            carries_slice = kind <= 31
        else:
            carries_slice = 1 <= kind <= 5
        if carries_slice and vcl_index < 0:
            vcl_index = k
        elif vcl_index >= 0 and kind == sei_nut:
            sei_after_slice = True
            break

    # The flag can only be set once a slice has been seen, so it alone decides
    # whether any work remains.
    if not sei_after_slice:
        return packet

    # Split everything from the first slice onward into the SEI units to be
    # hoisted and the units that stay put. The first slice itself is never of
    # the SEI type, so it always lands in `kept`.
    moved: list = []
    kept: list = []
    for unit in units[vcl_index:]:
        if unit[0] == sei_nut:
            moved.append(unit)
        else:
            kept.append(unit)

    # New order: units before the first slice, hoisted SEI, everything else.
    # Slicing a memoryview of the packet avoids intermediate bytes objects.
    view = memoryview(packet)
    rebuilt = bytearray()
    for _, offset, stop in units[:vcl_index] + moved + kept:
        rebuilt.extend(view[offset:stop])

    # NOTE(review): only the fields assigned below are carried over to the new
    # packet; packet side data and any other AVPacket properties are not
    # copied — confirm this is intended.
    reordered: Packet = Packet(bytes(rebuilt))
    reordered._stream = packet._stream
    reordered.ptr.stream_index = packet.ptr.stream_index
    reordered.ptr.time_base = packet.ptr.time_base
    reordered.ptr.pts = packet.ptr.pts
    reordered.ptr.dts = packet.ptr.dts
    reordered.ptr.duration = packet.ptr.duration
    reordered.ptr.pos = packet.ptr.pos
    reordered.ptr.flags = packet.ptr.flags
    return reordered


@cython.cfunc
def wrap_codec_context(
c_ctx: cython.pointer[lib.AVCodecContext],
Expand Down Expand Up @@ -489,6 +595,11 @@ def decode(self, packet: Packet | None = None):

self.open(strict=False)

if packet is not None:
codec_name = self.codec.name
if codec_name == "h264" or codec_name == "hevc":
packet = _with_suffix_sei_moved(packet, codec_name == "hevc")

res: list = []
for frame in self._send_packet_and_recv(packet):
if isinstance(frame, Frame):
Expand Down
39 changes: 39 additions & 0 deletions tests/test_decode.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,45 @@ def test_side_data(self) -> None:
frame = next(container.decode(video=0))
assert frame.rotation == -90

def test_suffix_sei_in_frame_side_data(self) -> None:
    """An SEI placed after the slice NALUs must reach ``frame.side_data``.

    Regression test for #2160. One frame is encoded with libx264, an
    unregistered-user-data SEI NAL unit is appended after its slices, and the
    result is decoded with side-data export enabled. The SEI's UUID must show
    up among the decoded frames' ``SEI_UNREGISTERED`` side data.
    """
    # Produce one real Annex B H.264 access unit to append the SEI to.
    encoder = av.CodecContext.create("libx264", "w")
    encoder.width = 32
    encoder.height = 32
    encoder.pix_fmt = "yuv420p"
    encoder.options = {"tune": "zerolatency"}
    encoder.open()

    blank = av.VideoFrame(32, 32, "yuv420p")
    encoded = []
    # Encode the frame, then flush the encoder with None.
    for source in (blank, None):
        encoded.extend(bytes(pkt) for pkt in encoder.encode(source))
    assert encoded, "No packets encoded"
    access_unit = next(p for p in encoded if p.startswith(b"\x00\x00\x00\x01"))

    # SEI NAL unit: header byte 0x06 (nal_unit_type 6), payload type 5
    # (user data unregistered), payload size, payload, then the 0x80
    # trailing-bits byte.
    uuid = b"\xde\xad\xbe\xef\xde\xad\xbe\xef\xde\xad\xbe\xef\xde\xad\xbe\xef"
    payload = uuid + b"suffix_sei_test"
    trailing_sei = b"".join(
        (bytes([0x06]), bytes([5, len(payload)]), payload, bytes([0x80]))
    )

    # Putting the SEI behind the slices is what makes it a "suffix" here.
    stream = access_unit + b"\x00\x00\x01" + trailing_sei

    decoder = av.CodecContext.create("h264", "r")
    decoder.options["export_side_data"] = "all"
    decoder.open()

    frames = decoder.decode(av.packet.Packet(stream))
    assert frames, "No frames decoded"

    # Stop at the first unregistered-SEI entry that starts with our UUID.
    seen = False
    for decoded in frames:
        for entry in decoded.side_data:
            if entry.type.name == "SEI_UNREGISTERED" and bytes(entry)[:16] == uuid:
                seen = True
                break
        if seen:
            break
    assert seen, "Suffix SEI NALU was not found in frame.side_data"

def test_hardware_decode(self) -> None:
hwdevices_available = av.codec.hwaccel.hwdevices_available()
if "HWACCEL_DEVICE_TYPE" not in os.environ:
Expand Down
Loading