diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 5d4507e..7c07d09 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -187,6 +187,7 @@ jobs: artifacts/srtc_subscribe-macos/srtc_subscribe-macos.tar.gz artifacts/srtc_publish-windows/srtc_publish-windows.zip artifacts/srtc_subscribe-windows/srtc_subscribe-windows.zip + artifacts/video-files/sintel-av1.webm artifacts/video-files/sintel-vp8.webm artifacts/video-files/sintel.h264 artifacts/video-files/sintel_with_slices.h264 diff --git a/CMakeLists.txt b/CMakeLists.txt index 2c98770..f3516d3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -33,16 +33,20 @@ endif () add_library(srtc # Headers + include/srtc/bit_reader.h include/srtc/byte_buffer.h include/srtc/depacketizer.h + include/srtc/depacketizer_av1.h include/srtc/depacketizer_h264.h include/srtc/depacketizer_h265.h include/srtc/depacketizer_opus.h + include/srtc/depacketizer_video.h include/srtc/depacketizer_vp8.h include/srtc/error.h include/srtc/event_loop.h include/srtc/extension_map.h include/srtc/extended_value.h + include/srtc/codec_av1.h include/srtc/codec_h264.h include/srtc/codec_h265.h include/srtc/ice_agent.h @@ -99,17 +103,21 @@ add_library(srtc include/srtc/x509_certificate.h include/srtc/x509_hash.h # Sources + src/bit_reader.cpp src/byte_buffer.cpp + src/codec_av1.cpp + src/codec_h264.cpp + src/codec_h265.cpp src/depacketizer.cpp + src/depacketizer_av1.cpp src/depacketizer_h264.cpp src/depacketizer_h265.cpp src/depacketizer_opus.cpp + src/depacketizer_video.cpp src/depacketizer_vp8.cpp src/error.cpp src/extension_map.cpp src/extended_value.cpp - src/codec_h264.cpp - src/codec_h265.cpp src/ice_agent.cpp src/jitter_buffer.cpp src/logging.cpp @@ -160,8 +168,6 @@ add_library(srtc src/util.cpp src/x509_certificate.cpp src/x509_hash.cpp - include/srtc/codec_av1.h - src/codec_av1.cpp ) if (APPLE) @@ -437,12 +443,16 @@ if (NOT CMAKE_CROSSCOMPILING AND SRTC_BUILD_TOOLS) 
tools/http_whip_whep.h tools/media_writer.h tools/media_writer.cpp + tools/media_writer_av1.h + tools/media_writer_av1.cpp tools/media_writer_h26x.h tools/media_writer_h26x.cpp tools/media_writer_ogg.h tools/media_writer_ogg.cpp tools/media_writer_vp8.h tools/media_writer_vp8.cpp + tools/media_writer_webm.h + tools/media_writer_webm.cpp ) add_subdirectory(tools/libogg) diff --git a/README.md b/README.md index b5be91f..eb2f982 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ This is srtc, a "simple" WebRTC library (publish side is done and working quite - Depends on OpenSSL (or BoringSSL) only, nothing else. - Portable code in "conservative" C++: language level is C++ 17, and no exceptions or RTTI. - Only one worker thread per PeerConnection. -- Video codecs: VP8, H264 (any profile id), H265. AV1 is coming. +- Video codecs: VP8, H264 (any profile id), H265, AV1. - Audo codec: Opus. - SDP offer generation and SDP response parsing. - ICE / STUN negotiation, DTLS negotiation, SRTP and SRTCP. @@ -45,6 +45,9 @@ Has a command line tool to subscribe to audio and/or video, which can save media Media encoding / decoding and presentation are deliberately out of scope of this library. For publishing, the application needs to provide encoded media samples. For subscribing, the application receives encoded media samples which it needs to decode and present. +The srtc library does handle packetization of media frames into RTP packets when publishing and the reconstruction of media +frames from RTP packets when subscribing (a jitter buffer). + The API is deliberately not compatible with Google's, but the concepts are similar. The Google WebRTC library is inteded for browsers, and therefore its API has to match the API defined for JavaScript and cannot be changed. I decided that it's not necessary to follow the JavaScript API. 
@@ -167,6 +170,20 @@ To send video to Pion, run the publish sample like this: ./build/srtc_publish[.exe] -f sintel.h265 ``` +#### Using an AV1 input file + +First please run the Pion WebRTC server like this to use AV1 (by default it uses H264): + +```bash +./run.sh -codec av1 +``` + +To send video to Pion, run the publish sample like this: + +```bash +./build/srtc_publish[.exe] -f sintel-av1.webm +``` + ### A command line tool for subscribing ```bash @@ -229,6 +246,24 @@ And then subscribe like this: ./build/srtc_subscribe[.exe] --ov output.h265 ``` +#### Running in AV1 mode + +Run the Pion server like this, just like for publishing, and use the web page to publish media. + +```bash +./run.sh -codec av1 +``` + +And then subscribe like this: + +```bash +./build/srtc_subscribe[.exe] --ov output.webm +``` + +The resulting webm file will not contain any audio, just video - if you'd like to capture audio as well, +please add `--oa output.ogg`. + + ### An Android demo / sample There is an Android demo: diff --git a/convert_with_slices_h265.sh b/convert_with_slices_h265.sh index 38f42d2..eea3737 100755 --- a/convert_with_slices_h265.sh +++ b/convert_with_slices_h265.sh @@ -2,7 +2,7 @@ ffmpeg -i sintel_trailer-720p.mp4 \ -c:v libx265 \ - -x265-params "slices=4" \ + -x265-params "slices=2" \ -crf 23 \ -c:a copy \ sintel_with_slices.h265 diff --git a/include/srtc/bit_reader.h b/include/srtc/bit_reader.h new file mode 100644 index 0000000..c63348c --- /dev/null +++ b/include/srtc/bit_reader.h @@ -0,0 +1,29 @@ +#pragma once + +#include +#include + +namespace srtc +{ + +class BitReader +{ +public: + BitReader(const uint8_t* buffer, size_t size) + : data(buffer) + , dataSize(size) + , bitPos(0) + { + } + + uint32_t readBit(); + uint32_t readBits(size_t n); + uint32_t readUnsignedExpGolomb(); + + const uint8_t* const data; + const size_t dataSize; + size_t bitPos; +}; + + +} \ No newline at end of file diff --git a/include/srtc/byte_buffer.h b/include/srtc/byte_buffer.h index 
104cfbb..0efef9d 100644 --- a/include/srtc/byte_buffer.h +++ b/include/srtc/byte_buffer.h @@ -91,6 +91,7 @@ class ByteReader [[nodiscard]] uint8_t readU8(); [[nodiscard]] uint16_t readU16(); [[nodiscard]] uint32_t readU32(); + [[nodiscard]] uint32_t readLEB128(); [[nodiscard]] ByteBuffer readByteBuffer(size_t size); diff --git a/include/srtc/codec_h264.h b/include/srtc/codec_h264.h index 82a9305..cf6e883 100644 --- a/include/srtc/codec_h264.h +++ b/include/srtc/codec_h264.h @@ -55,27 +55,10 @@ class NaluParser ////////// bool isParameterNalu(uint8_t naluType); +bool isKeyFrameNalu(uint8_t naluType); +bool isFrameStart(const uint8_t* nalu, size_t size); -////////// - -class BitReader -{ -private: - const uint8_t* const data; - const size_t dataSize; - size_t bitPos; - -public: - BitReader(const uint8_t* buffer, size_t size) - : data(buffer) - , dataSize(size) - , bitPos(0) - { - } - - uint32_t readBit(); - uint32_t readBits(size_t n); - uint32_t readUnsignedExpGolomb(); -}; +bool isSliceNalu(uint8_t naluType); +bool isSliceFrameStart(const uint8_t* data, size_t size); } // namespace srtc::h264 diff --git a/include/srtc/codec_h265.h b/include/srtc/codec_h265.h index 8a3b1df..4c72931 100644 --- a/include/srtc/codec_h265.h +++ b/include/srtc/codec_h265.h @@ -55,7 +55,10 @@ class NaluParser ////////// bool isParameterNalu(uint8_t naluType); -bool isKeyFrameNalu(uint8_t nalu_type); -bool isFrameStart(const uint8_t* frame, size_t size); +bool isKeyFrameNalu(uint8_t naluType); +bool isFrameStart(const uint8_t* nalu, size_t size); + +bool isSliceNalu(uint8_t naluType); +bool isSliceFrameStart(const uint8_t* data, size_t size); } // namespace srtc::h265 diff --git a/include/srtc/depacketizer.h b/include/srtc/depacketizer.h index 1044897..60d33f3 100644 --- a/include/srtc/depacketizer.h +++ b/include/srtc/depacketizer.h @@ -24,7 +24,6 @@ class Depacketizer virtual void reset() = 0; - virtual void extract(std::vector& out, const JitterBufferItem* packet) = 0; virtual void 
extract(std::vector& out, const std::vector& packetList) = 0; static std::pair, Error> make(const std::shared_ptr& track); diff --git a/include/srtc/depacketizer_av1.h b/include/srtc/depacketizer_av1.h new file mode 100644 index 0000000..607cb9a --- /dev/null +++ b/include/srtc/depacketizer_av1.h @@ -0,0 +1,27 @@ +#pragma once + +#include "srtc/depacketizer_video.h" + +namespace srtc +{ + +class DepacketizerAV1 final : public DepacketizerVideo +{ +public: + explicit DepacketizerAV1(const std::shared_ptr& track); + ~DepacketizerAV1() override; + + void reset() override; + + void extract(std::vector& out, const std::vector& packetList) override; + +protected: + [[nodiscard]] bool isFrameStart(const ByteBuffer& payload) const override; + +private: + bool mSeenNewSequence; + + void extractImpl(std::vector& out, const JitterBufferItem* packet, ByteBuffer&& frame); +}; + +} // namespace srtc \ No newline at end of file diff --git a/include/srtc/depacketizer_h264.h b/include/srtc/depacketizer_h264.h index 1690a26..c20ce6a 100644 --- a/include/srtc/depacketizer_h264.h +++ b/include/srtc/depacketizer_h264.h @@ -1,30 +1,30 @@ #pragma once #include "srtc/byte_buffer.h" -#include "srtc/depacketizer.h" +#include "srtc/depacketizer_video.h" namespace srtc { -class DepacketizerH264 final : public Depacketizer +class DepacketizerH264 final : public DepacketizerVideo { public: explicit DepacketizerH264(const std::shared_ptr& track); ~DepacketizerH264() override; - [[nodiscard]] PacketKind getPacketKind(const ByteBuffer& payload, bool marker) const override; - void reset() override; - void extract(std::vector& out, const JitterBufferItem* packet) override; void extract(std::vector& out, const std::vector& packetList) override; +protected: + [[nodiscard]] bool isFrameStart(const ByteBuffer& payload) const override; + private: uint8_t mHaveBits; ByteBuffer mFrameBuffer; uint64_t mLastRtpTimestamp; - void extractImpl(std::vector& out, const JitterBufferItem* packet, ByteBuffer&& 
frame); + void extractImpl(std::vector& out, const JitterBufferItem* packet, ByteBuffer&& nalu); }; } // namespace srtc \ No newline at end of file diff --git a/include/srtc/depacketizer_h265.h b/include/srtc/depacketizer_h265.h index 89d48ec..941f89c 100644 --- a/include/srtc/depacketizer_h265.h +++ b/include/srtc/depacketizer_h265.h @@ -1,30 +1,30 @@ #pragma once #include "srtc/byte_buffer.h" -#include "srtc/depacketizer.h" +#include "srtc/depacketizer_video.h" namespace srtc { -class DepacketizerH265 final : public Depacketizer +class DepacketizerH265 final : public DepacketizerVideo { public: explicit DepacketizerH265(const std::shared_ptr& track); ~DepacketizerH265() override; - [[nodiscard]] PacketKind getPacketKind(const ByteBuffer& payload, bool marker) const override; - void reset() override; - void extract(std::vector& out, const JitterBufferItem* packet) override; void extract(std::vector& out, const std::vector& packetList) override; +protected: + [[nodiscard]] bool isFrameStart(const ByteBuffer& payload) const override; + private: uint8_t mHaveBits; ByteBuffer mFrameBuffer; uint64_t mLastRtpTimestamp; - void extractImpl(std::vector& out, const JitterBufferItem* packet, ByteBuffer&& frame); + void extractImpl(std::vector& out, const JitterBufferItem* packet, ByteBuffer&& nalu); }; } // namespace srtc \ No newline at end of file diff --git a/include/srtc/depacketizer_opus.h b/include/srtc/depacketizer_opus.h index 8ec4acf..143a951 100644 --- a/include/srtc/depacketizer_opus.h +++ b/include/srtc/depacketizer_opus.h @@ -15,7 +15,6 @@ class DepacketizerOpus final : public Depacketizer void reset() override; - void extract(std::vector& out, const JitterBufferItem* packet) override; void extract(std::vector& out, const std::vector& packetList) override; }; diff --git a/include/srtc/depacketizer_video.h b/include/srtc/depacketizer_video.h new file mode 100644 index 0000000..c14ff0f --- /dev/null +++ b/include/srtc/depacketizer_video.h @@ -0,0 +1,20 @@ +#pragma 
once + +#include "srtc/byte_buffer.h" +#include "srtc/depacketizer.h" + +namespace srtc +{ + +class DepacketizerVideo : public Depacketizer +{ +public: + explicit DepacketizerVideo(const std::shared_ptr& track); + ~DepacketizerVideo() override; + + [[nodiscard]] PacketKind getPacketKind(const ByteBuffer& payload, bool marker) const final; + +protected: + [[nodiscard]] virtual bool isFrameStart(const ByteBuffer& payload) const = 0; +}; +} // namespace srtc \ No newline at end of file diff --git a/include/srtc/depacketizer_vp8.h b/include/srtc/depacketizer_vp8.h index 649fb81..c2af12b 100644 --- a/include/srtc/depacketizer_vp8.h +++ b/include/srtc/depacketizer_vp8.h @@ -1,23 +1,23 @@ #pragma once -#include "srtc/depacketizer.h" +#include "srtc/depacketizer_video.h" namespace srtc { -class DepacketizerVP8 final : public Depacketizer +class DepacketizerVP8 final : public DepacketizerVideo { public: explicit DepacketizerVP8(const std::shared_ptr& track); ~DepacketizerVP8() override; - [[nodiscard]] PacketKind getPacketKind(const ByteBuffer& payload, bool marker) const override; - void reset() override; - void extract(std::vector& out, const JitterBufferItem* packet) override; void extract(std::vector& out, const std::vector& packetList) override; +protected: + [[nodiscard]] bool isFrameStart(const ByteBuffer& payload) const override; + private: bool mSeenKeyFrame; diff --git a/include/srtc/jitter_buffer.h b/include/srtc/jitter_buffer.h index 6591521..1d7d3bc 100644 --- a/include/srtc/jitter_buffer.h +++ b/include/srtc/jitter_buffer.h @@ -53,7 +53,7 @@ class JitterBuffer const std::chrono::steady_clock::time_point& when_nack_abandon, uint64_t seq_ext); - void extractBufferList(std::vector& out, uint64_t start, uint64_t max); + void fillItemList(std::vector& out, uint64_t start, uint64_t max); void deleteItemList(uint64_t start, uint64_t max); void appendToResult(std::vector>& result, JitterBufferItem* item, @@ -85,8 +85,8 @@ class JitterBuffer 
std::chrono::steady_clock::time_point mBaseTime; uint64_t mBaseRtpTimestamp; + std::vector mTempPacketList; std::vector mTempFrameList; - std::vector mTempBufferList; std::optional mLastFrameTimeStamp; }; diff --git a/include/srtc/packetizer_h264.h b/include/srtc/packetizer_h264.h index 4f071fb..1f72982 100644 --- a/include/srtc/packetizer_h264.h +++ b/include/srtc/packetizer_h264.h @@ -21,8 +21,8 @@ class PacketizerH264 final : public PacketizerVideo const ByteBuffer& frame) override; private: - srtc::ByteBuffer mSPS; // Without Annex B header - srtc::ByteBuffer mPPS; + ByteBuffer mSPS; // Without Annex B header + ByteBuffer mPPS; }; } // namespace srtc diff --git a/include/srtc/packetizer_h265.h b/include/srtc/packetizer_h265.h index 66f3977..7428042 100644 --- a/include/srtc/packetizer_h265.h +++ b/include/srtc/packetizer_h265.h @@ -21,9 +21,9 @@ class PacketizerH265 final : public PacketizerVideo const ByteBuffer& frame) override; private: - srtc::ByteBuffer mVPS; // Without Annex B header - srtc::ByteBuffer mSPS; - srtc::ByteBuffer mPPS; + ByteBuffer mVPS; // Without Annex B header + ByteBuffer mSPS; + ByteBuffer mPPS; }; } // namespace srtc diff --git a/sintel_with_slices.h264 b/sintel_with_slices.h264 index 0202dbd..9992272 100644 Binary files a/sintel_with_slices.h264 and b/sintel_with_slices.h264 differ diff --git a/sintel_with_slices.h265 b/sintel_with_slices.h265 index 488d985..62ae1f5 100644 Binary files a/sintel_with_slices.h265 and b/sintel_with_slices.h265 differ diff --git a/src/bit_reader.cpp b/src/bit_reader.cpp new file mode 100644 index 0000000..872ecf1 --- /dev/null +++ b/src/bit_reader.cpp @@ -0,0 +1,43 @@ +#include "srtc/bit_reader.h" + +namespace srtc +{ + +uint32_t BitReader::readBit() +{ + if ((bitPos >> 3) >= dataSize) + return 0; + + uint8_t byte = data[bitPos >> 3]; + uint32_t bit = (byte >> (7 - (bitPos & 7))) & 1; + bitPos++; + return bit; +} + +uint32_t BitReader::readBits(size_t n) +{ + uint32_t value = 0; + for (size_t i = 0; i < 
n; i++) { + value = (value << 1) | readBit(); + } + return value; +} + +uint32_t BitReader::readUnsignedExpGolomb() +{ + // Count leading zeros + int leadingZeros = 0; + while (readBit() == 0 && leadingZeros < 32) { + leadingZeros++; + } + + if (leadingZeros == 0) { + return 0; + } + + // Read remaining bits + uint32_t remainingBits = readBits(leadingZeros); + return (1 << leadingZeros) - 1 + remainingBits; +} + +} // namespace srtc diff --git a/src/byte_buffer.cpp b/src/byte_buffer.cpp index 2040630..92f9bbf 100644 --- a/src/byte_buffer.cpp +++ b/src/byte_buffer.cpp @@ -342,6 +342,29 @@ uint32_t ByteReader::readU32() return res; } +uint32_t ByteReader::readLEB128() +{ + uint32_t result = 0; + int shift = 0; + + while (mPos < mLen) { + const uint8_t byte = mBuf[mPos++]; + + result |= (static_cast(byte & 0x7F) << shift); + + if ((byte & 0x80) == 0) { + break; + } + + shift += 7; + if (shift >= 32) { + break; + } + } + + return result; +} + ByteBuffer ByteReader::readByteBuffer(size_t size) { assert(mPos + size <= mLen); diff --git a/src/codec_av1.cpp b/src/codec_av1.cpp index 8c90594..78cb2cf 100644 --- a/src/codec_av1.cpp +++ b/src/codec_av1.cpp @@ -179,3 +179,4 @@ bool isKeyFrameObu(uint8_t obuType, const uint8_t* data, size_t size) } } // namespace srtc::av1 + diff --git a/src/codec_h264.cpp b/src/codec_h264.cpp index 2a6f224..ecf365a 100644 --- a/src/codec_h264.cpp +++ b/src/codec_h264.cpp @@ -1,4 +1,5 @@ #include "srtc/codec_h264.h" +#include "srtc/bit_reader.h" namespace { @@ -109,42 +110,38 @@ bool isParameterNalu(uint8_t naluType) return naluType == NaluType::SPS || naluType == NaluType::PPS; } -////////// +bool isKeyFrameNalu(uint8_t naluType) +{ + return naluType == NaluType::KeyFrame; +} -uint32_t BitReader::readBit() +bool isFrameStart(const uint8_t* nalu, size_t size) { - if ((bitPos >> 3) >= dataSize) - return 0; + if (size > 0) { + const auto naluType = nalu[0] & 0x1F; + if (naluType == NaluType::KeyFrame || naluType == NaluType::NonKeyFrame) { + if 
(size > 1) { + BitReader reader(nalu + 1, size - 1); + return reader.readUnsignedExpGolomb() == 0; + } + } + } - uint8_t byte = data[bitPos >> 3]; - uint32_t bit = (byte >> (7 - (bitPos & 7))) & 1; - bitPos++; - return bit; + return false; } -uint32_t BitReader::readBits(size_t n) +bool isSliceNalu(uint8_t naluType) { - uint32_t value = 0; - for (size_t i = 0; i < n; i++) { - value = (value << 1) | readBit(); - } - return value; + return naluType == NaluType::NonKeyFrame || naluType == NaluType::KeyFrame; } -uint32_t BitReader::readUnsignedExpGolomb() +bool isSliceFrameStart(const uint8_t* data, size_t size) { - // Count leading zeros - int leadingZeros = 0; - while (readBit() == 0 && leadingZeros < 32) { - leadingZeros++; + if (size > 0) { + BitReader reader(data, size); + return reader.readUnsignedExpGolomb() == 0; } - - if (leadingZeros == 0) - return 0; - - // Read remaining bits - uint32_t remainingBits = readBits(leadingZeros); - return (1 << leadingZeros) - 1 + remainingBits; + return false; } } // namespace srtc::h264 diff --git a/src/codec_h265.cpp b/src/codec_h265.cpp index fd1e9ea..06eb67f 100644 --- a/src/codec_h265.cpp +++ b/src/codec_h265.cpp @@ -109,25 +109,40 @@ bool isParameterNalu(uint8_t naluType) return naluType == NaluType::VPS || naluType == NaluType::SPS || naluType == NaluType::PPS; } -bool isKeyFrameNalu(uint8_t nalu_type) +bool isKeyFrameNalu(uint8_t naluType) { - return nalu_type == srtc::h265::NaluType::KeyFrame19 || nalu_type == srtc::h265::NaluType::KeyFrame20 || - nalu_type == srtc::h265::NaluType::KeyFrame21; + return naluType == NaluType::KeyFrame19 || naluType == NaluType::KeyFrame20 || naluType == NaluType::KeyFrame21; } -bool isFrameStart(const uint8_t* frame, size_t size) +bool isFrameStart(const uint8_t* nalu, size_t size) { if (size < 3) { return false; } - const auto nalu_type = (frame[0] >> 1) & 0x3F; + const auto nalu_type = (nalu[0] >> 1) & 0x3F; if (nalu_type <= 21) { // Regular slice - check 
first_slice_segment_in_pic_flag - return (frame[2] & 0x80) != 0; + return (nalu[2] & 0x80) != 0; } else { // Non-slice NAL unit return false; } } + +bool isSliceNalu(uint8_t naluType) +{ + return naluType <= 21; +} + +bool isSliceFrameStart(const uint8_t* data, size_t size) +{ + if (size == 0) { + return false; + } + + // check first_slice_segment_in_pic_flag + return (data[0] & 0x80) != 0; +} + } // namespace srtc::h265 diff --git a/src/depacketizer.cpp b/src/depacketizer.cpp index a4f248d..48b29c9 100644 --- a/src/depacketizer.cpp +++ b/src/depacketizer.cpp @@ -1,4 +1,6 @@ #include "srtc/depacketizer.h" + +#include "srtc/depacketizer_av1.h" #include "srtc/depacketizer_h264.h" #include "srtc/depacketizer_h265.h" #include "srtc/depacketizer_opus.h" @@ -25,6 +27,8 @@ std::pair, Error> Depacketizer::make(const std::sh return { std::make_shared(track), Error::OK }; case Codec::H265: return { std::make_shared(track), Error::OK }; + case Codec::AV1: + return { std::make_shared(track), Error::OK }; case Codec::Opus: return { std::make_shared(track), Error::OK }; default: diff --git a/src/depacketizer_av1.cpp b/src/depacketizer_av1.cpp new file mode 100644 index 0000000..6460f95 --- /dev/null +++ b/src/depacketizer_av1.cpp @@ -0,0 +1,240 @@ +#include "srtc/depacketizer_av1.h" + +#include "srtc/codec_av1.h" +#include "srtc/logging.h" +#include "srtc/util.h" + +#define LOG(level, ...) srtc::log(level, "DepacketizerAV1", __VA_ARGS__) + +namespace +{ + +// Chrome sends AV1 data without the size in the OBUs, which kind of makes sense because they can save one LEB128 per +// OBU. When we produce our output, we have to write OBUs with a size, and for that, we have to buffer each OBU's data +// until we are ready to emit and then we know its size (a new OBU starts, or we reach the end of the packet list). 
+ +class BufferedObu +{ +public: + BufferedObu(); + ~BufferedObu() = default; + + void setHeader(uint8_t header); + void setExtension(uint8_t extension); + + void append(const uint8_t* data, size_t size); + + void flushTo(srtc::ByteWriter& out); + +private: + uint8_t mHeader; // or 0 since OBU types start at 1 + uint8_t mExtension; + srtc::ByteBuffer mBuf; +}; + +BufferedObu::BufferedObu() + : mHeader(0) + , mExtension(0) +{ +} + +void BufferedObu::setHeader(uint8_t header) +{ + mHeader = header; +} + +void BufferedObu::setExtension(uint8_t extension) +{ + mExtension = extension; +} + +void BufferedObu::append(const uint8_t* data, size_t size) +{ + mBuf.append(data, size); +} + +void BufferedObu::flushTo(srtc::ByteWriter& out) +{ + if (mHeader != 0) { + out.writeU8(mHeader | (1 << 1)); // force has_size + if ((mHeader & (1 << 2)) != 0) { + // Extension + out.writeU8(mExtension); + } + + out.writeLEB128(mBuf.size()); + out.write(mBuf); + + mBuf.clear(); + mHeader = 0; + mExtension = 0; + } +} + +} // namespace + +namespace srtc +{ + +DepacketizerAV1::DepacketizerAV1(const std::shared_ptr& track) + : DepacketizerVideo(track) + , mSeenNewSequence(false) +{ +} + +DepacketizerAV1::~DepacketizerAV1() = default; + +void DepacketizerAV1::reset() +{ + mSeenNewSequence = false; +} + +void DepacketizerAV1::extract(std::vector& out, const std::vector& packetList) +{ + out.clear(); + + ByteBuffer buf; + ByteWriter w(buf); + + BufferedObu bufferedObu; + + for (const auto packet : packetList) { + srtc::ByteReader reader(packet->payload); + + if (reader.remaining() >= 1) { + // https://aomediacodec.github.io/av1-rtp-spec/#aggregation-header + // |Z|Y|WW|N|-|-|-| + const auto packetHeader = reader.readU8(); + const auto valueZ = (packetHeader & (1 << 7)) != 0; + const auto valueW = (packetHeader >> 4) & 0x03u; + const auto valueN = (packetHeader >> 3) & 0x01u; + + if (valueN) { + mSeenNewSequence = true; + } + + auto obuIndex = 0u; + + while (reader.remaining() >= 1) { + // Size 
unless it's the last OBU and we are told there is no size + size_t obuSize; + if (valueW > 0 && obuIndex == valueW - 1) { + obuSize = reader.remaining(); + } else { + obuSize = reader.readLEB128(); + } + + if (reader.remaining() >= obuSize && obuSize > 0) { + if (!valueZ) { + // Start of a new OBU + bufferedObu.flushTo(w); + + const auto obuHeader = reader.readU8(); + obuSize -= 1; + + const auto obuHasExtension = (obuHeader >> 2) & 0x01; + + bufferedObu.setHeader(obuHeader); + if (obuHasExtension && reader.remaining() >= 1) { + const auto obuExtension = reader.readU8(); + obuSize -= 1; + + bufferedObu.setExtension(obuExtension); + } + } + + bufferedObu.append(packet->payload.data() + reader.position(), obuSize); + reader.skip(obuSize); + } else { + break; + } + + if (valueW > 0 && obuIndex == valueW - 1) { + // Reached the last one + break; + } + + obuIndex += 1; + } + } + } + + // Flush + bufferedObu.flushTo(w); + + if (!buf.empty()) { + extractImpl(out, packetList.back(), std::move(buf)); + } +} + +bool DepacketizerAV1::isFrameStart(const ByteBuffer& payload) const +{ + ByteReader reader(payload); + + if (reader.remaining() >= 1) { + // https://aomediacodec.github.io/av1-rtp-spec/#aggregation-header + // |Z|Y|WW|N|-|-|-| + const auto packetHeader = reader.readU8(); + const auto valueZ = (packetHeader & (1 << 7)) != 0; + if (valueZ) { + return false; + } + + const auto valueW = (packetHeader >> 4) & 0x03u; + auto obuIndex = 0u; + + while (reader.remaining() >= 1) { + // Size unless it's the last OBU and we are told there is no size + size_t obuSize; + if (valueW != 0 && obuIndex == valueW - 1) { + obuSize = reader.remaining(); + } else { + obuSize = reader.readLEB128(); + } + + if (reader.remaining() >= 1 && obuSize >= 1) { + // https://aomediacodec.github.io/av1-spec/#obu-header-syntax + const auto obuHeader = reader.readU8(); + const auto obuType = (obuHeader >> 3) & 0x0Fu; + + if (obuType == av1::ObuType::SequenceHeader) { + return true; + } + if 
(av1::isFrameObuType(obuType)) { + return true; + } + + obuSize -= 1; + } + + if (valueW != 0 && obuIndex == valueW - 1) { + // Reached the last one + break; + } + + // Advance + reader.skip(obuSize); + obuIndex += 1; + } + } + + return false; +} + +void DepacketizerAV1::extractImpl(std::vector& out, const JitterBufferItem* packet, ByteBuffer&& frame) +{ + if (frame.empty()) { + return; + } + + if (!mSeenNewSequence) { + LOG(SRTC_LOG_V, "Not emitting a non-key frame until there is a new sequence"); + return; + } + + if (packet->marker) { + out.push_back(std::move(frame)); + } +} + +} // namespace srtc \ No newline at end of file diff --git a/src/depacketizer_h264.cpp b/src/depacketizer_h264.cpp index 2c70694..ab7be4c 100644 --- a/src/depacketizer_h264.cpp +++ b/src/depacketizer_h264.cpp @@ -1,8 +1,11 @@ #include "srtc/depacketizer_h264.h" #include "srtc/codec_h264.h" #include "srtc/logging.h" +#include "srtc/track.h" #include "srtc/util.h" +#include + #define LOG(level, ...) srtc::log(level, "Depacketizer_H264", __VA_ARGS__) namespace @@ -16,52 +19,37 @@ constexpr auto kHaveAll = kHaveSPS | kHavePPS | kHaveKey; const uint8_t kAnnexB[4] = { 0, 0, 0, 1 }; +bool isFrameStartImpl(uint8_t naluType, const uint8_t* data, size_t size) +{ + if (naluType == srtc::h264::NaluType::SPS) { + // Key frames start with SPS + return true; + } + if (naluType == srtc::h264::NaluType::NonKeyFrame && size > 0) { + // Non-key frames start first_mb_in_slice == 0 + if (srtc::h264::isSliceFrameStart(data, size)) { + return true; + } + } + + return false; +} + } // namespace namespace srtc { DepacketizerH264::DepacketizerH264(const std::shared_ptr& track) - : Depacketizer(track) + : DepacketizerVideo(track) , mHaveBits(0) , mLastRtpTimestamp(0) { + assert(track->getCodec() == Codec::H264); } DepacketizerH264::~DepacketizerH264() = default; -PacketKind DepacketizerH264::getPacketKind(const ByteBuffer& payload, bool marker) const -{ - ByteReader reader(payload); - if (reader.remaining() >= 1) 
{ - // https://datatracker.ietf.org/doc/html/rfc6184#section-5.4 - const auto value = reader.readU8(); - const auto type = value & 0x1F; - - if (type == h264::kPacket_STAP_A) { - // https://datatracker.ietf.org/doc/html/rfc6184#section-5.7.1 - return PacketKind::Standalone; - } else if (type == h264::kPacket_FU_A) { - // https://datatracker.ietf.org/doc/html/rfc6184#section-5.8 - if (reader.remaining() >= 1) { - const auto header = reader.readU8(); - if ((header & (1 << 7)) != 0) { - return PacketKind::Start; - } else if ((header & (1 << 6)) != 0) { - return PacketKind::End; - } else { - return PacketKind::Middle; - } - } - } else if (type >= 1 && type <= 23) { - // https://datatracker.ietf.org/doc/html/rfc6184#section-5.4 - return PacketKind::Standalone; - } - } - - return PacketKind::Standalone; -} - void DepacketizerH264::reset() { mHaveBits = 0; @@ -69,88 +57,147 @@ void DepacketizerH264::reset() mLastRtpTimestamp = 0; } -void DepacketizerH264::extract(std::vector& out, const JitterBufferItem* packet) +void DepacketizerH264::extract(std::vector& out, const std::vector& packetList) { out.clear(); - ByteReader reader(packet->payload); - if (reader.remaining() >= 1) { - // https://datatracker.ietf.org/doc/html/rfc6184#section-5.4 - const auto value = reader.readU8(); - const auto type = value & 0x1F; + std::unique_ptr fu_buf; + std::unique_ptr fu_wrt; - if (type == h264::kPacket_STAP_A) { - // https://datatracker.ietf.org/doc/html/rfc6184#section-5.7.1 - while (reader.remaining() >= 2) { - const auto size = reader.readU16(); - if (reader.remaining() < size) { - break; - } + for (const auto packet : packetList) { + ByteReader reader(packet->payload); - ByteBuffer buf(packet->payload.data() + reader.position(), size); - extractImpl(out, packet, std::move(buf)); + if (reader.remaining() > 1) { + const auto indicator = reader.readU8(); - reader.skip(size); - } - } else { // https://datatracker.ietf.org/doc/html/rfc6184#section-5.4 - extractImpl(out, packet, 
packet->payload.copy()); + const auto nri = indicator & 0x60u; + const auto type = indicator & 0x1Fu; + + if (type == h264::kPacket_STAP_A) { + // https://datatracker.ietf.org/doc/html/rfc6184#section-5.7.1 + while (reader.remaining() >= 2) { + const auto size = reader.readU16(); + if (reader.remaining() >= size && size > 0) { + ByteBuffer buf(packet->payload.data() + reader.position(), size); + + extractImpl(out, packet, std::move(buf)); + reader.skip(size); + } else { + break; + } + } + } else if (type == h264::kPacket_FU_A) { + // https://datatracker.ietf.org/doc/html/rfc6184#section-5.8 + if (reader.remaining() >= 1) { + const auto fuHeader = reader.readU8(); + const auto fuIsStart = (fuHeader & (1 << 7)) != 0; + const auto fuIsEnd = (fuHeader & (1 << 6)) != 0; + const auto fuNaluType = fuHeader & 0x1Fu; + + if (fuIsStart) { + fu_buf = std::make_unique(fuHeader); + fu_wrt = std::make_unique(*fu_buf); + fu_wrt->writeU8(nri | fuNaluType); + } + + if (reader.remaining() > 0) { + fu_wrt->write(packet->payload.data() + reader.position(), reader.remaining()); + } + + if (fuIsEnd && fu_buf) { + extractImpl(out, packet, std::move(*fu_buf)); + } + } + } else if (type < 23) { + // https://datatracker.ietf.org/doc/html/rfc6184#section-5.6 + extractImpl(out, packet, packet->payload.copy()); + } } } } -void DepacketizerH264::extract(std::vector& out, const std::vector& packetList) +bool DepacketizerH264::isFrameStart(const ByteBuffer& payload) const { - out.clear(); + ByteReader reader(payload); - ByteBuffer buf; - ByteWriter w(buf); + if (reader.remaining() > 1) { + const auto indicator = reader.readU8(); - for (const auto packet : packetList) { - ByteReader reader(packet->payload); + // https://datatracker.ietf.org/doc/html/rfc6184#section-5.4 + // const auto nri = indicator & 0x60u; + const auto type = indicator & 0x1Fu; - if (reader.remaining() > 2) { - const auto indicator = reader.readU8(); - const auto header = reader.readU8(); + if (type == h264::kPacket_STAP_A) { + 
// https://datatracker.ietf.org/doc/html/rfc6184#section-5.7.1 + while (reader.remaining() >= 2) { + const auto size = reader.readU16(); + if (reader.remaining() >= size && size > 0) { + const auto naluData = payload.data() + reader.position(); + const auto naluSize = size; + const auto naluType = naluData[0] & 0x1F; - const auto nri = indicator & 0x60; - const auto type = header & 0x1F; + if (naluSize > 1 && isFrameStartImpl(naluType, naluData + 1, naluSize - 1)) { + return true; + } - if (buf.empty()) { - w.writeU8(nri | type); + reader.skip(size); + } else { + break; + } } + } else if (type == h264::kPacket_FU_A) { + // https://datatracker.ietf.org/doc/html/rfc6184#section-5.8 + if (reader.remaining() >= 1) { + const auto fuHeader = reader.readU8(); + const auto fuIsStart = (fuHeader & (1 << 7)) != 0; + const auto fuNaluType = fuHeader & 0x1Fu; + + if (fuIsStart) { + const auto naluData = payload.data() + reader.position(); + const auto naluSize = reader.remaining(); + if (isFrameStartImpl(fuNaluType, naluData, naluSize)) { + return true; + } + } + } + } else if (type <= 23) { + // https://datatracker.ietf.org/doc/html/rfc6184#section-5.6 + const auto naluData = payload.data() + reader.position(); + const auto naluSize = reader.remaining(); - const auto pos = reader.position(); - w.write(packet->payload.data() + pos, packet->payload.size() - pos); + if (isFrameStartImpl(type, naluData, naluSize)) { + return true; + } } } - extractImpl(out, packetList.back(), std::move(buf)); + return false; } -void DepacketizerH264::extractImpl(std::vector& out, const JitterBufferItem* packet, ByteBuffer&& frame) +void DepacketizerH264::extractImpl(std::vector& out, const JitterBufferItem* packet, ByteBuffer&& nalu) { - if (frame.empty()) { + if (nalu.empty()) { return; } if ((mHaveBits & kHaveAll) != kHaveAll) { // Wait to emit until we have a key frame - const auto type = frame.front() & 0x1F; - switch (type) { - case h264::NaluType::NonKeyFrame: - LOG(SRTC_LOG_V, "Not 
emitting a non-key frame until there is a keyframe"); - return; + const auto nalu_type = nalu.front() & 0x1F; + switch (nalu_type) { case h264::NaluType::SPS: mHaveBits |= kHaveSPS; break; case h264::NaluType::PPS: mHaveBits |= kHavePPS; break; - case h264::NaluType::KeyFrame: - mHaveBits |= kHaveKey; - break; default: + if (h264::isKeyFrameNalu(nalu_type)) { + mHaveBits |= kHaveKey; + } else if (h264::isSliceNalu(nalu_type)) { + LOG(SRTC_LOG_V, "Not emitting a non-key frame until there is a keyframe"); + return; + } break; } } @@ -161,7 +208,7 @@ void DepacketizerH264::extractImpl(std::vector& out, const JitterBuf } mFrameBuffer.append(kAnnexB, sizeof(kAnnexB)); - mFrameBuffer.append(frame); + mFrameBuffer.append(nalu); if (packet->marker) { out.push_back(std::move(mFrameBuffer)); diff --git a/src/depacketizer_h265.cpp b/src/depacketizer_h265.cpp index c7147a7..1b13f1c 100644 --- a/src/depacketizer_h265.cpp +++ b/src/depacketizer_h265.cpp @@ -1,8 +1,11 @@ #include "srtc/depacketizer_h265.h" #include "srtc/codec_h265.h" #include "srtc/logging.h" +#include "srtc/track.h" #include "srtc/util.h" +#include + #define LOG(level, ...) 
srtc::log(level, "Depacketizer_H265", __VA_ARGS__) namespace @@ -17,53 +20,37 @@ constexpr auto kHaveAll = kHaveVPS | kHaveSPS | kHavePPS | kHaveKey; const uint8_t kAnnexB[4] = { 0, 0, 0, 1 }; +bool isFrameStartImpl(uint8_t naluType, const uint8_t* data, size_t size) +{ + if (naluType == srtc::h265::NaluType::VPS) { + // Key frames start with VPS + return true; + } + if (!srtc::h265::isKeyFrameNalu(naluType) && srtc::h265::isSliceNalu(naluType) && size > 0) { + // Non-key frames start with an indicator bit + if (srtc::h265::isSliceFrameStart(data, size)) { + return true; + } + } + + return false; +} + } // namespace namespace srtc { DepacketizerH265::DepacketizerH265(const std::shared_ptr& track) - : Depacketizer(track) + : DepacketizerVideo(track) , mHaveBits(0) , mLastRtpTimestamp(0) { + assert(track->getCodec() == Codec::H265); } DepacketizerH265::~DepacketizerH265() = default; -PacketKind DepacketizerH265::getPacketKind(const ByteBuffer& payload, bool marker) const -{ - ByteReader reader(payload); - if (reader.remaining() >= 1) { - // https://datatracker.ietf.org/doc/html/rfc7798#section-1.1.4 - const auto value = reader.readU8(); - const auto nalu_type = (value >> 1) & 0x3F; - - if (nalu_type == h265::kPacket_AP) { - // https://datatracker.ietf.org/doc/html/rfc7798#section-4.4.2 - return PacketKind::Standalone; - } else if (nalu_type == h265::kPacket_FU) { - // https://datatracker.ietf.org/doc/html/rfc7798#section-4.4.3 - if (reader.remaining() >= 2) { - reader.skip(1); - const auto fuHeader = reader.readU8(); - if ((fuHeader & (1 << 7)) != 0) { - return PacketKind::Start; - } else if ((fuHeader & (1 << 6)) != 0) { - return PacketKind::End; - } else { - return PacketKind::Middle; - } - } - } else if (nalu_type >= 0 && nalu_type <= 40) { - // https://datatracker.ietf.org/doc/html/rfc7798#section-4.4.1 - return PacketKind::Standalone; - } - } - - return PacketKind::Standalone; -} - void DepacketizerH265::reset() { mHaveBits = 0; @@ -71,81 +58,127 @@ void 
DepacketizerH265::reset() mLastRtpTimestamp = 0; } -void DepacketizerH265::extract(std::vector& out, const JitterBufferItem* packet) +void DepacketizerH265::extract(std::vector& out, const std::vector& packetList) { out.clear(); - ByteReader reader(packet->payload); - if (reader.remaining() >= 1) { - // https://datatracker.ietf.org/doc/html/rfc7798#section-1.1.4 - const auto value = reader.readU8(); - const auto nalu_type = (value >> 1) & 0x3F; - - if (nalu_type == h265::kPacket_AP) { - // https://datatracker.ietf.org/doc/html/rfc7798#section-4.4.2 - if (reader.remaining() >= 1) { - reader.skip(1); - } else { - return; - } + std::unique_ptr fu_buf; + std::unique_ptr fu_wrt; - while (reader.remaining() >= 2) { - const auto size = reader.readU16(); - if (reader.remaining() < size) { - break; + for (const auto packet : packetList) { + ByteReader reader(packet->payload); + if (reader.remaining() >= 2) { + const auto nalUnitHeader = reader.readU16(); + const auto type = (nalUnitHeader >> 9) & 0x3Fu; + const auto layerId = (nalUnitHeader >> 3) & 0x3Fu; + const auto temporalId = nalUnitHeader & 0x7u; + + if (type == h265::kPacket_AP) { + // https://datatracker.ietf.org/doc/html/rfc7798#section-4.4.2 + while (reader.remaining() >= 2) { + const auto size = reader.readU16(); + if (reader.remaining() >= size && size >= 2) { + ByteBuffer buf(packet->payload.data() + reader.position(), size); + + extractImpl(out, packet, std::move(buf)); + reader.skip(size); + } else { + break; + } } - - ByteBuffer buf(packet->payload.data() + reader.position(), size); - extractImpl(out, packet, std::move(buf)); - - reader.skip(size); + } else if (type == h265::kPacket_FU) { + // https://datatracker.ietf.org/doc/html/rfc7798#section-4.4.3 + if (reader.remaining() >= 1) { + const auto fuHeader = reader.readU8(); + const auto fuIsStart = (fuHeader & (1 << 7)) != 0; + const auto fuIsEnd = (fuHeader & (1 << 6)) != 0; + const auto fuType = fuHeader & 0x3Fu; + + if (fuIsStart) { + fu_buf = 
std::make_unique(fuHeader); + fu_wrt = std::make_unique(*fu_buf); + + fu_wrt->writeU16((fuType << 9) | (layerId << 3) | temporalId); + } + + if (reader.remaining() > 0) { + fu_wrt->write(packet->payload.data() + reader.position(), reader.remaining()); + } + + if (fuIsEnd && fu_buf) { + extractImpl(out, packet, std::move(*fu_buf)); + } + } + } else if (type <= 40) { + // https://datatracker.ietf.org/doc/html/rfc7798#section-4.4.1 + extractImpl(out, packet, packet->payload.copy()); } - } else { - // https://datatracker.ietf.org/doc/html/rfc7798#section-4.4.1 - extractImpl(out, packet, packet->payload.copy()); } } } -void DepacketizerH265::extract(std::vector& out, const std::vector& packetList) +bool DepacketizerH265::isFrameStart(const ByteBuffer& payload) const { - out.clear(); - - ByteBuffer buf; - ByteWriter w(buf); - - for (const auto packet : packetList) { - ByteReader reader(packet->payload); + ByteReader reader(payload); + if (reader.remaining() >= 2) { + const auto nalUnitHeader = reader.readU16(); + const auto type = (nalUnitHeader >> 9) & 0x3Fu; - if (reader.remaining() > 2) { - const auto payloadHeader = reader.readU16(); - const auto fuHeader = reader.readU8(); + if (type == h265::kPacket_AP) { + // https://datatracker.ietf.org/doc/html/rfc7798#section-4.4.2 + while (reader.remaining() >= 2) { + const auto size = reader.readU16(); + if (reader.remaining() >= size && size >= 2) { + const auto apData = payload.data() + reader.position(); + const auto apHeader = (apData[0] << 8) | apData[1]; + const auto apNaluType = (apHeader >> 9) & 0x3Fu; + + if (size > 2 && isFrameStartImpl(apNaluType, apData + 2, size - 2)) { + return true; + } + } else { + break; + } + } + } else if (type == h265::kPacket_FU) { + // https://datatracker.ietf.org/doc/html/rfc7798#section-4.4.3 + if (reader.remaining() >= 1) { + const auto fuHeader = reader.readU8(); + const auto fuIsStart = (fuHeader & (1 << 7)) != 0; + const auto fuNaluType = fuHeader & 0x3Fu; - uint8_t layerId = 
(payloadHeader >> 3) & 0x3F; - uint8_t temporalId = payloadHeader & 0x07; - uint8_t nalu_type = fuHeader & 0x3F; + const auto fuData = payload.data() + reader.position(); + const auto fuSize = reader.remaining(); - if (buf.empty()) { - w.writeU16((nalu_type << 9) | (layerId << 3) | temporalId); + if (fuIsStart) { + if (fuSize > 0 && isFrameStartImpl(fuNaluType, fuData, fuSize)) { + return true; + } + } } + } else if (type <= 40) { + // https://datatracker.ietf.org/doc/html/rfc7798#section-4.4.1 + const auto data = payload.data() + reader.position(); + const auto size = reader.remaining(); - const auto pos = reader.position(); - w.write(packet->payload.data() + pos, packet->payload.size() - pos); + if (isFrameStartImpl(type, data, size)) { + return true; + } } } - extractImpl(out, packetList.back(), std::move(buf)); + return false; } -void DepacketizerH265::extractImpl(std::vector& out, const JitterBufferItem* packet, ByteBuffer&& frame) +void DepacketizerH265::extractImpl(std::vector& out, const JitterBufferItem* packet, ByteBuffer&& nalu) { - if (frame.empty()) { + if (nalu.empty()) { return; } if ((mHaveBits & kHaveAll) != kHaveAll) { // Wait to emit until we have a key frame - const auto nalu_type = (frame.front() >> 1) & 0x3F; + const auto nalu_type = (nalu.front() >> 1) & 0x3F; switch (nalu_type) { case h265::NaluType::VPS: mHaveBits |= kHaveVPS; @@ -159,7 +192,7 @@ void DepacketizerH265::extractImpl(std::vector& out, const JitterBuf default: if (h265::isKeyFrameNalu(nalu_type)) { mHaveBits |= kHaveKey; - } else { + } else if (h265::isSliceNalu(nalu_type)) { LOG(SRTC_LOG_V, "Not emitting a non-key frame until there is a keyframe"); return; } @@ -173,7 +206,7 @@ void DepacketizerH265::extractImpl(std::vector& out, const JitterBuf } mFrameBuffer.append(kAnnexB, sizeof(kAnnexB)); - mFrameBuffer.append(frame); + mFrameBuffer.append(nalu); if (packet->marker) { out.push_back(std::move(mFrameBuffer)); diff --git a/src/depacketizer_opus.cpp 
b/src/depacketizer_opus.cpp index bbea51e..ef0e640 100644 --- a/src/depacketizer_opus.cpp +++ b/src/depacketizer_opus.cpp @@ -1,4 +1,5 @@ #include "srtc/depacketizer_opus.h" +#include "srtc/track.h" #include @@ -8,7 +9,9 @@ namespace srtc DepacketizerOpus::DepacketizerOpus(const std::shared_ptr& track) : Depacketizer(track) { + assert(track->getCodec() == Codec::Opus); } + DepacketizerOpus::~DepacketizerOpus() = default; PacketKind DepacketizerOpus::getPacketKind(const ByteBuffer& payload, bool marker) const @@ -21,22 +24,17 @@ void DepacketizerOpus::reset() // Nothing } -void DepacketizerOpus::extract(std::vector& out, const JitterBufferItem* packet) +void DepacketizerOpus::extract(std::vector& out, const std::vector& packetList) { - assert(getPacketKind(packet->payload, packet->marker) == PacketKind::Standalone); - out.clear(); + assert(packetList.size() == 1); + + const auto packet = packetList[0]; + assert(getPacketKind(packet->payload, packet->marker) == PacketKind::Standalone); if (!packet->payload.empty()) { out.emplace_back(packet->payload.copy()); } } -void DepacketizerOpus::extract(std::vector& out, const std::vector& packetList) -{ - // Opus packets are always standalone - out.clear(); - assert(false); -} - } // namespace srtc \ No newline at end of file diff --git a/src/depacketizer_video.cpp b/src/depacketizer_video.cpp new file mode 100644 index 0000000..b804b2f --- /dev/null +++ b/src/depacketizer_video.cpp @@ -0,0 +1,31 @@ +#include "srtc/depacketizer_video.h" +#include "srtc/track.h" + +#include + +namespace srtc +{ + +DepacketizerVideo::DepacketizerVideo(const std::shared_ptr& track) + : Depacketizer(track) +{ + assert(track->getMediaType() == MediaType::Video); +} + +DepacketizerVideo::~DepacketizerVideo() = default; + +PacketKind DepacketizerVideo::getPacketKind(const ByteBuffer& payload, bool marker) const +{ + if (isFrameStart(payload)) { + if (marker) { + return PacketKind::Standalone; + } + return PacketKind::Start; + } + if (marker) { + 
return PacketKind::End; + } + return PacketKind::Middle; +} + +} // namespace srtc \ No newline at end of file diff --git a/src/depacketizer_vp8.cpp b/src/depacketizer_vp8.cpp index 1570eb4..7593cf4 100644 --- a/src/depacketizer_vp8.cpp +++ b/src/depacketizer_vp8.cpp @@ -1,8 +1,10 @@ #include "srtc/depacketizer_vp8.h" - #include "srtc/logging.h" +#include "srtc/track.h" #include "srtc/util.h" +#include + #define LOG(level, ...) srtc::log(level, "DepacketizerVP8", __VA_ARGS__) namespace @@ -88,64 +90,19 @@ namespace srtc { DepacketizerVP8::DepacketizerVP8(const std::shared_ptr& track) - : Depacketizer(track) + : DepacketizerVideo(track) , mSeenKeyFrame(false) { + assert(track->getCodec() == Codec::VP8); } DepacketizerVP8::~DepacketizerVP8() = default; -PacketKind DepacketizerVP8::getPacketKind(const ByteBuffer& payload, bool marker) const -{ - // https://datatracker.ietf.org/doc/html/rfc7741#section-4.2 - - // |X|R|N|S|R| PID | - - const auto data = payload.data(); - const auto size = payload.size(); - - if (size >= 1) { - const auto firstByte = data[0]; - const auto start = (firstByte & (1 << 4)) != 0; - const auto pid = firstByte & 0x07; - - if (start && pid == 0) { - if (marker) { - return PacketKind::Standalone; - } - return PacketKind::Start; - } else if (marker) { - return PacketKind::End; - } else { - return PacketKind::Middle; - } - } - - return PacketKind::Standalone; -} - void DepacketizerVP8::reset() { mSeenKeyFrame = false; } -void DepacketizerVP8::extract(std::vector& out, const JitterBufferItem* packet) -{ - out.clear(); - - ByteBuffer buf; - ByteWriter w(buf); - - const uint8_t* payloadData = nullptr; - size_t payloadSize = 0; - if (!extractPayload(packet->payload, payloadData, payloadSize)) { - return; - } - w.write(payloadData, payloadSize); - - extractImpl(out, packet, std::move(buf)); -} - void DepacketizerVP8::extract(std::vector& out, const std::vector& packetList) { out.clear(); @@ -165,6 +122,22 @@ void DepacketizerVP8::extract(std::vector& 
out, const std::vector& out, const JitterBufferItem* packet, ByteBuffer&& frame) { if (frame.empty()) { diff --git a/src/jitter_buffer.cpp b/src/jitter_buffer.cpp index 68aeba0..9894c54 100644 --- a/src/jitter_buffer.cpp +++ b/src/jitter_buffer.cpp @@ -12,6 +12,8 @@ #define LOG(level, ...) srtc::log(level, "JitterBuffer", __VA_ARGS__) +// #define VERBOSE_LOGGING + namespace { @@ -269,6 +271,72 @@ void JitterBuffer::consume(const std::shared_ptr& packet) item->payload = std::move(payload); item->kind = mDepacketizer->getPacketKind(item->payload, item->marker); + + // Some packetizers (I'm looking at you AV1) cannot tell if a packet starts a brand-new frame or is a continuation. + // In this case they'll return "Start" packet kind, and we fix things by extending a frame based on the RTP timestamp. + + if (item->kind == PacketKind::Start) { + if (mMinSeq < item->seq_ext) { + const auto prev_seq = item->seq_ext - 1; + const auto prev_index = prev_seq & mCapacityMask; + const auto prev_item = mItemList[prev_index]; + assert(prev_item); + + if (prev_item->rtp_timestamp_ext == item->rtp_timestamp_ext) { + if (prev_item->kind == PacketKind::Start || prev_item->kind == PacketKind::Middle) { + item->kind = PacketKind::Middle; + } + } + } + + if (item->seq_ext + 1 < mMaxSeq) { + const auto next_seq = item->seq_ext + 1; + const auto next_index = next_seq & mCapacityMask; + const auto next_item = mItemList[next_index]; + assert(next_item); + + if (next_item->rtp_timestamp_ext == item->rtp_timestamp_ext) { + if (next_item->kind == PacketKind::Start) { + next_item->kind = PacketKind::Middle; + } + } + } + } + +#ifdef VERBOSE_LOGGING + if (mTrack->getMediaType() == MediaType::Video) { + std::printf( + "Consume seq = %" PRIu64 ", size = %zu, marker = %d\n", seq_ext, payload.size(), packet->getMarker()); + + for (auto debug_seq = mMinSeq; debug_seq < mMaxSeq; debug_seq += 1) { + const char* label = "?"; + const auto debug_index = debug_seq & mCapacityMask; + const auto debug_item = 
mItemList[debug_index]; + assert(debug_item); + + if (!debug_item->received) { + label = "fill"; + } else { + switch (debug_item->kind) { + case PacketKind::Start: + label = "start"; + break; + case PacketKind::Middle: + label = "middle"; + break; + case PacketKind::End: + label = "end"; + break; + case PacketKind::Standalone: + label = "standalone"; + break; + } + } + + std::printf("item seq = %10" PRIu64 ", type = %10s, size = %4zu\n", debug_seq, label, debug_item->payload.size()); + } + } +#endif } int JitterBuffer::getTimeoutMillis(int defaultTimeout) const @@ -352,7 +420,12 @@ std::vector> JitterBuffer::processDeque() if (item->received && diff_millis(item->when_dequeue, now) <= 0) { if (item->kind == PacketKind::Standalone) { // A standalone packet, which is ready to be extracted, possibly into multiple frames - mDepacketizer->extract(mTempFrameList, item); + mTempPacketList.clear(); + mTempPacketList.push_back(item); + + // Extract + mTempFrameList.clear(); + mDepacketizer->extract(mTempFrameList, mTempPacketList); // Append to result frame list appendToResult(result, item, item, now, mTempFrameList); @@ -366,24 +439,26 @@ std::vector> JitterBuffer::processDeque() // Start of a multi-packet sequence uint64_t maxSeq = 0; if (findMultiPacketSequence(maxSeq)) { - // Create a list of buffers - extractBufferList(mTempBufferList, seq, maxSeq); + // Create a list of packets + mTempPacketList.clear(); + fillItemList(mTempPacketList, seq, maxSeq); #ifdef NDEBUG #else - assert(!mTempBufferList.empty()); - assert(mDepacketizer->getPacketKind(mTempBufferList.front()->payload, - mTempBufferList.front()->marker) == PacketKind::Start); - for (size_t i = 1; i < mTempBufferList.size() - 1; i += 1) { - assert(mDepacketizer->getPacketKind(mTempBufferList[i]->payload, mTempBufferList[i]->marker) == + assert(!mTempPacketList.empty()); + assert(mDepacketizer->getPacketKind(mTempPacketList.front()->payload, + mTempPacketList.front()->marker) == PacketKind::Start); + for (size_t i 
= 1; i < mTempPacketList.size() - 1; i += 1) { + assert(mDepacketizer->getPacketKind(mTempPacketList[i]->payload, mTempPacketList[i]->marker) == PacketKind::Middle); } - assert(mDepacketizer->getPacketKind(mTempBufferList.back()->payload, - mTempBufferList.back()->marker) == PacketKind::End); + assert(mDepacketizer->getPacketKind(mTempPacketList.back()->payload, + mTempPacketList.back()->marker) == PacketKind::End); #endif - // Extract, possibly into multiple frames (theoretical) - mDepacketizer->extract(mTempFrameList, mTempBufferList); + // Extract, possibly into multiple frames + mTempFrameList.clear(); + mDepacketizer->extract(mTempFrameList, mTempPacketList); // Append to result frame list const auto maxIndex = maxSeq & mCapacityMask; @@ -520,10 +595,8 @@ JitterBufferItem* JitterBuffer::newLostItem(const std::chrono::steady_clock::tim return item_lost; } -void JitterBuffer::extractBufferList(std::vector& out, uint64_t start, uint64_t max) +void JitterBuffer::fillItemList(std::vector& out, uint64_t start, uint64_t max) { - out.clear(); - for (uint64_t seq = start; seq <= max; seq += 1) { const auto index = seq & mCapacityMask; auto item = mItemList[index]; @@ -582,7 +655,7 @@ void JitterBuffer::appendToResult(std::vectorreceived); @@ -610,7 +683,7 @@ bool JitterBuffer::findMultiPacketSequence(uint64_t& outEnd) bool JitterBuffer::findNextToDequeue(const std::chrono::steady_clock::time_point& now) { - auto index = (mMinSeq)&mCapacityMask; + auto index = mMinSeq & mCapacityMask; auto item = mItemList[index]; assert(item); assert(item->received); diff --git a/src/packetizer_av1.cpp b/src/packetizer_av1.cpp index 61f623d..407939b 100644 --- a/src/packetizer_av1.cpp +++ b/src/packetizer_av1.cpp @@ -16,25 +16,25 @@ #define LOG(level, ...) 
srtc::log(level, "Packetizer_AV1", __VA_ARGS__) +//#define VERBOSE_LOGGING + namespace { -#if 0 +#ifdef VERBOSE_LOGGING void dumpFrame(int64_t pts_usec, const srtc::ByteBuffer& frame) { - static uint32_t n = 0; + std::printf("PUB AV1 Frame: ts = %" PRIu64 "\n", pts_usec); for (srtc::av1::ObuParser parser(frame); parser; parser.next()) { const auto obuType = parser.currType(); - const auto isFrame = srtc::av1::isFrameObuType(obuType); - const auto isKeyFrame = isFrame && srtc::av1::isKeyFrameObu(parser.currData(), parser.currSize()); - std::cout << "AV1 " << std::setw(4) << n << ", pts = " << std::setw(8) << pts_usec - << ", OBU type = " << static_cast(obuType) << ", size = " << std::setw(5) << parser.currSize() - << ", key = " << isKeyFrame << ", end = " << parser.isAtEnd() << std::endl; - } + const auto obuData = parser.currData(); + const auto obuSize = parser.currSize(); + const auto isKeyFrame = srtc::av1::isKeyFrameObu(obuType, obuData, obuSize); - n += 1; + std::printf("PUB AV1 OBU: type = %2u, key = %d, size = %4zu\n", obuType, isKeyFrame, parser.currSize()); + } } #endif @@ -80,7 +80,7 @@ std::list> PacketizerAV1::generate(const std::shared_ // We need to know if there is a key frame (new coded video sequence) bool isNewCodedVideoSequence = false; for (av1::ObuParser parser(frame); parser; parser.next()) { - if (av1::isKeyFrameObu(parser.currType(), parser.currData(), parser.currSize())) { + if (parser.currType() == av1::ObuType::SequenceHeader) { isNewCodedVideoSequence = true; break; } @@ -101,6 +101,10 @@ std::list> PacketizerAV1::generate(const std::shared_ uint8_t padding = 0; auto packetNumber = 0u; +#ifdef VERBOSE_LOGGING + dumpFrame(pts_usec, frame); +#endif + for (av1::ObuParser parser(frame); parser; parser.next()) { const auto obuType = parser.currType(); if (obuType == av1::ObuType::TemporalDelimiter) { diff --git a/src/packetizer_h264.cpp b/src/packetizer_h264.cpp index 4bdc4dc..84a926a 100644 --- a/src/packetizer_h264.cpp +++ 
b/src/packetizer_h264.cpp @@ -124,20 +124,20 @@ std::list> PacketizerH264::generate(const std::shared if (!isParameterNalu(naluType)) { // Now the frame itself - const auto naluDataPtr = parser.currData(); - const auto naluDataSize = parser.currDataSize(); + const auto naluData = parser.currData(); + const auto naluSize = parser.currDataSize(); - uint8_t padding = getPadding(track, simulcast, twcc, naluDataSize); + uint8_t padding = getPadding(track, simulcast, twcc, naluSize); RtpExtension extension = buildExtension(track, simulcast, twcc, naluType == NaluType::KeyFrame, 0); const auto basicPacketSize = getBasicPacketSize(mediaProtectionOverhead); auto packetSize = adjustPacketSize(basicPacketSize, padding, extension); - if (packetSize >= naluDataSize) { + if (packetSize >= naluSize) { // https://datatracker.ietf.org/doc/html/rfc6184#section-5.6 const auto marker = parser.isAtEnd(); const auto [rollover, sequence] = packetSource->getNextSequence(); - auto payload = ByteBuffer{ naluDataPtr, naluDataSize }; + auto payload = ByteBuffer{ naluData, naluSize }; result.push_back(std::make_shared(track, marker, rollover, @@ -146,29 +146,29 @@ std::list> PacketizerH264::generate(const std::shared padding, std::move(extension), std::move(payload))); - } else if (naluDataSize > 1) { + } else if (naluSize > 1) { // https://datatracker.ietf.org/doc/html/rfc6184#section-5.8 - const auto nri = static_cast(naluDataPtr[0] & 0x60); + const auto nri = static_cast(naluData[0] & 0x60); // The "+1" is to skip the NALU type - auto dataPtr = naluDataPtr + 1; - auto dataSize = naluDataSize - 1; + auto currData = naluData + 1; + auto currSize = naluSize - 1; auto packetNumber = 0u; - while (dataSize > 0) { + while (currSize > 0) { const auto [rollover, sequence] = packetSource->getNextSequence(); if (packetNumber > 0) { - padding = getPadding(track, simulcast, twcc, naluDataSize); + padding = getPadding(track, simulcast, twcc, naluSize); extension = buildExtension(track, simulcast, twcc, 
naluType == NaluType::KeyFrame, packetNumber); } // The "-2" is for FU_A headers packetSize = adjustPacketSize(basicPacketSize - 2, padding, extension); - if (packetNumber == 0 && packetSize >= dataSize) { + if (packetNumber == 0 && packetSize >= currSize) { // The frame now fits in one packet, but a FU-A cannot have both start and end - packetSize = dataSize - 10; + packetSize = currSize - 10; } ByteBuffer payload; @@ -179,15 +179,15 @@ std::list> PacketizerH264::generate(const std::shared writer.writeU8(fuIndicator); const auto isStart = packetNumber == 0; - const auto isEnd = dataSize <= packetSize; + const auto isEnd = currSize <= packetSize; const uint8_t fuHeader = (isStart ? (1 << 7) : 0) | (isEnd ? (1 << 6) : 0) | static_cast(naluType); writer.writeU8(fuHeader); const auto marker = isEnd && parser.isAtEnd(); - const auto writeNow = std::min(dataSize, packetSize); - writer.write(dataPtr, writeNow); + const auto writeNow = std::min(currSize, packetSize); + writer.write(currData, writeNow); result.push_back(std::make_shared(track, marker, @@ -198,8 +198,8 @@ std::list> PacketizerH264::generate(const std::shared std::move(extension), std::move(payload))); - dataPtr += writeNow; - dataSize -= writeNow; + currData += writeNow; + currSize -= writeNow; packetNumber += 1; } } diff --git a/src/packetizer_h265.cpp b/src/packetizer_h265.cpp index 26d7b55..842aab1 100644 --- a/src/packetizer_h265.cpp +++ b/src/packetizer_h265.cpp @@ -133,20 +133,20 @@ std::list> PacketizerH265::generate(const std::shared if (!isParameterNalu(naluType)) { // Now the frame itself - const auto naluDataPtr = parser.currData(); - const auto naluDataSize = parser.currDataSize(); + const auto naluData = parser.currData(); + const auto naluSize = parser.currDataSize(); - uint8_t padding = getPadding(track, simulcast, twcc, naluDataSize); + uint8_t padding = getPadding(track, simulcast, twcc, naluSize); RtpExtension extension = buildExtension(track, simulcast, twcc, isKeyFrameNalu(naluType), 0); 
const auto basicPacketSize = getBasicPacketSize(mediaProtectionOverhead); auto packetSize = adjustPacketSize(basicPacketSize, padding, extension); - if (packetSize >= naluDataSize) { + if (packetSize >= naluSize) { // https://datatracker.ietf.org/doc/html/rfc7798#section-4.4.1 const auto marker = parser.isAtEnd(); const auto [rollover, sequence] = packetSource->getNextSequence(); - auto payload = ByteBuffer{ naluDataPtr, naluDataSize }; + auto payload = ByteBuffer{ naluData, naluSize }; result.push_back(std::make_shared(track, marker, rollover, @@ -155,28 +155,28 @@ std::list> PacketizerH265::generate(const std::shared padding, std::move(extension), std::move(payload))); - } else if (naluDataSize > 2) { - // https://datatracker.ietf.org/doc/html/rfc7798#section-4.4.2 - uint8_t layerId = ((naluDataPtr[0] & 0x01) << 5) | ((naluDataPtr[1] >> 3) & 0x1F); - uint8_t temporalId = naluDataPtr[1] & 0x07; + } else if (naluSize > 2) { + // https://datatracker.ietf.org/doc/html/rfc7798#section-4.4.3 + uint8_t layerId = ((naluData[0] & 0x01) << 5) | ((naluData[1] >> 3) & 0x1F); + uint8_t temporalId = naluData[1] & 0x07; - auto dataPtr = naluDataPtr + 2; - auto dataSize = naluDataSize - 2; + auto currData = naluData + 2; + auto currSize = naluSize - 2; auto packetNumber = 0u; - while (dataSize > 0) { + while (currSize > 0) { const auto [rollover, sequence] = packetSource->getNextSequence(); if (packetNumber > 0) { - padding = getPadding(track, simulcast, twcc, naluDataSize); + padding = getPadding(track, simulcast, twcc, naluSize); extension = buildExtension(track, simulcast, twcc, isKeyFrameNalu(naluType), packetNumber); } // The "-3" is for FU headers packetSize = adjustPacketSize(basicPacketSize - 3, padding, extension); - if (packetNumber == 0 && packetSize >= dataSize) { + if (packetNumber == 0 && packetSize >= currSize) { // The frame now fits in one packet, but a FU cannot have both start and end - packetSize = dataSize - 10; + packetSize = currSize - 10; } ByteBuffer 
payload; @@ -187,15 +187,15 @@ std::list> PacketizerH265::generate(const std::shared writer.writeU16(payloadHeader); const auto isStart = packetNumber == 0; - const auto isEnd = dataSize <= packetSize; + const auto isEnd = currSize <= packetSize; const uint8_t fuHeader = (isStart ? (1 << 7) : 0) | (isEnd ? (1 << 6) : 0) | static_cast(naluType & 0x3F); writer.writeU8(fuHeader); const auto marker = isEnd && parser.isAtEnd(); - const auto writeNow = std::min(dataSize, packetSize); - writer.write(dataPtr, writeNow); + const auto writeNow = std::min(currSize, packetSize); + writer.write(currData, writeNow); result.push_back(std::make_shared(track, marker, @@ -206,8 +206,8 @@ std::list> PacketizerH265::generate(const std::shared std::move(extension), std::move(payload))); - dataPtr += writeNow; - dataSize -= writeNow; + currData += writeNow; + currSize -= writeNow; packetNumber += 1; } } diff --git a/src/packetizer_vp8.cpp b/src/packetizer_vp8.cpp index 6c71e81..aeb1ac0 100644 --- a/src/packetizer_vp8.cpp +++ b/src/packetizer_vp8.cpp @@ -58,16 +58,16 @@ std::list> PacketizerVP8::generate(const std::shared_ const auto tagFrameType = tag & 0x01; // https://datatracker.ietf.org/doc/html/rfc7741#section-4.2 - auto dataPtr = frame.data(); - auto dataSize = frame.size(); + auto currData = frame.data(); + auto currSize = frame.size(); const auto basicPacketSize = getBasicPacketSize(mediaProtectionOverhead); auto packetNumber = 0u; - while (dataSize > 0) { + while (currSize > 0) { const auto [rollover, sequence] = packetSource->getNextSequence(); - const auto padding = getPadding(track, simulcast, twcc, dataSize); + const auto padding = getPadding(track, simulcast, twcc, currSize); RtpExtension extension = buildExtension(track, simulcast, twcc, tagFrameType == 0, packetNumber); // The "-1" is for VP8 payload descriptor @@ -80,17 +80,17 @@ std::list> PacketizerVP8::generate(const std::shared_ writer.writeU8((tagFrameType << 5) | ((packetNumber == 0 ? 
1 : 0) << 4)); // Payload - const auto writeNow = std::min(dataSize, packetSize); - writer.write(dataPtr, writeNow); + const auto writeNow = std::min(currSize, packetSize); + writer.write(currData, writeNow); // Make a packet - const auto marker = dataSize <= packetSize; + const auto marker = currSize <= packetSize; result.push_back(std::make_shared( track, marker, rollover, sequence, frameTimestamp, padding, std::move(extension), std::move(payload))); // Advance - dataPtr += writeNow; - dataSize -= writeNow; + currData += writeNow; + currSize -= writeNow; packetNumber += 1; } diff --git a/tools/media_reader_h264.cpp b/tools/media_reader_h264.cpp index c090f81..d7fb396 100644 --- a/tools/media_reader_h264.cpp +++ b/tools/media_reader_h264.cpp @@ -1,6 +1,6 @@ #include "media_reader_h264.h" - #include "srtc/codec_h264.h" +#include "srtc/bit_reader.h" #include #include @@ -42,9 +42,7 @@ LoadedMedia MediaReaderH264::loadMedia(bool print_info) const break; case srtc::h264::NaluType::KeyFrame: case srtc::h264::NaluType::NonKeyFrame: - srtc::h264::BitReader br = { parser.currData() + 1, parser.currDataSize() - 1 }; - const auto first_mb_in_slice = br.readUnsignedExpGolomb(); - if (first_mb_in_slice == 0) { + if (srtc::h264::isFrameStart(parser.currData(), parser.currDataSize())) { if (!frame.empty()) { LoadedFrame loaded_frame = {}; loaded_frame.pts_usec = pts_usec; @@ -107,7 +105,7 @@ void MediaReaderH264::printInfo(const srtc::ByteBuffer& data) const case srtc::h264::NaluType::KeyFrame: case srtc::h264::NaluType::NonKeyFrame: frame_nalu_count += 1; - srtc::h264::BitReader br = { parser.currData() + 1, parser.currDataSize() - 1 }; + srtc::BitReader br = { parser.currData() + 1, parser.currDataSize() - 1 }; const auto first_mb_in_slice = br.readUnsignedExpGolomb(); if (first_mb_in_slice == 0) { all_frame_count += 1; diff --git a/tools/media_writer_av1.cpp b/tools/media_writer_av1.cpp new file mode 100644 index 0000000..c02e2c3 --- /dev/null +++ 
b/tools/media_writer_av1.cpp @@ -0,0 +1,183 @@ +#include "media_writer_av1.h" +#include "srtc/bit_reader.h" +#include "srtc/byte_buffer.h" +#include "srtc/codec_av1.h" + +#include +#include + +MediaWriterAV1::MediaWriterAV1(const std::string& filename, const std::shared_ptr& track) + : MediaWriter(filename) + , mTrack(track) + , mOutAllFrameCount(0) + , mOutKeyFrameCount(0) + , mOutByteCount(0) + , mBaseRtpTimestamp(0) +{ + checkExtension({ ".webm" }); +} + +MediaWriterAV1::~MediaWriterAV1() +{ + if (!mFrameList.empty()) { + uint16_t frameWidth = 1920; + uint16_t frameHeight = 1080; + extractAV1Dimensions(frameWidth, frameHeight); + + FILE* file = fopen(mFilename.c_str(), "wb"); + if (!file) { + std::printf("*** Cannot open output file %s\n", mFilename.c_str()); + exit(1); + } + + MediaWriterWebm writer(file, "V_AV1", frameWidth, frameHeight, mFrameList); + writer.write(); + + fclose(file); + + std::printf("AV1: Wrote %zu frames, %zu key frames, %zu bytes to %s\n", + mOutAllFrameCount, + mOutKeyFrameCount, + mOutByteCount, + mFilename.c_str()); + } +} + +void MediaWriterAV1::write(const std::shared_ptr& frame) +{ + // Check if it's a key frame + const auto frameData = frame->data.data(); + const auto frameSize = frame->data.size(); + + bool isKeyFrame = false; + for (srtc::av1::ObuParser parser(frame->data); parser; parser.next()) { + const auto obuType = parser.currType(); + if (obuType == srtc::av1::ObuType::SequenceHeader) { + isKeyFrame = true; + break; + } + + const auto obuData = parser.currData(); + const auto obuSize = parser.currSize(); + if (srtc::av1::isKeyFrameObu(obuType, obuData, obuSize)) { + isKeyFrame = true; + break; + } + } + + if (isKeyFrame) { + // Maintain key frame count + mOutKeyFrameCount += 1; + } + + // Calculate pts + int64_t pts_usec = 0; + if (mOutAllFrameCount == 0) { + mBaseRtpTimestamp = frame->rtp_timestamp_ext; + std::printf("AV1: Started buffering video frames, will save when exiting from Ctrl+C\n"); + } else { + pts_usec = 
static_cast(frame->rtp_timestamp_ext - mBaseRtpTimestamp) * 1000 / 90; + } + + mOutAllFrameCount += 1; + mOutByteCount += frame->data.size(); + + MediaWriterWebm::Frame outFrame; + outFrame.pts_usec = pts_usec; + outFrame.data = std::move(frame->data); + outFrame.is_keyframe = isKeyFrame; + + mFrameList.push_back(std::move(outFrame)); +} + +bool MediaWriterAV1::extractAV1Dimensions(uint16_t& width, uint16_t& height) const +{ + // Find first keyframe with sequence header + for (const auto& frame : mFrameList) { + if (frame.is_keyframe && !frame.data.empty()) { + if (extractAV1Dimensions(frame.data, width, height)) { + return true; + } + } + } + + // No keyframe found or unable to extract dimensions + return false; +} + +bool MediaWriterAV1::extractAV1Dimensions(const srtc::ByteBuffer& frame, uint16_t& width, uint16_t& height) const +{ + for (srtc::av1::ObuParser parser(frame); parser; parser.next()) { + const auto obuType = parser.currType(); + if (obuType == srtc::av1::ObuType::SequenceHeader) { + const auto obuData = parser.currData(); + const auto obuSize = parser.currSize(); + + if (obuSize < 8) { + continue; // Too small to contain dimensions + } + + // Parse AV1 sequence header according to specification + // Reference: https://aomediacodec.github.io/av1-spec/#sequence-header-obu-syntax + + // I took a lot of shortcuts, but it handles AV1 coming out of Chrome + + srtc::BitReader reader(obuData, obuSize); + + uint8_t seq_profile = reader.readBits(3); + uint8_t still_picture = reader.readBit(); + uint8_t reduced_still_picture_header = reader.readBit(); + if (reduced_still_picture_header) { + // Doesn't apply + return false; + } + + uint8_t decoder_model_info_present_flag = 0; + uint8_t timing_info_present_flag = reader.readBit(); + if (timing_info_present_flag) { + // We don't handle this case + return false; + } + + uint8_t initial_display_delay_present_flag = reader.readBit(); + uint8_t operating_points_cnt_minus_1 = reader.readBits(5); + + for (size_t i = 0; 
i <= operating_points_cnt_minus_1; i++) { + uint32_t operating_point_idc_i = reader.readBits(12); + uint32_t seq_level_i = reader.readBits(5); + if (seq_level_i > 7) { + // seq_tier[i] + reader.readBit(); + } + if (decoder_model_info_present_flag) { + // decoder_model_present_for_this_op[ i ] + uint32_t decoder_model_present_for_this_op_i = reader.readBit(); + if (decoder_model_present_for_this_op_i) { + // We don't handle this case + return false; + } + } + if (initial_display_delay_present_flag) { + uint32_t initial_display_delay_present_for_this_op_i = reader.readBit(); + if (initial_display_delay_present_for_this_op_i) { + // initial_display_delay_minus_1[ i ] + reader.readBits(4); + } + } + } + + // Read the dimensions + uint32_t frame_width_bits_minus_1 = reader.readBits(4); + uint32_t frame_height_bits_minus_1 = reader.readBits(4); + uint32_t max_frame_width_minus_1 = reader.readBits(frame_width_bits_minus_1 + 1); + uint32_t max_frame_height_minus_1 = reader.readBits(frame_height_bits_minus_1 + 1); + + width = max_frame_width_minus_1 + 1; + height = max_frame_height_minus_1 + 1; + + return true; + } + } + + return false; +} \ No newline at end of file diff --git a/tools/media_writer_av1.h b/tools/media_writer_av1.h new file mode 100644 index 0000000..dc28c5d --- /dev/null +++ b/tools/media_writer_av1.h @@ -0,0 +1,29 @@ +#pragma once + +#include "media_writer.h" +#include "media_writer_webm.h" + +#include "srtc/track.h" +#include "srtc/byte_buffer.h" + +class MediaWriterAV1 final : public MediaWriter +{ +public: + MediaWriterAV1(const std::string& filename, const std::shared_ptr& track); + ~MediaWriterAV1() override; + +protected: + void write(const std::shared_ptr& frame) override; + +private: + const std::shared_ptr mTrack; + size_t mOutAllFrameCount; + size_t mOutKeyFrameCount; + size_t mOutByteCount; + + std::vector mFrameList; + uint64_t mBaseRtpTimestamp; + + bool extractAV1Dimensions(uint16_t& width, uint16_t& height) const; + bool 
extractAV1Dimensions(const srtc::ByteBuffer& frame, uint16_t& width, uint16_t& height) const; +}; \ No newline at end of file diff --git a/tools/media_writer_h26x.cpp b/tools/media_writer_h26x.cpp index e79734d..ab59647 100644 --- a/tools/media_writer_h26x.cpp +++ b/tools/media_writer_h26x.cpp @@ -39,6 +39,7 @@ void MediaWriterH26x::write(const std::shared_ptr& frame) const auto& data = frame->data; fwrite(data.data(), data.size(), 1, mFile); + fflush(mFile); mOutFrameCount += 1; mOutByteCount += data.size(); diff --git a/tools/media_writer_ogg.cpp b/tools/media_writer_ogg.cpp index 10c24ec..d37045f 100644 --- a/tools/media_writer_ogg.cpp +++ b/tools/media_writer_ogg.cpp @@ -94,6 +94,8 @@ void MediaWriterOgg::write(const std::shared_ptr& frame) fwrite(m_og.body, 1, m_og.body_len, mFile); } + fflush(mFile); + m_packetno = 1; } diff --git a/tools/media_writer_vp8.cpp b/tools/media_writer_vp8.cpp index 7d03f15..8c48035 100644 --- a/tools/media_writer_vp8.cpp +++ b/tools/media_writer_vp8.cpp @@ -1,40 +1,9 @@ #include "media_writer_vp8.h" +#include "srtc/byte_buffer.h" #include #include -namespace -{ - -// Write multi-byte values in big-endian byte order -inline void writeBE16(std::vector& data, uint16_t value) -{ - data.push_back((value >> 8) & 0xFF); - data.push_back(value & 0xFF); -} - -inline void writeBE32(std::vector& data, uint32_t value) -{ - data.push_back((value >> 24) & 0xFF); - data.push_back((value >> 16) & 0xFF); - data.push_back((value >> 8) & 0xFF); - data.push_back(value & 0xFF); -} - -inline void writeBE64(std::vector& data, uint64_t value) -{ - data.push_back((value >> 56) & 0xFF); - data.push_back((value >> 48) & 0xFF); - data.push_back((value >> 40) & 0xFF); - data.push_back((value >> 32) & 0xFF); - data.push_back((value >> 24) & 0xFF); - data.push_back((value >> 16) & 0xFF); - data.push_back((value >> 8) & 0xFF); - data.push_back(value & 0xFF); -} - -} // namespace - MediaWriterVP8::MediaWriterVP8(const std::string& filename, const std::shared_ptr& 
track) : MediaWriter(filename) , mTrack(track) @@ -48,8 +17,21 @@ MediaWriterVP8::MediaWriterVP8(const std::string& filename, const std::shared_pt MediaWriterVP8::~MediaWriterVP8() { - if (mOutAllFrameCount > 0 && mOutByteCount > 0) { - writeWebM(); + if (!mFrameList.empty()) { + uint16_t frameWidth = 1920; + uint16_t frameHeight = 1080; + extractVP8Dimensions(frameWidth, frameHeight); + + FILE* file = fopen(mFilename.c_str(), "wb"); + if (!file) { + std::printf("*** Cannot open output file %s\n", mFilename.c_str()); + exit(1); + } + + MediaWriterWebm writer(file, "V_VP8", frameWidth, frameHeight, mFrameList); + writer.write(); + + fclose(file); std::printf("VP8: Wrote %zu frames, %zu key frames, %zu bytes to %s\n", mOutAllFrameCount, @@ -71,8 +53,9 @@ void MediaWriterVP8::write(const std::shared_ptr& frame) const auto tag = frameData[0] | (frameData[1] << 8) | (frameData[2] << 16); const auto tagFrameType = tag & 0x01; + bool is_keyframe = (tagFrameType == 0 && frameSize > 10); - if (tagFrameType == 0 && frameSize > 10) { + if (is_keyframe) { // Maintain key frame count mOutKeyFrameCount += 1; } @@ -89,387 +72,19 @@ void MediaWriterVP8::write(const std::shared_ptr& frame) mOutAllFrameCount += 1; mOutByteCount += frame->data.size(); - VP8Frame outFrame; + MediaWriterWebm::Frame outFrame; outFrame.pts_usec = pts_usec; outFrame.data = std::move(frame->data); + outFrame.is_keyframe = is_keyframe; mFrameList.push_back(std::move(outFrame)); } -void MediaWriterVP8::writeWebM() -{ - if (mFrameList.empty()) { - std::printf("VP8: Have not seen any video frames, won't write %s\n", mFilename.c_str()); - return; - } - - FILE* file = std::fopen(mFilename.c_str(), "wb"); - if (!file) { - std::printf("VP8: Failed to create %s\n", mFilename.c_str()); - return; - } - - // Calculate duration - uint64_t duration_ns = 0; - if (mFrameList.size() > 1) { - duration_ns = static_cast(mFrameList.back().pts_usec - mFrameList.front().pts_usec) * 1000; - } - - writeEBMLHeader(file); - - // 
Write Segment header with proper size calculation - // First write to temporary buffer to calculate size - std::vector segment_content; - - // Create temporary file for size calculation - FILE* temp_file = tmpfile(); - writeSegmentInfo(temp_file, duration_ns); - writeTracks(temp_file); - writeClusters(temp_file); - - // Get size and copy data - long segment_data_size = ftell(temp_file); - segment_content.resize(segment_data_size); - fseek(temp_file, 0, SEEK_SET); - if (fread(segment_content.data(), 1, segment_data_size, temp_file) != segment_data_size) { - std::printf("VP8: Failed to read temporary file\n"); - exit(1); - } - fclose(temp_file); - - // Write Segment with known size - uint32_t segment_id = 0x18538067; - writeEBMLElement(file, segment_id, segment_content.data(), segment_data_size); - - std::fclose(file); -} - -void MediaWriterVP8::writeEBMLHeader(FILE* file) -{ - // EBML Header (0x1A45DFA3) - const uint32_t ebml_header_id = 0x1A45DFA3; - - // Build header content - std::vector header_data; - - // EBMLVersion (0x4286) = 1 - uint8_t ebml_version[] = { 0x42, 0x86, 0x81, 0x01 }; - header_data.insert(header_data.end(), ebml_version, ebml_version + sizeof(ebml_version)); - - // EBMLReadVersion (0x42F7) = 1 - uint8_t ebml_read_version[] = { 0x42, 0xF7, 0x81, 0x01 }; - header_data.insert(header_data.end(), ebml_read_version, ebml_read_version + sizeof(ebml_read_version)); - - // EBMLMaxIDLength (0x42F2) = 4 - uint8_t ebml_max_id[] = { 0x42, 0xF2, 0x81, 0x04 }; - header_data.insert(header_data.end(), ebml_max_id, ebml_max_id + sizeof(ebml_max_id)); - - // EBMLMaxSizeLength (0x42F3) = 8 - uint8_t ebml_max_size[] = { 0x42, 0xF3, 0x81, 0x08 }; - header_data.insert(header_data.end(), ebml_max_size, ebml_max_size + sizeof(ebml_max_size)); - - // DocType (0x4282) = "webm" - const char* doctype = "webm"; - uint8_t doctype_header[] = { 0x42, 0x82, 0x84 }; - header_data.insert(header_data.end(), doctype_header, doctype_header + sizeof(doctype_header)); - 
header_data.insert(header_data.end(), doctype, doctype + strlen(doctype)); - - // DocTypeVersion (0x4287) = 2 - uint8_t doctype_version[] = { 0x42, 0x87, 0x81, 0x02 }; - header_data.insert(header_data.end(), doctype_version, doctype_version + sizeof(doctype_version)); - - // DocTypeReadVersion (0x4285) = 2 - uint8_t doctype_read_version[] = { 0x42, 0x85, 0x81, 0x02 }; - header_data.insert(header_data.end(), doctype_read_version, doctype_read_version + sizeof(doctype_read_version)); - - writeEBMLElement(file, ebml_header_id, header_data.data(), header_data.size()); -} - -void MediaWriterVP8::writeSegmentInfo(FILE* file, uint64_t duration_ns) -{ - // Info (0x1549A966) - const uint32_t info_id = 0x1549A966; - - std::vector info_data; - - // TimecodeScale (0x2AD7B1) = 1000000 (1ms) - uint32_t timecode_scale = 1000000; - uint8_t timecode_header[] = { 0x2A, 0xD7, 0xB1, 0x84 }; // 4-byte size - info_data.insert(info_data.end(), timecode_header, timecode_header + sizeof(timecode_header)); - - writeBE32(info_data, timecode_scale); - - // MuxingApp (0x4D80) = "srtc" - const char* muxing_app = "srtc"; - uint8_t muxing_header[] = { 0x4D, 0x80, 0x84 }; - info_data.insert(info_data.end(), muxing_header, muxing_header + sizeof(muxing_header)); - info_data.insert(info_data.end(), muxing_app, muxing_app + strlen(muxing_app)); - - // WritingApp (0x5741) = "srtc" - const char* writing_app = "srtc"; - uint8_t writing_header[] = { 0x57, 0x41, 0x84 }; - info_data.insert(info_data.end(), writing_header, writing_header + sizeof(writing_header)); - info_data.insert(info_data.end(), writing_app, writing_app + strlen(writing_app)); - - // Duration (0x4489) - optional - if (duration_ns > 0) { - double duration_ms = static_cast(duration_ns) / 1000000.0; - uint8_t duration_header[] = { 0x44, 0x89, 0x88 }; - info_data.insert(info_data.end(), duration_header, duration_header + sizeof(duration_header)); - - uint64_t duration_bits; - memcpy(&duration_bits, &duration_ms, 8); - writeBE64(info_data, 
duration_bits); - } - - writeEBMLElement(file, info_id, info_data.data(), info_data.size()); -} - -void MediaWriterVP8::writeTracks(FILE* file) -{ - // Tracks (0x1654AE6B) - const uint32_t tracks_id = 0x1654AE6B; - - std::vector tracks_data; - - // TrackEntry (0xAE) - std::vector track_data; - - // TrackNumber (0xD7) = 1 - uint8_t track_number[] = { 0xD7, 0x81, 0x01 }; - track_data.insert(track_data.end(), track_number, track_number + sizeof(track_number)); - - // TrackUID (0x73C5) = 1 - uint8_t track_uid[] = { 0x73, 0xC5, 0x81, 0x01 }; - track_data.insert(track_data.end(), track_uid, track_uid + sizeof(track_uid)); - - // TrackType (0x83) = 1 (video) - uint8_t track_type[] = { 0x83, 0x81, 0x01 }; - track_data.insert(track_data.end(), track_type, track_type + sizeof(track_type)); - - // CodecID (0x86) = "V_VP8" - const char* codec_id = "V_VP8"; - uint8_t codec_header[] = { 0x86, 0x85 }; - track_data.insert(track_data.end(), codec_header, codec_header + sizeof(codec_header)); - track_data.insert(track_data.end(), codec_id, codec_id + strlen(codec_id)); - - // Video (0xE0) - std::vector video_data; - - // Extract actual dimensions from VP8 frames - uint16_t frame_width = 1920; // fallback - uint16_t frame_height = 1080; // fallback - extractVP8Dimensions(frame_width, frame_height); - - // PixelWidth (0xB0) - uint8_t width_header[] = { 0xB0, 0x82 }; - video_data.insert(video_data.end(), width_header, width_header + sizeof(width_header)); - writeBE16(video_data, frame_width); - - // PixelHeight (0xBA) - uint8_t height_header[] = { 0xBA, 0x82 }; - video_data.insert(video_data.end(), height_header, height_header + sizeof(height_header)); - writeBE16(video_data, frame_height); - - // Write Video element - uint8_t video_header[] = { 0xE0 }; - track_data.insert(track_data.end(), video_header, video_header + sizeof(video_header)); - - std::vector video_size_bytes; - uint64_t video_size = video_data.size(); - int width = getVarIntWidth(video_size); - uint8_t first_byte = (1 
<< (8 - width)) | ((video_size >> ((width - 1) * 8)) & ((1 << (8 - width)) - 1)); - video_size_bytes.push_back(first_byte); - for (int i = width - 2; i >= 0; i--) { - video_size_bytes.push_back((video_size >> (i * 8)) & 0xFF); - } - track_data.insert(track_data.end(), video_size_bytes.begin(), video_size_bytes.end()); - track_data.insert(track_data.end(), video_data.begin(), video_data.end()); - - // Write TrackEntry - uint8_t track_entry_header[] = { 0xAE }; - tracks_data.insert(tracks_data.end(), track_entry_header, track_entry_header + sizeof(track_entry_header)); - - std::vector track_size_bytes; - uint64_t track_size = track_data.size(); - int width2 = getVarIntWidth(track_size); - uint8_t track_first_byte = (1 << (8 - width2)) | ((track_size >> ((width2 - 1) * 8)) & ((1 << (8 - width2)) - 1)); - track_size_bytes.push_back(track_first_byte); - for (int i = width2 - 2; i >= 0; i--) { - track_size_bytes.push_back((track_size >> (i * 8)) & 0xFF); - } - tracks_data.insert(tracks_data.end(), track_size_bytes.begin(), track_size_bytes.end()); - tracks_data.insert(tracks_data.end(), track_data.begin(), track_data.end()); - - writeEBMLElement(file, tracks_id, tracks_data.data(), tracks_data.size()); -} - -void MediaWriterVP8::writeClusters(FILE* file) -{ - if (mFrameList.empty()) - return; - - // Write one cluster per frame for simplicity - for (size_t i = 0; i < mFrameList.size(); i++) { - const auto& frame = mFrameList[i]; - - // Cluster (0x1F43B675) - const uint32_t cluster_id = 0x1F43B675; - - std::vector cluster_data; - - // Timecode (0xE7) - in milliseconds - uint64_t timecode_ms = frame.pts_usec / 1000; - - // Create temporary buffer for timecode element - std::vector timecode_data; - int tc_width = getVarIntWidth(timecode_ms); - for (int j = tc_width - 1; j >= 0; j--) { - timecode_data.push_back((timecode_ms >> (j * 8)) & 0xFF); - } - - // Write timecode element header manually - cluster_data.push_back(0xE7); // Timecode ID - writeVarIntToBuffer(cluster_data, 
timecode_data.size()); - cluster_data.insert(cluster_data.end(), timecode_data.begin(), timecode_data.end()); - - // SimpleBlock (0xA3) - std::vector block_data; - - // Track number (1) - variable integer - block_data.push_back(0x81); // track 1 - - // Timestamp relative to cluster (0 for now) - block_data.push_back(0x00); - block_data.push_back(0x00); - - // Flags - keyframe detection - uint8_t flags = 0x00; - if (i == 0 || isKeyFrame(frame)) { - flags |= 0x80; // Keyframe - } - block_data.push_back(flags); - - // Frame data - block_data.insert(block_data.end(), frame.data.data(), frame.data.data() + frame.data.size()); - - // Write SimpleBlock - uint8_t block_header[] = { 0xA3 }; - cluster_data.insert(cluster_data.end(), block_header, block_header + sizeof(block_header)); - - std::vector block_size_bytes; - uint64_t block_size = block_data.size(); - int width = getVarIntWidth(block_size); - uint8_t first_byte = (1 << (8 - width)) | ((block_size >> ((width - 1) * 8)) & ((1 << (8 - width)) - 1)); - block_size_bytes.push_back(first_byte); - for (int j = width - 2; j >= 0; j--) { - block_size_bytes.push_back((block_size >> (j * 8)) & 0xFF); - } - cluster_data.insert(cluster_data.end(), block_size_bytes.begin(), block_size_bytes.end()); - cluster_data.insert(cluster_data.end(), block_data.begin(), block_data.end()); - - writeEBMLElement(file, cluster_id, cluster_data.data(), cluster_data.size()); - } -} - -bool MediaWriterVP8::isKeyFrame(const VP8Frame& frame) -{ - if (frame.data.size() < 3) { - return false; - } - - const auto* frameData = frame.data.data(); - const auto tag = frameData[0] | (frameData[1] << 8) | (frameData[2] << 16); - const auto tagFrameType = tag & 0x01; - - return tagFrameType == 0; -} - -void MediaWriterVP8::writeEBMLElement(FILE* file, uint32_t id, const void* data, size_t size) -{ - // Write element ID (big endian, variable length) - if (id & 0xFF000000) { - uint8_t bytes[4] = { static_cast((id >> 24) & 0xFF), - static_cast((id >> 16) & 0xFF), 
- static_cast((id >> 8) & 0xFF), - static_cast(id & 0xFF) }; - std::fwrite(bytes, 4, 1, file); - } else if (id & 0x00FF0000) { - uint8_t bytes[3] = { static_cast((id >> 16) & 0xFF), - static_cast((id >> 8) & 0xFF), - static_cast(id & 0xFF) }; - std::fwrite(bytes, 3, 1, file); - } else if (id & 0x0000FF00) { - uint8_t bytes[2] = { static_cast((id >> 8) & 0xFF), static_cast(id & 0xFF) }; - std::fwrite(bytes, 2, 1, file); - } else { - const auto id_byte = static_cast(id); - std::fwrite(&id_byte, 1, 1, file); - } - - // Write size - writeVarInt(file, size); - - // Write data - if (data && size > 0) { - std::fwrite(data, size, 1, file); - } -} - -void MediaWriterVP8::writeVarInt(FILE* file, uint64_t value) -{ - const auto width = getVarIntWidth(value); - - // EBML variable integer: first byte has leading 1 bit, followed by width-1 zero bits, then data - uint8_t first_byte = (1 << (8 - width)) | ((value >> ((width - 1) * 8)) & ((1 << (8 - width)) - 1)); - std::fwrite(&first_byte, 1, 1, file); - - // Write remaining bytes - for (int i = width - 2; i >= 0; i--) { - uint8_t byte = (value >> (i * 8)) & 0xFF; - std::fwrite(&byte, 1, 1, file); - } -} - -void MediaWriterVP8::writeVarIntToBuffer(std::vector& buffer, uint64_t value) -{ - const auto width = getVarIntWidth(value); - - // EBML variable integer: first byte has leading 1 bit, followed by width-1 zero bits, then data - uint8_t first_byte = (1 << (8 - width)) | ((value >> ((width - 1) * 8)) & ((1 << (8 - width)) - 1)); - buffer.push_back(first_byte); - - // Write remaining bytes - for (int i = width - 2; i >= 0; i--) { - uint8_t byte = (value >> (i * 8)) & 0xFF; - buffer.push_back(byte); - } -} - -int MediaWriterVP8::getVarIntWidth(uint64_t value) -{ - if (value <= 127) - return 1; // 2^7 - 1 - if (value <= 16383) - return 2; // 2^14 - 1 - if (value <= 2097151) - return 3; // 2^21 - 1 - if (value <= 268435455) - return 4; // 2^28 - 1 - if (value <= 34359738367ULL) - return 5; // 2^35 - 1 - if (value <= 4398046511103ULL) 
- return 6; // 2^42 - 1 - if (value <= 562949953421311ULL) - return 7; // 2^49 - 1 - return 8; -} - bool MediaWriterVP8::extractVP8Dimensions(uint16_t& width, uint16_t& height) const { // Find first keyframe for (const auto& frame : mFrameList) { - if (isKeyFrame(frame) && frame.data.size() >= 10) { + if (frame.is_keyframe && frame.data.size() >= 10) { const auto* frameData = frame.data.data(); // VP8 keyframe structure (RFC 6386 Section 9.1) diff --git a/tools/media_writer_vp8.h b/tools/media_writer_vp8.h index bc97d50..a1f2aee 100644 --- a/tools/media_writer_vp8.h +++ b/tools/media_writer_vp8.h @@ -1,10 +1,10 @@ #pragma once #include "media_writer.h" +#include "media_writer_webm.h" #include "srtc/track.h" - -#include +#include "srtc/byte_buffer.h" class MediaWriterVP8 final : public MediaWriter { @@ -21,22 +21,8 @@ class MediaWriterVP8 final : public MediaWriter size_t mOutKeyFrameCount; size_t mOutByteCount; - struct VP8Frame { - int64_t pts_usec; - srtc::ByteBuffer data; - }; - std::vector mFrameList; + std::vector mFrameList; uint64_t mBaseRtpTimestamp; - void writeWebM(); - static void writeEBMLHeader(FILE* file); - static void writeSegmentInfo(FILE* file, uint64_t duration_ns); - void writeTracks(FILE* file); - void writeClusters(FILE* file); - static void writeEBMLElement(FILE* file, uint32_t id, const void* data, size_t size); - static void writeVarInt(FILE* file, uint64_t value); - static void writeVarIntToBuffer(std::vector& buffer, uint64_t value); - static int getVarIntWidth(uint64_t value); - static bool isKeyFrame(const VP8Frame& frame); bool extractVP8Dimensions(uint16_t& width, uint16_t& height) const; }; \ No newline at end of file diff --git a/tools/media_writer_webm.cpp b/tools/media_writer_webm.cpp new file mode 100644 index 0000000..50c2ac7 --- /dev/null +++ b/tools/media_writer_webm.cpp @@ -0,0 +1,361 @@ +#include "media_writer_webm.h" + +#include + +namespace +{ + +// Helper to write variable-length EBML integer to ByteBuffer +void 
writeVarIntToBuffer(srtc::ByteWriter& writer, uint64_t value) +{ + int width = 1; + if (value > 127) + width = 2; + if (value > 16383) + width = 3; + if (value > 2097151) + width = 4; + if (value > 268435455) + width = 5; + if (value > 34359738367ULL) + width = 6; + if (value > 4398046511103ULL) + width = 7; + if (value > 562949953421311ULL) + width = 8; + + uint8_t first_byte = (1 << (8 - width)) | ((value >> ((width - 1) * 8)) & ((1 << (8 - width)) - 1)); + writer.writeU8(first_byte); + + for (int i = width - 2; i >= 0; i--) { + writer.writeU8((value >> (i * 8)) & 0xFF); + } +} + +} // namespace + +MediaWriterWebm::MediaWriterWebm(FILE* file, + const std::string& codecId, + uint32_t frameWidth, + uint32_t frameHeight, + const std::vector& frameList) + : mFile(file) + , mCodecId(codecId) + , mFrameWidth(frameWidth) + , mFrameHeight(frameHeight) + , mFrameList(frameList) +{ +} + +void MediaWriterWebm::write() +{ + writeWebM(); +} + +MediaWriterWebm::~MediaWriterWebm() = default; + +void MediaWriterWebm::writeWebM() +{ + // Calculate duration + uint64_t duration_ns = 0; + if (mFrameList.size() > 1) { + duration_ns = static_cast(mFrameList.back().pts_usec - mFrameList.front().pts_usec) * 1000; + } + + writeEBMLHeader(); + + // Write Segment header with proper size calculation + // First write to temporary buffer to calculate size + std::vector segment_content; + + // Create temporary file for size calculation + FILE* temp_file = tmpfile(); + writeSegmentInfo(temp_file, duration_ns); + writeTracks(temp_file); + writeClusters(temp_file); + + // Get size and copy data + long segment_data_size = ftell(temp_file); + segment_content.resize(segment_data_size); + fseek(temp_file, 0, SEEK_SET); + if (fread(segment_content.data(), 1, segment_data_size, temp_file) != segment_data_size) { + std::printf("WebM: Failed to read temporary file\n"); + exit(1); + } + fclose(temp_file); + + // Write Segment with known size + uint32_t segment_id = 0x18538067; + writeEBMLElement(mFile, 
segment_id, segment_content.data(), segment_data_size); +} + +void MediaWriterWebm::writeEBMLHeader() +{ + // EBML Header (0x1A45DFA3) + const uint32_t ebml_header_id = 0x1A45DFA3; + + // Build header content using ByteBuffer and ByteWriter + srtc::ByteBuffer header_buffer; + srtc::ByteWriter writer(header_buffer); + + // EBMLVersion (0x4286) = 1 + const uint8_t ebml_version[] = { 0x42, 0x86, 0x81, 0x01 }; + writer.write(ebml_version, sizeof(ebml_version)); + + // EBMLReadVersion (0x42F7) = 1 + const uint8_t ebml_read_version[] = { 0x42, 0xF7, 0x81, 0x01 }; + writer.write(ebml_read_version, sizeof(ebml_read_version)); + + // EBMLMaxIDLength (0x42F2) = 4 + const uint8_t ebml_max_id[] = { 0x42, 0xF2, 0x81, 0x04 }; + writer.write(ebml_max_id, sizeof(ebml_max_id)); + + // EBMLMaxSizeLength (0x42F3) = 8 + const uint8_t ebml_max_size[] = { 0x42, 0xF3, 0x81, 0x08 }; + writer.write(ebml_max_size, sizeof(ebml_max_size)); + + // DocType (0x4282) = "webm" + const char* doctype = "webm"; + const uint8_t doctype_header[] = { 0x42, 0x82, 0x84 }; + writer.write(doctype_header, sizeof(doctype_header)); + writer.write(reinterpret_cast(doctype), strlen(doctype)); + + // DocTypeVersion (0x4287) = 2 + const uint8_t doctype_version[] = { 0x42, 0x87, 0x81, 0x02 }; + writer.write(doctype_version, sizeof(doctype_version)); + + // DocTypeReadVersion (0x4285) = 2 + const uint8_t doctype_read_version[] = { 0x42, 0x85, 0x81, 0x02 }; + writer.write(doctype_read_version, sizeof(doctype_read_version)); + + writeEBMLElement(mFile, ebml_header_id, header_buffer.data(), header_buffer.size()); +} + +void MediaWriterWebm::writeSegmentInfo(FILE* file, uint64_t duration_ns) +{ + // Info (0x1549A966) + const uint32_t info_id = 0x1549A966; + + srtc::ByteBuffer info_buffer; + srtc::ByteWriter writer(info_buffer); + + // TimecodeScale (0x2AD7B1) = 1000000 (1ms) + uint32_t timecode_scale = 1000000; + const uint8_t timecode_header[] = { 0x2A, 0xD7, 0xB1, 0x84 }; // 4-byte size + writer.write(timecode_header, 
sizeof(timecode_header)); + writer.writeU32(timecode_scale); + + // MuxingApp (0x4D80) = "srtc" + const char* muxing_app = "srtc"; + const uint8_t muxing_header[] = { 0x4D, 0x80, 0x84 }; + writer.write(muxing_header, sizeof(muxing_header)); + writer.write(reinterpret_cast(muxing_app), strlen(muxing_app)); + + // WritingApp (0x5741) = "srtc" + const char* writing_app = "srtc"; + const uint8_t writing_header[] = { 0x57, 0x41, 0x84 }; + writer.write(writing_header, sizeof(writing_header)); + writer.write(reinterpret_cast(writing_app), strlen(writing_app)); + + // Duration (0x4489) - optional + if (duration_ns > 0) { + double duration_ms = static_cast(duration_ns) / 1000000.0; + const uint8_t duration_header[] = { 0x44, 0x89, 0x88 }; + writer.write(duration_header, sizeof(duration_header)); + + uint64_t duration_bits; + memcpy(&duration_bits, &duration_ms, 8); + writer.writeU64(duration_bits); + } + + writeEBMLElement(file, info_id, info_buffer.data(), info_buffer.size()); +} + +void MediaWriterWebm::writeTracks(FILE* file) +{ + // Tracks (0x1654AE6B) + const uint32_t tracks_id = 0x1654AE6B; + + srtc::ByteBuffer tracks_buffer; + srtc::ByteWriter tracks_writer(tracks_buffer); + + // TrackEntry (0xAE) + srtc::ByteBuffer track_buffer; + srtc::ByteWriter track_writer(track_buffer); + + // TrackNumber (0xD7) = 1 + const uint8_t track_number[] = { 0xD7, 0x81, 0x01 }; + track_writer.write(track_number, sizeof(track_number)); + + // TrackUID (0x73C5) = 1 + const uint8_t track_uid[] = { 0x73, 0xC5, 0x81, 0x01 }; + track_writer.write(track_uid, sizeof(track_uid)); + + // TrackType (0x83) = 1 (video) + const uint8_t track_type[] = { 0x83, 0x81, 0x01 }; + track_writer.write(track_type, sizeof(track_type)); + + // CodecID (0x86) = "V_VP8" or "V_AV1" + track_writer.writeU8(0x86); + writeVarIntToBuffer(track_writer, mCodecId.size()); + track_writer.write(reinterpret_cast(mCodecId.data()), mCodecId.size()); + + // Video (0xE0) + srtc::ByteBuffer video_buffer; + srtc::ByteWriter 
video_writer(video_buffer); + + // Extract actual dimensions from VP8 frames + uint16_t frame_width = mFrameWidth; + uint16_t frame_height = mFrameHeight; + + // PixelWidth (0xB0) + const uint8_t width_header[] = { 0xB0, 0x82 }; + video_writer.write(width_header, sizeof(width_header)); + video_writer.writeU16(frame_width); + + // PixelHeight (0xBA) + const uint8_t height_header[] = { 0xBA, 0x82 }; + video_writer.write(height_header, sizeof(height_header)); + video_writer.writeU16(frame_height); + + // Write Video element + const uint8_t video_header[] = { 0xE0 }; + track_writer.write(video_header, sizeof(video_header)); + writeVarIntToBuffer(track_writer, video_buffer.size()); + track_writer.write(video_buffer.data(), video_buffer.size()); + + // Write TrackEntry + const uint8_t track_entry_header[] = { 0xAE }; + tracks_writer.write(track_entry_header, sizeof(track_entry_header)); + writeVarIntToBuffer(tracks_writer, track_buffer.size()); + tracks_writer.write(track_buffer.data(), track_buffer.size()); + + writeEBMLElement(file, tracks_id, tracks_buffer.data(), tracks_buffer.size()); +} + +void MediaWriterWebm::writeClusters(FILE* file) +{ + if (mFrameList.empty()) + return; + + // Write one cluster per frame for simplicity + for (size_t i = 0; i < mFrameList.size(); i++) { + const auto& frame = mFrameList[i]; + + // Cluster (0x1F43B675) + const uint32_t cluster_id = 0x1F43B675; + + srtc::ByteBuffer cluster_buffer; + srtc::ByteWriter cluster_writer(cluster_buffer); + + // Timecode (0xE7) - in milliseconds + uint64_t timecode_ms = frame.pts_usec / 1000; + + // Create temporary buffer for timecode element + srtc::ByteBuffer timecode_buffer; + srtc::ByteWriter timecode_writer(timecode_buffer); + int tc_width = getVarIntWidth(timecode_ms); + for (int j = tc_width - 1; j >= 0; j--) { + timecode_writer.writeU8((timecode_ms >> (j * 8)) & 0xFF); + } + + // Write timecode element header manually + cluster_writer.writeU8(0xE7); // Timecode ID + 
writeVarIntToBuffer(cluster_writer, timecode_buffer.size());
+        cluster_writer.write(timecode_buffer.data(), timecode_buffer.size());
+
+        // SimpleBlock (0xA3)
+        srtc::ByteBuffer block_buffer;
+        srtc::ByteWriter block_writer(block_buffer);
+
+        // Track number (1) - variable integer
+        block_writer.writeU8(0x81); // track 1
+
+        // Timestamp relative to cluster (0 for now)
+        block_writer.writeU8(0x00);
+        block_writer.writeU8(0x00);
+
+        // Flags - keyframe detection
+        uint8_t flags = 0x00;
+        if (i == 0 || frame.is_keyframe) {
+            flags |= 0x80; // Keyframe
+        }
+        block_writer.writeU8(flags);
+
+        // Frame data
+        block_writer.write(frame.data.data(), frame.data.size());
+
+        // Write SimpleBlock
+        const uint8_t block_header[] = { 0xA3 };
+        cluster_writer.write(block_header, sizeof(block_header));
+        writeVarIntToBuffer(cluster_writer, block_buffer.size());
+        cluster_writer.write(block_buffer.data(), block_buffer.size());
+
+        writeEBMLElement(file, cluster_id, cluster_buffer.data(), cluster_buffer.size());
+    }
+}
+
+// Writes one complete EBML element to `file`: the element ID, the payload size as an
+// EBML variable-size integer, then the payload bytes.
+//
+// file - destination stream
+// id   - element ID, already carrying its EBML length-marker bits (e.g. 0x1A45DFA3, 0xAE)
+// data - payload bytes; may be null, in which case only ID and size are written
+// size - payload length in bytes; it is written as the element size even when no payload follows
+void MediaWriterWebm::writeEBMLElement(FILE* file, uint32_t id, const void* data, size_t size)
+{
+    // The ID goes out big-endian using as many bytes as its highest non-zero byte needs;
+    // an ID whose upper three bytes are all zero occupies a single byte.
+    size_t id_length = 4;
+    while (id_length > 1 && ((id >> ((id_length - 1) * 8)) & 0xFF) == 0) {
+        id_length--;
+    }
+
+    uint8_t id_bytes[4];
+    for (size_t n = 0; n < id_length; n++) {
+        const auto shift = (id_length - 1 - n) * 8;
+        id_bytes[n] = static_cast<uint8_t>((id >> shift) & 0xFF);
+    }
+    std::fwrite(id_bytes, id_length, 1, file);
+
+    // Payload size
+    writeVarInt(file, size);
+
+    // Payload, when there is one
+    if (data == nullptr || size == 0) {
+        return;
+    }
+    std::fwrite(data, size, 1, file);
+}
+
+void MediaWriterWebm::writeVarInt(FILE* file, uint64_t value)
+{
+
const auto width = getVarIntWidth(value);
+
+    // EBML variable integer: first byte has leading 1 bit, followed by width-1 zero bits, then data
+    uint8_t first_byte = (1 << (8 - width)) | ((value >> ((width - 1) * 8)) & ((1 << (8 - width)) - 1));
+    std::fwrite(&first_byte, 1, 1, file);
+
+    // Write remaining bytes
+    for (int i = width - 2; i >= 0; i--) {
+        uint8_t byte = (value >> (i * 8)) & 0xFF;
+        std::fwrite(&byte, 1, 1, file);
+    }
+}
+
+// Returns the number of bytes (1..8) needed to store `value` as an EBML variable-size integer.
+//
+// A width of N bytes carries 7*N data bits. The pattern with all data bits set is reserved by
+// EBML to mean "unknown size", so the largest value a width may hold is 2^(7*N) - 2: a value of
+// exactly 127, 16383, ... must move up to the next width, otherwise a reader would treat the
+// element as unknown-size. Values above 2^56 - 2 do not fit any width and still report 8.
+int MediaWriterWebm::getVarIntWidth(uint64_t value)
+{
+    for (int width = 1; width < 8; width++) {
+        // All-ones data pattern for this width, i.e. the reserved "unknown size" marker
+        const uint64_t reserved = (uint64_t{ 1 } << (7 * width)) - 1;
+        if (value < reserved) {
+            return width;
+        }
+    }
+    return 8;
+}
diff --git a/tools/media_writer_webm.h b/tools/media_writer_webm.h
new file mode 100644
index 0000000..27cabb8
--- /dev/null
+++ b/tools/media_writer_webm.h
@@ -0,0 +1,43 @@
+#pragma once
+
+#include "srtc/byte_buffer.h"
+
+#include <cstdio>
+#include <string>
+#include <vector>
+
+class MediaWriterWebm
+{
+public:
+    struct Frame {
+        int64_t pts_usec;
+        bool is_keyframe;
+        srtc::ByteBuffer data;
+    };
+
+    MediaWriterWebm(FILE* file,
+                    const std::string& codecId,
+                    uint32_t frameWidth,
+                    uint32_t frameHeight,
+                    const std::vector<Frame>& frameList);
+
+    void write();
+
+    ~MediaWriterWebm();
+
+private:
+    FILE* const mFile;
+    const std::string mCodecId;
+    const uint32_t mFrameWidth;
+    const uint32_t mFrameHeight;
+    const std::vector<Frame>& mFrameList;
+
+    void writeWebM();
+    void writeEBMLHeader();
+    static void writeSegmentInfo(FILE* file, uint64_t duration_ns);
+    void writeTracks(FILE* file);
+    void writeClusters(FILE* file);
+    static void writeEBMLElement(FILE* file, uint32_t id, const void* data, size_t size);
+    static void writeVarInt(FILE* file, uint64_t value);
+    static int getVarIntWidth(uint64_t value);
+};
\ No newline at end of file
diff
--git a/tools/srtc_subscribe.cpp b/tools/srtc_subscribe.cpp index 3ca369a..e746c99 100644 --- a/tools/srtc_subscribe.cpp +++ b/tools/srtc_subscribe.cpp @@ -9,6 +9,7 @@ #include "media_writer_h26x.h" #include "media_writer_ogg.h" #include "media_writer_vp8.h" +#include "media_writer_av1.h" #include "http_whip_whep.h" @@ -211,10 +212,14 @@ int main(int argc, char* argv[]) SubVideoCodec videoCodecH265 = {}; videoCodecH265.codec = Codec::H265; + SubVideoCodec videoCodecAV1 = {}; + videoCodecAV1.codec = Codec::AV1; + SubVideoConfig videoConfig = {}; videoConfig.codec_list.push_back(videoCodecVP8); videoConfig.codec_list.push_back(videoCodecH264); videoConfig.codec_list.push_back(videoCodecH265); + videoConfig.codec_list.push_back(videoCodecAV1); SubAudioCodec audioCodec = {}; audioCodec.codec = Codec::Opus; @@ -287,6 +292,9 @@ int main(int argc, char* argv[]) } else if (codec == srtc::Codec::H264 || codec == srtc::Codec::H265) { mediaWriterVideo = std::make_shared(gOutputVideoFilename, track); mediaWriterVideo->start(); + } else if (codec == srtc::Codec::AV1) { + mediaWriterVideo = std::make_shared(gOutputVideoFilename, track); + mediaWriterVideo->start(); } else { std::cout << "Saving video output is requested, but the video codec is not one we support" << std::endl; exit(1);