From baf307a18238b30bbf735fdcf127b5f71e48e658 Mon Sep 17 00:00:00 2001
From: shogo4405
Date: Sat, 18 Jan 2025 16:31:26 +0900
Subject: [PATCH] Support the OPUS format for Enhanced RTMP.

---
 .../macOS/CameraIngestViewController.swift    |  4 +
 HaishinKit/Sources/Codec/AudioCodec.swift     | 84 ++++++++++-------
 .../Sources/Codec/AudioCodecSettings.swift    | 56 +++++++++---
 .../Sources/Codec/OpusHeaderPacket.swift      | 30 +++++++
 HaishinKit/Sources/RTMP/RTMPConnection.swift  | 25 +++++-
 HaishinKit/Sources/RTMP/RTMPEnhanced.swift    | 89 ++++++++++++++++++-
 HaishinKit/Sources/RTMP/RTMPFoundation.swift  |  2 +-
 HaishinKit/Sources/RTMP/RTMPMessage.swift     | 37 ++++++--
 ...Tests.swift => RTMPVideoFourCCTests.swift} |  2 +-
 9 files changed, 270 insertions(+), 59 deletions(-)
 create mode 100644 HaishinKit/Sources/Codec/OpusHeaderPacket.swift
 rename HaishinKit/Tests/RTMP/{FLVVideoFourCCTests.swift => RTMPVideoFourCCTests.swift} (94%)

diff --git a/Examples/macOS/CameraIngestViewController.swift b/Examples/macOS/CameraIngestViewController.swift
index e4f2210d2..22ad8fd83 100644
--- a/Examples/macOS/CameraIngestViewController.swift
+++ b/Examples/macOS/CameraIngestViewController.swift
@@ -36,6 +36,10 @@ final class CameraIngestViewController: NSViewController {
         await netStreamSwitcher.setPreference(Preference.default)
         let stream = await netStreamSwitcher.stream
         if let stream {
+            var audioSettings = AudioCodecSettings()
+            audioSettings.format = .opus
+            await stream.setAudioSettings(audioSettings)
+
             await stream.addOutput(lfView!)
             await mixer.addOutput(stream)
         }
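
The example wiring above is the whole opt-in: pick `.opus` on `AudioCodecSettings` before publishing. Since this patch also adds a `format:` parameter to the `AudioCodecSettings` initializer (see the AudioCodecSettings.swift hunks further down), the same setup can be written as one expression. A minimal sketch, assuming a stream value like the example's (an `HKStream`-conforming `RTMPStream`) is already at hand:

import HaishinKit

// One-expression variant of the example above. HKStream/setAudioSettings are
// assumed from the surrounding HaishinKit API; only the format: label is new.
func enableOpusAudio(on stream: some HKStream) async {
    await stream.setAudioSettings(AudioCodecSettings(format: .opus))
}
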
diff --git a/HaishinKit/Sources/Codec/AudioCodec.swift b/HaishinKit/Sources/Codec/AudioCodec.swift
index 5c9a269aa..7e15c27c9 100644
--- a/HaishinKit/Sources/Codec/AudioCodec.swift
+++ b/HaishinKit/Sources/Codec/AudioCodec.swift
@@ -6,7 +6,7 @@ import AVFoundation
  * - seealso: https://developer.apple.com/library/ios/technotes/tn2236/_index.html
  */
 final class AudioCodec {
-    static let frameCamacity: UInt32 = 1024
+    static let defaultInputBuffersCursor = 0
 
     /// Specifies the settings for audio codec.
     var settings: AudioCodecSettings = .default {
@@ -32,8 +32,8 @@
             guard inputFormat != oldValue else {
                 return
             }
-            cursor = 0
             inputBuffers.removeAll()
+            inputBuffersCursor = Self.defaultInputBuffersCursor
             outputBuffers.removeAll()
             audioConverter = makeAudioConverter()
             for _ in 0..<settings.format.inputBufferCounts {
@@ -43,7 +43,8 @@
             self.continuation = continuation
         }
     }
-    private var cursor: Int = 0
+    private var audioTime = AudioTime()
+    private var ringBuffer: AudioRingBuffer?
     private var inputBuffers: [AVAudioBuffer] = []
     private var continuation: AsyncStream<(AVAudioBuffer, AVAudioTime)>.Continuation? {
         didSet {
@@ -52,6 +53,7 @@
     }
     private var outputBuffers: [AVAudioBuffer] = []
     private var audioConverter: AVAudioConverter?
+    private var inputBuffersCursor = AudioCodec.defaultInputBuffersCursor
 
     func append(_ sampleBuffer: CMSampleBuffer) {
         guard isRunning else {
@@ -91,33 +93,49 @@
             return
         }
         var error: NSError?
-        let outputBuffer = self.outputBuffer
-        let outputStatus = audioConverter.convert(to: outputBuffer, error: &error) { _, inputStatus in
-            switch self.inputBuffer {
-            case let inputBuffer as AVAudioCompressedBuffer:
-                inputBuffer.copy(audioBuffer)
-            case let inputBuffer as AVAudioPCMBuffer:
-                if !inputBuffer.copy(audioBuffer) {
-                    inputBuffer.muted(true)
+        if let audioBuffer = audioBuffer as? AVAudioPCMBuffer {
+            ringBuffer?.append(audioBuffer, when: when)
+        }
+        var outputStatus: AVAudioConverterOutputStatus = .endOfStream
+        repeat {
+            let outputBuffer = self.outputBuffer
+            outputStatus = audioConverter.convert(to: outputBuffer, error: &error) { inNumberFrames, inputStatus in
+                switch self.inputBuffer {
+                case let inputBuffer as AVAudioCompressedBuffer:
+                    inputBuffer.copy(audioBuffer)
+                    inputStatus.pointee = .haveData
+                    return inputBuffer
+                case let inputBuffer as AVAudioPCMBuffer:
+                    if inNumberFrames <= (self.ringBuffer?.counts ?? 0) {
+                        _ = self.ringBuffer?.render(inNumberFrames, ioData: inputBuffer.mutableAudioBufferList)
+                        inputBuffer.frameLength = inNumberFrames
+                        inputStatus.pointee = .haveData
+                        self.audioTime.advanced(AVAudioFramePosition(inNumberFrames))
+                        return self.inputBuffer
+                    } else {
+                        inputStatus.pointee = .noDataNow
+                        return nil
+                    }
+                default:
+                    inputStatus.pointee = .noDataNow
+                    return nil
+                }
+            }
+            switch outputStatus {
+            case .haveData:
+                if audioTime.hasAnchor {
+                    continuation?.yield((outputBuffer, audioTime.at))
+                } else {
+                    continuation?.yield((outputBuffer, when))
+                }
+                inputBuffersCursor += 1
+                if inputBuffersCursor == inputBuffers.count {
+                    inputBuffersCursor = Self.defaultInputBuffersCursor
                 }
             default:
-                break
+                releaseOutputBuffer(outputBuffer)
             }
-            inputStatus.pointee = .haveData
-            return self.inputBuffer
-        }
-        switch outputStatus {
-        case .haveData:
-            continuation?.yield((outputBuffer, when))
-        case .error:
-            break
-        default:
-            break
-        }
-        cursor += 1
-        if cursor == inputBuffers.count {
-            cursor = 0
-        }
+        } while(outputStatus == .haveData && settings.format != .pcm)
     }
 
     private func makeInputBuffer() -> AVAudioBuffer? {
@@ -126,8 +144,9 @@
         }
         switch inputFormat.formatDescription.mediaSubType {
         case .linearPCM:
-            let buffer = AVAudioPCMBuffer(pcmFormat: inputFormat, frameCapacity: Self.frameCamacity)
-            buffer?.frameLength = Self.frameCamacity
+            let frameCapacity = settings.format.makeFramesPerPacket(inputFormat.sampleRate)
+            let buffer = AVAudioPCMBuffer(pcmFormat: inputFormat, frameCapacity: frameCapacity)
+            buffer?.frameLength = frameCapacity
             return buffer
         default:
            return AVAudioCompressedBuffer(format: inputFormat, packetCapacity: 1, maximumPacketSize: 1024)
@@ -145,6 +164,9 @@
         }
         let converter = AVAudioConverter(from: inputFormat, to: outputFormat)
         settings.apply(converter, oldValue: nil)
+        if inputFormat.formatDescription.mediaSubType == .linearPCM {
+            ringBuffer = AudioRingBuffer(inputFormat)
+        }
         return converter
     }
 }
@@ -170,7 +192,7 @@ extension AudioCodec: Codec {
     }
 
     private var inputBuffer: AVAudioBuffer {
-        return inputBuffers[cursor]
+        return inputBuffers[inputBuffersCursor]
     }
 }
 
@@ -180,6 +202,7 @@ extension AudioCodec: Runner {
         guard !isRunning else {
             return
         }
+        audioTime.reset()
        audioConverter?.reset()
         isRunning = true
     }
@@ -190,5 +213,6 @@ extension AudioCodec: Runner {
         }
         isRunning = false
         continuation = nil
+        ringBuffer = nil
     }
 }
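
Two details of the AudioCodec rewrite are easy to miss: PCM input is now staged in a ring buffer and pulled by the converter on demand (`.haveData` vs `.noDataNow`), and the converter is drained in a `repeat` loop because a single append can yield more than one compressed packet. A standalone sketch of that AVAudioConverter contract, under stated assumptions (48 kHz stereo input, a silent 20 ms slice standing in for ring-buffer output, and an OS whose AVAudioConverter can encode Opus):

import AVFoundation

func demoOpusDrainLoop() {
    // Input: 48 kHz stereo PCM. Output: Opus, 960 frames (20 ms) per packet.
    guard let inputFormat = AVAudioFormat(standardFormatWithSampleRate: 48_000, channels: 2) else { return }
    var asbd = AudioStreamBasicDescription(
        mSampleRate: 48_000,
        mFormatID: kAudioFormatOpus,
        mFormatFlags: 0,
        mBytesPerPacket: 0,
        mFramesPerPacket: 960,
        mBytesPerFrame: 0,
        mChannelsPerFrame: 2,
        mBitsPerChannel: 0,
        mReserved: 0
    )
    guard
        let outputFormat = AVAudioFormat(streamDescription: &asbd),
        let converter = AVAudioConverter(from: inputFormat, to: outputFormat),
        let pcm = AVAudioPCMBuffer(pcmFormat: inputFormat, frameCapacity: 960) else {
        return // Opus encoding is unavailable on this OS.
    }
    pcm.frameLength = 960 // one silent 20 ms slice stands in for the ring buffer

    var pending = true
    var outputStatus: AVAudioConverterOutputStatus = .endOfStream
    repeat {
        let packet = AVAudioCompressedBuffer(format: outputFormat, packetCapacity: 1, maximumPacketSize: 2048)
        var error: NSError?
        outputStatus = converter.convert(to: packet, error: &error) { _, inputStatus in
            guard pending else {
                // Nothing buffered right now; a real codec retries on the next append.
                inputStatus.pointee = .noDataNow
                return nil
            }
            pending = false
            inputStatus.pointee = .haveData
            return pcm
        }
        if outputStatus == .haveData {
            print("Opus packet:", packet.byteLength, "bytes")
        }
    } while outputStatus == .haveData // drain, as the patched convert loop does for non-PCM
}
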
diff --git a/HaishinKit/Sources/Codec/AudioCodecSettings.swift b/HaishinKit/Sources/Codec/AudioCodecSettings.swift
index 56a84c008..8c89adb26 100644
--- a/HaishinKit/Sources/Codec/AudioCodecSettings.swift
+++ b/HaishinKit/Sources/Codec/AudioCodecSettings.swift
@@ -11,9 +11,11 @@ public struct AudioCodecSettings: Codable, Sendable {
     public static let maximumNumberOfChannels: UInt32 = 8
 
     /// The type of the AudioCodec supports format.
-    enum Format: Codable {
+    public enum Format: Codable, Sendable {
         /// The AAC format.
         case aac
+        /// The OPUS format.
+        case opus
         /// The PCM format.
         case pcm
 
@@ -21,6 +23,8 @@
             switch self {
             case .aac:
                 return kAudioFormatMPEG4AAC
+            case .opus:
+                return kAudioFormatOpus
             case .pcm:
                 return kAudioFormatLinearPCM
             }
@@ -30,6 +34,8 @@
             switch self {
             case .aac:
                 return UInt32(MPEG4ObjectID.AAC_LC.rawValue)
+            case .opus:
+                return 0
             case .pcm:
                 return kAudioFormatFlagIsNonInterleaved
                     | kAudioFormatFlagIsPacked
@@ -37,19 +43,12 @@
         }
     }
 
-    var framesPerPacket: UInt32 {
-        switch self {
-        case .aac:
-            return 1024
-        case .pcm:
-            return 1
-        }
-    }
-
     var packetSize: UInt32 {
         switch self {
         case .aac:
             return 1
+        case .opus:
+            return 1
         case .pcm:
             return 1024
         }
@@ -59,6 +58,8 @@
             switch self {
             case .aac:
                 return 0
+            case .opus:
+                return 0
             case .pcm:
                 return 32
             }
@@ -68,6 +69,8 @@
             switch self {
             case .aac:
                 return 0
+            case .opus:
+                return 0
             case .pcm:
                 return (bitsPerChannel / 8)
             }
@@ -77,6 +80,8 @@
             switch self {
             case .aac:
                 return 0
+            case .opus:
+                return 0
             case .pcm:
                 return (bitsPerChannel / 8)
             }
@@ -86,6 +91,8 @@
             switch self {
             case .aac:
                 return 6
+            case .opus:
+                return 6
             case .pcm:
                 return 1
             }
@@ -95,15 +102,32 @@
             switch self {
             case .aac:
                 return 1
+            case .opus:
+                return 1
             case .pcm:
                 return 24
             }
         }
 
+        func makeFramesPerPacket(_ sampleRate: Double) -> UInt32 {
+            switch self {
+            case .aac:
+                return 1024
+            case .opus:
+                // https://www.rfc-editor.org/rfc/rfc6716#section-2.1.4
+                let frameDurationSec = 0.02
+                return UInt32(sampleRate * frameDurationSec)
+            case .pcm:
+                return 1
+            }
+        }
+
         func makeAudioBuffer(_ format: AVAudioFormat) -> AVAudioBuffer? {
             switch self {
             case .aac:
                 return AVAudioCompressedBuffer(format: format, packetCapacity: 1, maximumPacketSize: 1024 * Int(format.channelCount))
+            case .opus:
+                return AVAudioCompressedBuffer(format: format, packetCapacity: 1, maximumPacketSize: 1024 * Int(format.channelCount))
             case .pcm:
                 return AVAudioPCMBuffer(pcmFormat: format, frameCapacity: 1024)
             }
@@ -116,7 +140,7 @@
                 mFormatID: formatID,
                 mFormatFlags: formatFlags,
                 mBytesPerPacket: bytesPerPacket,
-                mFramesPerPacket: framesPerPacket,
+                mFramesPerPacket: makeFramesPerPacket(format.sampleRate),
                 mBytesPerFrame: bytesPerFrame,
                 mChannelsPerFrame: min(
                     config?.channelCount ?? format.channelCount,
@@ -142,13 +166,19 @@
     public var channelMap: [Int]?
 
     /// Specifies the output format.
-    var format: AudioCodecSettings.Format = .aac
+    public var format: AudioCodecSettings.Format = .aac
 
     /// Creates a new instance.
-    public init(bitRate: Int = AudioCodecSettings.defaultBitRate, downmix: Bool = true, channelMap: [Int]? = nil) {
+    public init(
+        bitRate: Int = AudioCodecSettings.defaultBitRate,
+        downmix: Bool = true,
+        channelMap: [Int]? = nil,
+        format: AudioCodecSettings.Format = .aac
+    ) {
         self.bitRate = bitRate
         self.downmix = downmix
         self.channelMap = channelMap
+        self.format = format
     }
 
     func apply(_ converter: AVAudioConverter?, oldValue: AudioCodecSettings?) {
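
`makeFramesPerPacket(_:)` replaces the old fixed `framesPerPacket` because an Opus frame is defined by duration, not by a sample count: 20 ms is 960 frames at 48 kHz but 882 at 44.1 kHz. The arithmetic, spelled out:

// Plain arithmetic mirroring makeFramesPerPacket(_:) for .opus (20 ms frames,
// RFC 6716 section 2.1.4); these are not API calls, just the conversion.
let frameDurationSec = 0.02
print(UInt32(48_000 * frameDurationSec)) // 960
print(UInt32(44_100 * frameDurationSec)) // 882
print(UInt32(16_000 * frameDurationSec)) // 320
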
diff --git a/HaishinKit/Sources/Codec/OpusHeaderPacket.swift b/HaishinKit/Sources/Codec/OpusHeaderPacket.swift
new file mode 100644
index 000000000..e312bd4fe
--- /dev/null
+++ b/HaishinKit/Sources/Codec/OpusHeaderPacket.swift
@@ -0,0 +1,30 @@
+import CoreMedia
+import Foundation
+
+struct OpusHeaderPacket {
+    static let signature = "OpusHead"
+
+    let channels: Int
+    let sampleRate: Double
+
+    var payload: Data {
+        var data = Data()
+        data.append(contentsOf: Self.signature.utf8)
+        data.append(0x01)
+        data.append(UInt8(channels))
+        data.append(UInt16(0).data)
+        data.append(UInt32(sampleRate).data)
+        data.append(UInt16(0).data)
+        data.append(0x00)
+        return data
+    }
+
+    init?(formatDescription: CMFormatDescription?) {
+        guard
+            let streamDescription = formatDescription?.audioStreamBasicDescription else {
+            return nil
+        }
+        channels = Int(streamDescription.mChannelsPerFrame)
+        sampleRate = streamDescription.mSampleRate
+    }
+}
diff --git a/HaishinKit/Sources/RTMP/RTMPConnection.swift b/HaishinKit/Sources/RTMP/RTMPConnection.swift
index 2d8f755e6..a94508a8b 100644
--- a/HaishinKit/Sources/RTMP/RTMPConnection.swift
+++ b/HaishinKit/Sources/RTMP/RTMPConnection.swift
@@ -34,8 +34,8 @@ public actor RTMPConnection: NetworkConnection {
     public static let defaultWindowSizeS: Int64 = 250000
     /// The supported protocols are rtmp, rtmps, rtmpt and rtmps.
     public static let supportedProtocols: Set<String> = ["rtmp", "rtmps"]
-    /// The supported fourCcList are hvc1.
-    public static let supportedFourCcList = ["hvc1"]
+    /// The supported fourCcList.
+    public static let supportedFourCcList = [RTMPVideoFourCC.hevc.description, RTMPAudioFourCC.opus.description]
     /// The default RTMP port is 1935.
     public static let defaultPort: Int = 1935
     /// The default RTMPS port is 443.
@@ -51,6 +51,14 @@
     /// The default an rtmp request time out value (ms).
     public static let defaultRequestTimeout: UInt64 = 3000
 
+    static let videoFourCcInfoMap: AMFObject = [
+        RTMPVideoFourCC.hevc.description: FourCcInfoMask.canDecode.rawValue | FourCcInfoMask.canEncode.rawValue
+    ]
+
+    static let audioFourCcInfoMap: AMFObject = [
+        RTMPAudioFourCC.opus.description: FourCcInfoMask.canEncode.rawValue
+    ]
+
     private static let connectTransactionId = 1
 
     /**
@@ -134,6 +142,19 @@
         case clientSeek = 1
     }
 
+    enum FourCcInfoMask: Int {
+        case canDecode = 0x01
+        case canEncode = 0x02
+        case canForward = 0x04
+    }
+
+    enum CapsEx: Int {
+        case reconnect = 0x01
+        case multitrack = 0x02
+        case modEx = 0x04
+        case timestampNanoOffset = 0x08
+    }
+
     /// The URL of .swf.
     public let swfUrl: String?
     /// The URL of an HTTP referer.
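
The new `fourCcInfoMap` objects advertise, per fourCC, what this client can do with a codec, and `FourCcInfoMask`/`CapsEx` are plain bit flags: they compose with `|` and are queried with `&`. A small sketch of reading such a mask back (the `received` value is hypothetical, not something HaishinKit produces here):

// Bit-flag arithmetic as used by FourCcInfoMask above; `received` is a
// made-up server response used only to illustrate the masking.
let canDecode = 0x01, canEncode = 0x02, canForward = 0x04
let received = canDecode | canEncode // e.g. the hvc1 entry in videoFourCcInfoMap

print(received & canEncode != 0)  // true: publishing this codec is allowed
print(received & canForward != 0) // false: no forward capability
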
diff --git a/HaishinKit/Sources/RTMP/RTMPEnhanced.swift b/HaishinKit/Sources/RTMP/RTMPEnhanced.swift
index 9dd359d6b..1e2885510 100644
--- a/HaishinKit/Sources/RTMP/RTMPEnhanced.swift
+++ b/HaishinKit/Sources/RTMP/RTMPEnhanced.swift
@@ -1,7 +1,77 @@
+enum RTMPAudioFourCC: UInt32 {
+    case ac3 = 0x61632D33 // ac-3
+    case eac3 = 0x65632D33 // ec-3
+    case opus = 0x4F707573 // Opus
+    case mp3 = 0x2E6D7033 // .mp3
+    case flac = 0x664C6143 // fLaC
+    case aac = 0x6D703461 // mp4a
+
+    var isSupported: Bool {
+        switch self {
+        case .ac3:
+            return false
+        case .eac3:
+            return false
+        case .opus:
+            return true
+        case .mp3:
+            return false
+        case .flac:
+            return false
+        case .aac:
+            return false
+        }
+    }
+}
+
+extension RTMPAudioFourCC: CustomStringConvertible {
+    var description: String {
+        switch self {
+        case .ac3:
+            return "ac-3"
+        case .eac3:
+            return "ec-3"
+        case .opus:
+            return "Opus"
+        case .mp3:
+            return ".mp3"
+        case .flac:
+            return "fLaC"
+        case .aac:
+            return "mp4a"
+        }
+    }
+}
+
+enum RTMPAudioPacketType: UInt8 {
+    case sequenceStart = 0
+    case codedFrames = 1
+    case sequenceEnd = 2
+    case multiChannelConfig = 4
+    case multiTrack = 5
+    case modEx = 7
+}
+
+enum RTMPAudioPacketModExType: Int {
+    case timestampOffsetNano = 0
+}
+
+enum RTMPAVMultiTrackType: Int {
+    case oneTrack = 0
+    case manyTracks = 1
+    case manyTracksManyCodecs = 2
+}
+
+enum RTMPAudioChannelOrder: Int {
+    case unspecified = 0
+    case native = 1
+    case custom = 2
+}
+
 enum RTMPVideoFourCC: UInt32 {
-    case av1 = 0x61763031 // { 'a', 'v', '0', '1' }
-    case vp9 = 0x76703039 // { 'v', 'p', '0', '9' }
-    case hevc = 0x68766331 // { 'h', 'v', 'c', '1' }
+    case av1 = 0x61763031 // av01
+    case vp9 = 0x76703039 // vp09
+    case hevc = 0x68766331 // hvc1
 
     var isSupported: Bool {
         switch self {
@@ -15,6 +85,19 @@
     }
 }
 
+extension RTMPVideoFourCC: CustomStringConvertible {
+    var description: String {
+        switch self {
+        case .av1:
+            return "av01"
+        case .vp9:
+            return "vp09"
+        case .hevc:
+            return "hvc1"
+        }
+    }
+}
+
 enum RTMPVideoPacketType: UInt8 {
     case sequenceStart = 0
     case codedFrames = 1
diff --git a/HaishinKit/Sources/RTMP/RTMPFoundation.swift b/HaishinKit/Sources/RTMP/RTMPFoundation.swift
index b6a0b450a..e1cd16efb 100644
--- a/HaishinKit/Sources/RTMP/RTMPFoundation.swift
+++ b/HaishinKit/Sources/RTMP/RTMPFoundation.swift
@@ -38,7 +38,7 @@ enum RTMPAudioCodec: UInt8 {
     case g711A = 7
     /// The G.711 mu-law codec.
     case g711MU = 8
-    /// The signal FOURCC mode.
+    /// The signal FOURCC mode. E-RTMP.
     case exheader = 9
     /// The AAC codec.
     case aac = 10
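On the wire, the extended audio tag packs `RTMPAudioCodec.exheader` (9) into the upper nibble of byte 0 and the `RTMPAudioPacketType` into the lower nibble, followed by the fourCC in big-endian order; that is exactly what the RTMPMessage changes below assemble. A standalone sketch of the layout (byte values only, no HaishinKit types):

import Foundation

// Extended audio header byte: codec id 9 in the high nibble, packet type in
// the low nibble, then the big-endian "Opus" fourCC (0x4F707573).
let exheader: UInt8 = 9      // RTMPAudioCodec.exheader
let sequenceStart: UInt8 = 0 // RTMPAudioPacketType.sequenceStart

var message = Data([exheader << 4 | sequenceStart])
withUnsafeBytes(of: UInt32(0x4F707573).bigEndian) { message.append(contentsOf: $0) }
print(message.map { String(format: "%02X", $0) }) // ["90", "4F", "70", "75", "73"]
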
diff --git a/HaishinKit/Sources/RTMP/RTMPMessage.swift b/HaishinKit/Sources/RTMP/RTMPMessage.swift
index 0d6e60f7f..daeccbf95 100644
--- a/HaishinKit/Sources/RTMP/RTMPMessage.swift
+++ b/HaishinKit/Sources/RTMP/RTMPMessage.swift
@@ -387,14 +387,25 @@ struct RTMPAudioMessage: RTMPMessage {
     }
 
     init?(streamId: UInt32, timestamp: UInt32, formatDescription: CMFormatDescription?) {
-        guard let config = AudioSpecificConfig(formatDescription: formatDescription) else {
-            return nil
-        }
         self.streamId = streamId
         self.timestamp = timestamp
-        var buffer = Data([Self.AAC_HEADER, RTMPAACPacketType.seq.rawValue])
-        buffer.append(contentsOf: config.bytes)
-        self.payload = buffer
+        switch formatDescription?.mediaSubType {
+        case .opus:
+            guard let header = OpusHeaderPacket(formatDescription: formatDescription) else {
+                return nil
+            }
+            var buffer = Data([RTMPAudioCodec.exheader.rawValue << 4 | RTMPAudioPacketType.sequenceStart.rawValue])
+            buffer.append(contentsOf: RTMPAudioFourCC.opus.rawValue.bigEndian.data)
+            buffer.append(header.payload)
+            self.payload = buffer
+        default:
+            guard let config = AudioSpecificConfig(formatDescription: formatDescription) else {
+                return nil
+            }
+            var buffer = Data([Self.AAC_HEADER, RTMPAACPacketType.seq.rawValue])
+            buffer.append(contentsOf: config.bytes)
+            self.payload = buffer
+        }
     }
 
     init?(streamId: UInt32, timestamp: UInt32, audioBuffer: AVAudioCompressedBuffer?) {
@@ -403,9 +414,17 @@
         }
         self.streamId = streamId
         self.timestamp = timestamp
-        var buffer = Data([Self.AAC_HEADER, RTMPAACPacketType.raw.rawValue])
-        buffer.append(audioBuffer.data.assumingMemoryBound(to: UInt8.self), count: Int(audioBuffer.byteLength))
-        self.payload = buffer
+        switch audioBuffer.format.formatDescription.mediaSubType {
+        case .opus:
+            var buffer = Data([RTMPAudioCodec.exheader.rawValue << 4 | RTMPAudioPacketType.codedFrames.rawValue])
+            buffer.append(contentsOf: RTMPAudioFourCC.opus.rawValue.bigEndian.data)
+            buffer.append(audioBuffer.data.assumingMemoryBound(to: UInt8.self), count: Int(audioBuffer.byteLength))
+            self.payload = buffer
+        default:
+            var buffer = Data([Self.AAC_HEADER, RTMPAACPacketType.raw.rawValue])
+            buffer.append(audioBuffer.data.assumingMemoryBound(to: UInt8.self), count: Int(audioBuffer.byteLength))
+            self.payload = buffer
+        }
     }
 
     func copyMemory(_ audioBuffer: AVAudioCompressedBuffer?) {
diff --git a/HaishinKit/Tests/RTMP/FLVVideoFourCCTests.swift b/HaishinKit/Tests/RTMP/RTMPVideoFourCCTests.swift
similarity index 94%
rename from HaishinKit/Tests/RTMP/FLVVideoFourCCTests.swift
rename to HaishinKit/Tests/RTMP/RTMPVideoFourCCTests.swift
index c9b81b6d3..1a29f1ca5 100644
--- a/HaishinKit/Tests/RTMP/FLVVideoFourCCTests.swift
+++ b/HaishinKit/Tests/RTMP/RTMPVideoFourCCTests.swift
@@ -4,7 +4,7 @@ import Testing
 
 @testable import HaishinKit
 
-@Suite struct FLVVideoFourCCTests {
+@Suite struct RTMPVideoFourCCTests {
     @Test func main() {
         #expect("av01" == str4(n: Int(RTMPVideoFourCC.av1.rawValue)))
         #expect("hvc1" == str4(n: Int(RTMPVideoFourCC.hevc.rawValue)))
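
The renamed suite still only covers the video fourCCs. A natural companion, sketched here as a suggestion rather than part of the patch, would pin down the new audio values with the same `str4` helper the existing test uses:

import Testing

@testable import HaishinKit

// Hypothetical companion suite; str4 is the helper RTMPVideoFourCCTests uses.
@Suite struct RTMPAudioFourCCTests {
    @Test func main() {
        #expect("Opus" == str4(n: Int(RTMPAudioFourCC.opus.rawValue)))
        #expect("mp4a" == str4(n: Int(RTMPAudioFourCC.aac.rawValue)))
        #expect("ec-3" == str4(n: Int(RTMPAudioFourCC.eac3.rawValue)))
    }
}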