diff --git a/Sources/YouTubeKit/Cipher.swift b/Sources/YouTubeKit/Cipher.swift
index fa47f41..89b3ed8 100644
--- a/Sources/YouTubeKit/Cipher.swift
+++ b/Sources/YouTubeKit/Cipher.swift
@@ -13,7 +13,7 @@ class Cipher {
 
     let js: String
 
-    private let transformPlan: [String]
+    private let transformPlan: [(func: JSFunction, param: Int)]
     private let transformMap: [String: JSFunction]
 
     private let jsFuncPatterns = [
@@ -30,16 +30,18 @@ class Cipher {
 
     init(js: String) throws {
         self.js = js
-        self.transformPlan = try Cipher.getTransformPlan(js: js)
+
+        let rawTransformPlan = try Cipher.getRawTransformPlan(js: js)
 
         let varRegex = NSRegularExpression(#"^\$*\w+\W"#)
-        guard let varMatch = varRegex.firstMatch(in: transformPlan[0], group: 0) else {
+        guard let firstCall = rawTransformPlan.first, let varMatch = varRegex.firstMatch(in: firstCall, group: 0) else {
             throw YouTubeKitError.regexMatchError
         }
         var variable = varMatch.content
         _ = variable.popLast()
 
         self.transformMap = try Cipher.getTransformMap(js: js, variable: variable)
+        self.transformPlan = try Cipher.getDecodedTransformPlan(rawPlan: rawTransformPlan, variable: variable, transformMap: transformMap)
 
         self.throttlingPlan = try Cipher.getThrottlingPlan(js: js)
         self.throttlingArray = try Cipher.getThrottlingFunctionArray(js: js)
@@ -83,10 +85,24 @@ class Cipher {
     }
 
     /// Decipher the signature
-    func getSignature(cipheredSignature: String) -> String {
+    /// - Returns: the deciphered signature, or `nil` if the transform plan is empty or a swap hits an empty signature
+    func getSignature(cipheredSignature: String) -> String? {
         var signature = Array(cipheredSignature)
 
-        // TODO: apply transform functions
+        guard !transformPlan.isEmpty else { return nil }
+
+        for (function, param) in transformPlan {
+            switch function {
+            case .reverse:
+                signature.reverse()
+            case .splice:
+                signature = Array(signature.dropFirst(param))
+            case .swap:
+                // `param % 0` would trap, so give up once nothing is left to swap
+                guard !signature.isEmpty else { return nil }
+                signature.swapAt(0, param % signature.count)
+            }
+        }
 
         return String(signature)
     }
@@ -133,7 +149,7 @@ class Cipher {
 
     /// Extract the "transform plan".
/// The "transform plan" is the functions that the ciphered signature is cycled through to obtain the actual signature.
-    class func getTransformPlan(js: String) throws -> [String] {
+    class func getRawTransformPlan(js: String) throws -> [String] {
         let name = try getInitialFunctionName(js: js)
         let pattern = NSRegularExpression(NSRegularExpression.escapedPattern(for: name) + #"=function\(\w\)\{[a-z=\.\(\"\)]*;(.*);(?:.+)\}"#)
         os_log("getting transform plan", log: log, type: .debug)
@@ -143,6 +159,46 @@ class Cipher {
         throw YouTubeKitError.regexMatchError
     }
 
+    /// Decodes the raw transform plan into `(function, parameter)` pairs.
+    ///
+    /// Each raw entry is expected to look like `wP.Nl(a,65)`: a call on `variable` whose method name is a key
+    /// of `transformMap` and whose last argument is a decimal integer.
+    /// - Parameters:
+    ///   - rawPlan: the function-call strings to decode
+    ///   - variable: name of the object the calls are made on (regex-escaped before use)
+    ///   - transformMap: maps a method name to its `JSFunction`
+    /// - Returns: one decoded step per raw entry, in the same order
+    /// - Throws: only if the call-matching regular expression cannot be compiled
+    /// - Note: returns an empty array (after logging the reason) as soon as one entry can't be decoded
+    class func getDecodedTransformPlan(rawPlan: [String], variable: String, transformMap: [String: JSFunction]) throws -> [(func: JSFunction, param: Int)] {
+        let pattern = try NSRegularExpression(pattern: NSRegularExpression.escapedPattern(for: variable) + #"\.(.+)\(.+,(\d+)\)"#) // expecting e.g. "wP.Nl(a,65)"
+
+        var result: [(func: JSFunction, param: Int)] = []
+
+        for functionCall in rawPlan {
+            guard let (_, matchGroups) = pattern.allMatches(in: functionCall, includingGroups: [1, 2]).first,
+                  let functionName = matchGroups[1]?.content,
+                  let parameter = matchGroups[2]?.content
+            else {
+                os_log("failed to decode function call %{public}@", log: log, type: .error, functionCall)
+                return []
+            }
+
+            // `\d+` can still match a number that is too large for `Int`
+            guard let decodedParameter = Int(parameter) else {
+                os_log("failed to decode parameter %{public}@", log: log, type: .error, parameter)
+                return []
+            }
+            guard let function = transformMap[functionName] else {
+                os_log("failed to find function %{public}@", log: log, type: .error, functionName)
+                return []
+            }
+
+            result.append((func: function, param: decodedParameter))
+        }
+        return result
+    }
+
     /// Extract the "transform object".
     /// The "transform object" contains the function definitions referenced in the transform plan".
The ``variable`` argument is the obfuscated variable name
     /// which contains these functions, for example, given the function call ``DE.AJ(a,15)`` returned by the transform plan, "DE" would be the var.
diff --git a/Sources/YouTubeKit/Extensions/Concurrency.swift b/Sources/YouTubeKit/Extensions/Concurrency.swift
new file mode 100644
index 0000000..8015dac
--- /dev/null
+++ b/Sources/YouTubeKit/Extensions/Concurrency.swift
@@ -0,0 +1,42 @@
+//
+// Concurrency.swift
+// YouTubeKit
+//
+// Created by Alexander Eichhorn on 13.12.23.
+//
+
+import Foundation
+
+@available(iOS 13.0, watchOS 6.0, tvOS 13.0, macOS 10.15, *)
+extension Sequence {
+
+    /// Transforms every element with an async closure, one element after the other.
+    /// - Parameter transform: async mapping closure; an error it throws aborts the loop and is rethrown
+    /// - Returns: the transformed elements in the order of the sequence
+    func asyncMap<T>(_ transform: (Element) async throws -> T) async rethrows -> [T] {
+        var values = [T]()
+
+        for element in self {
+            try await values.append(transform(element))
+        }
+
+        return values
+    }
+
+    /// Transforms every element in its own `Task`, so the transforms can run concurrently.
+    /// - Parameter transform: non-throwing async mapping closure
+    /// - Returns: the transformed elements in the order of the sequence (the tasks are awaited in creation order)
+    /// - Note: the tasks are unstructured, i.e. cancelling the caller does not cancel them
+    func concurrentMap<T>(_ transform: @escaping (Element) async -> T) async -> [T] {
+        let tasks = map { element in
+            Task {
+                await transform(element)
+            }
+        }
+
+        return await tasks.asyncMap { task in
+            await task.value
+        }
+    }
+
+}
diff --git a/Sources/YouTubeKit/Extraction.swift b/Sources/YouTubeKit/Extraction.swift
index d0745ed..52fd3e3 100644
--- a/Sources/YouTubeKit/Extraction.swift
+++ b/Sources/YouTubeKit/Extraction.swift
@@ -86,6 +86,12 @@ class Extraction {
         throw YouTubeKitError.regexMatchError
     }
 
+    /// Tries to find video info in watch html directly
+    class func getVideoInfo(fromHTML html: String) throws -> InnerTube.VideoInfo {
+        let pattern = NSRegularExpression(#"ytInitialPlayerResponse\s*=\s*"#)
+        return try parseForObject(InnerTube.VideoInfo.self, html: html, precedingRegex: pattern)
+    }
+
     /// Return the playability status and status explanation of the video
     /// For example, a video may have a status of LOGIN\_REQUIRED, and an explanation
     /// of "This is a private video. Please sign in to verify that you may see it."
@@ -280,6 +286,8 @@ class Extraction { class func applySignature(streamManifest: inout [InnerTube.StreamingData.Format], videoInfo: InnerTube.VideoInfo, js: String) throws { var cipher = ThrowingLazy(try Cipher(js: js)) + var invalidStreamIndices = [Int]() + for (i, stream) in streamManifest.enumerated() { if let url = stream.url { if url.contains("signature") || (stream.s == nil && (url.contains("&sig=") || url.contains("&lsig="))) { @@ -288,6 +296,10 @@ class Extraction { } if let cipheredSignature = stream.s { + // Remove the stream from `streamManifest` for now, as signature extraction currently doesn't work most of time + invalidStreamIndices.append(i) + continue // Skip the rest of the code as we are removing this stream + let signature = try cipher.value.getSignature(cipheredSignature: cipheredSignature) os_log("finished descrambling signature for itag=%{public}i", log: log, type: .debug, stream.itag) @@ -311,6 +323,11 @@ class Extraction { } } } + + // Remove invalid streams + for index in invalidStreamIndices.reversed() { + streamManifest.remove(at: index) + } } /// Breaks up the data in the ``type`` key of the manifest, which contains the diff --git a/Sources/YouTubeKit/YouTube.swift b/Sources/YouTubeKit/YouTube.swift index 28bb95a..0fbc8ee 100644 --- a/Sources/YouTubeKit/YouTube.swift +++ b/Sources/YouTubeKit/YouTube.swift @@ -6,6 +6,7 @@ // import Foundation +import os.log @available(iOS 13.0, watchOS 6.0, tvOS 13.0, macOS 10.15, *) public class YouTube { @@ -16,7 +17,7 @@ public class YouTube { private static var __js: String? // caches js between calls private static var __jsURL: URL? - private var _videoInfo: InnerTube.VideoInfo? + private var _videoInfos: [InnerTube.VideoInfo]? private var _watchHTML: String? private var _embedHTML: String? @@ -33,8 +34,7 @@ public class YouTube { /// - Note: Currently doesn't respect `method` set. It always uses `.local` public var metadata: YouTubeMetadata? 
{ get async throws { - guard let videoDetails = try await videoInfo.videoDetails else { return nil } - return .metadata(from: videoDetails) + return .metadata(from: try await videoDetails) } } @@ -44,6 +44,10 @@ public class YouTube { URL(string: "https://youtube.com/watch?v=\(videoID)")! } + private var extendedWatchURL: URL { + URL(string: "https://youtube.com/watch?v=\(videoID)&bpctr=9999999999&has_verified=1")! + } + var embedURL: URL { URL(string: "https://www.youtube.com/embed/\(videoID)")! } @@ -59,6 +63,8 @@ public class YouTube { let methods: [ExtractionMethod] + private let log = OSLog(YouTube.self) + /// - parameter methods: Methods used to extract streams from the video - ordered by priority (Default: only local) public init(videoID: String, proxies: [String: URL] = [:], useOAuth: Bool = false, allowOAuthCache: Bool = false, methods: [ExtractionMethod] = [.local]) { self.videoID = videoID @@ -85,7 +91,7 @@ public class YouTube { if let cached = _watchHTML { return cached } - var request = URLRequest(url: watchURL) + var request = URLRequest(url: extendedWatchURL) request.setValue("Mozilla/5.0", forHTTPHeaderField: "User-Agent") request.setValue("en-US,en", forHTTPHeaderField: "accept-language") let (data, _) = try await URLSession.shared.data(for: request) @@ -112,7 +118,7 @@ public class YouTube { /// check whether the video is available public func checkAvailability() async throws { let (status, messages) = try Extraction.playabilityStatus(watchHTML: await watchHTML) - let streamingData = try await videoInfo.streamingData + let streamingData = try await videoInfos.map { $0.streamingData } for reason in messages { switch status { @@ -126,7 +132,7 @@ public class YouTube { } case .error: throw YouTubeKitError.videoUnavailable - case .liveStream where streamingData?.hlsManifestUrl == nil : + case .liveStream where streamingData.allSatisfy { $0?.hlsManifestUrl == nil } : throw YouTubeKitError.liveStreamError case .ok, .none, .liveStream: continue @@ 
-193,20 +199,39 @@ public class YouTube {
             let result = try await Task.retry(with: methods) { method in
                 switch method {
                 case .local:
-                    var streamManifest = Extraction.applyDescrambler(streamData: try await streamingData)
+                    // resolved only for its side effects: throws without any streaming data, may run the age gate bypass
+                    _ = try await self.streamingData
+                    let videoInfos = try await self.videoInfos
 
-                    do {
-                        try await Extraction.applySignature(streamManifest: &streamManifest, videoInfo: videoInfo, js: js)
-                    } catch {
-                        // to force an update to the js file, we clear the cache and retry
-                        _js = nil
-                        _jsURL = nil
-                        YouTube.__js = nil
-                        YouTube.__jsURL = nil
-                        try await Extraction.applySignature(streamManifest: &streamManifest, videoInfo: videoInfo, js: js)
+                    var streams = [Stream]()
+                    var existingITags = Set<Int>()
+
+                    for videoInfo in videoInfos {
+                        // read the streaming data off the same response instead of zipping two arrays that can differ in length
+                        guard let streamingData = videoInfo.streamingData else { continue }
+
+                        var streamManifest = Extraction.applyDescrambler(streamData: streamingData)
+
+                        do {
+                            try await Extraction.applySignature(streamManifest: &streamManifest, videoInfo: videoInfo, js: js)
+                        } catch {
+                            // to force an update to the js file, we clear the cache and retry
+                            _js = nil
+                            _jsURL = nil
+                            YouTube.__js = nil
+                            YouTube.__jsURL = nil
+                            try await Extraction.applySignature(streamManifest: &streamManifest, videoInfo: videoInfo, js: js)
+                        }
+
+                        let newStreams = streamManifest.compactMap { try? Stream(format: $0) }
+
+                        // make sure only one stream per itag exists (the first response that offers it wins)
+                        for stream in newStreams where existingITags.insert(stream.itag.itag).inserted {
+                            streams.append(stream)
+                        }
                     }
 
-                    return streamManifest.compactMap { try?
Stream(format: $0) } + return streams case .remote(let serverURL): @@ -227,21 +252,22 @@ public class YouTube { public var livestreams: [Livestream] { get async throws { var livestreams = [Livestream]() - if let hlsManifestUrl = try await streamingData.hlsManifestUrl.flatMap({ URL(string: $0) }) { - livestreams.append(Livestream(url: hlsManifestUrl, streamType: .hls)) - } + let hlsURLs = try await streamingData.compactMap { $0.hlsManifestUrl }.compactMap { URL(string: $0) } + livestreams.append(contentsOf: hlsURLs.map { Livestream(url: $0, streamType: .hls) }) return livestreams } } /// streaming data from video info - var streamingData: InnerTube.StreamingData { + var streamingData: [InnerTube.StreamingData] { get async throws { - if let streamingData = try await videoInfo.streamingData { + let streamingData = try await videoInfos.compactMap { $0.streamingData } + if !streamingData.isEmpty { return streamingData } else { try await bypassAgeGate() - if let streamingData = try await videoInfo.streamingData { + let streamingData = try await videoInfos.compactMap { $0.streamingData } + if !streamingData.isEmpty { return streamingData } else { throw YouTubeKitError.extractError @@ -253,7 +279,7 @@ public class YouTube { /// Video details from video info. 
var videoDetails: InnerTube.VideoInfo.VideoDetails {
         get async throws {
-            if let videoDetails = try await videoInfo.videoDetails {
+            if let videoDetails = try await videoInfos.lazy.compactMap({ $0.videoDetails }).first {
                 return videoDetails
             } else {
                 throw YouTubeKitError.extractError
@@ -261,17 +287,58 @@ public class YouTube {
         }
     }
 
-    var videoInfo: InnerTube.VideoInfo {
+    var videoInfos: [InnerTube.VideoInfo] {
         get async throws {
-            if let cached = _videoInfo {
+            if let cached = _videoInfos {
                 return cached
             }
 
-            let innertube = InnerTube(useOAuth: useOAuth, allowCache: allowOAuthCache)
+            // try extracting video infos from watch html directly as well (never throws: a failure is logged and becomes nil)
+            let watchVideoInfoTask = Task<InnerTube.VideoInfo?, Never> {
+                do {
+                    return try await Extraction.getVideoInfo(fromHTML: watchHTML)
+                } catch let error {
+                    os_log("Couldn't extract video info from main watch html: %{public}@", log: log, type: .debug, error.localizedDescription)
+                    return nil
+                }
+            }
+
+            let innertubeClients: [InnerTube.ClientType] = [.ios, .android]
+
+            let results: [Result<InnerTube.VideoInfo, Error>] = await innertubeClients.concurrentMap { [videoID, useOAuth, allowOAuthCache] client in
+                let innertube = InnerTube(client: client, useOAuth: useOAuth, allowCache: allowOAuthCache)
+
+                do {
+                    let innertubeResponse = try await innertube.player(videoID: videoID)
+                    return .success(innertubeResponse)
+                } catch let error {
+                    return .failure(error)
+                }
+            }
+
+            var videoInfos = [InnerTube.VideoInfo]()
+            var errors = [Error]()
+
+            for result in results {
+                switch result {
+                case .success(let innertubeResponse):
+                    videoInfos.append(innertubeResponse)
+                case .failure(let error):
+                    errors.append(error)
+                }
+            }
+
+            // append potentially extracted video info (with least priority)
+            if let watchVideoInfo = await watchVideoInfoTask.value {
+                videoInfos.append(watchVideoInfo)
+            }
+
+            if videoInfos.isEmpty {
+                throw errors.first ??
YouTubeKitError.extractError + } - let innertubeResponse = try await innertube.player(videoID: videoID) - _videoInfo = innertubeResponse - return innertubeResponse + _videoInfos = videoInfos + return videoInfos } } @@ -283,7 +350,7 @@ public class YouTube { throw YouTubeKitError.videoAgeRestricted } - _videoInfo = innertubeResponse + _videoInfos = [innertubeResponse] } /// Interface to query both adaptive (DASH) and progressive streams. diff --git a/Tests/YouTubeKitTests/YouTubeKitTests.swift b/Tests/YouTubeKitTests/YouTubeKitTests.swift index a01c9a7..9671e7d 100644 --- a/Tests/YouTubeKitTests/YouTubeKitTests.swift +++ b/Tests/YouTubeKitTests/YouTubeKitTests.swift @@ -34,6 +34,10 @@ final class YouTubeKitTests: XCTestCase { let bestAudioStream = streams.filterAudioOnly().filter { $0.fileExtension == .m4a }.highestAudioBitrateStream() print(bestAudioStream) + XCTAssert(!streams.filterVideoOnly().isEmpty) + XCTAssert(!streams.filterAudioOnly().isEmpty) + XCTAssert(!streams.filterVideoAndAudio().isEmpty) + XCTAssertEqual(bestAudioStream?.url, bestAudioStreamLegacy?.url) try await checkStreamReachability(bestAudioStream) @@ -57,6 +61,10 @@ final class YouTubeKitTests: XCTestCase { //print(streams.filterAudioOnly().filter { $0.subtype == "mp4" }.highestAudioBitrateStream()?.url) print(streams.filterVideoOnly().highestResolutionStream()) + XCTAssert(!streams.filterVideoOnly().isEmpty) + XCTAssert(!streams.filterAudioOnly().isEmpty) + XCTAssert(!streams.filterVideoAndAudio().isEmpty) + try await checkStreamReachability(streams.filterVideoOnly().highestResolutionStream()) // test Cipher initialization directly (in case not lazily loaded) @@ -79,6 +87,10 @@ final class YouTubeKitTests: XCTestCase { //print(streams.filterVideoOnly().highestResolutionStream()) print(streams.filter { $0.isProgressive && $0.fileExtension == .mp4 }.lowestResolutionStream()!) 
+ XCTAssert(!streams.filterVideoOnly().isEmpty) + XCTAssert(!streams.filterAudioOnly().isEmpty) + XCTAssert(!streams.filterVideoAndAudio().isEmpty) + try await checkStreamReachability(streams.filterVideoOnly().highestResolutionStream()) // test Cipher initialization directly (in case not lazily loaded) @@ -100,6 +112,10 @@ final class YouTubeKitTests: XCTestCase { //print(streams.filterAudioOnly().filter { $0.subtype == "mp4" }.highestAudioBitrateStream()?.url) print(streams.filterVideoOnly().highestResolutionStream()) + XCTAssert(!streams.filterVideoOnly().isEmpty) + XCTAssert(!streams.filterAudioOnly().isEmpty) + XCTAssert(!streams.filterVideoAndAudio().isEmpty) + try await checkStreamReachability(streams.filterVideoOnly().highestResolutionStream()) // test Cipher initialization directly (in case not lazily loaded) @@ -119,6 +135,11 @@ final class YouTubeKitTests: XCTestCase { print(streams.count) //print(streams.filterAudioOnly().filter { $0.subtype == "mp4" }.highestAudioBitrateStream()?.url) print(streams.filter { $0.isProgressive }.highestResolutionStream()) + + XCTAssert(!streams.filterVideoOnly().isEmpty) + XCTAssert(!streams.filterAudioOnly().isEmpty) + XCTAssert(!streams.filterVideoAndAudio().isEmpty) + } catch let error { XCTFail("did throw error: \(error)") } @@ -158,6 +179,10 @@ final class YouTubeKitTests: XCTestCase { checkStreams(streams) print(streams.count) + XCTAssert(!streams.filterVideoOnly().isEmpty) + XCTAssert(!streams.filterAudioOnly().isEmpty) + XCTAssert(!streams.filterVideoAndAudio().isEmpty) + try await checkStreamReachability(streams.filterVideoOnly().highestResolutionStream()) } catch let error { XCTFail("did throw error: \(error)")