From 1ff3d644e5f64746bb04e4e90c36d1e408e6babc Mon Sep 17 00:00:00 2001 From: Matt <85322+mattmassicotte@users.noreply.github.com> Date: Sat, 27 Nov 2021 20:34:24 -0500 Subject: [PATCH] On-device symblication --- Package.swift | 5 +- README.md | 21 ++- Sources/BinaryImage/BinaryImage.h | 71 +++++++++ Sources/BinaryImage/BinaryImage.m | 71 +++++++++ Sources/Meter/CallStackTree.swift | 28 +++- Sources/Meter/DiagnosticPayload.swift | 14 ++ Sources/Meter/DlfcnSymbolicator.swift | 55 +++++++ Sources/Meter/Symbolicator.swift | 139 ++++++++++++++++++ Tests/BinaryImageTests/BinaryImageTests.swift | 20 +++ Tests/MeterTests/SymbolicationTests.swift | 74 ++++++++++ 10 files changed, 491 insertions(+), 7 deletions(-) create mode 100644 Sources/BinaryImage/BinaryImage.h create mode 100644 Sources/BinaryImage/BinaryImage.m create mode 100644 Sources/Meter/DlfcnSymbolicator.swift create mode 100644 Sources/Meter/Symbolicator.swift create mode 100644 Tests/BinaryImageTests/BinaryImageTests.swift create mode 100644 Tests/MeterTests/SymbolicationTests.swift diff --git a/Package.swift b/Package.swift index 68b9f04..a65a59b 100644 --- a/Package.swift +++ b/Package.swift @@ -10,11 +10,14 @@ let package = Package( ], dependencies: [], targets: [ - .target(name: "Meter", dependencies: []), + .target(name: "BinaryImage", dependencies: [], publicHeadersPath: ""), + .target(name: "Meter", dependencies: ["BinaryImage"]), .testTarget(name: "MeterTests", dependencies: ["Meter"], resources: [ .copy("Resources"), ]), + .testTarget(name: "BinaryImageTests", + dependencies: ["BinaryImage"]), ] ) diff --git a/README.md b/README.md index 89db489..9dd76cc 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ Meter is a companion library to [MetricKit](https://developer.apple.com/document - API for `MXCallStackTree` - Types for `MXDiagnostic` emulation and coding - `MXMetricManager`-like interface for unsupported platforms -- On-device symbolication (still under investigation) +- On-device 
symbolication ## Integration @@ -34,7 +34,7 @@ for frame in tree.callStacks[0].frames { ### MXMetricManager and Diagnostics Polyfill -MetricKit's crash reporting facilities require iOS 14, and isn't supported at all for tvOS, watchOS, or macOS. You may want to start moving towards using it as a standard interface between your app and whatever system consumes the data. Meter offers an API that's very similar to MetricKit's `MXMetricManager` to help do just that. +MetricKit's crash reporting facilities require iOS 14/macOS 12.0, and aren't supported at all for tvOS or watchOS. You may want to start moving towards using it as a standard interface between your app and whatever system consumes the data. Meter offers an API that's very similar to MetricKit's `MXMetricManager` to help do just that. ```swift // adding a subscriber @@ -51,7 +51,7 @@ extension MyObject: MeterPayloadSubscriber { MeterPayloadManager.shared.deliver(payloads) ``` -This makes it easier to support the full capabilities of MetricKit when available, and gracefully degrade when they aren't. It can be nice to have a uniform interface to whatever backend system you are using to consume the reports. And, as you move towards an iOS 14 minimum, and as (hopefully) Apple starts supporting MetricKit on more platforms, it will be easier to pull out Meter altogether. +This makes it easier to support the full capabilities of MetricKit when available, and gracefully degrade when they aren't. It can be nice to have a uniform interface to whatever backend system you are using to consume the reports. And, as you move towards a supported minimum, and as (hopefully) Apple starts supporting MetricKit on all platforms, it will be easier to pull out Meter altogether. Backwards compatibility is still up to you, though. 
One solution is [ImpactMeterAdapter](https://github.com/ChimeHQ/ImpactMeterAdapter), which uses [Impact](https://github.com/ChimeHQ/Impact) to collect crash data for OSes that don't support `MXCrashDiagnostic`. @@ -59,9 +59,20 @@ If you're also looking for a way to transmit report data to your server, check o ### On-Device Symbolication -The stack traces provided by MetricKit, like other types of crash logs, are not symbolicated. There are a bunch of different ways to tackle this problem, but one very convenient option is just to do it as a post-processing step on the device where the crash occurred. The `dlopen` family of APIs could be one approach. It has had some real limitions in the past, particularly on iOS. But, still worth a look. +The stack traces provided by MetricKit, like other types of crash logs, are not symbolicated. There are a bunch of different ways to tackle this problem, but one very convenient option is just to do it as a post-processing step on the device where the crash occurred. This does come, however, with one major drawback. It only works when you still have access to the same binaries. OS updates will almost certainly change all the OS binaries. The same is true for an app update, though in that case, an off-line symbolication step using a dSYM is still doable. -Right now, this functionality is still in the investigation phase. But, if you have thoughts, please get in touch! +Meter provides an API for performing symbolication, via the `Symbolicator` protocol. The core of this protocol should be usable to symbolicate any address, and is not tied to MetricKit. But, the protocol also does include a number of convenience methods that can operate on the various MetricKit classes. The result uses the Meter's wrapper classes to return `Frame` instances which include a `symbolInfo` property. This property can be accessed directly or just re-encoded for transport. 
+ +```swift +let symbolicator = DlfcnSymbolicator() +let symPayload = symbolicator.symbolicate(payload: diagnosticPayload) +``` + +#### DlfcnSymbolicator + +This class implements the `Symbolicator` protocol, and uses the functions within `dlfcn.h` to determine symbol/offset. This works, but does have some limitations. First, it relies on looking up symbols in the **currently executing** process, so it will only work if the needed binary is currently loaded. Second, these functions return `<redacted>` for some binaries' symbols. I know the symbol information is still accessible from the binary, so it's unclear why this is done. + +This is a relatively inexpensive symbolication pass, and is a first effort. Further work here is definitely necessary. ### Suggestions or Feedback diff --git a/Sources/BinaryImage/BinaryImage.h b/Sources/BinaryImage/BinaryImage.h new file mode 100644 index 0000000..c5d9e92 --- /dev/null +++ b/Sources/BinaryImage/BinaryImage.h @@ -0,0 +1,71 @@ +#ifndef BinaryImage_h +#define BinaryImage_h + +#define _Noescape __attribute__((noescape)) +#define ASSUME_NONNULL_BEGIN _Pragma("clang assume_nonnull begin") +#define ASSUME_NONNULL_END _Pragma("clang assume_nonnull end") + +#include <stdint.h> +#include <stdbool.h> +#include <mach-o/loader.h> + +#if __OBJC__ +#import <Foundation/Foundation.h> +#endif + +ASSUME_NONNULL_BEGIN + +typedef struct { + uintptr_t address; + intptr_t loadAddress; + uintptr_t length; +} MachODataRegion; + +typedef struct { + const uint8_t* uuid; + intptr_t slide; + MachODataRegion ehFrameRegion; + MachODataRegion unwindInfoRegion; + uintptr_t loadAddress; + uintptr_t textSize; + const char* path; +} MachOData; + +#if __LP64__ +typedef struct mach_header_64 MachOHeader; +typedef struct section_64 MachOSection; +typedef struct segment_command_64 SegmentCommand; +typedef struct section_64 Section; + +const static uint32_t LCSegment = LC_SEGMENT_64; +#else +typedef struct mach_header MachOHeader; +typedef struct section MachOSection; +typedef struct segment_command SegmentCommand; +typedef struct section 
Section; + +const static uint32_t LCSegment = LC_SEGMENT; +#endif + +typedef struct { + const char* name; + const MachOHeader* header; +} BinaryImage; + +typedef void (^BinaryImageIterator)(BinaryImage image, bool* stop); + +void BinaryImageEnumerateLoadedImages(_Noescape BinaryImageIterator iterator); + +typedef void (^BinaryImageLoadCommandIterator)(const struct load_command* lcmd, uint32_t cmdCode, bool* stop); + +void BinaryImageEnumerateLoadCommands(const MachOHeader* header, _Noescape BinaryImageLoadCommandIterator iterator); + +uint8_t* _Nullable BinaryImageGetUUIDBytesFromLoadCommand(const struct load_command* lcmd, uint32_t cmdCode); + +#if __OBJC__ +NSUUID* _Nullable BinaryuImageUUIDFromLoadCommand(const struct load_command* lcmd, uint32_t cmdCode); +#endif + +ASSUME_NONNULL_END + +#endif /* BinaryImage_h */ diff --git a/Sources/BinaryImage/BinaryImage.m b/Sources/BinaryImage/BinaryImage.m new file mode 100644 index 0000000..76b0c7b --- /dev/null +++ b/Sources/BinaryImage/BinaryImage.m @@ -0,0 +1,71 @@ +#import <Foundation/Foundation.h> + +#include "BinaryImage.h" +#include <mach-o/dyld.h> + +void BinaryImageEnumerateLoadedImages(BinaryImageIterator iterator) { + for (uint32_t i = 0; i < _dyld_image_count(); ++i) { + BinaryImage image = {0}; + + image.name = _dyld_get_image_name(i); + image.header = (MachOHeader*)_dyld_get_image_header(i); + + bool stop = false; + + iterator(image, &stop); + + if (stop) { + break; + } + } +} + +void BinaryImageEnumerateLoadCommands(const MachOHeader* header, BinaryImageLoadCommandIterator iterator) { + if (header == NULL) { + return; + } + + const uint8_t *ptr = (uint8_t *)header + sizeof(MachOHeader); + + for (uint32_t i = 0; i < header->ncmds; ++i) { + const struct load_command* const lcmd = (struct load_command*)ptr; + const uint32_t cmdCode = lcmd->cmd & ~LC_REQ_DYLD; + + bool stop = false; + + iterator(lcmd, cmdCode, &stop); + + if (stop) { + break; + } + + ptr += lcmd->cmdsize; + } +} + +uint8_t* BinaryImageGetUUIDBytesFromLoadCommand(const struct 
load_command* lcmd, uint32_t cmdCode) { + if (lcmd == NULL || cmdCode != LC_UUID) { + return NULL; + } + + return ((struct uuid_command*)lcmd)->uuid; +} + +NSUUID* BinaryuImageUUIDFromLoadCommand(const struct load_command* lcmd, uint32_t cmdCode) { + const uint8_t* bytes = BinaryImageGetUUIDBytesFromLoadCommand(lcmd, cmdCode); + + return bytes != NULL ? [[NSUUID alloc] initWithUUIDBytes:bytes] : nil; /* a NULL lcmd or non-LC_UUID command yields nil, never an NSUUID built from a NULL buffer */ +} + +bool ImpactBinaryImageGetData(const MachOHeader* header, const char* path, MachOData* data) { + if (header == NULL || data == NULL) { + return false; + } + + const uint8_t *ptr = (uint8_t *)header + sizeof(MachOHeader); + + data->loadAddress = (uintptr_t)header; + data->path = path; + + return true; +} diff --git a/Sources/Meter/CallStackTree.swift b/Sources/Meter/CallStackTree.swift index 039aa44..f079ab9 100644 --- a/Sources/Meter/CallStackTree.swift +++ b/Sources/Meter/CallStackTree.swift @@ -41,6 +41,8 @@ public struct Frame: Codable { public var address: Int public var subFrames: [Frame]? + public var symbolInfo: [SymbolInfo]? + public init(binaryUUID: UUID? = nil, offsetIntoBinaryTextSegment: Int? = nil, sampleCount: Int? = nil, binaryName: String? = nil, address: Int, subFrames: [Frame]) { self.binaryUUID = binaryUUID self.offsetIntoBinaryTextSegment = offsetIntoBinaryTextSegment @@ -48,6 +50,17 @@ public struct Frame: Codable { self.binaryName = binaryName self.address = address self.subFrames = subFrames + self.symbolInfo = nil + } + + public init(frame: Frame, symbolInfo: [SymbolInfo], symbolicatedSubFrames: [Frame]?) 
{ + self.binaryUUID = frame.binaryUUID + self.offsetIntoBinaryTextSegment = frame.offsetIntoBinaryTextSegment + self.sampleCount = frame.sampleCount + self.binaryName = frame.binaryName + self.address = frame.address + self.subFrames = symbolicatedSubFrames + self.symbolInfo = symbolInfo } public var flattenedFrames: [Frame] { @@ -89,6 +102,19 @@ public struct Frame: Codable { extension Frame: Hashable { } +public extension Frame { + var symbolicationTarget: SymbolicationTarget? { + guard + let uuid = binaryUUID, + let loadAddress = binaryLoadAddress + else { + return nil + } + + return SymbolicationTarget(uuid: uuid, loadAddress: loadAddress, path: nil) + } +} + public class CallStack: NSObject, Codable { /// Indicates which thread caused the crash public var threadAttributed: Bool @@ -120,7 +146,7 @@ public class CallStackTree: Codable { #if os(iOS) || os(macOS) @available(iOS 14.0, macOS 12.0, *) - static func from(callStackTree: MXCallStackTree) throws -> CallStackTreeProtocol { + public static func from(callStackTree: MXCallStackTree) throws -> CallStackTreeProtocol { let data = callStackTree.jsonRepresentation() return try from(data: data) diff --git a/Sources/Meter/DiagnosticPayload.swift b/Sources/Meter/DiagnosticPayload.swift index 91eeec1..7ba28dd 100644 --- a/Sources/Meter/DiagnosticPayload.swift +++ b/Sources/Meter/DiagnosticPayload.swift @@ -107,6 +107,20 @@ public class CrashMetaData: MetaDataProtocol, Codable { self.signal = signal } + public init(diagnostic: CrashDiagnosticProtocol) { + self.deviceType = diagnostic.metaData.deviceType + self.applicationBuildVersion = diagnostic.metaData.applicationBuildVersion + self.applicationVersion = diagnostic.applicationVersion + self.osVersion = diagnostic.metaData.osVersion + self.platformArchitecture = diagnostic.metaData.platformArchitecture + self.regionFormat = diagnostic.metaData.regionFormat + self.virtualMemoryRegionInfo = diagnostic.virtualMemoryRegionInfo + self.exceptionType = 
diagnostic.exceptionType?.intValue + self.terminationReason = diagnostic.terminationReason + self.exceptionCode = diagnostic.exceptionCode?.intValue + self.signal = diagnostic.signal?.intValue + } + public func jsonRepresentation() -> Data { return (try? JSONEncoder().encode(self)) ?? Data() } diff --git a/Sources/Meter/DlfcnSymbolicator.swift b/Sources/Meter/DlfcnSymbolicator.swift new file mode 100644 index 0000000..03232eb --- /dev/null +++ b/Sources/Meter/DlfcnSymbolicator.swift @@ -0,0 +1,55 @@ +import Foundation +import BinaryImage + +public class DlfcnSymbolicator { + private var pathCache: [String : String] + + public init() { + self.pathCache = [:] + } + + private lazy var imageMap: [UUID: BinaryImage] = { + var map: [UUID: BinaryImage] = [:] + + BinaryImageEnumerateLoadedImages { image, _ in + BinaryImageEnumerateLoadCommands(image.header) { lcmd, code, stop in + switch code { + case UInt32(LC_UUID): + if let uuid = BinaryuImageUUIDFromLoadCommand(lcmd, code) { + map[uuid] = image + } + + stop.pointee = true + default: + break + } + } + } + + return map + }() +} + +extension DlfcnSymbolicator: Symbolicator { + public func symbolicate(address: Int, in target: SymbolicationTarget) -> [SymbolInfo] { + guard let loadedImage = imageMap[target.uuid] else { + return [] + } + + let loadAddress = Int(bitPattern: loadedImage.header) + let relativeAddress = address - target.loadAddress + let processAddress = loadAddress + relativeAddress + let ptr = UnsafeRawPointer(bitPattern: processAddress) + var info: Dl_info = Dl_info() + + guard dladdr(ptr, &info) != 0, let symbolName = info.dli_sname else { /* dladdr can find the image yet leave dli_sname NULL; treat that as "no symbol" instead of crashing */ + return [] + } + + let offset = processAddress - Int(bitPattern: info.dli_saddr) + let name = String(cString: symbolName) + let symbolInfo = SymbolInfo(symbol: name, offset: offset) + + return [symbolInfo] + } +} diff --git a/Sources/Meter/Symbolicator.swift b/Sources/Meter/Symbolicator.swift new file mode 100644 index 0000000..e35b153 --- /dev/null +++ b/Sources/Meter/Symbolicator.swift @@ -0,0 
+1,139 @@ +import Foundation +import BinaryImage +import MetricKit + +public struct SymbolInfo: Codable, Hashable { + public var offset: Int? + public var symbol: String + public var file: String? + public var lineNumber: Int? + + public init(symbol: String, offset: Int? = nil, file: String? = nil, lineNumber: Int? = nil) { + self.symbol = symbol + self.offset = offset + self.file = file + self.lineNumber = lineNumber + } +} + +extension SymbolInfo: CustomDebugStringConvertible { + public var debugDescription: String { + return "<SymbolInfo: \(symbol), offset: \(offset.map { String($0) } ?? "nil")>" /* NOTE(review): original body was lost in extraction; minimal reconstruction, confirm against upstream */ + } +} + +public struct SymbolicationTarget { + public var uuid: UUID + public var loadAddress: Int + public var path: String? + + public init(uuid: UUID, loadAddress: Int, path: String? = nil) { + self.uuid = uuid + self.loadAddress = loadAddress + self.path = path + } + + public init?(uuid: String, loadAddress: Int, path: String? = nil) { + guard let value = UUID(uuidString: uuid) else { return nil } + + self.uuid = value + self.loadAddress = loadAddress + self.path = path + } +} + +public protocol Symbolicator { + func symbolicate(address: Int, in target: SymbolicationTarget) -> [SymbolInfo] +} + +public extension Symbolicator { + func symbolicate(frame: Frame) -> Frame { + let subframes = frame.subFrames ?? [] + let symSubframes = subframes.map({ symbolicate(frame: $0) }) + + let addr = frame.address + let info = frame.symbolicationTarget.map({ symbolicate(address: addr, in: $0) }) ?? 
[] + + return Frame(frame: frame, symbolInfo: info, symbolicatedSubFrames: symSubframes) + } + + func symbolicate(callStack: CallStack) -> CallStack { + let symFrames = callStack.rootFrames.map({ symbolicate(frame: $0) }) + let attributed = callStack.threadAttributed + + return CallStack(threadAttributed: attributed, rootFrames: symFrames) + } + + func symbolicate(tree: CallStackTreeProtocol) -> CallStackTree { + let stacks = tree.callStacks.map({ symbolicate(callStack: $0) }) + + return CallStackTree(callStacks: stacks, + callStackPerThread: tree.callStackPerThread) + } + + func symbolicate(diagnostic: CrashDiagnosticProtocol) -> CrashDiagnostic { + let metadata = CrashMetaData(diagnostic: diagnostic) + + let symTree = symbolicate(tree: diagnostic.callStackTree) + + return CrashDiagnostic(metaData: metadata, callStackTree: symTree) + } + + func symbolicate(payload: DiagnosticPayloadProtocol) -> DiagnosticPayload { + let symDiagnostics = payload.crashDiagnostics?.map({ symbolicate(diagnostic: $0) }) + + return DiagnosticPayload(timeStampBegin: payload.timeStampBegin, + timeStampEnd: payload.timeStampEnd, + crashDiagnostics: symDiagnostics) + } +} + +extension Symbolicator { + +} + +extension Symbolicator { + func lookupPath(for binary: Binary) -> String? { + guard let name = binary.name else { + return nil + } + + let manager = FileManager.default + + switch name { + case "dyld", "libdyld.dylib": + return "/usr/lib/" + name + case "libsystem_kernel.dylib", "libsystem_pthread.dylib", "libdispatch.dylib": + return "/usr/lib/system/" + name + default: + break + } + + // /usr/lib/? 
+ let usrLibPath = "/usr/lib/" + name + if manager.isReadableFile(atPath: usrLibPath) { + return usrLibPath + } + + let usrLibSystem = "/usr/lib/system/" + name + if manager.isReadableFile(atPath: usrLibSystem) { + return usrLibSystem + } + + return nil + } +} diff --git a/Tests/BinaryImageTests/BinaryImageTests.swift b/Tests/BinaryImageTests/BinaryImageTests.swift new file mode 100644 index 0000000..34cd971 --- /dev/null +++ b/Tests/BinaryImageTests/BinaryImageTests.swift @@ -0,0 +1,20 @@ +import XCTest +@testable import BinaryImage + +final class BinaryImageTests: XCTestCase { + func testIteration() { + var images: [BinaryImage] = [] + + BinaryImageEnumerateLoadedImages { image, _ in + images.append(image) + } + + let matchingImage = images.first { image in + let name = String(cString: image.name) + + return name == "/usr/lib/system/libsystem_kernel.dylib" + } + + XCTAssertNotNil(matchingImage) + } +} diff --git a/Tests/MeterTests/SymbolicationTests.swift b/Tests/MeterTests/SymbolicationTests.swift new file mode 100644 index 0000000..bea6238 --- /dev/null +++ b/Tests/MeterTests/SymbolicationTests.swift @@ -0,0 +1,74 @@ +import XCTest +@testable import Meter +import BinaryImage + +struct MockSymbolicator { + var mockResults: [Int: [SymbolInfo]] + + init() { + self.mockResults = [:] + } +} + +extension MockSymbolicator: Symbolicator { + func symbolicate(address: Int, in target: SymbolicationTarget) -> [SymbolInfo] { + return mockResults[address] ?? [] + } +} + +final class SymbolicationTests: XCTestCase { + func testDlfcnSymbolicator() throws { + // This is a fragile test, because it will only work when running + // against 12.0 (21A344) on arm. I can think of a way to build a + // more robust test, but it will take quite a bit of work. 
+ let symbolicator = DlfcnSymbolicator() + let target = try XCTUnwrap(SymbolicationTarget(uuid: "17550b77-d255-389a-b779-906af75314b6", + loadAddress: 0x19d0c4000, + path: "/usr/lib/system/libsystem_kernel.dylib")) + + let infoArray = symbolicator.symbolicate(address: 0x19d0c59b4, in: target) + + XCTAssertEqual(infoArray.count, 1) + XCTAssertEqual(infoArray[0].symbol, "mach_msg_trap") + XCTAssertEqual(infoArray[0].offset, 8) + XCTAssertNil(infoArray[0].lineNumber) + XCTAssertNil(infoArray[0].file) + } + + func testSymbolicateCallStack() throws { + let uuidB = UUID() + let frameB = Frame(binaryUUID: uuidB, + offsetIntoBinaryTextSegment: 2000, + sampleCount: 1, + binaryName: "binaryB", + address: 2020, + subFrames: []) + let uuidA = UUID() + let frameA = Frame(binaryUUID: uuidA, + offsetIntoBinaryTextSegment: 1000, + sampleCount: 1, + binaryName: "binaryA", + address: 1015, subFrames: [frameB]) + let callStack = CallStack(threadAttributed: true, rootFrames: [frameA]) + + let symbolInfoB = SymbolInfo(symbol: "symbolB", offset: 10) + let symbolInfoA = SymbolInfo(symbol: "symbolA", offset: 10) + + var mockSymbolicator = MockSymbolicator() + + mockSymbolicator.mockResults[frameB.address] = [symbolInfoB] + mockSymbolicator.mockResults[frameA.address] = [symbolInfoA] + + let symbolicatedStack = mockSymbolicator.symbolicate(callStack: callStack) + + XCTAssertEqual(symbolicatedStack.threadAttributed, true) + XCTAssertEqual(symbolicatedStack.rootFrames.count, 1) + XCTAssertEqual(symbolicatedStack.rootFrames[0].symbolInfo?.count, 1) + XCTAssertEqual(symbolicatedStack.rootFrames[0].symbolInfo?[0].symbol, "symbolA") + XCTAssertEqual(symbolicatedStack.rootFrames[0].symbolInfo?[0].offset, 10) + + XCTAssertEqual(symbolicatedStack.rootFrames[0].subFrames?.count, 1) + XCTAssertEqual(symbolicatedStack.rootFrames[0].subFrames?[0].symbolInfo?[0].symbol, "symbolB") + XCTAssertEqual(symbolicatedStack.rootFrames[0].subFrames?[0].symbolInfo?[0].offset, 10) + } +}