diff --git a/ARM/CMakeLists.txt b/ARM/CMakeLists.txt index 990cf122..e76eb22b 100644 --- a/ARM/CMakeLists.txt +++ b/ARM/CMakeLists.txt @@ -46,4 +46,4 @@ add_llvm_library(mctollARMRaiser Target ) -target_link_libraries(mctollARMRaiser PRIVATE mctollRaiser) \ No newline at end of file +target_link_libraries(mctollARMRaiser PRIVATE mctollRaiser) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0889066e..66980f84 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -54,6 +54,10 @@ configure_file( ${LLVM_INCLUDE_DIR}/Raisers.def ) +set(LLVM_TARGET_DEFINITIONS Opts.td) +tablegen(LLVM Opts.inc -gen-opt-parser-defs) +add_public_tablegen_target(MctoolOptsTableGen) + add_subdirectory(test) add_llvm_tool(llvm-mctoll @@ -62,6 +66,8 @@ add_llvm_tool(llvm-mctoll MachODump.cpp EmitRaisedOutputPass.cpp PeepholeOptimizationPass.cpp + DEPENDS + MctoolOptsTableGen ) # Link against LLVM libraries and target-specific Raiser libraries diff --git a/MachODump.cpp b/MachODump.cpp index 462e61f1..195f4898 100644 --- a/MachODump.cpp +++ b/MachODump.cpp @@ -60,43 +60,19 @@ using namespace llvm; using namespace llvm::mctoll; using namespace object; -extern cl::opt MCPU; -extern cl::list MAttrs; - -static cl::opt - UseDbg("g", - cl::desc("Print line information from debug info if available")); - -static cl::opt DSYMFile("dsym", - cl::desc("Use .dSYM file for debug info")); - -static cl::opt FullLeadingAddr("full-leading-addr", - cl::desc("Print full leading address")); - -static cl::opt NoLeadingHeaders("no-leading-headers", - cl::desc("Print no leading headers")); -cl::opt - ArchiveMemberOffsets("archive-member-offsets", - cl::desc("Print the offset to each archive member for " - "Mach-O archives (requires -macho and " - "-archive-headers)")); - -cl::opt - mctoll::NonVerbose("non-verbose", - cl::desc("Print the info for Mach-O objects in " - "non-verbose or numeric form (requires -macho)")); - -cl::opt mctoll::DisSymName( - "dis-symname", - cl::desc("disassemble just this symbol's 
instructions (requires -macho)")); - -static cl::opt NoSymbolicOperands( - "no-symbolic-operands", - cl::desc("do not symbolic operands when disassembling (requires -macho)")); - -static cl::list - ArchFlags("arch", cl::desc("architecture(s) from a Mach-O file to dump"), - cl::ZeroOrMore); +extern std::string MCPU; +extern std::vector MAttrs; + +static std::string DisSymName; +static bool PrintImmHex; +static bool UseDbg; +static std::string DSYMFile; +static bool FullLeadingAddr; +static bool NoLeadingHeaders; +// static bool ArchiveMemberOffsets; +static bool NonVerbose; +static bool NoSymbolicOperands; +static std::vector ArchFlags; bool ArchAll = false; diff --git a/Opts.td b/Opts.td new file mode 100644 index 00000000..7d4ccb28 --- /dev/null +++ b/Opts.td @@ -0,0 +1,103 @@ +include "llvm/Option/OptParser.td" + +// Don't show this flags +def HelpSkipped : OptionFlag; + +def help : Flag<["--"], "help">, + HelpText<"Display available options (--help-hidden for more)">; +def h : Flag<["-"], "h">, Alias, HelpText<"Alias for --help">, Flags<[HelpHidden]>; + +def help_hidden : Flag<["--"], "help-hidden">, + Flags<[HelpHidden]>, + HelpText<"Display all available options">; + +def version : Flag<["--"], "version">, + HelpText<"Display the version of this program">; + +def debug : Flag<["-"], "debug">, Flags<[HelpHidden]>; + +def raise : Flag<["--"], "raise">, + HelpText<"Raise machine instruction">; +def : Flag<["-"], "d">, Alias, HelpText<"Alias for --raise">; + +def include_file_EQ : Joined<["--"], "include-file=">, + HelpText<"Header file with function prototypes using standard C syntax.">; +def : Separate<["--"], "include-file">, Alias, Flags<[HelpSkipped]>; +def : Separate<["-"], "I">, Alias, + HelpText<"Alias for --include-file">; + +def include_files_EQ : Joined<["--"], "include-files=">, + HelpText<"List of comma-seperated header files with function prototypes using standard C syntax.">; + +def filter_functions_file_EQ : Joined<["--"], 
"filter-functions-file=">, + HelpText<"Specify which functions to raise via a configuration file.">; +def : Separate<["--"], "filter-functions-file">, Alias, Flags<[HelpSkipped]>; +def : Separate<["-"], "f">, Alias, + HelpText<"Alias for --filter-functions-file">; + +def mcpu_EQ : Joined<["--"], "mcpu=">, + MetaVarName<"cpu-name">, + HelpText<"Target a specific cpu type (--mcpu=help for details)">, + Flags<[HelpHidden]>; + +def mattr_EQ : Joined<["--"], "mattr=">, + MetaVarName<"a1,+a2,-a3,...">, + HelpText<"Target specific attributes (--mattr=help for details)">, + Flags<[HelpHidden]>; + +def outfile_EQ : Joined<["--"], "outfile=">, + HelpText<"Output filename">; +def : Separate<["--"], "outfile">, Alias, Flags<[HelpSkipped]>; +def : Separate<["-"], "o">, Alias, + HelpText<"Alias for --outfile">; + +class OutputFormatOpts + : KeyPathAndMacro<"OutputFormatOpts->", base, "OUTPUT_FORMAT_"> {} + +def output_format_EQ : Joined<["--"], "output-format=">, + HelpText<"Output format: " + "'ll' (emit llvm text bitcode ('.ll') file) | " + "'bc' (emit llvm binary bitcode ('.bc') file) | " + "'null' (emit nothing, for performance testing). " + "Default is 'll'." 
+ >, + Values<"ll,bc,null">, + NormalizedValuesScope<"OutputFormatTy">, + NormalizedValues<["LL", "BC", "Null"]>, + MarshallingInfoEnum, "LL">, + Flags<[HelpHidden]>; + +def run_pass_EQ : Joined<["--"], "run-pass=">, + MetaVarName<"pass-name">, + HelpText<"Run compiler only for specified passes (comma separated list)">, + Flags<[HelpHidden]>; +def : Separate<["--"], "run-pass">, Alias, Flags<[HelpSkipped]>; + +def start_address_EQ : Joined<["--"], "start-address=">, + MetaVarName<"address">, + HelpText<"Disassemble beginning at address">, + Flags<[HelpHidden]>; +def : Separate<["--"], "start-address">, Alias, Flags<[HelpSkipped]>; +def stop_address_EQ : Joined<["--"], "stop-address=">, + MetaVarName<"address">, + HelpText<"Stop disassembly at address">, + Flags<[HelpHidden]>; +def : Separate<["--"], "stop-address">, Alias, Flags<[HelpSkipped]>; + +def section_EQ : Joined<["--"], "section=">, + HelpText<"Operate on the specified sections only. " + "With --macho dump segment,section">, + Flags<[HelpHidden]>; +def : Separate<["--"], "section">, Alias, Flags<[HelpSkipped]>; +def : Separate<["-"], "j">, Alias, + HelpText<"Alias for --section">, + Flags<[HelpHidden]>; + +def sysyroot_EQ : Joined<["--"], "sysroot=">, + HelpText<"Toolchain sysroot">; +def : Separate<["--"], "sysroot">, Alias, Flags<[HelpSkipped]>; + +def target_EQ : Separate<["-"], "target">, + HelpText<"Target triple to disassemble for, " + "see --version for available targets">, + Flags<[HelpHidden]>; diff --git a/Raiser/IncludedFileInfo.cpp b/Raiser/IncludedFileInfo.cpp index b1540463..687a0ffe 100644 --- a/Raiser/IncludedFileInfo.cpp +++ b/Raiser/IncludedFileInfo.cpp @@ -79,7 +79,7 @@ class FuncDeclVisitor : public clang::RecursiveASTVisitor { std::pair( FuncDecl->getQualifiedNameAsString(), Entry)); LLVM_DEBUG(dbgs() << FuncDecl->getQualifiedNameAsString() - << " : Entry found at " + << " : Added entry found at " << FuncDecl->getLocation().printToString( Context.getSourceManager()) << "\n"); @@ 
-151,16 +151,16 @@ class FuncDeclFinder : public clang::ASTConsumer { void HandleTranslationUnit(clang::ASTContext &Context) final { auto Decls = Context.getTranslationUnitDecl()->decls(); - clang::SourceManager &SourceManager(Context.getSourceManager()); for (auto &Decl : Decls) { - if (Decl->isFunctionOrFunctionTemplate()) { - const auto &FileID = SourceManager.getFileID(Decl->getLocation()); - if (FileID != SourceManager.getMainFileID()) - continue; + if (Decl->isFunctionOrFunctionTemplate() && Decl->isFirstDecl()) { clang::FunctionDecl *FuncDecl = Decl->getAsFunction(); + LLVM_DEBUG(dbgs() << FuncDecl->getQualifiedNameAsString() << " : Visit " + << FuncDecl->getLocation().printToString( + Context.getSourceManager()) + << "\n"); Visitor.TraverseFunctionDecl(FuncDecl); } else if (Decl->getKind() == clang::Decl::Kind::Var) { - auto VarDecl = dyn_cast(Decl); + auto *VarDecl = dyn_cast(Decl); IncludedFileInfo::ExternalVariables.insert( VarDecl->getQualifiedNameAsString()); } @@ -225,42 +225,36 @@ Function *IncludedFileInfo::CreateFunction(StringRef &CFuncName, } bool IncludedFileInfo::getExternalFunctionPrototype( - std::vector &FileNames, std::string &CompDBDir) { - static llvm::cl::OptionCategory InclFileParseCategory( - "parse-header-files options"); + std::vector &FileNames, std::string &Target, + std::string &SysRoot) { std::vector ArgPtrVec; ArgPtrVec.push_back("parse-header-files"); - if (!CompDBDir.empty()) { - ArgPtrVec.push_back("-p"); - ArgPtrVec.push_back(CompDBDir.c_str()); - } - if (llvm::DebugFlag) - ArgPtrVec.push_back("-debug"); + ArgPtrVec.push_back("--"); - // Dummy positional arguments to satisfy the requirement of having at least - // two positional arguments. 
- ArgPtrVec.push_back("dummy-positional-arg-1"); - ArgPtrVec.push_back("dummy-positional-arg-2"); - - if (CompDBDir.empty()) - ArgPtrVec.push_back("--"); + if (llvm::DebugFlag) + ArgPtrVec.push_back("-v"); + if (!Target.empty()) { + ArgPtrVec.push_back("-target"); + ArgPtrVec.push_back(Target.c_str()); + } + if (!SysRoot.empty()) { + ArgPtrVec.push_back("--sysroot"); + ArgPtrVec.push_back(SysRoot.c_str()); + } auto *ToolArgv = ArgPtrVec.data(); int ArgSz = ArgPtrVec.size(); - // Construct a CommonOptionsParser object for the Compilations. - auto ExpParser = clang::tooling::CommonOptionsParser::create( - ArgSz, ToolArgv, InclFileParseCategory); - - if (!ExpParser) { - llvm::errs() << ExpParser.takeError(); - return 1; - } - clang::tooling::CommonOptionsParser &OptParser = ExpParser.get(); - // Pass include FileNames vector and NOT OptParser.getSourcePathList() since - // only a dymmy-positional-arg was passed while constructing OptParser. - clang::tooling::ClangTool Tool(OptParser.getCompilations(), FileNames); + std::string ErrorMessage; + std::unique_ptr Compilations = + clang::tooling::FixedCompilationDatabase::loadFromCommandLine(ArgSz, ToolArgv, ErrorMessage); + if (!ErrorMessage.empty()) + llvm::errs() << ErrorMessage.append("\n"); + // loadFromCommandLine() returns null on failure; never dereference it. + if (!Compilations) + return false; + clang::tooling::ClangTool Tool(*Compilations, FileNames); int Success = Tool.run( clang::tooling::newFrontendActionFactory().get()); switch (Success) { diff --git a/Raiser/IncludedFileInfo.h b/Raiser/IncludedFileInfo.h index 3f0f5fdc..edd9d394 100644 --- a/Raiser/IncludedFileInfo.h +++ b/Raiser/IncludedFileInfo.h @@ -40,7 +40,8 @@ class IncludedFileInfo { static std::set ExternalVariables; static bool getExternalFunctionPrototype(std::vector &FileNames, - std::string &CompDBDir); + std::string &Target, + std::string &SysRoot); static bool IsExternalVariable(std::string Name); }; diff --git a/X86/CMakeLists.txt b/X86/CMakeLists.txt index 89a88994..f875d4c7 100644 --- a/X86/CMakeLists.txt +++ b/X86/CMakeLists.txt @@ -37,4 +37,4 @@ 
add_llvm_library(mctollX86Raiser TransformUtils ) -target_link_libraries(mctollX86Raiser PRIVATE mctollRaiser) \ No newline at end of file +target_link_libraries(mctollX86Raiser PRIVATE mctollRaiser) diff --git a/doc/sysroot_flag.md b/doc/sysroot_flag.md new file mode 100644 index 00000000..2cfbf81c --- /dev/null +++ b/doc/sysroot_flag.md @@ -0,0 +1,68 @@ +## Using --sysroot flag + +`--sysroot` points to the toolchain root. It is useful during development on a non-Linux machine to raise Linux binaries, or on a Linux machine using a toolchain other than the one installed. + +* On Linux, the sysroot is "/". +* On macOS, it is the path printed by `xcrun --show-sdk-path`. + +### Cross-building Linux x86_64 and arm binaries + +Instructions to build a minimal functional toolchain that may be used with `llvm-mctoll` are provided [here](https://github.com/sv99/llvm-mctoll-toolchains). These facilitate cross-compiling and raising binaries for the x86_64-linux-gnu and arm-linux-gnueabihf targets. You may choose to create and use a toolchain of your choice. +```bash +# toolchain directory +# ~/toolchain/arm-linux-gnueabihf +# ~/toolchain/x86_64-linux-gnu +# clang must be built with ARM support! + +# ELF 32-bit ARM Linux +clang --sysroot ~/toolchain/arm-linux-gnueabihf \ + -target arm-linux-gnueabihf -fuse-ld=lld \ + -o hello-arm -v hello.c +file hello-arm + +# ELF 64-bit x86_64 Linux +clang --sysroot ~/toolchain/x86_64-linux-gnu \ + -target x86_64-linux-gnu -fuse-ld=lld \ + -o hello-lin -v hello.c +file hello-lin +``` + +The above has been tested to work on macOS (and Linux). + +### Raising a Linux binary on a non-Linux host + +A toolchain is needed to parse the header file successfully. 
+ +```c +// header-inc.h +#include +``` + +```bash +# ELF 32-bit ARM Linux +llvm-mctoll --sysroot ~/toolchain/arm-linux-gnueabihf \ + -target arm-linux-gnueabihf -I header-inc.h \ + -debug -d hello-arm +# ELF 64-bit x86_64 Linux +llvm-mctoll --sysroot ~/toolchain/x86_64-linux-gnu \ + -target x86_64-linux-gnu -I header-inc.h \ + -debug -d hello-lin +``` + +### Running Linux binaries using Docker on a non-Linux development host + +[multiarch/qemu-user-static](https://github.com/multiarch/qemu-user-static) enables +execution of containers built for different architectures. + +```bash +# prepare docker +docker run --rm --privileged multiarch/qemu-user-static --reset -p yes +``` + +```bash +# run x86_64-linux-gnu binary +docker run --rm -it -v $(pwd):/work amd64/ubuntu:20.04 /work/hello-lin + +# run arm-linux-gnueabihf binary +docker run --rm -it -v $(pwd):/work arm32v7/ubuntu:20.04 /work/hello-arm +``` \ No newline at end of file diff --git a/llvm-mctoll.cpp b/llvm-mctoll.cpp index a8266af2..0a278395 100644 --- a/llvm-mctoll.cpp +++ b/llvm-mctoll.cpp @@ -56,17 +56,20 @@ #include "llvm/Object/MachO.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Object/Wasm.h" +#include "llvm/Option/Arg.h" +#include "llvm/Option/ArgList.h" +#include "llvm/Option/Option.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Errc.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Format.h" +#include "llvm/Support/InitLLVM.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/Host.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/Signals.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetSelect.h" @@ -88,118 +91,90 @@ using namespace llvm; using namespace llvm::mctoll; using namespace object; -static cl::OptionCategory LLVMMCToLLCategory("llvm-mctoll options"); - 
-static cl::list InputFileNames(cl::Positional, - cl::desc(""), - cl::OneOrMore); -static cl::opt OutputFilename("outfile", - cl::desc("Output filename"), - cl::value_desc("filename"), - cl::cat(LLVMMCToLLCategory), - cl::NotHidden); -cl::alias OutputFilenameshort("o", cl::desc("Alias for --outfile"), - cl::aliasopt(OutputFilename), - cl::cat(LLVMMCToLLCategory), cl::NotHidden); - -cl::opt - MCPU("mcpu", - cl::desc("Target a specific cpu type (-mcpu=help for details)"), - cl::value_desc("cpu-name"), cl::init("")); - -cl::list - MAttrs("mattr", cl::CommaSeparated, - cl::desc("Target specific attributes (-mattr=help for details)"), - cl::value_desc("a1,+a2,-a3,...")); - -// Output file type. Default is binary bitcode. -cl::opt OutputFormat( - "output-format", cl::init(CGFT_AssemblyFile), - cl::desc("Output format (default: binary bitcode):"), - cl::values(clEnumValN(CGFT_AssemblyFile, "ll", - "Emit llvm text bitcode ('.ll') file"), - clEnumValN(CGFT_ObjectFile, "bc", - "Emit llvm binary bitcode ('.bc') file"), - clEnumValN(CGFT_Null, "null", - "Emit nothing, for performance testing")), - cl::cat(LLVMMCToLLCategory), cl::NotHidden); - -cl::opt mctoll::Disassemble("raise", - cl::desc("Raise machine instruction"), - cl::cat(LLVMMCToLLCategory), cl::NotHidden); - -cl::alias Disassembled("d", cl::desc("Alias for -raise"), - cl::aliasopt(Disassemble), cl::cat(LLVMMCToLLCategory), - cl::NotHidden); - -static cl::opt - MachOOpt("macho", cl::desc("Use MachO specific object file parser")); -static cl::alias MachOm("m", cl::desc("Alias for --macho"), - cl::aliasopt(MachOOpt)); - -static cl::opt NoVerify("disable-verify", cl::Hidden, - cl::desc("Do not verify input module")); - -cl::opt - mctoll::TripleName("triple", - cl::desc("Target triple to disassemble for, " - "see -version for available targets")); - -cl::opt - mctoll::ArchName("arch-name", - cl::desc("Target arch to disassemble for, " - "see -version for available targets")); - -cl::opt mctoll::FilterFunctionSet( - 
"filter-functions-file", - cl::desc("Specify which functions to raise via a configuration file."), - cl::cat(LLVMMCToLLCategory), cl::NotHidden); - -cl::alias static FilterFunctionSetF( - "f", cl::desc("Alias for --filter-functions-file"), - cl::aliasopt(FilterFunctionSet), cl::cat(LLVMMCToLLCategory), - cl::NotHidden); - -cl::list - mctoll::FilterSections("section", - cl::desc("Operate on the specified sections only. " - "With -macho dump segment,section")); - -cl::alias static FilterSectionsj("j", cl::desc("Alias for --section"), - cl::aliasopt(FilterSections)); - -cl::opt - mctoll::PrintImmHex("print-imm-hex", - cl::desc("Use hex format for immediate values")); - -cl::opt PrintFaultMaps("fault-map-section", - cl::desc("Display contents of faultmap section")); - -cl::opt - StartAddress("start-address", cl::desc("Disassemble beginning at address"), - cl::value_desc("address"), cl::init(0)); -cl::opt StopAddress("stop-address", - cl::desc("Stop disassembly at address"), - cl::value_desc("address"), - cl::init(UINT64_MAX)); -cl::list mctoll::IncludeFileNames( - "include-files", cl::CommaSeparated, - cl::desc("List of comma-seperated header files with function prototypes " - "using standard C syntax."), - cl::cat(LLVMMCToLLCategory), cl::NotHidden); - -cl::alias static IncludeFileNamesShort( - "I", cl::desc("Alias for --include-files="), - cl::aliasopt(IncludeFileNames), cl::cat(LLVMMCToLLCategory), cl::NotHidden); - -cl::opt mctoll::CompilationDBDir( - "compilation-db-path", - cl::desc("Absolute directory path to either compile_commands.json or " - "compile_flags.txt with any additional details needed to parse " - "include files. 
" - "See https://clang.llvm.org/docs/JSONCompilationDatabase.html for " - "details."), - cl::cat(LLVMMCToLLCategory), cl::NotHidden); +namespace { + +using namespace llvm::opt; // for HelpHidden in Opts.inc +// custom Flag for opt::DriverFlag defined in the llvm/Option/Option.h +enum MyFlag { + HelpSkipped = (1 << 4) +}; + +enum ID { + OPT_INVALID = 0, // This is not an option ID. +#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ + HELPTEXT, METAVAR, VALUES) \ + OPT_##ID, +#include "Opts.inc" +#undef OPTION +}; + +#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE; +#include "Opts.inc" +#undef PREFIX + +const opt::OptTable::Info InfoTable[] = { +#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ + HELPTEXT, METAVAR, VALUES) \ + { \ + PREFIX, NAME, HELPTEXT, \ + METAVAR, OPT_##ID, opt::Option::KIND##Class, \ + PARAM, FLAGS, OPT_##GROUP, \ + OPT_##ALIAS, ALIASARGS, VALUES}, +#include "Opts.inc" +#undef OPTION +}; + +class MctollOptTable : public opt::OptTable { +public: + MctollOptTable(const char *Usage, const char *Description) + : OptTable(InfoTable), Usage(Usage), Description(Description) { + setGroupedShortOptions(true); + } + + void printHelp(StringRef Argv0, bool ShowHidden = false) const { + Argv0 = sys::path::filename(Argv0); + unsigned FlagsToExclude = HelpSkipped | (ShowHidden ? 0 : HelpHidden); + opt::OptTable::printHelp(outs(), (Argv0 + Usage).str().c_str(), Description, + 0, FlagsToExclude, ShowHidden); + // TODO Replace this with OptTable API once it adds extrahelp support. + outs() << "\nPass @FILE as argument to read options from FILE.\n"; + } + +private: + const char *Usage; + const char *Description; +}; + +enum OutputFormatTy { OF_LL, OF_BC, OF_Null, OF_Unknown }; + +} // namespace + +#define DEBUG_TYPE "mctoll" + +static std::vector InputFileNames; +static std::string OutputFilename; +std::string MCPU; +std::vector MAttrs; +OutputFormatTy OutputFormat; // Output file type. 
Default is binary bitcode. +bool mctoll::Disassemble; +static bool MachOOpt; +static bool NoVerify; +std::string mctoll::TargetName; +std::string mctoll::TripleName; +std::string mctoll::SysRoot; +std::string mctoll::ArchName; +static std::string FilterConfigFileName; +std::vector mctoll::FilterSections; + +static uint64_t StartAddress; +static bool HasStartAddressFlag; +static uint64_t StopAddress = UINT64_MAX; +static bool HasStopAddressFlag; + +std::vector mctoll::IncludeFileNames; +std::string mctoll::CompilationDBDir; + +static bool PrintImmHex; namespace { static ManagedStatic> RunPassNames; @@ -216,15 +191,6 @@ struct RunPassOption { }; } // namespace -#define DEBUG_TYPE "mctoll" - -static RunPassOption RunPassOpt; - -static cl::opt> RunPass( - "run-pass", - cl::desc("Run compiler only for specified passes (comma separated list)"), - cl::value_desc("pass-name"), cl::ZeroOrMore, cl::location(RunPassOpt)); - namespace { typedef std::function FilterPredicate; @@ -296,7 +262,7 @@ SectionFilter ToolSectionFilter(llvm::object::ObjectFile const &O) { static const Target *getTarget(const ObjectFile *Obj = nullptr) { // Figure out the target triple. llvm::Triple TheTriple("unknown-unknown-unknown"); - if (mctoll::TripleName.empty()) { + if (TripleName.empty()) { if (Obj) { auto Arch = Obj->getArch(); TheTriple.setArch(Triple::ArchType(Arch)); @@ -343,8 +309,8 @@ static const Target *getTarget(const ObjectFile *Obj = nullptr) { error("Unsupported target " + TheTriple.getArchName()); } - // A few of opcodes in ARMv4 or ARMv5 are indentified as ARMv6 opcodes, - // so unify the triple Archs lower then ARMv6 to ARMv6 temporarily. + // A few of opcodes in ARMv4 or ARMv5 are identified as ARMv6 opcodes, + // so unify the triple Archs lower than ARMv6 to ARMv6 temporarily. 
if (TheTriple.getArchName() == "armv4t" || TheTriple.getArchName() == "armv5te" || TheTriple.getArchName() == "armv5" || TheTriple.getArchName() == "armv5t") @@ -355,7 +321,7 @@ static const Target *getTarget(const ObjectFile *Obj = nullptr) { return TheTarget; } -static std::unique_ptr GetOutputStream(StringRef InfileName) { +static std::unique_ptr getOutputStream(StringRef InfileName) { // If output file name is not explicitly specified construct a name based on // the input file name. if (OutputFilename.empty()) { @@ -368,30 +334,22 @@ static std::unique_ptr GetOutputStream(StringRef InfileName) { OutputFilename = std::string(InfileName); switch (OutputFormat) { - case CGFT_AssemblyFile: + case OF_LL: OutputFilename += "-dis.ll"; break; // Just uses enum CGFT_ObjectFile represent llvm bitcode file type // provisionally. - case CGFT_ObjectFile: + case OF_BC: OutputFilename += "-dis.bc"; break; - case CGFT_Null: + default: OutputFilename += ".null"; break; } } // Decide if we need "binary" output. - bool Binary = false; - switch (OutputFormat) { - case CGFT_AssemblyFile: - break; - case CGFT_ObjectFile: - case CGFT_Null: - Binary = true; - break; - } + bool Binary = OutputFormat != OF_LL; // Open the file. 
std::error_code EC; @@ -734,16 +692,8 @@ static bool isAFunctionSymbol(const ObjectFile *Obj, SymbolInfoTy &Symbol) { return false; } -//#ifdef __cplusplus -// extern "C" { -//#endif - -#define MODULE_RAISER(TargetName) \ - extern "C" void register##TargetName##ModuleRaiser(); +#define MODULE_RAISER(TargetName) extern "C" void register##TargetName##ModuleRaiser(); #include "Raisers.def" -//#ifdef __cplusplus -//} -//#endif static void InitializeAllModuleRaisers() { #define MODULE_RAISER(TargetName) register##TargetName##ModuleRaiser(); @@ -1002,7 +952,6 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) { uint64_t Index; FunctionFilter *FuncFilter = moduleRaiser->getFunctionFilter(); - auto FilterConfigFileName = FilterFunctionSet.getValue(); if (!FilterConfigFileName.empty()) { if (!FuncFilter->readFilterFunctionConfigFile(FilterConfigFileName)) { dbgs() << "Unable to read function filter configuration file " @@ -1068,7 +1017,7 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) { auto &SymStr = Symbols[si].Name; bool raiseFuncSymbol = true; - if ((!FilterFunctionSet.getValue().empty())) { + if ((!FilterConfigFileName.empty())) { // Check the symbol name whether it should be excluded or not. // Check in a non-empty exclude list if (!FuncFilter->isFilterSetEmpty(FunctionFilter::FILTER_EXCLUDE)) { @@ -1109,7 +1058,7 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) { // However, in a raiser, we are conceptually walking the traditional // compiler pipeline backwards. So we build MachineFunction from // the binary before building Function object. Given the dependency, - // build a place holder Function object to allow for building the + // build a placeholder Function object to allow for building the // MachineFunction object. // This Function object is NOT populated when raising MachineFunction // abstraction of the binary function. 
Instead, a new Function is @@ -1364,7 +1313,7 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) { Triple TheTriple = Triple(TripleName); // Decide where to send the output. - std::unique_ptr Out = GetOutputStream(Obj->getFileName()); + std::unique_ptr Out = getOutputStream(Obj->getFileName()); if (!Out) return; @@ -1377,6 +1326,22 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) { LLVMTargetMachine &LLVMTM = static_cast(*Target); + CodeGenFileType OutputFileType; + + switch (OutputFormat) { + case OF_LL: + OutputFileType = CGFT_AssemblyFile; + break; + // Just uses enum CGFT_ObjectFile represent llvm bitcode file type + // provisionally. + case OF_BC: + OutputFileType = CGFT_ObjectFile; + break; + default: + OutputFileType = CGFT_Null; + break; + } + if (RunPassNames->empty()) { TargetPassConfig &TPC = *LLVMTM.createPassConfig(PM); if (TPC.hasLimitedCodeGenPipeline()) { @@ -1393,7 +1358,7 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) { PM.add(new PeepholeOptimizationPass()); // Add print pass to emit ouptut file. 
- PM.add(new EmitRaisedOutputPass(*OS, OutputFormat)); + PM.add(new EmitRaisedOutputPass(*OS, OutputFileType)); TPC.printAndVerify(""); for (const std::string &RunPassName : *RunPassNames) { @@ -1404,7 +1369,7 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) { TPC.setInitialized(); } else if (Target->addPassesToEmitFile( PM, *OS, nullptr, /* no dwarf output file stream*/ - OutputFormat, NoVerify, machineModuleInfo)) { + OutputFileType, NoVerify, machineModuleInfo)) { outs() << ToolName << "run system pass!\n"; } @@ -1500,11 +1465,103 @@ static void DumpInput(StringRef file) { report_error(errorCodeToError(object_error::invalid_file_type), file); } +[[noreturn]] static void reportCmdLineError(const Twine &Message) { + WithColor::error(errs(), ToolName) << Message << "\n"; + exit(1); +} + +template +static void parseIntArg(const llvm::opt::InputArgList &InputArgs, int ID, + T &Value) { + if (const opt::Arg *A = InputArgs.getLastArg(ID)) { + StringRef V(A->getValue()); + if (!llvm::to_integer(V, Value, 0)) { + reportCmdLineError(A->getSpelling() + + ": expected a non-negative integer, but got '" + V + + "'"); + } + } +} + +static void invalidArgValue(const opt::Arg *A) { + reportCmdLineError("'" + StringRef(A->getValue()) + + "' is not a valid value for '" + A->getSpelling() + "'"); +} + +static std::vector +commaSeparatedValues(const llvm::opt::InputArgList &InputArgs, int ID) { + std::vector Values; + for (StringRef Value : InputArgs.getAllArgValues(ID)) { + llvm::SmallVector SplitValues; + llvm::SplitString(Value, SplitValues, ","); + for (StringRef SplitValue : SplitValues) + Values.push_back(SplitValue.str()); + } + return Values; +} + +static void parseOptions(const llvm::opt::InputArgList &InputArgs) { + llvm::DebugFlag = InputArgs.hasArg(OPT_debug); + Disassemble = InputArgs.hasArg(OPT_raise); + FilterConfigFileName = InputArgs.getLastArgValue(OPT_filter_functions_file_EQ).str(); + MCPU = InputArgs.getLastArgValue(OPT_mcpu_EQ).str(); 
+ MAttrs = commaSeparatedValues(InputArgs, OPT_mattr_EQ); + FilterSections = InputArgs.getAllArgValues(OPT_section_EQ); + parseIntArg(InputArgs, OPT_start_address_EQ, StartAddress); + HasStartAddressFlag = InputArgs.hasArg(OPT_start_address_EQ); + parseIntArg(InputArgs, OPT_stop_address_EQ, StopAddress); + HasStopAddressFlag = InputArgs.hasArg(OPT_stop_address_EQ); + TargetName = InputArgs.getLastArgValue(OPT_target_EQ).str(); + SysRoot = InputArgs.getLastArgValue(OPT_sysyroot_EQ).str(); + OutputFilename = InputArgs.getLastArgValue(OPT_outfile_EQ).str(); + + InputFileNames = InputArgs.getAllArgValues(OPT_INPUT); + if (InputFileNames.empty()) + reportCmdLineError("no input file"); + + IncludeFileNames = InputArgs.getAllArgValues(OPT_include_file_EQ); + std::string IncludeFileNames2 = InputArgs.getLastArgValue(OPT_include_files_EQ).str(); + if (!IncludeFileNames2.empty()) { + SmallVector FNames; + StringRef(IncludeFileNames2).split(FNames, ',', -1, false); + for (auto N : FNames) + IncludeFileNames.push_back(std::string(N)); + } + + if (const opt::Arg *A = InputArgs.getLastArg(OPT_output_format_EQ)) { + OutputFormat = StringSwitch(A->getValue()) + .Case("ll", OF_LL) + .Case("bc", OF_BC) + .Case("null", OF_Null) + .Default(OF_Unknown); + if (OutputFormat == OF_Unknown) + invalidArgValue(A); + } +} + int main(int argc, char **argv) { - // Print a stack trace if we signal out. - sys::PrintStackTraceOnErrorSignal(argv[0]); - PrettyStackTraceProgram X(argc, argv); - llvm_shutdown_obj Y; // Call llvm_shutdown() on exit. 
+ InitLLVM X(argc, argv); + + // parse command line + BumpPtrAllocator A; + StringSaver Saver(A); + MctollOptTable Tbl(" [options] ", + "MC to LLVM IR raiser"); + ToolName = argv[0]; + opt::InputArgList Args = + Tbl.parseArgs(argc, argv, OPT_UNKNOWN, Saver, + [&](StringRef Msg) { + error(Msg); + exit(1); + }); + if (Args.size() == 0 || Args.hasArg(OPT_help)) { + Tbl.printHelp(ToolName); + return 0; + } + if (Args.hasArg(OPT_help_hidden)) { + Tbl.printHelp(ToolName, /*ShowHidden=*/true); + return 0; + } // Initialize targets and assembly printers/parsers. llvm::InitializeAllTargets(); @@ -1512,14 +1569,14 @@ int main(int argc, char **argv) { llvm::InitializeAllTargetMCs(); llvm::InitializeAllDisassemblers(); - // Register the target printer for --version. - cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion); - - cl::HideUnrelatedOptions(LLVMMCToLLCategory); - - cl::ParseCommandLineOptions(argc, argv, "MC to LLVM IR raiser\n"); + if (Args.hasArg(OPT_version)) { + cl::PrintVersionMessage(); + outs() << '\n'; + TargetRegistry::printRegisteredTargetsForVersion(outs()); + return 0; + } - ToolName = argv[0]; + parseOptions(Args); // Set appropriate bug report message llvm::setBugReportMsg( @@ -1546,19 +1603,20 @@ int main(int argc, char **argv) { // Stash output file name as well since it would also be reset during parsing // done by clang::tooling::CommonOptionsParser invoked in // getExternalFunctionPrototype(). - auto OF = OutputFilename.getValue(); + auto OF = OutputFilename; if (!IncludeFNames.empty()) { - if (!IncludedFileInfo::getExternalFunctionPrototype( - IncludeFNames, mctoll::CompilationDBDir)) { + if (!IncludedFileInfo::getExternalFunctionPrototype(IncludeFNames, + TargetName, + SysRoot)) { dbgs() << "Unable to read external function prototype. Ignoring\n"; } } // Restore stashed Outputfilename - OutputFilename.setValue(OF); + OutputFilename = OF; // Disassemble contents of .text section. 
Disassemble = true; - FilterSections.addValue(".text"); + FilterSections.push_back(".text"); llvm::setCurrentDebugType(DEBUG_TYPE); std::for_each(InputFNames.begin(), InputFNames.end(), DumpInput); diff --git a/llvm-mctoll.h b/llvm-mctoll.h index 27ee5df1..efc71b4c 100644 --- a/llvm-mctoll.h +++ b/llvm-mctoll.h @@ -29,19 +29,14 @@ class RelocationRef; namespace mctoll { -extern cl::opt TripleName; -extern cl::opt ArchName; -extern cl::opt FilterFunctionSet; -extern cl::list FilterSections; -extern cl::opt Disassemble; -extern cl::opt DisSymName; -extern cl::opt NonVerbose; -extern cl::opt SymbolTable; -extern cl::opt UnwindInfo; -extern cl::opt PrintImmHex; -extern cl::opt DwarfDumpType; -extern cl::list IncludeFileNames; -extern cl::opt CompilationDBDir; +extern std::string TargetName; +extern std::string TripleName; +extern std::string SysRoot; +extern std::string ArchName; +extern std::vector FilterSections; +extern bool Disassemble; +extern std::vector IncludeFileNames; +extern std::string CompilationDBDir; // Various helper functions. bool isRelocAddressLess(object::RelocationRef A, object::RelocationRef B);