Skip to content

Commit

Permalink
Implement Late Externalization for STRONG externalization type
Browse files Browse the repository at this point in the history
Take the following code as example:
```
\#include <stdio.h>
int main(void) {
  printf("Hello, world!\n");
  return 0;
}
```
And assume we want to strongly externalize the function `printf`.
The resulting code would be:
```
\#define _STDIO_H	1
\#define __GLIBC_INTERNAL_STARTING_HEADER_IMPLEMENTATION
\#include <bits/libc-header-start.h>
\#define __need_size_t
\#define __need_NULL
\#include <stddef.h>
\#define __need___va_list
\#include <stdarg.h>
\#include <bits/types.h>
\#include <bits/types/__fpos_t.h>
\#include <bits/types/__fpos64_t.h>
\#include <bits/types/__FILE.h>
\#include <bits/types/FILE.h>
\#include <bits/types/struct_FILE.h>
\# include <bits/types/cookie_io_functions_t.h>
\#define L_tmpnam 20
\#include <bits/stdio_lim.h>
\#define __attr_dealloc_fclose __attr_dealloc (fclose, 1)
/** clang-extract: from /usr/include/stdio.h:361:1  */
static int (*klpe_printf)(const char *restrict, ...);

\#include <bits/floatn.h>
/** clang-extract: from /tmp/rewrite-3.c:5:1  */
int main(void)
{
  (*klpe_printf)("Hello, world!\n");
  return 0;
}
```
Clearly, those headers and defines could be avoided. For example, this would be perfectly valid:
```
\#include <stdio.h>
/** clang-extract: from /tmp/rewrite-3.c:5:1  */
static int (*klpe_printf)(const char *restrict, ...);

/** clang-extract: from /tmp/rewrite-3.c:4:1  */
int main(void)
{
  (*klpe_printf)("Hello, world!\n");
  return 0;
}
```
This works because the declaration of `klpe_printf` is placed right
before its first use. This commit achieves that by computing a location
where the externalized variable can be output *just before* its first
use; hence the variable is externalized later than the original code
would suggest.

To enable this option, the user must pass `-DCE_LATE_EXTERNALIZE` to
clang-extract.

Signed-off-by: Giuliano Belinassi <[email protected]>
  • Loading branch information
giulianobelinassi committed Jul 11, 2024
1 parent d8e2bb0 commit 74a63c6
Show file tree
Hide file tree
Showing 19 changed files with 371 additions and 49 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ Clang-extract support many options which controls the output code:
- `-DCE_IPACLONES_PATH=<arg>` Path to gcc .ipa-clones files generated by gcc. Used to decide if desired function to extract was inlined into other functions.
- `-DCE_SYMVERS_PATH=<arg>` Path to kernel Modules.symvers file. Only used when `-D__KERNEL__` is specified.
- `-DCE_DSC_OUTPUT=<arg>` Libpulp .dsc file output, used for userspace livepatching.
- `-DCE_LATE_EXTERNALIZE` Enable late externalization (declare externalized variables later than the original). May reduce code output when `-DCE_KEEP_INCLUDES` is enabled.

For more switches, see
```
Expand Down
11 changes: 11 additions & 0 deletions libcextract/ArgvParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ ArgvParser::ArgvParser(int argc, char **argv)
RenameSymbols(false),
Kernel(false),
Ibt(false),
AllowLateExternalization(false),
PatchObject(""),
DebuginfoPath(nullptr),
IpaclonesPath(nullptr),
SymversPath(nullptr),
Expand Down Expand Up @@ -147,6 +149,9 @@ void ArgvParser::Print_Usage_Message(void)
" -DCE_OUTPUT_FUNCTION_PROTOTYPE_HEADER=<arg>\n"
" Outputs a header file with a foward declaration of all\n"
" functions. This header is not self-compilable.\n"
" -DCE_LATE_EXTERNALIZE Enable late externalization (declare externalized variables\n"
" later than the original). May reduce code output when\n"
" -DCE_KEEP_INCLUDES is enabled\n"
"\n";

llvm::outs() << "The following arguments are ignored by clang-extract:\n";
Expand Down Expand Up @@ -256,6 +261,12 @@ bool ArgvParser::Handle_Clang_Extract_Arg(const char *str)

return true;
}
if (!strcmp("-DCE_LATE_EXTERNALIZE", str)) {
AllowLateExternalization = true;

return true;
}

if (!strcmp("--help", str)) {
Print_Usage_Message();
exit(0);
Expand Down
9 changes: 9 additions & 0 deletions libcextract/ArgvParser.hh
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,11 @@ class ArgvParser
return OutputFunctionPrototypeHeader;
}

/** Report the current value of the AllowLateExternalization flag. */
inline bool Get_Allow_Late_Externalization(void) { return AllowLateExternalization; }

const char *Get_Input_File(void);

/** Print help usage message. */
Expand All @@ -152,6 +157,10 @@ class ArgvParser
bool Kernel;
/* If the file was compiled with IBT support */
bool Ibt;

/* If set, then clang-extract may write the externalized decl later than the
original code. */
bool AllowLateExternalization;
std::string PatchObject;

const char *DebuginfoPath;
Expand Down
39 changes: 39 additions & 0 deletions libcextract/LLVMMisc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,3 +119,42 @@ Decl *Get_Bodyless_Or_Itself(Decl *decl)
Decl *bodyless = Get_Bodyless_Decl(decl);
return bodyless ? bodyless : decl;
}

/** Get the TopLevel Decl that contains the location loc.
 *
 * Binary-searches the top-level declarations of `ast`, comparing the
 * expansion range of each candidate against `loc`.
 *
 * NOTE(review): the binary search presumes the top-level decls are ordered
 * by their position in the translation unit -- confirm.
 *
 * @param ast  ASTUnit whose top-level declarations are searched.
 * @param loc  Source location to look up.
 * @return The top-level Decl whose expansion range fully contains `loc`, or
 *         nullptr when the search finds none (which includes the case of an
 *         ASTUnit without top-level declarations).
 */
Decl *Get_Toplevel_Decl_At_Location(ASTUnit *ast, const SourceLocation &loc)
{
  SourceManager &SM = ast->getSourceManager();

  /* ASTUnit only hands out iterators over its TopLevel vector.  Index through
     the begin iterator instead of taking the address of `*begin` / `*end`:
     dereferencing the past-the-end iterator (or begin, when there are no
     top-level decls at all) is undefined behaviour.  */
  const auto first = ast->top_level_begin();

  /* Do binary search on the closed interval [low, high]. */
  ptrdiff_t low = 0;
  ptrdiff_t high = (ast->top_level_end() - first) - 1;
  while (low <= high) {
    const ptrdiff_t mid = low + (high - low) / 2;

    Decl *decl = first[mid];
    /* Get rid of some weird macro locations.  We want the location where
       it was expanded. */
    SourceRange decl_range(SM.getExpansionLoc(decl->getBeginLoc()),
                           SM.getExpansionLoc(decl->getEndLoc()));

    if (decl_range.fullyContains(loc)) {
      return decl;
    }

    if (SM.isBeforeInTranslationUnit(decl_range.getBegin(), loc)) {
      /* Candidate starts before loc but does not contain it: look further. */
      low = mid + 1;
    } else {
      /* Candidate starts at or after loc: look at earlier decls. */
      high = mid - 1;
    }
  }

  return nullptr;
}
3 changes: 3 additions & 0 deletions libcextract/LLVMMisc.hh
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,6 @@ Decl *Get_Bodyless_Decl(Decl *decl);
FunctionDecl *Get_Bodyless_Or_Itself(FunctionDecl *decl);
TagDecl *Get_Bodyless_Or_Itself(TagDecl *decl);
Decl *Get_Bodyless_Or_Itself(Decl *decl);

/* Get the TopLevel Decl that contains the location loc. */
Decl *Get_Toplevel_Decl_At_Location(ASTUnit *ast, const SourceLocation &loc);
4 changes: 3 additions & 1 deletion libcextract/Passes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,9 @@ class FunctionExternalizerPass : public Pass
virtual bool Run_Pass(PassManager::Context *ctx)
{
/* Issue externalization. */
SymbolExternalizer externalizer(ctx->AST.get(), ctx->IA, ctx->Ibt, ctx->PatchObject, ctx->DumpPasses);
SymbolExternalizer externalizer(ctx->AST.get(), ctx->IA, ctx->Ibt,
ctx->AllowLateExternalizations,
ctx->PatchObject, ctx->DumpPasses);
if (ctx->RenameSymbols)
/* The FuncExtractNames will be modified, as the function will be renamed. */
externalizer.Externalize_Symbols(ctx->Externalize, ctx->FuncExtractNames);
Expand Down
4 changes: 4 additions & 0 deletions libcextract/Passes.hh
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ class PassManager {
RenameSymbols(args.Should_Rename_Symbols()),
Kernel(args.Is_Kernel()),
Ibt(args.Has_Ibt()),
AllowLateExternalizations(args.Get_Allow_Late_Externalization()),
PatchObject(args.Get_PatchObject()),
HeadersToExpand(args.Get_Headers_To_Expand()),
ClangArgs(args.Get_Args_To_Clang()),
Expand Down Expand Up @@ -107,6 +108,9 @@ class PassManager {
/** If the code was compiled with IBT support */
bool Ibt;

/** If we can late externalize variables. */
bool AllowLateExternalizations;

/** Object that will be patched. */
std::string PatchObject;

Expand Down
Loading

0 comments on commit 74a63c6

Please sign in to comment.