Skip to content

Commit 9ca884a

Browse files
Dev VMclaude
andcommitted
Emit naked functions as LLVM IR with inline asm instead of module asm
This fixes issue #4294, where LTO linking fails with "symbol already defined" errors for naked template functions. The root cause was that module-level assembly from multiple compilation units gets concatenated during LTO before COMDAT deduplication can occur.

The fix emits naked functions as proper LLVM IR functions with:
- The 'naked' attribute (suppresses prologue/epilogue generation)
- LinkOnceODRLinkage for template instantiations
- COMDAT groups for proper symbol deduplication during LTO
- Inline asm containing the function body
- OptimizeNone and NoInline attributes to prevent LLVM from cloning the function during optimization passes (which would duplicate labels)

Labels in the inline asm use printLabelName() for consistency with label references generated by the asm parser, ensuring labels are properly quoted to match the format used in jump instructions.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 091a6e9 commit 9ca884a

4 files changed

Lines changed: 427 additions & 85 deletions

File tree

gen/naked.cpp

Lines changed: 98 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "dmd/template.h"
1717
#include "gen/dvalue.h"
1818
#include "gen/funcgenstate.h"
19+
#include "gen/functions.h"
1920
#include "gen/irstate.h"
2021
#include "gen/llvm.h"
2122
#include "gen/llvmhelpers.h"
@@ -24,6 +25,7 @@
2425
#include "ir/irfunction.h"
2526
#include "llvm/IR/InlineAsm.h"
2627
#include <cassert>
28+
#include <sstream>
2729

2830
using namespace dmd;
2931

@@ -128,9 +130,11 @@ class ToNakedIRVisitor : public Visitor {
128130
stmt->loc.toChars());
129131
LOG_SCOPE;
130132

133+
// Use printLabelName to match how label references are generated in asm-x86.h.
134+
// This ensures label definitions match the quoted format used in jump instructions.
131135
printLabelName(irs->nakedAsm, mangleExact(irs->func()->decl),
132136
stmt->ident->toChars());
133-
irs->nakedAsm << ":";
137+
irs->nakedAsm << ":\n";
134138

135139
if (stmt->statement) {
136140
stmt->statement->accept(this);
@@ -144,108 +148,117 @@ void DtoDefineNakedFunction(FuncDeclaration *fd) {
144148
IF_LOG Logger::println("DtoDefineNakedFunction(%s)", mangleExact(fd));
145149
LOG_SCOPE;
146150

147-
// we need to do special processing on the body, since we only want
148-
// to allow actual inline asm blocks to reach the final asm output
149-
150-
std::ostringstream &asmstr = gIR->nakedAsm;
151-
152-
// build function header
151+
const char *mangle = mangleExact(fd);
152+
const auto &triple = *global.params.targetTriple;
153153

154-
// FIXME: could we perhaps use llvm asmwriter to give us these details ?
154+
// Get or create the LLVM function first, before visiting the body.
155+
// The visitor may call Declaration_codegen which needs an IR insert point.
156+
llvm::Module &module = gIR->module;
157+
llvm::Function *func = module.getFunction(mangle);
155158

156-
const char *mangle = mangleExact(fd);
157-
std::string fullmangle; // buffer only
159+
if (!func) {
160+
// Create function type using the existing infrastructure
161+
llvm::FunctionType *funcType = DtoFunctionType(fd);
158162

159-
const auto &triple = *global.params.targetTriple;
160-
bool const isWin = triple.isOSWindows();
161-
bool const isDarwin = triple.isOSDarwin();
162-
163-
// osx is different
164-
// also mangling has an extra underscore prefixed
165-
if (isDarwin) {
166-
fullmangle += '_';
167-
fullmangle += mangle;
168-
mangle = fullmangle.c_str();
169-
170-
asmstr << "\t.section\t__TEXT,__text,regular,pure_instructions"
171-
<< std::endl;
172-
asmstr << "\t.globl\t" << mangle << std::endl;
163+
// Create function with appropriate linkage
164+
llvm::GlobalValue::LinkageTypes linkage;
173165
if (fd->isInstantiated()) {
174-
asmstr << "\t.weak_definition\t" << mangle << std::endl;
166+
linkage = llvm::GlobalValue::LinkOnceODRLinkage;
167+
} else {
168+
linkage = llvm::GlobalValue::ExternalLinkage;
175169
}
176-
asmstr << "\t.p2align\t4, 0x90" << std::endl;
177-
asmstr << mangle << ":" << std::endl;
170+
171+
func = llvm::Function::Create(funcType, linkage, mangle, &module);
172+
} else if (!func->empty()) {
173+
// Function already has a body - this can happen if the function was
174+
// already defined (e.g., template instantiation in another module).
175+
// Don't add another body.
176+
return;
177+
} else if (func->hasFnAttribute(llvm::Attribute::Naked)) {
178+
// Function already has naked attribute - it was already processed
179+
return;
178180
}
179-
// Windows is different
180-
else if (isWin) {
181-
// mangled names starting with '?' (MSVC++ symbols) apparently need quoting
182-
if (mangle[0] == '?') {
183-
fullmangle += '"';
184-
fullmangle += mangle;
185-
fullmangle += '"';
186-
mangle = fullmangle.c_str();
187-
} else if (triple.isArch32Bit()) {
188-
// prepend extra underscore for Windows x86
189-
fullmangle += '_';
190-
fullmangle += mangle;
191-
mangle = fullmangle.c_str();
192-
}
193181

194-
asmstr << "\t.def\t" << mangle << ";" << std::endl;
195-
// hard code these two numbers for now since gas ignores .scl and llvm
196-
// is defaulting to .type 32 for everything I have seen
197-
asmstr << "\t.scl\t2;" << std::endl;
198-
asmstr << "\t.type\t32;" << std::endl;
199-
asmstr << "\t.endef" << std::endl;
182+
// Set naked attribute - this tells LLVM not to generate prologue/epilogue
183+
func->addFnAttr(llvm::Attribute::Naked);
200184

201-
if (fd->isInstantiated()) {
202-
asmstr << "\t.section\t.text,\"xr\",discard," << mangle << std::endl;
203-
} else {
204-
asmstr << "\t.text" << std::endl;
205-
}
206-
asmstr << "\t.globl\t" << mangle << std::endl;
207-
asmstr << "\t.p2align\t4, 0x90" << std::endl;
208-
asmstr << mangle << ":" << std::endl;
209-
} else {
210-
if (fd->isInstantiated()) {
211-
asmstr << "\t.section\t.text." << mangle << ",\"axG\",@progbits,"
212-
<< mangle << ",comdat" << std::endl;
213-
asmstr << "\t.weak\t" << mangle << std::endl;
214-
} else {
215-
asmstr << "\t.text" << std::endl;
216-
asmstr << "\t.globl\t" << mangle << std::endl;
217-
}
218-
asmstr << "\t.p2align\t4, 0x90" << std::endl;
219-
asmstr << "\t.type\t" << mangle << ",@function" << std::endl;
220-
asmstr << mangle << ":" << std::endl;
185+
// Prevent optimizations that might clone or modify the function.
186+
// The inline asm contains labels that would conflict if duplicated.
187+
func->addFnAttr(llvm::Attribute::OptimizeNone);
188+
func->addFnAttr(llvm::Attribute::NoInline);
189+
190+
// For template instantiations, set up COMDAT for deduplication
191+
if (fd->isInstantiated()) {
192+
func->setComdat(module.getOrInsertComdat(mangle));
221193
}
222194

223-
// emit body
224-
ToNakedIRVisitor v(gIR);
225-
fd->fbody->accept(&v);
195+
// Set other common attributes
196+
func->addFnAttr(llvm::Attribute::NoUnwind);
197+
198+
// Create entry basic block and set insert point before visiting body.
199+
// The visitor's ExpStatement::visit may call Declaration_codegen for
200+
// static symbols, which may need an active IR insert point.
201+
llvm::BasicBlock *entryBB =
202+
llvm::BasicBlock::Create(gIR->context(), "entry", func);
203+
204+
// Save current insert point and switch to new function
205+
llvm::IRBuilderBase::InsertPoint savedIP = gIR->ir->saveIP();
206+
gIR->ir->SetInsertPoint(entryBB);
207+
208+
// Clear the nakedAsm stream and collect the function body
209+
std::ostringstream &asmstr = gIR->nakedAsm;
210+
asmstr.str("");
211+
212+
// Use the visitor to collect asm statements into nakedAsm
213+
ToNakedIRVisitor visitor(gIR);
214+
fd->fbody->accept(&visitor);
226215

227-
// We could have generated new errors in toNakedIR(), but we are in codegen
228-
// already so we have to abort here.
229216
if (global.errors) {
230217
fatal();
231218
}
232219

233-
// emit size after body
234-
// llvm does this on linux, but not on osx or Win
235-
if (!(isWin || isDarwin)) {
236-
asmstr << "\t.size\t" << mangle << ", .-" << mangle << std::endl
237-
<< std::endl;
220+
// Get the collected asm string and escape $ characters for LLVM inline asm.
221+
// In LLVM inline asm, $N refers to operand N, so literal $ must be escaped as $$.
222+
std::string asmBody;
223+
{
224+
std::string raw = asmstr.str();
225+
asmBody.reserve(raw.size() * 2); // Worst case: all $ characters
226+
for (char c : raw) {
227+
if (c == '$') {
228+
asmBody += "$$";
229+
} else {
230+
asmBody += c;
231+
}
232+
}
238233
}
234+
asmstr.str(""); // Clear for potential reuse
239235

240-
gIR->module.appendModuleInlineAsm(asmstr.str());
241-
asmstr.str("");
236+
// Create inline asm - the entire function body is a single asm block
237+
// No constraints needed since naked functions handle everything in asm
238+
llvm::FunctionType *asmFuncType =
239+
llvm::FunctionType::get(llvm::Type::getVoidTy(gIR->context()), false);
240+
241+
llvm::InlineAsm *inlineAsm = llvm::InlineAsm::get(
242+
asmFuncType,
243+
asmBody,
244+
"", // No constraints
245+
true, // Has side effects
246+
false, // Not align stack
247+
llvm::InlineAsm::AD_ATT // AT&T syntax
248+
);
242249

250+
gIR->ir->CreateCall(inlineAsm);
251+
252+
// Naked functions don't return normally through LLVM IR
253+
gIR->ir->CreateUnreachable();
254+
255+
// Restore insert point
256+
gIR->ir->restoreIP(savedIP);
257+
258+
// Handle DLL export on Windows
243259
if (global.params.dllexport ||
244-
(global.params.targetTriple->isOSWindows() && fd->isExport())) {
245-
// Embed a linker switch telling the MS linker to export the naked function.
246-
// This mimics the effect of the dllexport attribute for regular functions.
247-
const auto linkerSwitch = std::string("/EXPORT:") + mangle;
248-
gIR->addLinkerOption(llvm::StringRef(linkerSwitch));
260+
(triple.isOSWindows() && fd->isExport())) {
261+
func->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
249262
}
250263
}
251264

@@ -436,7 +449,7 @@ DValue *DtoInlineAsmExpr(Loc loc, FuncDeclaration *fd,
436449
LLSmallVector<LLValue *, 8> operands;
437450
LLSmallVector<LLType *, 8> indirectTypes;
438451
operands.reserve(n);
439-
452+
440453
Type *returnType = fd->type->nextOf();
441454
const size_t cisize = constraintInfo.size();
442455
const size_t minRequired = n + (returnType->ty == TY::Tvoid ? 0 : 1);
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
// Tests corner cases for naked functions with DMD-style inline asm.
2+
//
3+
// This tests:
4+
// 1. Stack manipulation (push/pop)
5+
// 2. Forward and backward jumps
6+
// 3. Nested labels
7+
// 4. Naked function calling convention
8+
9+
// REQUIRES: target_X86
10+
11+
// RUN: %ldc -mtriple=x86_64-linux-gnu -O0 -output-s -of=%t.s %s
12+
// RUN: FileCheck %s --check-prefix=ASM < %t.s
13+
14+
// RUN: %ldc -mtriple=x86_64-linux-gnu -O0 -run %s
15+
16+
module naked_asm_corner_cases;
17+
18+
// Test 1: Stack manipulation with push/pop
19+
// ASM-LABEL: stackManipulation:
20+
// ASM-NOT: pushq %rbp
21+
// ASM-NOT: movq %rsp, %rbp
22+
// ASM: pushq %rbx
23+
// ASM: movl $42, %eax
24+
// ASM: movl %eax, %ebx
25+
// ASM: movl %ebx, %eax
26+
// ASM: popq %rbx
27+
// ASM: retq
28+
// Naked function that returns 42 in EAX after copying the value through RBX,
// which it saves on entry and restores before returning.
extern(C) int stackManipulation() {
29+
asm { naked; }
30+
asm {
31+
push RBX; // Save callee-saved register
32+
mov EAX, 42;
33+
mov EBX, EAX; // Use the saved register
34+
mov EAX, EBX; // Copy back so the result is in EAX
35+
pop RBX; // Restore
36+
ret;
37+
}
38+
}
39+
40+
// Test 2: Forward jump (jump to label defined later)
41+
// ASM-LABEL: forwardJump:
42+
// ASM: jmp .LforwardJump_skip
43+
// ASM: .LforwardJump_skip:
44+
// ASM: retq
45+
// Naked function that returns 1: the jump to `skip` bypasses the instruction
// that would overwrite EAX with 0. `skip` is referenced before it is defined.
extern(C) int forwardJump() {
46+
asm { naked; }
47+
asm {
48+
mov EAX, 1;
49+
jmp skip; // Forward jump
50+
mov EAX, 0; // Should be skipped
51+
skip:
52+
ret;
53+
}
54+
}
55+
56+
// Test 3: Backward jump (loop)
57+
// ASM-LABEL: backwardJump:
58+
// ASM: .LbackwardJump_again:
59+
// ASM: incl %eax
60+
// ASM: cmpl $5, %eax
61+
// ASM: jl .LbackwardJump_again
62+
// Naked function that counts EAX up from 0 and returns once it reaches 5.
// `again` is defined before the jump that targets it.
extern(C) int backwardJump() {
63+
asm { naked; }
64+
asm {
65+
xor EAX, EAX; // EAX = 0
66+
again:
67+
inc EAX;
68+
cmp EAX, 5;
69+
jl again; // Backward jump
70+
ret;
71+
}
72+
}
73+
74+
// Test 4: Multiple control flow paths
75+
// ASM-LABEL: multiPath:
76+
// ASM: .LmultiPath_path1:
77+
// ASM: .LmultiPath_path2:
78+
// ASM: .LmultiPath_done:
79+
// Naked function with three labels: returns 10 when x == 0 (via `path1`) and
// 20 otherwise (via `path2`); both paths end at `done`.
// The instructions are only compiled under version(D_InlineAsm_X86_64).
extern(C) int multiPath(int x) {
80+
asm { naked; }
81+
version(D_InlineAsm_X86_64) asm {
82+
// x is in EDI on SysV ABI
83+
test EDI, EDI;
84+
jz path1; // x == 0
85+
jmp path2; // x != 0
86+
path1:
87+
mov EAX, 10;
88+
jmp done;
89+
path2:
90+
mov EAX, 20; // falls through into `done`
91+
done:
92+
ret;
93+
}
94+
}
95+
96+
// Test 5: Naked function with static variable declaration (triggers Declaration_codegen)
97+
// This tests that static declarations inside naked functions work correctly.
98+
// The visitor's ExpStatement::visit calls Declaration_codegen for these,
99+
// which requires an active IR insert point.
100+
// ASM-LABEL: nakedWithStaticDecl:
101+
// ASM: movl $42, %eax
102+
// ASM: retq
103+
// Naked function that returns 42 and additionally contains a static
// declaration ahead of the asm. The asm does not read `staticVal`; the
// declaration is there only so that it has to be code-generated.
extern(C) int nakedWithStaticDecl() {
104+
// Static variable declaration - triggers Declaration_codegen in visitor
105+
static immutable int staticVal = 42;
106+
asm { naked; }
107+
asm {
108+
mov EAX, 42; // Use literal value since asm can't reference D variables
109+
ret;
110+
}
111+
}
112+
113+
// Test 6: Runtime verification
114+
// Runtime check: calls every naked function above and asserts on the value
// it returns.
void main() {
115+
// Verify stack manipulation works
116+
assert(stackManipulation() == 42, "stackManipulation failed");
117+
118+
// Verify forward jump works
119+
assert(forwardJump() == 1, "forwardJump failed");
120+
121+
// Verify backward jump (loop) works
122+
assert(backwardJump() == 5, "backwardJump failed");
123+
124+
// Verify multi-path control flow: 0 takes path1, non-zero takes path2
125+
assert(multiPath(0) == 10, "multiPath(0) failed");
126+
assert(multiPath(1) == 20, "multiPath(1) failed");
127+
128+
// Verify naked function with static declaration works
129+
assert(nakedWithStaticDecl() == 42, "nakedWithStaticDecl failed");
130+
}

0 commit comments

Comments
 (0)