Skip to content

Commit 57ebbc8

Browse files
authored
Merge pull request #1694 from Shaikh-Ubaid/elf_structs
X64: Use Struct for ELF Ehdr and Phdr headers
2 parents f6bcbd2 + 1a99f9a commit 57ebbc8

File tree

4 files changed

+279
-99
lines changed

4 files changed

+279
-99
lines changed

src/libasr/codegen/asr_to_wasm.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include <chrono>
44
#include <iomanip>
55
#include <fstream>
6+
#include <climits>
67

78
#include <libasr/asr.h>
89
#include <libasr/exception.h>
@@ -526,7 +527,7 @@ class ASRToWASMVisitor : public ASR::BaseVisitor<ASRToWASMVisitor> {
526527

527528
using namespace wasm;
528529
int kind = ASRUtils::extract_kind_from_ttype_t(v->m_type);
529-
uint32_t global_var_idx = -1;
530+
uint32_t global_var_idx = UINT_MAX;
530531
switch (v->m_type->type){
531532
case ASR::ttypeType::Integer: {
532533
uint64_t init_val = 0;
@@ -583,7 +584,7 @@ class ASRToWASMVisitor : public ASR::BaseVisitor<ASRToWASMVisitor> {
583584
global_var_idx = m_wa.declare_global_var(i32, 0);
584585
}
585586
}
586-
LCOMPILERS_ASSERT(global_var_idx >= 0);
587+
LCOMPILERS_ASSERT(global_var_idx < UINT_MAX);
587588
m_global_var_idx_map[get_hash((ASR::asr_t *)v)] = global_var_idx;
588589
}
589590

src/libasr/codegen/wasm_to_x64.cpp

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -581,8 +581,6 @@ class X64Visitor : public WASMDecoder<X64Visitor>,
581581
void visit_F32Sqrt() { visit_F64Sqrt(); }
582582

583583
void gen_x64_bytes() {
584-
emit_elf64_header(m_a);
585-
586584
// declare compile-time strings
587585
std::string base_memory = " "; /* in wasm backend, memory starts after 4 bytes*/
588586
for (uint32_t i = 0; i < data_segments.size(); i++) {
@@ -592,7 +590,6 @@ class X64Visitor : public WASMDecoder<X64Visitor>,
592590

593591
NO_OF_IMPORTS = imports.size();
594592

595-
m_a.align_by_byte(0x1000);
596593
m_a.add_label("text_segment_start");
597594
for (uint32_t idx = 0; idx < type_indices.size(); idx++) {
598595
m_a.add_label(exports[idx + 1].name);
@@ -616,9 +613,9 @@ class X64Visitor : public WASMDecoder<X64Visitor>,
616613
emit_double_const(m_a, d.first, d.second);
617614
}
618615

616+
m_a.align_by_byte(0x1000);
619617
m_a.add_label("text_segment_end");
620618

621-
m_a.align_by_byte(0x1000);
622619
m_a.add_label("data_segment_start");
623620
for (auto &s : label_to_str) {
624621
emit_data_string(m_a, s.first, s.second);
@@ -647,8 +644,6 @@ class X64Visitor : public WASMDecoder<X64Visitor>,
647644
}
648645
}
649646
m_a.add_label("data_segment_end");
650-
651-
emit_elf64_footer(m_a);
652647
}
653648
};
654649

@@ -700,15 +695,15 @@ Result<int> wasm_to_x64(Vec<uint8_t> &wasm_bytes, Allocator &al,
700695

701696
{
702697
auto t1 = std::chrono::high_resolution_clock::now();
703-
m_a.save_binary(filename);
698+
m_a.save_binary64(filename);
704699
auto t2 = std::chrono::high_resolution_clock::now();
705700
time_save =
706701
std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1)
707702
.count();
708703
}
709704

710705
//! Helpful for debugging
711-
// std::cout << x64_visitor.m_a.get_asm() << std::endl;
706+
// std::cout << x64_visitor.m_a.get_asm64() << std::endl;
712707

713708
if (time_report) {
714709
std::cout << "Codegen Time report:" << std::endl;

src/libasr/codegen/x86_assembler.cpp

Lines changed: 179 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,25 @@
1010

1111
namespace LCompilers {
1212

13+
void X86Assembler::save_binary64(const std::string &filename) {
14+
Vec<uint8_t> header = create_elf64_x86_header(
15+
m_al, origin(), get_defined_symbol("_start").value,
16+
compute_seg_size("text_segment_start", "text_segment_end"),
17+
compute_seg_size("data_segment_start", "data_segment_end"));
18+
{
19+
std::ofstream out;
20+
out.open(filename);
21+
out.write((const char*) header.p, header.size());
22+
out.write((const char*) m_code.p, m_code.size());
23+
}
24+
#ifdef LFORTRAN_LINUX
25+
int mod = 0755;
26+
if (chmod(filename.c_str(),mod) < 0) {
27+
throw AssemblerError("chmod failed");
28+
}
29+
#endif
30+
}
31+
1332
void X86Assembler::save_binary(const std::string &filename) {
1433
{
1534
std::ofstream out;
@@ -25,6 +44,36 @@ void X86Assembler::save_binary(const std::string &filename) {
2544
#endif
2645
}
2746

47+
// ELF header structure for 32-bit.
// The member order and widths mirror the on-disk ELF32 file header, so the
// struct must not be reordered or gain padding.
struct Elf32_Ehdr {
    uint8_t ident[16];   // e_ident
    uint16_t type;       // e_type
    uint16_t machine;    // e_machine
    uint32_t version;    // e_version
    uint32_t entry;      // e_entry
    uint32_t phoff;      // e_phoff
    uint32_t shoff;      // e_shoff
    uint32_t flags;      // e_flags
    uint16_t ehsize;     // e_ehsize
    uint16_t phentsize;  // e_phentsize
    uint16_t phnum;      // e_phnum
    uint16_t shentsize;  // e_shentsize
    uint16_t shnum;      // e_shnum
    uint16_t shstrndx;   // e_shstrndx
};
// Fail the build if a compiler/ABI ever lays the header out differently.
static_assert(sizeof(Elf32_Ehdr) == 52,
    "Elf32_Ehdr must be exactly 52 bytes (no padding)");
64+
65+
// Program header structure for 32-bit.
// The member order and widths mirror the on-disk ELF32 program header, so
// the struct must not be reordered or gain padding. Note that the position
// of `flags` differs from the 64-bit program header.
struct Elf32_Phdr {
    uint32_t type;    // p_type
    uint32_t offset;  // p_offset
    uint32_t vaddr;   // p_vaddr
    uint32_t paddr;   // p_paddr
    uint32_t filesz;  // p_filesz
    uint32_t memsz;   // p_memsz
    uint32_t flags;   // p_flags
    uint32_t align;   // p_align
};
// Fail the build if a compiler/ABI ever lays the header out differently.
static_assert(sizeof(Elf32_Phdr) == 32,
    "Elf32_Phdr must be exactly 32 bytes (no padding)");
76+
2877
void emit_elf32_header(X86Assembler &a, uint32_t p_flags) {
2978
/* Elf32_Ehdr */
3079
a.add_label("ehdr");
@@ -283,94 +332,142 @@ void emit_print_float(X86Assembler &a, const std::string &name) {
283332

284333
/************************* 64-bit functions **************************/
285334

286-
void emit_elf64_header(X86Assembler &a) {
287-
/* Elf64_Ehdr */
288-
a.add_label("ehdr");
289-
// e_ident
290-
a.asm_db_imm8(0x7F);
291-
a.asm_db_imm8('E');
292-
a.asm_db_imm8('L');
293-
a.asm_db_imm8('F');
294-
a.asm_db_imm8(2);
295-
a.asm_db_imm8(1);
296-
a.asm_db_imm8(1);
297-
a.asm_db_imm8(0);
335+
// ELF header structure for 64-bit.
// Objects of this type are serialized byte-for-byte into the output file
// (see append_header_bytes), so the member order and widths must match the
// on-disk ELF64 file header and the struct must not contain padding.
struct Elf64_Ehdr {
    uint8_t ident[16];   // e_ident
    uint16_t type;       // e_type
    uint16_t machine;    // e_machine
    uint32_t version;    // e_version
    uint64_t entry;      // e_entry
    uint64_t phoff;      // e_phoff
    uint64_t shoff;      // e_shoff
    uint32_t flags;      // e_flags
    uint16_t ehsize;     // e_ehsize
    uint16_t phentsize;  // e_phentsize
    uint16_t phnum;      // e_phnum
    uint16_t shentsize;  // e_shentsize
    uint16_t shnum;      // e_shnum
    uint16_t shstrndx;   // e_shstrndx
};
// Fail the build if a compiler/ABI ever lays the header out differently.
static_assert(sizeof(Elf64_Ehdr) == 64,
    "Elf64_Ehdr must be exactly 64 bytes (no padding)");
352+
353+
// Program header structure for 64-bit.
// Objects of this type are serialized byte-for-byte into the output file
// (see append_header_bytes), so the member order and widths must match the
// on-disk ELF64 program header and the struct must not contain padding.
struct Elf64_Phdr {
    uint32_t type;    // p_type
    uint32_t flags;   // p_flags
    uint64_t offset;  // p_offset
    uint64_t vaddr;   // p_vaddr
    uint64_t paddr;   // p_paddr
    uint64_t filesz;  // p_filesz
    uint64_t memsz;   // p_memsz
    uint64_t align;   // p_align
};
// Fail the build if a compiler/ABI ever lays the header out differently.
static_assert(sizeof(Elf64_Phdr) == 56,
    "Elf64_Phdr must be exactly 56 bytes (no padding)");
364+
365+
// Builds the ELF64 file header for the generated executable.
//
// `asm_entry` is stored as the entry-point address. The program header table
// is declared to start immediately after this header and to contain three
// entries; no section header table is described.
Elf64_Ehdr get_elf_header(uint64_t asm_entry) {
    // Leading identification bytes: magic number, file class (64-bit),
    // data encoding (little endian) and ELF version.
    const uint8_t ident_prefix[] = {0x7f, 'E', 'L', 'F', 2, 1, 1};

    // Value-initialization zeroes every field, which supplies the padding
    // bytes ident[7..15] as well as shoff, flags, shentsize, shnum and
    // shstrndx.
    Elf64_Ehdr hdr{};
    for (size_t i = 0; i < sizeof(ident_prefix); i++) {
        hdr.ident[i] = ident_prefix[i];
    }

    hdr.type = 2;        // e_type (ET_EXEC in the ELF specification)
    hdr.machine = 0x3e;  // e_machine (EM_X86_64 in the ELF specification)
    hdr.version = 1;     // e_version
    hdr.entry = asm_entry;
    hdr.phoff = sizeof(Elf64_Ehdr);  // program headers follow this header
    hdr.ehsize = static_cast<uint16_t>(sizeof(Elf64_Ehdr));
    hdr.phentsize = static_cast<uint16_t>(sizeof(Elf64_Phdr));
    hdr.phnum = 3;
    return hdr;
}
298398

299-
a.asm_db_imm8(0);
300-
a.asm_db_imm8(0);
301-
a.asm_db_imm8(0);
302-
a.asm_db_imm8(0);
399+
// Builds the program header for one segment that is laid out directly after
// the previous segment in the file.
//
//   flags            value stored in p_flags
//   origin_addr      address corresponding to file offset 0
//   seg_size         size of this segment (used for both file and memory size)
//   prev_seg_offset  file offset of the preceding segment
//   prev_seg_size    file size of the preceding segment
Elf64_Phdr get_seg_header(uint32_t flags, uint64_t origin_addr,
        uint64_t seg_size, uint64_t prev_seg_offset, uint64_t prev_seg_size) {
    // This segment begins where the previous one ends; its address is the
    // same distance from origin_addr as its offset is from the file start.
    const uint64_t file_offset = prev_seg_offset + prev_seg_size;
    const uint64_t load_addr = origin_addr + file_offset;

    Elf64_Phdr seg = {
        1,            // type (PT_LOAD in the ELF specification)
        flags,        // flags
        file_offset,  // offset
        load_addr,    // vaddr
        load_addr,    // paddr
        seg_size,     // filesz
        seg_size,     // memsz
        0x1000        // align
    };
    return seg;
}
303412

304-
a.asm_db_imm8(0);
305-
a.asm_db_imm8(0);
306-
a.asm_db_imm8(0);
307-
a.asm_db_imm8(0);
413+
template <typename T>
414+
void append_header_bytes(Allocator &al, T src, Vec<uint8_t> &des) {
415+
char *byteArray = (char *)&src;
416+
for (size_t i = 0; i < sizeof(src); i++) {
417+
des.push_back(al, byteArray[i]);
418+
}
419+
}
308420

309-
a.asm_dw_imm16(2); // e_type
310-
a.asm_dw_imm16(0x3e); // e_machine
311-
a.asm_dd_imm32(1); // e_version
312-
a.asm_dq_label("_start"); // e_entry
313-
a.asm_dq_label("e_phoff"); // e_phoff
314-
a.asm_dq_imm64(0); // e_shoff
315-
a.asm_dd_imm32(0); // e_flags
316-
a.asm_dw_label("ehdrsize"); // e_ehsize
317-
a.asm_dw_label("phdrsize"); // e_phentsize
318-
a.asm_dw_imm16(3); // e_phnum
319-
a.asm_dw_imm16(0); // e_shentsize
320-
a.asm_dw_imm16(0); // e_shnum
321-
a.asm_dw_imm16(0); // e_shstrndx
322421

323-
/* Elf64_Phdr */
324-
a.add_label("phdr");
325-
a.asm_dd_imm32(1); // p_type
326-
a.asm_dd_imm32(4); // p_flags (permission to read only)
327-
a.asm_dq_imm64(0); // p_offset
328-
a.asm_dq_imm64(a.origin()); // p_vaddr
329-
a.asm_dq_imm64(a.origin()); // p_paddr
330-
a.asm_dq_label("phdr_size"); // p_filesz
331-
a.asm_dq_label("phdr_size"); // p_memsz
332-
a.asm_dq_imm64(0x1000); // p_align
333-
334-
/* text_segment_phdr */
335-
a.add_label("text_phdr");
336-
a.asm_dd_imm32(1); // p_type
337-
a.asm_dd_imm32(5); // p_flags (permission to read and execute)
338-
a.asm_dq_label("text_segment_offset"); // p_offset
339-
a.asm_dq_label("text_segment_start"); // p_vaddr
340-
a.asm_dq_label("text_segment_start"); // p_paddr
341-
a.asm_dq_label("text_segment_size"); // p_filesz
342-
a.asm_dq_label("text_segment_size"); // p_memsz
343-
a.asm_dq_imm64(0x1000); // p_align
344-
345-
/* data_segment_phdr */
346-
a.add_label("data_phdr");
347-
a.asm_dd_imm32(1); // p_type
348-
a.asm_dd_imm32(6); // p_flags (permission to read and write)
349-
a.asm_dq_label("data_segment_offset"); // p_offset
350-
a.asm_dq_label("data_segment_start"); // p_vaddr
351-
a.asm_dq_label("data_segment_start"); // p_paddr
352-
a.asm_dq_label("data_segment_size"); // p_filesz
353-
a.asm_dq_label("data_segment_size"); // p_memsz
354-
a.asm_dq_imm64(0x1000); // p_align
355-
}
422+
// Appends zero bytes to `code` until its size is a multiple of `alignment`.
// Nothing is appended when the size is already aligned or when `alignment`
// is 0 (there is no meaningful boundary to pad to in that case).
void align_by_byte(Allocator &al, Vec<uint8_t> &code, uint64_t alignment) {
    if (alignment == 0) {
        return;
    }
    // Pure integer arithmetic: exact for every 64-bit size, unlike rounding
    // through a double.
    const uint64_t remainder = code.size() % alignment;
    if (remainder == 0) {
        return;
    }
    const uint64_t padding_size = alignment - remainder;
    for (uint64_t i = 0; i < padding_size; i++) {
        code.push_back(al, 0);
    }
}
356429

357-
void emit_elf64_footer(X86Assembler &a) {
358-
a.add_var("ehdrsize", "ehdr", "phdr");
359-
a.add_var("phdrsize", "phdr", "text_phdr");
360-
a.add_var64("e_phoff", "ehdr", "phdr");
361-
a.add_var64("phdr_size", "ehdr", "text_segment_start");
362-
a.add_var64("text_segment_offset", "ehdr", "text_segment_start");
363-
a.add_var64("text_segment_size", "text_segment_start", "text_segment_end");
364-
a.add_var64("data_segment_offset", "ehdr", "data_segment_start");
365-
a.add_var64("data_segment_size", "data_segment_start", "data_segment_end");
366-
}
430+
// Builds the block of bytes that precedes the machine code in the output
// file: the ELF64 header followed by three program headers (header segment,
// text segment, data segment), zero-padded to a 0x1000 boundary.
//
//   origin         address at which the assembled code starts
//   entry          entry-point address stored in the ELF header
//   text_seg_size  size of the text segment
//   data_seg_size  size of the data segment
Vec<uint8_t> create_elf64_x86_header(Allocator &al, uint64_t origin, uint64_t entry,
        uint64_t text_seg_size, uint64_t data_seg_size) {
    /*
        The header segment holds the ELF header and the program headers.
        Their combined size is currently
            sizeof(Elf64_Ehdr) + 3 * sizeof(Elf64_Phdr) = 64 + 3 * 56 = 232
        Because it is a segment, it is padded with zero bytes up to the
        0x1000 boundary, which makes the header segment 0x1000 bytes long.
        That size is hardcoded here for now.

        TODO: compute the header segment size dynamically, depending on
        which segments are present.
    */
    const int header_seg_size = 0x1000;

    // The header segment sits in front of the code, so the address that
    // corresponds to file offset 0 is one header segment below `origin`.
    const uint64_t base_addr = origin - header_seg_size;

    const Elf64_Ehdr elf_hdr = get_elf_header(entry);
    // Segment flags: 4 = read, 5 = read + execute, 6 = read + write.
    const Elf64_Phdr hdr_seg = get_seg_header(
        4, base_addr, header_seg_size, 0, 0);
    const Elf64_Phdr text_seg = get_seg_header(
        5, base_addr, text_seg_size, hdr_seg.offset, hdr_seg.filesz);
    const Elf64_Phdr data_seg = get_seg_header(
        6, base_addr, data_seg_size, text_seg.offset, text_seg.filesz);

    Vec<uint8_t> header;
    header.reserve(al, header_seg_size);

    append_header_bytes(al, elf_hdr, header);
    append_header_bytes(al, hdr_seg, header);
    append_header_bytes(al, text_seg, header);
    append_header_bytes(al, data_seg, header);

    // Zero-fill the remainder of the header segment.
    LCompilers::align_by_byte(al, header, 0x1000);

    return header;
}
375472

376473
void emit_print_64(X86Assembler &a, const std::string &msg_label, uint64_t size)

0 commit comments

Comments
 (0)