Skip to content
5 changes: 3 additions & 2 deletions lib/caotral.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def compile!(input:, assembler: "as", linker: "ld", output: "tmp", debug: false,
execf = "#{basename}#{File.extname(d)}"
compile(input:, output: basename+".s", debug:, shared:)
assemble(input: basename+".s", output: basename+".o", assembler:, debug:, shared:)
link(input: basename+".o", output: execf, linker:, debug:, shared:)
link(input: [basename+".o"], output: execf, linker:, debug:, shared:)
end
def compile(input:, output: "tmp.s", debug: false, shared: false)
Caotral::Compiler.compile!(input:, output:, debug:)
Expand All @@ -23,6 +23,7 @@ def assemble(input:, output: "tmp.o", debug: false, shared: false, assembler: "a
Caotral::Assembler.assemble!(input:, output:, debug:, assembler:, shared:)
end
# Links one or more object files into +output+.
#
# +input+ may be a single entry or an Array of entries; either way it is
# forwarded to Caotral::Linker.link! as an Array under the +inputs:+ keyword.
# All other keywords are passed through unchanged.
def link(input:, output: "tmp", linker: "ld", debug: false, shared: false)
  inputs = input.is_a?(Array) ? input : [input]
  Caotral::Linker.link!(inputs:, output:, linker:, debug:, shared:)
end
end
1 change: 1 addition & 0 deletions lib/caotral/binary/elf.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def find_by_name(section_name) = @sections.find { |s| section_name == s.section_
# Returns every section whose name equals +section_name+.
def select_by_name(section_name)
  @sections.select { |section| section_name == section.section_name }
end

# Returns the position of the first section named +section_name+, or nil.
def index(section_name)
  @sections.index { |section| section_name == section.section_name }
end

# Returns the sections whose stringified name matches (via ===) at least one
# entry of +section_names+.
def select_by_names(section_names)
  @sections.select do |section|
    label = section.section_name.to_s
    section_names.any? { |pattern| pattern === label }
  end
end

# Returns the sections whose stringified name matches (via ===) none of the
# entries of +names+.
def without_sections(names)
  @sections.reject do |section|
    label = section.section_name.to_s
    names.any? { |pattern| pattern === label }
  end
end
end
end
end
4 changes: 2 additions & 2 deletions lib/caotral/binary/elf/section/rel.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def initialize(addend: true)
# Updates the stored byte arrays for whichever of +offset+, +info+ and
# +addend+ pass +check+, then returns self so calls can be chained.
#
# +offset+ and +info+ go through +num2bytes+; +addend+ is packed as a signed
# little-endian 64-bit integer so that negative addends round-trip.
def set!(offset: nil, info: nil, addend: nil)
  @offset = num2bytes(offset, 8) if check(offset, 8)
  @info = num2bytes(info, 8) if check(info, 8)
  @addend = [addend].pack("q<").unpack("C*") if check(addend, 8)
  self
end

Expand All @@ -23,7 +23,7 @@ def offset = @offset.pack("C*").unpack1("Q<")
# Decodes the stored info bytes as an unsigned little-endian 64-bit integer.
def info
  raw = @info.pack("C*")
  raw.unpack1("Q<")
end
# Decodes the stored addend bytes as a signed little-endian 64-bit integer.
#
# Raises RuntimeError when +addend?+ is false for this entry.
def addend
  raise "No addend field in this REL entry" unless addend?
  @addend.pack("C*").unpack1("q<")
end
# Upper 32 bits of the 64-bit info word.
def sym
  word = @info.pack("C*").unpack1("Q<")
  word >> 32
end

# Lower 32 bits of the 64-bit info word.
def type
  word = @info.pack("C*").unpack1("Q<")
  word & 0xffffffff
end
Expand Down
3 changes: 3 additions & 0 deletions lib/caotral/binary/elf/section/symtab.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ def set!(name: nil, info: nil, other: nil, shndx: nil, value: nil, size: nil)
# Name field decoded as an unsigned little-endian 32-bit integer.
def name_offset
  @name.pack("C*").unpack1("L<")
end

# Value field decoded as an unsigned little-endian 64-bit integer.
def value
  @value.pack("C*").unpack1("Q<")
end

# Info field decoded as a single unsigned byte.
def info
  @info.pack("C*").unpack1("C")
end

# Section index field decoded as an unsigned little-endian 16-bit integer.
def shndx
  @shndx.pack("C*").unpack1("S<")
end

# Upper nibble of the info byte.
def bind
  info >> 4
end

# Lower nibble of the info byte.
def type
  info & 0x0f
end

# Field byte arrays in the order name, info, other, shndx, value, size.
private def bytes
  [@name, @info, @other, @shndx, @value, @size]
end
end
Expand Down
21 changes: 11 additions & 10 deletions lib/caotral/linker.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,21 @@

module Caotral
class Linker
# Convenience entry point: builds a linker from the given keywords and runs
# #link on it, returning whatever #link returns.
def self.link!(inputs:, output: "a.out", linker: "mold", debug: false, shared: false)
  new(inputs:, output:, linker:, debug:, shared:).link
end

# Stores the link configuration.
#
# inputs::         collection of inputs to link (iterated by #to_elf and
#                  splatted into the command line by #link_command)
# output::         path of the file to produce
# linker::         linker program name; "self" selects the built-in ELF path
# linker_options:: extra arguments spliced into the external linker command
# shared, debug::  flags kept for the command builder and the ELF reader/writer
def initialize(inputs:, output: "a.out", linker: "mold", linker_options: [], shared: false, debug: false)
  @inputs, @output, @linker = inputs, output, linker
  @options = linker_options
  @debug, @shared = debug, shared
end

# Performs the link step.
#
# When the configured linker is "self" the built-in ELF path (#to_elf) is
# used and its result returned. Otherwise the command from #link_command is
# spawned and its pipe closed. +inputs+ and +output+ are forwarded on both
# paths so per-call overrides are honoured.
# +shared+ is accepted for interface compatibility and is not read here.
def link(inputs: @inputs, output: @output, debug: @debug, shared: @shared)
  return to_elf(inputs:, output:, debug:) if @linker == "self"

  IO.popen(link_command(inputs:, output:)).close
end

def link_command(input: @input, output: @output, debug: @debug, shared: @shared)
def link_command(inputs: @inputs, output: @output)
ld_path = []

if @shared
Expand All @@ -39,18 +39,19 @@ def link_command(input: @input, output: @output, debug: @debug, shared: @shared)

ld_path << "#{libpath}/libc.so"
ld_path << "#{libpath}/crtn.o"
cmd = [@linker, "-o", @output, "-m", "elf_x86_64", *@options, *ld_path, @input].join(' ')
cmd = [@linker, "-o", @output, "-m", "elf_x86_64", *@options, *ld_path, *inputs].join(' ')
puts cmd if @debug
cmd
end

# Directory of the last crti.o matched below /usr/lib*, memoized in @libpath.
def libpath
  @libpath ||= begin
    crti_hits = Dir.glob("/usr/lib*/**/crti.o")
    File.dirname(crti_hits.last)
  end
end

# Directory of the last crtbegin.o matched below /usr/lib/gcc/x86_64-*/*,
# memoized in @gcc_libpath.
def gcc_libpath
  @gcc_libpath ||= begin
    crtbegin_hits = Dir.glob("/usr/lib/gcc/x86_64-*/*/crtbegin.o")
    File.dirname(crtbegin_hits.last)
  end
end

# Built-in link path: reads every input with the ELF reader, lets the builder
# check symbols and merge the objects, then writes the result to +output+.
#
# resolve_symbols is called before build so that a duplicate-symbol error
# aborts the link before any merging happens.
# Returns whatever the writer's #write returns.
def to_elf(inputs: @inputs, output: @output, debug: @debug)
  elf_objs = inputs.map { |input| Caotral::Binary::ELF::Reader.new(input:, debug:).read }
  builder = Caotral::Linker::Builder.new(elf_objs:)
  builder.resolve_symbols
  elf_obj = builder.build
  Caotral::Linker::Writer.new(elf_obj:, output:, debug:).write
end
end
Expand Down
255 changes: 244 additions & 11 deletions lib/caotral/linker/builder.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,27 +4,260 @@
module Caotral
class Linker
class Builder
include Caotral::Binary::ELF::Utils
# Relocation type numbers for the two x86-64 relocation kinds this builder applies.
R_X86_64_PC32 = 2
R_X86_64_PLT32 = 4
# Binding value (info byte >> 4) for each bucket of the #symbols hash.
SYMTAB_BIND = { locals: 0, globals: 1, weaks: 2 }.freeze
# Reverse lookup: binding value => bucket name.
BIND_BY_VALUE = SYMTAB_BIND.invert.freeze
# Relocation sections that are carried over from the input objects.
RELOCATION_SECTION_NAMES = [".rela.text", ".rel.text"].freeze
# Relocation types that #build patches into section bytes; others are skipped.
ALLOW_RELOCATION_TYPES = [R_X86_64_PC32, R_X86_64_PLT32].freeze

attr_reader :symbols, :executable, :debug

# elf_objs::   the parsed ELF objects to merge, in link order
# executable:: when true, #build requires a "main" symbol and drops the
#              relocation sections from the result
# debug::      stored and exposed through the reader; not otherwise used here
def initialize(elf_objs:, executable: true, debug: false)
  @elf_objs = elf_objs
  # Symbol names seen by #resolve_symbols, bucketed by binding.
  @symbols = { locals: Set.new, globals: Set.new, weaks: Set.new }
  @executable, @debug = executable, debug
end

# Merges every input object into a single ELF image and returns it.
#
# In order: concatenates each object's .text behind a generated entry stub,
# merges the string and symbol tables, rebases the .rel(a).text entries,
# remaps symbol section indexes, and finally patches the relocation types
# listed in ALLOW_RELOCATION_TYPES into the target section bytes. Sections
# other than .text/.strtab/.symtab/.shstrtab/.rel* are copied from the first
# object only.
#
# Raises Caotral::Binary::ELF::Error when there are no objects to link, or
# when an executable is requested and no symbol named "main" exists.
def build
  raise Caotral::Binary::ELF::Error, "no ELF objects to link" if @elf_objs.empty?
  elf = Caotral::Binary::ELF.new
  first_obj = @elf_objs.first
  null_section = Caotral::Binary::ELF::Section.new(
    body: nil,
    section_name: "",
    header: Caotral::Binary::ELF::SectionHeader.new
  )
  text_section = Caotral::Binary::ELF::Section.new(
    body: String.new,
    section_name: ".text",
    header: Caotral::Binary::ELF::SectionHeader.new
  )
  strtab_section = Caotral::Binary::ELF::Section.new(
    body: Caotral::Binary::ELF::Section::Strtab.new("\0".b),
    section_name: ".strtab",
    header: Caotral::Binary::ELF::SectionHeader.new
  )
  symtab_section = Caotral::Binary::ELF::Section.new(
    body: [],
    section_name: ".symtab",
    header: Caotral::Binary::ELF::SectionHeader.new
  )
  shstrtab_section = Caotral::Binary::ELF::Section.new(
    body: Caotral::Binary::ELF::Section::Strtab.new("\0".b),
    section_name: ".shstrtab",
    header: Caotral::Binary::ELF::SectionHeader.new
  )
  # Entry stub placed in front of the merged .text:
  #   e8 <rel32>            call <main>   (displacement patched below)
  #   48 89 c7              mov rdi, rax
  #   48 c7 c0 3c 00 00 00  mov rax, 0x3c
  #   0f 05                 syscall
  start_bytes = [0xe8, *[0] * 4, 0x48, 0x89, 0xc7, 0x48, 0xc7, 0xc0, 0x3c, 0x00, 0x00, 0x00, 0x0f, 0x05]
  exec_text_offset = 0x1000
  base_addr = 0x400000
  vaddr = base_addr + exec_text_offset
  start_len = start_bytes.length
  sections = []
  rel_sections = []
  elf.header = first_obj.header.dup
  strtab_names = []
  text_offsets = {}
  text_offset = 0
  sym_by_elf = Hash.new { |h, k| h[k] = [] }
  @elf_objs.each do |elf_obj|
    text = elf_obj.find_by_name(".text")
    unless text.nil?
      text_section.body << text.body
      # Remember where this object's code starts inside the merged .text.
      text_offsets[elf_obj.object_id] = text_offset
      text_offset += text.body.bytesize
    end
    strtab = elf_obj.find_by_name(".strtab")
    strtab_names.concat(strtab.body.names.split("\0")) unless strtab.nil?
    symtab = elf_obj.find_by_name(".symtab")
    base_index = nil
    unless symtab.nil?
      # Index of this object's first symbol in the merged (not yet sorted) table.
      base_index = symtab_section.body.size
      symtab.body.each do |st|
        sym = Caotral::Binary::ELF::Section::Symtab.new
        name, info, other, shndx, value, size = st.build.unpack("L<CCS<Q<Q<")
        sym_by_elf[elf_obj] << sym
        # Defined symbols move by the offset of their object's code.
        value += text_offsets.fetch(elf_obj.object_id, 0) if shndx != 0
        sym.set!(name:, info:, other:, shndx:, value:, size:)
        sym.name_string = strtab.body.lookup(name) unless strtab.nil?
        symtab_section.body << sym
      end
    end
    rels = elf_obj.select_by_names(RELOCATION_SECTION_NAMES).map do |section|
      rel_section = Caotral::Binary::ELF::Section.new(
        body: [],
        section_name: section.section_name,
        header: Caotral::Binary::ELF::SectionHeader.new
      )
      section.body.each do |rel|
        offset = rel.offset + text_offsets.fetch(elf_obj.object_id, 0)
        addend = rel.addend? ? rel.addend : nil
        new_rel = Caotral::Binary::ELF::Section::Rel.new(addend: rel.addend?)
        # Point at the symbol's position in the merged, pre-sort table.
        sym = base_index.nil? ? rel.sym : base_index + rel.sym
        info = (sym << 32) | rel.type
        new_rel.set!(offset:, info:, addend:)
        rel_section.body << new_rel
      end
      rel_section
    end
    rel_sections += rels
  end
  strtab_section.body.names = strtab_names.sort.join("\0") + "\0"
  sections << null_section

  main_sym = symtab_section.body.find { |sym| sym.name_string == "main" }
  raise Caotral::Binary::ELF::Error, "main function not found" if executable && main_sym.nil?
  main_offset = main_sym.nil? ? 0 : main_sym.value + start_len
  # The call displacement is relative to the end of the 5-byte call instruction.
  start_bytes[1, 4] = num2bytes((main_offset - 5), 4)
  text_section.body.prepend(start_bytes.pack("C*"))

  text_section.header.set!(
    type: 1,
    flags: 6,
    addr: vaddr,
    offset: exec_text_offset,
    size: text_section.body.bytesize,
    addralign: 16
  )

  sections << text_section
  strtab_section.header.set!(type: 3, flags: 0, addralign: 1, entsize: 0)
  sections << strtab_section
  symtab_section.body.each do |sym|
    next if sym.shndx == 0
    name = strtab_section.body.offset_of(sym.name_string)
    # Account for the entry stub that now precedes all object code.
    value = sym.value + start_len
    sym.set!(name:, value:)
  end

  # Snapshot of the pre-sort order: relocation entries still index into it.
  old_syms = symtab_section.body.dup
  # Order by binding. The original position is the tie-breaker because
  # sort_by alone is not stable and would shuffle symbols of equal binding.
  by_binding = symtab_section.body.each_with_index.sort_by { |sym, position| [sym.info >> 4, position] }
  symtab_section.body.replace(by_binding.map(&:first))
  local_count = symtab_section.body.count { |sym| (sym.info >> 4) == SYMTAB_BIND[:locals] }

  symtab_section.header.set!(
    type: 2,
    flags: 0,
    # Looked up in the local list: elf.sections is only filled at the very end.
    link: sections.index(strtab_section),
    info: local_count,
    addralign: 8,
    entsize: 24
  )

  sections << symtab_section

  rel_sections.each { |s| sections << s.dup }

  shstrtab_section.header.set!(
    type: 3,
    flags: 0,
    addralign: 1,
    entsize: 0
  )

  first_obj.without_sections([".text", ".strtab", ".symtab", ".shstrtab", /\.rela?\./]).each do |section|
    sections << section.dup
  end

  sections << shstrtab_section

  shstrtab_section_names = [*sections.map(&:section_name), "\0"].join("\0")
  shstrtab_section.body.names = shstrtab_section_names

  # Per object: old section index => index of the same-named output section.
  section_map = Hash.new { |h, k| h[k] = {} }
  @elf_objs.each do |elf_obj|
    elf_obj.sections.each_with_index do |section, index|
      newndx = sections.index { |s| s.section_name == section.section_name }
      section_map[elf_obj][index] = newndx unless newndx.nil?
    end
  end

  # Name => sorted-table index of the first defined global with that name.
  resolved_index = {}
  # Symbol object => its index in the sorted table.
  sorted_index = {}.compare_by_identity
  symtab_section.body.each_with_index do |sym, index|
    sorted_index[sym] = index
    name = sym.name_string
    next if name.empty? || sym.shndx == 0 || sym.bind != SYMTAB_BIND[:globals]
    resolved_index[name] ||= index
  end

  sym_by_elf.each do |elf_obj, syms|
    syms.each do |sym|
      next if sym.shndx == 0
      shndx = section_map[elf_obj][sym.shndx]
      sym.set!(shndx:)
    end
  end

  rel_sections.each do |rel_section|
    rel_section.body.each do |rel|
      orig_sym = old_syms[rel.sym]
      next if orig_sym.nil?
      name = orig_sym.name_string
      # Prefer the defined global of that name; otherwise follow the symbol to
      # its post-sort position so the entry does not keep a stale index.
      new_index = resolved_index[name] || sorted_index[orig_sym]
      next if new_index.nil?
      rel.set!(info: (new_index << 32) | rel.type)
    end

    rel_section.header.set!(
      type: rel_type(rel_section),
      flags: 0,
      link: sections.index(symtab_section),
      info: ref_index(sections, rel_section.section_name),
      addralign: 8,
      entsize: rel_entsize(rel_section)
    )
  end

  rel_sections.each do |rel|
    target = sections[rel.header.info]
    bytes = target.body.dup
    symtab_body = symtab_section.body
    rel.body.each do |entry|
      next unless ALLOW_RELOCATION_TYPES.include?(entry.type)
      sym = symtab_body[entry.sym]
      next if sym.nil? || sym.shndx == 0
      target_addr = target == text_section ? vaddr : target.header.addr
      sym_addr = sym.shndx >= 0xff00 ? sym.value : sections[sym.shndx].then { |st| st.header.addr + sym.value }
      # Entry offsets are relative to the object code, which sits after the stub.
      sym_offset = entry.offset + start_len
      # Without an explicit addend, the addend is the value already in place.
      sym_addend = entry.addend? ? entry.addend : bytes[sym_offset, 4].unpack1("l<")
      value = sym_addr + sym_addend - (target_addr + sym_offset)
      bytes[sym_offset, 4] = [value].pack("l<")
    end
    target.body = bytes
  end

  sections = sections.reject { |section| RELOCATION_SECTION_NAMES.any? { |name| name === section.section_name.to_s } } if executable
  sections.each { |section| elf.sections << section }

  elf
end

# Collects the symbol names of every input object into #symbols, bucketed by
# binding, and returns that hash.
#
# Raises Caotral::Binary::ELF::Error when two entries *define* the same
# global name (section index != 0). Undefined references never count as a
# definition, so the order in which a reference and its definition appear
# does not matter. Objects without a .symtab are skipped. Symbols with an
# empty name are ignored.
def resolve_symbols
  # Names of globals that have a definition; tracked separately from
  # @symbols[:globals], which also contains undefined references.
  defined_globals = Set.new
  @elf_objs.each do |elf_obj|
    symtab = elf_obj.find_by_name(".symtab")
    next if symtab.nil?

    symtab.body.each do |entry|
      name = entry.name_string
      next if name.empty?
      bind = BIND_BY_VALUE.fetch(entry.info >> 4)
      # Set#add? returns nil when the name was already present.
      if bind == :globals && entry.shndx != 0 && !defined_globals.add?(name)
        raise Caotral::Binary::ELF::Error, "cannot add into globals: #{name}"
      end
      @symbols[bind] << name
    end
  end
  @symbols
end

private
# Returns the index in +sections+ of the section that the relocation section
# named +section_name+ applies to, or nil when no such section is present.
#
# The target name is obtained by removing only the leading ".rel" / ".rela"
# component, so ".rela.text" maps to ".text" and ".rela.data.rel.ro" maps to
# ".data.rel.ro".
#
# Raises Caotral::Binary::ELF::Error when +section_name+ is nil.
def ref_index(sections, section_name)
  raise Caotral::Binary::ELF::Error, "invalid section name: #{section_name}" if section_name.nil?
  target_name = section_name.to_s.sub(/\A\.rela?(?=\.)/, "")
  sections.index { |section| target_name == section.section_name.to_s }
end

# Section header type for a relocation section: 4 when its name starts with
# ".rela.", otherwise (including a nil name) 9.
def rel_type(section)
  with_addend = section.section_name&.start_with?(".rela.")
  with_addend ? 4 : 9
end

# Entry size for a relocation section: 24 when its name starts with ".rela.",
# otherwise (including a nil name) 16.
def rel_entsize(section)
  with_addend = section.section_name&.start_with?(".rela.")
  with_addend ? 24 : 16
end
end
end
end
Loading