diff --git a/exe/yarv b/exe/yarv deleted file mode 100755 index 3efb23ff..00000000 --- a/exe/yarv +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true - -$:.unshift(File.expand_path("../lib", __dir__)) - -require "syntax_tree" - -# Require these here so that we can run binding.irb without having them require -# anything that we've already patched. -require "irb" -require "irb/completion" -require "irb/color_printer" -require "readline" - -# First, create an instance of our virtual machine. -events = - if ENV["DEBUG"] - SyntaxTree::YARV::VM::STDOUTEvents.new - else - SyntaxTree::YARV::VM::NullEvents.new - end - -vm = SyntaxTree::YARV::VM.new(events) - -# Next, set up a bunch of aliases for methods that we're going to hook into in -# order to set up our virtual machine. -class << Kernel - alias yarv_require require - alias yarv_require_relative require_relative - alias yarv_load load - alias yarv_eval eval - alias yarv_throw throw - alias yarv_catch catch -end - -# Next, patch the methods that we just aliased so that they use our virtual -# machine's versions instead. This allows us to load Ruby files and have them -# execute in our virtual machine instead of the runtime environment. -[Kernel, Kernel.singleton_class].each do |klass| - klass.define_method(:require) { |filepath| vm.require(filepath) } - - klass.define_method(:load) { |filepath| vm.load(filepath) } - - # klass.define_method(:require_relative) do |filepath| - # vm.require_relative(filepath) - # end - - # klass.define_method(:eval) do | - # source, - # binding = TOPLEVEL_BINDING, - # filename = "(eval)", - # lineno = 1 - # | - # vm.eval(source, binding, filename, lineno) - # end - - # klass.define_method(:throw) { |tag, value = nil| vm.throw(tag, value) } - - # klass.define_method(:catch) { |tag, &block| vm.catch(tag, &block) } -end - -# Finally, require the file that we want to execute. -vm.require_resolved(ARGV.shift) diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index 2c824f71..90fb7fe7 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -35,7 +35,6 @@ module SyntaxTree autoload :PrettyPrintVisitor, "syntax_tree/pretty_print_visitor" autoload :Search, "syntax_tree/search" autoload :WithScope, "syntax_tree/with_scope" - autoload :YARV, "syntax_tree/yarv" # This holds references to objects that respond to both #parse and #format # so that we can use them in the CLI. diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb deleted file mode 100644 index bd5c54b9..00000000 --- a/lib/syntax_tree/yarv.rb +++ /dev/null @@ -1,36 +0,0 @@ -# frozen_string_literal: true - -require "stringio" - -require_relative "yarv/basic_block" -require_relative "yarv/bf" -require_relative "yarv/calldata" -require_relative "yarv/compiler" -require_relative "yarv/control_flow_graph" -require_relative "yarv/data_flow_graph" -require_relative "yarv/decompiler" -require_relative "yarv/disassembler" -require_relative "yarv/instruction_sequence" -require_relative "yarv/instructions" -require_relative "yarv/legacy" -require_relative "yarv/local_table" -require_relative "yarv/sea_of_nodes" -require_relative "yarv/assembler" -require_relative "yarv/vm" - -module SyntaxTree - # This module provides an object representation of the YARV bytecode. - module YARV - # Compile the given source into a YARV instruction sequence. - def self.compile(source, options = Compiler::Options.new) - SyntaxTree.parse(source).accept(Compiler.new(options)) - end - - # Compile and interpret the given source. - def self.interpret(source, options = Compiler::Options.new) - iseq = RubyVM::InstructionSequence.compile(source, **options) - iseq = InstructionSequence.from(iseq.to_a) - VM.new.run_top_frame(iseq) - end - end -end diff --git a/lib/syntax_tree/yarv/assembler.rb b/lib/syntax_tree/yarv/assembler.rb deleted file mode 100644 index a48c58fd..00000000 --- a/lib/syntax_tree/yarv/assembler.rb +++ /dev/null @@ -1,462 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - module YARV - class Assembler - class ObjectVisitor < Compiler::RubyVisitor - def visit_dyna_symbol(node) - if node.parts.empty? - :"" - else - raise CompilationError - end - end - - def visit_string_literal(node) - case node.parts.length - when 0 - "" - when 1 - raise CompilationError unless node.parts.first.is_a?(TStringContent) - node.parts.first.value - else - raise CompilationError - end - end - end - - CALLDATA_FLAGS = { - "ARGS_SPLAT" => CallData::CALL_ARGS_SPLAT, - "ARGS_BLOCKARG" => CallData::CALL_ARGS_BLOCKARG, - "FCALL" => CallData::CALL_FCALL, - "VCALL" => CallData::CALL_VCALL, - "ARGS_SIMPLE" => CallData::CALL_ARGS_SIMPLE, - "KWARG" => CallData::CALL_KWARG, - "KW_SPLAT" => CallData::CALL_KW_SPLAT, - "TAILCALL" => CallData::CALL_TAILCALL, - "SUPER" => CallData::CALL_SUPER, - "ZSUPER" => CallData::CALL_ZSUPER, - "OPT_SEND" => CallData::CALL_OPT_SEND, - "KW_SPLAT_MUT" => CallData::CALL_KW_SPLAT_MUT - }.freeze - - DEFINED_TYPES = [ - nil, - "nil", - "instance-variable", - "local-variable", - "global-variable", - "class variable", - "constant", - "method", - "yield", - "super", - "self", - "true", - "false", - "assignment", - "expression", - "ref", - "func", - "constant-from" - ].freeze - - attr_reader :lines - - def initialize(lines) - @lines = lines - end - - def assemble - iseq = InstructionSequence.new("
", "", 1, :top) - assemble_iseq(iseq, lines) - - iseq.compile! - iseq - end - - def self.assemble(source) - new(source.lines(chomp: true)).assemble - end - - def self.assemble_file(filepath) - new(File.readlines(filepath, chomp: true)).assemble - end - - private - - def assemble_iseq(iseq, lines) - labels = Hash.new { |hash, name| hash[name] = iseq.label } - line_index = 0 - - while line_index < lines.length - line = lines[line_index] - line_index += 1 - - case line.strip - when "", /^;/ - # skip over blank lines and comments - next - when /^(\w+):$/ - # create labels - iseq.push(labels[$1]) - next - when /^__END__/ - # skip over the rest of the file when we hit __END__ - return - end - - insn, operands = line.split(" ", 2) - - case insn - when "adjuststack" - iseq.adjuststack(parse_number(operands)) - when "anytostring" - iseq.anytostring - when "branchif" - iseq.branchif(labels[operands]) - when "branchnil" - iseq.branchnil(labels[operands]) - when "branchunless" - iseq.branchunless(labels[operands]) - when "checkkeyword" - kwbits_index, keyword_index = operands.split(/,\s*/) - iseq.checkkeyword( - parse_number(kwbits_index), - parse_number(keyword_index) - ) - when "checkmatch" - iseq.checkmatch(parse_number(operands)) - when "checktype" - iseq.checktype(parse_number(operands)) - when "concatarray" - iseq.concatarray - when "concatstrings" - iseq.concatstrings(parse_number(operands)) - when "defineclass" - body = parse_nested(lines[line_index..]) - line_index += body.length - - name_value, flags_value = operands.split(/,\s*/) - name = parse_symbol(name_value) - flags = parse_number(flags_value) - - class_iseq = iseq.class_child_iseq(name.to_s, 1) - assemble_iseq(class_iseq, body) - iseq.defineclass(name, class_iseq, flags) - when "defined" - type, object, message = operands.split(/,\s*/) - iseq.defined( - DEFINED_TYPES.index(type), - parse_symbol(object), - parse_string(message) - ) - when "definemethod" - body = parse_nested(lines[line_index..]) - line_index += body.length - - name = parse_symbol(operands) - method_iseq = iseq.method_child_iseq(name.to_s, 1) - assemble_iseq(method_iseq, body) - - iseq.definemethod(name, method_iseq) - when "definesmethod" - body = parse_nested(lines[line_index..]) - line_index += body.length - - name = parse_symbol(operands) - method_iseq = iseq.method_child_iseq(name.to_s, 1) - - assemble_iseq(method_iseq, body) - iseq.definesmethod(name, method_iseq) - when "dup" - iseq.dup - when "dupn" - iseq.dupn(parse_number(operands)) - when "duparray" - iseq.duparray(parse_type(operands, Array)) - when "duphash" - iseq.duphash(parse_type(operands, Hash)) - when "expandarray" - number, flags = operands.split(/,\s*/) - iseq.expandarray(parse_number(number), parse_number(flags)) - when "getblockparam" - lookup = find_local(iseq, operands) - iseq.getblockparam(lookup.index, lookup.level) - when "getblockparamproxy" - lookup = find_local(iseq, operands) - iseq.getblockparamproxy(lookup.index, lookup.level) - when "getclassvariable" - iseq.getclassvariable(parse_symbol(operands)) - when "getconstant" - iseq.getconstant(parse_symbol(operands)) - when "getglobal" - iseq.getglobal(parse_symbol(operands)) - when "getinstancevariable" - iseq.getinstancevariable(parse_symbol(operands)) - when "getlocal" - lookup = find_local(iseq, operands) - iseq.getlocal(lookup.index, lookup.level) - when "getspecial" - key, type = operands.split(/,\s*/) - iseq.getspecial(parse_number(key), parse_number(type)) - when "intern" - iseq.intern - when "invokeblock" - iseq.invokeblock( - operands ? parse_calldata(operands) : YARV.calldata(nil, 0) - ) - when "invokesuper" - calldata = - if operands - parse_calldata(operands) - else - YARV.calldata( - nil, - 0, - CallData::CALL_FCALL | CallData::CALL_ARGS_SIMPLE | - CallData::CALL_SUPER - ) - end - - block_iseq = - if lines[line_index].start_with?(" ") - body = parse_nested(lines[line_index..]) - line_index += body.length - - block_iseq = iseq.block_child_iseq(1) - assemble_iseq(block_iseq, body) - block_iseq - end - - iseq.invokesuper(calldata, block_iseq) - when "jump" - iseq.jump(labels[operands]) - when "leave" - iseq.leave - when "newarray" - iseq.newarray(parse_number(operands)) - when "newarraykwsplat" - iseq.newarraykwsplat(parse_number(operands)) - when "newhash" - iseq.newhash(parse_number(operands)) - when "newrange" - iseq.newrange(parse_options(operands, [0, 1])) - when "nop" - iseq.nop - when "objtostring" - iseq.objtostring(YARV.calldata(:to_s)) - when "once" - block_iseq = - if lines[line_index].start_with?(" ") - body = parse_nested(lines[line_index..]) - line_index += body.length - - block_iseq = iseq.block_child_iseq(1) - assemble_iseq(block_iseq, body) - block_iseq - end - - iseq.once(block_iseq, iseq.inline_storage) - when "opt_and" - iseq.send(YARV.calldata(:&, 1)) - when "opt_aref" - iseq.send(YARV.calldata(:[], 1)) - when "opt_aref_with" - iseq.opt_aref_with(parse_string(operands), YARV.calldata(:[], 1)) - when "opt_aset" - iseq.send(YARV.calldata(:[]=, 2)) - when "opt_aset_with" - iseq.opt_aset_with(parse_string(operands), YARV.calldata(:[]=, 2)) - when "opt_case_dispatch" - cdhash_value, else_label_value = operands.split(/\s*\},\s*/) - cdhash_value.sub!(/\A\{/, "") - - pairs = - cdhash_value - .split(/\s*,\s*/) - .map! { |pair| pair.split(/\s*=>\s*/) } - - cdhash = pairs.to_h { |value, nm| [parse(value), labels[nm]] } - else_label = labels[else_label_value] - - iseq.opt_case_dispatch(cdhash, else_label) - when "opt_div" - iseq.send(YARV.calldata(:/, 1)) - when "opt_empty_p" - iseq.send(YARV.calldata(:empty?)) - when "opt_eq" - iseq.send(YARV.calldata(:==, 1)) - when "opt_ge" - iseq.send(YARV.calldata(:>=, 1)) - when "opt_gt" - iseq.send(YARV.calldata(:>, 1)) - when "opt_getconstant_path" - iseq.opt_getconstant_path(parse_type(operands, Array)) - when "opt_le" - iseq.send(YARV.calldata(:<=, 1)) - when "opt_length" - iseq.send(YARV.calldata(:length)) - when "opt_lt" - iseq.send(YARV.calldata(:<, 1)) - when "opt_ltlt" - iseq.send(YARV.calldata(:<<, 1)) - when "opt_minus" - iseq.send(YARV.calldata(:-, 1)) - when "opt_mod" - iseq.send(YARV.calldata(:%, 1)) - when "opt_mult" - iseq.send(YARV.calldata(:*, 1)) - when "opt_neq" - iseq.send(YARV.calldata(:!=, 1)) - when "opt_newarray_max" - iseq.newarray(parse_number(operands)) - iseq.send(YARV.calldata(:max)) - when "opt_newarray_min" - iseq.newarray(parse_number(operands)) - iseq.send(YARV.calldata(:min)) - when "opt_nil_p" - iseq.send(YARV.calldata(:nil?)) - when "opt_not" - iseq.send(YARV.calldata(:!)) - when "opt_or" - iseq.send(YARV.calldata(:|, 1)) - when "opt_plus" - iseq.send(YARV.calldata(:+, 1)) - when "opt_regexpmatch2" - iseq.send(YARV.calldata(:=~, 1)) - when "opt_reverse" - iseq.send(YARV.calldata(:reverse)) - when "opt_send_without_block" - iseq.send(parse_calldata(operands)) - when "opt_size" - iseq.send(YARV.calldata(:size)) - when "opt_str_freeze" - iseq.putstring(parse_string(operands)) - iseq.send(YARV.calldata(:freeze)) - when "opt_str_uminus" - iseq.putstring(parse_string(operands)) - iseq.send(YARV.calldata(:-@)) - when "opt_succ" - iseq.send(YARV.calldata(:succ)) - when "pop" - iseq.pop - when "putnil" - iseq.putnil - when "putobject" - iseq.putobject(parse(operands)) - when "putself" - iseq.putself - when "putspecialobject" - iseq.putspecialobject(parse_options(operands, [1, 2, 3])) - when "putstring" - iseq.putstring(parse_string(operands)) - when "send" - block_iseq = - if lines[line_index].start_with?(" ") - body = parse_nested(lines[line_index..]) - line_index += body.length - - block_iseq = iseq.block_child_iseq(1) - assemble_iseq(block_iseq, body) - block_iseq - end - - iseq.send(parse_calldata(operands), block_iseq) - when "setblockparam" - lookup = find_local(iseq, operands) - iseq.setblockparam(lookup.index, lookup.level) - when "setconstant" - iseq.setconstant(parse_symbol(operands)) - when "setglobal" - iseq.setglobal(parse_symbol(operands)) - when "setlocal" - lookup = find_local(iseq, operands) - iseq.setlocal(lookup.index, lookup.level) - when "setn" - iseq.setn(parse_number(operands)) - when "setclassvariable" - iseq.setclassvariable(parse_symbol(operands)) - when "setinstancevariable" - iseq.setinstancevariable(parse_symbol(operands)) - when "setspecial" - iseq.setspecial(parse_number(operands)) - when "splatarray" - iseq.splatarray(parse_options(operands, [true, false])) - when "swap" - iseq.swap - when "throw" - iseq.throw(parse_number(operands)) - when "topn" - iseq.topn(parse_number(operands)) - when "toregexp" - options, length = operands.split(", ") - iseq.toregexp(parse_number(options), parse_number(length)) - when "ARG_REQ" - iseq.argument_size += 1 - iseq.local_table.plain(operands.to_sym) - when "ARG_BLOCK" - iseq.argument_options[:block_start] = iseq.argument_size - iseq.local_table.block(operands.to_sym) - iseq.argument_size += 1 - else - raise "Could not understand: #{line}" - end - end - end - - def find_local(iseq, operands) - name_string, level_string = operands.split(/,\s*/) - name = name_string.to_sym - level = level_string.to_i - - iseq.local_table.plain(name) - iseq.local_table.find(name, level) - end - - def parse(value) - program = SyntaxTree.parse(value) - raise if program.statements.body.length != 1 - - program.statements.body.first.accept(ObjectVisitor.new) - end - - def parse_options(value, options) - parse(value).tap { raise unless options.include?(_1) } - end - - def parse_type(value, type) - parse(value).tap { raise unless _1.is_a?(type) } - end - - def parse_number(value) - parse_type(value, Integer) - end - - def parse_string(value) - parse_type(value, String) - end - - def parse_symbol(value) - parse_type(value, Symbol) - end - - def parse_nested(lines) - body = lines.take_while { |line| line.match?(/^($|;| )/) } - body.map! { |line| line.delete_prefix!(" ") || +"" } - end - - def parse_calldata(value) - message, argc_value, flags_value = value.split - flags = - if flags_value - flags_value.split("|").map(&CALLDATA_FLAGS).inject(:|) - else - CallData::CALL_ARGS_SIMPLE - end - - YARV.calldata(message.to_sym, argc_value.to_i, flags) - end - end - end -end diff --git a/lib/syntax_tree/yarv/basic_block.rb b/lib/syntax_tree/yarv/basic_block.rb deleted file mode 100644 index 6798a092..00000000 --- a/lib/syntax_tree/yarv/basic_block.rb +++ /dev/null @@ -1,53 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - module YARV - # This object represents a single basic block, wherein all contained - # instructions do not branch except for the last one. - class BasicBlock - # This is the unique identifier for this basic block. - attr_reader :id - - # This is the index into the list of instructions where this block starts. - attr_reader :block_start - - # This is the set of instructions that this block contains. - attr_reader :insns - - # This is an array of basic blocks that lead into this block. - attr_reader :incoming_blocks - - # This is an array of basic blocks that this block leads into. - attr_reader :outgoing_blocks - - def initialize(block_start, insns) - @id = "block_#{block_start}" - - @block_start = block_start - @insns = insns - - @incoming_blocks = [] - @outgoing_blocks = [] - end - - # Yield each instruction in this basic block along with its index from the - # original instruction sequence. - def each_with_length - return enum_for(:each_with_length) unless block_given? - - length = block_start - insns.each do |insn| - yield insn, length - length += insn.length - end - end - - # This method is used to verify that the basic block is well formed. It - # checks that the only instruction in this basic block that branches is - # the last instruction. - def verify - insns[0...-1].each { |insn| raise unless insn.branch_targets.empty? } - end - end - end -end diff --git a/lib/syntax_tree/yarv/bf.rb b/lib/syntax_tree/yarv/bf.rb deleted file mode 100644 index 21bc2982..00000000 --- a/lib/syntax_tree/yarv/bf.rb +++ /dev/null @@ -1,176 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - module YARV - # Parses the given source code into a syntax tree, compiles that syntax tree - # into YARV bytecode. - class Bf - attr_reader :source - - def initialize(source) - @source = source - end - - def compile - # Set up the top-level instruction sequence that will be returned. - iseq = InstructionSequence.new("", "", 1, :top) - - # Set up the $tape global variable that will hold our state. - iseq.duphash({ 0 => 0 }) - iseq.setglobal(:$tape) - iseq.getglobal(:$tape) - iseq.putobject(0) - iseq.send(YARV.calldata(:default=, 1)) - - # Set up the $cursor global variable that will hold the current position - # in the tape. - iseq.putobject(0) - iseq.setglobal(:$cursor) - - stack = [] - source - .each_char - .chunk do |char| - # For each character, we're going to assign a type to it. This - # allows a couple of optimizations to be made by combining multiple - # instructions into single instructions, e.g., +++ becomes a single - # change_by(3) instruction. - case char - when "+", "-" - :change - when ">", "<" - :shift - when "." - :output - when "," - :input - when "[", "]" - :loop - else - :ignored - end - end - .each do |type, chunk| - # For each chunk, we're going to emit the appropriate instruction. - case type - when :change - change_by(iseq, chunk.count("+") - chunk.count("-")) - when :shift - shift_by(iseq, chunk.count(">") - chunk.count("<")) - when :output - chunk.length.times { output_char(iseq) } - when :input - chunk.length.times { input_char(iseq) } - when :loop - chunk.each do |char| - case char - when "[" - stack << loop_start(iseq) - when "]" - loop_end(iseq, *stack.pop) - end - end - end - end - - iseq.leave - iseq.compile! - iseq - end - - private - - # $tape[$cursor] += value - def change_by(iseq, value) - iseq.getglobal(:$tape) - iseq.getglobal(:$cursor) - - iseq.getglobal(:$tape) - iseq.getglobal(:$cursor) - iseq.send(YARV.calldata(:[], 1)) - - if value < 0 - iseq.putobject(-value) - iseq.send(YARV.calldata(:-, 1)) - else - iseq.putobject(value) - iseq.send(YARV.calldata(:+, 1)) - end - - iseq.send(YARV.calldata(:[]=, 2)) - iseq.pop - end - - # $cursor += value - def shift_by(iseq, value) - iseq.getglobal(:$cursor) - - if value < 0 - iseq.putobject(-value) - iseq.send(YARV.calldata(:-, 1)) - else - iseq.putobject(value) - iseq.send(YARV.calldata(:+, 1)) - end - - iseq.setglobal(:$cursor) - end - - # $stdout.putc($tape[$cursor].chr) - def output_char(iseq) - iseq.getglobal(:$stdout) - - iseq.getglobal(:$tape) - iseq.getglobal(:$cursor) - iseq.send(YARV.calldata(:[], 1)) - iseq.send(YARV.calldata(:chr)) - - iseq.send(YARV.calldata(:putc, 1)) - iseq.pop - end - - # $tape[$cursor] = $stdin.getc.ord - def input_char(iseq) - iseq.getglobal(:$tape) - iseq.getglobal(:$cursor) - - iseq.getglobal(:$stdin) - iseq.send(YARV.calldata(:getc)) - iseq.send(YARV.calldata(:ord)) - - iseq.send(YARV.calldata(:[]=, 2)) - iseq.pop - end - - # unless $tape[$cursor] == 0 - def loop_start(iseq) - start_label = iseq.label - end_label = iseq.label - - iseq.push(start_label) - iseq.getglobal(:$tape) - iseq.getglobal(:$cursor) - iseq.send(YARV.calldata(:[], 1)) - - iseq.putobject(0) - iseq.send(YARV.calldata(:==, 1)) - iseq.branchif(end_label) - - [start_label, end_label] - end - - # Jump back to the start of the loop. - def loop_end(iseq, start_label, end_label) - iseq.getglobal(:$tape) - iseq.getglobal(:$cursor) - iseq.send(YARV.calldata(:[], 1)) - - iseq.putobject(0) - iseq.send(YARV.calldata(:==, 1)) - iseq.branchunless(start_label) - - iseq.push(end_label) - end - end - end -end diff --git a/lib/syntax_tree/yarv/calldata.rb b/lib/syntax_tree/yarv/calldata.rb deleted file mode 100644 index 278a3dd9..00000000 --- a/lib/syntax_tree/yarv/calldata.rb +++ /dev/null @@ -1,97 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - module YARV - # This is an operand to various YARV instructions that represents the - # information about a specific call site. - class CallData - flags = %i[ - CALL_ARGS_SPLAT - CALL_ARGS_BLOCKARG - CALL_FCALL - CALL_VCALL - CALL_ARGS_SIMPLE - CALL_KWARG - CALL_KW_SPLAT - CALL_TAILCALL - CALL_SUPER - CALL_ZSUPER - CALL_OPT_SEND - CALL_KW_SPLAT_MUT - ] - - # Insert the legacy CALL_BLOCKISEQ flag for Ruby 3.2 and earlier. - flags.insert(5, :CALL_BLOCKISEQ) if RUBY_VERSION < "3.3" - - # Set the flags as constants on the class. - flags.each_with_index { |name, index| const_set(name, 1 << index) } - - attr_reader :method, :argc, :flags, :kw_arg - - def initialize( - method, - argc = 0, - flags = CallData::CALL_ARGS_SIMPLE, - kw_arg = nil - ) - @method = method - @argc = argc - @flags = flags - @kw_arg = kw_arg - end - - def flag?(mask) - flags.anybits?(mask) - end - - def to_h - result = { mid: method, flag: flags, orig_argc: argc } - result[:kw_arg] = kw_arg if kw_arg - result - end - - def inspect - names = [] - names << :ARGS_SPLAT if flag?(CALL_ARGS_SPLAT) - names << :ARGS_BLOCKARG if flag?(CALL_ARGS_BLOCKARG) - names << :FCALL if flag?(CALL_FCALL) - names << :VCALL if flag?(CALL_VCALL) - names << :ARGS_SIMPLE if flag?(CALL_ARGS_SIMPLE) - names << :KWARG if flag?(CALL_KWARG) - names << :KW_SPLAT if flag?(CALL_KW_SPLAT) - names << :TAILCALL if flag?(CALL_TAILCALL) - names << :SUPER if flag?(CALL_SUPER) - names << :ZSUPER if flag?(CALL_ZSUPER) - names << :OPT_SEND if flag?(CALL_OPT_SEND) - names << :KW_SPLAT_MUT if flag?(CALL_KW_SPLAT_MUT) - - parts = [] - parts << "mid:#{method}" if method - parts << "argc:#{argc}" - parts << "kw:[#{kw_arg.join(", ")}]" if kw_arg - parts << names.join("|") if names.any? - - "" - end - - def self.from(serialized) - new( - serialized[:mid], - serialized[:orig_argc], - serialized[:flag], - serialized[:kw_arg] - ) - end - end - - # A convenience method for creating a CallData object. - def self.calldata( - method, - argc = 0, - flags = CallData::CALL_ARGS_SIMPLE, - kw_arg = nil - ) - CallData.new(method, argc, flags, kw_arg) - end - end -end diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb deleted file mode 100644 index 0f7e7372..00000000 --- a/lib/syntax_tree/yarv/compiler.rb +++ /dev/null @@ -1,2307 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - module YARV - # This class is an experiment in transforming Syntax Tree nodes into their - # corresponding YARV instruction sequences. It attempts to mirror the - # behavior of RubyVM::InstructionSequence.compile. - # - # You use this as with any other visitor. First you parse code into a tree, - # then you visit it with this compiler. Visiting the root node of the tree - # will return a SyntaxTree::YARV::Compiler::InstructionSequence object. - # With that object you can call #to_a on it, which will return a serialized - # form of the instruction sequence as an array. This array _should_ mirror - # the array given by RubyVM::InstructionSequence#to_a. - # - # As an example, here is how you would compile a single expression: - # - # program = SyntaxTree.parse("1 + 2") - # program.accept(SyntaxTree::YARV::Compiler.new).to_a - # - # [ - # "YARVInstructionSequence/SimpleDataFormat", - # 3, - # 1, - # 1, - # {:arg_size=>0, :local_size=>0, :stack_max=>2}, - # "", - # "", - # "", - # 1, - # :top, - # [], - # {}, - # [], - # [ - # [:putobject_INT2FIX_1_], - # [:putobject, 2], - # [:opt_plus, {:mid=>:+, :flag=>16, :orig_argc=>1}], - # [:leave] - # ] - # ] - # - # Note that this is the same output as calling: - # - # RubyVM::InstructionSequence.compile("1 + 2").to_a - # - class Compiler < BasicVisitor - # This represents a set of options that can be passed to the compiler to - # control how it compiles the code. It mirrors the options that can be - # passed to RubyVM::InstructionSequence.compile, except it only includes - # options that actually change the behavior. - class Options - def initialize( - frozen_string_literal: false, - inline_const_cache: true, - operands_unification: true, - peephole_optimization: true, - specialized_instruction: true, - tailcall_optimization: false - ) - @frozen_string_literal = frozen_string_literal - @inline_const_cache = inline_const_cache - @operands_unification = operands_unification - @peephole_optimization = peephole_optimization - @specialized_instruction = specialized_instruction - @tailcall_optimization = tailcall_optimization - end - - def to_hash - { - frozen_string_literal: @frozen_string_literal, - inline_const_cache: @inline_const_cache, - operands_unification: @operands_unification, - peephole_optimization: @peephole_optimization, - specialized_instruction: @specialized_instruction, - tailcall_optimization: @tailcall_optimization - } - end - - def frozen_string_literal! - @frozen_string_literal = true - end - - def frozen_string_literal? - @frozen_string_literal - end - - def inline_const_cache? - @inline_const_cache - end - - def operands_unification? - @operands_unification - end - - def peephole_optimization? - @peephole_optimization - end - - def specialized_instruction? - @specialized_instruction - end - - def tailcall_optimization? - @tailcall_optimization - end - end - - # This visitor is responsible for converting Syntax Tree nodes into their - # corresponding Ruby structures. This is used to convert the operands of - # some instructions like putobject that push a Ruby object directly onto - # the stack. It is only used when the entire structure can be represented - # at compile-time, as opposed to constructed at run-time. - class RubyVisitor < BasicVisitor - # This error is raised whenever a node cannot be converted into a Ruby - # object at compile-time. - class CompilationError < StandardError - end - - # This will attempt to compile the given node. If it's possible, then - # it will return the compiled object. Otherwise it will return nil. - def self.compile(node) - node.accept(new) - rescue CompilationError - end - - visit_methods do - def visit_array(node) - node.contents ? visit_all(node.contents.parts) : [] - end - - def visit_bare_assoc_hash(node) - node.assocs.to_h do |assoc| - # We can only convert regular key-value pairs. A double splat ** - # operator means it has to be converted at run-time. - raise CompilationError unless assoc.is_a?(Assoc) - [visit(assoc.key), visit(assoc.value)] - end - end - - def visit_float(node) - node.value.to_f - end - - alias visit_hash visit_bare_assoc_hash - - def visit_imaginary(node) - node.value.to_c - end - - def visit_int(node) - case (value = node.value) - when /^0b/ - value[2..].to_i(2) - when /^0o/ - value[2..].to_i(8) - when /^0d/ - value[2..].to_i - when /^0x/ - value[2..].to_i(16) - else - value.to_i - end - end - - def visit_label(node) - node.value.chomp(":").to_sym - end - - def visit_mrhs(node) - visit_all(node.parts) - end - - def visit_qsymbols(node) - node.elements.map { |element| visit(element).to_sym } - end - - def visit_qwords(node) - visit_all(node.elements) - end - - def visit_range(node) - left, right = [visit(node.left), visit(node.right)] - node.operator.value === ".." ? left..right : left...right - end - - def visit_rational(node) - node.value.to_r - end - - def visit_regexp_literal(node) - if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - Regexp.new( - node.parts.first.value, - visit_regexp_literal_flags(node) - ) - else - # Any interpolation of expressions or variables will result in the - # regular expression being constructed at run-time. - raise CompilationError - end - end - - def visit_symbol_literal(node) - node.value.value.to_sym - end - - def visit_symbols(node) - node.elements.map { |element| visit(element).to_sym } - end - - def visit_tstring_content(node) - node.value - end - - def visit_var_ref(node) - raise CompilationError unless node.value.is_a?(Kw) - - case node.value.value - when "nil" - nil - when "true" - true - when "false" - false - else - raise CompilationError - end - end - - def visit_word(node) - if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - node.parts.first.value - else - # Any interpolation of expressions or variables will result in the - # string being constructed at run-time. - raise CompilationError - end - end - - def visit_words(node) - visit_all(node.elements) - end - end - - # This isn't actually a visit method, though maybe it should be. It is - # responsible for converting the set of string options on a regular - # expression into its equivalent integer. - def visit_regexp_literal_flags(node) - node - .options - .chars - .inject(0) do |accum, option| - accum | - case option - when "i" - Regexp::IGNORECASE - when "x" - Regexp::EXTENDED - when "m" - Regexp::MULTILINE - else - raise "Unknown regexp option: #{option}" - end - end - end - - def visit_unsupported(_node) - raise CompilationError - end - - # Please forgive the metaprogramming here. This is used to create visit - # methods for every node that we did not explicitly handle. By default - # each of these methods will raise a CompilationError. - handled = instance_methods(false) - (Visitor.instance_methods(false) - handled).each do |method| - alias_method method, :visit_unsupported - end - end - - # These options mirror the compilation options that we currently support - # that can be also passed to RubyVM::InstructionSequence.compile. - attr_reader :options - - # The current instruction sequence that is being compiled. - attr_reader :iseq - - # A boolean to track if we're currently compiling the last statement - # within a set of statements. This information is necessary to determine - # if we need to return the value of the last statement. - attr_reader :last_statement - - def initialize(options = Options.new) - @options = options - @iseq = nil - @last_statement = false - end - - def visit_BEGIN(node) - visit(node.statements) - end - - def visit_CHAR(node) - if options.frozen_string_literal? - iseq.putobject(node.value[1..]) - else - iseq.putstring(node.value[1..]) - end - end - - def visit_END(node) - start_line = node.location.start_line - once_iseq = - with_child_iseq(iseq.block_child_iseq(start_line)) do - postexe_iseq = - with_child_iseq(iseq.block_child_iseq(start_line)) do - iseq.event(:RUBY_EVENT_B_CALL) - - *statements, last_statement = node.statements.body - visit_all(statements) - with_last_statement { visit(last_statement) } - - iseq.event(:RUBY_EVENT_B_RETURN) - iseq.leave - end - - iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) - iseq.send( - YARV.calldata(:"core#set_postexe", 0, CallData::CALL_FCALL), - postexe_iseq - ) - iseq.leave - end - - iseq.once(once_iseq, iseq.inline_storage) - iseq.pop - end - - def visit_alias(node) - iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) - iseq.putspecialobject(PutSpecialObject::OBJECT_CBASE) - visit(node.left) - visit(node.right) - iseq.send(YARV.calldata(:"core#set_method_alias", 3)) - end - - def visit_aref(node) - calldata = YARV.calldata(:[], 1) - visit(node.collection) - - if !options.frozen_string_literal? && - options.specialized_instruction? && (node.index.parts.length == 1) - arg = node.index.parts.first - - if arg.is_a?(StringLiteral) && (arg.parts.length == 1) - string_part = arg.parts.first - - if string_part.is_a?(TStringContent) - iseq.opt_aref_with(string_part.value, calldata) - return - end - end - end - - visit(node.index) - iseq.send(calldata) - end - - def visit_arg_block(node) - visit(node.value) - end - - def visit_arg_paren(node) - visit(node.arguments) - end - - def visit_arg_star(node) - visit(node.value) - iseq.splatarray(false) - end - - def visit_args(node) - visit_all(node.parts) - end - - def visit_array(node) - if (compiled = RubyVisitor.compile(node)) - iseq.duparray(compiled) - elsif node.contents && node.contents.parts.length == 1 && - node.contents.parts.first.is_a?(BareAssocHash) && - node.contents.parts.first.assocs.length == 1 && - node.contents.parts.first.assocs.first.is_a?(AssocSplat) - iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) - iseq.newhash(0) - visit(node.contents.parts.first) - iseq.send(YARV.calldata(:"core#hash_merge_kwd", 2)) - iseq.newarraykwsplat(1) - else - length = 0 - - node.contents.parts.each do |part| - if part.is_a?(ArgStar) - if length > 0 - iseq.newarray(length) - length = 0 - end - - visit(part.value) - iseq.concatarray - else - visit(part) - length += 1 - end - end - - iseq.newarray(length) if length > 0 - iseq.concatarray if length > 0 && length != node.contents.parts.length - end - end - - def visit_aryptn(node) - end - - def visit_assign(node) - case node.target - when ARefField - calldata = YARV.calldata(:[]=, 2) - - if !options.frozen_string_literal? && - options.specialized_instruction? && - (node.target.index.parts.length == 1) - arg = node.target.index.parts.first - - if arg.is_a?(StringLiteral) && (arg.parts.length == 1) - string_part = arg.parts.first - - if string_part.is_a?(TStringContent) - visit(node.target.collection) - visit(node.value) - iseq.swap - iseq.topn(1) - iseq.opt_aset_with(string_part.value, calldata) - iseq.pop - return - end - end - end - - iseq.putnil - visit(node.target.collection) - visit(node.target.index) - visit(node.value) - iseq.setn(3) - iseq.send(calldata) - iseq.pop - when ConstPathField - names = constant_names(node.target) - name = names.pop - - if RUBY_VERSION >= "3.2" - iseq.opt_getconstant_path(names) - visit(node.value) - iseq.swap - iseq.topn(1) - iseq.swap - iseq.setconstant(name) - else - visit(node.value) - iseq.dup if last_statement? - iseq.opt_getconstant_path(names) - iseq.setconstant(name) - end - when Field - iseq.putnil - visit(node.target) - visit(node.value) - iseq.setn(2) - iseq.send(YARV.calldata(:"#{node.target.name.value}=", 1)) - iseq.pop - when TopConstField - name = node.target.constant.value.to_sym - - if RUBY_VERSION >= "3.2" - iseq.putobject(Object) - visit(node.value) - iseq.swap - iseq.topn(1) - iseq.swap - iseq.setconstant(name) - else - visit(node.value) - iseq.dup if last_statement? - iseq.putobject(Object) - iseq.setconstant(name) - end - when VarField - visit(node.value) - iseq.dup if last_statement? - - case node.target.value - when Const - iseq.putspecialobject(PutSpecialObject::OBJECT_CONST_BASE) - iseq.setconstant(node.target.value.value.to_sym) - when CVar - iseq.setclassvariable(node.target.value.value.to_sym) - when GVar - iseq.setglobal(node.target.value.value.to_sym) - when Ident - lookup = visit(node.target) - - if lookup.local.is_a?(LocalTable::BlockLocal) - iseq.setblockparam(lookup.index, lookup.level) - else - iseq.setlocal(lookup.index, lookup.level) - end - when IVar - iseq.setinstancevariable(node.target.value.value.to_sym) - end - end - end - - def visit_assoc(node) - visit(node.key) - visit(node.value) - end - - def visit_assoc_splat(node) - visit(node.value) - end - - def visit_backref(node) - iseq.getspecial(GetSpecial::SVAR_BACKREF, node.value[1..].to_i << 1) - end - - def visit_bare_assoc_hash(node) - if (compiled = RubyVisitor.compile(node)) - iseq.duphash(compiled) - else - visit_all(node.assocs) - end - end - - def visit_begin(node) - end - - def visit_binary(node) - case node.operator - when :"&&" - done_label = iseq.label - - visit(node.left) - iseq.dup - iseq.branchunless(done_label) - - iseq.pop - visit(node.right) - iseq.push(done_label) - when :"||" - visit(node.left) - iseq.dup - - skip_right_label = iseq.label - iseq.branchif(skip_right_label) - iseq.pop - - visit(node.right) - iseq.push(skip_right_label) - else - visit(node.left) - visit(node.right) - iseq.send(YARV.calldata(node.operator, 1)) - end - end - - def visit_block(node) - with_child_iseq(iseq.block_child_iseq(node.location.start_line)) do - iseq.event(:RUBY_EVENT_B_CALL) - visit(node.block_var) - visit(node.bodystmt) - iseq.event(:RUBY_EVENT_B_RETURN) - iseq.leave - end - end - - def visit_block_var(node) - params = node.params - - if params.requireds.length == 1 && params.optionals.empty? && - !params.rest && params.posts.empty? && params.keywords.empty? && - !params.keyword_rest && !params.block - iseq.argument_options[:ambiguous_param0] = true - end - - visit(node.params) - - node.locals.each { |local| iseq.local_table.plain(local.value.to_sym) } - end - - def visit_blockarg(node) - iseq.argument_options[:block_start] = iseq.argument_size - iseq.local_table.block(node.name.value.to_sym) - iseq.argument_size += 1 - end - - def visit_bodystmt(node) - visit(node.statements) - end - - def visit_break(node) - end - - def visit_call(node) - if node.is_a?(CallNode) - return( - visit_call( - CommandCall.new( - receiver: node.receiver, - operator: node.operator, - message: node.message, - arguments: node.arguments, - block: nil, - location: node.location - ) - ) - ) - end - - # Track whether or not this is a method call on a block proxy receiver. - # If it is, we can potentially do tailcall optimizations on it. - block_receiver = false - - if node.receiver - if node.receiver.is_a?(VarRef) - lookup = iseq.local_variable(node.receiver.value.value.to_sym) - - if lookup.local.is_a?(LocalTable::BlockLocal) - iseq.getblockparamproxy(lookup.index, lookup.level) - block_receiver = true - else - visit(node.receiver) - end - else - visit(node.receiver) - end - else - iseq.putself - end - - after_call_label = nil - if node.operator&.value == "&." - iseq.dup - after_call_label = iseq.label - iseq.branchnil(after_call_label) - end - - arg_parts = argument_parts(node.arguments) - argc = arg_parts.length - flag = 0 - - arg_parts.each do |arg_part| - case arg_part - when ArgBlock - argc -= 1 - flag |= CallData::CALL_ARGS_BLOCKARG - visit(arg_part) - when ArgStar - flag |= CallData::CALL_ARGS_SPLAT - visit(arg_part) - when ArgsForward - flag |= CallData::CALL_TAILCALL if options.tailcall_optimization? - - flag |= CallData::CALL_ARGS_SPLAT - lookup = iseq.local_table.find(:*) - iseq.getlocal(lookup.index, lookup.level) - iseq.splatarray(arg_parts.length != 1) - - flag |= CallData::CALL_ARGS_BLOCKARG - lookup = iseq.local_table.find(:&) - iseq.getblockparamproxy(lookup.index, lookup.level) - when BareAssocHash - flag |= CallData::CALL_KW_SPLAT - visit(arg_part) - else - visit(arg_part) - end - end - - block_iseq = visit(node.block) if node.block - - # If there's no block and we don't already have any special flags set, - # then we can safely call this simple arguments. Note that has to be the - # first flag we set after looking at the arguments to get the flags - # correct. - flag |= CallData::CALL_ARGS_SIMPLE if block_iseq.nil? && flag == 0 - - # If there's no receiver, then this is an "fcall". - flag |= CallData::CALL_FCALL if node.receiver.nil? - - # If we're calling a method on the passed block object and we have - # tailcall optimizations turned on, then we can set the tailcall flag. - if block_receiver && options.tailcall_optimization? - flag |= CallData::CALL_TAILCALL - end - - iseq.send( - YARV.calldata(node.message.value.to_sym, argc, flag), - block_iseq - ) - iseq.event(after_call_label) if after_call_label - end - - def visit_case(node) - visit(node.value) if node.value - - clauses = [] - else_clause = nil - current = node.consequent - - while current - clauses << current - - if (current = current.consequent).is_a?(Else) - else_clause = current - break - end - end - - branches = - clauses.map do |clause| - visit(clause.arguments) - iseq.topn(1) - iseq.send( - YARV.calldata( - :===, - 1, - CallData::CALL_FCALL | CallData::CALL_ARGS_SIMPLE - ) - ) - - label = iseq.label - iseq.branchif(label) - [clause, label] - end - - iseq.pop - else_clause ? visit(else_clause) : iseq.putnil - iseq.leave - - branches.each_with_index do |(clause, label), index| - iseq.leave if index != 0 - iseq.push(label) - iseq.pop - visit(clause) - end - end - - def visit_class(node) - name = node.constant.constant.value.to_sym - class_iseq = - with_child_iseq( - iseq.class_child_iseq(name, node.location.start_line) - ) do - iseq.event(:RUBY_EVENT_CLASS) - visit(node.bodystmt) - iseq.event(:RUBY_EVENT_END) - iseq.leave - end - - flags = DefineClass::TYPE_CLASS - - case node.constant - when ConstPathRef - flags |= DefineClass::FLAG_SCOPED - visit(node.constant.parent) - when ConstRef - iseq.putspecialobject(PutSpecialObject::OBJECT_CONST_BASE) - when TopConstRef - flags |= DefineClass::FLAG_SCOPED - iseq.putobject(Object) - end - - if node.superclass - flags |= DefineClass::FLAG_HAS_SUPERCLASS - visit(node.superclass) - else - iseq.putnil - end - - iseq.defineclass(name, class_iseq, flags) - end - - def visit_command(node) - visit_call( - CommandCall.new( - receiver: nil, - operator: nil, - message: node.message, - arguments: node.arguments, - block: node.block, - location: node.location - ) - ) - end - - def visit_command_call(node) - visit_call( - CommandCall.new( - receiver: node.receiver, - operator: node.operator, - message: node.message, - arguments: node.arguments, - block: node.block, - location: node.location - ) - ) - end - - def visit_const_path_field(node) - visit(node.parent) - end - - def visit_const_path_ref(node) - names = constant_names(node) - iseq.opt_getconstant_path(names) - end - - def visit_def(node) - name = node.name.value.to_sym - method_iseq = - iseq.method_child_iseq(name.to_s, node.location.start_line) - - with_child_iseq(method_iseq) do - visit(node.params) if node.params - iseq.event(:RUBY_EVENT_CALL) - visit(node.bodystmt) - iseq.event(:RUBY_EVENT_RETURN) - iseq.leave - end - - if node.target - visit(node.target) - iseq.definesmethod(name, method_iseq) - else - iseq.definemethod(name, method_iseq) - end - - iseq.putobject(name) - end - - def visit_defined(node) - case node.value - when Assign - # If we're assigning to a local variable, then we need to make sure - # that we put it into the local table. - if node.value.target.is_a?(VarField) && - node.value.target.value.is_a?(Ident) - iseq.local_table.plain(node.value.target.value.value.to_sym) - end - - iseq.putobject("assignment") - when VarRef - value = node.value.value - name = value.value.to_sym - - case value - when Const - iseq.putnil - iseq.defined(Defined::TYPE_CONST, name, "constant") - when CVar - iseq.putnil - iseq.defined(Defined::TYPE_CVAR, name, "class variable") - when GVar - iseq.putnil - iseq.defined(Defined::TYPE_GVAR, name, "global-variable") - when Ident - iseq.putobject("local-variable") - when IVar - iseq.definedivar(name, iseq.inline_storage, "instance-variable") - when Kw - case name - when :false - iseq.putobject("false") - when :nil - iseq.putobject("nil") - when :self - iseq.putobject("self") - when :true - iseq.putobject("true") - end - end - when VCall - iseq.putself - - name = node.value.value.value.to_sym - iseq.defined(Defined::TYPE_FUNC, name, "method") - when YieldNode - iseq.putnil - iseq.defined(Defined::TYPE_YIELD, false, "yield") - when ZSuper - iseq.putnil - iseq.defined(Defined::TYPE_ZSUPER, false, "super") - else - iseq.putobject("expression") - end - end - - def visit_dyna_symbol(node) - if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - iseq.putobject(node.parts.first.value.to_sym) - end - end - - def visit_else(node) - visit(node.statements) - iseq.pop unless last_statement? - end - - def visit_elsif(node) - visit_if( - IfNode.new( - predicate: node.predicate, - statements: node.statements, - consequent: node.consequent, - location: node.location - ) - ) - end - - def visit_ensure(node) - end - - def visit_field(node) - visit(node.parent) - end - - def visit_float(node) - iseq.putobject(node.accept(RubyVisitor.new)) - end - - def visit_fndptn(node) - end - - def visit_for(node) - visit(node.collection) - - name = node.index.value.value.to_sym - iseq.local_table.plain(name) - - block_iseq = - with_child_iseq( - iseq.block_child_iseq(node.statements.location.start_line) - ) do - iseq.argument_options[:lead_num] ||= 0 - iseq.argument_options[:lead_num] += 1 - iseq.argument_options[:ambiguous_param0] = true - - iseq.argument_size += 1 - iseq.local_table.plain(2) - - iseq.getlocal(0, 0) - - local_variable = iseq.local_variable(name) - iseq.setlocal(local_variable.index, local_variable.level) - - iseq.event(:RUBY_EVENT_B_CALL) - iseq.nop - - visit(node.statements) - iseq.event(:RUBY_EVENT_B_RETURN) - iseq.leave - end - - iseq.send(YARV.calldata(:each, 0, 0), block_iseq) - end - - def visit_hash(node) - if (compiled = RubyVisitor.compile(node)) - iseq.duphash(compiled) - else - visit_all(node.assocs) - iseq.newhash(node.assocs.length * 2) - end - end - - def visit_hshptn(node) - end - - def visit_heredoc(node) - if node.beginning.value.end_with?("`") - visit_xstring_literal(node) - elsif node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - visit(node.parts.first) - else - length = visit_string_parts(node) - iseq.concatstrings(length) - end - end - - def visit_if(node) - if node.predicate.is_a?(RangeNode) - true_label = iseq.label - false_label = iseq.label - end_label = iseq.label - - iseq.getspecial(GetSpecial::SVAR_FLIPFLOP_START, 0) - iseq.branchif(true_label) - - visit(node.predicate.left) - iseq.branchunless(end_label) - - iseq.putobject(true) - iseq.setspecial(GetSpecial::SVAR_FLIPFLOP_START) - - iseq.push(true_label) - visit(node.predicate.right) - iseq.branchunless(false_label) - - iseq.putobject(false) - iseq.setspecial(GetSpecial::SVAR_FLIPFLOP_START) - - iseq.push(false_label) - visit(node.statements) - iseq.leave - iseq.push(end_label) - iseq.putnil - else - consequent_label = iseq.label - - visit(node.predicate) - iseq.branchunless(consequent_label) - visit(node.statements) - - if last_statement? - iseq.leave - iseq.push(consequent_label) - node.consequent ? visit(node.consequent) : iseq.putnil - else - iseq.pop - - if node.consequent - done_label = iseq.label - iseq.jump(done_label) - iseq.push(consequent_label) - visit(node.consequent) - iseq.push(done_label) - else - iseq.push(consequent_label) - end - end - end - end - - def visit_if_op(node) - visit_if( - IfNode.new( - predicate: node.predicate, - statements: - Statements.new(body: [node.truthy], location: Location.default), - consequent: - Else.new( - keyword: Kw.new(value: "else", location: Location.default), - statements: - Statements.new( - body: [node.falsy], - location: Location.default - ), - location: Location.default - ), - location: Location.default - ) - ) - end - - def visit_imaginary(node) - iseq.putobject(node.accept(RubyVisitor.new)) - end - - def visit_int(node) - iseq.putobject(node.accept(RubyVisitor.new)) - end - - def visit_kwrest_param(node) - iseq.argument_options[:kwrest] = iseq.argument_size - iseq.argument_size += 1 - iseq.local_table.plain(node.name.value.to_sym) - end - - def visit_label(node) - iseq.putobject(node.accept(RubyVisitor.new)) - end - - def visit_lambda(node) - lambda_iseq = - with_child_iseq(iseq.block_child_iseq(node.location.start_line)) do - iseq.event(:RUBY_EVENT_B_CALL) - visit(node.params) - visit(node.statements) - iseq.event(:RUBY_EVENT_B_RETURN) - iseq.leave - end - - iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) - iseq.send(YARV.calldata(:lambda, 0, CallData::CALL_FCALL), lambda_iseq) - end - - def visit_lambda_var(node) - visit_block_var(node) - end - - def visit_massign(node) - visit(node.value) - iseq.dup - visit(node.target) - end - - def visit_method_add_block(node) - visit_call( - CommandCall.new( - receiver: node.call.receiver, - operator: node.call.operator, - message: node.call.message, - arguments: node.call.arguments, - block: node.block, - location: node.location - ) - ) - end - - def visit_mlhs(node) - lookups = [] - node.parts.each do |part| - case part - when VarField - lookups << visit(part) - end - end - - iseq.expandarray(lookups.length, 0) - lookups.each { |lookup| iseq.setlocal(lookup.index, lookup.level) } - end - - def visit_module(node) - name = node.constant.constant.value.to_sym - module_iseq = - with_child_iseq( - iseq.module_child_iseq(name, node.location.start_line) - ) do - iseq.event(:RUBY_EVENT_CLASS) - visit(node.bodystmt) - iseq.event(:RUBY_EVENT_END) - iseq.leave - end - - flags = DefineClass::TYPE_MODULE - - case node.constant - when ConstPathRef - flags |= DefineClass::FLAG_SCOPED - visit(node.constant.parent) - when ConstRef - iseq.putspecialobject(PutSpecialObject::OBJECT_CONST_BASE) - when TopConstRef - flags |= DefineClass::FLAG_SCOPED - iseq.putobject(Object) - end - - iseq.putnil - iseq.defineclass(name, module_iseq, flags) - end - - def visit_mrhs(node) - if (compiled = RubyVisitor.compile(node)) - iseq.duparray(compiled) - else - visit_all(node.parts) - iseq.newarray(node.parts.length) - end - end - - def visit_next(node) - end - - def visit_not(node) - visit(node.statement) - iseq.send(YARV.calldata(:!)) - end - - def visit_opassign(node) - flag = CallData::CALL_ARGS_SIMPLE - if node.target.is_a?(ConstPathField) || node.target.is_a?(TopConstField) - flag |= CallData::CALL_FCALL - end - - case (operator = node.operator.value.chomp("=").to_sym) - when :"&&" - done_label = iseq.label - - with_opassign(node) do - iseq.dup - iseq.branchunless(done_label) - iseq.pop - visit(node.value) - end - - case node.target - when ARefField - iseq.leave - iseq.push(done_label) - iseq.setn(3) - iseq.adjuststack(3) - when ConstPathField, TopConstField - iseq.push(done_label) - iseq.swap - iseq.pop - else - iseq.push(done_label) - end - when :"||" - if node.target.is_a?(ConstPathField) || - node.target.is_a?(TopConstField) - opassign_defined(node) - iseq.swap - iseq.pop - elsif node.target.is_a?(VarField) && - [Const, CVar, GVar].include?(node.target.value.class) - opassign_defined(node) - else - skip_value_label = iseq.label - - with_opassign(node) do - iseq.dup - iseq.branchif(skip_value_label) - iseq.pop - visit(node.value) - end - - if node.target.is_a?(ARefField) - iseq.leave - iseq.push(skip_value_label) - iseq.setn(3) - iseq.adjuststack(3) - else - iseq.push(skip_value_label) - end - end - else - with_opassign(node) do - visit(node.value) - iseq.send(YARV.calldata(operator, 1, flag)) - end - end - end - - def visit_params(node) - if node.requireds.any? - iseq.argument_options[:lead_num] = 0 - - node.requireds.each do |required| - iseq.local_table.plain(required.value.to_sym) - iseq.argument_size += 1 - iseq.argument_options[:lead_num] += 1 - end - end - - node.optionals.each do |(optional, value)| - index = iseq.local_table.size - name = optional.value.to_sym - - iseq.local_table.plain(name) - iseq.argument_size += 1 - - unless iseq.argument_options.key?(:opt) - start_label = iseq.label - iseq.push(start_label) - iseq.argument_options[:opt] = [start_label] - end - - visit(value) - iseq.setlocal(index, 0) - - arg_given_label = iseq.label - iseq.push(arg_given_label) - iseq.argument_options[:opt] << arg_given_label - end - - visit(node.rest) if node.rest - - if node.posts.any? - iseq.argument_options[:post_start] = iseq.argument_size - iseq.argument_options[:post_num] = 0 - - node.posts.each do |post| - iseq.local_table.plain(post.value.to_sym) - iseq.argument_size += 1 - iseq.argument_options[:post_num] += 1 - end - end - - if node.keywords.any? - iseq.argument_options[:kwbits] = 0 - iseq.argument_options[:keyword] = [] - - keyword_bits_name = node.keyword_rest ? 3 : 2 - iseq.argument_size += 1 - keyword_bits_index = iseq.local_table.locals.size + node.keywords.size - - node.keywords.each_with_index do |(keyword, value), keyword_index| - name = keyword.value.chomp(":").to_sym - index = iseq.local_table.size - - iseq.local_table.plain(name) - iseq.argument_size += 1 - iseq.argument_options[:kwbits] += 1 - - if value.nil? - iseq.argument_options[:keyword] << name - elsif (compiled = RubyVisitor.compile(value)) - iseq.argument_options[:keyword] << [name, compiled] - else - skip_value_label = iseq.label - - iseq.argument_options[:keyword] << [name] - iseq.checkkeyword(keyword_bits_index, keyword_index) - iseq.branchif(skip_value_label) - visit(value) - iseq.setlocal(index, 0) - iseq.push(skip_value_label) - end - end - - iseq.local_table.plain(keyword_bits_name) - end - - if node.keyword_rest.is_a?(ArgsForward) - if RUBY_VERSION >= "3.2" - iseq.local_table.plain(:*) - iseq.local_table.plain(:&) - iseq.local_table.plain(:"...") - - iseq.argument_options[:rest_start] = iseq.argument_size - iseq.argument_options[:block_start] = iseq.argument_size + 1 - - iseq.argument_size += 2 - else - iseq.local_table.plain(:*) - iseq.local_table.plain(:&) - - iseq.argument_options[:rest_start] = iseq.argument_size - iseq.argument_options[:block_start] = iseq.argument_size + 1 - - iseq.argument_size += 2 - end - elsif node.keyword_rest - visit(node.keyword_rest) - end - - visit(node.block) if node.block - end - - def visit_paren(node) - visit(node.contents) - end - - def visit_pinned_begin(node) - end - - def visit_pinned_var_ref(node) - end - - def visit_program(node) - node.statements.body.each do |statement| - break unless statement.is_a?(Comment) - - if statement.value == "# frozen_string_literal: true" - options.frozen_string_literal! - end - end - - preexes = [] - statements = [] - - node.statements.body.each do |statement| - case statement - when Comment, EmbDoc, EndContent, VoidStmt - # ignore - when BEGINBlock - preexes << statement - else - statements << statement - end - end - - top_iseq = - InstructionSequence.new( - "", - "", - 1, - :top, - nil, - options - ) - - with_child_iseq(top_iseq) do - visit_all(preexes) - - if statements.empty? - iseq.putnil - else - *statements, last_statement = statements - visit_all(statements) - with_last_statement { visit(last_statement) } - end - - iseq.leave - end - - top_iseq.compile! - top_iseq - end - - def visit_qsymbols(node) - iseq.duparray(node.accept(RubyVisitor.new)) - end - - def visit_qwords(node) - if options.frozen_string_literal? - iseq.duparray(node.accept(RubyVisitor.new)) - else - visit_all(node.elements) - iseq.newarray(node.elements.length) - end - end - - def visit_range(node) - if (compiled = RubyVisitor.compile(node)) - iseq.putobject(compiled) - else - visit(node.left) - visit(node.right) - iseq.newrange(node.operator.value == ".." ? 0 : 1) - end - end - - def visit_rassign(node) - iseq.putnil - - if node.operator.is_a?(Kw) - match_label = iseq.label - - visit(node.value) - iseq.dup - - visit_pattern(node.pattern, match_label) - - iseq.pop - iseq.pop - iseq.putobject(false) - iseq.leave - - iseq.push(match_label) - iseq.adjuststack(2) - iseq.putobject(true) - else - no_key_label = iseq.label - end_leave_label = iseq.label - end_label = iseq.label - - iseq.putnil - iseq.putobject(false) - iseq.putnil - iseq.putnil - visit(node.value) - iseq.dup - - visit_pattern(node.pattern, end_label) - - # First we're going to push the core onto the stack, then we'll check - # if the value to match is truthy. If it is, we'll jump down to raise - # NoMatchingPatternKeyError. Otherwise we'll raise - # NoMatchingPatternError. - iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) - iseq.topn(4) - iseq.branchif(no_key_label) - - # Here we're going to raise NoMatchingPatternError. - iseq.putobject(NoMatchingPatternError) - iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) - iseq.putobject("%p: %s") - iseq.topn(4) - iseq.topn(7) - iseq.send(YARV.calldata(:"core#sprintf", 3)) - iseq.send(YARV.calldata(:"core#raise", 2)) - iseq.jump(end_leave_label) - - # Here we're going to raise NoMatchingPatternKeyError. - iseq.push(no_key_label) - iseq.putobject(NoMatchingPatternKeyError) - iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) - iseq.putobject("%p: %s") - iseq.topn(4) - iseq.topn(7) - iseq.send(YARV.calldata(:"core#sprintf", 3)) - iseq.topn(7) - iseq.topn(9) - iseq.send( - YARV.calldata(:new, 1, CallData::CALL_KWARG, %i[matchee key]) - ) - iseq.send(YARV.calldata(:"core#raise", 1)) - - iseq.push(end_leave_label) - iseq.adjuststack(7) - iseq.putnil - iseq.leave - - iseq.push(end_label) - iseq.adjuststack(6) - iseq.putnil - end - end - - def visit_rational(node) - iseq.putobject(node.accept(RubyVisitor.new)) - end - - def visit_redo(node) - end - - def visit_regexp_literal(node) - if (compiled = RubyVisitor.compile(node)) - iseq.putobject(compiled) - else - flags = RubyVisitor.new.visit_regexp_literal_flags(node) - length = visit_string_parts(node) - iseq.toregexp(flags, length) - end - end - - def visit_rescue(node) - end - - def visit_rescue_ex(node) - end - - def visit_rescue_mod(node) - end - - def visit_rest_param(node) - iseq.local_table.plain(node.name.value.to_sym) - iseq.argument_options[:rest_start] = iseq.argument_size - iseq.argument_size += 1 - end - - def visit_retry(node) - end - - def visit_return(node) - end - - def visit_sclass(node) - visit(node.target) - iseq.putnil - - singleton_iseq = - with_child_iseq( - iseq.singleton_class_child_iseq(node.location.start_line) - ) do - iseq.event(:RUBY_EVENT_CLASS) - visit(node.bodystmt) - iseq.event(:RUBY_EVENT_END) - iseq.leave - end - - iseq.defineclass( - :singletonclass, - singleton_iseq, - DefineClass::TYPE_SINGLETON_CLASS - ) - end - - def visit_statements(node) - statements = - node.body.select do |statement| - case statement - when Comment, EmbDoc, EndContent, VoidStmt - false - else - true - end - end - - statements.empty? ? iseq.putnil : visit_all(statements) - end - - def visit_string_concat(node) - value = node.left.parts.first.value + node.right.parts.first.value - - visit_string_literal( - StringLiteral.new( - parts: [TStringContent.new(value: value, location: node.location)], - quote: node.left.quote, - location: node.location - ) - ) - end - - def visit_string_embexpr(node) - visit(node.statements) - end - - def visit_string_literal(node) - if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - visit(node.parts.first) - else - length = visit_string_parts(node) - iseq.concatstrings(length) - end - end - - def visit_super(node) - iseq.putself - visit(node.arguments) - iseq.invokesuper( - YARV.calldata( - nil, - argument_parts(node.arguments).length, - CallData::CALL_FCALL | CallData::CALL_ARGS_SIMPLE | - CallData::CALL_SUPER - ), - nil - ) - end - - def visit_symbol_literal(node) - iseq.putobject(node.accept(RubyVisitor.new)) - end - - def visit_symbols(node) - if (compiled = RubyVisitor.compile(node)) - iseq.duparray(compiled) - else - node.elements.each do |element| - if element.parts.length == 1 && - element.parts.first.is_a?(TStringContent) - iseq.putobject(element.parts.first.value.to_sym) - else - length = visit_string_parts(element) - iseq.concatstrings(length) - iseq.intern - end - end - - iseq.newarray(node.elements.length) - end - end - - def visit_top_const_ref(node) - iseq.opt_getconstant_path(constant_names(node)) - end - - def visit_tstring_content(node) - if options.frozen_string_literal? - iseq.putobject(node.accept(RubyVisitor.new)) - else - iseq.putstring(node.accept(RubyVisitor.new)) - end - end - - def visit_unary(node) - method_id = - case node.operator - when "+", "-" - "#{node.operator}@" - else - node.operator - end - - visit_call( - CommandCall.new( - receiver: node.statement, - operator: nil, - message: Ident.new(value: method_id, location: Location.default), - arguments: nil, - block: nil, - location: Location.default - ) - ) - end - - def visit_undef(node) - node.symbols.each_with_index do |symbol, index| - iseq.pop if index != 0 - iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) - iseq.putspecialobject(PutSpecialObject::OBJECT_CBASE) - visit(symbol) - iseq.send(YARV.calldata(:"core#undef_method", 2)) - end - end - - def visit_unless(node) - statements_label = iseq.label - - visit(node.predicate) - iseq.branchunless(statements_label) - node.consequent ? visit(node.consequent) : iseq.putnil - - if last_statement? - iseq.leave - iseq.push(statements_label) - visit(node.statements) - else - iseq.pop - - if node.consequent - done_label = iseq.label - iseq.jump(done_label) - iseq.push(statements_label) - visit(node.consequent) - iseq.push(done_label) - else - iseq.push(statements_label) - end - end - end - - def visit_until(node) - predicate_label = iseq.label - statements_label = iseq.label - - iseq.jump(predicate_label) - iseq.putnil - iseq.pop - iseq.jump(predicate_label) - - iseq.push(statements_label) - visit(node.statements) - iseq.pop - - iseq.push(predicate_label) - visit(node.predicate) - iseq.branchunless(statements_label) - iseq.putnil if last_statement? - end - - def visit_var_field(node) - case node.value - when CVar, IVar - name = node.value.value.to_sym - iseq.inline_storage_for(name) - when Ident - name = node.value.value.to_sym - - if (local_variable = iseq.local_variable(name)) - local_variable - else - iseq.local_table.plain(name) - iseq.local_variable(name) - end - end - end - - def visit_var_ref(node) - case node.value - when Const - iseq.opt_getconstant_path(constant_names(node)) - when CVar - name = node.value.value.to_sym - iseq.getclassvariable(name) - when GVar - iseq.getglobal(node.value.value.to_sym) - when Ident - lookup = iseq.local_variable(node.value.value.to_sym) - - case lookup.local - when LocalTable::BlockLocal - iseq.getblockparam(lookup.index, lookup.level) - when LocalTable::PlainLocal - iseq.getlocal(lookup.index, lookup.level) - end - when IVar - name = node.value.value.to_sym - iseq.getinstancevariable(name) - when Kw - case node.value.value - when "false" - iseq.putobject(false) - when "nil" - iseq.putnil - when "self" - iseq.putself - when "true" - iseq.putobject(true) - end - end - end - - def visit_vcall(node) - iseq.putself - iseq.send( - YARV.calldata( - node.value.value.to_sym, - 0, - CallData::CALL_FCALL | CallData::CALL_VCALL | - CallData::CALL_ARGS_SIMPLE - ) - ) - end - - def visit_when(node) - visit(node.statements) - end - - def visit_while(node) - predicate_label = iseq.label - statements_label = iseq.label - - iseq.jump(predicate_label) - iseq.putnil - iseq.pop - iseq.jump(predicate_label) - - iseq.push(statements_label) - visit(node.statements) - iseq.pop - - iseq.push(predicate_label) - visit(node.predicate) - iseq.branchif(statements_label) - iseq.putnil if last_statement? - end - - def visit_word(node) - if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - visit(node.parts.first) - else - length = visit_string_parts(node) - iseq.concatstrings(length) - end - end - - def visit_words(node) - if options.frozen_string_literal? && - (compiled = RubyVisitor.compile(node)) - iseq.duparray(compiled) - else - visit_all(node.elements) - iseq.newarray(node.elements.length) - end - end - - def visit_xstring_literal(node) - iseq.putself - length = visit_string_parts(node) - iseq.concatstrings(node.parts.length) if length > 1 - iseq.send( - YARV.calldata( - :`, - 1, - CallData::CALL_FCALL | CallData::CALL_ARGS_SIMPLE - ) - ) - end - - def visit_yield(node) - parts = argument_parts(node.arguments) - visit_all(parts) - iseq.invokeblock(YARV.calldata(nil, parts.length)) - end - - def visit_zsuper(_node) - iseq.putself - iseq.invokesuper( - YARV.calldata( - nil, - 0, - CallData::CALL_FCALL | CallData::CALL_ARGS_SIMPLE | - CallData::CALL_SUPER | CallData::CALL_ZSUPER - ), - nil - ) - end - - private - - # This is a helper that is used in places where arguments may be present - # or they may be wrapped in parentheses. It's meant to descend down the - # tree and return an array of argument nodes. - def argument_parts(node) - case node - when nil - [] - when Args - node.parts - when ArgParen - if node.arguments.is_a?(ArgsForward) - [node.arguments] - else - node.arguments.parts - end - when Paren - node.contents.parts - end - end - - # Constant names when they are being assigned or referenced come in as a - # tree, but it's more convenient to work with them as an array. This - # method converts them into that array. This is nice because it's the - # operand that goes to opt_getconstant_path in Ruby 3.2. - def constant_names(node) - current = node - names = [] - - while current.is_a?(ConstPathField) || current.is_a?(ConstPathRef) - names.unshift(current.constant.value.to_sym) - current = current.parent - end - - case current - when VarField, VarRef - names.unshift(current.value.value.to_sym) - when TopConstRef - names.unshift(current.constant.value.to_sym) - names.unshift(:"") - end - - names - end - - # For the most part when an OpAssign (operator assignment) node with a ||= - # operator is being compiled it's a matter of reading the target, checking - # if the value should be evaluated, evaluating it if so, and then writing - # the result back to the target. - # - # However, in certain kinds of assignments (X, ::X, X::Y, @@x, and $x) we - # first check if the value is defined using the defined instruction. I - # don't know why it is necessary, and suspect that it isn't. - def opassign_defined(node) - value_label = iseq.label - skip_value_label = iseq.label - - case node.target - when ConstPathField - visit(node.target.parent) - name = node.target.constant.value.to_sym - - iseq.dup - iseq.defined(Defined::TYPE_CONST_FROM, name, true) - when TopConstField - name = node.target.constant.value.to_sym - - iseq.putobject(Object) - iseq.dup - iseq.defined(Defined::TYPE_CONST_FROM, name, true) - when VarField - name = node.target.value.value.to_sym - iseq.putnil - - case node.target.value - when Const - iseq.defined(Defined::TYPE_CONST, name, true) - when CVar - iseq.defined(Defined::TYPE_CVAR, name, true) - when GVar - iseq.defined(Defined::TYPE_GVAR, name, true) - end - end - - iseq.branchunless(value_label) - - case node.target - when ConstPathField, TopConstField - iseq.dup - iseq.putobject(true) - iseq.getconstant(name) - when VarField - case node.target.value - when Const - iseq.opt_getconstant_path(constant_names(node.target)) - when CVar - iseq.getclassvariable(name) - when GVar - iseq.getglobal(name) - end - end - - iseq.dup - iseq.branchif(skip_value_label) - - iseq.pop - iseq.push(value_label) - visit(node.value) - - case node.target - when ConstPathField, TopConstField - iseq.dupn(2) - iseq.swap - iseq.setconstant(name) - when VarField - iseq.dup - - case node.target.value - when Const - iseq.putspecialobject(PutSpecialObject::OBJECT_CONST_BASE) - iseq.setconstant(name) - when CVar - iseq.setclassvariable(name) - when GVar - iseq.setglobal(name) - end - end - - iseq.push(skip_value_label) - end - - # Whenever a value is interpolated into a string-like structure, these - # three instructions are pushed. - def push_interpolate - iseq.dup - iseq.objtostring( - YARV.calldata( - :to_s, - 0, - CallData::CALL_FCALL | CallData::CALL_ARGS_SIMPLE - ) - ) - iseq.anytostring - end - - # Visit a type of pattern in a pattern match. - def visit_pattern(node, end_label) - case node - when AryPtn - length_label = iseq.label - match_failure_label = iseq.label - match_error_label = iseq.label - - # If there's a constant, then check if we match against that constant - # or not first. Branch to failure if we don't. - if node.constant - iseq.dup - visit(node.constant) - iseq.checkmatch(CheckMatch::VM_CHECKMATCH_TYPE_CASE) - iseq.branchunless(match_failure_label) - end - - # First, check if the #deconstruct cache is nil. If it is, we're going - # to call #deconstruct on the object and cache the result. - iseq.topn(2) - deconstruct_label = iseq.label - iseq.branchnil(deconstruct_label) - - # Next, ensure that the cached value was cached correctly, otherwise - # fail the match. - iseq.topn(2) - iseq.branchunless(match_failure_label) - - # Since we have a valid cached value, we can skip past the part where - # we call #deconstruct on the object. - iseq.pop - iseq.topn(1) - iseq.jump(length_label) - - # Check if the object responds to #deconstruct, fail the match - # otherwise. - iseq.event(deconstruct_label) - iseq.dup - iseq.putobject(:deconstruct) - iseq.send(YARV.calldata(:respond_to?, 1)) - iseq.setn(3) - iseq.branchunless(match_failure_label) - - # Call #deconstruct and ensure that it's an array, raise an error - # otherwise. - iseq.send(YARV.calldata(:deconstruct)) - iseq.setn(2) - iseq.dup - iseq.checktype(CheckType::TYPE_ARRAY) - iseq.branchunless(match_error_label) - - # Ensure that the deconstructed array has the correct size, fail the - # match otherwise. - iseq.push(length_label) - iseq.dup - iseq.send(YARV.calldata(:length)) - iseq.putobject(node.requireds.length) - iseq.send(YARV.calldata(:==, 1)) - iseq.branchunless(match_failure_label) - - # For each required element, check if the deconstructed array contains - # the element, otherwise jump out to the top-level match failure. - iseq.dup - node.requireds.each_with_index do |required, index| - iseq.putobject(index) - iseq.send(YARV.calldata(:[], 1)) - - case required - when VarField - lookup = visit(required) - iseq.setlocal(lookup.index, lookup.level) - else - visit(required) - iseq.checkmatch(CheckMatch::VM_CHECKMATCH_TYPE_CASE) - iseq.branchunless(match_failure_label) - end - - if index < node.requireds.length - 1 - iseq.dup - else - iseq.pop - iseq.jump(end_label) - end - end - - # Set up the routine here to raise an error to indicate that the type - # of the deconstructed array was incorrect. - iseq.push(match_error_label) - iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) - iseq.putobject(TypeError) - iseq.putobject("deconstruct must return Array") - iseq.send(YARV.calldata(:"core#raise", 2)) - iseq.pop - - # Patch all of the match failures to jump here so that we pop a final - # value before returning to the parent node. - iseq.push(match_failure_label) - iseq.pop - when VarField - lookup = visit(node) - iseq.setlocal(lookup.index, lookup.level) - iseq.jump(end_label) - end - end - - # There are a lot of nodes in the AST that act as contains of parts of - # strings. This includes things like string literals, regular expressions, - # heredocs, etc. This method will visit all the parts of a string within - # those containers. - def visit_string_parts(node) - length = 0 - - unless node.parts.first.is_a?(TStringContent) - iseq.putobject("") - length += 1 - end - - node.parts.each do |part| - case part - when StringDVar - visit(part.variable) - push_interpolate - when StringEmbExpr - visit(part) - push_interpolate - when TStringContent - iseq.putobject(part.accept(RubyVisitor.new)) - end - - length += 1 - end - - length - end - - # The current instruction sequence that we're compiling is always stored - # on the compiler. When we descend into a node that has its own - # instruction sequence, this method can be called to temporarily set the - # new value of the instruction sequence, yield, and then set it back. - def with_child_iseq(child_iseq) - parent_iseq = iseq - - begin - @iseq = child_iseq - yield - child_iseq - ensure - @iseq = parent_iseq - end - end - - # When we're compiling the last statement of a set of statements within a - # scope, the instructions sometimes change from pops to leaves. These - # kinds of peephole optimizations can reduce the overall number of - # instructions. Therefore, we keep track of whether we're compiling the - # last statement of a scope and allow visit methods to query that - # information. - def with_last_statement - previous = @last_statement - @last_statement = true - - begin - yield - ensure - @last_statement = previous - end - end - - def last_statement? - @last_statement - end - - # OpAssign nodes can have a number of different kinds of nodes as their - # "target" (i.e., the left-hand side of the assignment). When compiling - # these nodes we typically need to first fetch the current value of the - # variable, then perform some kind of action, then store the result back - # into the variable. This method handles that by first fetching the value, - # then yielding to the block, then storing the result. - def with_opassign(node) - case node.target - when ARefField - iseq.putnil - visit(node.target.collection) - visit(node.target.index) - - iseq.dupn(2) - iseq.send(YARV.calldata(:[], 1)) - - yield - - iseq.setn(3) - iseq.send(YARV.calldata(:[]=, 2)) - iseq.pop - when ConstPathField - name = node.target.constant.value.to_sym - - visit(node.target.parent) - iseq.dup - iseq.putobject(true) - iseq.getconstant(name) - - yield - - if node.operator.value == "&&=" - iseq.dupn(2) - else - iseq.swap - iseq.topn(1) - end - - iseq.swap - iseq.setconstant(name) - when TopConstField - name = node.target.constant.value.to_sym - - iseq.putobject(Object) - iseq.dup - iseq.putobject(true) - iseq.getconstant(name) - - yield - - if node.operator.value == "&&=" - iseq.dupn(2) - else - iseq.swap - iseq.topn(1) - end - - iseq.swap - iseq.setconstant(name) - when VarField - case node.target.value - when Const - names = constant_names(node.target) - iseq.opt_getconstant_path(names) - - yield - - iseq.dup - iseq.putspecialobject(PutSpecialObject::OBJECT_CONST_BASE) - iseq.setconstant(names.last) - when CVar - name = node.target.value.value.to_sym - iseq.getclassvariable(name) - - yield - - iseq.dup - iseq.setclassvariable(name) - when GVar - name = node.target.value.value.to_sym - iseq.getglobal(name) - - yield - - iseq.dup - iseq.setglobal(name) - when Ident - local_variable = visit(node.target) - iseq.getlocal(local_variable.index, local_variable.level) - - yield - - iseq.dup - iseq.setlocal(local_variable.index, local_variable.level) - when IVar - name = node.target.value.value.to_sym - iseq.getinstancevariable(name) - - yield - - iseq.dup - iseq.setinstancevariable(name) - end - end - end - end - end -end diff --git a/lib/syntax_tree/yarv/control_flow_graph.rb b/lib/syntax_tree/yarv/control_flow_graph.rb deleted file mode 100644 index 2829bb21..00000000 --- a/lib/syntax_tree/yarv/control_flow_graph.rb +++ /dev/null @@ -1,257 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - module YARV - # This class represents a control flow graph of a YARV instruction sequence. - # It constructs a graph of basic blocks that hold subsets of the list of - # instructions from the instruction sequence. - # - # You can use this class by calling the ::compile method and passing it a - # YARV instruction sequence. It will return a control flow graph object. - # - # iseq = RubyVM::InstructionSequence.compile("1 + 2") - # iseq = SyntaxTree::YARV::InstructionSequence.from(iseq.to_a) - # cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq) - # - class ControlFlowGraph - # This class is responsible for creating a control flow graph from the - # given instruction sequence. - class Compiler - # This is the instruction sequence that is being compiled. - attr_reader :iseq - - # This is a hash of indices in the YARV instruction sequence that point - # to their corresponding instruction. - attr_reader :insns - - # This is a hash of labels that point to their corresponding index into - # the YARV instruction sequence. Note that this is not the same as the - # index into the list of instructions on the instruction sequence - # object. Instead, this is the index into the C array, so it includes - # operands. - attr_reader :labels - - def initialize(iseq) - @iseq = iseq - - @insns = {} - @labels = {} - - length = 0 - iseq.insns.each do |insn| - case insn - when Instruction - @insns[length] = insn - length += insn.length - when InstructionSequence::Label - @labels[insn] = length - end - end - end - - # This method is used to compile the instruction sequence into a control - # flow graph. It returns an instance of ControlFlowGraph. - def compile - blocks = build_basic_blocks - - connect_basic_blocks(blocks) - prune_basic_blocks(blocks) - - ControlFlowGraph.new(iseq, insns, blocks.values).tap(&:verify) - end - - private - - # Finds the indices of the instructions that start a basic block because - # they're either: - # - # * the start of an instruction sequence - # * the target of a branch - # * fallen through to from a branch - # - def find_basic_block_starts - block_starts = Set.new([0]) - - insns.each do |index, insn| - branch_targets = insn.branch_targets - - if branch_targets.any? - branch_targets.each do |branch_target| - block_starts.add(labels[branch_target]) - end - - block_starts.add(index + insn.length) if insn.falls_through? - end - end - - block_starts.to_a.sort - end - - # Builds up a set of basic blocks by iterating over the starts of each - # block. They are keyed by the index of their first instruction. - def build_basic_blocks - block_starts = find_basic_block_starts - - length = 0 - blocks = - iseq - .insns - .grep(Instruction) - .slice_after do |insn| - length += insn.length - block_starts.include?(length) - end - - block_starts - .zip(blocks) - .to_h do |block_start, insns| - # It's possible that we have not detected a block start but still - # have branching instructions inside of a basic block. This can - # happen if you have an unconditional jump which is followed by - # instructions that are unreachable. As of Ruby 3.2, this is - # possible with something as simple as "1 => a". In this case we - # can discard all instructions that follow branching instructions. - block_insns = - insns.slice_after { |insn| insn.branch_targets.any? }.first - - [block_start, BasicBlock.new(block_start, block_insns)] - end - end - - # Connect the blocks by letting them know which blocks are incoming and - # outgoing from each block. - def connect_basic_blocks(blocks) - blocks.each do |block_start, block| - insn = block.insns.last - - insn.branch_targets.each do |branch_target| - block.outgoing_blocks << blocks.fetch(labels[branch_target]) - end - - if (insn.branch_targets.empty? && !insn.leaves?) || - insn.falls_through? - fall_through_start = block_start + block.insns.sum(&:length) - block.outgoing_blocks << blocks.fetch(fall_through_start) - end - - block.outgoing_blocks.each do |outgoing_block| - outgoing_block.incoming_blocks << block - end - end - end - - # If there are blocks that are unreachable, we can remove them from the - # graph entirely at this point. - def prune_basic_blocks(blocks) - visited = Set.new - queue = [blocks.fetch(0)] - - until queue.empty? - current_block = queue.shift - next if visited.include?(current_block) - - visited << current_block - queue.concat(current_block.outgoing_blocks) - end - - blocks.select! { |_, block| visited.include?(block) } - end - end - - # This is the instruction sequence that this control flow graph - # corresponds to. - attr_reader :iseq - - # This is the list of instructions that this control flow graph contains. - # It is effectively the same as the list of instructions in the - # instruction sequence but with line numbers and events filtered out. - attr_reader :insns - - # This is the set of basic blocks that this control-flow graph contains. - attr_reader :blocks - - def initialize(iseq, insns, blocks) - @iseq = iseq - @insns = insns - @blocks = blocks - end - - def disasm - fmt = Disassembler.new(iseq) - fmt.puts("== cfg: #{iseq.inspect}") - - blocks.each do |block| - fmt.puts(block.id) - fmt.with_prefix(" ") do |prefix| - unless block.incoming_blocks.empty? - from = block.incoming_blocks.map(&:id) - fmt.puts("#{prefix}== from: #{from.join(", ")}") - end - - fmt.format_insns!(block.insns, block.block_start) - - to = block.outgoing_blocks.map(&:id) - to << "leaves" if block.insns.last.leaves? - fmt.puts("#{prefix}== to: #{to.join(", ")}") - end - end - - fmt.string - end - - def to_dfg - DataFlowGraph.compile(self) - end - - def to_son - to_dfg.to_son - end - - def to_mermaid - Mermaid.flowchart do |flowchart| - disasm = Disassembler::Squished.new - - blocks.each do |block| - flowchart.subgraph(block.id) do - previous = nil - - block.each_with_length do |insn, length| - node = - flowchart.node( - "node_#{length}", - "%04d %s" % [length, insn.disasm(disasm)] - ) - - flowchart.link(previous, node) if previous - previous = node - end - end - end - - blocks.each do |block| - block.outgoing_blocks.each do |outgoing| - offset = - block.block_start + block.insns.sum(&:length) - - block.insns.last.length - - from = flowchart.fetch("node_#{offset}") - to = flowchart.fetch("node_#{outgoing.block_start}") - flowchart.link(from, to) - end - end - end - end - - # This method is used to verify that the control flow graph is well - # formed. It does this by checking that each basic block is itself well - # formed. - def verify - blocks.each(&:verify) - end - - def self.compile(iseq) - Compiler.new(iseq).compile - end - end - end -end diff --git a/lib/syntax_tree/yarv/data_flow_graph.rb b/lib/syntax_tree/yarv/data_flow_graph.rb deleted file mode 100644 index aedee9ba..00000000 --- a/lib/syntax_tree/yarv/data_flow_graph.rb +++ /dev/null @@ -1,338 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - module YARV - # Constructs a data-flow-graph of a YARV instruction sequence, via a - # control-flow-graph. Data flow is discovered locally and then globally. The - # graph only considers data flow through the stack - local variables and - # objects are considered fully escaped in this analysis. - # - # You can use this class by calling the ::compile method and passing it a - # control flow graph. It will return a data flow graph object. - # - # iseq = RubyVM::InstructionSequence.compile("1 + 2") - # iseq = SyntaxTree::YARV::InstructionSequence.from(iseq.to_a) - # cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq) - # dfg = SyntaxTree::YARV::DataFlowGraph.compile(cfg) - # - class DataFlowGraph - # This object represents the flow of data between instructions. - class DataFlow - attr_reader :in - attr_reader :out - - def initialize - @in = [] - @out = [] - end - end - - # This represents an object that goes on the stack that is passed between - # basic blocks. - class BlockArgument - attr_reader :name - - def initialize(name) - @name = name - end - - def local? - false - end - - def to_str - name.to_s - end - end - - # This represents an object that goes on the stack that is passed between - # instructions within a basic block. - class LocalArgument - attr_reader :name, :length - - def initialize(length) - @length = length - end - - def local? - true - end - - def to_str - length.to_s - end - end - - attr_reader :cfg, :insn_flows, :block_flows - - def initialize(cfg, insn_flows, block_flows) - @cfg = cfg - @insn_flows = insn_flows - @block_flows = block_flows - end - - def blocks - cfg.blocks - end - - def disasm - fmt = Disassembler.new(cfg.iseq) - fmt.puts("== dfg: #{cfg.iseq.inspect}") - - blocks.each do |block| - fmt.puts(block.id) - fmt.with_prefix(" ") do |prefix| - unless block.incoming_blocks.empty? - from = block.incoming_blocks.map(&:id) - fmt.puts("#{prefix}== from: #{from.join(", ")}") - end - - block_flow = block_flows.fetch(block.id) - unless block_flow.in.empty? - fmt.puts("#{prefix}== in: #{block_flow.in.join(", ")}") - end - - fmt.format_insns!(block.insns, block.block_start) do |_, length| - insn_flow = insn_flows[length] - next if insn_flow.in.empty? && insn_flow.out.empty? - - fmt.print(" # ") - unless insn_flow.in.empty? - fmt.print("in: #{insn_flow.in.join(", ")}") - fmt.print("; ") unless insn_flow.out.empty? - end - - unless insn_flow.out.empty? - fmt.print("out: #{insn_flow.out.join(", ")}") - end - end - - to = block.outgoing_blocks.map(&:id) - to << "leaves" if block.insns.last.leaves? - fmt.puts("#{prefix}== to: #{to.join(", ")}") - - unless block_flow.out.empty? - fmt.puts("#{prefix}== out: #{block_flow.out.join(", ")}") - end - end - end - - fmt.string - end - - def to_son - SeaOfNodes.compile(self) - end - - def to_mermaid - Mermaid.flowchart do |flowchart| - disasm = Disassembler::Squished.new - - blocks.each do |block| - block_flow = block_flows.fetch(block.id) - graph_name = - if block_flow.in.any? - "#{block.id} #{block_flows[block.id].in.join(", ")}" - else - block.id - end - - flowchart.subgraph(graph_name) do - previous = nil - - block.each_with_length do |insn, length| - node = - flowchart.node( - "node_#{length}", - "%04d %s" % [length, insn.disasm(disasm)], - shape: :rounded - ) - - flowchart.link(previous, node, color: :red) if previous - insn_flows[length].in.each do |input| - if input.is_a?(LocalArgument) - from = flowchart.fetch("node_#{input.length}") - flowchart.link(from, node, color: :green) - end - end - - previous = node - end - end - end - - blocks.each do |block| - block.outgoing_blocks.each do |outgoing| - offset = - block.block_start + block.insns.sum(&:length) - - block.insns.last.length - - from = flowchart.fetch("node_#{offset}") - to = flowchart.fetch("node_#{outgoing.block_start}") - flowchart.link(from, to, color: :red) - end - end - end - end - - # Verify that we constructed the data flow graph correctly. - def verify - # Check that the first block has no arguments. - raise unless block_flows.fetch(blocks.first.id).in.empty? - - # Check all control flow edges between blocks pass the right number of - # arguments. - blocks.each do |block| - block_flow = block_flows.fetch(block.id) - - if block.outgoing_blocks.empty? - # With no outgoing blocks, there should be no output arguments. - raise unless block_flow.out.empty? - else - # Check with outgoing blocks... - block.outgoing_blocks.each do |outgoing_block| - outgoing_flow = block_flows.fetch(outgoing_block.id) - - # The block should have as many output arguments as the - # outgoing block has input arguments. - raise unless block_flow.out.size == outgoing_flow.in.size - end - end - end - end - - def self.compile(cfg) - Compiler.new(cfg).compile - end - - # This class is responsible for creating a data flow graph from the given - # control flow graph. - class Compiler - # This is the control flow graph that is being compiled. - attr_reader :cfg - - # This data structure will hold the data flow between instructions - # within individual basic blocks. - attr_reader :insn_flows - - # This data structure will hold the data flow between basic blocks. - attr_reader :block_flows - - def initialize(cfg) - @cfg = cfg - @insn_flows = cfg.insns.to_h { |length, _| [length, DataFlow.new] } - @block_flows = cfg.blocks.to_h { |block| [block.id, DataFlow.new] } - end - - def compile - find_internal_flow - find_external_flow - DataFlowGraph.new(cfg, insn_flows, block_flows).tap(&:verify) - end - - private - - # Find the data flow within each basic block. Using an abstract stack, - # connect from consumers of data to the producers of that data. - def find_internal_flow - cfg.blocks.each do |block| - block_flow = block_flows.fetch(block.id) - stack = [] - - # Go through each instruction in the block. - block.each_with_length do |insn, length| - insn_flow = insn_flows[length] - - # How many values will be missing from the local stack to run this - # instruction? This will be used to determine if the values that - # are being used by this instruction are coming from previous - # instructions or from previous basic blocks. - missing = insn.pops - stack.size - - # For every value the instruction pops off the stack. - insn.pops.times do - # Was the value it pops off from another basic block? - if stack.empty? - # If the stack is empty, then there aren't enough values being - # pushed from previous instructions to fulfill the needs of - # this instruction. In that case the values must be coming - # from previous basic blocks. - missing -= 1 - argument = BlockArgument.new(:"in_#{missing}") - - insn_flow.in.unshift(argument) - block_flow.in.unshift(argument) - else - # Since there are values in the stack, we can connect this - # consumer to the producer of the value. - insn_flow.in.unshift(stack.pop) - end - end - - # Record on our abstract stack that this instruction pushed - # this value onto the stack. - insn.pushes.times { stack << LocalArgument.new(length) } - end - - # Values that are left on the stack after going through all - # instructions are arguments to the basic block that we jump to. - stack.reverse_each.with_index do |producer, index| - block_flow.out << producer - - argument = BlockArgument.new(:"out_#{index}") - insn_flows[producer.length].out << argument - end - end - - # Go backwards and connect from producers to consumers. - cfg.insns.each_key do |length| - # For every instruction that produced a value used in this - # instruction... - insn_flows[length].in.each do |producer| - # If it's actually another instruction and not a basic block - # argument... - if producer.is_a?(LocalArgument) - # Record in the producing instruction that it produces a value - # used by this construction. - insn_flows[producer.length].out << LocalArgument.new(length) - end - end - end - end - - # Find the data that flows between basic blocks. - def find_external_flow - stack = [*cfg.blocks] - - until stack.empty? - block = stack.pop - block_flow = block_flows.fetch(block.id) - - block.incoming_blocks.each do |incoming_block| - incoming_flow = block_flows.fetch(incoming_block.id) - - # Does a predecessor block have fewer outputs than the successor - # has inputs? - if incoming_flow.out.size < block_flow.in.size - # If so then add arguments to pass data through from the - # incoming block's incoming blocks. - (block_flow.in.size - incoming_flow.out.size).times do |index| - name = BlockArgument.new(:"pass_#{index}") - - incoming_flow.in.unshift(name) - incoming_flow.out.unshift(name) - end - - # Since we modified the incoming block, add it back to the stack - # so it'll be considered as an outgoing block again, and - # propogate the external data flow back up the control flow - # graph. - stack << incoming_block - end - end - end - end - end - end - end -end diff --git a/lib/syntax_tree/yarv/decompiler.rb b/lib/syntax_tree/yarv/decompiler.rb deleted file mode 100644 index 6a2cddbd..00000000 --- a/lib/syntax_tree/yarv/decompiler.rb +++ /dev/null @@ -1,263 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - module YARV - # This class is responsible for taking a compiled instruction sequence and - # walking through it to generate equivalent Ruby code. - class Decompiler - # When we're decompiling, we use a looped case statement to emulate - # jumping around in the same way the virtual machine would. This class - # provides convenience methods for generating the AST nodes that have to - # do with that label. - class BlockLabel - include DSL - attr_reader :name - - def initialize(name) - @name = name - end - - def field - VarField(Ident(name)) - end - - def ref - VarRef(Ident(name)) - end - end - - include DSL - attr_reader :iseq, :block_label - - def initialize(iseq) - @iseq = iseq - @block_label = BlockLabel.new("__block_label") - end - - def to_ruby - Program(decompile(iseq)) - end - - private - - def node_for(value) - case value - when Integer - Int(value.to_s) - when Symbol - SymbolLiteral(Ident(value.name)) - end - end - - def decompile(iseq) - label = :label_0 - clauses = {} - clause = [] - - iseq.insns.each do |insn| - case insn - when InstructionSequence::Label - unless clause.last.is_a?(Next) - clause << Assign(block_label.field, node_for(insn.name)) - end - - clauses[label] = clause - clause = [] - label = insn.name - when BranchIf - body = [ - Assign(block_label.field, node_for(insn.label.name)), - Next(Args([])) - ] - - clause << UnlessNode(clause.pop, Statements(body), nil) - when BranchUnless - body = [ - Assign(block_label.field, node_for(insn.label.name)), - Next(Args([])) - ] - - clause << IfNode(clause.pop, Statements(body), nil) - when Dup - clause << clause.last - when DupHash - assocs = - insn.object.map do |key, value| - Assoc(node_for(key), node_for(value)) - end - - clause << HashLiteral(LBrace("{"), assocs) - when GetGlobal - clause << VarRef(GVar(insn.name.name)) - when GetLocalWC0 - local = iseq.local_table.locals[insn.index] - clause << VarRef(Ident(local.name.name)) - when Jump - clause << Assign(block_label.field, node_for(insn.label.name)) - clause << Next(Args([])) - when Leave - value = Args([clause.pop]) - clause << (iseq.type != :top ? Break(value) : ReturnNode(value)) - when OptAnd, OptDiv, OptEq, OptGE, OptGT, OptLE, OptLT, OptLTLT, - OptMinus, OptMod, OptMult, OptOr, OptPlus - left, right = clause.pop(2) - clause << Binary(left, insn.calldata.method, right) - when OptAref - collection, arg = clause.pop(2) - clause << ARef(collection, Args([arg])) - when OptAset - collection, arg, value = clause.pop(3) - - clause << if value.is_a?(Binary) && value.left.is_a?(ARef) && - collection === value.left.collection && - arg === value.left.index.parts[0] - OpAssign( - ARefField(collection, Args([arg])), - Op("#{value.operator}="), - value.right - ) - else - Assign(ARefField(collection, Args([arg])), value) - end - when OptNEq - left, right = clause.pop(2) - clause << Binary(left, :"!=", right) - when OptSendWithoutBlock - method = insn.calldata.method.name - argc = insn.calldata.argc - - if insn.calldata.flag?(CallData::CALL_FCALL) - if argc == 0 - clause.pop - clause << CallNode(nil, nil, Ident(method), Args([])) - elsif argc == 1 && method.end_with?("=") - _receiver, argument = clause.pop(2) - clause << Assign( - CallNode(nil, nil, Ident(method[0..-2]), nil), - argument - ) - else - _receiver, *arguments = clause.pop(argc + 1) - clause << CallNode( - nil, - nil, - Ident(method), - ArgParen(Args(arguments)) - ) - end - else - if argc == 0 - clause << CallNode(clause.pop, Period("."), Ident(method), nil) - elsif argc == 1 && method.end_with?("=") - receiver, argument = clause.pop(2) - clause << Assign( - Field(receiver, Period("."), Ident(method[0..-2])), - argument - ) - else - receiver, *arguments = clause.pop(argc + 1) - clause << CallNode( - receiver, - Period("."), - Ident(method), - ArgParen(Args(arguments)) - ) - end - end - when Pop - # skip - when PutObject - case insn.object - when Float - clause << FloatLiteral(insn.object.inspect) - when Integer - clause << Int(insn.object.inspect) - else - raise "Unknown object type: #{insn.object.class.name}" - end - when PutObjectInt2Fix0 - clause << Int("0") - when PutObjectInt2Fix1 - clause << Int("1") - when PutSelf - clause << VarRef(Kw("self")) - when SetGlobal - target = GVar(insn.name.name) - value = clause.pop - - clause << if value.is_a?(Binary) && VarRef(target) === value.left - OpAssign(VarField(target), Op("#{value.operator}="), value.right) - else - Assign(VarField(target), value) - end - when SetLocalWC0 - target = Ident(local_name(insn.index, 0)) - value = clause.pop - - clause << if value.is_a?(Binary) && VarRef(target) === value.left - OpAssign(VarField(target), Op("#{value.operator}="), value.right) - else - Assign(VarField(target), value) - end - else - raise "Unknown instruction #{insn}" - end - end - - # If there's only one clause, then we don't need a case statement, and - # we can just disassemble the first clause. - clauses[label] = clause - return Statements(clauses.values.first) if clauses.size == 1 - - # Here we're going to build up a big case statement that will handle all - # of the different labels. - current = nil - clauses.reverse_each do |current_label, current_clause| - current = - When( - Args([node_for(current_label)]), - Statements(current_clause), - current - ) - end - switch = Case(Kw("case"), block_label.ref, current) - - # Here we're going to make sure that any locals that were established in - # the label_0 block are initialized so that scoping rules work - # correctly. - stack = [] - locals = [block_label.name] - - clauses[:label_0].each do |node| - if node.is_a?(Assign) && node.target.is_a?(VarField) && - node.target.value.is_a?(Ident) - value = node.target.value.value - next if locals.include?(value) - - stack << Assign(node.target, VarRef(Kw("nil"))) - locals << value - end - end - - # Finally, we'll set up the initial label and loop the entire case - # statement. - stack << Assign(block_label.field, node_for(:label_0)) - stack << MethodAddBlock( - CallNode(nil, nil, Ident("loop"), Args([])), - BlockNode( - Kw("do"), - nil, - BodyStmt(Statements([switch]), nil, nil, nil, nil) - ) - ) - Statements(stack) - end - - def local_name(index, level) - current = iseq - level.times { current = current.parent_iseq } - current.local_table.locals[index].name.name - end - end - end -end diff --git a/lib/syntax_tree/yarv/disassembler.rb b/lib/syntax_tree/yarv/disassembler.rb deleted file mode 100644 index dac220fd..00000000 --- a/lib/syntax_tree/yarv/disassembler.rb +++ /dev/null @@ -1,236 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - module YARV - class Disassembler - # This class is another object that handles disassembling a YARV - # instruction sequence but it renders it without any of the extra spacing - # or alignment. - class Squished - def calldata(value) - value.inspect - end - - def enqueue(iseq) - end - - def event(name) - end - - def inline_storage(cache) - "" - end - - def instruction(name, operands = []) - operands.empty? ? name : "#{name} #{operands.join(", ")}" - end - - def label(value) - "%04d" % value.name["label_".length..] - end - - def local(index, **) - index.inspect - end - - def object(value) - value.inspect - end - end - - attr_reader :output, :queue - - attr_reader :current_prefix - attr_accessor :current_iseq - - def initialize(current_iseq = nil) - @output = StringIO.new - @queue = [] - - @current_prefix = "" - @current_iseq = current_iseq - end - - ######################################################################## - # Helpers for various instructions - ######################################################################## - - def calldata(value) - value.inspect - end - - def enqueue(iseq) - queue << iseq - end - - def event(name) - case name - when :RUBY_EVENT_B_CALL - "Bc" - when :RUBY_EVENT_B_RETURN - "Br" - when :RUBY_EVENT_CALL - "Ca" - when :RUBY_EVENT_CLASS - "Cl" - when :RUBY_EVENT_END - "En" - when :RUBY_EVENT_LINE - "Li" - when :RUBY_EVENT_RETURN - "Re" - else - raise "Unknown event: #{name}" - end - end - - def inline_storage(cache) - "" - end - - def instruction(name, operands = []) - operands.empty? ? name : "%-38s %s" % [name, operands.join(", ")] - end - - def label(value) - value.name["label_".length..] - end - - def local(index, explicit: nil, implicit: nil) - current = current_iseq - (explicit || implicit).times { current = current.parent_iseq } - - value = "#{current.local_table.name_at(index)}@#{index}" - value << ", #{explicit}" if explicit - value - end - - def object(value) - value.inspect - end - - ######################################################################## - # Entrypoints - ######################################################################## - - def format! - while (@current_iseq = queue.shift) - output << "\n" if output.pos > 0 - format_iseq(@current_iseq) - end - end - - def format_insns!(insns, length = 0) - events = [] - lines = [] - - insns.each do |insn| - case insn - when Integer - lines << insn - when Symbol - events << event(insn) - when InstructionSequence::Label - # skip - else - output << "#{current_prefix}%04d " % length - - disasm = insn.disasm(self) - output << disasm - - if lines.any? - output << " " * (65 - disasm.length) if disasm.length < 65 - elsif events.any? - output << " " * (39 - disasm.length) if disasm.length < 39 - end - - if lines.any? - output << "(%4d)" % lines.last - lines.clear - end - - if events.any? - output << "[#{events.join}]" - events.clear - end - - # A hook here to allow for custom formatting of instructions after - # the main body has been processed. - yield insn, length if block_given? - - output << "\n" - length += insn.length - end - end - end - - def print(string) - output.print(string) - end - - def puts(string) - output.puts(string) - end - - def string - output.string - end - - def with_prefix(value) - previous = @current_prefix - - begin - @current_prefix = value - yield value - ensure - @current_prefix = previous - end - end - - private - - def format_iseq(iseq) - output << "#{current_prefix}== disasm: #{iseq.inspect} " - - if iseq.catch_table.any? - output << "(catch: TRUE)\n" - output << "#{current_prefix}== catch table\n" - - with_prefix("#{current_prefix}| ") do - iseq.catch_table.each do |entry| - case entry - when InstructionSequence::CatchBreak - output << "#{current_prefix}catch type: break\n" - format_iseq(entry.iseq) - when InstructionSequence::CatchNext - output << "#{current_prefix}catch type: next\n" - when InstructionSequence::CatchRedo - output << "#{current_prefix}catch type: redo\n" - when InstructionSequence::CatchRescue - output << "#{current_prefix}catch type: rescue\n" - format_iseq(entry.iseq) - end - end - end - - output << "#{current_prefix}|#{"-" * 72}\n" - else - output << "(catch: FALSE)\n" - end - - if (local_table = iseq.local_table) && !local_table.empty? - output << "#{current_prefix}local table (size: #{local_table.size})\n" - - locals = - local_table.locals.each_with_index.map do |local, index| - "[%2d] %s@%d" % [local_table.offset(index), local.name, index] - end - - output << "#{current_prefix}#{locals.join(" ")}\n" - end - - format_insns!(iseq.insns) - end - end - end -end diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb deleted file mode 100644 index 4f2e0d9a..00000000 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ /dev/null @@ -1,1357 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - # This module provides an object representation of the YARV bytecode. - module YARV - # This class is meant to mirror RubyVM::InstructionSequence. It contains a - # list of instructions along with the metadata pertaining to them. It also - # functions as a builder for the instruction sequence. - class InstructionSequence - # This provides a handle to the rb_iseq_load function, which allows you - # to pass a serialized iseq to Ruby and have it return a - # RubyVM::InstructionSequence object. - def self.iseq_load(iseq) - require "fiddle" - - @iseq_load_function ||= - Fiddle::Function.new( - Fiddle::Handle::DEFAULT["rb_iseq_load"], - [Fiddle::TYPE_VOIDP] * 3, - Fiddle::TYPE_VOIDP - ) - - Fiddle.dlunwrap(@iseq_load_function.call(Fiddle.dlwrap(iseq), 0, nil)) - rescue LoadError - raise "Could not load the Fiddle library" - rescue NameError - raise "Unable to find rb_iseq_load" - rescue Fiddle::DLError - raise "Unable to perform a dynamic load" - end - - # When the list of instructions is first being created, it's stored as a - # linked list. This is to make it easier to perform peephole optimizations - # and other transformations like instruction specialization. - class InstructionList - class Node - attr_accessor :value, :next_node - - def initialize(value, next_node = nil) - @value = value - @next_node = next_node - end - end - - include Enumerable - attr_reader :head_node, :tail_node - - def initialize - @head_node = nil - @tail_node = nil - end - - def each(&_blk) - return to_enum(__method__) unless block_given? - each_node { |node| yield node.value } - end - - def each_node - return to_enum(__method__) unless block_given? - node = head_node - - while node - yield node, node.value - node = node.next_node - end - end - - def push(instruction) - node = Node.new(instruction) - - if head_node.nil? - @head_node = node - @tail_node = node - else - @tail_node.next_node = node - @tail_node = node - end - - node - end - end - - MAGIC = "YARVInstructionSequence/SimpleDataFormat" - - # This object is used to track the size of the stack at any given time. It - # is effectively a mini symbolic interpreter. It's necessary because when - # instruction sequences get serialized they include a :stack_max field on - # them. This field is used to determine how much stack space to allocate - # for the instruction sequence. - class Stack - attr_reader :current_size, :maximum_size - - def initialize - @current_size = 0 - @maximum_size = 0 - end - - def change_by(value) - @current_size += value - @maximum_size = @current_size if @current_size > @maximum_size - end - end - - # This represents the destination of instructions that jump. Initially it - # does not track its position so that when we perform optimizations the - # indices don't get messed up. - class Label - attr_reader :name - - # When we're serializing the instruction sequence, we need to be able to - # look up the label from the branch instructions and then access the - # subsequent node. So we'll store the reference here. - attr_accessor :node - - def initialize(name = nil) - @name = name - end - - def patch!(name) - @name = name - end - - def inspect - name.inspect - end - end - - # The name of the instruction sequence. - attr_reader :name - - # The source location of the instruction sequence. - attr_reader :file, :line - - # The type of the instruction sequence. - attr_reader :type - - # The parent instruction sequence, if there is one. - attr_reader :parent_iseq - - # This is the list of information about the arguments to this - # instruction sequence. - attr_accessor :argument_size - attr_reader :argument_options - - # The catch table for this instruction sequence. - attr_reader :catch_table - - # The list of instructions for this instruction sequence. - attr_reader :insns - - # The table of local variables. - attr_reader :local_table - - # The hash of names of instance and class variables pointing to the - # index of their associated inline storage. - attr_reader :inline_storages - - # The index of the next inline storage that will be created. - attr_reader :storage_index - - # An object that will track the current size of the stack and the - # maximum size of the stack for this instruction sequence. - attr_reader :stack - - # These are various compilation options provided. - attr_reader :options - - def initialize( - name, - file, - line, - type, - parent_iseq = nil, - options = Compiler::Options.new - ) - @name = name - @file = file - @line = line - @type = type - @parent_iseq = parent_iseq - - @argument_size = 0 - @argument_options = {} - @catch_table = [] - - @local_table = LocalTable.new - @inline_storages = {} - @insns = InstructionList.new - @storage_index = 0 - @stack = Stack.new - - @options = options - end - - ########################################################################## - # Query methods - ########################################################################## - - def local_variable(name, level = 0) - if (lookup = local_table.find(name, level)) - lookup - elsif parent_iseq - parent_iseq.local_variable(name, level + 1) - end - end - - def inline_storage - storage = storage_index - @storage_index += 1 - storage - end - - def inline_storage_for(name) - inline_storages[name] = inline_storage unless inline_storages.key?(name) - - inline_storages[name] - end - - def length - insns - .each - .inject(0) do |sum, insn| - case insn - when Integer, Label, Symbol - sum - else - sum + insn.length - end - end - end - - def eval - InstructionSequence.iseq_load(to_a).eval - end - - def to_a - versions = RUBY_VERSION.split(".").map(&:to_i) - - # Dump all of the instructions into a flat list. - dumped = - insns.map do |insn| - case insn - when Integer, Symbol - insn - when Label - insn.name - else - insn.to_a(self) - end - end - - dumped_options = argument_options.dup - dumped_options[:opt].map!(&:name) if dumped_options[:opt] - - metadata = { - arg_size: argument_size, - local_size: local_table.size, - stack_max: stack.maximum_size, - node_id: -1, - node_ids: [-1] * insns.length - } - - metadata[:parser] = :prism if RUBY_VERSION >= "3.3" - - # Next, return the instruction sequence as an array. - [ - MAGIC, - versions[0], - versions[1], - 1, - metadata, - name, - file, - "", - line, - type, - local_table.names, - dumped_options, - catch_table.map(&:to_a), - dumped - ] - end - - def to_cfg - ControlFlowGraph.compile(self) - end - - def to_dfg - to_cfg.to_dfg - end - - def to_son - to_dfg.to_son - end - - def disasm - fmt = Disassembler.new - fmt.enqueue(self) - fmt.format! - fmt.string - end - - def inspect - "#:1 (#{line},0)-(#{line},0)>" - end - - # This method converts our linked list of instructions into a final array - # and performs any other compilation steps necessary. - def compile! - specialize_instructions! if options.specialized_instruction? - - catch_table.each do |catch_entry| - if !catch_entry.is_a?(CatchBreak) && catch_entry.iseq - catch_entry.iseq.compile! - end - end - - length = 0 - insns.each do |insn| - case insn - when Integer, Symbol - # skip - when Label - insn.patch!(:"label_#{length}") - when DefineClass - insn.class_iseq.compile! - length += insn.length - when DefineMethod, DefineSMethod - insn.method_iseq.compile! - length += insn.length - when InvokeSuper, Send - insn.block_iseq.compile! if insn.block_iseq - length += insn.length - when Once - insn.iseq.compile! - length += insn.length - else - length += insn.length - end - end - - @insns = insns.to_a - end - - def specialize_instructions! - insns.each_node do |node, value| - case value - when NewArray - next unless node.next_node - - next_node = node.next_node - next unless next_node.value.is_a?(Send) - next if next_node.value.block_iseq - - calldata = next_node.value.calldata - next unless calldata.flags == CallData::CALL_ARGS_SIMPLE - next unless calldata.argc == 0 - - case calldata.method - when :min - node.value = - if RUBY_VERSION < "3.3" - Legacy::OptNewArrayMin.new(value.number) - else - OptNewArraySend.new(value.number, :min) - end - - node.next_node = next_node.next_node - when :max - node.value = - if RUBY_VERSION < "3.3" - Legacy::OptNewArrayMax.new(value.number) - else - OptNewArraySend.new(value.number, :max) - end - - node.next_node = next_node.next_node - when :hash - next if RUBY_VERSION < "3.3" - node.value = OptNewArraySend.new(value.number, :hash) - node.next_node = next_node.next_node - end - when PutObject, PutString - next unless node.next_node - next if value.is_a?(PutObject) && !value.object.is_a?(String) - - next_node = node.next_node - next unless next_node.value.is_a?(Send) - next if next_node.value.block_iseq - - calldata = next_node.value.calldata - next unless calldata.flags == CallData::CALL_ARGS_SIMPLE - next unless calldata.argc == 0 - - case calldata.method - when :freeze - node.value = OptStrFreeze.new(value.object, calldata) - node.next_node = next_node.next_node - when :-@ - node.value = OptStrUMinus.new(value.object, calldata) - node.next_node = next_node.next_node - end - when Send - calldata = value.calldata - - if !value.block_iseq && - !calldata.flag?(CallData::CALL_ARGS_BLOCKARG) - # Specialize the send instruction. If it doesn't have a block - # attached, then we will replace it with an opt_send_without_block - # and do further specializations based on the called method and - # the number of arguments. - node.value = - case [calldata.method, calldata.argc] - when [:length, 0] - OptLength.new(calldata) - when [:size, 0] - OptSize.new(calldata) - when [:empty?, 0] - OptEmptyP.new(calldata) - when [:nil?, 0] - OptNilP.new(calldata) - when [:succ, 0] - OptSucc.new(calldata) - when [:!, 0] - OptNot.new(calldata) - when [:+, 1] - OptPlus.new(calldata) - when [:-, 1] - OptMinus.new(calldata) - when [:*, 1] - OptMult.new(calldata) - when [:/, 1] - OptDiv.new(calldata) - when [:%, 1] - OptMod.new(calldata) - when [:==, 1] - OptEq.new(calldata) - when [:!=, 1] - OptNEq.new(YARV.calldata(:==, 1), calldata) - when [:=~, 1] - OptRegExpMatch2.new(calldata) - when [:<, 1] - OptLT.new(calldata) - when [:<=, 1] - OptLE.new(calldata) - when [:>, 1] - OptGT.new(calldata) - when [:>=, 1] - OptGE.new(calldata) - when [:<<, 1] - OptLTLT.new(calldata) - when [:[], 1] - OptAref.new(calldata) - when [:&, 1] - OptAnd.new(calldata) - when [:|, 1] - OptOr.new(calldata) - when [:[]=, 2] - OptAset.new(calldata) - else - OptSendWithoutBlock.new(calldata) - end - end - end - end - end - - ########################################################################## - # Child instruction sequence methods - ########################################################################## - - def child_iseq(name, line, type) - InstructionSequence.new(name, file, line, type, self, options) - end - - def block_child_iseq(line) - current = self - current = current.parent_iseq while current.type == :block - child_iseq("block in #{current.name}", line, :block) - end - - def class_child_iseq(name, line) - child_iseq("", line, :class) - end - - def method_child_iseq(name, line) - child_iseq(name, line, :method) - end - - def module_child_iseq(name, line) - child_iseq("", line, :class) - end - - def singleton_class_child_iseq(line) - child_iseq("singleton class", line, :class) - end - - ########################################################################## - # Catch table methods - ########################################################################## - - class CatchEntry - attr_reader :iseq, :begin_label, :end_label, :exit_label, :restore_sp - - def initialize(iseq, begin_label, end_label, exit_label, restore_sp) - @iseq = iseq - @begin_label = begin_label - @end_label = end_label - @exit_label = exit_label - @restore_sp = restore_sp - end - end - - class CatchBreak < CatchEntry - def to_a - [ - :break, - iseq.to_a, - begin_label.name, - end_label.name, - exit_label.name, - restore_sp - ] - end - end - - class CatchEnsure < CatchEntry - def to_a - [ - :ensure, - iseq.to_a, - begin_label.name, - end_label.name, - exit_label.name - ] - end - end - - class CatchNext < CatchEntry - def to_a - [:next, nil, begin_label.name, end_label.name, exit_label.name] - end - end - - class CatchRedo < CatchEntry - def to_a - [:redo, nil, begin_label.name, end_label.name, exit_label.name] - end - end - - class CatchRescue < CatchEntry - def to_a - [ - :rescue, - iseq.to_a, - begin_label.name, - end_label.name, - exit_label.name - ] - end - end - - class CatchRetry < CatchEntry - def to_a - [:retry, nil, begin_label.name, end_label.name, exit_label.name] - end - end - - def catch_break(iseq, begin_label, end_label, exit_label, restore_sp) - catch_table << CatchBreak.new( - iseq, - begin_label, - end_label, - exit_label, - restore_sp - ) - end - - def catch_ensure(iseq, begin_label, end_label, exit_label, restore_sp) - catch_table << CatchEnsure.new( - iseq, - begin_label, - end_label, - exit_label, - restore_sp - ) - end - - def catch_next(begin_label, end_label, exit_label, restore_sp) - catch_table << CatchNext.new( - nil, - begin_label, - end_label, - exit_label, - restore_sp - ) - end - - def catch_redo(begin_label, end_label, exit_label, restore_sp) - catch_table << CatchRedo.new( - nil, - begin_label, - end_label, - exit_label, - restore_sp - ) - end - - def catch_rescue(iseq, begin_label, end_label, exit_label, restore_sp) - catch_table << CatchRescue.new( - iseq, - begin_label, - end_label, - exit_label, - restore_sp - ) - end - - def catch_retry(begin_label, end_label, exit_label, restore_sp) - catch_table << CatchRetry.new( - nil, - begin_label, - end_label, - exit_label, - restore_sp - ) - end - - ########################################################################## - # Instruction push methods - ########################################################################## - - def label - Label.new - end - - def push(value) - node = insns.push(value) - - case value - when Array, Integer, Symbol - value - when Label - value.node = node - value - else - stack.change_by(-value.pops + value.pushes) - value - end - end - - def event(name) - push(name) - end - - def adjuststack(number) - push(AdjustStack.new(number)) - end - - def anytostring - push(AnyToString.new) - end - - def branchif(label) - push(BranchIf.new(label)) - end - - def branchnil(label) - push(BranchNil.new(label)) - end - - def branchunless(label) - push(BranchUnless.new(label)) - end - - def checkkeyword(keyword_bits_index, keyword_index) - push(CheckKeyword.new(keyword_bits_index, keyword_index)) - end - - def checkmatch(type) - push(CheckMatch.new(type)) - end - - def checktype(type) - push(CheckType.new(type)) - end - - def concatarray - push(ConcatArray.new) - end - - def concatstrings(number) - push(ConcatStrings.new(number)) - end - - def concattoarray(object) - push(ConcatToArray.new(object)) - end - - def defineclass(name, class_iseq, flags) - push(DefineClass.new(name, class_iseq, flags)) - end - - def defined(type, name, message) - push(Defined.new(type, name, message)) - end - - def definedivar(name, cache, message) - if RUBY_VERSION < "3.3" - push(PutNil.new) - push(Defined.new(Defined::TYPE_IVAR, name, message)) - else - push(DefinedIVar.new(name, cache, message)) - end - end - - def definemethod(name, method_iseq) - push(DefineMethod.new(name, method_iseq)) - end - - def definesmethod(name, method_iseq) - push(DefineSMethod.new(name, method_iseq)) - end - - def dup - push(Dup.new) - end - - def duparray(object) - push(DupArray.new(object)) - end - - def duphash(object) - push(DupHash.new(object)) - end - - def dupn(number) - push(DupN.new(number)) - end - - def expandarray(length, flags) - push(ExpandArray.new(length, flags)) - end - - def getblockparam(index, level) - push(GetBlockParam.new(index, level)) - end - - def getblockparamproxy(index, level) - push(GetBlockParamProxy.new(index, level)) - end - - def getclassvariable(name) - if RUBY_VERSION < "3.0" - push(Legacy::GetClassVariable.new(name)) - else - push(GetClassVariable.new(name, inline_storage_for(name))) - end - end - - def getconstant(name) - push(GetConstant.new(name)) - end - - def getglobal(name) - push(GetGlobal.new(name)) - end - - def getinstancevariable(name) - if RUBY_VERSION < "3.2" - push(GetInstanceVariable.new(name, inline_storage_for(name))) - else - push(GetInstanceVariable.new(name, inline_storage)) - end - end - - def getlocal(index, level) - if options.operands_unification? - # Specialize the getlocal instruction based on the level of the - # local variable. If it's 0 or 1, then there's a specialized - # instruction that will look at the current scope or the parent - # scope, respectively, and requires fewer operands. - case level - when 0 - push(GetLocalWC0.new(index)) - when 1 - push(GetLocalWC1.new(index)) - else - push(GetLocal.new(index, level)) - end - else - push(GetLocal.new(index, level)) - end - end - - def getspecial(key, type) - push(GetSpecial.new(key, type)) - end - - def intern - push(Intern.new) - end - - def invokeblock(calldata) - push(InvokeBlock.new(calldata)) - end - - def invokesuper(calldata, block_iseq) - push(InvokeSuper.new(calldata, block_iseq)) - end - - def jump(label) - push(Jump.new(label)) - end - - def leave - push(Leave.new) - end - - def newarray(number) - push(NewArray.new(number)) - end - - def newarraykwsplat(number) - push(NewArrayKwSplat.new(number)) - end - - def newhash(number) - push(NewHash.new(number)) - end - - def newrange(exclude_end) - push(NewRange.new(exclude_end)) - end - - def nop - push(Nop.new) - end - - def objtostring(calldata) - push(ObjToString.new(calldata)) - end - - def once(iseq, cache) - push(Once.new(iseq, cache)) - end - - def opt_aref_with(object, calldata) - push(OptArefWith.new(object, calldata)) - end - - def opt_aset_with(object, calldata) - push(OptAsetWith.new(object, calldata)) - end - - def opt_case_dispatch(case_dispatch_hash, else_label) - push(OptCaseDispatch.new(case_dispatch_hash, else_label)) - end - - def opt_getconstant_path(names) - if RUBY_VERSION < "3.2" || !options.inline_const_cache? - cache = nil - cache_filled_label = nil - - if options.inline_const_cache? - cache = inline_storage - cache_filled_label = label - opt_getinlinecache(cache_filled_label, cache) - - if names[0] == :"" - names.shift - pop - putobject(Object) - end - elsif names[0] == :"" - names.shift - putobject(Object) - else - putnil - end - - names.each_with_index do |name, index| - putobject(index == 0) - getconstant(name) - end - - if options.inline_const_cache? - opt_setinlinecache(cache) - push(cache_filled_label) - end - else - push(OptGetConstantPath.new(names)) - end - end - - def opt_getinlinecache(label, cache) - push(Legacy::OptGetInlineCache.new(label, cache)) - end - - def opt_setinlinecache(cache) - push(Legacy::OptSetInlineCache.new(cache)) - end - - def pop - push(Pop.new) - end - - def pushtoarraykwsplat - push(PushToArrayKwSplat.new) - end - - def putchilledstring(object) - push(PutChilledString.new(object)) - end - - def putnil - push(PutNil.new) - end - - def putobject(object) - if options.operands_unification? - # Specialize the putobject instruction based on the value of the - # object. If it's 0 or 1, then there's a specialized instruction - # that will push the object onto the stack and requires fewer - # operands. - if object.eql?(0) - push(PutObjectInt2Fix0.new) - elsif object.eql?(1) - push(PutObjectInt2Fix1.new) - else - push(PutObject.new(object)) - end - else - push(PutObject.new(object)) - end - end - - def putself - push(PutSelf.new) - end - - def putspecialobject(object) - push(PutSpecialObject.new(object)) - end - - def putstring(object) - push(PutString.new(object)) - end - - def send(calldata, block_iseq = nil) - push(Send.new(calldata, block_iseq)) - end - - def setblockparam(index, level) - push(SetBlockParam.new(index, level)) - end - - def setclassvariable(name) - if RUBY_VERSION < "3.0" - push(Legacy::SetClassVariable.new(name)) - else - push(SetClassVariable.new(name, inline_storage_for(name))) - end - end - - def setconstant(name) - push(SetConstant.new(name)) - end - - def setglobal(name) - push(SetGlobal.new(name)) - end - - def setinstancevariable(name) - if RUBY_VERSION < "3.2" - push(SetInstanceVariable.new(name, inline_storage_for(name))) - else - push(SetInstanceVariable.new(name, inline_storage)) - end - end - - def setlocal(index, level) - if options.operands_unification? - # Specialize the setlocal instruction based on the level of the - # local variable. If it's 0 or 1, then there's a specialized - # instruction that will write to the current scope or the parent - # scope, respectively, and requires fewer operands. - case level - when 0 - push(SetLocalWC0.new(index)) - when 1 - push(SetLocalWC1.new(index)) - else - push(SetLocal.new(index, level)) - end - else - push(SetLocal.new(index, level)) - end - end - - def setn(number) - push(SetN.new(number)) - end - - def setspecial(key) - push(SetSpecial.new(key)) - end - - def splatarray(flag) - push(SplatArray.new(flag)) - end - - def swap - push(Swap.new) - end - - def throw(type) - push(Throw.new(type)) - end - - def topn(number) - push(TopN.new(number)) - end - - def toregexp(options, length) - push(ToRegExp.new(options, length)) - end - - # This method will create a new instruction sequence from a serialized - # RubyVM::InstructionSequence object. - def self.from(source, options = Compiler::Options.new, parent_iseq = nil) - iseq = - new(source[5], source[6], source[8], source[9], parent_iseq, options) - - # set up the labels object so that the labels are shared between the - # location in the instruction sequence and the instructions that - # reference them - labels = Hash.new { |hash, name| hash[name] = Label.new(name) } - - # set up the correct argument size - iseq.argument_size = source[4][:arg_size] - - # set up all of the locals - source[10].each { |local| iseq.local_table.plain(local) } - - # set up the argument options - iseq.argument_options.merge!(source[11]) - if iseq.argument_options[:opt] - iseq.argument_options[:opt].map! { |opt| labels[opt] } - end - - # track the child block iseqs so that our catch table can point to the - # correctly created iseqs - block_iseqs = [] - - # set up all of the instructions - source[13].each do |insn| - # add line numbers - if insn.is_a?(Integer) - iseq.push(insn) - next - end - - # add events and labels - if insn.is_a?(Symbol) - if insn.start_with?("label_") - iseq.push(labels[insn]) - else - iseq.push(insn) - end - next - end - - # add instructions, mapped to our own instruction classes - type, *opnds = insn - - case type - when :adjuststack - iseq.adjuststack(opnds[0]) - when :anytostring - iseq.anytostring - when :branchif - iseq.branchif(labels[opnds[0]]) - when :branchnil - iseq.branchnil(labels[opnds[0]]) - when :branchunless - iseq.branchunless(labels[opnds[0]]) - when :checkkeyword - iseq.checkkeyword(iseq.local_table.size - opnds[0] + 2, opnds[1]) - when :checkmatch - iseq.checkmatch(opnds[0]) - when :checktype - iseq.checktype(opnds[0]) - when :concatarray - iseq.concatarray - when :concatstrings - iseq.concatstrings(opnds[0]) - when :concattoarray - iseq.concattoarray(opnds[0]) - when :defineclass - iseq.defineclass(opnds[0], from(opnds[1], options, iseq), opnds[2]) - when :defined - iseq.defined(opnds[0], opnds[1], opnds[2]) - when :definedivar - iseq.definedivar(opnds[0], opnds[1], opnds[2]) - when :definemethod - iseq.definemethod(opnds[0], from(opnds[1], options, iseq)) - when :definesmethod - iseq.definesmethod(opnds[0], from(opnds[1], options, iseq)) - when :dup - iseq.dup - when :duparray - iseq.duparray(opnds[0]) - when :duphash - iseq.duphash(opnds[0]) - when :dupn - iseq.dupn(opnds[0]) - when :expandarray - iseq.expandarray(opnds[0], opnds[1]) - when :getblockparam, :getblockparamproxy, :getlocal, :getlocal_WC_0, - :getlocal_WC_1, :setblockparam, :setlocal, :setlocal_WC_0, - :setlocal_WC_1 - current = iseq - level = 0 - - case type - when :getlocal_WC_1, :setlocal_WC_1 - level = 1 - when :getblockparam, :getblockparamproxy, :getlocal, :setblockparam, - :setlocal - level = opnds[1] - end - - level.times { current = current.parent_iseq } - index = current.local_table.size - opnds[0] + 2 - - case type - when :getblockparam - iseq.getblockparam(index, level) - when :getblockparamproxy - iseq.getblockparamproxy(index, level) - when :getlocal, :getlocal_WC_0, :getlocal_WC_1 - iseq.getlocal(index, level) - when :setblockparam - iseq.setblockparam(index, level) - when :setlocal, :setlocal_WC_0, :setlocal_WC_1 - iseq.setlocal(index, level) - end - when :getclassvariable - iseq.push(GetClassVariable.new(opnds[0], opnds[1])) - when :getconstant - iseq.getconstant(opnds[0]) - when :getglobal - iseq.getglobal(opnds[0]) - when :getinstancevariable - iseq.push(GetInstanceVariable.new(opnds[0], opnds[1])) - when :getspecial - iseq.getspecial(opnds[0], opnds[1]) - when :intern - iseq.intern - when :invokeblock - iseq.invokeblock(CallData.from(opnds[0])) - when :invokesuper - block_iseq = opnds[1] ? from(opnds[1], options, iseq) : nil - iseq.invokesuper(CallData.from(opnds[0]), block_iseq) - when :jump - iseq.jump(labels[opnds[0]]) - when :leave - iseq.leave - when :newarray - iseq.newarray(opnds[0]) - when :newarraykwsplat - iseq.newarraykwsplat(opnds[0]) - when :newhash - iseq.newhash(opnds[0]) - when :newrange - iseq.newrange(opnds[0]) - when :nop - iseq.nop - when :objtostring - iseq.objtostring(CallData.from(opnds[0])) - when :once - iseq.once(from(opnds[0], options, iseq), opnds[1]) - when :opt_and, :opt_aref, :opt_aset, :opt_div, :opt_empty_p, :opt_eq, - :opt_ge, :opt_gt, :opt_le, :opt_length, :opt_lt, :opt_ltlt, - :opt_minus, :opt_mod, :opt_mult, :opt_nil_p, :opt_not, :opt_or, - :opt_plus, :opt_regexpmatch2, :opt_send_without_block, :opt_size, - :opt_succ - iseq.send(CallData.from(opnds[0]), nil) - when :opt_aref_with - iseq.opt_aref_with(opnds[0], CallData.from(opnds[1])) - when :opt_aset_with - iseq.opt_aset_with(opnds[0], CallData.from(opnds[1])) - when :opt_case_dispatch - hash = - opnds[0] - .each_slice(2) - .to_h - .transform_values { |value| labels[value] } - iseq.opt_case_dispatch(hash, labels[opnds[1]]) - when :opt_getconstant_path - iseq.opt_getconstant_path(opnds[0]) - when :opt_getinlinecache - iseq.opt_getinlinecache(labels[opnds[0]], opnds[1]) - when :opt_newarray_max - iseq.newarray(opnds[0]) - iseq.send(YARV.calldata(:max)) - when :opt_newarray_min - iseq.newarray(opnds[0]) - iseq.send(YARV.calldata(:min)) - when :opt_newarray_send - mid = opnds[1] - if RUBY_VERSION >= "3.4" - mid = %i[max min hash pack pack_buffer include?][mid - 1] - end - - iseq.newarray(opnds[0]) - iseq.send(CallData.new(mid)) - when :opt_neq - iseq.push( - OptNEq.new(CallData.from(opnds[0]), CallData.from(opnds[1])) - ) - when :opt_setinlinecache - iseq.opt_setinlinecache(opnds[0]) - when :opt_str_freeze - iseq.putstring(opnds[0]) - iseq.send(YARV.calldata(:freeze)) - when :opt_str_uminus - iseq.putstring(opnds[0]) - iseq.send(YARV.calldata(:-@)) - when :pop - iseq.pop - when :pushtoarraykwsplat - iseq.pushtoarraykwsplat - when :putchilledstring - iseq.putchilledstring(opnds[0]) - when :putnil - iseq.putnil - when :putobject - iseq.putobject(opnds[0]) - when :putobject_INT2FIX_0_ - iseq.putobject(0) - when :putobject_INT2FIX_1_ - iseq.putobject(1) - when :putself - iseq.putself - when :putstring - iseq.putstring(opnds[0]) - when :putspecialobject - iseq.putspecialobject(opnds[0]) - when :send - block_iseq = opnds[1] ? from(opnds[1], options, iseq) : nil - block_iseqs << block_iseq if block_iseq - iseq.send(CallData.from(opnds[0]), block_iseq) - when :setclassvariable - iseq.push(SetClassVariable.new(opnds[0], opnds[1])) - when :setconstant - iseq.setconstant(opnds[0]) - when :setglobal - iseq.setglobal(opnds[0]) - when :setinstancevariable - iseq.push(SetInstanceVariable.new(opnds[0], opnds[1])) - when :setn - iseq.setn(opnds[0]) - when :setspecial - iseq.setspecial(opnds[0]) - when :splatarray - iseq.splatarray(opnds[0]) - when :swap - iseq.swap - when :throw - iseq.throw(opnds[0]) - when :topn - iseq.topn(opnds[0]) - when :toregexp - iseq.toregexp(opnds[0], opnds[1]) - else - raise "Unknown instruction type: #{type}" - end - end - - # set up the catch table - source[12].each do |entry| - case entry[0] - when :break - if entry[1] - break_iseq = - block_iseqs.find do |block_iseq| - block_iseq.name == entry[1][5] && - block_iseq.file == entry[1][6] && - block_iseq.line == entry[1][8] - end - - iseq.catch_break( - break_iseq || from(entry[1], options, iseq), - labels[entry[2]], - labels[entry[3]], - labels[entry[4]], - entry[5] - ) - else - iseq.catch_break( - nil, - labels[entry[2]], - labels[entry[3]], - labels[entry[4]], - entry[5] - ) - end - when :ensure - iseq.catch_ensure( - from(entry[1], options, iseq), - labels[entry[2]], - labels[entry[3]], - labels[entry[4]], - entry[5] - ) - when :next - iseq.catch_next( - labels[entry[2]], - labels[entry[3]], - labels[entry[4]], - entry[5] - ) - when :rescue - iseq.catch_rescue( - from(entry[1], options, iseq), - labels[entry[2]], - labels[entry[3]], - labels[entry[4]], - entry[5] - ) - when :redo - iseq.catch_redo( - labels[entry[2]], - labels[entry[3]], - labels[entry[4]], - entry[5] - ) - when :retry - iseq.catch_retry( - labels[entry[2]], - labels[entry[3]], - labels[entry[4]], - entry[5] - ) - else - raise "unknown catch type: #{entry[0]}" - end - end - - iseq.compile! if iseq.type == :top - iseq - end - end - end -end diff --git a/lib/syntax_tree/yarv/instructions.rb b/lib/syntax_tree/yarv/instructions.rb deleted file mode 100644 index 02188dfe..00000000 --- a/lib/syntax_tree/yarv/instructions.rb +++ /dev/null @@ -1,5885 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - module YARV - # This is a base class for all YARV instructions. It provides a few - # convenience methods for working with instructions. - class Instruction - # This method creates an instruction that represents the canonical - # (non-specialized) form of this instruction. If this instruction is not - # a specialized instruction, then this method returns `self`. - def canonical - self - end - - # This returns the size of the instruction in terms of the number of slots - # it occupies in the instruction sequence. Effectively this is 1 plus the - # number of operands. - def length - 1 - end - - # This returns the number of values that are pushed onto the stack. - def pushes - 0 - end - - # This returns the number of values that are popped off the stack. - def pops - 0 - end - - # This returns an array of labels. - def branch_targets - [] - end - - # Whether or not this instruction leaves the current frame. - def leaves? - false - end - - # Whether or not this instruction falls through to the next instruction if - # its branching fails. - def falls_through? - false - end - - # Does the instruction have side effects? Control-flow counts as a - # side-effect, as do some special-case instructions like Leave. By default - # every instruction is marked as having side effects. - def side_effects? - true - end - end - - # ### Summary - # - # `adjuststack` accepts a single integer argument and removes that many - # elements from the top of the stack. - # - # ### Usage - # - # ~~~ruby - # x = [true] - # x[0] ||= nil - # x[0] - # ~~~ - # - class AdjustStack < Instruction - attr_reader :number - - def initialize(number) - @number = number - end - - def disasm(fmt) - fmt.instruction("adjuststack", [fmt.object(number)]) - end - - def to_a(_iseq) - [:adjuststack, number] - end - - def deconstruct_keys(_keys) - { number: number } - end - - def ==(other) - other.is_a?(AdjustStack) && other.number == number - end - - def length - 2 - end - - def pops - number - end - - def call(vm) - vm.pop(number) - end - end - - # ### Summary - # - # `anytostring` ensures that the value on top of the stack is a string. - # - # It pops two values off the stack. If the first value is a string it - # pushes it back on the stack. If the first value is not a string, it uses - # Ruby's built in string coercion to coerce the second value to a string - # and then pushes that back on the stack. - # - # This is used in conjunction with `objtostring` as a fallback for when an - # object's `to_s` method does not return a string. - # - # ### Usage - # - # ~~~ruby - # "#{5}" - # ~~~ - # - class AnyToString < Instruction - def disasm(fmt) - fmt.instruction("anytostring") - end - - def to_a(_iseq) - [:anytostring] - end - - def deconstruct_keys(_keys) - {} - end - - def ==(other) - other.is_a?(AnyToString) - end - - def pops - 2 - end - - def pushes - 1 - end - - def call(vm) - original, value = vm.pop(2) - - if value.is_a?(String) - vm.push(value) - else - vm.push("#<#{original.class.name}:0000>") - end - end - end - - # ### Summary - # - # `branchif` has one argument: the jump index. It pops one value off the - # stack: the jump condition. - # - # If the value popped off the stack is true, `branchif` jumps to - # the jump index and continues executing there. - # - # ### Usage - # - # ~~~ruby - # x = true - # x ||= "foo" - # puts x - # ~~~ - # - class BranchIf < Instruction - attr_reader :label - - def initialize(label) - @label = label - end - - def disasm(fmt) - fmt.instruction("branchif", [fmt.label(label)]) - end - - def to_a(_iseq) - [:branchif, label.name] - end - - def deconstruct_keys(_keys) - { label: label } - end - - def ==(other) - other.is_a?(BranchIf) && other.label == label - end - - def length - 2 - end - - def pops - 1 - end - - def call(vm) - vm.jump(label) if vm.pop - end - - def branch_targets - [label] - end - - def falls_through? - true - end - end - - # ### Summary - # - # `branchnil` has one argument: the jump index. It pops one value off the - # stack: the jump condition. - # - # If the value popped off the stack is nil, `branchnil` jumps to - # the jump index and continues executing there. - # - # ### Usage - # - # ~~~ruby - # x = nil - # if x&.to_s - # puts "hi" - # end - # ~~~ - # - class BranchNil < Instruction - attr_reader :label - - def initialize(label) - @label = label - end - - def disasm(fmt) - fmt.instruction("branchnil", [fmt.label(label)]) - end - - def to_a(_iseq) - [:branchnil, label.name] - end - - def deconstruct_keys(_keys) - { label: label } - end - - def ==(other) - other.is_a?(BranchNil) && other.label == label - end - - def length - 2 - end - - def pops - 1 - end - - def call(vm) - vm.jump(label) if vm.pop.nil? - end - - def branch_targets - [label] - end - - def falls_through? - true - end - end - - # ### Summary - # - # `branchunless` has one argument: the jump index. It pops one value off - # the stack: the jump condition. - # - # If the value popped off the stack is false or nil, `branchunless` jumps - # to the jump index and continues executing there. - # - # ### Usage - # - # ~~~ruby - # if 2 + 3 - # puts "foo" - # end - # ~~~ - # - class BranchUnless < Instruction - attr_reader :label - - def initialize(label) - @label = label - end - - def disasm(fmt) - fmt.instruction("branchunless", [fmt.label(label)]) - end - - def to_a(_iseq) - [:branchunless, label.name] - end - - def deconstruct_keys(_keys) - { label: label } - end - - def ==(other) - other.is_a?(BranchUnless) && other.label == label - end - - def length - 2 - end - - def pops - 1 - end - - def call(vm) - vm.jump(label) unless vm.pop - end - - def branch_targets - [label] - end - - def falls_through? - true - end - end - - # ### Summary - # - # `checkkeyword` checks if a keyword was passed at the callsite that - # called into the method represented by the instruction sequence. It has - # two arguments: the index of the local variable that stores the keywords - # metadata and the index of the keyword within that metadata. It pushes - # a boolean onto the stack indicating whether or not the keyword was - # given. - # - # ### Usage - # - # ~~~ruby - # def evaluate(value: rand) - # value - # end - # - # evaluate(value: 3) - # ~~~ - # - class CheckKeyword < Instruction - attr_reader :keyword_bits_index, :keyword_index - - def initialize(keyword_bits_index, keyword_index) - @keyword_bits_index = keyword_bits_index - @keyword_index = keyword_index - end - - def disasm(fmt) - fmt.instruction( - "checkkeyword", - [fmt.object(keyword_bits_index), fmt.object(keyword_index)] - ) - end - - def to_a(iseq) - [ - :checkkeyword, - iseq.local_table.offset(keyword_bits_index), - keyword_index - ] - end - - def deconstruct_keys(_keys) - { keyword_bits_index: keyword_bits_index, keyword_index: keyword_index } - end - - def ==(other) - other.is_a?(CheckKeyword) && - other.keyword_bits_index == keyword_bits_index && - other.keyword_index == keyword_index - end - - def length - 3 - end - - def pushes - 1 - end - - def call(vm) - vm.push(vm.local_get(keyword_bits_index, 0)[keyword_index]) - end - end - - # ### Summary - # - # `checkmatch` checks if the current pattern matches the current value. It - # pops the target and the pattern off the stack and pushes a boolean onto - # the stack if it matches or not. - # - # ### Usage - # - # ~~~ruby - # foo in Foo - # ~~~ - # - class CheckMatch < Instruction - VM_CHECKMATCH_TYPE_WHEN = 1 - VM_CHECKMATCH_TYPE_CASE = 2 - VM_CHECKMATCH_TYPE_RESCUE = 3 - VM_CHECKMATCH_TYPE_MASK = 0x03 - VM_CHECKMATCH_ARRAY = 0x04 - - attr_reader :type - - def initialize(type) - @type = type - end - - def disasm(fmt) - fmt.instruction("checkmatch", [fmt.object(type)]) - end - - def to_a(_iseq) - [:checkmatch, type] - end - - def deconstruct_keys(_keys) - { type: type } - end - - def ==(other) - other.is_a?(CheckMatch) && other.type == type - end - - def length - 2 - end - - def pops - 2 - end - - def pushes - 1 - end - - def call(vm) - target, pattern = vm.pop(2) - - vm.push( - if type & VM_CHECKMATCH_ARRAY > 0 - pattern.any? { |item| check?(item, target) } - else - check?(pattern, target) - end - ) - end - - private - - def check?(pattern, target) - case type & VM_CHECKMATCH_TYPE_MASK - when VM_CHECKMATCH_TYPE_WHEN - pattern - when VM_CHECKMATCH_TYPE_CASE - pattern === target - when VM_CHECKMATCH_TYPE_RESCUE - unless pattern.is_a?(Module) - raise TypeError, "class or module required for rescue clause" - end - - pattern === target - end - end - end - - # ### Summary - # - # `checktype` checks if the value on top of the stack is of a certain type. - # The type is the only argument. It pops the value off the stack and pushes - # a boolean onto the stack indicating whether or not the value is of the - # given type. - # - # ### Usage - # - # ~~~ruby - # foo in [bar] - # ~~~ - # - class CheckType < Instruction - TYPE_OBJECT = 0x01 - TYPE_CLASS = 0x02 - TYPE_MODULE = 0x03 - TYPE_FLOAT = 0x04 - TYPE_STRING = 0x05 - TYPE_REGEXP = 0x06 - TYPE_ARRAY = 0x07 - TYPE_HASH = 0x08 - TYPE_STRUCT = 0x09 - TYPE_BIGNUM = 0x0a - TYPE_FILE = 0x0b - TYPE_DATA = 0x0c - TYPE_MATCH = 0x0d - TYPE_COMPLEX = 0x0e - TYPE_RATIONAL = 0x0f - TYPE_NIL = 0x11 - TYPE_TRUE = 0x12 - TYPE_FALSE = 0x13 - TYPE_SYMBOL = 0x14 - TYPE_FIXNUM = 0x15 - TYPE_UNDEF = 0x16 - - attr_reader :type - - def initialize(type) - @type = type - end - - def disasm(fmt) - name = - case type - when TYPE_OBJECT - "T_OBJECT" - when TYPE_CLASS - "T_CLASS" - when TYPE_MODULE - "T_MODULE" - when TYPE_FLOAT - "T_FLOAT" - when TYPE_STRING - "T_STRING" - when TYPE_REGEXP - "T_REGEXP" - when TYPE_ARRAY - "T_ARRAY" - when TYPE_HASH - "T_HASH" - when TYPE_STRUCT - "T_STRUCT" - when TYPE_BIGNUM - "T_BIGNUM" - when TYPE_FILE - "T_FILE" - when TYPE_DATA - "T_DATA" - when TYPE_MATCH - "T_MATCH" - when TYPE_COMPLEX - "T_COMPLEX" - when TYPE_RATIONAL - "T_RATIONAL" - when TYPE_NIL - "T_NIL" - when TYPE_TRUE - "T_TRUE" - when TYPE_FALSE - "T_FALSE" - when TYPE_SYMBOL - "T_SYMBOL" - when TYPE_FIXNUM - "T_FIXNUM" - when TYPE_UNDEF - "T_UNDEF" - end - - fmt.instruction("checktype", [name]) - end - - def to_a(_iseq) - [:checktype, type] - end - - def deconstruct_keys(_keys) - { type: type } - end - - def ==(other) - other.is_a?(CheckType) && other.type == type - end - - def length - 2 - end - - def pops - 1 - end - - def pushes - # TODO: This is incorrect. The instruction only pushes a single value - # onto the stack. However, if this is set to 1, we no longer match the - # output of RubyVM::InstructionSequence. So leaving this here until we - # can investigate further. - 2 - end - - def call(vm) - object = vm.pop - result = - case type - when TYPE_OBJECT - raise NotImplementedError, "checktype TYPE_OBJECT" - when TYPE_CLASS - object.is_a?(Class) - when TYPE_MODULE - object.is_a?(Module) - when TYPE_FLOAT - object.is_a?(Float) - when TYPE_STRING - object.is_a?(String) - when TYPE_REGEXP - object.is_a?(Regexp) - when TYPE_ARRAY - object.is_a?(Array) - when TYPE_HASH - object.is_a?(Hash) - when TYPE_STRUCT - object.is_a?(Struct) - when TYPE_BIGNUM - raise NotImplementedError, "checktype TYPE_BIGNUM" - when TYPE_FILE - object.is_a?(File) - when TYPE_DATA - raise NotImplementedError, "checktype TYPE_DATA" - when TYPE_MATCH - raise NotImplementedError, "checktype TYPE_MATCH" - when TYPE_COMPLEX - object.is_a?(Complex) - when TYPE_RATIONAL - object.is_a?(Rational) - when TYPE_NIL - object.nil? - when TYPE_TRUE - object == true - when TYPE_FALSE - object == false - when TYPE_SYMBOL - object.is_a?(Symbol) - when TYPE_FIXNUM - object.is_a?(Integer) - when TYPE_UNDEF - raise NotImplementedError, "checktype TYPE_UNDEF" - end - - vm.push(result) - end - end - - # ### Summary - # - # `concatarray` concatenates the two Arrays on top of the stack. - # - # It coerces the two objects at the top of the stack into Arrays by - # calling `to_a` if necessary, and makes sure to `dup` the first Array if - # it was already an Array, to avoid mutating it when concatenating. - # - # ### Usage - # - # ~~~ruby - # [1, *2] - # ~~~ - # - class ConcatArray < Instruction - def disasm(fmt) - fmt.instruction("concatarray") - end - - def to_a(_iseq) - [:concatarray] - end - - def deconstruct_keys(_keys) - {} - end - - def ==(other) - other.is_a?(ConcatArray) - end - - def pops - 2 - end - - def pushes - 1 - end - - def call(vm) - left, right = vm.pop(2) - vm.push([*left, *right]) - end - end - - # ### Summary - # - # `concatstrings` pops a number of strings from the stack joins them - # together into a single string and pushes that string back on the stack. - # - # This does no coercion and so is always used in conjunction with - # `objtostring` and `anytostring` to ensure the stack contents are always - # strings. - # - # ### Usage - # - # ~~~ruby - # "#{5}" - # ~~~ - # - class ConcatStrings < Instruction - attr_reader :number - - def initialize(number) - @number = number - end - - def disasm(fmt) - fmt.instruction("concatstrings", [fmt.object(number)]) - end - - def to_a(_iseq) - [:concatstrings, number] - end - - def deconstruct_keys(_keys) - { number: number } - end - - def ==(other) - other.is_a?(ConcatStrings) && other.number == number - end - - def length - 2 - end - - def pops - number - end - - def pushes - 1 - end - - def call(vm) - vm.push(vm.pop(number).join) - end - end - - # ### Summary - # - # `concattoarray` pops a single value off the stack and attempts to concat - # it to the Array on top of the stack. If the value is not an Array, it - # will be coerced into one. - # - # ### Usage - # - # ~~~ruby - # [1, *2] - # ~~~ - # - class ConcatToArray < Instruction - attr_reader :object - - def initialize(object) - @object = object - end - - def disasm(fmt) - fmt.instruction("concattoarray", [fmt.object(object)]) - end - - def to_a(_iseq) - [:concattoarray, object] - end - - def deconstruct_keys(_keys) - { object: object } - end - - def ==(other) - other.is_a?(ConcatToArray) && other.object == object - end - - def length - 2 - end - - def pops - 1 - end - - def pushes - 1 - end - - def call(vm) - array, value = vm.pop(2) - vm.push(array.concat(Array(value))) - end - end - - # ### Summary - # - # `defineclass` defines a class. First it pops the superclass off the - # stack, then it pops the object off the stack that the class should be - # defined under. It has three arguments: the name of the constant, the - # instruction sequence associated with the class, and various flags that - # indicate if it is a singleton class, a module, or a regular class. - # - # ### Usage - # - # ~~~ruby - # class Foo - # end - # ~~~ - # - class DefineClass < Instruction - TYPE_CLASS = 0 - TYPE_SINGLETON_CLASS = 1 - TYPE_MODULE = 2 - FLAG_SCOPED = 8 - FLAG_HAS_SUPERCLASS = 16 - - attr_reader :name, :class_iseq, :flags - - def initialize(name, class_iseq, flags) - @name = name - @class_iseq = class_iseq - @flags = flags - end - - def disasm(fmt) - fmt.enqueue(class_iseq) - fmt.instruction( - "defineclass", - [fmt.object(name), class_iseq.name, fmt.object(flags)] - ) - end - - def to_a(_iseq) - [:defineclass, name, class_iseq.to_a, flags] - end - - def deconstruct_keys(_keys) - { name: name, class_iseq: class_iseq, flags: flags } - end - - def ==(other) - other.is_a?(DefineClass) && other.name == name && - other.class_iseq == class_iseq && other.flags == flags - end - - def length - 4 - end - - def pops - 2 - end - - def pushes - 1 - end - - def call(vm) - object, superclass = vm.pop(2) - - if name == :singletonclass - vm.push(vm.run_class_frame(class_iseq, object.singleton_class)) - elsif object.const_defined?(name) - vm.push(vm.run_class_frame(class_iseq, object.const_get(name))) - elsif flags & TYPE_MODULE > 0 - clazz = Module.new - object.const_set(name, clazz) - vm.push(vm.run_class_frame(class_iseq, clazz)) - else - clazz = - if flags & FLAG_HAS_SUPERCLASS > 0 - Class.new(superclass) - else - Class.new - end - - object.const_set(name, clazz) - vm.push(vm.run_class_frame(class_iseq, clazz)) - end - end - end - - # ### Summary - # - # `defined` checks if the top value of the stack is defined. If it is, it - # pushes its value onto the stack. Otherwise it pushes `nil`. - # - # ### Usage - # - # ~~~ruby - # defined?(x) - # ~~~ - # - class Defined < Instruction - TYPE_NIL = 1 - TYPE_IVAR = 2 - TYPE_LVAR = 3 - TYPE_GVAR = 4 - TYPE_CVAR = 5 - TYPE_CONST = 6 - TYPE_METHOD = 7 - TYPE_YIELD = 8 - TYPE_ZSUPER = 9 - TYPE_SELF = 10 - TYPE_TRUE = 11 - TYPE_FALSE = 12 - TYPE_ASGN = 13 - TYPE_EXPR = 14 - TYPE_REF = 15 - TYPE_FUNC = 16 - TYPE_CONST_FROM = 17 - - attr_reader :type, :name, :message - - def initialize(type, name, message) - @type = type - @name = name - @message = message - end - - def disasm(fmt) - type_name = - case type - when TYPE_NIL - "nil" - when TYPE_IVAR - "ivar" - when TYPE_LVAR - "lvar" - when TYPE_GVAR - "gvar" - when TYPE_CVAR - "cvar" - when TYPE_CONST - "const" - when TYPE_METHOD - "method" - when TYPE_YIELD - "yield" - when TYPE_ZSUPER - "zsuper" - when TYPE_SELF - "self" - when TYPE_TRUE - "true" - when TYPE_FALSE - "false" - when TYPE_ASGN - "asgn" - when TYPE_EXPR - "expr" - when TYPE_REF - "ref" - when TYPE_FUNC - "func" - when TYPE_CONST_FROM - "constant-from" - end - - fmt.instruction( - "defined", - [type_name, fmt.object(name), fmt.object(message)] - ) - end - - def to_a(_iseq) - [:defined, type, name, message] - end - - def deconstruct_keys(_keys) - { type: type, name: name, message: message } - end - - def ==(other) - other.is_a?(Defined) && other.type == type && other.name == name && - other.message == message - end - - def length - 4 - end - - def pops - 1 - end - - def pushes - 1 - end - - def call(vm) - object = vm.pop - - result = - case type - when TYPE_NIL, TYPE_SELF, TYPE_TRUE, TYPE_FALSE, TYPE_ASGN, TYPE_EXPR - message - when TYPE_IVAR - message if vm.frame._self.instance_variable_defined?(name) - when TYPE_LVAR - raise NotImplementedError, "defined TYPE_LVAR" - when TYPE_GVAR - message if global_variables.include?(name) - when TYPE_CVAR - clazz = vm.frame._self - clazz = clazz.singleton_class unless clazz.is_a?(Module) - message if clazz.class_variable_defined?(name) - when TYPE_CONST - clazz = vm.frame._self - clazz = clazz.singleton_class unless clazz.is_a?(Module) - message if clazz.const_defined?(name) - when TYPE_METHOD - raise NotImplementedError, "defined TYPE_METHOD" - when TYPE_YIELD - raise NotImplementedError, "defined TYPE_YIELD" - when TYPE_ZSUPER - raise NotImplementedError, "defined TYPE_ZSUPER" - when TYPE_REF - raise NotImplementedError, "defined TYPE_REF" - when TYPE_FUNC - message if object.respond_to?(name, true) - when TYPE_CONST_FROM - defined = - vm.frame.nesting.any? { |scope| scope.const_defined?(name, true) } - message if defined - end - - vm.push(result) - end - end - - # ### Summary - # - # `definedivar` checks if an instance variable is defined. It is a - # specialization of the `defined` instruction. It accepts three arguments: - # the name of the instance variable, an inline cache, and the string that - # should be pushed onto the stack in the event that the instance variable - # is defined. - # - # ### Usage - # - # ~~~ruby - # defined?(@value) - # ~~~ - # - class DefinedIVar < Instruction - attr_reader :name, :cache, :message - - def initialize(name, cache, message) - @name = name - @cache = cache - @message = message - end - - def disasm(fmt) - fmt.instruction( - "definedivar", - [fmt.object(name), fmt.inline_storage(cache), fmt.object(message)] - ) - end - - def to_a(_iseq) - [:definedivar, name, cache, message] - end - - def deconstruct_keys(_keys) - { name: name, cache: cache, message: message } - end - - def ==(other) - other.is_a?(DefinedIVar) && other.name == name && - other.cache == cache && other.message == message - end - - def length - 4 - end - - def pushes - 1 - end - - def call(vm) - result = (message if vm.frame._self.instance_variable_defined?(name)) - - vm.push(result) - end - end - - # ### Summary - # - # `definemethod` defines a method on the class of the current value of - # `self`. It accepts two arguments. The first is the name of the method - # being defined. The second is the instruction sequence representing the - # body of the method. - # - # ### Usage - # - # ~~~ruby - # def value = "value" - # ~~~ - # - class DefineMethod < Instruction - attr_reader :method_name, :method_iseq - - def initialize(method_name, method_iseq) - @method_name = method_name - @method_iseq = method_iseq - end - - def disasm(fmt) - fmt.enqueue(method_iseq) - fmt.instruction( - "definemethod", - [fmt.object(method_name), method_iseq.name] - ) - end - - def to_a(_iseq) - [:definemethod, method_name, method_iseq.to_a] - end - - def deconstruct_keys(_keys) - { method_name: method_name, method_iseq: method_iseq } - end - - def ==(other) - other.is_a?(DefineMethod) && other.method_name == method_name && - other.method_iseq == method_iseq - end - - def length - 3 - end - - def call(vm) - name = method_name - nesting = vm.frame.nesting - iseq = method_iseq - - vm - .frame - ._self - .__send__(:define_method, name) do |*args, **kwargs, &block| - vm.run_method_frame( - name, - nesting, - iseq, - self, - *args, - **kwargs, - &block - ) - end - end - end - - # ### Summary - # - # `definesmethod` defines a method on the singleton class of the current - # value of `self`. It accepts two arguments. The first is the name of the - # method being defined. The second is the instruction sequence representing - # the body of the method. It pops the object off the stack that the method - # should be defined on. - # - # ### Usage - # - # ~~~ruby - # def self.value = "value" - # ~~~ - # - class DefineSMethod < Instruction - attr_reader :method_name, :method_iseq - - def initialize(method_name, method_iseq) - @method_name = method_name - @method_iseq = method_iseq - end - - def disasm(fmt) - fmt.enqueue(method_iseq) - fmt.instruction( - "definesmethod", - [fmt.object(method_name), method_iseq.name] - ) - end - - def to_a(_iseq) - [:definesmethod, method_name, method_iseq.to_a] - end - - def deconstruct_keys(_keys) - { method_name: method_name, method_iseq: method_iseq } - end - - def ==(other) - other.is_a?(DefineSMethod) && other.method_name == method_name && - other.method_iseq == method_iseq - end - - def length - 3 - end - - def pops - 1 - end - - def call(vm) - name = method_name - nesting = vm.frame.nesting - iseq = method_iseq - - vm - .frame - ._self - .__send__(:define_singleton_method, name) do |*args, **kwargs, &block| - vm.run_method_frame( - name, - nesting, - iseq, - self, - *args, - **kwargs, - &block - ) - end - end - end - - # ### Summary - # - # `dup` copies the top value of the stack and pushes it onto the stack. - # - # ### Usage - # - # ~~~ruby - # $global = 5 - # ~~~ - # - class Dup < Instruction - def disasm(fmt) - fmt.instruction("dup") - end - - def to_a(_iseq) - [:dup] - end - - def deconstruct_keys(_keys) - {} - end - - def ==(other) - other.is_a?(Dup) - end - - def pops - 1 - end - - def pushes - 2 - end - - def call(vm) - vm.push(vm.stack.last.dup) - end - - def side_effects? - false - end - end - - # ### Summary - # - # `duparray` dups an Array literal and pushes it onto the stack. - # - # ### Usage - # - # ~~~ruby - # [true] - # ~~~ - # - class DupArray < Instruction - attr_reader :object - - def initialize(object) - @object = object - end - - def disasm(fmt) - fmt.instruction("duparray", [fmt.object(object)]) - end - - def to_a(_iseq) - [:duparray, object] - end - - def deconstruct_keys(_keys) - { object: object } - end - - def ==(other) - other.is_a?(DupArray) && other.object == object - end - - def length - 2 - end - - def pushes - 1 - end - - def call(vm) - vm.push(object.dup) - end - end - - # ### Summary - # - # `duphash` dups a Hash literal and pushes it onto the stack. - # - # ### Usage - # - # ~~~ruby - # { a: 1 } - # ~~~ - # - class DupHash < Instruction - attr_reader :object - - def initialize(object) - @object = object - end - - def disasm(fmt) - fmt.instruction("duphash", [fmt.object(object)]) - end - - def to_a(_iseq) - [:duphash, object] - end - - def deconstruct_keys(_keys) - { object: object } - end - - def ==(other) - other.is_a?(DupHash) && other.object == object - end - - def length - 2 - end - - def pushes - 1 - end - - def call(vm) - vm.push(object.dup) - end - end - - # ### Summary - # - # `dupn` duplicates the top `n` stack elements. - # - # ### Usage - # - # ~~~ruby - # Object::X ||= true - # ~~~ - # - class DupN < Instruction - attr_reader :number - - def initialize(number) - @number = number - end - - def disasm(fmt) - fmt.instruction("dupn", [fmt.object(number)]) - end - - def to_a(_iseq) - [:dupn, number] - end - - def deconstruct_keys(_keys) - { number: number } - end - - def ==(other) - other.is_a?(DupN) && other.number == number - end - - def length - 2 - end - - def pushes - number - end - - def call(vm) - values = vm.pop(number) - vm.push(*values) - vm.push(*values) - end - end - - # ### Summary - # - # `expandarray` looks at the top of the stack, and if the value is an array - # it replaces it on the stack with `number` elements of the array, or `nil` - # if the elements are missing. - # - # ### Usage - # - # ~~~ruby - # x, = [true, false, nil] - # ~~~ - # - class ExpandArray < Instruction - attr_reader :number, :flags - - def initialize(number, flags) - @number = number - @flags = flags - end - - def disasm(fmt) - fmt.instruction("expandarray", [fmt.object(number), fmt.object(flags)]) - end - - def to_a(_iseq) - [:expandarray, number, flags] - end - - def deconstruct_keys(_keys) - { number: number, flags: flags } - end - - def ==(other) - other.is_a?(ExpandArray) && other.number == number && - other.flags == flags - end - - def length - 3 - end - - def pops - 1 - end - - def pushes - number - end - - def call(vm) - object = vm.pop - object = - if Array === object - object.dup - elsif object.respond_to?(:to_ary, true) - object.to_ary - else - [object] - end - - splat_flag = flags & 0x01 > 0 - postarg_flag = flags & 0x02 > 0 - - if number == 0 && splat_flag == 0 - # no space left on stack - elsif postarg_flag - values = [] - - if number > object.size - (number - object.size).times { values.push(nil) } - end - [number, object.size].min.times { values.push(object.pop) } - values.push(object.to_a) if splat_flag - - values.each { |item| vm.push(item) } - else - values = [] - - [number, object.size].min.times { values.push(object.shift) } - if number > values.size - (number - values.size).times { values.push(nil) } - end - values.push(object.to_a) if splat_flag - - values.reverse_each { |item| vm.push(item) } - end - end - end - - # ### Summary - # - # `getblockparam` is a similar instruction to `getlocal` in that it looks - # for a local variable in the current instruction sequence's local table and - # walks recursively up the parent instruction sequences until it finds it. - # The local it retrieves, however, is a special block local that was passed - # to the current method. It pushes the value of the block local onto the - # stack. - # - # ### Usage - # - # ~~~ruby - # def foo(&block) - # block - # end - # ~~~ - # - class GetBlockParam < Instruction - attr_reader :index, :level - - def initialize(index, level) - @index = index - @level = level - end - - def disasm(fmt) - fmt.instruction("getblockparam", [fmt.local(index, explicit: level)]) - end - - def to_a(iseq) - current = iseq - level.times { current = iseq.parent_iseq } - [:getblockparam, current.local_table.offset(index), level] - end - - def deconstruct_keys(_keys) - { index: index, level: level } - end - - def ==(other) - other.is_a?(GetBlockParam) && other.index == index && - other.level == level - end - - def length - 3 - end - - def pushes - 1 - end - - def call(vm) - vm.push(vm.local_get(index, level)) - end - end - - # ### Summary - # - # `getblockparamproxy` is almost the same as `getblockparam` except that it - # pushes a proxy object onto the stack instead of the actual value of the - # block local. This is used when a method is being called on the block - # local. - # - # ### Usage - # - # ~~~ruby - # def foo(&block) - # block.call - # end - # ~~~ - # - class GetBlockParamProxy < Instruction - attr_reader :index, :level - - def initialize(index, level) - @index = index - @level = level - end - - def disasm(fmt) - fmt.instruction( - "getblockparamproxy", - [fmt.local(index, explicit: level)] - ) - end - - def to_a(iseq) - current = iseq - level.times { current = iseq.parent_iseq } - [:getblockparamproxy, current.local_table.offset(index), level] - end - - def deconstruct_keys(_keys) - { index: index, level: level } - end - - def ==(other) - other.is_a?(GetBlockParamProxy) && other.index == index && - other.level == level - end - - def length - 3 - end - - def pushes - 1 - end - - def call(vm) - vm.push(vm.local_get(index, level)) - end - end - - # ### Summary - # - # `getclassvariable` looks for a class variable in the current class and - # pushes its value onto the stack. It uses an inline cache to reduce the - # need to lookup the class variable in the class hierarchy every time. - # - # ### Usage - # - # ~~~ruby - # @@class_variable - # ~~~ - # - class GetClassVariable < Instruction - attr_reader :name, :cache - - def initialize(name, cache) - @name = name - @cache = cache - end - - def disasm(fmt) - fmt.instruction( - "getclassvariable", - [fmt.object(name), fmt.inline_storage(cache)] - ) - end - - def to_a(_iseq) - [:getclassvariable, name, cache] - end - - def deconstruct_keys(_keys) - { name: name, cache: cache } - end - - def ==(other) - other.is_a?(GetClassVariable) && other.name == name && - other.cache == cache - end - - def length - 3 - end - - def pushes - 1 - end - - def call(vm) - clazz = vm.frame._self - clazz = clazz.class unless clazz.is_a?(Class) - vm.push(clazz.class_variable_get(name)) - end - end - - # ### Summary - # - # `getconstant` performs a constant lookup and pushes the value of the - # constant onto the stack. It pops both the class it should look in and - # whether or not it should look globally as well. - # - # ### Usage - # - # ~~~ruby - # Constant - # ~~~ - # - class GetConstant < Instruction - attr_reader :name - - def initialize(name) - @name = name - end - - def disasm(fmt) - fmt.instruction("getconstant", [fmt.object(name)]) - end - - def to_a(_iseq) - [:getconstant, name] - end - - def deconstruct_keys(_keys) - { name: name } - end - - def ==(other) - other.is_a?(GetConstant) && other.name == name - end - - def length - 2 - end - - def pops - 2 - end - - def pushes - 1 - end - - def call(vm) - const_base, allow_nil = vm.pop(2) - - if const_base - if const_base.const_defined?(name) - vm.push(const_base.const_get(name)) - return - end - elsif const_base.nil? && allow_nil - vm.frame.nesting.reverse_each do |clazz| - if clazz.const_defined?(name) - vm.push(clazz.const_get(name)) - return - end - end - end - - raise NameError, "uninitialized constant #{name}" - end - end - - # ### Summary - # - # `getglobal` pushes the value of a global variables onto the stack. - # - # ### Usage - # - # ~~~ruby - # $$ - # ~~~ - # - class GetGlobal < Instruction - attr_reader :name - - def initialize(name) - @name = name - end - - def disasm(fmt) - fmt.instruction("getglobal", [fmt.object(name)]) - end - - def to_a(_iseq) - [:getglobal, name] - end - - def deconstruct_keys(_keys) - { name: name } - end - - def ==(other) - other.is_a?(GetGlobal) && other.name == name - end - - def length - 2 - end - - def pushes - 1 - end - - def call(vm) - # Evaluating the name of the global variable because there isn't a - # reflection API for global variables. - vm.push(eval(name.to_s, binding, __FILE__, __LINE__)) - end - end - - # ### Summary - # - # `getinstancevariable` pushes the value of an instance variable onto the - # stack. It uses an inline cache to avoid having to look up the instance - # variable in the class hierarchy every time. - # - # This instruction has two forms, but both have the same structure. Before - # Ruby 3.2, the inline cache corresponded to both the get and set - # instructions and could be shared. Since Ruby 3.2, it uses object shapes - # instead so the caches are unique per instruction. - # - # ### Usage - # - # ~~~ruby - # @instance_variable - # ~~~ - # - class GetInstanceVariable < Instruction - attr_reader :name, :cache - - def initialize(name, cache) - @name = name - @cache = cache - end - - def disasm(fmt) - fmt.instruction( - "getinstancevariable", - [fmt.object(name), fmt.inline_storage(cache)] - ) - end - - def to_a(_iseq) - [:getinstancevariable, name, cache] - end - - def deconstruct_keys(_keys) - { name: name, cache: cache } - end - - def ==(other) - other.is_a?(GetInstanceVariable) && other.name == name && - other.cache == cache - end - - def length - 3 - end - - def pushes - 1 - end - - def call(vm) - method = Object.instance_method(:instance_variable_get) - vm.push(method.bind(vm.frame._self).call(name)) - end - end - - # ### Summary - # - # `getlocal` fetches the value of a local variable from a frame determined - # by the level and index arguments. The level is the number of frames back - # to look and the index is the index in the local table. It pushes the value - # it finds onto the stack. - # - # ### Usage - # - # ~~~ruby - # value = 5 - # tap { tap { value } } - # ~~~ - # - class GetLocal < Instruction - attr_reader :index, :level - - def initialize(index, level) - @index = index - @level = level - end - - def disasm(fmt) - fmt.instruction("getlocal", [fmt.local(index, explicit: level)]) - end - - def to_a(iseq) - current = iseq - level.times { current = current.parent_iseq } - [:getlocal, current.local_table.offset(index), level] - end - - def deconstruct_keys(_keys) - { index: index, level: level } - end - - def ==(other) - other.is_a?(GetLocal) && other.index == index && other.level == level - end - - def length - 3 - end - - def pushes - 1 - end - - def call(vm) - vm.push(vm.local_get(index, level)) - end - end - - # ### Summary - # - # `getlocal_WC_0` is a specialized version of the `getlocal` instruction. It - # fetches the value of a local variable from the current frame determined by - # the index given as its only argument. - # - # ### Usage - # - # ~~~ruby - # value = 5 - # value - # ~~~ - # - class GetLocalWC0 < Instruction - attr_reader :index - - def initialize(index) - @index = index - end - - def disasm(fmt) - fmt.instruction("getlocal_WC_0", [fmt.local(index, implicit: 0)]) - end - - def to_a(iseq) - [:getlocal_WC_0, iseq.local_table.offset(index)] - end - - def deconstruct_keys(_keys) - { index: index } - end - - def ==(other) - other.is_a?(GetLocalWC0) && other.index == index - end - - def length - 2 - end - - def pushes - 1 - end - - def canonical - GetLocal.new(index, 0) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `getlocal_WC_1` is a specialized version of the `getlocal` instruction. It - # fetches the value of a local variable from the parent frame determined by - # the index given as its only argument. - # - # ### Usage - # - # ~~~ruby - # value = 5 - # self.then { value } - # ~~~ - # - class GetLocalWC1 < Instruction - attr_reader :index - - def initialize(index) - @index = index - end - - def disasm(fmt) - fmt.instruction("getlocal_WC_1", [fmt.local(index, implicit: 1)]) - end - - def to_a(iseq) - [:getlocal_WC_1, iseq.parent_iseq.local_table.offset(index)] - end - - def deconstruct_keys(_keys) - { index: index } - end - - def ==(other) - other.is_a?(GetLocalWC1) && other.index == index - end - - def length - 2 - end - - def pushes - 1 - end - - def canonical - GetLocal.new(index, 1) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `getspecial` pushes the value of a special local variable onto the stack. - # - # ### Usage - # - # ~~~ruby - # 1 if (a == 1) .. (b == 2) - # ~~~ - # - class GetSpecial < Instruction - SVAR_LASTLINE = 0 # $_ - SVAR_BACKREF = 1 # $~ - SVAR_FLIPFLOP_START = 2 # flipflop - - attr_reader :key, :type - - def initialize(key, type) - @key = key - @type = type - end - - def disasm(fmt) - fmt.instruction("getspecial", [fmt.object(key), fmt.object(type)]) - end - - def to_a(_iseq) - [:getspecial, key, type] - end - - def deconstruct_keys(_keys) - { key: key, type: type } - end - - def ==(other) - other.is_a?(GetSpecial) && other.key == key && other.type == type - end - - def length - 3 - end - - def pushes - 1 - end - - def call(vm) - case key - when SVAR_LASTLINE - raise NotImplementedError, "getspecial SVAR_LASTLINE" - when SVAR_BACKREF - raise NotImplementedError, "getspecial SVAR_BACKREF" - when SVAR_FLIPFLOP_START - vm.frame_svar.svars[SVAR_FLIPFLOP_START] - end - end - end - - # ### Summary - # - # `intern` converts the top element of the stack to a symbol and pushes the - # symbol onto the stack. - # - # ### Usage - # - # ~~~ruby - # :"#{"foo"}" - # ~~~ - # - class Intern < Instruction - def disasm(fmt) - fmt.instruction("intern") - end - - def to_a(_iseq) - [:intern] - end - - def deconstruct_keys(_keys) - {} - end - - def ==(other) - other.is_a?(Intern) - end - - def pops - 1 - end - - def pushes - 1 - end - - def call(vm) - vm.push(vm.pop.to_sym) - end - end - - # ### Summary - # - # `invokeblock` invokes the block given to the current method. It pops the - # arguments for the block off the stack and pushes the result of running the - # block onto the stack. - # - # ### Usage - # - # ~~~ruby - # def foo - # yield - # end - # ~~~ - # - class InvokeBlock < Instruction - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("invokeblock", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:invokeblock, calldata.to_h] - end - - def deconstruct_keys(_keys) - { calldata: calldata } - end - - def ==(other) - other.is_a?(InvokeBlock) && other.calldata == calldata - end - - def length - 2 - end - - def pops - calldata.argc - end - - def pushes - 1 - end - - def call(vm) - vm.push(vm.frame_yield.block.call(*vm.pop(calldata.argc))) - end - end - - # ### Summary - # - # `invokesuper` is similar to the `send` instruction, except that it calls - # the super method. It pops the receiver and arguments off the stack and - # pushes the return value onto the stack. - # - # ### Usage - # - # ~~~ruby - # def foo - # super - # end - # ~~~ - # - class InvokeSuper < Instruction - attr_reader :calldata, :block_iseq - - def initialize(calldata, block_iseq) - @calldata = calldata - @block_iseq = block_iseq - end - - def disasm(fmt) - fmt.enqueue(block_iseq) if block_iseq - fmt.instruction( - "invokesuper", - [fmt.calldata(calldata), block_iseq&.name || "nil"] - ) - end - - def to_a(_iseq) - [:invokesuper, calldata.to_h, block_iseq&.to_a] - end - - def deconstruct_keys(_keys) - { calldata: calldata, block_iseq: block_iseq } - end - - def ==(other) - other.is_a?(InvokeSuper) && other.calldata == calldata && - other.block_iseq == block_iseq - end - - def pops - argb = (calldata.flag?(CallData::CALL_ARGS_BLOCKARG) ? 1 : 0) - argb + calldata.argc + 1 - end - - def pushes - 1 - end - - def call(vm) - block = - if (iseq = block_iseq) - frame = vm.frame - ->(*args, **kwargs, &blk) do - vm.run_block_frame(iseq, frame, *args, **kwargs, &blk) - end - end - - keywords = - if calldata.kw_arg - calldata.kw_arg.zip(vm.pop(calldata.kw_arg.length)).to_h - else - {} - end - - arguments = vm.pop(calldata.argc) - receiver = vm.pop - - method = receiver.method(vm.frame.name).super_method - vm.push(method.call(*arguments, **keywords, &block)) - end - end - - # ### Summary - # - # `jump` unconditionally jumps to the label given as its only argument. - # - # ### Usage - # - # ~~~ruby - # x = 0 - # if x == 0 - # puts "0" - # else - # puts "2" - # end - # ~~~ - # - class Jump < Instruction - attr_reader :label - - def initialize(label) - @label = label - end - - def disasm(fmt) - fmt.instruction("jump", [fmt.label(label)]) - end - - def to_a(_iseq) - [:jump, label.name] - end - - def deconstruct_keys(_keys) - { label: label } - end - - def ==(other) - other.is_a?(Jump) && other.label == label - end - - def length - 2 - end - - def call(vm) - vm.jump(label) - end - - def branch_targets - [label] - end - end - - # ### Summary - # - # `leave` exits the current frame. - # - # ### Usage - # - # ~~~ruby - # ;; - # ~~~ - # - class Leave < Instruction - def disasm(fmt) - fmt.instruction("leave") - end - - def to_a(_iseq) - [:leave] - end - - def deconstruct_keys(_keys) - {} - end - - def ==(other) - other.is_a?(Leave) - end - - def pops - 1 - end - - def pushes - # TODO: This is wrong. It should be 1. But it's 0 for now because - # otherwise the stack size is incorrectly calculated. - 0 - end - - def call(vm) - vm.leave - end - - def leaves? - true - end - end - - # ### Summary - # - # `newarray` puts a new array initialized with `number` values from the - # stack. It pops `number` values off the stack and pushes the array onto the - # stack. - # - # ### Usage - # - # ~~~ruby - # ["string"] - # ~~~ - # - class NewArray < Instruction - attr_reader :number - - def initialize(number) - @number = number - end - - def disasm(fmt) - fmt.instruction("newarray", [fmt.object(number)]) - end - - def to_a(_iseq) - [:newarray, number] - end - - def deconstruct_keys(_keys) - { number: number } - end - - def ==(other) - other.is_a?(NewArray) && other.number == number - end - - def length - 2 - end - - def pops - number - end - - def pushes - 1 - end - - def call(vm) - vm.push(vm.pop(number)) - end - end - - # ### Summary - # - # `newarraykwsplat` is a specialized version of `newarray` that takes a ** - # splat argument. It pops `number` values off the stack and pushes the array - # onto the stack. - # - # ### Usage - # - # ~~~ruby - # ["string", **{ foo: "bar" }] - # ~~~ - # - class NewArrayKwSplat < Instruction - attr_reader :number - - def initialize(number) - @number = number - end - - def disasm(fmt) - fmt.instruction("newarraykwsplat", [fmt.object(number)]) - end - - def to_a(_iseq) - [:newarraykwsplat, number] - end - - def deconstruct_keys(_keys) - { number: number } - end - - def ==(other) - other.is_a?(NewArrayKwSplat) && other.number == number - end - - def length - 2 - end - - def pops - number - end - - def pushes - 1 - end - - def call(vm) - vm.push(vm.pop(number)) - end - end - - # ### Summary - # - # `newhash` puts a new hash onto the stack, using `number` elements from the - # stack. `number` needs to be even. It pops `number` elements off the stack - # and pushes a hash onto the stack. - # - # ### Usage - # - # ~~~ruby - # def foo(key, value) - # { key => value } - # end - # ~~~ - # - class NewHash < Instruction - attr_reader :number - - def initialize(number) - @number = number - end - - def disasm(fmt) - fmt.instruction("newhash", [fmt.object(number)]) - end - - def to_a(_iseq) - [:newhash, number] - end - - def deconstruct_keys(_keys) - { number: number } - end - - def ==(other) - other.is_a?(NewHash) && other.number == number - end - - def length - 2 - end - - def pops - number - end - - def pushes - 1 - end - - def call(vm) - vm.push(vm.pop(number).each_slice(2).to_h) - end - end - - # ### Summary - # - # `newrange` creates a new range object from the top two values on the - # stack. It pops both of them off, and then pushes on the new range. It - # takes one argument which is 0 if the end is included or 1 if the end value - # is excluded. - # - # ### Usage - # - # ~~~ruby - # x = 0 - # y = 1 - # p (x..y), (x...y) - # ~~~ - # - class NewRange < Instruction - attr_reader :exclude_end - - def initialize(exclude_end) - @exclude_end = exclude_end - end - - def disasm(fmt) - fmt.instruction("newrange", [fmt.object(exclude_end)]) - end - - def to_a(_iseq) - [:newrange, exclude_end] - end - - def deconstruct_keys(_keys) - { exclude_end: exclude_end } - end - - def ==(other) - other.is_a?(NewRange) && other.exclude_end == exclude_end - end - - def length - 2 - end - - def pops - 2 - end - - def pushes - 1 - end - - def call(vm) - vm.push(Range.new(*vm.pop(2), exclude_end == 1)) - end - end - - # ### Summary - # - # `nop` is a no-operation instruction. It is used to pad the instruction - # sequence so there is a place for other instructions to jump to. - # - # ### Usage - # - # ~~~ruby - # raise rescue true - # ~~~ - # - class Nop < Instruction - def disasm(fmt) - fmt.instruction("nop") - end - - def to_a(_iseq) - [:nop] - end - - def deconstruct_keys(_keys) - {} - end - - def ==(other) - other.is_a?(Nop) - end - - def call(vm) - end - - def side_effects? - false - end - end - - # ### Summary - # - # `objtostring` pops a value from the stack, calls `to_s` on that value and - # then pushes the result back to the stack. - # - # It has various fast paths for classes like String, Symbol, Module, Class, - # etc. For everything else it calls `to_s`. - # - # ### Usage - # - # ~~~ruby - # "#{5}" - # ~~~ - # - class ObjToString < Instruction - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("objtostring", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:objtostring, calldata.to_h] - end - - def deconstruct_keys(_keys) - { calldata: calldata } - end - - def ==(other) - other.is_a?(ObjToString) && other.calldata == calldata - end - - def length - 2 - end - - def pops - 1 - end - - def pushes - 1 - end - - def call(vm) - vm.push(vm.pop.to_s) - end - end - - # ### Summary - # - # `once` is an instruction that wraps an instruction sequence and ensures - # that is it only ever executed once for the lifetime of the program. It - # uses a cache to ensure that it is only executed once. It pushes the result - # of running the instruction sequence onto the stack. - # - # ### Usage - # - # ~~~ruby - # END { puts "END" } - # ~~~ - # - class Once < Instruction - attr_reader :iseq, :cache - - def initialize(iseq, cache) - @iseq = iseq - @cache = cache - end - - def disasm(fmt) - fmt.enqueue(iseq) - fmt.instruction("once", [iseq.name, fmt.inline_storage(cache)]) - end - - def to_a(_iseq) - [:once, iseq.to_a, cache] - end - - def deconstruct_keys(_keys) - { iseq: iseq, cache: cache } - end - - def ==(other) - other.is_a?(Once) && other.iseq == iseq && other.cache == cache - end - - def length - 3 - end - - def pushes - 1 - end - - def call(vm) - return if @executed - vm.push(vm.run_block_frame(iseq, vm.frame)) - @executed = true - end - end - - # ### Summary - # - # `opt_and` is a specialization of the `opt_send_without_block` instruction - # that occurs when the `&` operator is used. There is a fast path for if - # both operands are integers. It pops both the receiver and the argument off - # the stack and pushes on the result. - # - # ### Usage - # - # ~~~ruby - # 2 & 3 - # ~~~ - # - class OptAnd < Instruction - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_and", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_and, calldata.to_h] - end - - def deconstruct_keys(_keys) - { calldata: calldata } - end - - def ==(other) - other.is_a?(OptAnd) && other.calldata == calldata - end - - def length - 2 - end - - def pops - 2 - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `opt_aref` is a specialization of the `opt_send_without_block` instruction - # that occurs when the `[]` operator is used. There are fast paths if the - # receiver is an integer, array, or hash. - # - # ### Usage - # - # ~~~ruby - # 7[2] - # ~~~ - # - class OptAref < Instruction - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_aref", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_aref, calldata.to_h] - end - - def deconstruct_keys(_keys) - { calldata: calldata } - end - - def ==(other) - other.is_a?(OptAref) && other.calldata == calldata - end - - def length - 2 - end - - def pops - 2 - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `opt_aref_with` is a specialization of the `opt_aref` instruction that - # occurs when the `[]` operator is used with a string argument known at - # compile time. There are fast paths if the receiver is a hash. It pops the - # receiver off the stack and pushes on the result. - # - # ### Usage - # - # ~~~ruby - # { 'test' => true }['test'] - # ~~~ - # - class OptArefWith < Instruction - attr_reader :object, :calldata - - def initialize(object, calldata) - @object = object - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction( - "opt_aref_with", - [fmt.object(object), fmt.calldata(calldata)] - ) - end - - def to_a(_iseq) - [:opt_aref_with, object, calldata.to_h] - end - - def deconstruct_keys(_keys) - { object: object, calldata: calldata } - end - - def ==(other) - other.is_a?(OptArefWith) && other.object == object && - other.calldata == calldata - end - - def length - 3 - end - - def pops - 1 - end - - def pushes - 1 - end - - def call(vm) - vm.push(vm.pop[object]) - end - end - - # ### Summary - # - # `opt_aset` is an instruction for setting the hash value by the key in - # the `recv[obj] = set` format. It is a specialization of the - # `opt_send_without_block` instruction. It pops the receiver, the key, and - # the value off the stack and pushes on the result. - # - # ### Usage - # - # ~~~ruby - # {}[:key] = value - # ~~~ - # - class OptAset < Instruction - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_aset", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_aset, calldata.to_h] - end - - def deconstruct_keys(_keys) - { calldata: calldata } - end - - def ==(other) - other.is_a?(OptAset) && other.calldata == calldata - end - - def length - 2 - end - - def pops - 3 - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `opt_aset_with` is an instruction for setting the hash value by the known - # string key in the `recv[obj] = set` format. It pops the receiver and the - # value off the stack and pushes on the result. - # - # ### Usage - # - # ~~~ruby - # {}["key"] = value - # ~~~ - # - class OptAsetWith < Instruction - attr_reader :object, :calldata - - def initialize(object, calldata) - @object = object - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction( - "opt_aset_with", - [fmt.object(object), fmt.calldata(calldata)] - ) - end - - def to_a(_iseq) - [:opt_aset_with, object, calldata.to_h] - end - - def deconstruct_keys(_keys) - { object: object, calldata: calldata } - end - - def ==(other) - other.is_a?(OptAsetWith) && other.object == object && - other.calldata == calldata - end - - def length - 3 - end - - def pops - 2 - end - - def pushes - 1 - end - - def call(vm) - hash, value = vm.pop(2) - vm.push(hash[object] = value) - end - end - - # ### Summary - # - # `opt_case_dispatch` is a branch instruction that moves the control flow - # for case statements that have clauses where they can all be used as hash - # keys for an internal hash. - # - # It has two arguments: the `case_dispatch_hash` and an `else_label`. It - # pops one value off the stack: a hash key. `opt_case_dispatch` looks up the - # key in the `case_dispatch_hash` and jumps to the corresponding label if - # there is one. If there is no value in the `case_dispatch_hash`, - # `opt_case_dispatch` jumps to the `else_label` index. - # - # ### Usage - # - # ~~~ruby - # case 1 - # when 1 - # puts "foo" - # else - # puts "bar" - # end - # ~~~ - # - class OptCaseDispatch < Instruction - attr_reader :case_dispatch_hash, :else_label - - def initialize(case_dispatch_hash, else_label) - @case_dispatch_hash = case_dispatch_hash - @else_label = else_label - end - - def disasm(fmt) - fmt.instruction( - "opt_case_dispatch", - ["", fmt.label(else_label)] - ) - end - - def to_a(_iseq) - [ - :opt_case_dispatch, - case_dispatch_hash.flat_map { |key, value| [key, value.name] }, - else_label.name - ] - end - - def deconstruct_keys(_keys) - { case_dispatch_hash: case_dispatch_hash, else_label: else_label } - end - - def ==(other) - other.is_a?(OptCaseDispatch) && - other.case_dispatch_hash == case_dispatch_hash && - other.else_label == else_label - end - - def length - 3 - end - - def pops - 1 - end - - def call(vm) - vm.jump(case_dispatch_hash.fetch(vm.pop, else_label)) - end - - def branch_targets - case_dispatch_hash.values.push(else_label) - end - - def falls_through? - true - end - end - - # ### Summary - # - # `opt_div` is a specialization of the `opt_send_without_block` instruction - # that occurs when the `/` operator is used. There are fast paths for if - # both operands are integers, or if both operands are floats. It pops both - # the receiver and the argument off the stack and pushes on the result. - # - # ### Usage - # - # ~~~ruby - # 2 / 3 - # ~~~ - # - class OptDiv < Instruction - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_div", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_div, calldata.to_h] - end - - def deconstruct_keys(_keys) - { calldata: calldata } - end - - def ==(other) - other.is_a?(OptDiv) && other.calldata == calldata - end - - def length - 2 - end - - def pops - 2 - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `opt_empty_p` is an optimization applied when the method `empty?` is - # called. It pops the receiver off the stack and pushes on the result of the - # method call. - # - # ### Usage - # - # ~~~ruby - # "".empty? - # ~~~ - # - class OptEmptyP < Instruction - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_empty_p", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_empty_p, calldata.to_h] - end - - def deconstruct_keys(_keys) - { calldata: calldata } - end - - def ==(other) - other.is_a?(OptEmptyP) && other.calldata == calldata - end - - def length - 2 - end - - def pops - 1 - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `opt_eq` is a specialization of the `opt_send_without_block` instruction - # that occurs when the == operator is used. Fast paths exist when both - # operands are integers, floats, symbols or strings. It pops both the - # receiver and the argument off the stack and pushes on the result. - # - # ### Usage - # - # ~~~ruby - # 2 == 2 - # ~~~ - # - class OptEq < Instruction - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_eq", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_eq, calldata.to_h] - end - - def deconstruct_keys(_keys) - { calldata: calldata } - end - - def ==(other) - other.is_a?(OptEq) && other.calldata == calldata - end - - def length - 2 - end - - def pops - 2 - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `opt_ge` is a specialization of the `opt_send_without_block` instruction - # that occurs when the >= operator is used. Fast paths exist when both - # operands are integers or floats. It pops both the receiver and the - # argument off the stack and pushes on the result. - # - # ### Usage - # - # ~~~ruby - # 4 >= 3 - # ~~~ - # - class OptGE < Instruction - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_ge", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_ge, calldata.to_h] - end - - def deconstruct_keys(_keys) - { calldata: calldata } - end - - def ==(other) - other.is_a?(OptGE) && other.calldata == calldata - end - - def length - 2 - end - - def pops - 2 - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `opt_getconstant_path` performs a constant lookup on a chain of constant - # names. It accepts as its argument an array of constant names, and pushes - # the value of the constant onto the stack. - # - # ### Usage - # - # ~~~ruby - # ::Object - # ~~~ - # - class OptGetConstantPath < Instruction - attr_reader :names - - def initialize(names) - @names = names - end - - def disasm(fmt) - cache = "" - fmt.instruction("opt_getconstant_path", [cache]) - end - - def to_a(_iseq) - [:opt_getconstant_path, names] - end - - def deconstruct_keys(_keys) - { names: names } - end - - def ==(other) - other.is_a?(OptGetConstantPath) && other.names == names - end - - def length - 2 - end - - def pushes - 1 - end - - def call(vm) - current = vm.frame._self - current = current.class unless current.is_a?(Class) - - names.each do |name| - current = name == :"" ? Object : current.const_get(name) - end - - vm.push(current) - end - end - - # ### Summary - # - # `opt_gt` is a specialization of the `opt_send_without_block` instruction - # that occurs when the > operator is used. Fast paths exist when both - # operands are integers or floats. It pops both the receiver and the - # argument off the stack and pushes on the result. - # - # ### Usage - # - # ~~~ruby - # 4 > 3 - # ~~~ - # - class OptGT < Instruction - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_gt", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_gt, calldata.to_h] - end - - def deconstruct_keys(_keys) - { calldata: calldata } - end - - def ==(other) - other.is_a?(OptGT) && other.calldata == calldata - end - - def length - 2 - end - - def pops - 2 - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `opt_le` is a specialization of the `opt_send_without_block` instruction - # that occurs when the <= operator is used. Fast paths exist when both - # operands are integers or floats. It pops both the receiver and the - # argument off the stack and pushes on the result. - # - # ### Usage - # - # ~~~ruby - # 3 <= 4 - # ~~~ - # - class OptLE < Instruction - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_le", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_le, calldata.to_h] - end - - def deconstruct_keys(_keys) - { calldata: calldata } - end - - def ==(other) - other.is_a?(OptLE) && other.calldata == calldata - end - - def length - 2 - end - - def pops - 2 - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `opt_length` is a specialization of `opt_send_without_block`, when the - # `length` method is called. There are fast paths when the receiver is - # either a string, hash, or array. It pops the receiver off the stack and - # pushes on the result of the method call. - # - # ### Usage - # - # ~~~ruby - # "".length - # ~~~ - # - class OptLength < Instruction - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_length", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_length, calldata.to_h] - end - - def deconstruct_keys(_keys) - { calldata: calldata } - end - - def ==(other) - other.is_a?(OptLength) && other.calldata == calldata - end - - def length - 2 - end - - def pops - 1 - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `opt_lt` is a specialization of the `opt_send_without_block` instruction - # that occurs when the < operator is used. Fast paths exist when both - # operands are integers or floats. It pops both the receiver and the - # argument off the stack and pushes on the result. - # - # ### Usage - # - # ~~~ruby - # 3 < 4 - # ~~~ - # - class OptLT < Instruction - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_lt", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_lt, calldata.to_h] - end - - def deconstruct_keys(_keys) - { calldata: calldata } - end - - def ==(other) - other.is_a?(OptLT) && other.calldata == calldata - end - - def length - 2 - end - - def pops - 2 - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `opt_ltlt` is a specialization of the `opt_send_without_block` instruction - # that occurs when the `<<` operator is used. Fast paths exists when the - # receiver is either a String or an Array. It pops both the receiver and the - # argument off the stack and pushes on the result. - # - # ### Usage - # - # ~~~ruby - # "" << 2 - # ~~~ - # - class OptLTLT < Instruction - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_ltlt", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_ltlt, calldata.to_h] - end - - def deconstruct_keys(_keys) - { calldata: calldata } - end - - def ==(other) - other.is_a?(OptLTLT) && other.calldata == calldata - end - - def length - 2 - end - - def pops - 2 - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `opt_minus` is a specialization of the `opt_send_without_block` - # instruction that occurs when the `-` operator is used. There are fast - # paths for if both operands are integers or if both operands are floats. It - # pops both the receiver and the argument off the stack and pushes on the - # result. - # - # ### Usage - # - # ~~~ruby - # 3 - 2 - # ~~~ - # - class OptMinus < Instruction - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_minus", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_minus, calldata.to_h] - end - - def deconstruct_keys(_keys) - { calldata: calldata } - end - - def ==(other) - other.is_a?(OptMinus) && other.calldata == calldata - end - - def length - 2 - end - - def pops - 2 - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `opt_mod` is a specialization of the `opt_send_without_block` instruction - # that occurs when the `%` operator is used. There are fast paths for if - # both operands are integers or if both operands are floats. It pops both - # the receiver and the argument off the stack and pushes on the result. - # - # ### Usage - # - # ~~~ruby - # 4 % 2 - # ~~~ - # - class OptMod < Instruction - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_mod", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_mod, calldata.to_h] - end - - def deconstruct_keys(_keys) - { calldata: calldata } - end - - def ==(other) - other.is_a?(OptMod) && other.calldata == calldata - end - - def length - 2 - end - - def pops - 2 - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `opt_mult` is a specialization of the `opt_send_without_block` instruction - # that occurs when the `*` operator is used. There are fast paths for if - # both operands are integers or floats. It pops both the receiver and the - # argument off the stack and pushes on the result. - # - # ### Usage - # - # ~~~ruby - # 3 * 2 - # ~~~ - # - class OptMult < Instruction - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_mult", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_mult, calldata.to_h] - end - - def deconstruct_keys(_keys) - { calldata: calldata } - end - - def ==(other) - other.is_a?(OptMult) && other.calldata == calldata - end - - def length - 2 - end - - def pops - 2 - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `opt_neq` is an optimization that tests whether two values at the top of - # the stack are not equal by testing their equality and calling the `!` on - # the result. This allows `opt_neq` to use the fast paths optimized in - # `opt_eq` when both operands are Integers, Floats, Symbols, or Strings. It - # pops both the receiver and the argument off the stack and pushes on the - # result. - # - # ### Usage - # - # ~~~ruby - # 2 != 2 - # ~~~ - # - class OptNEq < Instruction - attr_reader :eq_calldata, :neq_calldata - - def initialize(eq_calldata, neq_calldata) - @eq_calldata = eq_calldata - @neq_calldata = neq_calldata - end - - def disasm(fmt) - fmt.instruction( - "opt_neq", - [fmt.calldata(eq_calldata), fmt.calldata(neq_calldata)] - ) - end - - def to_a(_iseq) - [:opt_neq, eq_calldata.to_h, neq_calldata.to_h] - end - - def deconstruct_keys(_keys) - { eq_calldata: eq_calldata, neq_calldata: neq_calldata } - end - - def ==(other) - other.is_a?(OptNEq) && other.eq_calldata == eq_calldata && - other.neq_calldata == neq_calldata - end - - def length - 3 - end - - def pops - 2 - end - - def pushes - 1 - end - - def call(vm) - receiver, argument = vm.pop(2) - vm.push(receiver != argument) - end - end - - # ### Summary - # - # `opt_newarray_send` is a specialization that occurs when a dynamic array - # literal is created and immediately sent the `min`, `max`, or `hash` - # methods. It pops the values of the array off the stack and pushes on the - # result of the method call. - # - # ### Usage - # - # ~~~ruby - # [a, b, c].max - # ~~~ - # - class OptNewArraySend < Instruction - attr_reader :number, :method - - def initialize(number, method) - @number = number - @method = method - end - - def disasm(fmt) - fmt.instruction( - "opt_newarray_send", - [fmt.object(number), fmt.object(method)] - ) - end - - def to_a(_iseq) - [:opt_newarray_send, number, method] - end - - def deconstruct_keys(_keys) - { number: number, method: method } - end - - def ==(other) - other.is_a?(OptNewArraySend) && other.number == number && - other.method == method - end - - def length - 3 - end - - def pops - number - end - - def pushes - 1 - end - - def call(vm) - vm.push(vm.pop(number).__send__(method)) - end - end - - # ### Summary - # - # `opt_nil_p` is an optimization applied when the method `nil?` is called. - # It returns true immediately when the receiver is `nil` and defers to the - # `nil?` method in other cases. It pops the receiver off the stack and - # pushes on the result. - # - # ### Usage - # - # ~~~ruby - # "".nil? - # ~~~ - # - class OptNilP < Instruction - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_nil_p", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_nil_p, calldata.to_h] - end - - def deconstruct_keys(_keys) - { calldata: calldata } - end - - def ==(other) - other.is_a?(OptNilP) && other.calldata == calldata - end - - def length - 2 - end - - def pops - 1 - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `opt_not` negates the value on top of the stack by calling the `!` method - # on it. It pops the receiver off the stack and pushes on the result. - # - # ### Usage - # - # ~~~ruby - # !true - # ~~~ - # - class OptNot < Instruction - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_not", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_not, calldata.to_h] - end - - def deconstruct_keys(_keys) - { calldata: calldata } - end - - def ==(other) - other.is_a?(OptNot) && other.calldata == calldata - end - - def length - 2 - end - - def pops - 1 - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `opt_or` is a specialization of the `opt_send_without_block` instruction - # that occurs when the `|` operator is used. There is a fast path for if - # both operands are integers. It pops both the receiver and the argument off - # the stack and pushes on the result. - # - # ### Usage - # - # ~~~ruby - # 2 | 3 - # ~~~ - # - class OptOr < Instruction - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_or", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_or, calldata.to_h] - end - - def deconstruct_keys(_keys) - { calldata: calldata } - end - - def ==(other) - other.is_a?(OptOr) && other.calldata == calldata - end - - def length - 2 - end - - def pops - 2 - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `opt_plus` is a specialization of the `opt_send_without_block` instruction - # that occurs when the `+` operator is used. There are fast paths for if - # both operands are integers, floats, strings, or arrays. It pops both the - # receiver and the argument off the stack and pushes on the result. - # - # ### Usage - # - # ~~~ruby - # 2 + 3 - # ~~~ - # - class OptPlus < Instruction - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_plus", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_plus, calldata.to_h] - end - - def deconstruct_keys(_keys) - { calldata: calldata } - end - - def ==(other) - other.is_a?(OptPlus) && other.calldata == calldata - end - - def length - 2 - end - - def pops - 2 - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `opt_regexpmatch2` is a specialization of the `opt_send_without_block` - # instruction that occurs when the `=~` operator is used. It pops both the - # receiver and the argument off the stack and pushes on the result. - # - # ### Usage - # - # ~~~ruby - # /a/ =~ "a" - # ~~~ - # - class OptRegExpMatch2 < Instruction - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_regexpmatch2", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_regexpmatch2, calldata.to_h] - end - - def deconstruct_keys(_keys) - { calldata: calldata } - end - - def ==(other) - other.is_a?(OptRegExpMatch2) && other.calldata == calldata - end - - def length - 2 - end - - def pops - 2 - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `opt_send_without_block` is a specialization of the send instruction that - # occurs when a method is being called without a block. It pops the receiver - # and the arguments off the stack and pushes on the result. - # - # ### Usage - # - # ~~~ruby - # puts "Hello, world!" - # ~~~ - # - class OptSendWithoutBlock < Instruction - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_send_without_block", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_send_without_block, calldata.to_h] - end - - def deconstruct_keys(_keys) - { calldata: calldata } - end - - def ==(other) - other.is_a?(OptSendWithoutBlock) && other.calldata == calldata - end - - def length - 2 - end - - def pops - 1 + calldata.argc - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `opt_size` is a specialization of `opt_send_without_block`, when the - # `size` method is called. There are fast paths when the receiver is either - # a string, hash, or array. It pops the receiver off the stack and pushes on - # the result. - # - # ### Usage - # - # ~~~ruby - # "".size - # ~~~ - # - class OptSize < Instruction - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_size", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_size, calldata.to_h] - end - - def deconstruct_keys(_keys) - { calldata: calldata } - end - - def ==(other) - other.is_a?(OptSize) && other.calldata == calldata - end - - def length - 2 - end - - def pops - 1 - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `opt_str_freeze` pushes a frozen known string value with no interpolation - # onto the stack using the #freeze method. If the method gets overridden, - # this will fall back to a send. - # - # ### Usage - # - # ~~~ruby - # "hello".freeze - # ~~~ - # - class OptStrFreeze < Instruction - attr_reader :object, :calldata - - def initialize(object, calldata) - @object = object - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction( - "opt_str_freeze", - [fmt.object(object), fmt.calldata(calldata)] - ) - end - - def to_a(_iseq) - [:opt_str_freeze, object, calldata.to_h] - end - - def deconstruct_keys(_keys) - { object: object, calldata: calldata } - end - - def ==(other) - other.is_a?(OptStrFreeze) && other.object == object && - other.calldata == calldata - end - - def length - 3 - end - - def pushes - 1 - end - - def call(vm) - vm.push(object.freeze) - end - end - - # ### Summary - # - # `opt_str_uminus` pushes a frozen known string value with no interpolation - # onto the stack. If the method gets overridden, this will fall back to a - # send. - # - # ### Usage - # - # ~~~ruby - # -"string" - # ~~~ - # - class OptStrUMinus < Instruction - attr_reader :object, :calldata - - def initialize(object, calldata) - @object = object - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction( - "opt_str_uminus", - [fmt.object(object), fmt.calldata(calldata)] - ) - end - - def to_a(_iseq) - [:opt_str_uminus, object, calldata.to_h] - end - - def deconstruct_keys(_keys) - { object: object, calldata: calldata } - end - - def ==(other) - other.is_a?(OptStrUMinus) && other.object == object && - other.calldata == calldata - end - - def length - 3 - end - - def pushes - 1 - end - - def call(vm) - vm.push(-object) - end - end - - # ### Summary - # - # `opt_succ` is a specialization of the `opt_send_without_block` instruction - # when the method being called is `succ`. Fast paths exist when the receiver - # is either a String or a Fixnum. It pops the receiver off the stack and - # pushes on the result. - # - # ### Usage - # - # ~~~ruby - # "".succ - # ~~~ - # - class OptSucc < Instruction - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_succ", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_succ, calldata.to_h] - end - - def deconstruct_keys(_keys) - { calldata: calldata } - end - - def ==(other) - other.is_a?(OptSucc) && other.calldata == calldata - end - - def length - 2 - end - - def pops - 1 - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `pop` pops the top value off the stack. - # - # ### Usage - # - # ~~~ruby - # a ||= 2 - # ~~~ - # - class Pop < Instruction - def disasm(fmt) - fmt.instruction("pop") - end - - def to_a(_iseq) - [:pop] - end - - def deconstruct_keys(_keys) - {} - end - - def ==(other) - other.is_a?(Pop) - end - - def pops - 1 - end - - def call(vm) - vm.pop - end - - def side_effects? - false - end - end - - # ### Summary - # - # `pushtoarraykwsplat` is used to append a hash literal that is being - # splatted onto an array. - # - # ### Usage - # - # ~~~ruby - # ["string", **{ foo: "bar" }] - # ~~~ - # - class PushToArrayKwSplat < Instruction - def disasm(fmt) - fmt.instruction("pushtoarraykwsplat") - end - - def to_a(_iseq) - [:pushtoarraykwsplat] - end - - def deconstruct_keys(_keys) - {} - end - - def ==(other) - other.is_a?(PushToArrayKwSplat) - end - - def length - 2 - end - - def pops - 2 - end - - def pushes - 1 - end - - def call(vm) - array, hash = vm.pop(2) - vm.push(array << hash) - end - end - - # ### Summary - # - # `putnil` pushes a global nil object onto the stack. - # - # ### Usage - # - # ~~~ruby - # nil - # ~~~ - # - class PutNil < Instruction - def disasm(fmt) - fmt.instruction("putnil") - end - - def to_a(_iseq) - [:putnil] - end - - def deconstruct_keys(_keys) - {} - end - - def ==(other) - other.is_a?(PutNil) - end - - def pushes - 1 - end - - def canonical - PutObject.new(nil) - end - - def call(vm) - canonical.call(vm) - end - - def side_effects? - false - end - end - - # ### Summary - # - # `putobject` pushes a known value onto the stack. - # - # ### Usage - # - # ~~~ruby - # 5 - # ~~~ - # - class PutObject < Instruction - attr_reader :object - - def initialize(object) - @object = object - end - - def disasm(fmt) - fmt.instruction("putobject", [fmt.object(object)]) - end - - def to_a(_iseq) - [:putobject, object] - end - - def deconstruct_keys(_keys) - { object: object } - end - - def ==(other) - other.is_a?(PutObject) && other.object == object - end - - def length - 2 - end - - def pushes - 1 - end - - def call(vm) - vm.push(object) - end - - def side_effects? - false - end - end - - # ### Summary - # - # `putobject_INT2FIX_0_` pushes 0 on the stack. It is a specialized - # instruction resulting from the operand unification optimization. It is - # equivalent to `putobject 0`. - # - # ### Usage - # - # ~~~ruby - # 0 - # ~~~ - # - class PutObjectInt2Fix0 < Instruction - def disasm(fmt) - fmt.instruction("putobject_INT2FIX_0_") - end - - def to_a(_iseq) - [:putobject_INT2FIX_0_] - end - - def deconstruct_keys(_keys) - {} - end - - def ==(other) - other.is_a?(PutObjectInt2Fix0) - end - - def pushes - 1 - end - - def canonical - PutObject.new(0) - end - - def call(vm) - canonical.call(vm) - end - - def side_effects? - false - end - end - - # ### Summary - # - # `putobject_INT2FIX_1_` pushes 1 on the stack. It is a specialized - # instruction resulting from the operand unification optimization. It is - # equivalent to `putobject 1`. - # - # ### Usage - # - # ~~~ruby - # 1 - # ~~~ - # - class PutObjectInt2Fix1 < Instruction - def disasm(fmt) - fmt.instruction("putobject_INT2FIX_1_") - end - - def to_a(_iseq) - [:putobject_INT2FIX_1_] - end - - def deconstruct_keys(_keys) - {} - end - - def ==(other) - other.is_a?(PutObjectInt2Fix1) - end - - def pushes - 1 - end - - def canonical - PutObject.new(1) - end - - def call(vm) - canonical.call(vm) - end - - def side_effects? - false - end - end - - # ### Summary - # - # `putself` pushes the current value of self onto the stack. - # - # ### Usage - # - # ~~~ruby - # puts "Hello, world!" - # ~~~ - # - class PutSelf < Instruction - def disasm(fmt) - fmt.instruction("putself") - end - - def to_a(_iseq) - [:putself] - end - - def deconstruct_keys(_keys) - {} - end - - def ==(other) - other.is_a?(PutSelf) - end - - def pushes - 1 - end - - def call(vm) - vm.push(vm.frame._self) - end - - def side_effects? - false - end - end - - # ### Summary - # - # `putspecialobject` pushes one of three special objects onto the stack. - # These are either the VM core special object, the class base special - # object, or the constant base special object. - # - # ### Usage - # - # ~~~ruby - # alias foo bar - # ~~~ - # - class PutSpecialObject < Instruction - OBJECT_VMCORE = 1 - OBJECT_CBASE = 2 - OBJECT_CONST_BASE = 3 - - attr_reader :object - - def initialize(object) - @object = object - end - - def disasm(fmt) - fmt.instruction("putspecialobject", [fmt.object(object)]) - end - - def to_a(_iseq) - [:putspecialobject, object] - end - - def deconstruct_keys(_keys) - { object: object } - end - - def ==(other) - other.is_a?(PutSpecialObject) && other.object == object - end - - def length - 2 - end - - def pushes - 1 - end - - def call(vm) - case object - when OBJECT_VMCORE - vm.push(vm.frozen_core) - when OBJECT_CBASE - value = vm.frame._self - value = value.singleton_class unless value.is_a?(Class) - vm.push(value) - when OBJECT_CONST_BASE - vm.push(vm.const_base) - end - end - end - - # ### Summary - # - # `putchilledstring` pushes an unfrozen string literal onto the stack that - # acts like a frozen string. This is a migration path to frozen string - # literals as the default in the future. - # - # ### Usage - # - # ~~~ruby - # "foo" - # ~~~ - # - class PutChilledString < Instruction - attr_reader :object - - def initialize(object) - @object = object - end - - def disasm(fmt) - fmt.instruction("putchilledstring", [fmt.object(object)]) - end - - def to_a(_iseq) - [:putchilledstring, object] - end - - def deconstruct_keys(_keys) - { object: object } - end - - def ==(other) - other.is_a?(PutChilledString) && other.object == object - end - - def length - 2 - end - - def pushes - 1 - end - - def call(vm) - vm.push(object.dup) - end - end - - # ### Summary - # - # `putstring` pushes an unfrozen string literal onto the stack. - # - # ### Usage - # - # ~~~ruby - # "foo" - # ~~~ - # - class PutString < Instruction - attr_reader :object - - def initialize(object) - @object = object - end - - def disasm(fmt) - fmt.instruction("putstring", [fmt.object(object)]) - end - - def to_a(_iseq) - [:putstring, object] - end - - def deconstruct_keys(_keys) - { object: object } - end - - def ==(other) - other.is_a?(PutString) && other.object == object - end - - def length - 2 - end - - def pushes - 1 - end - - def call(vm) - vm.push(object.dup) - end - end - - # ### Summary - # - # `send` invokes a method with an optional block. It pops its receiver and - # the arguments for the method off the stack and pushes the return value - # onto the stack. It has two arguments: the calldata for the call site and - # the optional block instruction sequence. - # - # ### Usage - # - # ~~~ruby - # "hello".tap { |i| p i } - # ~~~ - # - class Send < Instruction - attr_reader :calldata, :block_iseq - - def initialize(calldata, block_iseq) - @calldata = calldata - @block_iseq = block_iseq - end - - def disasm(fmt) - fmt.enqueue(block_iseq) if block_iseq - fmt.instruction( - "send", - [fmt.calldata(calldata), block_iseq&.name || "nil"] - ) - end - - def to_a(_iseq) - [:send, calldata.to_h, block_iseq&.to_a] - end - - def deconstruct_keys(_keys) - { calldata: calldata, block_iseq: block_iseq } - end - - def ==(other) - other.is_a?(Send) && other.calldata == calldata && - other.block_iseq == block_iseq - end - - def length - 3 - end - - def pops - argb = (calldata.flag?(CallData::CALL_ARGS_BLOCKARG) ? 1 : 0) - argb + calldata.argc + 1 - end - - def pushes - 1 - end - - def call(vm) - block = - if (iseq = block_iseq) - frame = vm.frame - ->(*args, **kwargs, &blk) do - vm.run_block_frame(iseq, frame, *args, **kwargs, &blk) - end - elsif calldata.flag?(CallData::CALL_ARGS_BLOCKARG) - vm.pop - end - - keywords = - if calldata.kw_arg - calldata.kw_arg.zip(vm.pop(calldata.kw_arg.length)).to_h - else - {} - end - - arguments = vm.pop(calldata.argc) - receiver = vm.pop - - vm.push( - receiver.__send__(calldata.method, *arguments, **keywords, &block) - ) - end - end - - # ### Summary - # - # `setblockparam` sets the value of a block local variable on a frame - # determined by the level and index arguments. The level is the number of - # frames back to look and the index is the index in the local table. It pops - # the value it is setting off the stack. - # - # ### Usage - # - # ~~~ruby - # def foo(&bar) - # bar = baz - # end - # ~~~ - # - class SetBlockParam < Instruction - attr_reader :index, :level - - def initialize(index, level) - @index = index - @level = level - end - - def disasm(fmt) - fmt.instruction("setblockparam", [fmt.local(index, explicit: level)]) - end - - def to_a(iseq) - current = iseq - level.times { current = current.parent_iseq } - [:setblockparam, current.local_table.offset(index), level] - end - - def deconstruct_keys(_keys) - { index: index, level: level } - end - - def ==(other) - other.is_a?(SetBlockParam) && other.index == index && - other.level == level - end - - def length - 3 - end - - def pops - 1 - end - - def call(vm) - vm.local_set(index, level, vm.pop) - end - end - - # ### Summary - # - # `setclassvariable` looks for a class variable in the current class and - # sets its value to the value it pops off the top of the stack. It uses an - # inline cache to reduce the need to lookup the class variable in the class - # hierarchy every time. - # - # ### Usage - # - # ~~~ruby - # @@class_variable = 1 - # ~~~ - # - class SetClassVariable < Instruction - attr_reader :name, :cache - - def initialize(name, cache) - @name = name - @cache = cache - end - - def disasm(fmt) - fmt.instruction( - "setclassvariable", - [fmt.object(name), fmt.inline_storage(cache)] - ) - end - - def to_a(_iseq) - [:setclassvariable, name, cache] - end - - def deconstruct_keys(_keys) - { name: name, cache: cache } - end - - def ==(other) - other.is_a?(SetClassVariable) && other.name == name && - other.cache == cache - end - - def length - 3 - end - - def pops - 1 - end - - def call(vm) - clazz = vm.frame._self - clazz = clazz.class unless clazz.is_a?(Class) - clazz.class_variable_set(name, vm.pop) - end - end - - # ### Summary - # - # `setconstant` pops two values off the stack: the value to set the - # constant to and the constant base to set it in. - # - # ### Usage - # - # ~~~ruby - # Constant = 1 - # ~~~ - # - class SetConstant < Instruction - attr_reader :name - - def initialize(name) - @name = name - end - - def disasm(fmt) - fmt.instruction("setconstant", [fmt.object(name)]) - end - - def to_a(_iseq) - [:setconstant, name] - end - - def deconstruct_keys(_keys) - { name: name } - end - - def ==(other) - other.is_a?(SetConstant) && other.name == name - end - - def length - 2 - end - - def pops - 2 - end - - def call(vm) - value, parent = vm.pop(2) - parent.const_set(name, value) - end - end - - # ### Summary - # - # `setglobal` sets the value of a global variable to a value popped off the - # top of the stack. - # - # ### Usage - # - # ~~~ruby - # $global = 5 - # ~~~ - # - class SetGlobal < Instruction - attr_reader :name - - def initialize(name) - @name = name - end - - def disasm(fmt) - fmt.instruction("setglobal", [fmt.object(name)]) - end - - def to_a(_iseq) - [:setglobal, name] - end - - def deconstruct_keys(_keys) - { name: name } - end - - def ==(other) - other.is_a?(SetGlobal) && other.name == name - end - - def length - 2 - end - - def pops - 1 - end - - def call(vm) - # Evaluating the name of the global variable because there isn't a - # reflection API for global variables. - eval("#{name} = vm.pop", binding, __FILE__, __LINE__) - end - end - - # ### Summary - # - # `setinstancevariable` pops a value off the top of the stack and then sets - # the instance variable associated with the instruction to that value. - # - # This instruction has two forms, but both have the same structure. Before - # Ruby 3.2, the inline cache corresponded to both the get and set - # instructions and could be shared. Since Ruby 3.2, it uses object shapes - # instead so the caches are unique per instruction. - # - # ### Usage - # - # ~~~ruby - # @instance_variable = 1 - # ~~~ - # - class SetInstanceVariable < Instruction - attr_reader :name, :cache - - def initialize(name, cache) - @name = name - @cache = cache - end - - def disasm(fmt) - fmt.instruction( - "setinstancevariable", - [fmt.object(name), fmt.inline_storage(cache)] - ) - end - - def to_a(_iseq) - [:setinstancevariable, name, cache] - end - - def deconstruct_keys(_keys) - { name: name, cache: cache } - end - - def ==(other) - other.is_a?(SetInstanceVariable) && other.name == name && - other.cache == cache - end - - def length - 3 - end - - def pops - 1 - end - - def call(vm) - method = Object.instance_method(:instance_variable_set) - method.bind(vm.frame._self).call(name, vm.pop) - end - end - - # ### Summary - # - # `setlocal` sets the value of a local variable on a frame determined by the - # level and index arguments. The level is the number of frames back to - # look and the index is the index in the local table. It pops the value it - # is setting off the stack. - # - # ### Usage - # - # ~~~ruby - # value = 5 - # tap { tap { value = 10 } } - # ~~~ - # - class SetLocal < Instruction - attr_reader :index, :level - - def initialize(index, level) - @index = index - @level = level - end - - def disasm(fmt) - fmt.instruction("setlocal", [fmt.local(index, explicit: level)]) - end - - def to_a(iseq) - current = iseq - level.times { current = current.parent_iseq } - [:setlocal, current.local_table.offset(index), level] - end - - def deconstruct_keys(_keys) - { index: index, level: level } - end - - def ==(other) - other.is_a?(SetLocal) && other.index == index && other.level == level - end - - def length - 3 - end - - def pops - 1 - end - - def call(vm) - vm.local_set(index, level, vm.pop) - end - end - - # ### Summary - # - # `setlocal_WC_0` is a specialized version of the `setlocal` instruction. It - # sets the value of a local variable on the current frame to the value at - # the top of the stack as determined by the index given as its only - # argument. - # - # ### Usage - # - # ~~~ruby - # value = 5 - # ~~~ - # - class SetLocalWC0 < Instruction - attr_reader :index - - def initialize(index) - @index = index - end - - def disasm(fmt) - fmt.instruction("setlocal_WC_0", [fmt.local(index, implicit: 0)]) - end - - def to_a(iseq) - [:setlocal_WC_0, iseq.local_table.offset(index)] - end - - def deconstruct_keys(_keys) - { index: index } - end - - def ==(other) - other.is_a?(SetLocalWC0) && other.index == index - end - - def length - 2 - end - - def pops - 1 - end - - def canonical - SetLocal.new(index, 0) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `setlocal_WC_1` is a specialized version of the `setlocal` instruction. It - # sets the value of a local variable on the parent frame to the value at the - # top of the stack as determined by the index given as its only argument. - # - # ### Usage - # - # ~~~ruby - # value = 5 - # self.then { value = 10 } - # ~~~ - # - class SetLocalWC1 < Instruction - attr_reader :index - - def initialize(index) - @index = index - end - - def disasm(fmt) - fmt.instruction("setlocal_WC_1", [fmt.local(index, implicit: 1)]) - end - - def to_a(iseq) - [:setlocal_WC_1, iseq.parent_iseq.local_table.offset(index)] - end - - def deconstruct_keys(_keys) - { index: index } - end - - def ==(other) - other.is_a?(SetLocalWC1) && other.index == index - end - - def length - 2 - end - - def pops - 1 - end - - def canonical - SetLocal.new(index, 1) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `setn` sets a value in the stack to a value popped off the top of the - # stack. It then pushes that value onto the top of the stack as well. - # - # ### Usage - # - # ~~~ruby - # {}[:key] = 'val' - # ~~~ - # - class SetN < Instruction - attr_reader :number - - def initialize(number) - @number = number - end - - def disasm(fmt) - fmt.instruction("setn", [fmt.object(number)]) - end - - def to_a(_iseq) - [:setn, number] - end - - def deconstruct_keys(_keys) - { number: number } - end - - def ==(other) - other.is_a?(SetN) && other.number == number - end - - def length - 2 - end - - def pops - 1 - end - - def pushes - 1 - end - - def call(vm) - vm.stack[-number - 1] = vm.stack.last - end - end - - # ### Summary - # - # `setspecial` pops a value off the top of the stack and sets a special - # local variable to that value. The special local variable is determined by - # the key given as its only argument. - # - # ### Usage - # - # ~~~ruby - # baz if (foo == 1) .. (bar == 1) - # ~~~ - # - class SetSpecial < Instruction - attr_reader :key - - def initialize(key) - @key = key - end - - def disasm(fmt) - fmt.instruction("setspecial", [fmt.object(key)]) - end - - def to_a(_iseq) - [:setspecial, key] - end - - def deconstruct_keys(_keys) - { key: key } - end - - def ==(other) - other.is_a?(SetSpecial) && other.key == key - end - - def length - 2 - end - - def pops - 1 - end - - def call(vm) - case key - when GetSpecial::SVAR_LASTLINE - raise NotImplementedError, "setspecial SVAR_LASTLINE" - when GetSpecial::SVAR_BACKREF - raise NotImplementedError, "setspecial SVAR_BACKREF" - when GetSpecial::SVAR_FLIPFLOP_START - vm.frame_svar.svars[GetSpecial::SVAR_FLIPFLOP_START] - end - end - end - - # ### Summary - # - # `splatarray` coerces the array object at the top of the stack into Array - # by calling `to_a`. It pushes a duplicate of the array if there is a flag, - # and the original array if there isn't one. - # - # ### Usage - # - # ~~~ruby - # x = *(5) - # ~~~ - # - class SplatArray < Instruction - attr_reader :flag - - def initialize(flag) - @flag = flag - end - - def disasm(fmt) - fmt.instruction("splatarray", [fmt.object(flag)]) - end - - def to_a(_iseq) - [:splatarray, flag] - end - - def deconstruct_keys(_keys) - { flag: flag } - end - - def ==(other) - other.is_a?(SplatArray) && other.flag == flag - end - - def length - 2 - end - - def pops - 1 - end - - def pushes - 1 - end - - def call(vm) - value = vm.pop - - vm.push( - if Array === value - value.instance_of?(Array) ? value.dup : Array[*value] - elsif value.nil? - value.to_a - else - if value.respond_to?(:to_a, true) - result = value.to_a - - if result.nil? - [value] - elsif !result.is_a?(Array) - raise TypeError, "expected to_a to return an Array" - end - else - [value] - end - end - ) - end - end - - # ### Summary - # - # `swap` swaps the top two elements in the stack. - # - # ### TracePoint - # - # `swap` does not dispatch any events. - # - # ### Usage - # - # ~~~ruby - # !!defined?([[]]) - # ~~~ - # - class Swap < Instruction - def disasm(fmt) - fmt.instruction("swap") - end - - def to_a(_iseq) - [:swap] - end - - def deconstruct_keys(_keys) - {} - end - - def ==(other) - other.is_a?(Swap) - end - - def pops - 2 - end - - def pushes - 2 - end - - def call(vm) - left, right = vm.pop(2) - vm.push(right, left) - end - end - - # ### Summary - # - # `throw` pops a value off the top of the stack and throws it. It is caught - # using the instruction sequence's (or an ancestor's) catch table. It pushes - # on the result of throwing the value. - # - # ### Usage - # - # ~~~ruby - # [1, 2, 3].map { break 2 } - # ~~~ - # - class Throw < Instruction - RUBY_TAG_NONE = 0x0 - RUBY_TAG_RETURN = 0x1 - RUBY_TAG_BREAK = 0x2 - RUBY_TAG_NEXT = 0x3 - RUBY_TAG_RETRY = 0x4 - RUBY_TAG_REDO = 0x5 - RUBY_TAG_RAISE = 0x6 - RUBY_TAG_THROW = 0x7 - RUBY_TAG_FATAL = 0x8 - - VM_THROW_NO_ESCAPE_FLAG = 0x8000 - VM_THROW_STATE_MASK = 0xff - - attr_reader :type - - def initialize(type) - @type = type - end - - def disasm(fmt) - fmt.instruction("throw", [fmt.object(type)]) - end - - def to_a(_iseq) - [:throw, type] - end - - def deconstruct_keys(_keys) - { type: type } - end - - def ==(other) - other.is_a?(Throw) && other.type == type - end - - def length - 2 - end - - def pops - 1 - end - - def pushes - 1 - end - - def call(vm) - state = type & VM_THROW_STATE_MASK - value = vm.pop - - case state - when RUBY_TAG_NONE - case value - when nil - # do nothing - when Exception - raise value - else - raise NotImplementedError - end - when RUBY_TAG_RETURN - raise VM::ReturnError.new(value, error_backtrace(vm)) - when RUBY_TAG_BREAK - raise VM::BreakError.new(value, error_backtrace(vm)) - when RUBY_TAG_NEXT - raise VM::NextError.new(value, error_backtrace(vm)) - else - raise NotImplementedError, "Unknown throw kind #{state}" - end - end - - private - - def error_backtrace(vm) - backtrace = [] - current = vm.frame - - while current - backtrace << "#{current.iseq.file}:#{current.line}:in" \ - "`#{current.iseq.name}'" - current = current.parent - end - - [*backtrace, *caller] - end - end - - # ### Summary - # - # `topn` pushes a single value onto the stack that is a copy of the value - # within the stack that is `number` of slots down from the top. - # - # ### Usage - # - # ~~~ruby - # case 3 - # when 1..5 - # puts "foo" - # end - # ~~~ - # - class TopN < Instruction - attr_reader :number - - def initialize(number) - @number = number - end - - def disasm(fmt) - fmt.instruction("topn", [fmt.object(number)]) - end - - def to_a(_iseq) - [:topn, number] - end - - def deconstruct_keys(_keys) - { number: number } - end - - def ==(other) - other.is_a?(TopN) && other.number == number - end - - def length - 2 - end - - def pushes - 1 - end - - def call(vm) - vm.push(vm.stack[-number - 1]) - end - end - - # ### Summary - # - # `toregexp` pops a number of values off the stack, combines them into a new - # regular expression, and pushes the new regular expression onto the stack. - # - # ### Usage - # - # ~~~ruby - # /foo #{bar}/ - # ~~~ - # - class ToRegExp < Instruction - attr_reader :options, :length - - def initialize(options, length) - @options = options - @length = length - end - - def disasm(fmt) - fmt.instruction("toregexp", [fmt.object(options), fmt.object(length)]) - end - - def to_a(_iseq) - [:toregexp, options, length] - end - - def deconstruct_keys(_keys) - { options: options, length: length } - end - - def ==(other) - other.is_a?(ToRegExp) && other.options == options && - other.length == length - end - - def pops - length - end - - def pushes - 1 - end - - def call(vm) - vm.push(Regexp.new(vm.pop(length).join, options)) - end - end - end -end diff --git a/lib/syntax_tree/yarv/legacy.rb b/lib/syntax_tree/yarv/legacy.rb deleted file mode 100644 index 8715993a..00000000 --- a/lib/syntax_tree/yarv/legacy.rb +++ /dev/null @@ -1,340 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - module YARV - # This module contains the instructions that used to be a part of YARV but - # have been replaced or removed in more recent versions. - module Legacy - # ### Summary - # - # `getclassvariable` looks for a class variable in the current class and - # pushes its value onto the stack. - # - # This version of the `getclassvariable` instruction is no longer used - # since in Ruby 3.0 it gained an inline cache.` - # - # ### Usage - # - # ~~~ruby - # @@class_variable - # ~~~ - # - class GetClassVariable < Instruction - attr_reader :name - - def initialize(name) - @name = name - end - - def disasm(fmt) - fmt.instruction("getclassvariable", [fmt.object(name)]) - end - - def to_a(_iseq) - [:getclassvariable, name] - end - - def deconstruct_keys(_keys) - { name: name } - end - - def ==(other) - other.is_a?(GetClassVariable) && other.name == name - end - - def length - 2 - end - - def pushes - 1 - end - - def canonical - YARV::GetClassVariable.new(name, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `opt_getinlinecache` is a wrapper around a series of `putobject` and - # `getconstant` instructions that allows skipping past them if the inline - # cache is currently set. It pushes the value of the cache onto the stack - # if it is set, otherwise it pushes `nil`. - # - # This instruction is no longer used since in Ruby 3.2 it was replaced by - # the consolidated `opt_getconstant_path` instruction. - # - # ### Usage - # - # ~~~ruby - # Constant - # ~~~ - # - class OptGetInlineCache < Instruction - attr_reader :label, :cache - - def initialize(label, cache) - @label = label - @cache = cache - end - - def disasm(fmt) - fmt.instruction( - "opt_getinlinecache", - [fmt.label(label), fmt.inline_storage(cache)] - ) - end - - def to_a(_iseq) - [:opt_getinlinecache, label.name, cache] - end - - def deconstruct_keys(_keys) - { label: label, cache: cache } - end - - def ==(other) - other.is_a?(OptGetInlineCache) && other.label == label && - other.cache == cache - end - - def length - 3 - end - - def pushes - 1 - end - - def call(vm) - vm.push(nil) - end - - def branch_targets - [label] - end - - def falls_through? - true - end - end - - # ### Summary - # - # `opt_newarray_max` is a specialization that occurs when the `max` method - # is called on an array literal. It pops the values of the array off the - # stack and pushes on the result. - # - # ### Usage - # - # ~~~ruby - # [a, b, c].max - # ~~~ - # - class OptNewArrayMax < Instruction - attr_reader :number - - def initialize(number) - @number = number - end - - def disasm(fmt) - fmt.instruction("opt_newarray_max", [fmt.object(number)]) - end - - def to_a(_iseq) - [:opt_newarray_max, number] - end - - def deconstruct_keys(_keys) - { number: number } - end - - def ==(other) - other.is_a?(OptNewArrayMax) && other.number == number - end - - def length - 2 - end - - def pops - number - end - - def pushes - 1 - end - - def call(vm) - vm.push(vm.pop(number).max) - end - end - - # ### Summary - # - # `opt_newarray_min` is a specialization that occurs when the `min` method - # is called on an array literal. It pops the values of the array off the - # stack and pushes on the result. - # - # ### Usage - # - # ~~~ruby - # [a, b, c].min - # ~~~ - # - class OptNewArrayMin < Instruction - attr_reader :number - - def initialize(number) - @number = number - end - - def disasm(fmt) - fmt.instruction("opt_newarray_min", [fmt.object(number)]) - end - - def to_a(_iseq) - [:opt_newarray_min, number] - end - - def deconstruct_keys(_keys) - { number: number } - end - - def ==(other) - other.is_a?(OptNewArrayMin) && other.number == number - end - - def length - 2 - end - - def pops - number - end - - def pushes - 1 - end - - def call(vm) - vm.push(vm.pop(number).min) - end - end - - # ### Summary - # - # `opt_setinlinecache` sets an inline cache for a constant lookup. It pops - # the value it should set off the top of the stack. It uses this value to - # set the cache. It then pushes that value back onto the top of the stack. - # - # This instruction is no longer used since in Ruby 3.2 it was replaced by - # the consolidated `opt_getconstant_path` instruction. - # - # ### Usage - # - # ~~~ruby - # Constant - # ~~~ - # - class OptSetInlineCache < Instruction - attr_reader :cache - - def initialize(cache) - @cache = cache - end - - def disasm(fmt) - fmt.instruction("opt_setinlinecache", [fmt.inline_storage(cache)]) - end - - def to_a(_iseq) - [:opt_setinlinecache, cache] - end - - def deconstruct_keys(_keys) - { cache: cache } - end - - def ==(other) - other.is_a?(OptSetInlineCache) && other.cache == cache - end - - def length - 2 - end - - def pops - 1 - end - - def pushes - 1 - end - - def call(vm) - end - end - - # ### Summary - # - # `setclassvariable` looks for a class variable in the current class and - # sets its value to the value it pops off the top of the stack. - # - # This version of the `setclassvariable` instruction is no longer used - # since in Ruby 3.0 it gained an inline cache. - # - # ### Usage - # - # ~~~ruby - # @@class_variable = 1 - # ~~~ - # - class SetClassVariable < Instruction - attr_reader :name - - def initialize(name) - @name = name - end - - def disasm(fmt) - fmt.instruction("setclassvariable", [fmt.object(name)]) - end - - def to_a(_iseq) - [:setclassvariable, name] - end - - def deconstruct_keys(_keys) - { name: name } - end - - def ==(other) - other.is_a?(SetClassVariable) && other.name == name - end - - def length - 2 - end - - def pops - 1 - end - - def canonical - YARV::SetClassVariable.new(name, nil) - end - - def call(vm) - canonical.call(vm) - end - end - end - end -end diff --git a/lib/syntax_tree/yarv/local_table.rb b/lib/syntax_tree/yarv/local_table.rb deleted file mode 100644 index 54cc55ad..00000000 --- a/lib/syntax_tree/yarv/local_table.rb +++ /dev/null @@ -1,89 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - module YARV - # This represents every local variable associated with an instruction - # sequence. There are two kinds of locals: plain locals that are what you - # expect, and block proxy locals, which represent local variables - # associated with blocks that were passed into the current instruction - # sequence. - class LocalTable - # A local representing a block passed into the current instruction - # sequence. - class BlockLocal - attr_reader :name - - def initialize(name) - @name = name - end - end - - # A regular local variable. - class PlainLocal - attr_reader :name - - def initialize(name) - @name = name - end - end - - # The result of looking up a local variable in the current local table. - class Lookup - attr_reader :local, :index, :level - - def initialize(local, index, level) - @local = local - @index = index - @level = level - end - end - - attr_reader :locals - - def initialize - @locals = [] - end - - def empty? - locals.empty? - end - - def find(name, level = 0) - index = locals.index { |local| local.name == name } - Lookup.new(locals[index], index, level) if index - end - - def has?(name) - locals.any? { |local| local.name == name } - end - - def names - locals.map(&:name) - end - - def name_at(index) - locals[index].name - end - - def size - locals.length - end - - # Add a BlockLocal to the local table. - def block(name) - locals << BlockLocal.new(name) unless has?(name) - end - - # Add a PlainLocal to the local table. - def plain(name) - locals << PlainLocal.new(name) unless has?(name) - end - - # This is the offset from the top of the stack where this local variable - # lives. - def offset(index) - size - (index - 3) - 1 - end - end - end -end diff --git a/lib/syntax_tree/yarv/sea_of_nodes.rb b/lib/syntax_tree/yarv/sea_of_nodes.rb deleted file mode 100644 index 33ef14f7..00000000 --- a/lib/syntax_tree/yarv/sea_of_nodes.rb +++ /dev/null @@ -1,534 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - module YARV - # A sea of nodes is an intermediate representation used by a compiler to - # represent both control and data flow in the same graph. The way we use it - # allows us to have the vertices of the graph represent either an - # instruction in the instruction sequence or a synthesized node that we add - # to the graph. The edges of the graph represent either control flow or data - # flow. - class SeaOfNodes - # This object represents a node in the graph that holds a YARV - # instruction. - class InsnNode - attr_reader :inputs, :outputs, :insn, :offset - - def initialize(insn, offset) - @inputs = [] - @outputs = [] - - @insn = insn - @offset = offset - end - - def id - offset - end - - def label - "%04d %s" % [offset, insn.disasm(Disassembler::Squished.new)] - end - end - - # Phi nodes are used to represent the merging of data flow from multiple - # incoming blocks. - class PhiNode - attr_reader :inputs, :outputs, :id - - def initialize(id) - @inputs = [] - @outputs = [] - @id = id - end - - def label - "#{id} φ" - end - end - - # Merge nodes are present in any block that has multiple incoming blocks. - # It provides a place for Phi nodes to attach their results. - class MergeNode - attr_reader :inputs, :outputs, :id - - def initialize(id) - @inputs = [] - @outputs = [] - @id = id - end - - def label - "#{id} ψ" - end - end - - # The edge of a graph represents either control flow or data flow. - class Edge - TYPES = %i[data control info].freeze - - attr_reader :from - attr_reader :to - attr_reader :type - attr_reader :label - - def initialize(from, to, type, label) - raise unless TYPES.include?(type) - - @from = from - @to = to - @type = type - @label = label - end - end - - # A subgraph represents the local data and control flow of a single basic - # block. - class SubGraph - attr_reader :first_fixed, :last_fixed, :inputs, :outputs - - def initialize(first_fixed, last_fixed, inputs, outputs) - @first_fixed = first_fixed - @last_fixed = last_fixed - @inputs = inputs - @outputs = outputs - end - end - - # The compiler is responsible for taking a data flow graph and turning it - # into a sea of nodes. - class Compiler - attr_reader :dfg, :nodes - - def initialize(dfg) - @dfg = dfg - @nodes = [] - - # We need to put a unique ID on the synthetic nodes in the graph, so - # we keep a counter that we increment any time we create a new - # synthetic node. - @id_counter = 999 - end - - def compile - local_graphs = {} - dfg.blocks.each do |block| - local_graphs[block.id] = create_local_graph(block) - end - - connect_local_graphs_control(local_graphs) - connect_local_graphs_data(local_graphs) - cleanup_phi_nodes - cleanup_insn_nodes - - SeaOfNodes.new(dfg, nodes, local_graphs).tap(&:verify) - end - - private - - # Counter for synthetic nodes. - def id_counter - @id_counter += 1 - end - - # Create a sub-graph for a single basic block - block block argument - # inputs and outputs will be left dangling, to be connected later. - def create_local_graph(block) - block_flow = dfg.block_flows.fetch(block.id) - - # A map of instructions to nodes. - insn_nodes = {} - - # Create a node for each instruction in the block. - block.each_with_length do |insn, offset| - node = InsnNode.new(insn, offset) - insn_nodes[offset] = node - nodes << node - end - - # The first and last node in the sub-graph, and the last fixed node. - previous_fixed = nil - first_fixed = nil - last_fixed = nil - - # The merge node for the phi nodes to attach to. - merge_node = nil - - # If there is more than one predecessor and we have basic block - # arguments coming in, then we need a merge node for the phi nodes to - # attach to. - if block.incoming_blocks.size > 1 && !block_flow.in.empty? - merge_node = MergeNode.new(id_counter) - nodes << merge_node - - previous_fixed = merge_node - first_fixed = merge_node - last_fixed = merge_node - end - - # Connect local control flow (only nodes with side effects.) - block.each_with_length do |insn, length| - if insn.side_effects? - insn_node = insn_nodes[length] - connect previous_fixed, insn_node, :control if previous_fixed - previous_fixed = insn_node - first_fixed ||= insn_node - last_fixed = insn_node - end - end - - # Connect basic block arguments. - inputs = {} - outputs = {} - block_flow.in.each do |arg| - # Each basic block argument gets a phi node. Even if there's only - # one predecessor! We'll tidy this up later. - phi = PhiNode.new(id_counter) - connect(phi, merge_node, :info) if merge_node - nodes << phi - inputs[arg] = phi - - block.each_with_length do |_, consumer_offset| - consumer_flow = dfg.insn_flows[consumer_offset] - consumer_flow.in.each_with_index do |producer, input_index| - if producer == arg - connect(phi, insn_nodes[consumer_offset], :data, input_index) - end - end - end - - block_flow.out.each { |out| outputs[out] = phi if out == arg } - end - - # Connect local dataflow from consumers back to producers. - block.each_with_length do |_, consumer_offset| - consumer_flow = dfg.insn_flows.fetch(consumer_offset) - consumer_flow.in.each_with_index do |producer, input_index| - if producer.local? - connect( - insn_nodes[producer.length], - insn_nodes[consumer_offset], - :data, - input_index - ) - end - end - end - - # Connect dataflow from producers that leaves the block. - block.each_with_length do |_, producer_pc| - dfg - .insn_flows - .fetch(producer_pc) - .out - .each do |consumer| - unless consumer.local? - # This is an argument to the successor block - not to an - # instruction here. - outputs[consumer.name] = insn_nodes[producer_pc] - end - end - end - - # A graph with only side-effect free instructions will currently have - # no fixed nodes! In that case just use the first instruction's node - # for both first and last. But it's a bug that it'll appear in the - # control flow path! - SubGraph.new( - first_fixed || insn_nodes[block.block_start], - last_fixed || insn_nodes[block.block_start], - inputs, - outputs - ) - end - - # Connect control flow that flows between basic blocks. - def connect_local_graphs_control(local_graphs) - dfg.blocks.each do |predecessor| - predecessor_last = local_graphs[predecessor.id].last_fixed - predecessor.outgoing_blocks.each_with_index do |successor, index| - label = - if index > 0 && - index == (predecessor.outgoing_blocks.length - 1) - # If there are multiple outgoing blocks from this block, then - # the last one is a fallthrough. Otherwise it's a branch. - :fallthrough - else - :"branch#{index}" - end - - connect( - predecessor_last, - local_graphs[successor.id].first_fixed, - :control, - label - ) - end - end - end - - # Connect data flow that flows between basic blocks. - def connect_local_graphs_data(local_graphs) - dfg.blocks.each do |predecessor| - arg_outs = local_graphs[predecessor.id].outputs.values - arg_outs.each_with_index do |arg_out, arg_n| - predecessor.outgoing_blocks.each do |successor| - successor_graph = local_graphs[successor.id] - arg_in = successor_graph.inputs.values[arg_n] - - # We're connecting to a phi node, so we may need a special - # label. - raise unless arg_in.is_a?(PhiNode) - - label = - case arg_out - when InsnNode - # Instructions that go into a phi node are labelled by the - # offset of last instruction in the block that executed - # them. This way you know which value to use for the phi, - # based on the last instruction you executed. - dfg.blocks.find do |block| - block_start = block.block_start - block_end = - block_start + block.insns.sum(&:length) - - block.insns.last.length - - if (block_start..block_end).cover?(arg_out.offset) - break block_end - end - end - when PhiNode - # Phi nodes to phi nodes are not labelled. - else - raise - end - - connect(arg_out, arg_in, :data, label) - end - end - end - end - - # We don't always build things in an optimal way. Go back and fix up - # some mess we left. Ideally we wouldn't create these problems in the - # first place. - def cleanup_phi_nodes - nodes.dup.each do |node| # dup because we're mutating - next unless node.is_a?(PhiNode) - - if node.inputs.size == 1 - # Remove phi nodes with a single input. - connect_over(node) - remove(node) - elsif node.inputs.map(&:from).uniq.size == 1 - # Remove phi nodes where all inputs are the same. - producer_edge = node.inputs.first - consumer_edge = node.outputs.find { |e| !e.to.is_a?(MergeNode) } - connect( - producer_edge.from, - consumer_edge.to, - :data, - consumer_edge.label - ) - remove(node) - end - end - end - - # Eliminate as many unnecessary nodes as we can. - def cleanup_insn_nodes - nodes.dup.each do |node| - next unless node.is_a?(InsnNode) - - case node.insn - when AdjustStack - # If there are any inputs to the adjust stack that are immediately - # discarded, we can remove them from the input list. - number = node.insn.number - - node.inputs.dup.each do |input_edge| - next if input_edge.type != :data - - from = input_edge.from - next unless from.is_a?(InsnNode) - - if from.inputs.empty? && from.outputs.size == 1 - number -= 1 - remove(input_edge.from) - elsif from.insn.is_a?(Dup) - number -= 1 - connect_over(from) - remove(from) - - new_edge = node.inputs.last - new_edge.from.outputs.delete(new_edge) - node.inputs.delete(new_edge) - end - end - - if number == 0 - connect_over(node) - remove(node) - else - next_node = - if number == 1 - InsnNode.new(Pop.new, node.offset) - else - InsnNode.new(AdjustStack.new(number), node.offset) - end - - next_node.inputs.concat(node.inputs) - next_node.outputs.concat(node.outputs) - - # Dynamically finding the index of the node in the nodes array - # because we're mutating the array as we go. - nodes[nodes.index(node)] = next_node - end - when Jump - # When you have a jump instruction that only has one input and one - # output, you can just connect over top of it and remove it. - if node.inputs.size == 1 && node.outputs.size == 1 - connect_over(node) - remove(node) - end - when Pop - from = node.inputs.find { |edge| edge.type == :data }.from - next unless from.is_a?(InsnNode) - - removed = - if from.inputs.empty? && from.outputs.size == 1 - remove(from) - true - elsif from.insn.is_a?(Dup) - connect_over(from) - remove(from) - - new_edge = node.inputs.last - new_edge.from.outputs.delete(new_edge) - node.inputs.delete(new_edge) - true - else - false - end - - if removed - connect_over(node) - remove(node) - end - end - end - end - - # Connect one node to another. - def connect(from, to, type, label = nil) - raise if from == to - raise if !to.is_a?(PhiNode) && type == :data && label.nil? - - edge = Edge.new(from, to, type, label) - from.outputs << edge - to.inputs << edge - end - - # Connect all of the inputs to all of the outputs of a node. - def connect_over(node) - node.inputs.each do |producer_edge| - node.outputs.each do |consumer_edge| - connect( - producer_edge.from, - consumer_edge.to, - producer_edge.type, - producer_edge.label - ) - end - end - end - - # Remove a node from the graph. - def remove(node) - node.inputs.each do |producer_edge| - producer_edge.from.outputs.reject! { |edge| edge.to == node } - end - - node.outputs.each do |consumer_edge| - consumer_edge.to.inputs.reject! { |edge| edge.from == node } - end - - nodes.delete(node) - end - end - - attr_reader :dfg, :nodes, :local_graphs - - def initialize(dfg, nodes, local_graphs) - @dfg = dfg - @nodes = nodes - @local_graphs = local_graphs - end - - def to_mermaid - Mermaid.flowchart do |flowchart| - nodes.each do |node| - flowchart.node("node_#{node.id}", node.label, shape: :rounded) - end - - nodes.each do |producer| - producer.outputs.each do |consumer_edge| - label = - if !consumer_edge.label - # No label. - elsif consumer_edge.to.is_a?(PhiNode) - # Edges into phi nodes are labelled by the offset of the - # instruction going into the merge. - "%04d" % consumer_edge.label - else - consumer_edge.label.to_s - end - - flowchart.link( - flowchart.fetch("node_#{producer.id}"), - flowchart.fetch("node_#{consumer_edge.to.id}"), - label, - type: consumer_edge.type == :info ? :dotted : :directed, - color: { data: :green, control: :red }[consumer_edge.type] - ) - end - end - end - end - - def verify - # Verify edge labels. - nodes.each do |node| - # Not talking about phi nodes right now. - next if node.is_a?(PhiNode) - - if node.is_a?(InsnNode) && node.insn.branch_targets.any? && - !node.insn.is_a?(Leave) - # A branching node must have at least one branch edge and - # potentially a fallthrough edge coming out. - - labels = node.outputs.map(&:label).sort - raise if labels[0] != :branch0 - raise if labels[1] != :fallthrough && labels.size > 2 - else - labels = node.inputs.filter { |e| e.type == :data }.map(&:label) - next if labels.empty? - - # No nil labels - raise if labels.any?(&:nil?) - - # Labels should start at zero. - raise unless labels.min.zero? - - # Labels should be contiguous. - raise unless labels.sort == (labels.min..labels.max).to_a - end - end - end - - def self.compile(dfg) - Compiler.new(dfg).compile - end - end - end -end diff --git a/lib/syntax_tree/yarv/vm.rb b/lib/syntax_tree/yarv/vm.rb deleted file mode 100644 index b303944d..00000000 --- a/lib/syntax_tree/yarv/vm.rb +++ /dev/null @@ -1,628 +0,0 @@ -# frozen_string_literal: true - -require "forwardable" - -module SyntaxTree - # This module provides an object representation of the YARV bytecode. - module YARV - class VM - class Jump - attr_reader :label - - def initialize(label) - @label = label - end - end - - class Leave - attr_reader :value - - def initialize(value) - @value = value - end - end - - class Frame - attr_reader :iseq, :parent, :stack_index, :_self, :nesting, :svars - attr_accessor :line, :pc - - def initialize(iseq, parent, stack_index, _self, nesting) - @iseq = iseq - @parent = parent - @stack_index = stack_index - @_self = _self - @nesting = nesting - - @svars = {} - @line = iseq.line - @pc = 0 - end - end - - class TopFrame < Frame - def initialize(iseq) - super(iseq, nil, 0, TOPLEVEL_BINDING.eval("self"), [Object]) - end - end - - class BlockFrame < Frame - def initialize(iseq, parent, stack_index) - super(iseq, parent, stack_index, parent._self, parent.nesting) - end - end - - class MethodFrame < Frame - attr_reader :name, :block - - def initialize(iseq, nesting, parent, stack_index, _self, name, block) - super(iseq, parent, stack_index, _self, nesting) - @name = name - @block = block - end - end - - class ClassFrame < Frame - def initialize(iseq, parent, stack_index, _self) - super(iseq, parent, stack_index, _self, parent.nesting + [_self]) - end - end - - class RescueFrame < Frame - def initialize(iseq, parent, stack_index) - super(iseq, parent, stack_index, parent._self, parent.nesting) - end - end - - class ThrownError < StandardError - attr_reader :value - - def initialize(value, backtrace) - super("This error was thrown by the Ruby VM.") - @value = value - set_backtrace(backtrace) - end - end - - class ReturnError < ThrownError - end - - class BreakError < ThrownError - end - - class NextError < ThrownError - end - - class FrozenCore - define_method("core#hash_merge_kwd") { |left, right| left.merge(right) } - - define_method("core#hash_merge_ptr") do |hash, *values| - hash.merge(values.each_slice(2).to_h) - end - - define_method("core#set_method_alias") do |clazz, new_name, old_name| - clazz.alias_method(new_name, old_name) - end - - define_method("core#set_variable_alias") do |new_name, old_name| - # Using eval here since there isn't a reflection API to be able to - # alias global variables. - eval("alias #{new_name} #{old_name}", binding, __FILE__, __LINE__) - end - - define_method("core#set_postexe") { |&block| END { block.call } } - - define_method("core#undef_method") do |clazz, name| - clazz.undef_method(name) - nil - end - end - - # This is the main entrypoint for events firing in the VM, which allows - # us to implement tracing. - class NullEvents - def publish_frame_change(frame) - end - - def publish_instruction(iseq, insn) - end - - def publish_stack_change(stack) - end - - def publish_tracepoint(event) - end - end - - # This is a simple implementation of tracing that prints to STDOUT. - class STDOUTEvents - attr_reader :disassembler - - def initialize - @disassembler = Disassembler.new - end - - def publish_frame_change(frame) - puts "%-16s %s" % ["frame-change", "#{frame.iseq.file}@#{frame.line}"] - end - - def publish_instruction(iseq, insn) - disassembler.current_iseq = iseq - puts "%-16s %s" % ["instruction", insn.disasm(disassembler)] - end - - def publish_stack_change(stack) - puts "%-16s %s" % ["stack-change", stack.values.inspect] - end - - def publish_tracepoint(event) - puts "%-16s %s" % ["tracepoint", event.inspect] - end - end - - # This represents the global VM stack. It effectively is an array, but - # wraps mutating functions with instrumentation. - class Stack - attr_reader :events, :values - - def initialize(events) - @events = events - @values = [] - end - - def concat(...) - values.concat(...).tap { events.publish_stack_change(self) } - end - - def last - values.last - end - - def length - values.length - end - - def push(...) - values.push(...).tap { events.publish_stack_change(self) } - end - - def pop(...) - values.pop(...).tap { events.publish_stack_change(self) } - end - - def slice!(...) - values.slice!(...).tap { events.publish_stack_change(self) } - end - - def [](...) - values.[](...) - end - - def []=(...) - values.[]=(...).tap { events.publish_stack_change(self) } - end - end - - FROZEN_CORE = FrozenCore.new.freeze - - extend Forwardable - - attr_reader :events - - attr_reader :stack - def_delegators :stack, :push, :pop - - attr_reader :frame - - def initialize(events = NullEvents.new) - @events = events - @stack = Stack.new(events) - @frame = nil - end - - def self.run(iseq) - new.run_top_frame(iseq) - end - - ########################################################################## - # Helper methods for frames - ########################################################################## - - def run_frame(frame) - # First, set the current frame to the given value. - previous = @frame - @frame = frame - events.publish_frame_change(@frame) - - # Next, set up the local table for the frame. This is actually incorrect - # as it could use the values already on the stack, but for now we're - # just doing this for simplicity. - stack.concat(Array.new(frame.iseq.local_table.size)) - - # Yield so that some frame-specific setup can be done. - start_label = yield if block_given? - frame.pc = frame.iseq.insns.index(start_label) if start_label - - # Finally we can execute the instructions one at a time. If they return - # jumps or leaves we will handle those appropriately. - loop do - case (insn = frame.iseq.insns[frame.pc]) - when Integer - frame.line = insn - frame.pc += 1 - when Symbol - events.publish_tracepoint(insn) - frame.pc += 1 - when InstructionSequence::Label - # skip labels - frame.pc += 1 - else - begin - events.publish_instruction(frame.iseq, insn) - result = insn.call(self) - rescue ReturnError => error - raise if frame.iseq.type != :method - - stack.slice!(frame.stack_index..) - @frame = frame.parent - events.publish_frame_change(@frame) - - return error.value - rescue BreakError => error - raise if frame.iseq.type != :block - - catch_entry = - find_catch_entry(frame, InstructionSequence::CatchBreak) - raise unless catch_entry - - stack.slice!( - ( - frame.stack_index + frame.iseq.local_table.size + - catch_entry.restore_sp - ).. - ) - @frame = frame - events.publish_frame_change(@frame) - - frame.pc = frame.iseq.insns.index(catch_entry.exit_label) - push(result = error.value) - rescue NextError => error - raise if frame.iseq.type != :block - - catch_entry = - find_catch_entry(frame, InstructionSequence::CatchNext) - raise unless catch_entry - - stack.slice!( - ( - frame.stack_index + frame.iseq.local_table.size + - catch_entry.restore_sp - ).. - ) - @frame = frame - events.publish_frame_change(@frame) - - frame.pc = frame.iseq.insns.index(catch_entry.exit_label) - push(result = error.value) - rescue Exception => error - catch_entry = - find_catch_entry(frame, InstructionSequence::CatchRescue) - raise unless catch_entry - - stack.slice!( - ( - frame.stack_index + frame.iseq.local_table.size + - catch_entry.restore_sp - ).. - ) - @frame = frame - events.publish_frame_change(@frame) - - frame.pc = frame.iseq.insns.index(catch_entry.exit_label) - push(result = run_rescue_frame(catch_entry.iseq, frame, error)) - end - - case result - when Jump - frame.pc = frame.iseq.insns.index(result.label) + 1 - when Leave - # this shouldn't be necessary, but is because we're not handling - # the stack correctly at the moment - stack.slice!(frame.stack_index..) - - # restore the previous frame - @frame = previous || frame.parent - events.publish_frame_change(@frame) if @frame - - return result.value - else - frame.pc += 1 - end - end - end - end - - def find_catch_entry(frame, type) - iseq = frame.iseq - iseq.catch_table.find do |catch_entry| - next unless catch_entry.is_a?(type) - - begin_pc = iseq.insns.index(catch_entry.begin_label) - end_pc = iseq.insns.index(catch_entry.end_label) - - (begin_pc...end_pc).cover?(frame.pc) - end - end - - def run_top_frame(iseq) - run_frame(TopFrame.new(iseq)) - end - - def run_block_frame(iseq, frame, *args, **kwargs, &block) - run_frame(BlockFrame.new(iseq, frame, stack.length)) do - setup_arguments(iseq, args, kwargs, block) - end - end - - def run_class_frame(iseq, clazz) - run_frame(ClassFrame.new(iseq, frame, stack.length, clazz)) - end - - def run_method_frame(name, nesting, iseq, _self, *args, **kwargs, &block) - run_frame( - MethodFrame.new( - iseq, - nesting, - frame, - stack.length, - _self, - name, - block - ) - ) { setup_arguments(iseq, args, kwargs, block) } - end - - def run_rescue_frame(iseq, frame, error) - run_frame(RescueFrame.new(iseq, frame, stack.length)) do - local_set(0, 0, error) - nil - end - end - - def setup_arguments(iseq, args, kwargs, block) - locals = [*args] - local_index = 0 - start_label = nil - - # First, set up all of the leading arguments. These are positional and - # required arguments at the start of the argument list. - if (lead_num = iseq.argument_options[:lead_num]) - lead_num.times do - local_set(local_index, 0, locals.shift) - local_index += 1 - end - end - - # Next, set up all of the optional arguments. The opt array contains - # the labels that the frame should start at if the optional is - # present. The last element of the array is the label that the frame - # should start at if all of the optional arguments are present. - if (opt = iseq.argument_options[:opt]) - opt[0...-1].each do |label| - if locals.empty? - start_label = label - break - else - local_set(local_index, 0, locals.shift) - local_index += 1 - end - - start_label = opt.last if start_label.nil? - end - end - - # If there is a splat argument, then we'll set that up here. It will - # grab up all of the remaining positional arguments. - if (rest_start = iseq.argument_options[:rest_start]) - if (post_start = iseq.argument_options[:post_start]) - length = post_start - rest_start - local_set(local_index, 0, locals[0...length]) - locals = locals[length..] - else - local_set(local_index, 0, locals.dup) - locals.clear - end - local_index += 1 - end - - # Next, set up any post arguments. These are positional arguments that - # come after the splat argument. - if (post_num = iseq.argument_options[:post_num]) - post_num.times do - local_set(local_index, 0, locals.shift) - local_index += 1 - end - end - - if (keyword_option = iseq.argument_options[:keyword]) - # First, set up the keyword bits array. - keyword_bits = - keyword_option.map do |config| - kwargs.key?(config.is_a?(Array) ? config[0] : config) - end - - iseq.local_table.locals.each_with_index do |local, index| - # If this is the keyword bits local, then set it appropriately. - if local.name.is_a?(Integer) - local_set(index, 0, keyword_bits) - next - end - - # First, find the configuration for this local in the keywords - # list if it exists. - name = local.name - config = - keyword_option.find do |keyword| - keyword.is_a?(Array) ? keyword[0] == name : keyword == name - end - - # If the configuration doesn't exist, then the local is not a - # keyword local. - next unless config - - if !config.is_a?(Array) - # required keyword - local_set(index, 0, kwargs.fetch(name)) - elsif !config[1].nil? - # optional keyword with embedded default value - local_set(index, 0, kwargs.fetch(name, config[1])) - else - # optional keyword with expression default value - local_set(index, 0, kwargs[name]) - end - end - end - - local_set(local_index, 0, block) if iseq.argument_options[:block_start] - - start_label - end - - ########################################################################## - # Helper methods for instructions - ########################################################################## - - def const_base - frame.nesting.last - end - - def frame_at(level) - current = frame - level.times { current = current.parent } - current - end - - def frame_svar - current = frame - current = current.parent while current.is_a?(BlockFrame) - current - end - - def frame_yield - current = frame - current = current.parent until current.is_a?(MethodFrame) - current - end - - def frozen_core - FROZEN_CORE - end - - def jump(label) - Jump.new(label) - end - - def leave - Leave.new(pop) - end - - def local_get(index, level) - stack[frame_at(level).stack_index + index] - end - - def local_set(index, level, value) - stack[frame_at(level).stack_index + index] = value - end - - ########################################################################## - # Methods for overriding runtime behavior - ########################################################################## - - DLEXT = ".#{RbConfig::CONFIG["DLEXT"]}" - SOEXT = ".#{RbConfig::CONFIG["SOEXT"]}" - - def require_resolved(filepath) - $LOADED_FEATURES << filepath - iseq = RubyVM::InstructionSequence.compile_file(filepath) - run_top_frame(InstructionSequence.from(iseq.to_a)) - end - - def require_internal(filepath, loading: false) - case (extname = File.extname(filepath)) - when "" - # search for all the extensions - searching = filepath - extensions = ["", ".rb", DLEXT, SOEXT] - when ".rb", DLEXT, SOEXT - # search only for the given extension name - searching = File.basename(filepath, extname) - extensions = [extname] - else - # we don't handle these extensions, raise a load error - raise LoadError, "cannot load such file -- #{filepath}" - end - - if filepath.start_with?("/") - # absolute path, search only in the given directory - directories = [File.dirname(searching)] - searching = File.basename(searching) - else - # relative path, search in the load path - directories = $LOAD_PATH - end - - directories.each do |directory| - extensions.each do |extension| - absolute_path = File.join(directory, "#{searching}#{extension}") - next unless File.exist?(absolute_path) - - if !loading && $LOADED_FEATURES.include?(absolute_path) - return false - elsif extension == ".rb" - require_resolved(absolute_path) - return true - elsif loading - return Kernel.send(:yarv_load, filepath) - else - return Kernel.send(:yarv_require, filepath) - end - end - end - - if loading - Kernel.send(:yarv_load, filepath) - else - Kernel.send(:yarv_require, filepath) - end - end - - def require(filepath) - require_internal(filepath, loading: false) - end - - def require_relative(filepath) - Kernel.yarv_require_relative(filepath) - end - - def load(filepath) - require_internal(filepath, loading: true) - end - - def eval( - source, - binding = TOPLEVEL_BINDING, - filename = "(eval)", - lineno = 1 - ) - Kernel.yarv_eval(source, binding, filename, lineno) - end - - def throw(tag, value = nil) - Kernel.throw(tag, value) - end - - def catch(tag, &block) - Kernel.catch(tag, &block) - end - end - end -end diff --git a/test/compiler_test.rb b/test/compiler_test.rb deleted file mode 100644 index 6cf8999e..00000000 --- a/test/compiler_test.rb +++ /dev/null @@ -1,533 +0,0 @@ -# frozen_string_literal: true - -return unless defined?(RubyVM::InstructionSequence) -return if RUBY_VERSION < "3.1" || RUBY_VERSION > "3.3" - -require_relative "test_helper" - -module SyntaxTree - class CompilerTest < Minitest::Test - CASES = [ - # Hooks - "BEGIN { a = 1 }", - "a = 1; END { a = 1 }; a", - # Various literals placed on the stack - "true", - "false", - "nil", - "self", - "0", - "1", - "2", - "1.0", - "1i", - "1r", - "1..2", - "1...2", - "(1)", - "%w[foo bar baz]", - "%W[foo bar baz]", - "%i[foo bar baz]", - "%I[foo bar baz]", - "{ foo: 1, bar: 1.0, baz: 1i }", - "'foo'", - "\"foo\"", - "\"foo\#{bar}\"", - "\"foo\#@bar\"", - "%q[foo]", - "%Q[foo]", - <<~RUBY, - "foo" \\ - "bar" - RUBY - <<~RUBY, - < 2", - "1 >= 2", - "1 == 2", - "1 != 2", - "1 & 2", - "1 | 2", - "1 << 2", - "1 ^ 2", - "foo.empty?", - "foo.length", - "foo.nil?", - "foo.size", - "foo.succ", - "/foo/ =~ \"foo\" && $1", - "\"foo\".freeze", - "\"foo\".freeze(1)", - "-\"foo\"", - "\"foo\".-@", - "\"foo\".-@(1)", - # Various method calls - "foo?", - "foo.bar", - "foo.bar(baz)", - "foo bar", - "foo.bar baz", - "foo(*bar)", - "foo(**bar)", - "foo(&bar)", - "foo.bar = baz", - "not foo", - "!foo", - "~foo", - "+foo", - "-foo", - "`foo`", - "`foo \#{bar} baz`", - # Local variables - "foo", - "foo = 1", - "foo = 1; bar = 2; baz = 3", - "foo = 1; foo", - "foo += 1", - "foo -= 1", - "foo *= 1", - "foo /= 1", - "foo %= 1", - "foo &= 1", - "foo |= 1", - "foo &&= 1", - "foo ||= 1", - "foo <<= 1", - "foo ^= 1", - "foo, bar = 1, 2", - "foo, bar, = 1, 2", - "foo, bar, baz = 1, 2", - "foo, bar = 1, 2, 3", - "foo = 1, 2, 3", - "foo, * = 1, 2, 3", - # Instance variables - "@foo", - "@foo = 1", - "@foo = 1; @bar = 2; @baz = 3", - "@foo = 1; @foo", - "@foo += 1", - "@foo -= 1", - "@foo *= 1", - "@foo /= 1", - "@foo %= 1", - "@foo &= 1", - "@foo |= 1", - "@foo &&= 1", - "@foo ||= 1", - "@foo <<= 1", - "@foo ^= 1", - # Class variables - "@@foo", - "@@foo = 1", - "@@foo = 1; @@bar = 2; @@baz = 3", - "@@foo = 1; @@foo", - "@@foo += 1", - "@@foo -= 1", - "@@foo *= 1", - "@@foo /= 1", - "@@foo %= 1", - "@@foo &= 1", - "@@foo |= 1", - "@@foo &&= 1", - "@@foo ||= 1", - "@@foo <<= 1", - "@@foo ^= 1", - # Global variables - "$foo", - "$foo = 1", - "$foo = 1; $bar = 2; $baz = 3", - "$foo = 1; $foo", - "$foo += 1", - "$foo -= 1", - "$foo *= 1", - "$foo /= 1", - "$foo %= 1", - "$foo &= 1", - "$foo |= 1", - "$foo &&= 1", - "$foo ||= 1", - "$foo <<= 1", - "$foo ^= 1", - # Index access - "foo[bar]", - "foo[bar] = 1", - "foo[bar] += 1", - "foo[bar] -= 1", - "foo[bar] *= 1", - "foo[bar] /= 1", - "foo[bar] %= 1", - "foo[bar] &= 1", - "foo[bar] |= 1", - "foo[bar] &&= 1", - "foo[bar] ||= 1", - "foo[bar] <<= 1", - "foo[bar] ^= 1", - "foo['true']", - "foo['true'] = 1", - # Constants (single) - "Foo", - "Foo = 1", - "Foo += 1", - "Foo -= 1", - "Foo *= 1", - "Foo /= 1", - "Foo %= 1", - "Foo &= 1", - "Foo |= 1", - "Foo &&= 1", - "Foo ||= 1", - "Foo <<= 1", - "Foo ^= 1", - # Constants (top) - "::Foo", - "::Foo = 1", - "::Foo += 1", - "::Foo -= 1", - "::Foo *= 1", - "::Foo /= 1", - "::Foo %= 1", - "::Foo &= 1", - "::Foo |= 1", - "::Foo &&= 1", - "::Foo ||= 1", - "::Foo <<= 1", - "::Foo ^= 1", - # Constants (nested) - "Foo::Bar::Baz", - "Foo::Bar::Baz += 1", - "Foo::Bar::Baz -= 1", - "Foo::Bar::Baz *= 1", - "Foo::Bar::Baz /= 1", - "Foo::Bar::Baz %= 1", - "Foo::Bar::Baz &= 1", - "Foo::Bar::Baz |= 1", - "Foo::Bar::Baz &&= 1", - "Foo::Bar::Baz ||= 1", - "Foo::Bar::Baz <<= 1", - "Foo::Bar::Baz ^= 1", - # Constants (top nested) - "::Foo::Bar::Baz", - "::Foo::Bar::Baz = 1", - "::Foo::Bar::Baz += 1", - "::Foo::Bar::Baz -= 1", - "::Foo::Bar::Baz *= 1", - "::Foo::Bar::Baz /= 1", - "::Foo::Bar::Baz %= 1", - "::Foo::Bar::Baz &= 1", - "::Foo::Bar::Baz |= 1", - "::Foo::Bar::Baz &&= 1", - "::Foo::Bar::Baz ||= 1", - "::Foo::Bar::Baz <<= 1", - "::Foo::Bar::Baz ^= 1", - # Constants (calls) - "Foo::Bar.baz", - "::Foo::Bar.baz", - "Foo::Bar.baz = 1", - "::Foo::Bar.baz = 1", - # Control flow - "foo&.bar", - "foo&.bar(1)", - "foo&.bar 1, 2, 3", - "foo&.bar {}", - "foo && bar", - "foo || bar", - "if foo then bar end", - "if foo then bar else baz end", - "if foo then bar elsif baz then qux end", - "foo if bar", - "unless foo then bar end", - "unless foo then bar else baz end", - "foo unless bar", - "foo while bar", - "while foo do bar end", - "foo until bar", - "until foo do bar end", - "for i in [1, 2, 3] do i end", - "foo ? bar : baz", - "case foo when bar then 1 end", - "case foo when bar then 1 else 2 end", - "baz if (foo == 1) .. (bar == 1)", - # Constructed values - "foo..bar", - "foo...bar", - "[1, 1.0, 1i, 1r]", - "[foo, bar, baz]", - "[@foo, @bar, @baz]", - "[@@foo, @@bar, @@baz]", - "[$foo, $bar, $baz]", - "%W[foo \#{bar} baz]", - "%I[foo \#{bar} baz]", - "[foo, bar] + [baz, qux]", - "[foo, bar, *baz, qux]", - "{ foo: bar, baz: qux }", - "{ :foo => bar, :baz => qux }", - "{ foo => bar, baz => qux }", - "%s[foo]", - "[$1, $2, $3, $4, $5, $6, $7, $8, $9]", - "/foo \#{bar} baz/", - "%r{foo \#{bar} baz}", - "[1, 2, 3].max", - "[foo, bar, baz].max", - "[foo, bar, baz].max(1)", - "[1, 2, 3].min", - "[foo, bar, baz].min", - "[foo, bar, baz].min(1)", - "[1, 2, 3].hash", - "[foo, bar, baz].hash", - "[foo, bar, baz].hash(1)", - "[1, 2, 3].foo", - "[foo, bar, baz].foo", - "[foo, bar, baz].foo(1)", - "[**{ x: true }][0][:x]", - # Core method calls - "alias foo bar", - "alias :foo :bar", - "super", - "super(1)", - "super(1, 2, 3)", - "undef foo", - "undef :foo", - "undef foo, bar, baz", - "undef :foo, :bar, :baz", - "def foo; yield; end", - "def foo; yield(1); end", - "def foo; yield(1, 2, 3); end", - # defined? usage - "defined?(foo)", - "defined?(\"foo\")", - "defined?(:foo)", - "defined?(@foo)", - "defined?(@@foo)", - "defined?($foo)", - "defined?(Foo)", - "defined?(yield)", - "defined?(super)", - "foo = 1; defined?(foo)", - "defined?(self)", - "defined?(true)", - "defined?(false)", - "defined?(nil)", - "defined?(foo = 1)", - # Ignored content - ";;;", - "# comment", - "=begin\nfoo\n=end", - <<~RUBY, - __END__ - RUBY - # Method definitions - "def foo; end", - "def foo(bar); end", - "def foo(bar, baz); end", - "def foo(bar = 1); end", - "def foo(bar = 1, baz = 2); end", - "def foo(*bar); end", - "def foo(bar, *baz); end", - "def foo(*bar, baz, qux); end", - "def foo(bar, *baz, qux); end", - "def foo(bar, baz, *qux, quaz); end", - "def foo(bar, baz, &qux); end", - "def foo(bar, *baz, &qux); end", - "def foo(&qux); qux; end", - "def foo(&qux); qux.call; end", - "def foo(&qux); qux = bar; end", - "def foo(bar:); end", - "def foo(bar:, baz:); end", - "def foo(bar: 1); end", - "def foo(bar: 1, baz: 2); end", - "def foo(bar: baz); end", - "def foo(bar: 1, baz: qux); end", - "def foo(bar: qux, baz: 1); end", - "def foo(bar: baz, qux: qaz); end", - "def foo(**rest); end", - "def foo(bar:, **rest); end", - "def foo(bar:, baz:, **rest); end", - "def foo(bar: 1, **rest); end", - "def foo(bar: 1, baz: 2, **rest); end", - "def foo(bar: baz, **rest); end", - "def foo(bar: 1, baz: qux, **rest); end", - "def foo(bar: qux, baz: 1, **rest); end", - "def foo(bar: baz, qux: qaz, **rest); end", - "def foo(...); end", - "def foo(bar, ...); end", - "def foo(...); bar(...); end", - "def foo(bar, ...); baz(1, 2, 3, ...); end", - "def self.foo; end", - "def foo.bar(baz); end", - # Class/module definitions - "module Foo; end", - "module ::Foo; end", - "module Foo::Bar; end", - "module ::Foo::Bar; end", - "module Foo; module Bar; end; end", - "class Foo; end", - "class ::Foo; end", - "class Foo::Bar; end", - "class ::Foo::Bar; end", - "class Foo; class Bar; end; end", - "class Foo < Baz; end", - "class ::Foo < Baz; end", - "class Foo::Bar < Baz; end", - "class ::Foo::Bar < Baz; end", - "class Foo; class Bar < Baz; end; end", - "class Foo < baz; end", - "class << Object; end", - "class << ::String; end", - # Block - "foo do end", - "foo {}", - "foo do |bar| end", - "foo { |bar| }", - "foo { |bar; baz| }", - "-> do end", - "-> {}", - "-> (bar) do end", - "-> (bar) {}", - "-> (bar; baz) { }", - # Pattern matching - "foo in bar", - "foo in [bar]", - "foo in [bar, baz]", - "foo in [1, 2, 3, bar, 4, 5, 6, baz]", - "foo in Foo[1, 2, 3, bar, 4, 5, 6, baz]", - "foo => bar" - ] - - # These are the combinations of instructions that we're going to test. - OPTIONS = [ - YARV::Compiler::Options.new, - YARV::Compiler::Options.new(frozen_string_literal: true), - YARV::Compiler::Options.new(operands_unification: false), - # TODO: have this work when peephole optimizations are turned off. - # YARV::Compiler::Options.new(peephole_optimization: false), - YARV::Compiler::Options.new(specialized_instruction: false), - YARV::Compiler::Options.new(inline_const_cache: false), - YARV::Compiler::Options.new(tailcall_optimization: true) - ] - - OPTIONS.each do |options| - suffix = options.to_hash.map { |key, value| "#{key}=#{value}" }.join("&") - - CASES.each do |source| - define_method(:"test_compiles_#{source}_(#{suffix})") do - assert_compiles(source, options) - end - - define_method(:"test_loads_#{source}_(#{suffix})") do - assert_loads(source, options) - end - - define_method(:"test_disasms_#{source}_(#{suffix})") do - assert_disasms(source, options) - end - end - end - - def test_evaluation - assert_evaluates 5, "2 + 3" - assert_evaluates 5, "a = 2; b = 3; a + b" - end - - private - - def serialize_iseq(iseq) - serialized = iseq.to_a - - serialized[4].delete(:node_id) - serialized[4].delete(:code_location) - serialized[4].delete(:node_ids) - - serialized[13] = serialized[13].filter_map do |insn| - case insn - when Array - insn.map do |operand| - if operand.is_a?(Array) && - operand[0] == YARV::InstructionSequence::MAGIC - serialize_iseq(operand) - else - operand - end - end - when Integer, :RUBY_EVENT_LINE - # ignore these for now - else - insn - end - end - - serialized - end - - # Check that the compiled instruction sequence matches the expected - # instruction sequence. - def assert_compiles(source, options) - assert_equal( - serialize_iseq(RubyVM::InstructionSequence.compile(source, **options)), - serialize_iseq(YARV.compile(source, options)) - ) - end - - # Check that the compiled instruction sequence matches the instruction - # sequence created directly from the compiled instruction sequence. - def assert_loads(source, options) - compiled = RubyVM::InstructionSequence.compile(source, **options) - - assert_equal( - serialize_iseq(compiled), - serialize_iseq(YARV::InstructionSequence.from(compiled.to_a, options)) - ) - end - - # Check that we can successfully disasm the compiled instruction sequence. - def assert_disasms(source, options) - compiled = RubyVM::InstructionSequence.compile(source, **options) - yarv = YARV::InstructionSequence.from(compiled.to_a, options) - assert_kind_of String, yarv.disasm - end - - def assert_evaluates(expected, source) - assert_equal expected, YARV.compile(source).eval - end - end -end diff --git a/test/yarv_test.rb b/test/yarv_test.rb deleted file mode 100644 index 78622434..00000000 --- a/test/yarv_test.rb +++ /dev/null @@ -1,517 +0,0 @@ -# frozen_string_literal: true - -return if !defined?(RubyVM::InstructionSequence) || RUBY_VERSION < "3.1" -require_relative "test_helper" - -module SyntaxTree - class YARVTest < Minitest::Test - CASES = { - "0" => "return 0\n", - "1" => "return 1\n", - "2" => "return 2\n", - "1.0" => "return 1.0\n", - "1 + 2" => "return 1 + 2\n", - "1 - 2" => "return 1 - 2\n", - "1 * 2" => "return 1 * 2\n", - "1 / 2" => "return 1 / 2\n", - "1 % 2" => "return 1 % 2\n", - "1 < 2" => "return 1 < 2\n", - "1 <= 2" => "return 1 <= 2\n", - "1 > 2" => "return 1 > 2\n", - "1 >= 2" => "return 1 >= 2\n", - "1 == 2" => "return 1 == 2\n", - "1 != 2" => "return 1 != 2\n", - "1 & 2" => "return 1 & 2\n", - "1 | 2" => "return 1 | 2\n", - "1 << 2" => "return 1 << 2\n", - "1 >> 2" => "return 1.>>(2)\n", - "1 ** 2" => "return 1.**(2)\n", - "a = 1; a" => "a = 1\nreturn a\n" - }.freeze - - CASES.each do |source, expected| - define_method("test_disassemble_#{source}") do - assert_decompiles(expected, source) - end - end - - def test_bf - hello_world = - "++++++++[>++++[>++>+++>+++>+<<<<-]>+>+>->>+[<]<-]" \ - ">>.>---.+++++++..+++.>>.<-.<.+++.------.--------.>>+.>++." - - iseq = YARV::Bf.new(hello_world).compile - stdout, = capture_io { iseq.eval } - assert_equal "Hello World!\n", stdout - - Formatter.format(hello_world, YARV::Decompiler.new(iseq).to_ruby) - end - - # rubocop:disable Layout/LineLength - EMULATION_CASES = { - # adjuststack - "x = [true]; x[0] ||= nil; x[0]" => true, - # anytostring - "\"\#{5}\"" => "5", - "class A2Str; def to_s; 1; end; end; \"\#{A2Str.new}\"" => - "#", - # branchif - "x = true; x ||= \"foo\"; x" => true, - # branchnil - "x = nil; if x&.to_s; 'hi'; else; 'bye'; end" => "bye", - # branchunless - "if 2 + 3; 'hi'; else; 'bye'; end" => "hi", - # checkkeyword - # "def evaluate(value: rand); value.floor; end; evaluate" => 0, - # checkmatch - "'foo' in String" => true, - "case 1; when *[1, 2, 3]; true; end" => true, - # checktype - "['foo'] in [String]" => true, - # concatarray - "[1, *2]" => [1, 2], - # concatstrings - "\"\#{7}\"" => "7", - # defineclass - "class DefineClass; def bar; end; end" => :bar, - "module DefineModule; def bar; end; end" => :bar, - "class << self; self; end" => - TOPLEVEL_BINDING.eval("self").singleton_class, - # defined - "defined?(1)" => "expression", - "defined?(foo = 1)" => "assignment", - "defined?(Object)" => "constant", - # definemethod - "def definemethod = 5; definemethod" => 5, - # definesmethod - "def self.definesmethod = 5; self.definesmethod" => 5, - # dup - "$global = 5" => 5, - # duparray - "[true]" => [true], - # duphash - "{ a: 1 }" => { - a: 1 - }, - # dupn - "Object::X ||= true" => true, - # expandarray - "x, = [true, false, nil]" => [true, false, nil], - "*, x = [true, false, nil]" => [true, false, nil], - # getblockparam - "def getblockparam(&block); block; end; getblockparam { 1 }.call" => 1, - # getblockparamproxy - "def getblockparamproxy(&block); block.call; end; getblockparamproxy { 1 }" => - 1, - # getclassvariable - "class CVar; @@foo = 5; end; class << CVar; @@foo; end" => 5, - # getconstant - "Object" => Object, - # getglobal - "$$" => $$, - # getinstancevariable - "@foo = 5; @foo" => 5, - # getlocal - "value = 5; self.then { self.then { self.then { value } } }" => 5, - # getlocalwc0 - "value = 5; value" => 5, - # getlocalwc1 - "value = 5; self.then { value }" => 5, - # getspecial - "1 if (2 == 2) .. (3 == 3)" => 1, - # intern - ":\"foo\#{1}\"" => :foo1, - # invokeblock - "def invokeblock = yield; invokeblock { 1 }" => 1, - # invokesuper - <<~RUBY => 2, - class Parent - def value - 1 - end - end - - class Child < Parent - def value - super + 1 - end - end - - Child.new.value - RUBY - # jump - "x = 0; if x == 0 then 1 else 2 end" => 1, - # newarray - "[\"value\"]" => ["value"], - # newarraykwsplat - "[\"string\", **{ foo: \"bar\" }]" => ["string", { foo: "bar" }], - # newhash - "def newhash(key, value) = { key => value }; newhash(1, 2)" => { - 1 => 2 - }, - # newrange - "x = 0; y = 1; (x..y).to_a" => [0, 1], - # nop - # objtostring - "\"\#{6}\"" => "6", - # once - "/\#{1}/o" => /1/o, - # opt_and - "0b0110 & 0b1011" => 0b0010, - # opt_aref - "x = [1, 2, 3]; x[1]" => 2, - # opt_aref_with - "x = { \"a\" => 1 }; x[\"a\"]" => 1, - # opt_aset - "x = [1, 2, 3]; x[1] = 4; x" => [1, 4, 3], - # opt_aset_with - "x = { \"a\" => 1 }; x[\"a\"] = 2; x" => { - "a" => 2 - }, - # opt_case_dispatch - <<~RUBY => "foo", - case 1 - when 1 - "foo" - else - "bar" - end - RUBY - # opt_div - "5 / 2" => 2, - # opt_empty_p - "[].empty?" => true, - # opt_eq - "1 == 1" => true, - # opt_ge - "1 >= 1" => true, - # opt_getconstant_path - "::Object" => Object, - # opt_gt - "1 > 1" => false, - # opt_le - "1 <= 1" => true, - # opt_length - "[1, 2, 3].length" => 3, - # opt_lt - "1 < 1" => false, - # opt_ltlt - "\"\" << 2" => "\u0002", - # opt_minus - "1 - 1" => 0, - # opt_mod - "5 % 2" => 1, - # opt_mult - "5 * 2" => 10, - # opt_neq - "1 != 1" => false, - # opt_newarray_max - "def opt_newarray_max(a, b, c) = [a, b, c].max; opt_newarray_max(1, 2, 3)" => - 3, - # opt_newarray_min - "def opt_newarray_min(a, b, c) = [a, b, c].min; opt_newarray_min(1, 2, 3)" => - 1, - # opt_nil_p - "nil.nil?" => true, - # opt_not - "!true" => false, - # opt_or - "0b0110 | 0b1011" => 0b1111, - # opt_plus - "1 + 1" => 2, - # opt_regexpmatch2 - "/foo/ =~ \"~~~foo\"" => 3, - # opt_send_without_block - "5.to_s" => "5", - # opt_size - "[1, 2, 3].size" => 3, - # opt_str_freeze - "\"foo\".freeze" => "foo", - # opt_str_uminus - "-\"foo\"" => -"foo", - # opt_succ - "1.succ" => 2, - # pop - "a ||= 2; a" => 2, - # putnil - "[nil]" => [nil], - # putobject - "2" => 2, - # putobject_INT2FIX_0_ - "0" => 0, - # putobject_INT2FIX_1_ - "1" => 1, - # putself - "self" => TOPLEVEL_BINDING.eval("self"), - # putspecialobject - "[class Undef; def foo = 1; undef foo; end]" => [nil], - # putstring - "\"foo\"" => "foo", - # send - "\"hello\".then { |value| value }" => "hello", - # setblockparam - "def setblockparam(&bar); bar = -> { 1 }; bar.call; end; setblockparam" => - 1, - # setclassvariable - "class CVarSet; @@foo = 1; end; class << CVarSet; @@foo = 10; end" => 10, - # setconstant - "SetConstant = 1" => 1, - # setglobal - "$global = 10" => 10, - # setinstancevariable - "@ivar = 5" => 5, - # setlocal - "x = 5; tap { tap { tap { x = 10 } } }; x" => 10, - # setlocal_WC_0 - "x = 5; x" => 5, - # setlocal_WC_1 - "x = 5; tap { x = 10 }; x" => 10, - # setn - "{}[:key] = 'value'" => "value", - # setspecial - "1 if (1 == 1) .. (2 == 2)" => 1, - # splatarray - "x = *(5)" => [5], - # swap - "!!defined?([[]])" => true, - # throw - # topn - "case 3; when 1..5; 'foo'; end" => "foo", - # toregexp - "/abc \#{1 + 2} def/" => /abc 3 def/ - }.freeze - # rubocop:enable Layout/LineLength - - EMULATION_CASES.each do |source, expected| - define_method("test_emulate_#{source}") do - assert_emulates(expected, source) - end - end - - ObjectSpace.each_object(YARV::Instruction.singleton_class) do |instruction| - next if instruction == YARV::Instruction - - define_method("test_instruction_interface_#{instruction.name}") do - methods = instruction.instance_methods(false) - assert_empty(%i[disasm to_a deconstruct_keys call ==] - methods) - end - end - - def test_cfg - iseq = RubyVM::InstructionSequence.compile("100 + (14 < 0 ? -1 : +1)") - iseq = SyntaxTree::YARV::InstructionSequence.from(iseq.to_a) - cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq) - - assert_equal(<<~DISASM, cfg.disasm) - == cfg: #@:1 (1,0)-(1,0)> - block_0 - 0000 putobject 100 - 0002 putobject 14 - 0004 putobject_INT2FIX_0_ - 0005 opt_lt - 0007 branchunless 13 - == to: block_13, block_9 - block_9 - == from: block_0 - 0009 putobject -1 - 0011 jump 14 - == to: block_14 - block_13 - == from: block_0 - 0013 putobject_INT2FIX_1_ - == to: block_14 - block_14 - == from: block_9, block_13 - 0014 opt_plus - 0016 leave - == to: leaves - DISASM - end - - def test_dfg - iseq = RubyVM::InstructionSequence.compile("100 + (14 < 0 ? -1 : +1)") - iseq = SyntaxTree::YARV::InstructionSequence.from(iseq.to_a) - cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq) - dfg = SyntaxTree::YARV::DataFlowGraph.compile(cfg) - - assert_equal(<<~DISASM, dfg.disasm) - == dfg: #@:1 (1,0)-(1,0)> - block_0 - 0000 putobject 100 # out: out_0 - 0002 putobject 14 # out: 5 - 0004 putobject_INT2FIX_0_ # out: 5 - 0005 opt_lt # in: 2, 4; out: 7 - 0007 branchunless 13 # in: 5 - == to: block_13, block_9 - == out: 0 - block_9 - == from: block_0 - == in: pass_0 - 0009 putobject -1 # out: out_0 - 0011 jump 14 - == to: block_14 - == out: pass_0, 9 - block_13 - == from: block_0 - == in: pass_0 - 0013 putobject_INT2FIX_1_ # out: out_0 - == to: block_14 - == out: pass_0, 13 - block_14 - == from: block_9, block_13 - == in: in_0, in_1 - 0014 opt_plus # in: in_0, in_1; out: 16 - 0016 leave # in: 14 - == to: leaves - DISASM - end - - def test_son - iseq = RubyVM::InstructionSequence.compile("(14 < 0 ? -1 : +1) + 100") - iseq = SyntaxTree::YARV::InstructionSequence.from(iseq.to_a) - cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq) - dfg = SyntaxTree::YARV::DataFlowGraph.compile(cfg) - son = SyntaxTree::YARV::SeaOfNodes.compile(dfg) - - assert_equal(<<~MERMAID, son.to_mermaid) - flowchart TD - node_0("0000 putobject 14") - node_2("0002 putobject_INT2FIX_0_") - node_3("0003 opt_lt <calldata!mid:<, argc:1, ARGS_SIMPLE>") - node_5("0005 branchunless 0011") - node_7("0007 putobject -1") - node_11("0011 putobject_INT2FIX_1_") - node_12("0012 putobject 100") - node_14("0014 opt_plus <calldata!mid:+, argc:1, ARGS_SIMPLE>") - node_16("0016 leave") - node_1000("1000 ψ") - node_1001("1001 φ") - node_0 -- "0" --> node_3 - node_2 -- "1" --> node_3 - node_3 --> node_5 - node_3 -- "0" --> node_5 - node_5 -- "branch0" --> node_11 - node_5 -- "fallthrough" --> node_1000 - node_7 -- "0009" --> node_1001 - node_11 -- "branch0" --> node_1000 - node_11 -- "0011" --> node_1001 - node_12 -- "1" --> node_14 - node_14 --> node_16 - node_14 -- "0" --> node_16 - node_1000 --> node_14 - node_1001 -.-> node_1000 - node_1001 -- "0" --> node_14 - linkStyle 0 stroke:green - linkStyle 1 stroke:green - linkStyle 2 stroke:red - linkStyle 3 stroke:green - linkStyle 4 stroke:red - linkStyle 5 stroke:red - linkStyle 6 stroke:green - linkStyle 7 stroke:red - linkStyle 8 stroke:green - linkStyle 9 stroke:green - linkStyle 10 stroke:red - linkStyle 11 stroke:green - linkStyle 12 stroke:red - linkStyle 14 stroke:green - MERMAID - end - - def test_son_indirect_basic_block_argument - iseq = RubyVM::InstructionSequence.compile("100 + (14 < 0 ? -1 : +1)") - iseq = SyntaxTree::YARV::InstructionSequence.from(iseq.to_a) - cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq) - dfg = SyntaxTree::YARV::DataFlowGraph.compile(cfg) - son = SyntaxTree::YARV::SeaOfNodes.compile(dfg) - - assert_equal(<<~MERMAID, son.to_mermaid) - flowchart TD - node_0("0000 putobject 100") - node_2("0002 putobject 14") - node_4("0004 putobject_INT2FIX_0_") - node_5("0005 opt_lt <calldata!mid:<, argc:1, ARGS_SIMPLE>") - node_7("0007 branchunless 0013") - node_9("0009 putobject -1") - node_13("0013 putobject_INT2FIX_1_") - node_14("0014 opt_plus <calldata!mid:+, argc:1, ARGS_SIMPLE>") - node_16("0016 leave") - node_1002("1002 ψ") - node_1004("1004 φ") - node_0 -- "0" --> node_14 - node_2 -- "0" --> node_5 - node_4 -- "1" --> node_5 - node_5 --> node_7 - node_5 -- "0" --> node_7 - node_7 -- "branch0" --> node_13 - node_7 -- "fallthrough" --> node_1002 - node_9 -- "0011" --> node_1004 - node_13 -- "branch0" --> node_1002 - node_13 -- "0013" --> node_1004 - node_14 --> node_16 - node_14 -- "0" --> node_16 - node_1002 --> node_14 - node_1004 -.-> node_1002 - node_1004 -- "1" --> node_14 - linkStyle 0 stroke:green - linkStyle 1 stroke:green - linkStyle 2 stroke:green - linkStyle 3 stroke:red - linkStyle 4 stroke:green - linkStyle 5 stroke:red - linkStyle 6 stroke:red - linkStyle 7 stroke:green - linkStyle 8 stroke:red - linkStyle 9 stroke:green - linkStyle 10 stroke:red - linkStyle 11 stroke:green - linkStyle 12 stroke:red - linkStyle 14 stroke:green - MERMAID - end - - private - - def assert_decompiles(expected, source) - ruby = YARV::Decompiler.new(YARV.compile(source)).to_ruby - actual = Formatter.format(source, ruby) - assert_equal expected, actual - end - - def assert_emulates(expected, source) - ruby_iseq = RubyVM::InstructionSequence.compile(source) - yarv_iseq = YARV::InstructionSequence.from(ruby_iseq.to_a) - - exercise_iseq(yarv_iseq) - result = SyntaxTree::YARV::VM.new.run_top_frame(yarv_iseq) - assert_equal(expected, result) - end - - def exercise_iseq(iseq) - iseq.disasm - iseq.to_a - - iseq.insns.each do |insn| - case insn - when YARV::InstructionSequence::Label, Integer, Symbol - next - end - - insn.pushes - insn.pops - insn.canonical - - case insn - when YARV::DefineClass - exercise_iseq(insn.class_iseq) - when YARV::DefineMethod, YARV::DefineSMethod - exercise_iseq(insn.method_iseq) - when YARV::InvokeSuper, YARV::Send - exercise_iseq(insn.block_iseq) if insn.block_iseq - when YARV::Once - exercise_iseq(insn.iseq) - end - end - end - end -end