Skip to content
This repository was archived by the owner on Jun 26, 2020. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 31 additions & 3 deletions cranelift-codegen/meta/src/cdsl/instructions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use crate::cdsl::formats::{
};
use crate::cdsl::operands::Operand;
use crate::cdsl::type_inference::Constraint;
use crate::cdsl::types::{LaneType, ValueType};
use crate::cdsl::types::{LaneType, ValueType, VectorType};
use crate::cdsl::typevar::TypeVar;

#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
Expand Down Expand Up @@ -176,6 +176,11 @@ impl Instruction {
/// Binds a lane type to this instruction, producing a `BoundInstruction`.
///
/// The instruction is cloned (the receiver is only borrowed), `lane_type` is converted into a
/// `LaneType`, and the work is delegated to the free `bind` helper with an empty list of
/// previously bound value types.
pub fn bind(&self, lane_type: impl Into<LaneType>) -> BoundInstruction {
bind(self.clone(), Some(lane_type.into()), Vec::new())
}

/// Binds a vector type to this instruction, producing a `BoundInstruction`.
///
/// The vector type is described by its lane type and `num_lanes`. The instruction is cloned
/// (the receiver is only borrowed) and the work is delegated to the free `bind_vector` helper
/// with an empty list of previously bound value types.
pub fn bind_vector(&self, lane_type: impl Into<LaneType>, num_lanes: u64) -> BoundInstruction {
bind_vector(self.clone(), lane_type.into(), num_lanes, Vec::new())
}

/// Binds this instruction without naming a concrete lane type.
///
/// Delegates to the free `bind` helper with `None` as the lane type and an empty list of
/// previously bound value types; the instruction itself is cloned.
/// NOTE(review): `None` presumably records an "any type" placeholder -- confirm in `bind`.
pub fn bind_any(&self) -> BoundInstruction {
bind(self.clone(), None, Vec::new())
}
Expand Down Expand Up @@ -400,6 +405,11 @@ impl BoundInstruction {
/// Binds one more lane type to an already bound instruction.
///
/// Consumes `self` and forwards the wrapped instruction together with the value types bound
/// so far to the free `bind` helper, so calls can be chained.
pub fn bind(self, lane_type: impl Into<LaneType>) -> BoundInstruction {
bind(self.inst, Some(lane_type.into()), self.value_types)
}

/// Binds one more vector type to an already bound instruction.
///
/// Consumes `self` and forwards the wrapped instruction together with the value types bound
/// so far to the free `bind_vector` helper, which appends a vector type with the given lane
/// type and `num_lanes` lanes.
pub fn bind_vector(self, lane_type: impl Into<LaneType>, num_lanes: u64) -> BoundInstruction {
bind_vector(self.inst, lane_type.into(), num_lanes, self.value_types)
}

/// Adds a binding without naming a concrete lane type to an already bound instruction.
///
/// Consumes `self` and forwards the wrapped instruction together with the value types bound
/// so far to the free `bind` helper, passing `None` as the lane type.
/// NOTE(review): `None` presumably records an "any type" placeholder -- confirm in `bind`.
pub fn bind_any(self) -> BoundInstruction {
bind(self.inst, None, self.value_types)
}
Expand Down Expand Up @@ -1062,6 +1072,26 @@ fn bind(
}
}

verify_polymorphic_binding(&inst, &value_types);

BoundInstruction { inst, value_types }
}

/// Shared implementation behind `Instruction::bind_vector` and `BoundInstruction::bind_vector`.
///
/// Appends a vector type made of `num_lanes` lanes of `lane_type` to the value types bound so
/// far, checks the resulting list against the instruction's polymorphic rules, and wraps both
/// in a `BoundInstruction`.
fn bind_vector(
    inst: Instruction,
    lane_type: LaneType,
    num_lanes: u64,
    mut value_types: Vec<ValueTypeOrAny>,
) -> BoundInstruction {
    let lanes = VectorType::new(lane_type, num_lanes);
    value_types.push(ValueTypeOrAny::ValueType(ValueType::Vector(lanes)));

    // Validate only after the new type has been appended, so the check sees the full binding.
    verify_polymorphic_binding(&inst, &value_types);

    BoundInstruction { inst, value_types }
}

/// Helper to verify that binding types to the instruction does not violate polymorphic rules
fn verify_polymorphic_binding(inst: &Instruction, value_types: &Vec<ValueTypeOrAny>) {
match &inst.polymorphic_info {
Some(poly) => {
assert!(
Expand All @@ -1076,6 +1106,4 @@ fn bind(
));
}
}

BoundInstruction { inst, value_types }
}
57 changes: 36 additions & 21 deletions cranelift-codegen/meta/src/gen_legalizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ fn unwrap_inst(

fmtln!(
fmt,
"let ({}, predicate) = if let ir::InstructionData::{} {{",
"let ({}, predicate) = if let crate::ir::InstructionData::{} {{",
arg_names,
iform.name
);
Expand Down Expand Up @@ -135,18 +135,16 @@ fn unwrap_inst(
.to_comment_string(var_pool)
));

fmt.line("let results = pos.func.dfg.inst_results(inst);");
fmt.line("let r = pos.func.dfg.inst_results(inst);");
for (i, &var_index) in def.defined_vars.iter().enumerate() {
let var = var_pool.get(var_index);
fmtln!(fmt, "let {} = &results[{}];", var.name, i);
if var.has_free_typevar() {
fmtln!(
fmt,
"let typeof_{} = pos.func.dfg.value_type(*{});",
var.name,
var.name
);
}
fmtln!(fmt, "let {} = &r[{}];", var.name, i);
fmtln!(
fmt,
"let typeof_{} = pos.func.dfg.value_type(*{});",
var.name,
var.name
);
}

replace_inst = true;
Expand Down Expand Up @@ -187,11 +185,15 @@ fn build_derived_expr(tv: &TypeVar) -> String {
Some(base) => base,
None => {
assert!(tv.name.starts_with("typeof_"));
return format!("{}", tv.name);
return format!("Some({})", tv.name);
}
};
let base_expr = build_derived_expr(&base.type_var);
format!("{}.{}()", base_expr, base.derived_func.name())
format!(
"{}.map(|t: crate::ir::Type| t.{}())",
base_expr,
base.derived_func.name()
)
}

/// Emit rust code for the given check.
Expand Down Expand Up @@ -221,18 +223,29 @@ fn emit_runtime_typecheck<'a, 'b>(
Constraint::Eq(tv1, tv2) => {
fmtln!(
fmt,
"let predicate = predicate && {} == {};",
"let predicate = predicate && match ({}, {}) {{",
build_derived_expr(tv1),
build_derived_expr(tv2)
);
fmt.indent(|fmt| {
fmt.line("(Some(a), Some(b)) => a == b,");
fmt.comment("On overflow, constraint doesn\'t apply");
fmt.line("_ => false,");
});
fmtln!(fmt, "};");
}
Constraint::WiderOrEq(tv1, tv2) => {
fmtln!(
fmt,
"let predicate = predicate && {}.wider_or_equal({});",
"let predicate = predicate && match ({}, {}) {{",
build_derived_expr(tv1),
build_derived_expr(tv2)
);
fmt.indent(|fmt| {
fmt.line("(Some(a), Some(b)) => a.wider_or_equal(b),");
fmt.comment("On overflow, constraint doesn\'t apply");
fmt.line("_ => false,");
});
fmtln!(fmt, "};");
}
}
Expand Down Expand Up @@ -290,7 +303,8 @@ fn emit_dst_inst(def: &Def, def_pool: &DefPool, var_pool: &VarPool, fmt: &mut Fo
// Unwrapping would have left the results intact. Replace the whole instruction.
fmtln!(
fmt,
"pos.func.dfg.replace(inst).{};",
"let {} = pos.func.dfg.replace(inst).{};",
defined_vars,
def.apply.rust_builder(&def.defined_vars, var_pool)
);

Expand Down Expand Up @@ -406,16 +420,17 @@ fn gen_transform_group<'a>(
// Function arguments.
fmtln!(fmt, "pub fn {}(", group.name);
fmt.indent(|fmt| {
fmt.line("inst: ir::Inst,");
fmt.line("func: &mut ir::Function,");
fmt.line("cfg: &mut ControlFlowGraph,");
fmt.line("isa: &dyn TargetIsa,");
fmt.line("inst: crate::ir::Inst,");
fmt.line("func: &mut crate::ir::Function,");
fmt.line("cfg: &mut crate::flowgraph::ControlFlowGraph,");
fmt.line("isa: &dyn crate::isa::TargetIsa,");
});
fmtln!(fmt, ") -> bool {");

// Function body.
fmt.indent(|fmt| {
fmt.line("use ir::InstBuilder;");
fmt.line("use crate::ir::InstBuilder;");
fmt.line("use crate::cursor::{Cursor, FuncCursor};");
fmt.line("let mut pos = FuncCursor::new(func).at_inst(inst);");
fmt.line("pos.use_srcloc(inst);");

Expand Down
111 changes: 107 additions & 4 deletions cranelift-codegen/meta/src/isa/x86/encodings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ use crate::cdsl::instructions::{
};
use crate::cdsl::recipes::{EncodingRecipe, EncodingRecipeNumber, Recipes};
use crate::cdsl::settings::{SettingGroup, SettingPredicateNumber};

use crate::shared::types::Bool::B1;
use crate::cdsl::types::ValueType;
use crate::shared::types::Bool::{B1, B16, B32, B64, B8};
use crate::shared::types::Float::{F32, F64};
use crate::shared::types::Int::{I16, I32, I64, I8};
use crate::shared::Definitions as SharedDefinitions;
Expand Down Expand Up @@ -250,6 +250,17 @@ impl PerCpuModeEncodings {
self.enc64(inst.clone().bind(I64).bind_any(), template);
}
}

/// Add the same encoding to both X86_32 and X86_64, guarded by the ISA predicate `isap`.
///
/// Assumes configuration (e.g. REX, operand binding) has already happened: `inst` and
/// `template` are passed through unchanged to `enc32_isap` and then `enc64_isap`.
fn enc_32_64_isap(
&mut self,
inst: BoundInstruction,
template: Template,
isap: SettingPredicateNumber,
) {
// The 32-bit call gets clones so the originals can be moved into the 64-bit call below.
self.enc32_isap(inst.clone(), template.clone(), isap);
self.enc64_isap(inst, template, isap);
}
}

// Definitions.
Expand Down Expand Up @@ -315,6 +326,7 @@ pub fn define(
let ifcmp_sp = shared.by_name("ifcmp_sp");
let imul = shared.by_name("imul");
let indirect_jump_table_br = shared.by_name("indirect_jump_table_br");
let insertlane = shared.by_name("insertlane");
let ireduce = shared.by_name("ireduce");
let ishl = shared.by_name("ishl");
let ishl_imm = shared.by_name("ishl_imm");
Expand All @@ -332,6 +344,7 @@ pub fn define(
let load_complex = shared.by_name("load_complex");
let nearest = shared.by_name("nearest");
let popcnt = shared.by_name("popcnt");
let raw_bitcast = shared.by_name("raw_bitcast");
let regfill = shared.by_name("regfill");
let regmove = shared.by_name("regmove");
let regspill = shared.by_name("regspill");
Expand All @@ -340,6 +353,7 @@ pub fn define(
let rotl_imm = shared.by_name("rotl_imm");
let rotr = shared.by_name("rotr");
let rotr_imm = shared.by_name("rotr_imm");
let scalar_to_vector = shared.by_name("scalar_to_vector");
let selectif = shared.by_name("selectif");
let sextend = shared.by_name("sextend");
let sload16 = shared.by_name("sload16");
Expand Down Expand Up @@ -377,6 +391,8 @@ pub fn define(
let x86_fmax = x86.by_name("x86_fmax");
let x86_fmin = x86.by_name("x86_fmin");
let x86_pop = x86.by_name("x86_pop");
let x86_pshufd = x86.by_name("x86_pshufd");
let x86_pshufb = x86.by_name("x86_pshufb");
let x86_push = x86.by_name("x86_push");
let x86_sdivmodx = x86.by_name("x86_sdivmodx");
let x86_smulx = x86.by_name("x86_smulx");
Expand Down Expand Up @@ -450,6 +466,7 @@ pub fn define(
let rec_ldWithIndexDisp8 = r.template("ldWithIndexDisp8");
let rec_mulx = r.template("mulx");
let rec_null = r.recipe("null");
let rec_null_fpr = r.recipe("null_fpr");
let rec_pcrel_fnaddr8 = r.template("pcrel_fnaddr8");
let rec_pcrel_gvaddr8 = r.template("pcrel_gvaddr8");
let rec_popq = r.template("popq");
Expand All @@ -459,6 +476,8 @@ pub fn define(
let rec_pushq = r.template("pushq");
let rec_ret = r.template("ret");
let rec_r_ib = r.template("r_ib");
let rec_r_ib_unsigned = r.template("r_ib_unsigned");
let rec_r_ib_unsigned_r = r.template("r_ib_unsigned_r");
let rec_r_id = r.template("r_id");
let rec_rcmp = r.template("rcmp");
let rec_rcmp_ib = r.template("rcmp_ib");
Expand Down Expand Up @@ -515,6 +534,8 @@ pub fn define(
let use_popcnt = settings.predicate_by_name("use_popcnt");
let use_lzcnt = settings.predicate_by_name("use_lzcnt");
let use_bmi1 = settings.predicate_by_name("use_bmi1");
let use_sse2 = settings.predicate_by_name("use_sse2");
let use_ssse3 = settings.predicate_by_name("use_ssse3");
let use_sse41 = settings.predicate_by_name("use_sse41");

// Definitions.
Expand Down Expand Up @@ -603,8 +624,14 @@ pub fn define(
// Finally, the 0xb8 opcode takes an 8-byte immediate with a REX.W prefix.
e.enc64(iconst.bind(I64), rec_pu_iq.opcodes(vec![0xb8]).rex().w());

// Bool constants.
e.enc_both(bconst.bind(B1), rec_pu_id_bool.opcodes(vec![0xb8]));
// Bool constants (uses MOV)
for &ty in &[B1, B8, B16, B32] {
e.enc_both(bconst.bind(ty), rec_pu_id_bool.opcodes(vec![0xb8]));
}
e.enc64(
bconst.bind(B64),
rec_pu_id_bool.opcodes(vec![0xb8]).rex().w(),
);

// Shifts and rotates.
// Note that the dynamic shift amount is only masked by 5 or 6 bits; the 8-bit
Expand Down Expand Up @@ -1565,5 +1592,81 @@ pub fn define(
e.enc_both(ffcmp.bind(F32), rec_fcmp.opcodes(vec![0x0f, 0x2e]));
e.enc_both(ffcmp.bind(F64), rec_fcmp.opcodes(vec![0x66, 0x0f, 0x2e]));

// SIMD splat: before x86 can use vector data, it must be moved to XMM registers; see
// legalize.rs for how this is done; once there, x86_pshuf* (below) is used for broadcasting the
// value across the register

// PSHUFB, 8-bit shuffle using two XMM registers
for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 8) {
let number_of_lanes = 128 / ty.lane_bits();
let instruction = x86_pshufb.bind_vector(ty, number_of_lanes);
let template = rec_fa.nonrex().opcodes(vec![0x66, 0x0f, 0x38, 0x00]);
e.enc32_isap(instruction.clone(), template.clone(), use_ssse3);
e.enc64_isap(instruction, template, use_ssse3);
}

// PSHUFD, 32-bit shuffle using one XMM register and a u8 immediate
for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 32) {
let number_of_lanes = 128 / ty.lane_bits();
let instruction = x86_pshufd.bind_vector(ty, number_of_lanes);
let template = rec_r_ib_unsigned.nonrex().opcodes(vec![0x66, 0x0f, 0x70]);
e.enc32_isap(instruction.clone(), template.clone(), use_sse2);
e.enc64_isap(instruction, template, use_sse2);
}

// SIMD scalar_to_vector; this uses MOV to copy the scalar value to an XMM register; according
// to the Intel manual: "When the destination operand is an XMM register, the source operand is
// written to the low doubleword of the register and the register is zero-extended to 128 bits."
for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() >= 8) {
let number_of_lanes = 128 / ty.lane_bits();
let instruction = scalar_to_vector.bind_vector(ty, number_of_lanes).bind(ty);
let template = rec_frurm.opcodes(vec![0x66, 0x0f, 0x6e]); // MOVD/MOVQ
if ty.lane_bits() < 64 {
// no 32-bit encodings for 64-bit widths
e.enc32_isap(instruction.clone(), template.clone(), use_sse2);
}
e.enc_x86_64_isap(instruction, template, use_sse2);
}

// SIMD insertlane
let mut insertlane_mapping: HashMap<u64, (Vec<u8>, SettingPredicateNumber)> = HashMap::new();
insertlane_mapping.insert(8, (vec![0x66, 0x0f, 0x3a, 0x20], use_sse41)); // PINSRB
insertlane_mapping.insert(16, (vec![0x66, 0x0f, 0xc4], use_sse2)); // PINSRW
insertlane_mapping.insert(32, (vec![0x66, 0x0f, 0x3a, 0x22], use_sse41)); // PINSRD
insertlane_mapping.insert(64, (vec![0x66, 0x0f, 0x3a, 0x22], use_sse41)); // PINSRQ, only x86_64

for ty in ValueType::all_lane_types() {
if let Some((opcode, isap)) = insertlane_mapping.get(&ty.lane_bits()) {
let number_of_lanes = 128 / ty.lane_bits();
let instruction = insertlane.bind_vector(ty, number_of_lanes);
let template = rec_r_ib_unsigned_r.opcodes(opcode.clone());
if ty.lane_bits() < 64 {
e.enc_32_64_isap(instruction, template.nonrex(), isap.clone());
} else {
// turns out the 64-bit widths have REX.W encodings and are only available on x86_64
e.enc64_isap(instruction, template.rex().w(), isap.clone());
}
}
}

// SIMD bitcast f64 to all 8-bit-lane vectors (for legalizing splat.x8x16); assumes that f64 is stored in an XMM register
for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 8) {
let instruction = bitcast.bind_vector(ty, 16).bind(F64);
e.enc32_rec(instruction.clone(), rec_null_fpr, 0);
e.enc64_rec(instruction, rec_null_fpr, 0);
}

// SIMD bitcast all 128-bit vectors to each other (for legalizing splat.x16x8)
for from_type in ValueType::all_lane_types().filter(|t| t.lane_bits() >= 8) {
for to_type in ValueType::all_lane_types().filter(|t| t.lane_bits() >= 8 && *t != from_type)
{
let instruction = raw_bitcast
.bind_vector(to_type, 128 / to_type.lane_bits())
.bind_vector(from_type, 128 / from_type.lane_bits());
e.enc32_rec(instruction.clone(), rec_null_fpr, 0);
e.enc64_rec(instruction, rec_null_fpr, 0);
}
}

e
}
Loading