Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce -Zvirtual-function-elimination codegen flag #96285

Merged
merged 8 commits into from
Jun 15, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -3695,6 +3695,7 @@ dependencies = [
"rustc_serialize",
"rustc_session",
"rustc_span",
"rustc_symbol_mangling",
"rustc_target",
"smallvec",
"tracing",
Expand Down
10 changes: 10 additions & 0 deletions compiler/rustc_codegen_gcc/src/intrinsic/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,16 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
self.context.new_rvalue_from_int(self.int_type, 0)
}

/// Stub for the type-checked vtable load, which this backend does not support.
///
/// All three arguments are ignored; the result is an rvalue built from the
/// integer `0` with `self.int_type`.
fn type_checked_load(
&mut self,
_llvtable: Self::Value,
_vtable_byte_offset: u64,
_typeid: Self::Value,
) -> Self::Value {
// Unsupported.
self.context.new_rvalue_from_int(self.int_type, 0)
}

/// Not implemented for this backend: the argument is ignored and calling this
/// panics via `unimplemented!()`.
fn va_start(&mut self, _va_list: RValue<'gcc>) -> RValue<'gcc> {
unimplemented!();
}
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_codegen_llvm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ rustc-demangle = "0.1.21"
rustc_arena = { path = "../rustc_arena" }
rustc_attr = { path = "../rustc_attr" }
rustc_codegen_ssa = { path = "../rustc_codegen_ssa" }
rustc_symbol_mangling = { path = "../rustc_symbol_mangling" }
rustc_data_structures = { path = "../rustc_data_structures" }
rustc_errors = { path = "../rustc_errors" }
rustc_fs_util = { path = "../rustc_fs_util" }
Expand Down
14 changes: 13 additions & 1 deletion compiler/rustc_codegen_llvm/src/back/lto.rs
Original file line number Diff line number Diff line change
Expand Up @@ -586,9 +586,21 @@ pub(crate) fn run_pass_manager(
// LTO-specific optimization passes that LLVM provides.
//
// This code is based off the code found in llvm's LTO code generator:
// tools/lto/LTOCodeGenerator.cpp
// llvm/lib/LTO/LTOCodeGenerator.cpp
debug!("running the pass manager");
unsafe {
if !llvm::LLVMRustHasModuleFlag(
module.module_llvm.llmod(),
"LTOPostLink".as_ptr().cast(),
11,
) {
llvm::LLVMRustAddModuleFlag(
module.module_llvm.llmod(),
llvm::LLVMModFlagBehavior::Error,
"LTOPostLink\0".as_ptr().cast(),
1,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be 11?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe the HasModuleFlag should be checking for 1, but also adding this flag here seems super sketchy to me. This is a pretty internal thing to LTO AFAICT.

In that context, it would be great if this code had a comment with context as to why this is being done.

Copy link

@tschuett tschuett Apr 24, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The code here is based on the file tschuett linked above. In the LLVM repo, that code is responsible for setting up LTO. Adding this flag was missing from the Rust code that is meant to emulate the code in LTOCodeGenerator.cpp.

The 11 argument in the HasModuleFlag is the size of the string passed to this function, not the value of the flag. The 1 argument in the AddModuleFlag function is the value of the module flag. LLVM doesn't need a string size here, because it expects a \0 terminated string in this function.

);
}
if llvm_util::should_use_new_llvm_pass_manager(
&config.new_llvm_pass_manager,
&cgcx.target_arch,
Expand Down
17 changes: 14 additions & 3 deletions compiler/rustc_codegen_llvm/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,15 @@ pub unsafe fn create_module<'ll>(
)
}

if sess.opts.debugging_opts.virtual_function_elimination {
llvm::LLVMRustAddModuleFlag(
llmod,
llvm::LLVMModFlagBehavior::Error,
"Virtual Function Elim\0".as_ptr().cast(),
1,
);
}

llmod
}

Expand Down Expand Up @@ -656,6 +665,7 @@ impl<'ll> CodegenCx<'ll, '_> {
let t_isize = self.type_isize();
let t_f32 = self.type_f32();
let t_f64 = self.type_f64();
let t_metadata = self.type_metadata();

ifn!("llvm.wasm.trunc.unsigned.i32.f32", fn(t_f32) -> t_i32);
ifn!("llvm.wasm.trunc.unsigned.i32.f64", fn(t_f64) -> t_i32);
Expand Down Expand Up @@ -881,11 +891,12 @@ impl<'ll> CodegenCx<'ll, '_> {
ifn!("llvm.instrprof.increment", fn(i8p, t_i64, t_i32, t_i32) -> void);
}

ifn!("llvm.type.test", fn(i8p, self.type_metadata()) -> i1);
ifn!("llvm.type.test", fn(i8p, t_metadata) -> i1);
ifn!("llvm.type.checked.load", fn(i8p, t_i32, t_metadata) -> mk_struct! {i8p, i1});

if self.sess().opts.debuginfo != DebugInfo::None {
ifn!("llvm.dbg.declare", fn(self.type_metadata(), self.type_metadata()) -> void);
ifn!("llvm.dbg.value", fn(self.type_metadata(), t_i64, self.type_metadata()) -> void);
ifn!("llvm.dbg.declare", fn(t_metadata, t_metadata) -> void);
ifn!("llvm.dbg.value", fn(t_metadata, t_i64, t_metadata) -> void);
}
None
}
Expand Down
99 changes: 92 additions & 7 deletions compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,20 +30,21 @@ use rustc_hir::def_id::{DefId, LOCAL_CRATE};
use rustc_index::vec::{Idx, IndexVec};
use rustc_middle::bug;
use rustc_middle::mir::{self, GeneratorLayout};
use rustc_middle::ty::layout::LayoutOf;
use rustc_middle::ty::layout::TyAndLayout;
use rustc_middle::ty::layout::{LayoutOf, TyAndLayout};
use rustc_middle::ty::subst::GenericArgKind;
use rustc_middle::ty::{self, AdtKind, Instance, ParamEnv, Ty, TyCtxt};
use rustc_session::config::{self, DebugInfo};
use rustc_middle::ty::{
self, AdtKind, Instance, ParamEnv, PolyExistentialTraitRef, Ty, TyCtxt, Visibility,
};
use rustc_session::config::{self, DebugInfo, Lto};
use rustc_span::symbol::Symbol;
use rustc_span::FileName;
use rustc_span::FileNameDisplayPreference;
use rustc_span::{self, SourceFile};
use rustc_span::{self, FileNameDisplayPreference, SourceFile};
use rustc_symbol_mangling::typeid_for_trait_ref;
use rustc_target::abi::{Align, Size};
use smallvec::smallvec;
use tracing::debug;

use libc::{c_longlong, c_uint};
use libc::{c_char, c_longlong, c_uint};
use std::borrow::Cow;
use std::fmt::{self, Write};
use std::hash::{Hash, Hasher};
Expand Down Expand Up @@ -1468,6 +1469,84 @@ fn build_vtable_type_di_node<'ll, 'tcx>(
.di_node
}

fn vcall_visibility_metadata<'ll, 'tcx>(
cx: &CodegenCx<'ll, 'tcx>,
ty: Ty<'tcx>,
trait_ref: Option<PolyExistentialTraitRef<'tcx>>,
vtable: &'ll Value,
) {
/// Visibility level assigned to a vtable. The numeric discriminant is what gets
/// attached to the vtable as `MD_vcall_visibility` metadata further down in this
/// function (via `vcall_visibility as u64`).
enum VCallVisibility {
/// Used when the vtable has to be assumed visible from anywhere.
Public = 0,
/// Used when all usages of the vtable's functions are known within the linkage unit.
LinkageUnit = 1,
/// Used when a private vtable can only be seen by a single CGU/translation unit.
TranslationUnit = 2,
}

let Some(trait_ref) = trait_ref else { return };

let trait_ref_self = trait_ref.with_self_ty(cx.tcx, ty);
let trait_ref_self = cx.tcx.erase_regions(trait_ref_self);
let trait_def_id = trait_ref_self.def_id();
let trait_vis = cx.tcx.visibility(trait_def_id);
Copy link
Contributor

@tmiasko tmiasko Apr 21, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that the closest existing concept similar to vcall_visibility would be reachability, rather than visibility.

Example with incorrectly optimized vtables, showing why visibility is insufficient:
trait Foo { fn foo(&self) { println!("foo") } }
trait Bar { fn bar(&self) { println!("bar") } }
trait Baz { fn baz(&self) { println!("baz") } }

impl Foo for usize {}
impl Bar for usize {}
impl Baz for usize {}

pub struct FooBox(Box<dyn Foo>);
pub struct BarBox(Box<dyn Bar>);
pub struct BazBox(Box<dyn Baz>);

pub fn make_foo() -> FooBox { FooBox(Box::new(0)) }
pub fn make_bar() -> BarBox { BarBox(Box::new(0)) }
pub fn make_baz() -> BazBox { BazBox(Box::new(0)) }

#[inline]
pub fn f(a: FooBox) { a.0.foo() }
pub fn g<T>(b: BarBox) { b.0.bar() }
#[inline]
fn h(c: BazBox) { c.0.baz() }
pub const H: fn(BazBox) = h;

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for pointing this out. I don't really have a solution for this right now. I tried looking at the reachability of the trait_ref and the members of the traits with the privacy_access_levels query. But for all of the traits and the functions in the traits (queried with tcx.vtable_entries) this just returns None, meaning it is not reachable IIUC.

I guess I would somehow have to check the reachability for the dyn Trait types. But I couldn't figure out how to do that.

Do you have ideas on how to best address this issue?


let cgus = cx.sess().codegen_units();
let single_cgu = cgus == 1;

let lto = cx.sess().lto();

// Since LLVM requires full LTO for the virtual function elimination optimization to apply,
// only the `Lto::Fat` cases are relevant currently.
let vcall_visibility = match (lto, trait_vis, single_cgu) {
// If there is no LTO and the visibility is public, we have to assume that the vtable can
// be seen from anywhere. With multiple CGUs, the vtable is quasi-public.
(Lto::No | Lto::ThinLocal, Visibility::Public, _)
| (Lto::No, Visibility::Restricted(_) | Visibility::Invisible, false) => {
VCallVisibility::Public
}
// With LTO and a quasi-public visibility, the usages of the functions of the vtable are
// all known by the `LinkageUnit`.
// FIXME: LLVM only supports this optimization for `Lto::Fat` currently. Once it also
// supports `Lto::Thin` the `VCallVisibility` may have to be adjusted for those.
(Lto::Fat | Lto::Thin, Visibility::Public, _)
| (
Lto::ThinLocal | Lto::Thin | Lto::Fat,
Visibility::Restricted(_) | Visibility::Invisible,
false,
) => VCallVisibility::LinkageUnit,
// If there is only one CGU, private vtables can only be seen by that CGU/translation unit
// and therefore we know of all usages of functions in the vtable.
(_, Visibility::Restricted(_) | Visibility::Invisible, true) => {
VCallVisibility::TranslationUnit
}
};

let trait_ref_typeid = typeid_for_trait_ref(cx.tcx, trait_ref);

unsafe {
let typeid = llvm::LLVMMDStringInContext(
cx.llcx,
trait_ref_typeid.as_ptr() as *const c_char,
trait_ref_typeid.as_bytes().len() as c_uint,
);
let v = [cx.const_usize(0), typeid];
llvm::LLVMRustGlobalAddMetadata(
vtable,
llvm::MD_type as c_uint,
llvm::LLVMValueAsMetadata(llvm::LLVMMDNodeInContext(
cx.llcx,
v.as_ptr(),
v.len() as c_uint,
)),
);
let vcall_visibility = llvm::LLVMValueAsMetadata(cx.const_u64(vcall_visibility as u64));
let vcall_visibility_metadata = llvm::LLVMMDNodeInContext2(cx.llcx, &vcall_visibility, 1);
llvm::LLVMGlobalSetMetadata(
vtable,
llvm::MetadataType::MD_vcall_visibility as c_uint,
vcall_visibility_metadata,
);
}
}

/// Creates debug information for the given vtable, which is for the
/// given type.
///
Expand All @@ -1478,6 +1557,12 @@ pub fn create_vtable_di_node<'ll, 'tcx>(
poly_trait_ref: Option<ty::PolyExistentialTraitRef<'tcx>>,
vtable: &'ll Value,
) {
// FIXME(flip1995): The virtual function elimination optimization only works with full LTO in
// LLVM at the moment.
if cx.sess().opts.debugging_opts.virtual_function_elimination && cx.sess().lto() == Lto::Fat {
vcall_visibility_metadata(cx, ty, poly_trait_ref, vtable);
}

if cx.dbg_cx.is_none() {
return;
}
Expand Down
10 changes: 10 additions & 0 deletions compiler/rustc_codegen_llvm/src/intrinsic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,16 @@ impl<'ll, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'_, 'll, 'tcx> {
self.call_intrinsic("llvm.type.test", &[bitcast, typeid])
}

/// Emits a call to the `llvm.type.checked.load` intrinsic.
///
/// `vtable_byte_offset` is converted to an `i32` constant before being passed
/// as the intrinsic's second operand; `llvtable` and `typeid` are forwarded
/// unchanged as the first and third operands. The intrinsic call's result is
/// returned as-is.
fn type_checked_load(
    &mut self,
    llvtable: &'ll Value,
    vtable_byte_offset: u64,
    typeid: &'ll Value,
) -> Self::Value {
    // The intrinsic takes the offset as an i32, hence the narrowing cast.
    let offset_const = self.const_i32(vtable_byte_offset as i32);
    let intrinsic_args = [llvtable, offset_const, typeid];
    self.call_intrinsic("llvm.type.checked.load", &intrinsic_args)
}

/// Emits a call to the `llvm.va_start` intrinsic with `va_list` as its only
/// argument and returns the value produced by that call.
fn va_start(&mut self, va_list: &'ll Value) -> &'ll Value {
self.call_intrinsic("llvm.va_start", &[va_list])
}
Expand Down
8 changes: 8 additions & 0 deletions compiler/rustc_codegen_llvm/src/llvm/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,7 @@ pub enum MetadataType {
MD_nonnull = 11,
MD_align = 17,
MD_type = 19,
MD_vcall_visibility = 28,
MD_noundef = 29,
}

Expand Down Expand Up @@ -1067,6 +1068,7 @@ extern "C" {
pub fn LLVMReplaceAllUsesWith<'a>(OldVal: &'a Value, NewVal: &'a Value);
pub fn LLVMSetMetadata<'a>(Val: &'a Value, KindID: c_uint, Node: &'a Value);
pub fn LLVMGlobalSetMetadata<'a>(Val: &'a Value, KindID: c_uint, Metadata: &'a Metadata);
pub fn LLVMRustGlobalAddMetadata<'a>(Val: &'a Value, KindID: c_uint, Metadata: &'a Metadata);
pub fn LLVMValueAsMetadata(Node: &Value) -> &Metadata;

// Operations on constants of any type
Expand All @@ -1080,6 +1082,11 @@ extern "C" {
Vals: *const &'a Value,
Count: c_uint,
) -> &'a Value;
pub fn LLVMMDNodeInContext2<'a>(
C: &'a Context,
Vals: *const &'a Metadata,
Count: size_t,
) -> &'a Metadata;
pub fn LLVMAddNamedMetadataOperand<'a>(M: &'a Module, Name: *const c_char, Val: &'a Value);

// Operations on scalar constants
Expand Down Expand Up @@ -1936,6 +1943,7 @@ extern "C" {
name: *const c_char,
value: u32,
);
pub fn LLVMRustHasModuleFlag(M: &Module, name: *const c_char, len: size_t) -> bool;

pub fn LLVMRustMetadataAsValue<'a>(C: &'a Context, MD: &'a Metadata) -> &'a Value;

Expand Down
52 changes: 42 additions & 10 deletions compiler/rustc_codegen_ssa/src/meth.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
use crate::traits::*;

use rustc_middle::ty::{self, Ty};
use rustc_middle::ty::{self, subst::GenericArgKind, ExistentialPredicate, Ty, TyCtxt};
use rustc_session::config::Lto;
use rustc_symbol_mangling::typeid_for_trait_ref;
use rustc_target::abi::call::FnAbi;

#[derive(Copy, Clone, Debug)]
Expand All @@ -15,20 +17,32 @@ impl<'a, 'tcx> VirtualIndex {
self,
bx: &mut Bx,
llvtable: Bx::Value,
ty: Ty<'tcx>,
fn_abi: &FnAbi<'tcx, Ty<'tcx>>,
) -> Bx::Value {
// Load the data pointer from the object.
debug!("get_fn({:?}, {:?})", llvtable, self);

debug!("get_fn({llvtable:?}, {ty:?}, {self:?})");
let llty = bx.fn_ptr_backend_type(fn_abi);
let llvtable = bx.pointercast(llvtable, bx.type_ptr_to(llty));
let ptr_align = bx.tcx().data_layout.pointer_align.abi;
let gep = bx.inbounds_gep(llty, llvtable, &[bx.const_usize(self.0)]);
let ptr = bx.load(llty, gep, ptr_align);
bx.nonnull_metadata(ptr);
// Vtable loads are invariant.
bx.set_invariant_load(ptr);
ptr

if bx.cx().sess().opts.debugging_opts.virtual_function_elimination
&& bx.cx().sess().lto() == Lto::Fat
{
let typeid =
bx.typeid_metadata(typeid_for_trait_ref(bx.tcx(), get_trait_ref(bx.tcx(), ty)));
let vtable_byte_offset = self.0 * bx.data_layout().pointer_size.bytes();
let type_checked_load = bx.type_checked_load(llvtable, vtable_byte_offset, typeid);
let func = bx.extract_value(type_checked_load, 0);
bx.pointercast(func, llty)
} else {
let ptr_align = bx.tcx().data_layout.pointer_align.abi;
let gep = bx.inbounds_gep(llty, llvtable, &[bx.const_usize(self.0)]);
let ptr = bx.load(llty, gep, ptr_align);
bx.nonnull_metadata(ptr);
// Vtable loads are invariant.
bx.set_invariant_load(ptr);
ptr
}
}

pub fn get_usize<Bx: BuilderMethods<'a, 'tcx>>(
Expand All @@ -50,6 +64,24 @@ impl<'a, 'tcx> VirtualIndex {
}
}

fn get_trait_ref<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> ty::PolyExistentialTraitRef<'tcx> {
for arg in ty.peel_refs().walk() {
if let GenericArgKind::Type(ty) = arg.unpack() {
if let ty::Dynamic(trait_refs, _) = ty.kind() {
return trait_refs[0].map_bound(|trait_ref| match trait_ref {
ExistentialPredicate::Trait(tr) => tr,
ExistentialPredicate::Projection(proj) => proj.trait_ref(tcx),
ExistentialPredicate::AutoTrait(_) => {
bug!("auto traits don't have functions")
}
});
}
}
}

bug!("expected a `dyn Trait` ty, found {ty:?}")
}

/// Creates a dynamic vtable for the given type and vtable origin.
/// This is used only for objects.
///
Expand Down
18 changes: 13 additions & 5 deletions compiler/rustc_codegen_ssa/src/mir/block.rs
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
args = &args[..1];
(
meth::VirtualIndex::from_index(ty::COMMON_VTABLE_ENTRIES_DROPINPLACE)
.get_fn(&mut bx, vtable, &fn_abi),
.get_fn(&mut bx, vtable, ty, &fn_abi),
fn_abi,
)
}
Expand Down Expand Up @@ -819,17 +819,25 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
// the data pointer as the first argument
match op.val {
Pair(data_ptr, meta) => {
llfn = Some(
meth::VirtualIndex::from_index(idx).get_fn(&mut bx, meta, &fn_abi),
);
llfn = Some(meth::VirtualIndex::from_index(idx).get_fn(
&mut bx,
meta,
op.layout.ty,
&fn_abi,
));
llargs.push(data_ptr);
continue 'make_args;
}
other => bug!("expected a Pair, got {:?}", other),
}
} else if let Ref(data_ptr, Some(meta), _) = op.val {
// by-value dynamic dispatch
llfn = Some(meth::VirtualIndex::from_index(idx).get_fn(&mut bx, meta, &fn_abi));
llfn = Some(meth::VirtualIndex::from_index(idx).get_fn(
&mut bx,
meta,
op.layout.ty,
&fn_abi,
));
llargs.push(data_ptr);
continue;
} else {
Expand Down
Loading