Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Port to more current rust-nightly #98

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions crates/cuda_builder/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -376,10 +376,12 @@ fn invoke_rustc(builder: &CudaBuilder) -> Result<PathBuf, CudaBuilderError> {

let new_path = get_new_path_var();

let mut rustflags = vec![format!(
"-Zcodegen-backend={}",
rustc_codegen_nvvm.display(),
)];
let mut rustflags = vec![
format!("-Zcodegen-backend={}", rustc_codegen_nvvm.display(),),
"-Zcrate-attr=feature(register_tool)".to_string(),
"-Zcrate-attr=register_tool(rust_cuda)".to_string(),
"-Zcrate-attr=no_std".to_string(),
];

if let Some(emit) = &builder.emit {
let string = match emit {
Expand Down
1 change: 1 addition & 0 deletions crates/cuda_std/src/atomic/intrinsics.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
//! Raw CUDA-specific atomic functions that map to PTX instructions.

use crate::gpu_only;
use core::arch::asm;
use core::concat;
use paste::paste;

Expand Down
6 changes: 1 addition & 5 deletions crates/cuda_std/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,11 @@

#![cfg_attr(
target_os = "cuda",
no_std,
feature(
register_attr,
alloc_error_handler,
asm,
asm_experimental_arch,
link_llvm_intrinsics
),
register_attr(nvvm_internal)
)]

extern crate alloc;
Expand All @@ -49,7 +45,7 @@ pub mod cfg;
pub mod ptr;
pub mod shared;
pub mod thread;
pub mod warp;
// pub mod warp;

mod float_ext;

Expand Down
2 changes: 1 addition & 1 deletion crates/cuda_std/src/mem.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
//! Support for allocating memory and using `alloc` using CUDA memory allocation system-calls.

use crate::gpu_only;
#[cfg(any(target_arch = "nvptx", target_arch = "nvptx64"))]
use alloc::alloc::*;
use core::arch::asm;
#[cfg(any(target_arch = "nvptx", target_arch = "nvptx64"))]
use core::ffi::c_void;

Expand Down
2 changes: 1 addition & 1 deletion crates/cuda_std/src/misc.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
//! Misc functions that do not exactly fit into other categories.

use crate::gpu_only;
use core::arch::asm;

/// Suspends execution of the kernel, usually to pause at a specific point when debugging in a debugger.
#[gpu_only]
Expand Down
2 changes: 1 addition & 1 deletion crates/cuda_std/src/ptr.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
//! CUDA-specific pointer handling logic.

use crate::gpu_only;
use core::arch::asm;

/// Special areas of GPU memory where a pointer could reside.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
Expand Down
2 changes: 1 addition & 1 deletion crates/cuda_std/src/shared.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ pub fn dynamic_shared_mem<T>() -> *mut T {
extern "C" {
// need to use nvvm_internal and not address_space because address_space only parses
// static definitions, not extern static definitions.
#[nvvm_internal(addrspace(3))]
#[rust_cuda::nvvm_internal(addrspace(3))]
#[allow(improper_ctypes)]
// mangle it a bit to make sure nobody makes the same thing
#[link_name = "_Zcuda_std_dyn_shared"]
Expand Down
2 changes: 1 addition & 1 deletion crates/cuda_std/src/thread.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
//! The most important structure after threads, thread blocks arrange

// TODO: write some docs about the terms used in this module.

use core::arch::asm;
use cuda_std_macros::gpu_only;
use vek::{Vec2, Vec3};

Expand Down
1 change: 1 addition & 0 deletions crates/cuda_std/src/warp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
//! thread blocks and execute in SIMT fashion.

use crate::gpu_only;
use core::arch::asm;
use half::{bf16, f16};

/// Synchronizes all of the threads inside of this warp according to `mask`.
Expand Down
8 changes: 4 additions & 4 deletions crates/cuda_std_macros/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use proc_macro::TokenStream;
use proc_macro2::Span;
use quote::{quote_spanned, ToTokens};
use proc_macro2::{Span, TokenTree, Delimiter, Group};
use quote::{quote_spanned, ToTokens, quote};
use syn::{
parse::Parse, parse_macro_input, parse_quote, punctuated::Punctuated, spanned::Spanned, Error,
FnArg, Ident, ItemFn, ReturnType, Stmt, Token,
Expand All @@ -27,7 +27,7 @@ pub fn kernel(input: proc_macro::TokenStream, item: proc_macro::TokenStream) ->
let mut item = parse_macro_input!(item as ItemFn);
let no_mangle = parse_quote!(#[no_mangle]);
item.attrs.push(no_mangle);
let internal = parse_quote!(#[cfg_attr(any(target_arch="nvptx", target_arch="nvptx64"), nvvm_internal(kernel(#input)))]);
let internal = parse_quote!(#[cfg_attr(any(target_arch="nvptx", target_arch="nvptx64"), rust_cuda::nvvm_internal(kernel(#input)))]);
item.attrs.push(internal);

// used to guarantee some things about how params are passed in the codegen.
Expand Down Expand Up @@ -231,7 +231,7 @@ pub fn address_space(attr: proc_macro::TokenStream, item: proc_macro::TokenStrea
};

let new_attr =
parse_quote!(#[cfg_attr(target_os = "cuda", nvvm_internal(addrspace(#addrspace_num)))]);
parse_quote!(#[cfg_attr(target_os = "cuda", rust_cuda::nvvm_internal(addrspace(#addrspace_num)))]);
global.attrs.push(new_attr);

global.into_token_stream().into()
Expand Down
2 changes: 1 addition & 1 deletion crates/cust/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ cust_core = { path = "../cust_core", version = "0.1.0"}
cust_raw = { path = "../cust_raw", version = "0.11.2"}
bitflags = "1.2"
cust_derive = { path = "../cust_derive", version = "0.2" }
glam = { version = "0.20", features=["cuda"], optional = true }
glam = { version = "0.21", features=["cuda"], optional = true }
mint = { version = "^0.5", optional = true }
num-complex = { version = "0.4", optional = true }
vek = { version = "0.15.1", optional = true, default-features = false }
Expand Down
2 changes: 1 addition & 1 deletion crates/cust_core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ readme = "../../README.md"

[dependencies]
vek = { version = "0.15.1", default-features=false, features=["libm"], optional = true }
glam = { version = "0.20", features=["cuda", "libm"], default-features=false, optional=true }
glam = { version = "0.21", features=["cuda", "libm"], default-features=false, optional=true }
mint = { version = "^0.5", optional = true }
half = { version = "1.8", optional = true }
num-complex = { version = "0.4", optional = true }
Expand Down
2 changes: 1 addition & 1 deletion crates/cust_core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ pub mod _hidden {

#[cfg(feature = "glam")]
impl_device_copy! {
glam::Vec2, glam::Vec3, glam::Vec4, glam::IVec2, glam::IVec3, glam::IVec4,
glam::Vec2, glam::Vec3, glam::Vec4, glam::IVec2, glam::IVec3, glam::IVec4, glam::Mat3, glam::Mat4
}

#[cfg(feature = "mint")]
Expand Down
2 changes: 1 addition & 1 deletion crates/optix/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ cust = { version = "0.3", path = "../cust", features=["impl_mint"] }
cust_raw = { version = "0.11.2", path = "../cust_raw" }
cfg-if = "1.0.0"
bitflags = "1.3.2"
glam = { version = "0.20", features=["cuda", "libm"], default-features=false, optional=true }
glam = { version = "0.21", features=["cuda", "libm"], default-features=false, optional=true }
half = { version = "^1.8", optional = true }
memoffset = "0.6.4"
mint = "0.5.8"
Expand Down
5 changes: 2 additions & 3 deletions crates/optix/examples/ex02_pipeline/device/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
#![feature(asm)]
#![cfg_attr(
target_os = "cuda",
no_std,
feature(register_attr),
register_attr(nvvm_internal)
feature(register_tool),
register_tool(nvvm_internal)
)]
// #![deny(warnings)]
#![allow(clippy::missing_safety_doc)]
Expand Down
4 changes: 2 additions & 2 deletions crates/optix/examples/rust/ex04_mesh_gpu/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#![cfg_attr(
target_os = "cuda",
no_std,
feature(register_attr),
register_attr(nvvm_internal)
feature(register_tool),
register_tool(nvvm_internal)
)]
#![allow(non_snake_case, clippy::missing_safety_doc)]

Expand Down
2 changes: 1 addition & 1 deletion crates/optix_device/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ authors = ["Anders Langlands <[email protected]>", "Riccardo D'Ambrosio
[dependencies]
bitflags = "1.3.2"
cuda_std = { version = "0.2", path = "../cuda_std" }
glam = { version = "0.20", features=["cuda", "libm"], default-features=false }
glam = { version = "0.21", features=["cuda", "libm"], default-features=false }
paste = "1.0.6"
seq-macro = "0.3.0"
cust_core = { version = "0.1", path = "../cust_core" }
4 changes: 2 additions & 2 deletions crates/optix_device/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#![cfg_attr(
target_arch = "nvptx64",
no_std,
feature(register_attr, asm, asm_experimental_arch),
register_attr(nvvm_internal)
feature(register_tool, asm, asm_experimental_arch),
register_tool(nvvm_internal)
)]

extern crate alloc;
Expand Down
2 changes: 2 additions & 0 deletions crates/rustc_codegen_nvvm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ crate-type = ["dylib"]
[dependencies]
nvvm = { version = "0.1", path = "../nvvm" }
rustc-demangle = "0.1.20"
cstr = "0.2"
libc = "0.2.97"
libloading = "0.7"
tar = "0.4.35"
once_cell = "1.8.0"
bitflags = "1.3.2"
Expand Down
63 changes: 32 additions & 31 deletions crates/rustc_codegen_nvvm/src/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,24 @@ use rustc_target::abi::{self, HasDataLayout, Int};
pub use rustc_target::spec::abi::Abi;
use tracing::trace;

/// Builds an owned `PassMode` from a borrowed one, reconstructing it variant by variant.
///
/// Attribute payloads and the flag fields are copied out through a dereference; the `Cast`
/// target is duplicated with `clone()`.
// NOTE(review): if `PassMode` implements `Clone` on the targeted nightly, this helper is
// equivalent to `original.clone()` — confirm before simplifying.
fn clone_pass_mode(original: &PassMode) -> PassMode {
    match original {
        PassMode::Indirect {
            attrs,
            extra_attrs,
            on_stack,
        } => {
            // Copy each field out first so the rebuilt variant can use field shorthand.
            let (attrs, extra_attrs, on_stack) = (*attrs, *extra_attrs, *on_stack);
            PassMode::Indirect {
                attrs,
                extra_attrs,
                on_stack,
            }
        }
        PassMode::Cast(cast_target, flag) => PassMode::Cast(cast_target.clone(), *flag),
        PassMode::Pair(first, second) => PassMode::Pair(*first, *second),
        PassMode::Direct(attrs) => PassMode::Direct(*attrs),
        PassMode::Ignore => PassMode::Ignore,
    }
}

pub(crate) fn readjust_fn_abi<'tcx>(
tcx: TyCtxt<'tcx>,
fn_abi: &'tcx FnAbi<'tcx, Ty<'tcx>>,
Expand All @@ -29,8 +47,7 @@ pub(crate) fn readjust_fn_abi<'tcx>(
let readjust_arg_abi = |arg: &ArgAbi<'tcx, Ty<'tcx>>| {
let mut arg = ArgAbi {
layout: arg.layout,
mode: arg.mode,
pad: arg.pad,
mode: clone_pass_mode(&arg.mode),
};

// ignore zsts
Expand Down Expand Up @@ -265,7 +282,7 @@ pub(crate) trait FnAbiLlvmExt<'ll, 'tcx> {
impl<'ll, 'tcx> FnAbiLlvmExt<'ll, 'tcx> for FnAbi<'tcx, Ty<'tcx>> {
fn llvm_type(&self, cx: &CodegenCx<'ll, 'tcx>) -> &'ll Type {
let args_capacity: usize = self.args.iter().map(|arg|
if arg.pad.is_some() { 1 } else { 0 } +
// if arg.pad.is_some() { 1 } else { 0 } +
if let PassMode::Pair(_, _) = arg.mode { 2 } else { 1 }
).sum();

Expand All @@ -281,10 +298,10 @@ impl<'ll, 'tcx> FnAbiLlvmExt<'ll, 'tcx> for FnAbi<'tcx, Ty<'tcx>> {
} + args_capacity,
);

let mut llreturn_ty = match self.ret.mode {
let mut llreturn_ty = match &self.ret.mode {
PassMode::Ignore => cx.type_void(),
PassMode::Direct(_) | PassMode::Pair(..) => self.ret.layout.immediate_llvm_type(cx),
PassMode::Cast(cast) => cast.llvm_type(cx),
PassMode::Cast(cast, _) => cast.llvm_type(cx),
PassMode::Indirect { .. } => {
idx += 1;
llargument_tys.push(cx.type_ptr_to(self.ret.memory_ty(cx)));
Expand All @@ -302,13 +319,7 @@ impl<'ll, 'tcx> FnAbiLlvmExt<'ll, 'tcx> for FnAbi<'tcx, Ty<'tcx>> {
}

for arg in self.args.iter() {
// add padding
if let Some(ty) = arg.pad {
idx += 1;
llargument_tys.push(ty.llvm_type(cx));
}

let llarg_ty = match arg.mode {
let llarg_ty = match &arg.mode {
PassMode::Ignore => continue,
PassMode::Direct(_) => arg.layout.immediate_llvm_type(cx),
PassMode::Pair(..) => {
Expand All @@ -329,7 +340,7 @@ impl<'ll, 'tcx> FnAbiLlvmExt<'ll, 'tcx> for FnAbi<'tcx, Ty<'tcx>> {
idx += 2;
continue;
}
PassMode::Cast(cast) => cast.llvm_type(cx),
PassMode::Cast(cast, _) => cast.llvm_type(cx),
PassMode::Indirect {
attrs: _,
extra_attrs: None,
Expand Down Expand Up @@ -398,10 +409,7 @@ impl<'ll, 'tcx> FnAbiLlvmExt<'ll, 'tcx> for FnAbi<'tcx, Ty<'tcx>> {
}
_ => {}
}
for arg in &self.args {
if arg.pad.is_some() {
apply(&ArgAttributes::new());
}
for arg in self.args.iter() {
match arg.mode {
PassMode::Ignore => {}
PassMode::Indirect {
Expand Down Expand Up @@ -436,7 +444,7 @@ impl<'ll, 'tcx> FnAbiLlvmExt<'ll, 'tcx> for FnAbi<'tcx, Ty<'tcx>> {
apply(a);
apply(b);
}
PassMode::Cast(_) => {
PassMode::Cast(..) => {
apply(&ArgAttributes::new());
}
}
Expand Down Expand Up @@ -476,16 +484,13 @@ impl<'ll, 'tcx> FnAbiLlvmExt<'ll, 'tcx> for FnAbi<'tcx, Ty<'tcx>> {
// If the value is a boolean, the range is 0..2 and that ultimately
// become 0..0 when the type becomes i1, which would be rejected
// by the LLVM verifier.
if let Int(..) = scalar.value {
if scalar.primitive().is_int() {
if !scalar.is_bool() && !scalar.is_always_valid(bx) {
bx.range_metadata(callsite, scalar.valid_range);
bx.range_metadata(callsite, scalar.valid_range(bx));
}
}
}
for arg in &self.args {
if arg.pad.is_some() {
apply(bx.cx, &ArgAttributes::new());
}
for arg in self.args.iter() {
match arg.mode {
PassMode::Ignore => {}
PassMode::Indirect {
Expand Down Expand Up @@ -515,7 +520,7 @@ impl<'ll, 'tcx> FnAbiLlvmExt<'ll, 'tcx> for FnAbi<'tcx, Ty<'tcx>> {
apply(bx.cx, a);
apply(bx.cx, b);
}
PassMode::Cast(_) => {
PassMode::Cast(..) => {
apply(bx.cx, &ArgAttributes::new());
}
}
Expand All @@ -524,10 +529,6 @@ impl<'ll, 'tcx> FnAbiLlvmExt<'ll, 'tcx> for FnAbi<'tcx, Ty<'tcx>> {
}

impl<'a, 'll, 'tcx> AbiBuilderMethods<'tcx> for Builder<'a, 'll, 'tcx> {
fn apply_attrs_callsite(&mut self, fn_abi: &FnAbi<'tcx, Ty<'tcx>>, callsite: Self::Value) {
fn_abi.apply_attrs_callsite(self, callsite)
}

fn get_param(&mut self, index: usize) -> Self::Value {
let val = llvm::get_param(self.llfn(), index as c_uint);
trace!("Get param `{:?}`", val);
Expand Down Expand Up @@ -588,7 +589,7 @@ impl<'ll, 'tcx> ArgAbiExt<'ll, 'tcx> for ArgAbi<'tcx, Ty<'tcx>> {
OperandValue::Ref(val, None, self.layout.align.abi).store(bx, dst)
} else if self.is_unsized_indirect() {
bug!("unsized `ArgAbi` must be handled through `store_fn_arg`");
} else if let PassMode::Cast(cast) = self.mode {
} else if let PassMode::Cast(cast, _) = &self.mode {
let can_store_through_cast_ptr = false;
if can_store_through_cast_ptr {
let cast_ptr_llty = bx.type_ptr_to(cast.llvm_type(bx));
Expand Down Expand Up @@ -647,7 +648,7 @@ impl<'ll, 'tcx> ArgAbiExt<'ll, 'tcx> for ArgAbi<'tcx, Ty<'tcx>> {
extra_attrs: None,
on_stack: _,
}
| PassMode::Cast(_) => {
| PassMode::Cast(..) => {
let next_arg = next();
self.store(bx, next_arg, dst);
}
Expand Down
8 changes: 2 additions & 6 deletions crates/rustc_codegen_nvvm/src/allocator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ pub(crate) unsafe fn codegen(
_tcx: TyCtxt<'_>,
mods: &mut LlvmMod,
kind: AllocatorKind,
has_alloc_error_handler: bool,
alloc_error_handler_kind: AllocatorKind,
) {
let llcx = &*mods.llcx;
let llmod = mods.llmod.as_ref().unwrap();
Expand Down Expand Up @@ -94,11 +94,7 @@ pub(crate) unsafe fn codegen(
// -> ! DIFlagNoReturn
llvm::Attribute::NoReturn.apply_llfn(llvm::AttributePlace::Function, llfn);

let kind = if has_alloc_error_handler {
AllocatorKind::Global
} else {
AllocatorKind::Default
};
let kind = alloc_error_handler_kind;
let callee = kind.fn_name(sym::oom);
let callee = llvm::LLVMRustGetOrInsertFunction(llmod, callee.as_ptr().cast(), callee.len(), ty);

Expand Down
Loading