Rust编译器研究+.NET9 PreView7

文摘   2024-08-14 11:10   湖北  

点击上方蓝字 江湖评谈设为关注/星标




前言

看下Rust在Linux下的编译与glibc部分,以及.NET9 Preview7的一个更新。

.NET9 PreView7

值类型一般分配在栈上,但是有时候不得不把值类型封装成对象,进行一些操作。比如:

static bool Compare(object? x, object? y){    if ((x == null) || (y == null))    {        return x == y;    }
return x.Equals(y);}
public static int Main(){ bool result = Compare(3, 4); return result ? 0 : 100;}

3,4是值类型,传参的时候被装箱(boxing)成了堆上的obj,分配了堆内存。但是实际上就这段代码而言,只需要返回100即可。

mov      eax, 100ret

而未优化之前:

push     rbxsub      rsp, 32mov      rcx, 0x7FFB9F8074D0      ; System.Int32call     CORINFO_HELP_NEWSFASTmov      rbx, raxmov      dword ptr [rbx+0x08], 3mov      rcx, 0x7FFB9F8074D0      ; System.Int32call     CORINFO_HELP_NEWSFASTmov      dword ptr [rax+0x08], 4add      rbx, 8mov      ecx, dword ptr [rbx]cmp      ecx, dword ptr [rax+0x08]sete     almovzx    rax, alxor      ecx, ecxmov      edx, 100test     eax, eaxmov      eax, edxcmovne   eax, ecxadd      rsp, 32pop      rbxret


Rust详情

Rust的可执行文件经编译器编译、链接之后开始执行。它第一步是通过内核态的缺页异常(exc_page_fault)切换,来调用Glibc的_start。然后通过_start调用Rust的main入口。最后启动了整个Rust语言的编译。

编译这块可以大致分为两步:其一是对Rust语言的语法进行加载,分析,变形,这期间形成BB(Basic Block)块和IR中间表示,并进行一定程度的优化;其二是通过LLVM后端编译成目标机器码进行运行。这一点与.NET非常相似(当然细节上差异极大,这点是自然的,因为毕竟是两种不同的体系),如果有关注本公众号的朋友,可以看下之前文章。唯一不同点,微软的后端机器码生成是JIT来做的,而Rust是LLVM来做的。

下面实际看下,_start作为用户态的入口调用了Glibc:

(lldb) b main(lldb) bt* thread #1, name = 'rustfirstproj', stop reason = breakpoint 1.1  * frame #0: 0x000055555555b850 rustfirstproj`main    frame #1: 0x00007ffff7c29d90 libc.so.6`__libc_start_call_main(main=(rustfirstproj`main), argc=1, argv=0x00007fffffffdf08) at libc_start_call_main.h:58:16    frame #2: 0x00007ffff7c29e40 libc.so.6`__libc_start_main_impl(main=(rustfirstproj`main), argc=1, argv=0x00007fffffffdf08, init=0x00007ffff7ffd040, fini=<unavailable>, rtld_fini=<unavailable>, stack_end=0x00007fffffffdef8) at libc-start.c:392:3    frame #3: 0x000055555555b635 rustfirstproj`_start + 37

Glibc并没有直接到Rust-main的入口,而是调用了lang_start:

(lldb) dirustfirstproj`main:->  0x55555555b850 <+0>:  push   rax    0x55555555b851 <+1>:  mov    rdx, rsi    0x55555555b854 <+4>:  movsxd rsi, edi    0x55555555b857 <+7>:  lea    rdi, [rip - 0x3e]         ; rustfirstproj::main::h2d069b53148117c9    0x55555555b85e <+14>: xor    ecx, ecx    0x55555555b860 <+16>: call   0x55555555b710            ; std::rt::lang_start::hbf501259140f0729    0x55555555b865 <+21>: pop    rcx    0x55555555b866 <+22>: ret    

lang_start源码(调用了lang_start_internal)

//rust-lang/rust/blob/1.80.0/library/std/src/rt.rs#L158#[cfg(not(any(test, doctest)))]#[lang = "start"]fn lang_start<T: crate::process::Termination + 'static>(    main: fn() -> T,    argc: isize,    argv: *const *const u8,    sigpipe: u8,) -> isize {    let Ok(v) = lang_start_internal(        &move || crate::sys_common::backtrace::__rust_begin_short_backtrace(main).report().to_i32(),        argc,        argv,        sigpipe,    );    v}

lang_start_internal(其调用了rust-main入口,可以通过cargo建立的项目分析,单文件不行)

#[cfg(not(test))]fn lang_start_internal(    main: &(dyn Fn() -> i32 + Sync + crate::panic::RefUnwindSafe),    argc: isize,    argv: *const *const u8,    sigpipe: u8,) -> Result<isize, !> {    use crate::{mem, panic};    let rt_abort = move |e| {        mem::forget(e);        rtabort!("initialization or cleanup bug");    };    // Guard against the code called by this function from unwinding outside of the Rust-controlled    // code, which is UB. This is a requirement imposed by a combination of how the    // `#[lang="start"]` attribute is implemented as well as by the implementation of the panicking    // mechanism itself.    //    // There are a couple of instances where unwinding can begin. First is inside of the    // `rt::init`, `rt::cleanup` and similar functions controlled by bstd. In those instances a    // panic is a std implementation bug. A quite likely one too, as there isn't any way to    // prevent std from accidentally introducing a panic to these functions. Another is from    // user code from `main` or, more nefariously, as described in e.g. issue #86030.    // SAFETY: Only called once during runtime initialization.    panic::catch_unwind(move || unsafe { init(argc, argv, sigpipe) }).map_err(rt_abort)?;    let ret_code = panic::catch_unwind(move || panic::catch_unwind(main).unwrap_or(101) as isize)        .map_err(move |e| {            mem::forget(e);            rtabort!("drop of the panic payload panicked");        });    panic::catch_unwind(cleanup).map_err(rt_abort)?;

这里面继续跟踪下去,即来到了main入口

(lldb) bt* thread #1, name = 'hello-rust', stop reason = breakpoint 1.1  * frame #0: 0x000055555555b7b0 hello-rust`hello_rust::main::h90afc128a8411154 at main.rs:1    frame #1: 0x000055555555b76b hello-rust`core::ops::function::FnOnce::call_once::h66ec4e6a79d6f1df((null)=(hello-rust`hello_rust::main::h90afc128a8411154 at main.rs:1), (null)=<unavailable>) at function.rs:250:5    frame #2: 0x000055555555b88e hello-rust`std::sys_common::backtrace::__rust_begin_short_backtrace::h70d836f5ed7ef6e7(f=(hello-rust`hello_rust::main::h90afc128a8411154 at main.rs:1)) at backtrace.rs:155:18    frame #3: 0x000055555555b861 hello-rust`std::rt::lang_start::_$u7b$$u7b$closure$u7d$$u7d$::h2a770df763246f35 at rt.rs:159:18    frame #4: 0x000055555557110d hello-rust`std::rt::lang_start_internal::h63a185b0ddd212e9 [inlined] core::ops::function::impls::_$LT$impl$u20$core..ops..function..FnOnce$LT$A$GT$$u20$for$u20$$RF$F$GT$::call_once::hb84c63630a35bb05 at function.rs:284:13    frame #5: 0x000055555557110a hello-rust`std::rt::lang_start_internal::h63a185b0ddd212e9 [inlined] std::panicking::try::do_call::h8d62108d97b3e028 at panicking.rs:559:40    frame #6: 0x000055555557110a hello-rust`std::rt::lang_start_internal::h63a185b0ddd212e9 [inlined] std::panicking::try::h0c0b9a214b9691f1 at panicking.rs:523:19    frame #7: 0x000055555557110a hello-rust`std::rt::lang_start_internal::h63a185b0ddd212e9 [inlined] std::panic::catch_unwind::ha8912f28da143edb at panic.rs:149:14    frame #8: 0x000055555557110a hello-rust`std::rt::lang_start_internal::h63a185b0ddd212e9 [inlined] std::rt::lang_start_internal::_$u7b$$u7b$closure$u7d$$u7d$::h2baf77487fc7f90d at rt.rs:141:48    frame #9: 0x000055555557110a hello-rust`std::rt::lang_start_internal::h63a185b0ddd212e9 [inlined] std::panicking::try::do_call::h36fdc82521d3343f at panicking.rs:559:40    frame #10: 0x000055555557110a hello-rust`std::rt::lang_start_internal::h63a185b0ddd212e9 [inlined] std::panicking::try::h58aa1415c41e30ec at panicking.rs:523:19    
frame #11: 0x000055555557110a hello-rust`std::rt::lang_start_internal::h63a185b0ddd212e9 [inlined] std::panic::catch_unwind::h3c4b5a8c3b1c4acf at panic.rs:149:14    frame #12: 0x000055555557110a hello-rust`std::rt::lang_start_internal::h63a185b0ddd212e9 at rt.rs:141:20    frame #13: 0x000055555555b83a hello-rust`std::rt::lang_start::h1bde715c2df689f0(main=(hello-rust`hello_rust::main::h90afc128a8411154 at main.rs:1), argc=1, argv=0x00007fffffffde98, sigpipe='\0') at rt.rs:158:17    frame #14: 0x000055555555b7fe hello-rust`main + 30    frame #15: 0x00007ffff7c29d90 libc.so.6`__libc_start_call_main(main=(hello-rust`main), argc=1, argv=0x00007fffffffde98) at libc_start_call_main.h:58:16    frame #16: 0x00007ffff7c29e40 libc.so.6`__libc_start_main_impl(main=(hello-rust`main), argc=1, argv=0x00007fffffffde98, init=0x00007ffff7ffd040, fini=<unavailable>, rtld_fini=<unavailable>, stack_end=0x00007fffffffde88) at libc-start.c:392:3    frame #17: 0x000055555555b635 hello-rust`_start + 37

内核态缺页异常切换到用户态的Glibc入口_start这一步,似乎有点问题,猜测要通过busybox来构建。

但Go是没有问题的

暂且按下不表,姑且认定它是:

DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_PF,  exc_page_fault);


Go/.NET

Go是不经过Glibc的,它自己通过一套非常底层的汇编库取代了Glibc来进行Go-main的调用。.NET是经过Glibc的,.NET通过Glibc启动了CLR的入口,然后构建MSIL,JIT IR,编译等等,最后才是执行托管的C#代码。Rust经过Glibc,进行了IR构建,调用了LLVM。与.NET非常相似。

往期精彩回顾

Golang入口彻底研究+CMakelist(Clang)+单文件

从.NET9到Rust

从.NET9看Golang


江湖评谈
记录,分享,自由。
 最新文章