From 07239e2c0ba8952fbadcf0e31433d7cfc9ea3e8d Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Thu, 16 Apr 2026 23:37:34 -0400 Subject: [PATCH] arch: add armv7 support --- .github/workflows/rust.yml | 2 + crates/asm/src/arm.rs | 140 +++++++++++++++++++++++++++++++++++++ crates/asm/src/lib.rs | 58 +++++++++++++-- crates/lib/src/cpu.rs | 125 ++++++++++++++++++++++++++------- microfetch/src/main.rs | 25 +++++++ 5 files changed, 318 insertions(+), 32 deletions(-) create mode 100644 crates/asm/src/arm.rs diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 75e3064..c405912 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -32,6 +32,8 @@ jobs: toolchain: nightly - target: powerpc64-unknown-linux-gnu toolchain: nightly + - target: armv7-unknown-linux-gnueabihf + toolchain: stable steps: - name: "Checkout" diff --git a/crates/asm/src/arm.rs b/crates/asm/src/arm.rs new file mode 100644 index 0000000..892b012 --- /dev/null +++ b/crates/asm/src/arm.rs @@ -0,0 +1,140 @@ +//! Syscall implementations for `arm`. 
+ +use super::{StatfsBuf, SysInfo, UtsNameBuf}; + +pub(super) unsafe fn sys_open(path: *const u8, flags: i32) -> i32 { + unsafe { + let ret: i32; + core::arch::asm!( + "svc #0", + in("r7") 5i32, // SYS_open + in("r0") path, + in("r1") flags, + in("r2") 0i32, // mode + lateout("r0") ret, + options(nostack) + ); + ret + } +} + +pub(super) unsafe fn sys_read(fd: i32, buf: *mut u8, count: usize) -> isize { + unsafe { + let ret: i32; + core::arch::asm!( + "svc #0", + in("r7") 3i32, // SYS_read + in("r0") fd, + in("r1") buf, + in("r2") count, + lateout("r0") ret, + options(nostack) + ); + ret as isize + } +} + +pub(super) unsafe fn sys_write(fd: i32, buf: *const u8, count: usize) -> isize { + unsafe { + let ret: i32; + core::arch::asm!( + "svc #0", + in("r7") 4i32, // SYS_write + in("r0") fd, + in("r1") buf, + in("r2") count, + lateout("r0") ret, + options(nostack) + ); + ret as isize + } +} + +pub(super) unsafe fn sys_close(fd: i32) -> i32 { + unsafe { + let ret: i32; + core::arch::asm!( + "svc #0", + in("r7") 6i32, // SYS_close + in("r0") fd, + lateout("r0") ret, + options(nostack) + ); + ret + } +} + +pub(super) unsafe fn sys_uname(buf: *mut UtsNameBuf) -> i32 { + unsafe { + let ret: i32; + core::arch::asm!( + "svc #0", + in("r7") 122i32, // SYS_newuname + in("r0") buf, + lateout("r0") ret, + options(nostack) + ); + ret + } +} + +pub(super) unsafe fn sys_statfs(path: *const u8, buf: *mut StatfsBuf) -> i32 { + unsafe { + let ret: i32; + core::arch::asm!( + "svc #0", + in("r7") 266i32, // SYS_statfs64 + in("r0") path, + in("r1") core::mem::size_of::<StatfsBuf>(), + in("r2") buf, + lateout("r0") ret, + options(nostack) + ); + ret + } +} + +pub(super) unsafe fn sys_sysinfo(info: *mut SysInfo) -> i64 { + unsafe { + let ret: i32; + core::arch::asm!( + "svc #0", + in("r7") 116_i32, // __NR_sysinfo + in("r0") info, + lateout("r0") ret, + options(nostack) + ); + i64::from(ret) + } +} + +pub(super) unsafe fn sys_sched_getaffinity( + pid: i32, + mask_size: usize, + mask: *mut u8, +) -> i32 
{ + unsafe { + let ret: i32; + core::arch::asm!( + "svc #0", + in("r7") 242i32, // __NR_sched_getaffinity + in("r0") pid, + in("r1") mask_size, + in("r2") mask, + lateout("r0") ret, + options(nostack) + ); + ret + } +} + +pub(super) unsafe fn sys_exit(code: i32) -> ! { + unsafe { + core::arch::asm!( + "svc #0", + in("r7") 1i32, // SYS_exit + in("r0") code, + options(noreturn, nostack) + ); + } +} diff --git a/crates/asm/src/lib.rs b/crates/asm/src/lib.rs index 238cd69..3148f94 100644 --- a/crates/asm/src/lib.rs +++ b/crates/asm/src/lib.rs @@ -5,8 +5,8 @@ //! What do you mean I wasted two whole hours to make the program only 100µs //! faster? //! -//! Supports `x86_64`, `aarch64`, `riscv64`, `loongarch64`, `s390x`, and -//! `powerpc64` architectures. +//! Supports `x86_64`, `aarch64`, `riscv64`, `loongarch64`, `s390x`, +//! `powerpc64`, and `arm` (armv7) architectures. #![no_std] #![cfg_attr(target_arch = "powerpc64", feature(asm_experimental_arch))] @@ -18,11 +18,12 @@ target_arch = "riscv64", target_arch = "loongarch64", target_arch = "s390x", - target_arch = "powerpc64" + target_arch = "powerpc64", + target_arch = "arm" )))] compile_error!( "Unsupported architecture: only x86_64, aarch64, riscv64, loongarch64, \ - s390x, and powerpc64 are supported" + s390x, powerpc64, and arm are supported" ); // Per-arch syscall implementations live in their own module files. @@ -44,6 +45,9 @@ mod arch; #[cfg(target_arch = "powerpc64")] #[path = "powerpc64.rs"] mod arch; +#[cfg(target_arch = "arm")] +#[path = "arm.rs"] +mod arch; /// Copies `n` bytes from `src` to `dest`. /// @@ -277,7 +281,7 @@ pub unsafe fn sys_uname(buf: *mut UtsNameBuf) -> i32 { /// offsets on both architectures. Only the fields needed for disk usage are /// declared; the remainder of the 120-byte struct is covered by `_pad`. 
#[repr(C)] -#[cfg(not(target_arch = "s390x"))] +#[cfg(not(any(target_arch = "s390x", target_arch = "arm")))] pub struct StatfsBuf { pub f_type: i64, pub f_bsize: i64, @@ -315,6 +319,27 @@ pub struct StatfsBuf { pub _pad: [u32; 5], } +/// on armv7 `statfs64(2)` has 32-bit word fields; see +/// https://github.com/torvalds/linux/blob/v6.19/include/uapi/asm-generic/statfs.h +#[repr(C)] +#[cfg(target_arch = "arm")] +pub struct StatfsBuf { + pub f_type: u32, + pub f_bsize: u32, + pub f_blocks: u64, + pub f_bfree: u64, + pub f_bavail: u64, + pub f_files: u64, + pub f_ffree: u64, + pub f_fsid: [i32; 2], + pub f_namelen: u32, + pub f_frsize: u32, + pub f_flags: u32, + + #[allow(clippy::pub_underscore_fields, reason = "This is not a public API")] + pub _pad: [u32; 4], +} + /// Direct `statfs(2)` syscall /// /// # Returns @@ -384,6 +409,7 @@ pub fn read_file_fast(path: &str, buffer: &mut [u8]) -> Result { /// The layout matches the kernel's `struct sysinfo` *exactly*: /// `mem_unit` ends at offset 108, then 4 bytes of implicit padding to 112. 
#[repr(C)] +#[cfg(not(target_arch = "arm"))] pub struct SysInfo { pub uptime: i64, pub loads: [u64; 3], @@ -404,6 +430,28 @@ pub struct SysInfo { // needed } +/// on armv7 `__kernel_long_t` is 4 bytes; see +/// https://github.com/torvalds/linux/blob/v6.19/include/uapi/linux/sysinfo.h +#[repr(C)] +#[cfg(target_arch = "arm")] +pub struct SysInfo { + pub uptime: i32, + pub loads: [u32; 3], + pub totalram: u32, + pub freeram: u32, + pub sharedram: u32, + pub bufferram: u32, + pub totalswap: u32, + pub freeswap: u32, + pub procs: u16, + _pad: u16, + pub totalhigh: u32, + pub freehigh: u32, + pub mem_unit: u32, + #[allow(clippy::pub_underscore_fields, reason = "This is not a public API")] + pub _f: [u8; 8], +} + /// Direct `sysinfo(2)` syscall /// /// # Returns diff --git a/crates/lib/src/cpu.rs b/crates/lib/src/cpu.rs index c3e23bd..2b10a32 100644 --- a/crates/lib/src/cpu.rs +++ b/crates/lib/src/cpu.rs @@ -121,24 +121,65 @@ fn parse_num(data: &[u8], i: &mut usize) -> u32 { n } +/// Build `/sys/devices/system/cpu/cpu{n}/cpufreq/cpuinfo_max_freq` into buf, +/// returning the byte length written. +fn format_cpufreq_path(buf: &mut [u8; 64], cpu: u32) -> usize { + const PREFIX: &[u8] = b"/sys/devices/system/cpu/cpu"; + const SUFFIX: &[u8] = b"/cpufreq/cpuinfo_max_freq"; + buf[..PREFIX.len()].copy_from_slice(PREFIX); + let mut i = PREFIX.len(); + let mut tmp = [0u8; 3]; + let mut n = cpu; + let mut digits = 0; + loop { + tmp[digits] = b'0' + (n % 10) as u8; + digits += 1; + n /= 10; + if n == 0 { + break; + } + } + while digits > 0 { + digits -= 1; + buf[i] = tmp[digits]; + i += 1; + } + buf[i..i + SUFFIX.len()].copy_from_slice(SUFFIX); + i + SUFFIX.len() +} + /// Read CPU frequency in MHz. Tries sysfs first, then cpuinfo fields. 
fn get_cpu_freq_mhz() -> Option<u32> { - // Try sysfs cpuinfo_max_freq (in kHz) - let mut buf = [0u8; 32]; - if let Ok(n) = read_file_fast( - "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", - &mut buf, - ) { + // Read cpuinfo_max_freq across all CPUs (in kHz) and take the max so + // heterogeneous (big.LITTLE) topologies report the performance cluster. + let mut max_khz = 0u32; + let mut path = [0u8; 64]; + for cpu in 0u32..64 { + let n = format_cpufreq_path(&mut path, cpu); + let p = match core::str::from_utf8(&path[..n]) { + Ok(s) => s, + Err(_) => continue, + }; + let mut buf = [0u8; 32]; + let Ok(m) = read_file_fast(p, &mut buf) else { + if cpu == 0 { + continue; + } + break; + }; let mut khz = 0u32; - for &b in &buf[..n] { + for &b in &buf[..m] { if b.is_ascii_digit() { khz = khz * 10 + u32::from(b - b'0'); } } - if khz > 0 { - return Some(khz / 1000); + if khz > max_khz { + max_khz = khz; } } + if max_khz > 0 { + return Some(max_khz / 1000); + } // Fall back to cpuinfo fields let mut buf2 = [0u8; 4096]; let n = read_file_fast("/proc/cpuinfo", &mut buf2).ok()?; @@ -175,6 +216,30 @@ fn get_model_name() -> Option<String> { let n = read_file_fast("/proc/cpuinfo", &mut buf).ok()?; let data = &buf[..n]; + let base = extract_name(data)?; + let mut name = base; + if let Some(mhz) = get_cpu_freq_mhz() { + name.push_str(" @ "); + // Round to nearest 0.01 GHz, then split so carries (e.g. 1999 MHz) + // roll into the integer part instead of overflowing the fraction. + let rounded_centesimal = (mhz + 5) / 10; + let ghz_int = rounded_centesimal / 100; + let ghz_frac = rounded_centesimal % 100; + write_u64(&mut name, u64::from(ghz_int)); + name.push('.'); + if ghz_frac < 10 { + name.push('0'); + } + write_u64(&mut name, u64::from(ghz_frac)); + name.push_str(" GHz"); + } + Some(name) +} + +/// Extract a human-readable CPU name. Tries cpuinfo fields first, then +/// falls back to the device-tree `compatible` string on SoCs that don't +/// expose a model through cpuinfo. 
+fn extract_name(data: &[u8]) -> Option<String> { for key in &[ b"model name" as &[u8], b"Model Name", @@ -187,28 +252,34 @@ fn get_model_name() -> Option<String> { if let Some(val) = extract_field(data, key) { let trimmed = trim(val); if !trimmed.is_empty() { - let mut name = String::from(trimmed); - if let Some(mhz) = get_cpu_freq_mhz() { - name.push_str(" @ "); - // Round to nearest 0.01 GHz, then split so carries (e.g. 1999 MHz) - // roll into the integer part instead of overflowing the fraction. - let rounded_centesimal = (mhz + 5) / 10; - let ghz_int = rounded_centesimal / 100; - let ghz_frac = rounded_centesimal % 100; - write_u64(&mut name, u64::from(ghz_int)); - name.push('.'); - if ghz_frac < 10 { - name.push('0'); - } - write_u64(&mut name, u64::from(ghz_frac)); - name.push_str(" GHz"); - } - return Some(name); + return Some(String::from(trimmed)); } } } + parse_dt_compatible() +} - None +/// Parse the SoC name from `/sys/firmware/devicetree/base/compatible`. +/// The file holds NUL-separated `vendor,model` strings from most-specific +/// (board) to most-generic (SoC); we take the last entry and return just +/// the model portion after the comma. +fn parse_dt_compatible() -> Option<String> { + let mut buf = [0u8; 256]; + let n = read_file_fast("/sys/firmware/devicetree/base/compatible", &mut buf) + .ok()?; + // Drop the terminating NUL so the rposition below locates the entry + // separator rather than the end-of-string marker. + let end = if n > 0 && buf[n - 1] == 0 { n - 1 } else { n }; + let data = &buf[..end]; + let start = data.iter().rposition(|&b| b == 0).map_or(0, |p| p + 1); + let entry = &data[start..]; + let comma = entry.iter().position(|&b| b == b',')?; + let model = core::str::from_utf8(&entry[comma + 1..]).ok()?; + if model.is_empty() { + None + } else { + Some(String::from(model)) + } } /// Extract value of first occurrence of `key` in cpuinfo. 
diff --git a/microfetch/src/main.rs b/microfetch/src/main.rs index b22f291..7f6a4cb 100644 --- a/microfetch/src/main.rs +++ b/microfetch/src/main.rs @@ -140,6 +140,20 @@ core::arch::global_asm!( " sc", ); +#[cfg(target_arch = "arm")] +#[unsafe(no_mangle)] +#[unsafe(naked)] +unsafe extern "C" fn _start() { + naked_asm!( + "mov r0, sp", // first arg = original sp (argc/argv) + "bic sp, sp, #7", // align sp to 8 bytes (AAPCS) + "bl {entry_rust}", + "mov r7, #1", // SYS_exit + "svc #0", + entry_rust = sym entry_rust, + ); +} + // Global allocator #[global_allocator] static ALLOCATOR: BumpAllocator = BumpAllocator::new(); @@ -193,3 +207,14 @@ const extern "C" fn rust_eh_personality() {} extern "C" fn _Unwind_Resume() -> ! { unsafe { sys_exit(1) } } + +// compiler_builtins emits `.ARM.exidx` entries that reference these even +// with panic=abort. libgcc/libunwind would normally resolve them; we're +// nostdlib, so we stub them. They're never called. +#[cfg(all(not(test), target_arch = "arm"))] +#[unsafe(no_mangle)] +extern "C" fn __aeabi_unwind_cpp_pr0() {} + +#[cfg(all(not(test), target_arch = "arm"))] +#[unsafe(no_mangle)] +extern "C" fn __aeabi_unwind_cpp_pr1() {}