diff --git a/.gitignore b/.gitignore index f8935b427e73c52fd9674b34b300ef73877bcd90..3ee4c55e3d832b1a7a95e2e8812c1a929efbffb1 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ *result* nixos-riscv.qcow2 +misc/ +gcroots/ diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000000000000000000000000000000000000..3cdbd5d5931256b88edab73508d3dfe078c366e5 --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,6 @@ +build:lagarto-ox: + stage: build + tags: + - nix + script: + - nix develop -L .#lagarto-ox --command fpga/run-remotely.sh fpgalogin1:ci diff --git a/JOURNAL.md b/JOURNAL.md new file mode 100644 index 0000000000000000000000000000000000000000..10275de1493e0c7dd80e187d27486a8ff49c6c34 --- /dev/null +++ b/JOURNAL.md @@ -0,0 +1,5307 @@ +## 2024-07-02 + +Aleix provided some notes on how to trace the kernel using tracepoints that are +printed to the console: + +``` +BOOT TIME TRACING + - compile with CONFIG_BOOTTIME_TRACING=y + - add to kernel parameters: + trace_options=sym-addr trace_event=initcall:* tp_printk trace_buf_size=1M ftrace=function ftrace_filter="vfs*" + trace_options=sym-addr trace_event=sched:* tp_printk trace_buf_size=1M + - tp_printk sends tracepoint (TRACE_EVENT) to printk buffer + - trace_event=[subsytem:event]; accepts + - *: + - :* + - *:* all + - many more options at https://www.kernel.org/doc/html/latest/trace/boottime-trace.html +``` + +Start with: + +``` +trace_options=sym-addr trace_event=sched:* tp_printk trace_buf_size=1M loglevel=7 +``` + +Can it be caused by the D extension? If I set the ISA to: + +``` +riscv,isa = "rv64imaf"; +``` + +It locks the stage1 script without providing any output. Let see trying again. +Locked again. + +I can disable the FPU in the kernel, and then it will act as a detector of any +floating point instruction. + +Then I just need to rebuild busybox without support for double instructions. 
+ +Let's see if I can build busybox with double instruction + +Nope, the u-boot is reporting the d extension is in the isa: + +> riscv,isa = "rv64imafd"; + +## 2024-07-03 + +I cannot switch to `gcc.arch = rv64ima` because rust fails to build. + +**Assumption**: The extensions F and D work well and don't cause a hang in the +CPU. + +Let's go back and try to get the initrd shell, so we can systematically hang it +in the `switch_root` + +### OBSERVATION: The riscv-timer seems to be causing interrupts with IRQ 5: + +``` +[ 62.439060] irq_handler_entry: irq=5 name=riscv-timer +[ 62.444980] irq_handler_exit: irq=5 ret=handled +``` + +### OBSERVATION: Rohan reports that the serial startup routine runs *after* +init begins. + +### OBSERVATION: Only interrupts in timer, others are zero. + +With: + + commit 4c656bd8ddd2e41ccaa976ff8d6bd9209175a632 (HEAD -> lagarto-ox) + Author: Rodrigo Arias Mallo + Date: Wed Jul 3 13:21:04 2024 +0200 + + Add busybox patch to see debug lines + + The switch_root command seems to hang in the execv() syscall. + +I can see this: + + ~ # cat /proc/interrupts ; sleep 10 ; cat /proc/interrupts + CPU0 + 10: 42926 RISC-V INTC 5 Edge riscv-timer + IPI0: 0 Rescheduling interrupts + IPI1: 0 Function call interrupts + IPI2: 0 CPU stop interrupts + IPI3: 0 CPU stop (for crash dump) interrupts + IPI4: 0 IRQ work interrupts + IPI5: 0 Timer broadcast interrupts + CPU0 + 10: 46023 RISC-V INTC 5 Edge riscv-timer + IPI0: 0 Rescheduling interrupts + IPI1: 0 Function call interrupts + IPI2: 0 CPU stop interrupts + IPI3: 0 CPU stop (for crash dump) interrupts + IPI4: 0 IRQ work interrupts + IPI5: 0 Timer broadcast interrupts + +### OBSERVATION: There is a timer configured at 0x40170000 but in the device +tree we only have one at `timer@40002000`. 
+ + #define OX_ALVEO_TIMER_BASE 0x40170000 + #define ADDR_TIME_L 0x0u // 32 lower bits of the time register + #define ADDR_TIME_H 0x1u // 32 higher bits of the time register + #define ADDR_TIMECMP_L 0x2u // 32 lower bits of the time comparator + #define ADDR_TIMECMP_H 0x3u // 32 higher bits of the time comparator + + https://gitlab.bsc.es/hwdesign/bsc-linux/-/blob/d6d194bd30d9a8fe49c2a278ffb3c3ae7852e75d/bsc_tree/patches/ox_alveo/opensbi/0001-opensbi-ox_alveo-platform.patch#L63 + +### OBSERVATION: When the serial console starts, the speed of the serial port +changes to 9600: + + [ 6.845400] io scheduler mq-deadline registered + [ 6.851500] io scheduler kyber registered + [ 17.644460] Serial: 8250/16550 driver, 4 ports, IRQ sharing disabled + [ 18.141160] printk: console [ttyS0] disabled + [ 18.229480] 40001000.serial: ttyS0 at MMIO 0x40001000 (irq = 11, base_baud = 3125000) is a 16550 + + *** baud: 230400 *** + + *** baud: 460800 *** + + *** baud: 500000 *** + + *** baud: 576000 *** + + *** baud: 500000 *** + + *** baud: 460800 *** + + *** baud: 230400 *** + + *** baud: 115200 *** + + *** baud: 57600 *** + + *** baud: 38400 *** + + *** baud: 19200 *** + faaa0?xx + +### OBSERVATION: Trying to read from the serial console /dev/ttyS0 causes no +more messages in the console (or a hang). + +### QUESTION: Can we make a heartbeat for the kernel? +The idea is to keep a counter in some memory of the kernel so we can see it from +the host being moved. + +### QUESTION: Can we disable the serial driver 8250 from loading? + + initcall_blacklist= + +I need to know the 8250 init function name: + + drivers/tty/serial/8250/8250_core.c:static int __init serial8250_init(void) + +So... + + initcall_blacklist=serial8250_init + +Yes, but that doesn't seem to do anything. 
It is hanging: + + + modprobe dm_mod + [ 627.473580] stage-1-init: [Thu Jan 1 00:10:26 UTC 1970] + echo init /nix/store/v6pi2mqfgshxdsbyxlvpm9nvawxrpijv-nixos-system-nixos-riscv-23.11pre-git/init + [ 628.249440] stage-1-init: [Thu Jan 1 00:10:27 UTC 1970] + set -- init /nix/store/v6pi2mqfgshxdsbyxlvpm9nvawxrpijv-nixos-system-nixos-riscv-23.11pre-git/init + [ 629.004840] stage-1-init: [Thu Jan 1 00:10:28 UTC 1970] + stage2Init=/nix/store/v6pi2mqfgshxdsbyxlvpm9nvawxrpijv-nixos-system-nixos-riscv-23.11pre-git/init + [ 629.733920] stage-1-init: [Thu Jan 1 00:10:29 UTC 1970] + echo /nix/store/snvvqpxmryw1szlllk0bxpm37p8vj8sw-extra-utils/bin/modprobe + + +### QUESTION: What happens if we remap the interruptions? + +- Move the serial from 0 to 1 +- Move the plic from 3 to 2 and remove 7 + +Now we have one context only: + + [ 0.000000] riscv-intc: 64 local interrupts mapped + [ 0.000000] plic: plic@40800000: mapped 3 interrupts with 0 handlers for 1 contexts. + [ 0.000000] riscv: providing IPIs using SBI IPI extension + +Rather than two: + + [ 0.000000] riscv-intc: 64 local interrupts mapped + [ 0.000000] plic: plic@40800000: mapped 3 interrupts with 0 handlers for 2 contexts. + [ 0.000000] riscv: providing IPIs using SBI IPI extension + +### QUESTION: What happens if we block the `sbi_ipi` driver? + + initcall_blacklist=sbi_ipi_init + +Nothing, it cannot be disabled it seems. I will remove SMP support so it won't +be compiled in. + +### OBSERVATION: Searching for 'riscv,plic0' only matches irq-sifive-plic driver. + + hut% rg 'riscv,plic0' + Documentation/devicetree/bindings/interrupt-controller/sifive,plic-1.0.0.yaml + 72: - const: riscv,plic0 + + drivers/irqchip/irq-sifive-plic.c + 572:IRQCHIP_DECLARE(riscv_plic0, "riscv,plic0", plic_init); /* for legacy systems */ + +So it looks that the only driver that setups the plic is the one used by SiFive. 
+Here is the doc: https://static.dev.sifive.com/U54-MC-RVCoreIP.pdf + +### OBSERVATION: The number of handlers is 0, so there are no interrupts. + +It seems the number next to the phandle of the interrupts-extended attribute in +the plic follows a different convention of values. Using 9 and 11: + + plic: plic@40800000: mapped 3 interrupts with 1 handlers for 2 contexts. + +**Remark**: The key combination to run Magic SysRq using the HVC console is +Ctrl-O and then the SysRq key. It only works if the console is being actively +polled, otherwise it hangs. + +## 2024-07-04 + +### OBSERVATION: I saw they changed this option in Cinco Ranch DTS for the +serial: + +> reg-shift = <0>; // regs are spaced on 8 bit boundary (modified from Xilinx UART16550 to be ns16550 compatible) + +Tested booting with debug1 and the ttyS0 console, and it runs extremely slowly +(but still outputs at 115200) and then continues to fail to read keyboard input. + +### QUESTION: Let's try setting the console in poll mode. + +setenv bootargs "root=/dev/ram0 loglevel=7 debug rw earlycon=uart,io,0x40001000,115200n8 boot.trace console=uart,io,0x40001000,115200n8 debug1 init=/nix/store/wavmnv6wjj8y10ha07wxd5f0sqacivj8-nixos-system-nixos-riscv-23.11pre-git/init" + + [ 5.909360] io scheduler mq-deadline registered + [ 5.914900] io scheduler kyber registered + [ 14.405980] Serial: 8250/16550 driver, 4 ports, IRQ sharing disabled + [ 14.856040] 40001000.serial: ttyS0 at MMIO 0x40001000 (irq = 2, base_baud = 3125000) is a 16550 + [ 15.059680] nd_pmem namespace0.0: unable to guarantee persistence of writes + ... + [ 40.250100] clk: Disabling unused clocks + [ 40.256220] Warning: unable to open an initial console. 
<---- SEE THIS + [ 40.618300] Freeing unused kernel image (initmem) memory: 5592K + [ 40.728300] Checked W+X mappings: passed, no W+X pages found + [ 40.735540] Run /init as init process + [ 40.740080] with arguments: + [ 40.743920] /init + [ 40.746660] with environment: + [ 40.750740] HOME=/ + [ 40.754020] TERM=linux + [ 46.569960] Kernel panic - not syncing: Attempted to kill init! exitcode=0x00000100 + [ 46.578100] CPU: 0 PID: 1 Comm: init Not tainted 6.6.1 #1-NixOS + [ 46.584600] Hardware name: Barcelona Supercomputing Center - Lagarto Ox (NixOS) (DT) + [ 46.592740] Call Trace: + [ 46.595640] [] dump_backtrace+0x38/0x48 + [ 46.601760] [] show_stack+0x50/0x68 + [ 46.607540] [] dump_stack_lvl+0x3c/0x5c + [ 46.613660] [] dump_stack+0x20/0x30 + [ 46.619400] [] panic+0x158/0x374 + [ 46.624900] [] do_exit+0x9e8/0x9f0 + [ 46.630580] [] do_group_exit+0x44/0xb0 + [ 46.636600] [] __wake_up_parent+0x0/0x40 + [ 46.642780] [] do_trap_ecall_u+0x14c/0x168 + [ 46.649140] [] ret_from_exception+0x0/0xac + [ 46.655500] Kernel Offset: 0x0 from 0xffffffff80000000 + [ 46.661160] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x00000100 ]--- + +Also found: `no_console_suspend` + +### OBSERVATION: There are messages of address space being assigned to +registers: + + Slave segment '/MEEP_uart_0/S_AXI/Reg' is being assigned into address space '/m_axi_uart0' at <0x0000_0000 [ 4K ]>. + Slave segment '/MEEP_uart_1/S_AXI/Reg' is being assigned into address space '/m_axi_uart1' at <0x0000_0000 [ 4K ]>. + +### QUESTION: What happens if I enable `CONFIG_CONSOLE_POLL`? + +With `console=ttyS0,115200n8 debug1` I cannot type. 
+ +### OBSERVATION: I can dump iomem memory with the tool devmem: + +But it seems I cannot dump the registers of the serial io mapped region: + + ~ # cat /proc/iomem + 40001000-400010ff : serial + 60000000-7fffffff : Reserved + 80000000-ffefffff : System RAM + 80201000-81fa0b87 : Kernel image + 80201000-80cb177f : Kernel code + 81400000-819fffff : Kernel rodata + 81c00000-81f18747 : Kernel data + 81f19000-81fa0b87 : Kernel bss + 100000000-1bfffffff : namespace0.0 + + ~ # devmem 0x40001000 + devmem: mmap: Operation not permitted + +It looks like the following options may be required to be disabled to allow +user-space tools read those regions. + + ~ # zcat /proc/config.gz | grep CONFIG_STRICT_DEVMEM + CONFIG_STRICT_DEVMEM=y + ~ # zcat /proc/config.gz | grep CONFIG_IO_STRICT_DEVMEM + CONFIG_IO_STRICT_DEVMEM=y + +Let's try disabling `CONFIG_STRICT_DEVMEM` and see if we can read the serial +registers. + +It works! + + ~ # cat /proc/iomem + 40001000-400010ff : serial + 60000000-7fffffff : Reserved + 80000000-ffefffff : System RAM + 80201000-81fa0b87 : Kernel image + 80201000-80cb159f : Kernel code + 81400000-819fffff : Kernel rodata + 81c00000-81f18707 : Kernel data + 81f19000-81fa0b87 : Kernel bss + 100000000-1bfffffff : namespace0.0 + ~ # devmem 0x40001000 + 0x0000006E + ~ # devmem 0x40001000 + 0x0000006C + ~ # devmem 0x40001000 + 0x00000072 + ~ # devmem 0x40001000 + 0x0000000D + +### OBSERVATION: The interrupt register of the serial console is 0x0: + +Assuming the console registers follow AXI UART 16550, here is the IER: + +> 0x1004 IER R/W Interrupt Enable Register + +Which is zero: + +~ # devmem 0x40001004 +0x00000000 + +The line control register is 0x3: + +~ # devmem 0x4000100C +0x00000003 + +### QUESTION: Can I write to some memory address and see the result from the host? + +For that I would need to find some address that is mapped to the DMA or to the +pmem. Xavi recommended `0x6000_0000` as it is uncached. 
+ +It seems to have some content already: + + ~ # devmem 0x60000000 + 0x00000093 + ~ # devmem 0x60000004 + 0x00000113 + ~ # devmem 0x60000008 + 0x00000193 + ~ # devmem 0x60000010 + 0x00000293 + ~ # devmem 0x60000014 + 0x00000313 + ~ # devmem 0x60000018 + 0x00000393 + +Writing test seems to work: + + ~ # devmem 0x60000000 32 0x11223344 + ~ # devmem 0x60000000 + 0x11223344 + +So, technically we should be using the `/dev/qdma34000-MM-0` device, as we +already use the other one to map the memory. + + [bsc015557@fpgan02 nixos]$ for i in {0..16}; do addr=$(($i * 0x10000000)); \ + printf "addr 0x%09x: " $addr; dd if=/dev/qdma34000-MM-0 count=16 bs=1 skip=$addr 2>/dev/null | xxd; done + addr 0x000000000: 00000000: 4444 4444 4444 4444 4444 4444 4444 4444 DDDDDDDDDDDDDDDD + addr 0x010000000: 00000000: 0000 0000 0000 0000 0000 0000 0000 0000 ................ + addr 0x020000000: 00000000: 3333 3333 3333 3333 3333 3333 3333 3333 3333333333333333 + addr 0x030000000: 00000000: cccc cccc cccc cccc cccc cccc cccc cccc ................ + addr 0x040000000: 00000000: cccc cccc cccc cccc cccc cccc cccc cccc ................ + addr 0x050000000: 00000000: 0000 0000 0000 0000 0000 0000 0000 0000 ................ + addr 0x060000000: 00000000: 0000 0000 0000 0000 0000 0000 0000 0000 ................ + addr 0x070000000: 00000000: 0000 0000 0000 0000 0000 0000 0000 0000 ................ + addr 0x080000000: 00000000: 3333 3333 3333 3333 3333 3333 3333 3333 3333333333333333 + addr 0x090000000: 00000000: 0000 0000 0000 0000 0000 0000 0000 0000 ................ + addr 0x0a0000000: 00000000: 0000 0000 0000 0000 0000 0000 0000 0000 ................ 
+ addr 0x0b0000000: 00000000: 2f2f 2f2f 2f2f 2f2f 2f2f 2f2f 2f2f 2f2f //////////////// + addr 0x0c0000000: 00000000: 6e6e 6e6e 6e6e 6e6e 6e6e 6e6e 6e6e 6e6e nnnnnnnnnnnnnnnn + addr 0x0d0000000: 00000000: 2020 2020 2020 2020 2020 2020 2020 2020 + addr 0x0e0000000: 00000000: 6c6c 6c6c 6c6c 6c6c 6c6c 6c6c 6c6c 6c6c llllllllllllllll + addr 0x0f0000000: 00000000: 6767 6767 6767 6767 6767 6767 6767 6767 gggggggggggggggg + addr 0x100000000: 00000000: 2424 2424 2424 2424 2424 2424 2424 2424 $$$$$$$$$$$$$$$$ + + [bsc015557@fpgan02 nixos]$ for i in {0..16}; do addr=$(($i * 0x10000000)); \ + printf "addr 0x%09x: " $addr; dd if=/dev/qdma34000-MM-1 count=16 bs=1 skip=$addr 2>/dev/null | xxd; done + addr 0x000000000: 00000000: 4444 4444 4444 4444 4444 4444 4444 4444 DDDDDDDDDDDDDDDD + addr 0x010000000: 00000000: 0000 0000 0000 0000 0000 0000 0000 0000 ................ + addr 0x020000000: 00000000: 3333 3333 3333 3333 3333 3333 3333 3333 3333333333333333 + addr 0x030000000: 00000000: cccc cccc cccc cccc cccc cccc cccc cccc ................ + addr 0x040000000: 00000000: cccc cccc cccc cccc cccc cccc cccc cccc ................ + addr 0x050000000: 00000000: 0000 0000 0000 0000 0000 0000 0000 0000 ................ + addr 0x060000000: 00000000: 0000 0000 0000 0000 0000 0000 0000 0000 ................ + addr 0x070000000: 00000000: 0000 0000 0000 0000 0000 0000 0000 0000 ................ + addr 0x080000000: 00000000: 3333 3333 3333 3333 3333 3333 3333 3333 3333333333333333 + addr 0x090000000: 00000000: 0000 0000 0000 0000 0000 0000 0000 0000 ................ + addr 0x0a0000000: 00000000: 0000 0000 0000 0000 0000 0000 0000 0000 ................ 
+ addr 0x0b0000000: 00000000: 2f2f 2f2f 2f2f 2f2f 2f2f 2f2f 2f2f 2f2f //////////////// + addr 0x0c0000000: 00000000: 6e6e 6e6e 6e6e 6e6e 6e6e 6e6e 6e6e 6e6e nnnnnnnnnnnnnnnn + addr 0x0d0000000: 00000000: 2020 2020 2020 2020 2020 2020 2020 2020 + addr 0x0e0000000: 00000000: 6c6c 6c6c 6c6c 6c6c 6c6c 6c6c 6c6c 6c6c llllllllllllllll + addr 0x0f0000000: 00000000: 6767 6767 6767 6767 6767 6767 6767 6767 gggggggggggggggg + addr 0x100000000: 00000000: 2424 2424 2424 2424 2424 2424 2424 2424 $$$$$$$$$$$$$$$$ + +Neither of the two queues seems to have the value 0x11223344 at any multiple of `0x1000_0000`. + +Let's verify first that this method works. The kernel is loaded here: + + [bsc015557@fpgan02 nixos]$ printf '0x%x\n' $FPGACTL_KERNEL_ADDR + 0x24000000 + +So we should see the same values as the kernel file: + + [bsc015557@fpgan02 nixos]$ dd if=kernel.bin count=16 bs=1 2>/dev/null| xxd + 00000000: 6f00 400d 0000 0000 0000 2000 0000 0000 o.@....... ..... + +But we don't see the same: + + [bsc015557@fpgan02 nixos]$ dd if=/dev/qdma34000-MM-1 count=16 bs=1 skip=$FPGACTL_KERNEL_ADDR 2>/dev/null | xxd + 00000000: 9797 9797 9797 9797 9797 9797 9797 9797 ................ + [bsc015557@fpgan02 nixos]$ dd if=/dev/qdma34000-MM-0 count=16 bs=1 skip=$FPGACTL_KERNEL_ADDR 2>/dev/null | xxd + 00000000: 9797 9797 9797 9797 9797 9797 9797 9797 ................ + + +### QUESTION: Missing forward M to S via Mideleg? + +Could it be that MIDELEG is not forwarding the interrupts to the +Supervisor (kernel)? + + Boot HART MIDELEG : 0x0000000000000222 + Boot HART MEDELEG : 0x000000000000b109 + + +### QUESTION: Can we add a timer to the PLIC to test the interrupts? 
+ + +### OBSERVATION: Here is the PLIC register dump: + + ~ # for i in `seq 0 16`; do addr=$((0x40600000 + $i)); printf '%08x: ' $addr; devmem $addr; done + 40600000: 0x00010002 + 40600001: 0x09000000 + 40600002: 0x00090000 + 40600003: 0x00000900 + 40600004: 0x00010009 + 40600005: 0x00000000 + 40600006: 0x00000000 + 40600007: 0x00000000 + 40600008: 0x00000000 + 40600009: 0x00000000 + 4060000a: 0x00000000 + 4060000b: 0x00000000 + 4060000c: 0x00000000 + 4060000d: 0x00000000 + 4060000e: 0x00000000 + 4060000f: 0x00000000 + 40600010: 0x00000000 + +### QUESTION: Can we boot with the new bitstream that includes the second UART? + +The interrupts are enabled for UART 1, not the default UART 0. + + +### OBSERVATION: I'm using 0x100 not 0x1000 in the serial range: + + reg = <0x0 0x40003000 0x0 0x100>; + reg = <0x0 0x40003000 0x0 0x1000>; + +Can this cause any problems? + +It doesn't seem to change anything, still unable to send any bytes. + + +### QUESTION: Can we use virtio to mount a FS in the DMA shared memory? + + + +## 2024-07-05 + +### OBSERVATION: The kernel continues working when the console hangs. + +Switching to 0x100000000 as 0x60000000 shows: + + ~ # devmem 0x6000000 + 0xBADCAB1E + +With the following loop: + + ~ # i=0; while [ 1 ]; do let i=$i+1; devmem 0x100000000 32 $i; done & + ~ # cat /dev/ttyS0 + (hangs) + +Shows the kernel works: + + [bsc015557@fpgan02 nixos]$ while [ 1 ]; do xxd -s $((0x100000000 - 0x60000000)) -l 4 /dev/qdma34000-MM-1; sleep 0.2; done + ... + a0000000: 6400 0000 d... + a0000000: 6500 0000 e... + a0000000: 6600 0000 f... + a0000000: 6700 0000 g... + a0000000: 6800 0000 h... + a0000000: 6900 0000 i... + + +### QUESTION: Can we reproduce it with `switch_root`? + +For that I would have to ensure the process continues to operate, even if we +exit the console. Maybe I can make a double fork? + +I cannot use `0x1_0000_0000` as that is where the pmem will be. 
But I can try to +use an address at the end, as we are not filling the whole space. + + [0x1_0000_0000, 0x1_c000_0000) -> PMEM (3072 MiB) + +Maybe `0x1_bfff_0000`? Let's try first from the initrd shell. + + i=0; while [ 1 ]; do let i=$i+1; devmem 0x1bfff0000 32 $i; done & + +Then + + while [ 1 ]; do xxd -s $((0x1bfff0000 - 0x60000000)) -l 4 /dev/qdma34000-MM-1; sleep 0.2; done + +Yes, it seems to be working. Let's load the rootfs too. + +I added a loop in the stage1 script. +### QUESTION: Can we see any clock in memory? + +This will allow us to check if the AXI still works. + +### OBSERVATION: The kernel stops updating the counter in the mount phase. + +Managed to reach the mount and hang there: + + [ 337.504740] stage-1-init: [Thu Jan 1 00:05:36 UTC 1970] + '[' -d + /dev/disk/by-label/NIXOS_SD ] + [ 338.284560] stage-1-init: [Thu Jan 1 00:05:37 UTC 1970] + mkdir -m 0755 -p + /mnt-root/ + [ 339.017420] stage-1-init: [Thu Jan 1 00:05:38 UTC 1970] + local 'n=0' + [ 339.752560] stage-1-init: [Thu Jan 1 00:05:39 UTC 1970] + true + [ 340.488960] stage-1-init: [Thu Jan 1 00:05:39 UTC 1970] + mount /mnt-root/ + +After almost 6 minutes, with 571 beats: + + $ xxd -s 5905514496 -l 4 /dev/qdma34000-MM-1 + 5fff0000: 3b02 0000 ;... + +It looks like the kernel is the one getting stuck *or* at least is unable to +propagate the heartbeat changes to the host. It would be nice to monitor a +hardware clock from the DMA region too, so we can rule out problems in the AXI. + + +### OBSERVATION: An ioctl fails for /dev/console + + [ 177.009540] stage-1-init: [Thu Jan 1 00:02:56 UTC 1970] + udevadm settle + + kbd_mode -u -C /dev/console + kbd_mode: KDSKBMODE: Inappropriate ioctl for device + + printf '\033%%G' + + loadkmap + [ 266.301040] stage-1-init: [Thu Jan 1 00:04:25 UTC 1970] + kbd_mode -u -C /dev/console + + +### ASSUMPTION: The kernel hangs. + +If the kernel hangs, there must be an instruction or sequence of instructions +that causes it. 
First I need to determine what is being executed by the kernel. +For that I could use `ftrace` to see which program is running at the time it +hangs. + + trace_options=sym-addr trace_event=initcall:* tp_printk trace_buf_size=1M + + (prev_comm != 2 && next_comm != 2) + +So, we can just enable the `tp_printk` but not the tracer. Then in the initrd +script, I enable the function tracer and the filter. + + +### OBSERVATION: It takes a long time to init the pty: + +Interesting timing: + + [ 12.612620] initcall_start: func=pty_init+0x0/0x3f4 + [ 20.962640] initcall_finish: func=pty_init+0x0/0x3f4 ret=0 + + +### OBSERVATION: The kcompactd0 daemon is using the CPU: + + [ 290.394920] sched_switch: prev_comm=devmem prev_pid=129 prev_prio=120 prev_state=R ==> next_comm=init next_pid=69 next_prio=120 + [ 290.408160] sched_switch: prev_comm=init prev_pid=69 prev_prio=120 prev_state=R ==> next_comm=tee next_pid=68 next_prio=120 + [ 290.420720] sched_switch: prev_comm=tee prev_pid=68 prev_prio=120 prev_state=R+ ==> next_comm=ksoftirqd/0 next_pid=12 next_prio=120 + [ 290.433960] sched_switch: prev_comm=ksoftirqd/0 prev_pid=12 prev_prio=120 prev_state=R ==> next_comm=init next_pid=1 next_prio=120 + [ 290.447100] sched_switch: prev_comm=init prev_pid=1 prev_prio=120 prev_state=R ==> next_comm=kcompactd0 next_pid=22 next_prio=120 + [ 290.460180] sched_switch: prev_comm=kcompactd0 prev_pid=22 prev_prio=120 prev_state=R ==> next_comm=khvcd next_pid=31 next_prio=120 + [ 290.473400] sched_switch: prev_comm=khvcd prev_pid=31 prev_prio=120 prev_state=R ==> next_comm=kworker/u2:2 next_pid=19 next_prio=120 + [ 290.486960] sched_switch: prev_comm=kworker/u2:2 prev_pid=19 prev_prio=120 prev_state=R ==> next_comm=khungtaskd next_pid=18 next_prio=120 + [ 290.500800] sched_switch: prev_comm=khungtaskd prev_pid=18 prev_prio=120 prev_state=R ==> next_comm=kworker/0:1 next_pid=13 next_prio=120 + [ 290.514560] sched_switch: prev_comm=kworker/0:1 prev_pid=13 prev_prio=120 prev_state=R ==> 
next_comm=tee next_pid=68 next_prio=120 + [ 290.527720] sched_switch: prev_comm=tee prev_pid=68 prev_prio=120 prev_state=R+ ==> next_comm=init next_pid=69 next_prio=120 + [ 290.540360] sched_switch: prev_comm=init prev_pid=69 prev_prio=120 prev_state=R ==> next_comm=devmem next_pid=129 next_prio=120 + [ 290.553280] sched_switch: prev_comm=devmem prev_pid=129 prev_prio=120 prev_state=R ==> next_comm=ksoftirqd/0 next_pid=12 next_prio=120 + [ 290.566780] sched_switch: prev_comm=ksoftirqd/0 prev_pid=12 prev_prio=120 prev_state=R ==> next_comm=kcompactd0 next_pid=22 next_prio=120 + [ 290.580500] sched_switch: prev_comm=kcompactd0 prev_pid=22 prev_prio=120 prev_state=R ==> next_comm=init next_pid=1 next_prio=120 + [ 290.593740] sched_switch: prev_comm=init prev_pid=1 prev_prio=120 prev_state=R ==> next_comm=khvcd next_pid=31 next_prio=120 + [ 290.606340] sched_switch: prev_comm=khvcd prev_pid=31 prev_prio=120 prev_state=R ==> next_comm=kworker/u2:2 next_pid=19 next_prio=120 + [ 290.619780] sched_switch: prev_comm=kworker/u2:2 prev_pid=19 prev_prio=120 prev_state=R ==> next_comm=khungtaskd next_pid=18 next_prio=120 + [ 290.633620] sched_switch: prev_comm=khungtaskd prev_pid=18 prev_prio=120 prev_state=R ==> next_comm=tee next_pid=68 next_prio=120 + [ 290.646700] sched_switch: prev_comm=tee prev_pid=68 prev_prio=120 prev_state=R+ ==> next_comm=init next_pid=69 next_prio=120 + [ 290.659320] sched_switch: prev_comm=init prev_pid=69 prev_prio=120 prev_state=R ==> next_comm=kworker/0:1 next_pid=13 next_prio=120 + [ 290.672560] sched_switch: prev_comm=kworker/0:1 prev_pid=13 prev_prio=120 prev_state=R ==> next_comm=devmem next_pid=129 next_prio=120 + [ 290.686080] sched_switch: prev_comm=devmem prev_pid=129 prev_prio=120 prev_state=R ==> next_comm=ksoftirqd/0 next_pid=12 next_prio=120 + [ 290.699720] sched_switch: prev_comm=ksoftirqd/0 prev_pid=12 prev_prio=120 prev_state=R ==> next_comm=init next_pid=1 next_prio=120 + [ 290.712880] sched_switch: prev_comm=init prev_pid=1 
prev_prio=120 prev_state=R ==> next_comm=khvcd next_pid=31 next_prio=120 + [ 290.725500] sched_switch: prev_comm=khvcd prev_pid=31 prev_prio=120 prev_state=R ==> next_comm=kcompactd0 next_pid=22 next_prio=120 + + +### QUESTION: Can we reproduce this hang with 6.9.7? + +Disabling clang as it is failing to build: + + hut% nix develop '.#lagarto-ox' + error: builder for '/nix/store/x1nfa792pv28px70kvfakm3aalcfbdyw-clang-epi-479518d.drv' failed with exit code 2; + last 10 log lines: + > | ^~~~~~~~~~~~~~~ + > 2 errors generated. + > make[2]: *** [lib/Support/CMakeFiles/LLVMSupport.dir/build.make:1868: lib/Support/CMakeFiles/LLVMSupport.dir/Signals.cpp.o] Error 1 + > make[2]: *** Waiting for unfinished jobs.... + > [ 9%] Built target obj.clang-tblgen + > 1 warning generated. + > make[1]: *** [CMakeFiles/Makefile2:9468: lib/Support/CMakeFiles/LLVMSupport.dir/all] Error 2 + > make[1]: *** Waiting for unfinished jobs.... + > [ 9%] Built target obj.llvm-tblgen + > make: *** [Makefile:156: all] Error 2 + For full logs, run 'nix log /nix/store/x1nfa792pv28px70kvfakm3aalcfbdyw-clang-epi-479518d.drv'. + error: 1 dependencies of derivation '/nix/store/m54sxxyi3cg062djrcddcawp10z7r49l-riscv64-unknown-linux-gnu-clang-epi-wrapper-479518d.drv' failed to build + error: 1 dependencies of derivation '/nix/store/yl26dbqqj0snl807c0wjabg4dpbq5gvp-stdenv-linux.drv' failed to build + error: 1 dependencies of derivation '/nix/store/h6180fcl30kqy3apaqjsbkkik2p1spmr-rvb-riscv64-unknown-linux-gnu-da202d6.drv' failed to build + error: 1 dependencies of derivation '/nix/store/b13shgqj7128rdsdzzp4qicqbzl0wnfw-system-path.drv' failed to build + error: 1 dependencies of derivation '/nix/store/6qghlihqcyg6155309ldj5xm9m0v835i-nixos-system-nixos-riscv-24.11pre-git.drv' failed to build + error: 1 dependencies of derivation '/nix/store/l2x18cih29r1kn6vi8imwhkyk98yhw4i-nix-shell-riscv64-unknown-linux-gnu-env.drv' failed to build + + +### QUESTION: Missing cache information may affect? 
+ +Other CPUs report the cache details in the DT. For example this one +https://github.com/torvalds/linux/blob/master/arch/riscv/boot/dts/sifive/fu540-c000.dtsi#L45 + + cpu1: cpu@1 { + compatible = "sifive,u54-mc", "sifive,rocket0", "riscv"; + d-cache-block-size = <64>; + d-cache-sets = <64>; + d-cache-size = <32768>; + d-tlb-sets = <1>; + d-tlb-size = <32>; + device_type = "cpu"; + i-cache-block-size = <64>; + i-cache-sets = <64>; + i-cache-size = <32768>; + i-tlb-sets = <1>; + i-tlb-size = <32>; + mmu-type = "riscv,sv39"; + reg = <1>; + riscv,isa = "rv64imafdc"; + riscv,isa-base = "rv64i"; + riscv,isa-extensions = "i", "m", "a", "f", "d", "c", "zicntr", "zicsr", + "zifencei", "zihpm"; + tlb-split; + next-level-cache = <&l2cache>; + cpu1_intc: interrupt-controller { + #interrupt-cells = <1>; + compatible = "riscv,cpu-intc"; + interrupt-controller; + }; + }; + +We may want to add it to our DT to be sure that it has no effect. +### OBSERVATION: Arrived to stage 2! + + + kill -9 74 + + readlink /proc/75/exe + [ 374.961120] stage-1-init: [Thu Jan 1 00:06:14 UTC 1970] + test 0 -ge 8 + + '[' 75 -eq 1 ] + + kill -9 75 + + readlink /proc/102/exe + + '[' 102 -eq 1 ] + + kill -9 102 + + readlink /proc/137/exe + + continue + + readlink /proc/674/exe + + continue + + readlink /proc/675/exe + + continue + + test -n + + echo /sbin/modprobe + + '[' '!' 
-e /mnt-root//nix/store/xwqaqpc66ijvay9wxnm5nqmi30f2lp1i-nixos-system-nixos-riscv-24.11pre-git/init ] + + mkdir -m 0755 -p /mnt-root/proc /mnt-root/sys /mnt-root/dev /mnt-root/run + + mount --move /proc /mnt-root/proc + + mount --move /sys /mnt-root/sys + + mount --move /dev /mnt-root/dev + + mount --move /run /mnt-root/run + + type -P switch_root + + exec env -i /nix/store/988axh0bq3wqp90gms4b4a0hkfwvkd3i-extra-utils/bin/switch_root /mnt-root /nix/store/xwqaqpc66ijvay9wxnm5nqmi30f2lp1i-nixos-system-nixos-riscv-24.11pre-git/init + + <<< NixOS Stage 2 >>> + + [ 384.203680] EXT4-fs (pmem0p2): re-mounted 44444444-4444-4444-8888-888888888888 r/w. Quota mode: none. + [ 384.287600] booting system configuration /nix/store/xwqaqpc66ijvay9wxnm5nqmi30f2lp1i-nixos-system-nixos-riscv-24.11pre-git + running activation script... + [ 388.163860] stage-2-init: running activation script... + [ 391.643500] random: perl: uninitialized urandom read (4 bytes read) + [ 391.884800] random: perl: uninitialized urandom read (4 bytes read) + [ 425.302000] random: perl: uninitialized urandom read (4 bytes read) + +But then it hangs. + + +## 2024-07-08 + +### QUESTION: Who sets the plic interrupts? + +Shouldn't OpenSBI read the DT and do some configuration in the PLIC while in +machine mode? + + +### OBSERVATION: Semi-stack trace from CincoRanch + + hvc_remove? + console_unlock <-- only called from hvc_remove() + prb_read_valid + desc_read_finalized_seq + __memcpy (multiple times) + get_data + do_trap_break + report_bug + is_valid_bugaddr + copy_from_kernel_nofault + copy_from_kernel_nofault_allowed + find_bug + _printk + vprintk + vprintk_default + vprintk_emit + vprintk_store + sched_clock + vsnprintf + format_decode + __memcpy + printk_parse_prefix + prb_reserve + do_page_fault + fixup_exception + search_exception_tables + search_extable + cmp_ex_search (multiple times) + search_module_extables + __module_address + no_context.part.0 + die_kernel_fault <-- last frame(?) 
+ + +### QUESTION: Can we place a trace point in `hvc_remove`? + +If we are getting stuck in the same place, we should be able to see the +backtrace (assuming the console still works) just before we try to remove the +console device. + +Placed, but still unable to see anything in any hang. Here is a hang in the +Stage 2: + + <<< NixOS Stage 2 >>> + + [ 404.158340] EXT4-fs (pmem0p2): re-mounted 44444444-4444-4444-8888-888888888888 r/w. Quota mode: none. + [ 404.242500] booting system configuration /nix/store/0za1vqh5alk7mxqs59qxx8izmwmf21w6-nixos-system-nixos-riscv-24.11pre-git + running activation script... + [ 408.148380] stage-2-init: running activation script... + [ 411.612240] random: perl: uninitialized urandom read (4 bytes read) + [ 411.866440] random: perl: uninitialized urandom read (4 bytes read) + [ 447.588880] random: perl: uninitialized urandom read (4 bytes read) + +Still, it may be hanging in a similar way, causing a loop of page faults just +while trying to printk to the console, which would explain why we don't see +anything and why the heartbeat stops. + + + +Fran has created another bitstream with two consoles enabled +(`ox_u55c_a234c132.bit`), let's see if I manage to boot with it. + +First I will need to enable the consoles in the DTS. + +It doesn't seem to produce any output in the UART. I cannot see OpenSBI while +loading it at each baud rate: + + Type [C-a] [C-h] to see available commands + Terminal ready + + *** baud: 57600 *** + + *** baud: 38400 *** + + *** baud: 19200 *** + + *** baud: 9600 *** + + [...] + + *** baud: 230400 *** + + *** baud: 460800 *** + +Let's keep the bitstream files in a repository, so I can carefully track them +with git too. + +### OBSERVATION: The new bitstream requires a bootrom to start + +I added it to the bitstream repository, as it is a binary blob too. 
Now I need +to update the load addresses: + +https://gitlab.bsc.es/hwdesign/fpga/integration-lab/fpga-tools/-/blob/6a63bcea6d1d59df3c7d62311aa4935efd54d3a3/boot_riscv/boot_sa.sh#L36-40 + +Continues to hang just after those perl messages: + + + mount --move /proc /mnt-root/proc + + mount --move /sys /mnt-root/sys + + mount --move /dev /mnt-root/dev + + mount --move /run /mnt-root/run + + type -P switch_root + + exec env -i /nix/store/988axh0bq3wqp90gms4b4a0hkfwvkd3i-extra-utils/bin/switch_root /mnt-root /nix/store/0za1vqh5alk7mxqs59qxx8izmwmf21w6-nixos-system-nixos-riscv-24.11pre-git/init + + <<< NixOS Stage 2 >>> + + [ 541.559320] EXT4-fs (pmem0p2): re-mounted 44444444-4444-4444-8888-888888888888 r/w. Quota mode: none. + [ 541.641280] booting system configuration /nix/store/0za1vqh5alk7mxqs59qxx8izmwmf21w6-nixos-system-nixos-riscv-24.11pre-git + running activation script... + [ 545.569700] stage-2-init: running activation script... + [ 549.019380] random: perl: uninitialized urandom read (4 bytes read) + [ 549.274940] random: perl: uninitialized urandom read (4 bytes read) + +### QUESTION: What happens if we disable `CONFIG_BUG` + +May be a long shot, but if we are experiencing the same page fault loop as in +cincoranch we may as well try. + +## 2024-07-09 + +### QUESTION: Maybe we can try without out-of-order? + +I made a small tool in C to view and change the CSR register that controls the +in-order/out-of-order. Maybe we can try with the "in-order" setting. + +We arrive to execute `systemd`: + + + Starting interactive shell... 
+ + setsid /nix/store/xm3mpj9aldz5r4s5yb7p08jdjv98hj4w-extra-utils/bin/ash -c 'exec /nix/store/xm3mpj9aldz5r4s5yb7p08jdjv98hj4w-extra-utils/bin/ash < /dev/hvc0 >/dev/hvc0 2>/dev/hvc0' + [ 90.077300] stage-1-init: [Thu Jan 1 00:01:27 UTC 1970] + '[' -n 1 -a i '=' f ] + [ 90.639760] stage-1-init: [Thu Jan 1 00:01:28 UTC 1970] + '[' -n 1 -a i '=' i ] + ~ # [ 90.967260] stage-1-init: [Thu Jan 1 00:01:28 UTC 1970] + echo 'Starting interactive shell...' + [ 91.234980] stage-1-init: [Thu Jan 1 00:01:28 UTC 1970] Starting interactive shell... + [ 91.569580] stage-1-init: [Thu Jan 1 00:01:29 UTC 1970] + setsid /nix/store/xm3mpj9aldz5r4s5yb7p08jdjv98hj4w-extra-utils/bin/ash -c 'exec /nix/store/xm3mpj9aldz5r4s5yb7p08jdjv98hj4w-extr + a-utils/bin/ash < /dev/hvc0 >/dev/hvc0 2>/dev/hvc0' + which csrtool + /nix/store/xm3mpj9aldz5r4s5yb7p08jdjv98hj4w-extra-utils/bin/csrtool + ~ # csrtool + CSR 0x801 = 0u + ~ # csrtool o + unknown 'o', use: mem-in-order, all-in-order or all-out-of-order + ~ # csrtool all-in-order + CSR 0x801 = 7u + ~ # csrtool + CSR 0x801 = 7u + ~ # + + IFS='=' + + echo init /nix/store/xmagm60y90pfh3yvqanvmaswa0m3cb0a-nixos-system-nixos-riscv-24.11pre-git/init + + set -- init /nix/store/xmagm60y90pfh3yvqanvmaswa0m3cb0a-nixos-system-nixos-riscv-24.11pre-git/init + + stage2Init=/nix/store/xmagm60y90pfh3yvqanvmaswa0m3cb0a-nixos-system-nixos-riscv-24.11pre-git/init + + echo /nix/store/xm3mpj9aldz5r4s5yb7p08jdjv98hj4w-extra-utils/bin/modprobe + + basename dm_mod + [...] + + echo /sbin/modprobe + + '[' '!' 
-e /mnt-root//nix/store/xmagm60y90pfh3yvqanvmaswa0m3cb0a-nixos-system-nixos-riscv-24.11pre-git/init ] + + mkdir -m 0755 -p /mnt-root/proc /mnt-root/sys /mnt-root/dev /mnt-root/run + + mount --move /proc /mnt-root/proc + + mount --move /sys /mnt-root/sys + + mount --move /dev /mnt-root/dev + + mount --move /run /mnt-root/run + + type -P switch_root + + exec env -i /nix/store/xm3mpj9aldz5r4s5yb7p08jdjv98hj4w-extra-utils/bin/switch_root /mnt-root /nix/store/xmagm60y90pfh3yvqanvmaswa0m3cb0a-nixos-system-nixos-riscv-24.11pre-git/init + + <<< NixOS Stage 2 >>> + + [ 967.703320] EXT4-fs (pmem0p2): re-mounted 44444444-4444-4444-8888-888888888888 r/w. Quota mode: none. + [ 967.928020] booting system configuration /nix/store/xmagm60y90pfh3yvqanvmaswa0m3cb0a-nixos-system-nixos-riscv-24.11pre-git + running activation script... + [ 977.608980] stage-2-init: running activation script... + bbbbbbbbbbbbbbbbbbbbsetting up /etc... + [ 1084.376420] stage-2-init: setting up /etc... + starting systemd... + +Not sure if it is a good reproducer, as it is taking around 15 minutes to hang +in a very large piece of software, while when we have the out-of-order enabled, +we can hang in half of the time in some script. + +Either way, we need to see a backtrace of where it is hanging to understand why +it does. We may also enable a stage-2 heartbeat to be sure that it is hanging +the kernel and not only the console. + +Another idea is to arrive at a proper bash shell, where we can have debugging +tools, which may allow us to go slowly until we catch the bug. + +### OBSERVATION: Setting memory in-order only causes a hang + +Tested with: + + $ csrtool mem-in-order + +And it hangs just after exiting the tool. + +### QUESTION: Can we see the printk buffer from the host? + +If the problem that we are observing is somehow related to the recursive +segfault of the kernel in Cincoranch, we may be able to see the printk ring +buffer by directly poking at the memory from the host. 
+ +### QUESTION: Can we crash the CPU by exercising the memory? + +I did a small tool `memtool` that performs allocations and +deallocations. + +Good news, the tool has hang the console (potentially the kernel too). + + ~ # which memtool + /nix/store/amj11aclwx62d4mnvkhdgj19kq5gjb9y-extra-utils/bin/memtool + ~ # memtool + iter 0, nblocks 1, nbytes 0.1M (A) + iter 1, nblocks 2, nbytes 3.5M (A) + iter 2, nblocks 3, nbytes 3.8M (A) + iter 3, nblocks 4, nbytes 5.7M (A) + iter 4, nblocks 5, nbytes 6.8M (A) + iter 5, nblocks 6, nbytes 10.7M (A) + iter 6, nblocks 5, nbytes 10.6M (D) + iter 7, nblocks 6, nbytes 13.9M (A) + iter 8, nblocks 7, nbytes 16.4M (A) + iter 9, nblocks 8, nbytes 19.8M (A) + iter 10, nblocks 9, nbytes 21.2M (A) + iter 11, nblocks 10, nbytes 24.3M (A) + iter 12, nblocks 11, nbytes 27.4M (A) + iter 13, nblocks 12, nbytes 28.3M (A) + iter 14, nblocks 13, nbytes 31.6M (A) + iter 15, nblocks 12, nbytes 28.2M (D) + iter 16, nblocks 13, nbytes 29.2M (A) + iter 17, nblocks 14, nbytes 30.8M (A) + iter 18, nblocks 15, nbytes 32.6M (A) + iter 19, nblocks 16, nbytes 32.8M (A) + iter 20, nblocks 17, nbytes 36.8M (A) + iter 21, nblocks 18, nbytes 39.6M (A) + iter 22, nblocks 19, nbytes 41.1M (A) + iter 23, nblocks 20, nbytes 44.1M (A) + iter 24, nblocks 21, nbytes 46.9M (A) + iter 25, nblocks 20, nbytes 46.5M (D) + iter 26, nblocks 21, nbytes 50.2M (A) + iter 27, nblocks 22, nbytes 53.8M (A) + +Let's see if we can reproduce it again in the same position. 
+ + ~ # memtool + iter 0, nblocks 1, nbytes 0.1M (A) + iter 1, nblocks 2, nbytes 3.5M (A) + iter 2, nblocks 3, nbytes 3.8M (A) + iter 3, nblocks 4, nbytes 5.7M (A) + iter 4, nblocks 5, nbytes 6.8M (A) + iter 5, nblocks 6, nbytes 10.7M (A) + iter 6, nblocks 5, nbytes 10.6M (D) + iter 7, nblocks 6, nbytes 13.9M (A) + iter 8, nblocks 7, nbytes 16.4M (A) + iter 9, nblocks 8, nbytes 19.8M (A) + iter 10, nblocks 9, nbytes 21.2M (A) + iter 11, nblocks 10, nbytes 24.3M (A) + iter 12, nblocks 11, nbytes 27.4M (A) + iter 13, nblocks 12, nbytes 28.3M (A) + iter 14, nblocks 13, nbytes 31.6M (A) + iter 15, nblocks 12, nbytes 28.2M (D) + iter 16, nblocks 13, nbytes 29.2M (A) + iter 17, nblocks 14, nbytes 30.8M (A) + iter 18, nblocks 15, nbytes 32.6M (A) + iter 19, nblocks 16, nbytes 32.8M (A) + iter 20, nblocks 17, nbytes 36.8M (A) + iter 21, nblocks 18, nbytes 39.6M (A) + iter 22, nblocks 19, nbytes 41.1M (A) + iter 23, nblocks 20, nbytes 44.1M (A) + iter 24, nblocks 21, nbytes 46.9M (A) + iter 25, nblocks 20, nbytes 46.5M (D) + iter 26, nblocks 21, nbytes 50.2M (A) + iter 27, nblocks 22, nbytes 53.8M (A) + +Let's make it automatic, so we only need to boot and confirm that it +hangs. Just in case we can make it not hang by a miracle. + + +With blocks of up to 64K we hang in the ~50 M region. + + iter=2042 nblocks=1577 allocated=50458K (A) + iter=2043 nblocks=1578 allocated=50489K (A) + iter=2044 nblocks=1579 allocated=50550K (A) + iter=2045 nblocks=1580 allocated=50605K (A) + +With blocks of maxsize=512K it also hangs around ~57M. + + memtool v1.0.0 maxsize=512K + iter=0 nblocks=1 allocated=88K (A) + iter=1 nblocks=2 allocated=464K (A) + ... 
+ iter=275 nblocks=218 allocated=56674K (A) + iter=276 nblocks=219 allocated=56787K (A) + iter=277 nblocks=220 allocated=57252K (A) + iter=278 nblocks=221 allocated=57493K (A) + iter=279 nblocks=222 allocated=57581K (A) + iter=280 nblocks=221 allocated=57416K (D) + iter=281 nblocks=222 allocated=57521K (A) + +Maybe there is a problem in the memory segment? Can we reduce it to 1 GiB only +and see if it has any effect? + + iter=289 nblocks=228 allocated=58636K (A) + iter=290 nblocks=227 allocated=58412K (D) + iter=291 nblocks=228 allocated=58480K (A) + iter=292 nblocks=229 allocated=58599K (A) + +Has changed, but not much. + + aaaiter=291 nblocks=228 allocated=58480K (A) + allocating... + filling... + aaaaaaaaiter=292 nblocks=229 allocated=58599K (A) + allocating... + aafilling... + +It seems to be getting stuck in the filling phase. Can we trace it down with +ftrace? It should be generating page faults. + +### 2024-07-10 + +So, if we manage to crash in the filling phase, we can further pinpoint the +issue and remove any effect of `malloc()`. It would be only related to a page +fault and the MMU at this point. + +Let's make a much simpler program that only allocates once a buffer of N bytes +and then begins filling it, printing the progress in the output. 
+ +### OBSERVATION: Writing to a vector also stops around 58 MiB + + + memtool fill 536870912 + memtool v0.0.1 - Rodrigo Arias Mallo + mode fill: nbytes=512M, n=134217728 + written=0K, addr=0x3f9b800020 OK + written=4096K, addr=0x3f9bc00020 OK + written=8192K, addr=0x3f9c000020 OK + written=12288K, addr=0x3f9c400020 OK + written=16384K, addr=0x3f9c800020 OK + written=20480K, addr=0x3f9cc00020 OK + written=24576K, addr=0x3f9d000020 OK + written=28672K, addr=0x3f9d400020 OK + written=32768K, addr=0x3f9d800020 OK + written=36864K, addr=0x3f9dc00020 OK + written=40960K, addr=0x3f9e000020 OK + written=45056K, addr=0x3f9e400020 OK + written=49152K, addr=0x3f9e800020 OK + written=53248K, addr=0x3f9ec00020 OK + written=57344K, addr=0x3f9f000020 OK + +Let's see if we can trace the page fault. + +In today's meeting, Jonnatan suggests testing the memtool program with all-in-order +configuration. I should also try to reproduce this hang with a "production" +bitstream (from master). + +Also, Xavi reports that the memtool chain test continued to run until the memory +was exhausted using an old bitstream. + +Let's do the quick CSR test first, and then we go back to the ftrace testing, +which will take more time.
+ +Here are the commands I was testing, but nothing comes out of the console, even +after booting with the `tp_printk trace_buf_size=1M` boot options: + + mkdir -p /sys/kernel/debug/ + mount -t debugfs none /sys/kernel/debug/ + td=/sys/kernel/debug/tracing + echo nop > $td/current_tracer + echo 100 > $td/max_graph_depth + echo do_page_fault > $td/set_graph_function + echo function_graph > $td/current_tracer + +### OBSERVATION: Using all-in-order causes the hang in the same place + +After setting the CSR 0x801 register to 0x7, the all-in-order configuration, the +memtool fill test continues to hang in the same position: + + + csrtool all-in-order + CSR 0x801 = 7u + + memtool fill 536870912 + memtool v0.0.1 - Rodrigo Arias Mallo + mode fill: nbytes=512M, n=134217728 + written=0K, addr=0x3f8d600020 OK + written=4096K, addr=0x3f8da00020 OK + written=8192K, addr=0x3f8de00020 OK + written=12288K, addr=0x3f8e200020 OK + written=16384K, addr=0x3f8e600020 OK + written=20480K, addr=0x3f8ea00020 OK + written=24576K, addr=0x3f8ee00020 OK + written=28672K, addr=0x3f8f200020 OK + written=32768K, addr=0x3f8f600020 OK + written=36864K, addr=0x3f8fa00020 OK + written=40960K, addr=0x3f8fe00020 OK + written=45056K, addr=0x3f90200020 OK + written=49152K, addr=0x3f90600020 OK + written=53248K, addr=0x3f90a00020 OK + written=57344K, addr=0x3f90e00020 OK + +It doesn't seem to have any observable effect with this test, other than running +more slowly. + +Interesting [article](https://wiki.osdev.org/RISC-V_Bare_Bones) on how to write +a simple bootrom that outputs some ASCII text into the console. + +Regarding the `do_page_fault` filter, it seems that it is not available. I need to +pick one from `available_filter_functions` instead. Let's try `handle_page_fault`. + +It still doesn't seem to appear in the console. It is visible with a SysRq +request Ctrl+O then 'z'. + +Maybe I can try disabling the huge pages, just to discard that it may be +related to it: `CONFIG_HUGETLBFS`.
+ +Same hang with huge pages disabled, but a bit further ~70 MB. + +### OBSERVATION: The Linux memtest fails in the first round + + [ 0.000000] Linux version 6.9.7 (nixbld@localhost) (riscv64-unknown-linux-gnu-gcc (GCC) 13.3.0, GNU ld (GNU Binutils) 2.41) #1-NixOS Thu Jun 27 11:52:32 UTC 2024 + [ 0.000000] Machine model: Barcelona Supercomputing Center - Lagarto Ox (NixOS) + [ 0.000000] SBI specification v2.0 detected + [ 0.000000] SBI implementation ID=0x1 Version=0x10004 + [ 0.000000] SBI TIME extension detected + [ 0.000000] SBI IPI extension detected + [ 0.000000] SBI RFENCE extension detected + [ 0.000000] SBI DBCN extension detected + [ 0.000000] earlycon: sbi0 at I/O port 0x0 (options '') + [ 0.000000] printk: legacy bootconsole [sbi0] enabled + [ 0.000000] Reserved memory: created DMA memory pool at 0x0000000060000000, size 256 MiB + [ 0.000000] OF: reserved mem: initialized node dma_pool@60000000, compatible id shared-dma-pool + [ 0.000000] OF: reserved mem: 0x0000000060000000..0x000000006fffffff (262144 KiB) map non-reusable dma_pool@60000000 + [ 0.000000] Reserved memory: created DMA memory pool at 0x0000000070000000, size 256 MiB + [ 0.000000] OF: reserved mem: initialized node dma_pool@70000000, compatible id shared-dma-pool + [ 0.000000] OF: reserved mem: 0x0000000070000000..0x000000007fffffff (262144 KiB) map non-reusable dma_pool@70000000 + [ 0.000000] cma: Reserved 16 MiB at 0x00000000bf000000 on node -1 + [ 0.000000] early_memtest: # of tests: 3 + [ 0.000000] 0x0000000080000000 - 0x0000000080013000 pattern 5555555555555555 + +This suggests that the problem is not in the virtual memory, but in +the actual physical memory. + +I will try a similar test with uboot with the `mtest` command, but it requires +enabling it first. + +### OBSERVATION: Memory in the 0xb0000000..0xc0000000 range is bad + +Reproduced from U-Boot: + + => mtest 0x80000000 0x90000000 0 2 + Testing 80000000 ... 90000000: + Pattern FFFFFFFFFFFFFFFF Writing...
Reading...Iteration: 2 + Tested 2 iteration(s) with 0 errors. + => mtest 0x90000000 0xa0000000 0 2 + Testing 90000000 ... a0000000: + Pattern FFFFFFFFFFFFFFFF Writing... Reading...Iteration: 2 + Tested 2 iteration(s) with 0 errors. + => mtest 0xa0000000 0xb0000000 0 2 + Testing a0000000 ... b0000000: + Pattern FFFFFFFFFFFFFFFF Writing... Reading...Iteration: 2 + Tested 2 iteration(s) with 0 errors. + => mtest 0xb0000000 0xc0000000 0 2 + Testing b0000000 ... c0000000: + Pattern 0000000000000000 Writing... + +Let's see if we can fix the boot hang by reducing the memory enough to avoid +this bad region. + +## 2024-07-11 + +### OBSERVATION: U-Boot mtest hangs in the last 256 MiB + +After reducing the size of the RAM segment, I run again the mtest, but this time +it fails in the last 256 MiB block. + +I assume that U-Boot moves itself to the last part of the memory, and them mtest +overwrites the U-Boot code, causing a hang. + +So, I simply changed the FDT from U-Boot to skip the first 2M: + + fdt set /memory@80000000 reg <0x0 0x80200000 0x0 0x40000000> + +And then I enabled the memtest in the kernel boot parameters: + + => fdt set /memory@80000000 reg <0x0 0x80200000 0x0 0x30000000> + => setenv bootargs "root=/dev/ram0 loglevel=7 debug rw earlycon=sbi console=hvc0 boot.trace boot.tracedebug init=/nix/store/zxbq93zfg8ijkyq5cq5sb4742rczqfck-nixos-system-nixos-riscv-24.11pre-git/init" + => setenv ramdisk_size 12611657 + => setenv bootargs "root=/dev/ram0 loglevel=7 debug rw earlycon=sbi console=hvc0 boot.trace boot.tracedebug memtest=3 init=/nix/store/zxbq93zfg8ijkyq5cq5sb4742rczqfck-nixos-system-nixos-risc=> 4.11pre-git/init" + => booti ${kernel_addr_r} ${ramdisk_addr_r}:${ramdisk_size} ${fdtcontroladdr} + Moving Image from 0x84000000 to 0x80200000, end=8303c4d0 + ## Flattened Device Tree blob at 80013000 + Booting using the fdt blob at 0x80013000 + Working FDT set to 80013000 + Using Device Tree in place at 0000000080013000, end 000000008001696f + Working FDT set to 
80013000 + + Starting kernel ... + + [ 0.000000] Linux version 6.9.7 (nixbld@localhost) (riscv64-unknown-linux-gnu-gcc (GCC) 13.3.0, GNU ld (GNU Binutils) 2.41) #1-NixOS Thu Jun 27 11:52:32 UTC 2024 + [ 0.000000] Machine model: Barcelona Supercomputing Center - Lagarto Ox (NixOS) + [ 0.000000] SBI specification v2.0 detected + [ 0.000000] SBI implementation ID=0x1 Version=0x10004 + [ 0.000000] SBI TIME extension detected + [ 0.000000] SBI IPI extension detected + [ 0.000000] SBI RFENCE extension detected + [ 0.000000] SBI DBCN extension detected + [ 0.000000] earlycon: sbi0 at I/O port 0x0 (options '') + [ 0.000000] printk: legacy bootconsole [sbi0] enabled + [ 0.000000] Reserved memory: created DMA memory pool at 0x0000000060000000, size 256 MiB + [ 0.000000] OF: reserved mem: initialized node dma_pool@60000000, compatible id shared-dma-pool + [ 0.000000] OF: reserved mem: 0x0000000060000000..0x000000006fffffff (262144 KiB) map non-reusable dma_pool@60000000 + [ 0.000000] Reserved memory: created DMA memory pool at 0x0000000070000000, size 256 MiB + [ 0.000000] OF: reserved mem: initialized node dma_pool@70000000, compatible id shared-dma-pool + [ 0.000000] OF: reserved mem: 0x0000000070000000..0x000000007fffffff (262144 KiB) map non-reusable dma_pool@70000000 + [ 0.000000] cma: Reserved 16 MiB at 0x00000000af000000 on node -1 + [ 0.000000] early_memtest: # of tests: 3 + [ 0.000000] 0x0000000083200000 - 0x000000008c300000 pattern 5555555555555555 + [ 0.000000] 0x000000009e912000 - 0x00000000aeff9308 pattern 5555555555555555 + [ 0.000000] 0x00000000aeff9337 - 0x00000000aeff9338 pattern 5555555555555555 + [ 0.000000] 0x00000000aeff9367 - 0x00000000aeff9368 pattern 5555555555555555 + [ 0.000000] 0x00000000aeffcffc - 0x00000000aeffd000 pattern 5555555555555555 + [ 0.000000] 0x0000000083200000 - 0x000000008c300000 pattern ffffffffffffffff + [ 0.000000] 0x000000009e912000 - 0x00000000aeff9308 pattern ffffffffffffffff + [ 0.000000] 0x00000000aeff9337 - 0x00000000aeff9338 
pattern ffffffffffffffff + [ 0.000000] 0x00000000aeff9367 - 0x00000000aeff9368 pattern ffffffffffffffff + [ 0.000000] 0x00000000aeffcffc - 0x00000000aeffd000 pattern ffffffffffffffff + [ 0.000000] 0x0000000083200000 - 0x000000008c300000 pattern 0000000000000000 + [ 0.000000] 0x000000009e912000 - 0x00000000aeff9308 pattern 0000000000000000 + [ 0.000000] 0x00000000aeff9337 - 0x00000000aeff9338 pattern 0000000000000000 + [ 0.000000] 0x00000000aeff9367 - 0x00000000aeff9368 pattern 0000000000000000 + [ 0.000000] 0x00000000aeffcffc - 0x00000000aeffd000 pattern 0000000000000000 + [ 0.000000] Zone ranges: + [ 0.000000] DMA32 [mem 0x0000000080200000-0x00000000b01fffff] + [ 0.000000] Normal empty + [ 0.000000] Movable zone start for each node + [ 0.000000] Early memory node ranges + [ 0.000000] node 0: [mem 0x0000000080200000-0x00000000b01fffff] + +That seems to pass the memtest fine, however the boot process hangs in different +stages. + +### OBSERVATION: Cannot open /dev/ttyS0 + + ~ # setserial -g /dev/ttyS1 -a + /dev/ttyS1, Line 1, UART: 16550, Port: 0x0000, IRQ: 1 + Baud_base: 3125000, close_delay: 50, divisor: 0 + closing_wait: 3000 + Flags: spd_normal + + ~ # setserial -g /dev/ttyS0 -a + (hangs) + +This page seems to have good resources on the serial console: + + https://tldp.org/HOWTO/Serial-HOWTO-16.html + +It seems that there are some differences in the way the serial port is handled +regarding 16550 and 16550A. 
+ +I can write to the UART console from U-Boot by directly writing in the +0x40001000 address (A = 0x41): + + => help mw + mw - memory write (fill) + + Usage: + mw [.b, .w, .l, .q] address value [count] + => mw 0x40001000 0x41 + A=> mw 0x40001000 0x42 + B=> mw 0x40001000 0x43 + C=> + +### OBSERVATION: I can type with the ttyS0 8250 driver + +Tried to boot too, but hangs: + + => fdt set /memory@80000000 reg <0x0 0x80200000 0x0 0x30000000> + => setenv bootargs "root=/dev/ram0 loglevel=7 debug rw earlycon=sbi console=ttyS0,115200n8 boot.trace boot.tracedebug init=/nix/store/zxbq93zfg8ijkyq5cq5sb4742rczqfck-nixos- + system-nixos-riscv-24.11pre-git/init" + => setenv ramdisk_size 12611657 + => booti ${kernel_addr_r} ${ramdisk_addr_r}:${ramdisk_size} ${fdtcontroladdr} + ... + [ 30.740740] riscv-plic 40800000.plic: mapped 3 interrupts with 1 handlers for 2 contexts. + [ 40.048300] Serial: 8250/16550 driver, 4 ports, IRQ sharing disabled + [ 40.420000] of_serial 40001000.serial: error -ENXIO: IRQ index 0 not found + [ 40.496720] printk: legacy console [ttyS0] disabled + [ 40.558860] 40001000.serial: ttyS0 at MMIO 0x40001000 (irq = 0, base_baud = 3125000) is a 16550 + [ 40.583940] printk: legacy console [ttyS0] enabled + [ 40.583940] printk: legacy console [ttyS0] enabled + [ 40.595760] printk: legacy bootconsole [sbi0] disabled + [ 40.595760] printk: legacy bootconsole [sbi0] disabled + [ 40.820380] 40003000.serial: ttyS1 at MMIO 0x40003000 (irq = 1, base_baud = 3125000) is a 16550 + + ... + + <<< NixOS Stage 2 >>> + + [ 394.678980] EXT4-fs (pmem0p2): re-mounted 44444444-4444-4444-8888-888888888888 r/w. Quota mode: none. + [ 394.764620] booting system configuration /nix/store/zxbq93zfg8ijkyq5cq5sb4742rczqfck-nixos-system-nixos-riscv-24.11pre-git + runnin[ 398.543300] stage-2-init: running activation script... + g activation script... + +So, if we observe a hang when writing to a bad memory segment, can there be a +problem in the place we are placing the pmem? 
Maybe we can test it with u-boot +first. + +Another note, the serial device 16550 doesn't seem to use a FIFO, but the +16550A does. + +We may want to switch to the A variant, as it seems to be supported by U-boot and +the kernel: + + https://github.com/u-boot/u-boot/blob/master/drivers/serial/ns16550.c#L607-L619 + https://github.com/torvalds/linux/blob/v6.9/drivers/tty/serial/8250/8250_of.c#L285 + +And defines a FIFO size of 16 bytes: + + https://github.com/torvalds/linux/blob/v6.9/drivers/tty/serial/8250/8250_port.c#L74-L81 + +Still, we would have to wait for a bitstream that can forward the interrupts +from the host to the serial console to test it. + +### OBSERVATION: The memory for the pmem seems to be ok + + => mtest 0x100000000 0x1c0000000 0 2 + Testing 100000000 ... 1c0000000: + Pattern FFFFFFFFFFFFFFFF Writing... Reading...Iteration: 2 + Tested 2 iteration(s) with 0 errors. + +It may be worth enabling huge pages again, as there didn't seem to have any +effect. + +### OBSERVATION: There are extra regions mapped by OpenSBI + +In the OpenSBI 1.2 test from buildroot: + + Domain0 Region00 : 0x0000000080000000-0x000000008003ffff () + Domain0 Region01 : 0x0000000000000000-0xffffffffffffffff (R,W,X) + Domain0 Next Address : 0x0000000080200000 + Domain0 Next Arg1 : 0x0000000080010000 + +But with OpenSBI 1.4: + + Domain0 Region00 : 0x0000000040000000-0x0000000040000fff M: (I,R,W) S/U: (R,W) + Domain0 Region01 : 0x0000000080020000-0x000000008002ffff M: (R,W) S/U: () + Domain0 Region02 : 0x0000000080000000-0x000000008001ffff M: (R,X) S/U: () + Domain0 Region03 : 0x0000000000000000-0xffffffffffffffff M: () S/U: (R,W,X) + Domain0 Next Address : 0x0000000080200000 + Domain0 Next Arg1 : 0x0000000080013000 + +## 2024-07-12 + +I find it strange that if we are writing to the 0x80000000 region from the +kernel and OpenSBI has mapped a segment for Machine mode only, there is no error +message about it in the console. 
+ +Let's update to OpenSBI 1.5 for now, and see if we get any more information. + +It seems to be booting fine. + +So, I observed the `udevadm settle` to take a lot of time, maybe I can get some +clues by enabling the debug mode. + +If a trap is being issued, we may be able to modify OpenSBI to print some +information to the serial console so we can understand what is going on. + +In the meanwhile, let's try a simple test. We can enable debug output of systemd +with the following boot options: + + systemd.log_level=debug systemd.log_target=console + +We will also need to run `csrtool all-in-order` to arrive at systemd. + +### OBSERVATION: Hangs in `switch_root` again. + +Technically, we cannot discard the hypothesis that only the console has crashed, +as when we switch to the stage 2 we don't have the heartbeat counter. We may as +well run it again before we arrive at systemd just to verify that userland +crashed. + +On the other hand, I don't understand why we hang in such a way when we try to +write to the `0x8000_0000` area from the kernel memtest. I've been reading the +OpenSBI source code and they seem to have a trap handler that can emit verbose +information to the console when a problem with the trap is detected. I would +expect to see some error being dumped to the console in that case. + +From the OpenSBI information, this line: + + Domain0 Region02 : 0x0000000080000000-0x000000008001ffff M: (R,X) S/U: () + +Suggests that it registers a region with no write permission at `0x8000_0000`, +so it should fail right away from the kernel side. However, this is not reported +anywhere in the console. + +As we have an easy way to trigger this situation, maybe we can use it as a test +to modify OpenSBI to report that problem to the console and verify that it is +working. With that information, we could rule out that a similar problem is +happening when we try to run systemd. Maybe we could also try to debug other +traps.
+ +Another observation is that the memtest lines we see on the console are printed +*before* the actual test begins: + + pr_info(" %pa - %pa pattern %016llx\n", + &this_start, &this_end, cpu_to_be64(pattern)); + memtest(pattern, this_start, this_end - this_start); + +So when this line is shown: + + [ 0.000000] early_memtest: # of tests: 3 + [ 0.000000] 0x0000000080000000 - 0x0000000080013000 pattern 5555555555555555 + +We can infer that the problem is located in that region, which agrees with the +hypothesis that it is related to the OpenSBI regions. + +This is the output I get with OpenSBI 1.5: + + OpenSBI v1.5 + ____ _____ ____ _____ + / __ \ / ____| _ \_ _| + | | | |_ __ ___ _ __ | (___ | |_) || | + | | | | '_ \ / _ \ '_ \ \___ \| _ < | | + | |__| | |_) | __/ | | |____) | |_) || |_ + \____/| .__/ \___|_| |_|_____/|____/_____| + | | + |_| + + Platform Name : ox (Rodrigo NixOS version) + Platform Features : medeleg + Platform HART Count : 1 + Platform IPI Device : --- + Platform Timer Device : axi_timer @ 50000000Hz + Platform Console Device : uart8250 + Platform HSM Device : --- + Platform PMU Device : --- + Platform Reboot Device : --- + Platform Shutdown Device : --- + Platform Suspend Device : --- + Platform CPPC Device : --- + Firmware Base : 0x80000000 + Firmware Size : 310 KB + Firmware RW Offset : 0x40000 + Firmware RW Size : 54 KB + Firmware Heap Offset : 0x45000 + Firmware Heap Size : 34 KB (total), 2 KB (reserved), 11 KB (used), 20 KB (free) + Firmware Scratch Size : 4096 B (total), 368 B (used), 3728 B (free) + Runtime SBI Version : 2.0 + + Domain0 Name : root + Domain0 Boot HART : 0 + Domain0 HARTs : 0* + Domain0 Region00 : 0x0000000040000000-0x0000000040000fff M: (I,R,W) S/U: (R,W) + Domain0 Region01 : 0x0000000080040000-0x000000008004ffff M: (R,W) S/U: () + Domain0 Region02 : 0x0000000080000000-0x000000008003ffff M: (R,X) S/U: () + Domain0 Region03 : 0x0000000000000000-0xffffffffffffffff M: () S/U: (R,W,X) + Domain0 Next Address : 0x0000000080200000 +
Domain0 Next Arg1 : 0x0000000080017000 + Domain0 Next Mode : S-mode + Domain0 SysReset : yes + Domain0 SysSuspend : yes + + Boot HART ID : 0 + Boot HART Domain : root + Boot HART Priv Version : v1.10 + Boot HART Base ISA : rv64imafdc + Boot HART ISA Extensions : zicntr,zihpm,sdtrig + Boot HART PMP Count : 0 + Boot HART PMP Granularity : 0 bits + Boot HART PMP Address Bits: 0 + Boot HART MHPM Info : 29 (0xfffffff8) + Boot HART Debug Triggers : 0 triggers + Boot HART MIDELEG : 0x0000000000000222 + Boot HART MEDELEG : 0x000000000000b109 + + + Core: 12 devices, 8 uclasses, devicetree: board + Loading Environment from nowhere... OK + In: serial,usbkbd + Out: serial,vidconsole + Err: serial,vidconsole + No working controllers found + Net: No ethernet found. + Working FDT set to 80017000 + Hit any key to stop autoboot: 0 + + Device 0: unknown device + + Device 1: unknown device + scanning bus for devices... + + Device 0: unknown device + starting USB... + No working controllers found + No ethernet found. + No ethernet found. + => + +Where now the regions are slightly off: + + Domain0 Region00 : 0x0000000040000000-0x0000000040000fff M: (I,R,W) S/U: (R,W) + Domain0 Region01 : 0x0000000080040000-0x000000008004ffff M: (R,W) S/U: () + Domain0 Region02 : 0x0000000080000000-0x000000008003ffff M: (R,X) S/U: () + Domain0 Region03 : 0x0000000000000000-0xffffffffffffffff M: () S/U: (R,W,X) + +I would assume that the region 1 is where OpenSBI places its own data, and +region 2 is where it places its own code. Then, in region 0 there is the serial +area. + +Interestingly, I can read and write to the 0x80000000 - 0x81000000 from u-boot +without problems: + + => mtest 0x80000000 0x81000000 0 4 + Testing 80000000 ... 81000000: + Pattern FFFFFFFFFFFFFFFF Writing... Reading...Iteration: 4 + Tested 4 iteration(s) with 0 errors. + +So I suspect that it disables those regions before jumping into U-Boot. 
+ +What I don't understand is why the MMIO region 0 is starting at 0x40000000 while +the UART port should be mapped in 0x40001000 as per the device tree. Maybe we +could try with the generic configuration of OpenSBI and see if it can load the +plic and the serial ports properly directly from the device tree. + +Nice, with OpenSBI 1.5 I can see the console errors when trying the generic +configuration: + + OpenSBI v1.5 + ____ _____ ____ _____ + / __ \ / ____| _ \_ _| + | | | |_ __ ___ _ __ | (___ | |_) || | + | | | | '_ \ / _ \ '_ \ \___ \| _ < | | + | |__| | |_) | __/ | | |____) | |_) || |_ + \____/| .__/ \___|_| |_|_____/|____/_____| + | | + |_| + + init_coldboot: timer init failed (error -3) + +Seems to be failing in `sbi_timer_init()` with `cold_boot = true`. + +And the -3 error seems to be: + + #define SBI_ERR_INVALID_PARAM -3 + +I assume it is calling `fdt_timer_init()`. + +Let's try removing the `reg-names` property, as it seems to cause it to enter a +different branch, but "control" is never matched there. + +Still failing, the problem must be somewhere else. + +Let's try with openpiton configuration instead. + +Doesn't even start the UART: + + GGGGGGGGG + +These G's must be coming from the bootrom. + +So let's go back to the generic platform and place some `printf()` calls to +determine where it is failing. 
+ + OpenSBI v1.5 + ____ _____ ____ _____ + / __ \ / ____| _ \_ _| + | | | |_ __ ___ _ __ | (___ | |_) || | + | | | | '_ \ / _ \ '_ \ \___ \| _ < | | + | |__| | |_) | __/ | | |____) | |_) || |_ + \____/| .__/ \___|_| |_|_____/|____/_____| + | | + |_| + + sbi_timer_init: begins + sbi_timer_init: got Zicntr extension + fdt_timer_cold_init: pos = 0 + fdt_timer_cold_init: got match, name = riscv,clint0 + fdt_timer_cold_init: enabled + fdt_timer_cold_init: drc->cold_init = -3 + fdt_timer_init: fdt_timer_cold_init failed (-3) + sbi_platform_timer: sbi_platform_timer_init failed (-3) + init_coldboot: timer init failed (error -3) + +Okay, now we can see where it failed. I wonder why aren't these messages enabled +by default. I'll guess this is the `timer_mtimer_cold_init()` function, so let's +add some more instrumentation there. + +It seems that SiFive timer has a very long weird offset: + + if (is_clint) { /* SiFive CLINT */ + /* Set CLINT addresses */ + mt->mtimecmp_addr = addr[0] + ACLINT_DEFAULT_MTIMECMP_OFFSET; + mt->mtimecmp_size = ACLINT_DEFAULT_MTIMECMP_SIZE; + if (!quirks->clint_without_mtime) { + mt->mtime_addr = addr[0] + ACLINT_DEFAULT_MTIME_OFFSET; + mt->mtime_size = size[0] - mt->mtimecmp_size; + /* Adjust MTIMER address and size for CLINT device */ + mt->mtime_addr += quirks->clint_mtime_offset; + mt->mtime_size -= quirks->clint_mtime_offset; + } else { + mt->mtime_addr = mt->mtime_size = 0; + } + mt->mtimecmp_addr += quirks->clint_mtime_offset; + } else { /* RISC-V ACLINT MTIMER */ + /* Set ACLINT MTIMER addresses */ + mt->mtime_addr = addr[0]; + mt->mtime_size = size[0]; + mt->mtimecmp_addr = addr[1]; + mt->mtimecmp_size = size[1]; + } + +We may want to use the ACLINT timer instead. Let's first see where the addresses +lay in memory, and then use that to verify we change it to 0x0 and 0x8. 
+ + GGGGGGGGG + OpenSBI v1.5 + ____ _____ ____ _____ + / __ \ / ____| _ \_ _| + | | | |_ __ ___ _ __ | (___ | |_) || | + | | | | '_ \ / _ \ '_ \ \___ \| _ < | | + | |__| | |_) | __/ | | |____) | |_) || |_ + \____/| .__/ \___|_| |_|_____/|____/_____| + | | + |_| + + sbi_timer_init: begins + sbi_timer_init: got Zicntr extension + fdt_timer_cold_init: pos = 0 + fdt_timer_cold_init: got match, name = riscv,clint0 + fdt_timer_cold_init: enabled + timer_mtimer_cold_init: begins, is_clint = 1 + timer_mtimer_cold_init: mtime_addr = 0x4000dff8 + timer_mtimer_cold_init: mtime_size = 0x000b4008 + timer_mtimer_cold_init: mtime_addr = 0x40006000 + timer_mtimer_cold_init: mtime_addr = 0x00007ff8 + fdt_timer_cold_init: drc->cold_init = -3 + fdt_timer_init: fdt_timer_cold_init failed (-3) + sbi_platform_timer: sbi_platform_timer_init failed (-3) + init_coldboot: timer init failed (error -3) + +Yeah, those addresses are not what we want. Based on the device tree, the clint +must be at 0x40002000, so they should be 40002000 and 40002008. Also I made some +typos in the printf command, this is the patch: + + + sbi_printf("timer_mtimer_cold_init: mtime_addr = 0x%08lx\n", mt->mtime_addr); + + sbi_printf("timer_mtimer_cold_init: mtime_size = 0x%08lx\n", mt->mtime_size); + + sbi_printf("timer_mtimer_cold_init: mtime_addr = 0x%08lx\n", mt->mtimecmp_addr); + + sbi_printf("timer_mtimer_cold_init: mtime_addr = 0x%08lx\n", mt->mtimecmp_size); + +So, let's fix the patch and switch to the "riscv,aclint-mtimer" timer, which +doesn't set any quirk or weird offset. 
+ + GGGGGGGGG + OpenSBI v1.5 + ____ _____ ____ _____ + / __ \ / ____| _ \_ _| + | | | |_ __ ___ _ __ | (___ | |_) || | + | | | | '_ \ / _ \ '_ \ \___ \| _ < | | + | |__| | |_) | __/ | | |____) | |_) || |_ + \____/| .__/ \___|_| |_|_____/|____/_____| + | | + |_| + + sbi_timer_init: begins + sbi_timer_init: got Zicntr extension + fdt_timer_cold_init: pos = 0 + fdt_timer_cold_init: got match, name = riscv,aclint-mtimer + fdt_timer_cold_init: enabled + timer_mtimer_cold_init: begins, is_clint = 0 + timer_mtimer_cold_init: mtime_addr = 0x40002000 + timer_mtimer_cold_init: mtime_size = 0x000c0000 + timer_mtimer_cold_init: mtimecmp_addr = 0x00000000 + timer_mtimer_cold_init: mtimecmp_size = 0x00000000 + fdt_timer_cold_init: drc->cold_init = -3 + fdt_timer_init: fdt_timer_cold_init failed (-3) + sbi_platform_timer: sbi_platform_timer_init failed (-3) + init_coldboot: timer init failed (error -3) + +So, now we have better addresses, but this is not what I would expect. The regs +property is being used to set the mtime address and size, while I would expect +it to set the `mtimecmp_addr` too, but it seems it is expecting two pairs of +address and size in the regs: + + reg = <0x0 0x40002000 0x0 0x000c0000>; + +So, first let's add the mtimecmp after the mtime registers and see if that +clears the -3 error. + +I'll leave it at 0x40002000, but I suspect the address might be 0x40170000: + + #define OX_ALVEO_TIMER_BASE 0x40170000 + +After booting again, now it seems to work. But I'm not sure if that +may cause more problems down the line. 
+ + GGGGGGGGG + OpenSBI v1.5 + ____ _____ ____ _____ + / __ \ / ____| _ \_ _| + | | | |_ __ ___ _ __ | (___ | |_) || | + | | | | '_ \ / _ \ '_ \ \___ \| _ < | | + | |__| | |_) | __/ | | |____) | |_) || |_ + \____/| .__/ \___|_| |_|_____/|____/_____| + | | + |_| + + sbi_timer_init: begins + sbi_timer_init: got Zicntr extension + fdt_timer_cold_init: pos = 0 + fdt_timer_cold_init: got match, name = riscv,aclint-mtimer + fdt_timer_cold_init: enabled + timer_mtimer_cold_init: begins, is_clint = 0 + timer_mtimer_cold_init: mtime_addr = 0x40002000 + timer_mtimer_cold_init: mtime_size = 0x00000008 + timer_mtimer_cold_init: mtimecmp_addr = 0x40002008 + timer_mtimer_cold_init: mtimecmp_size = 0x00000008 + fdt_timer_cold_init: drc->cold_init = 0 + fdt_timer_cold_init: pos = 1 + fdt_timer_cold_init: returns 0 + Platform Name : Barcelona Supercomputing Center - Lagarto Ox (NixOS) + Platform Features : medeleg + Platform HART Count : 1 + Platform IPI Device : --- + Platform Timer Device : aclint-mtimer @ 50000Hz + Platform Console Device : uart8250 + Platform HSM Device : --- + Platform PMU Device : --- + Platform Reboot Device : --- + Platform Shutdown Device : --- + Platform Suspend Device : --- + Platform CPPC Device : --- + Firmware Base : 0x80000000 + Firmware Size : 327 KB + Firmware RW Offset : 0x40000 + Firmware RW Size : 71 KB + Firmware Heap Offset : 0x49000 + Firmware Heap Size : 35 KB (total), 2 KB (reserved), 11 KB (used), 21 KB (free) + Firmware Scratch Size : 4096 B (total), 408 B (used), 3688 B (free) + Runtime SBI Version : 2.0 + + Domain0 Name : root + Domain0 Boot HART : 0 + Domain0 HARTs : 0* + Domain0 Region00 : 0x0000000040002000-0x000000004000200f M: (I,R,W) S/U: () + Domain0 Region01 : 0x0000000040001000-0x0000000040001fff M: (I,R,W) S/U: (R,W) + Domain0 Region02 : 0x0000000080040000-0x000000008005ffff M: (R,W) S/U: () + Domain0 Region03 : 0x0000000080000000-0x000000008003ffff M: (R,X) S/U: () + Domain0 Region04 : 0x0000000040800000-0x0000000040bfffff M: 
(I,R,W) S/U: (R,W) + Domain0 Region05 : 0x0000000000000000-0xffffffffffffffff M: () S/U: (R,W,X) + Domain0 Next Address : 0x0000000080200000 + Domain0 Next Arg1 : 0x0000000082200000 + Domain0 Next Mode : S-mode + Domain0 SysReset : yes + Domain0 SysSuspend : yes + + Boot HART ID : 0 + Boot HART Domain : root + Boot HART Priv Version : v1.10 + Boot HART Base ISA : rv64imafdc + Boot HART ISA Extensions : zicntr,zihpm,sdtrig + Boot HART PMP Count : 0 + Boot HART PMP Granularity : 0 bits + Boot HART PMP Address Bits: 0 + Boot HART MHPM Info : 29 (0xfffffff8) + Boot HART Debug Triggers : 0 triggers + Boot HART MIDELEG : 0x0000000000000222 + Boot HART MEDELEG : 0x000000000000b109 + + + Core: 12 devices, 8 uclasses, devicetree: board + Loading Environment from nowhere... OK + In: serial,usbkbd + Out: serial,vidconsole + Err: serial,vidconsole + No working controllers found + Net: No ethernet found. + Working FDT set to 82200000 + Hit any key to stop autoboot: 0 + + Device 0: unknown device + + Device 1: unknown device + scanning bus for devices... + + Device 0: unknown device + starting USB... + No working controllers found + No ethernet found. + No ethernet found. + +However, now I cannot boot the kernel, as it is overwriting the FDT: + + => printenv fdtcontroladdr + fdtcontroladdr=82200000 + => fdt addr ${fdtcontroladdr } + Working fdt: 82200000 + => setenv bootargs "root=/dev/ram0 loglevel=7 debug rw earlycon=sbi console=hvc0" + => setenv ramdisk_size 12614846 + => #booti ${kernel_addr_r} ${ramdisk_addr_r}:${ramdisk_size} ${fdtcontroladdr} + => printenv kernel_addr_r + kernel_addr_r=0x84000000 + => printenv ramdisk_addr_r + ramdisk_addr_r=0x8c300000 + => booti ${kernel_addr_r} ${ramdisk_addr_r}:${ramdisk_size} ${fdtcontroladdr} + Moving Image from 0x84000000 to 0x80200000, end=83044650 + ERROR: Did not find a cmdline Flattened Device Tree + Could not find a valid device tree + +Let's move the FDT to 0x80100000. 
+ +Now I can load the kernel, but it gets stuck in the middle of the boot: + + GGGGGGGGG + OpenSBI v1.5 + ____ _____ ____ _____ + / __ \ / ____| _ \_ _| + | | | |_ __ ___ _ __ | (___ | |_) || | + | | | | '_ \ / _ \ '_ \ \___ \| _ < | | + | |__| | |_) | __/ | | |____) | |_) || |_ + \____/| .__/ \___|_| |_|_____/|____/_____| + | | + |_| + + sbi_timer_init: begins + sbi_timer_init: got Zicntr extension + fdt_timer_cold_init: pos = 0 + fdt_timer_cold_init: got match, name = riscv,aclint-mtimer + fdt_timer_cold_init: enabled + timer_mtimer_cold_init: begins, is_clint = 0 + timer_mtimer_cold_init: mtime_addr = 0x40002000 + timer_mtimer_cold_init: mtime_size = 0x00000008 + timer_mtimer_cold_init: mtimecmp_addr = 0x40002008 + timer_mtimer_cold_init: mtimecmp_size = 0x00000008 + fdt_timer_cold_init: drc->cold_init = 0 + fdt_timer_cold_init: pos = 1 + fdt_timer_cold_init: returns 0 + Platform Name : Barcelona Supercomputing Center - Lagarto Ox (NixOS) + Platform Features : medeleg + Platform HART Count : 1 + Platform IPI Device : --- + Platform Timer Device : aclint-mtimer @ 50000Hz + Platform Console Device : uart8250 + Platform HSM Device : --- + Platform PMU Device : --- + Platform Reboot Device : --- + Platform Shutdown Device : --- + Platform Suspend Device : --- + Platform CPPC Device : --- + Firmware Base : 0x80000000 + Firmware Size : 327 KB + Firmware RW Offset : 0x40000 + Firmware RW Size : 71 KB + Firmware Heap Offset : 0x49000 + Firmware Heap Size : 35 KB (total), 2 KB (reserved), 11 KB (used), 21 KB (free) + Firmware Scratch Size : 4096 B (total), 408 B (used), 3688 B (free) + Runtime SBI Version : 2.0 + + Domain0 Name : root + Domain0 Boot HART : 0 + Domain0 HARTs : 0* + Domain0 Region00 : 0x0000000040002000-0x000000004000200f M: (I,R,W) S/U: () + Domain0 Region01 : 0x0000000040001000-0x0000000040001fff M: (I,R,W) S/U: (R,W) + Domain0 Region02 : 0x0000000080040000-0x000000008005ffff M: (R,W) S/U: () + Domain0 Region03 : 0x0000000080000000-0x000000008003ffff M: 
(R,X) S/U: () + Domain0 Region04 : 0x0000000040800000-0x0000000040bfffff M: (I,R,W) S/U: (R,W) + Domain0 Region05 : 0x0000000000000000-0xffffffffffffffff M: () S/U: (R,W,X) + Domain0 Next Address : 0x0000000080200000 + Domain0 Next Arg1 : 0x0000000080100000 + Domain0 Next Mode : S-mode + Domain0 SysReset : yes + Domain0 SysSuspend : yes + + Boot HART ID : 0 + Boot HART Domain : root + Boot HART Priv Version : v1.10 + Boot HART Base ISA : rv64imafdc + Boot HART ISA Extensions : zicntr,zihpm,sdtrig + Boot HART PMP Count : 0 + Boot HART PMP Granularity : 0 bits + Boot HART PMP Address Bits: 0 + Boot HART MHPM Info : 29 (0xfffffff8) + Boot HART Debug Triggers : 0 triggers + Boot HART MIDELEG : 0x0000000000000222 + Boot HART MEDELEG : 0x000000000000b109 + + + Core: 12 devices, 8 uclasses, devicetree: board + Loading Environment from nowhere... OK + In: serial,usbkbd + Out: serial,vidconsole + Err: serial,vidconsole + No working controllers found + Net: No ethernet found. + Working FDT set to 80100000 + Hit any key to stop autoboot: 0 + + Device 0: unknown device + + Device 1: unknown device + scanning bus for devices... + + Device 0: unknown device + starting USB... + No working controllers found + No ethernet found. + No ethernet found. + => setenv bootargs "root=/dev/ram0 loglevel=7 debug rw earlycon=sbi console=hvc0" + => setenv ramdisk_size 12614846 + => booti ${kernel_addr_r} ${ramdisk_addr_r}:${ramdisk_size} ${fdtcontroladdr} + Moving Image from 0x84000000 to 0x80200000, end=83044650 + ## Flattened Device Tree blob at 80100000 + Booting using the fdt blob at 0x80100000 + Working FDT set to 80100000 + ERROR: reserving fdt memory region failed (addr=80000000 size=4000000 flags=4) + Using Device Tree in place at 0000000080100000, end 0000000080103dd0 + Working FDT set to 80100000 + + Starting kernel ... 
+ + [ 0.000000] Linux version 6.9.7 (nixbld@localhost) (riscv64-unknown-linux-gnu-gcc (GCC) 13.3.0, GNU ld (GNU Binutils) 2.41) #1-NixOS Thu Jun 27 11:52:32 UTC 2024 + [ 0.000000] Machine model: Barcelona Supercomputing Center - Lagarto Ox (NixOS) + [ 0.000000] SBI specification v2.0 detected + [ 0.000000] SBI implementation ID=0x1 Version=0x10005 + [ 0.000000] SBI TIME extension detected + [ 0.000000] SBI IPI extension detected + [ 0.000000] SBI RFENCE extension detected + [ 0.000000] SBI DBCN extension detected + [ 0.000000] earlycon: sbi0 at I/O port 0x0 (options '') + [ 0.000000] printk: legacy bootconsole [sbi0] enabled + [ 0.000000] OF: reserved mem: Reserved memory: failed to reserve memory for node 'reserved@80000000': base 0x0000000080000000, size 64 MiB + [ 0.000000] OF: reserved mem: OVERLAP DETECTED! + [ 0.000000] mmode_resv1@80000000 (0x0000000080000000--0x0000000080040000) overlaps with reserved@80000000 (0x0000000080000000--0x0000000084000000) + [ 0.000000] OF: reserved mem: OVERLAP DETECTED! 
+ [ 0.000000] reserved@80000000 (0x0000000080000000--0x0000000084000000) overlaps with mmode_resv0@80040000 (0x0000000080040000--0x0000000080060000) + [ 0.000000] Reserved memory: created DMA memory pool at 0x0000000060000000, size 256 MiB + [ 0.000000] OF: reserved mem: initialized node dma_pool@60000000, compatible id shared-dma-pool + [ 0.000000] OF: reserved mem: 0x0000000060000000..0x000000006fffffff (262144 KiB) map non-reusable dma_pool@60000000 + [ 0.000000] Reserved memory: created DMA memory pool at 0x0000000070000000, size 256 MiB + [ 0.000000] OF: reserved mem: initialized node dma_pool@70000000, compatible id shared-dma-pool + [ 0.000000] OF: reserved mem: 0x0000000070000000..0x000000007fffffff (262144 KiB) map non-reusable dma_pool@70000000 + [ 0.000000] OF: reserved mem: 0x0000000080000000..0x000000008003ffff (256 KiB) nomap non-reusable mmode_resv1@80000000 + [ 0.000000] OF: reserved mem: 0x0000000080000000..0x0000000083ffffff (65536 KiB) nomap non-reusable reserved@80000000 + [ 0.000000] OF: reserved mem: 0x0000000080040000..0x000000008005ffff (128 KiB) nomap non-reusable mmode_resv0@80040000 + [ 0.000000] cma: Reserved 16 MiB at 0x00000000af000000 on node -1 + [ 0.000000] Zone ranges: + [ 0.000000] DMA32 [mem 0x0000000080000000-0x00000000afffffff] + [ 0.000000] Normal empty + [ 0.000000] Movable zone start for each node + [ 0.000000] Early memory node ranges + [ 0.000000] node 0: [mem 0x0000000080000000-0x000000008005ffff] + [ 0.000000] node 0: [mem 0x0000000080060000-0x00000000afffffff] + [ 0.000000] Initmem setup node 0 [mem 0x0000000080000000-0x00000000afffffff] + [ 0.000000] Falling back to deprecated "riscv,isa" + [ 0.000000] riscv: base ISA extensions adfim + [ 0.000000] riscv: ELF capabilities adfim + [ 0.000000] pcpu-alloc: s0 r0 d131072 u131072 alloc=1*131072 + [ 0.000000] pcpu-alloc: [0] 0 + [ 0.000000] Kernel command line: root=/dev/ram0 loglevel=7 debug rw earlycon=sbi console=hvc0 + [ 0.000000] Dentry cache hash table entries: 131072 
(order: 8, 1048576 bytes, linear) + [ 0.000000] Inode-cache hash table entries: 65536 (order: 7, 524288 bytes, linear) + [ 0.000000] Built 1 zonelists, mobility grouping on. Total pages: 193536 + [ 0.000000] mem auto-init: stack:all(zero), heap alloc:off, heap free:off + [ 0.000000] Virtual kernel memory layout: + [ 0.000000] fixmap : 0xffffffc6fea00000 - 0xffffffc6ff000000 (6144 kB) + [ 0.000000] pci io : 0xffffffc6ff000000 - 0xffffffc700000000 ( 16 MB) + [ 0.000000] vmemmap : 0xffffffc700000000 - 0xffffffc800000000 (4096 MB) + [ 0.000000] vmalloc : 0xffffffc800000000 - 0xffffffd800000000 ( 64 GB) + [ 0.000000] modules : 0xffffffff02e45000 - 0xffffffff80000000 (2001 MB) + [ 0.000000] lowmem : 0xffffffd800000000 - 0xffffffd830000000 ( 768 MB) + [ 0.000000] kernel : 0xffffffff80000000 - 0xffffffffffffffff (2047 MB) + [ 0.000000] Memory: 386632K/786432K available (17075K kernel code, 9047K rwdata, 10240K rodata, 8737K init, 917K bss, 383416K reserved, 16384K cma-reserved) + [ 0.000000] SLUB: HWalign=64, Order=0-3, MinObjects=0, CPUs=1, Nodes=1 + [ 0.000000] ftrace: allocating 46961 entries in 184 pages + [ 0.000000] ftrace: allocated 184 pages with 4 groups + [ 0.000000] trace event string verifier disabled + [ 0.000000] RCU Tasks Rude: Setting shift to 0 and lim to 1 rcu_task_cb_adjust=1. + [ 0.000000] RCU Tasks Trace: Setting shift to 0 and lim to 1 rcu_task_cb_adjust=1. + [ 0.000000] NR_IRQS: 64, nr_irqs: 64, preallocated irqs: 0 + [ 0.000000] riscv-intc: 64 local interrupts mapped + [ 0.000000] clocksource: riscv_clocksource: mask: 0xffffffffffffffff max_cycles: 0x179dd7f66, max_idle_ns: 56421785867800 ns + [ 0.000020] sched_clock: 64 bits at 50kHz, resolution 20000ns, wraps every 70368744170000ns + [ 0.015020] kfence: initialized - using 2097152 bytes for 255 objects at 0x(____ptrval____)-0x(____ptrval____) + [ 0.035900] Console: colour dummy device 80x25 + [ 0.041720] Calibrating delay loop (skipped), value calculated using timer frequency.. 
0.10 BogoMIPS (lpj=200) + [ 0.052480] pid_max: default: 32768 minimum: 301 + [ 0.187380] LSM: initializing lsm=capability,landlock,yama + [ 0.479640] landlock: Up and running. + [ 0.483700] Yama: becoming mindful. + [ 0.521600] Mount-cache hash table entries: 2048 (order: 2, 16384 bytes, linear) + [ 0.529780] Mountpoint-cache hash table entries: 2048 (order: 2, 16384 bytes, linear) + [ 0.784220] riscv: ELF compat mode unsupported + [ 0.784600] ASID allocator disabled (0 bits) + [ 0.867620] devtmpfs: initialized + [ 0.986340] clocksource: jiffies: mask: 0xffffffff max_cycles: 0xffffffff, max_idle_ns: 7645041785100000 ns + [ 0.996900] futex hash table entries: 256 (order: 1, 12288 bytes, linear) + [ 1.084560] pinctrl core: initialized pinctrl subsystem + [ 1.195460] NET: Registered PF_NETLINK/PF_ROUTE protocol family + [ 1.244960] DMA: preallocated 128 KiB GFP_KERNEL pool for atomic allocations + [ 1.255960] DMA: preallocated 128 KiB GFP_KERNEL|GFP_DMA32 pool for atomic allocations + [ 1.266500] audit: initializing netlink subsys (disabled) + [ 1.317920] thermal_sys: Registered thermal governor 'step_wise' + [ 1.319800] cpuidle: using governor ladder + +This is very interesting, because all these lockups show the same symptoms and +they may be related with the kernel trying to access an area of memory that it +shouldn't. + +First, let's remove the reserved region, as now OpenSBI properly forwards the +regions to the kernel. 
+ +It continues to hang, but at least now we don't have overlap of memory regions: + + [ 0.000000] Reserved memory: created DMA memory pool at 0x0000000060000000, size 256 MiB + [ 0.000000] OF: reserved mem: initialized node dma_pool@60000000, compatible id shared-dma-pool + [ 0.000000] OF: reserved mem: 0x0000000060000000..0x000000006fffffff (262144 KiB) map non-reusable dma_pool@60000000 + [ 0.000000] Reserved memory: created DMA memory pool at 0x0000000070000000, size 256 MiB + [ 0.000000] OF: reserved mem: initialized node dma_pool@70000000, compatible id shared-dma-pool + [ 0.000000] OF: reserved mem: 0x0000000070000000..0x000000007fffffff (262144 KiB) map non-reusable dma_pool@70000000 + [ 0.000000] OF: reserved mem: 0x0000000080000000..0x000000008003ffff (256 KiB) nomap non-reusable mmode_resv1@80000000 + [ 0.000000] OF: reserved mem: 0x0000000080040000..0x000000008005ffff (128 KiB) nomap non-reusable mmode_resv0@80040000 + [ 0.000000] cma: Reserved 16 MiB at 0x00000000af000000 on node -1 + [ 0.000000] Zone ranges: + [ 0.000000] DMA32 [mem 0x0000000080000000-0x00000000afffffff] + [ 0.000000] Normal empty + [ 0.000000] Movable zone start for each node + [ 0.000000] Early memory node ranges + [ 0.000000] node 0: [mem 0x0000000080000000-0x000000008005ffff] + [ 0.000000] node 0: [mem 0x0000000080060000-0x00000000afffffff] + [ 0.000000] Initmem setup node 0 [mem 0x0000000080000000-0x00000000afffffff] + +Let's remove the clint from the device tree for now, just to see if it has any +effect. + +Disabling the clint doesn't seem to have any effect. In fact, the kernel is +still able to get a clock as shown in the kernel messages.
+ +### OBSERVATION: The kernel hangs after the unaligned check begins + +I enabled the ftrace for initcall and I can see that it is getting stuck in the +unaligned check: + + Domain0 Name : root + Domain0 Boot HART : 0 + Domain0 HARTs : 0* + Domain0 Region00 : 0x0000000040001000-0x0000000040001fff M: (I,R,W) S/U: (R,W) + Domain0 Region01 : 0x0000000080040000-0x000000008005ffff M: (R,W) S/U: () + Domain0 Region02 : 0x0000000080000000-0x000000008003ffff M: (R,X) S/U: () + Domain0 Region03 : 0x0000000040800000-0x0000000040bfffff M: (I,R,W) S/U: (R,W) + Domain0 Region04 : 0x0000000000000000-0xffffffffffffffff M: () S/U: (R,W,X) + Domain0 Next Address : 0x0000000080200000 + Domain0 Next Arg1 : 0x0000000080100000 + Domain0 Next Mode : S-mode + Domain0 SysReset : yes + Domain0 SysSuspend : yes + ... + => setenv bootargs "root=/dev/ram0 loglevel=7 debug rw earlycon=sbi console=hvc0 trace_event=initcall:* trace_options=sym-addr tp_printk trace_buf_size=1M" + => setenv ramdisk_size 12614846 + => booti ${kernel_addr_r} ${ramdisk_addr_r}:${ramdisk_size} ${fdtcontroladdr} + ...
+ [ 0.000000] printk: legacy bootconsole [sbi0] enabled + [ 0.000000] Reserved memory: created DMA memory pool at 0x0000000060000000, size 256 MiB + [ 0.000000] OF: reserved mem: initialized node dma_pool@60000000, compatible id shared-dma-pool + [ 0.000000] OF: reserved mem: 0x0000000060000000..0x000000006fffffff (262144 KiB) map non-reusable dma_pool@60000000 + [ 0.000000] Reserved memory: created DMA memory pool at 0x0000000070000000, size 256 MiB + [ 0.000000] OF: reserved mem: initialized node dma_pool@70000000, compatible id shared-dma-pool + [ 0.000000] OF: reserved mem: 0x0000000070000000..0x000000007fffffff (262144 KiB) map non-reusable dma_pool@70000000 + [ 0.000000] OF: reserved mem: 0x0000000080000000..0x000000008003ffff (256 KiB) nomap non-reusable mmode_resv1@80000000 + [ 0.000000] OF: reserved mem: 0x0000000080040000..0x000000008005ffff (128 KiB) nomap non-reusable mmode_resv0@80040000 + [ 0.000000] cma: Reserved 16 MiB at 0x00000000af000000 on node -1 + [ 0.000000] Zone ranges: + [ 0.000000] DMA32 [mem 0x0000000080000000-0x00000000afffffff] + [ 0.000000] Normal empty + [ 0.000000] Movable zone start for each node + [ 0.000000] Early memory node ranges + [ 0.000000] node 0: [mem 0x0000000080000000-0x000000008005ffff] + [ 0.000000] node 0: [mem 0x0000000080060000-0x00000000afffffff] + [ 0.000000] Initmem setup node 0 [mem 0x0000000080000000-0x00000000afffffff] + ... 
+ [ 2.736300] initcall_finish: func=init_ladder+0x0/0x40 ret=0 + [ 2.742640] initcall_start: func=init_menu+0x0/0x38 + [ 2.748200] initcall_finish: func=init_menu+0x0/0x38 ret=0 + [ 2.754400] initcall_start: func=rpmsg_init+0x0/0xc0 + [ 2.763400] initcall_finish: func=rpmsg_init+0x0/0xc0 ret=0 + [ 2.769840] initcall_start: func=rpmsg_chrdev_init+0x0/0xc0 + [ 2.777660] initcall_finish: func=rpmsg_chrdev_init+0x0/0xc0 ret=0 + [ 2.784540] initcall_start: func=rpmsg_ctrldev_init+0x0/0xbc + [ 2.792460] initcall_finish: func=rpmsg_ctrldev_init+0x0/0xbc ret=0 + [ 2.799420] initcall_start: func=rpmsg_ns_init+0x0/0x6c + [ 2.807460] initcall_finish: func=rpmsg_ns_init+0x0/0x6c ret=0 + [ 2.814120] initcall_start: func=kobject_uevent_init+0x0/0x30 + [ 2.821120] initcall_finish: func=kobject_uevent_init+0x0/0x30 ret=0 + [ 2.830980] initcall_level: level=arch + [ 2.835460] initcall_start: func=riscv_cpuinfo_init+0x0/0x78 + [ 2.841960] initcall_finish: func=riscv_cpuinfo_init+0x0/0x78 ret=0 + [ 2.848920] initcall_start: func=vdso_init+0x0/0x4c + [ 2.855560] initcall_finish: func=vdso_init+0x0/0x4c ret=0 + [ 2.861900] initcall_start: func=check_unaligned_access_all_cpus+0x0/0x1d4 + +This makes me wonder if what has just happened is that somehow we have +overwritten the OpenSBI trap for unaligned access and we are now executing some +garbage code in machine mode. + +Based on the OpenSBI regions, I would assume here is where the traps should be: + + Domain0 Region02 : 0x0000000080000000-0x000000008003ffff M: (R,X) S/U: () + +It should be feasible to read the memory from the host where those traps are +placed. 
+ +Here are the FDT reserved areas that U-Boot sees before switching to the kernel: + + => fdt print /reserved-memory + reserved-memory { + #address-cells = <0x00000002>; + #size-cells = <0x00000002>; + ranges; + mmode_resv1@80000000 { + reg = <0x00000000 0x80000000 0x00000000 0x00040000>; + no-map; + }; + mmode_resv0@80040000 { + reg = <0x00000000 0x80040000 0x00000000 0x00020000>; + no-map; + }; + dma_pool@60000000 { + reg = <0x00000000 0x60000000 0x00000000 0x10000000>; + compatible = "shared-dma-pool"; + }; + dma_pool@70000000 { + reg = <0x00000000 0x70000000 0x00000000 0x10000000>; + compatible = "shared-dma-pool"; + }; + }; + +Which match the ones the kernel parses: + + [ 0.000000] OF: reserved mem: 0x0000000080000000..0x000000008003ffff (256 KiB) nomap non-reusable mmode_resv1@80000000 + [ 0.000000] OF: reserved mem: 0x0000000080040000..0x000000008005ffff (128 KiB) nomap non-reusable mmode_resv0@80040000 + +And with the OpenSBI regions (in reverse order): + + Domain0 Region01 : 0x0000000080040000-0x000000008005ffff M: (R,W) S/U: () + Domain0 Region02 : 0x0000000080000000-0x000000008003ffff M: (R,X) S/U: () + +Let's add some instrumentation in the code that handles the traps in OpenSBI. + +Memory regions: + + 0x8000_0000 to 0x8003_ffff : OpenSBI code + 0x8004_0000 to 0x8005_ffff : OpenSBI data + 0x8010_0000 to 0x801._.... : FDT + 0x8020_0000 to 0x8020_.... : U-Boot (later kernel) + + 0x8020_1000 to 0x81fa_0b87 : Kernel image + 80201000-80cb177f : Kernel code + 81400000-819fffff : Kernel rodata + 81c00000-81f18747 : Kernel data + 81f19000-81fa0b87 : Kernel bss + + 0x8400_0000 to 0x84.._....
M: (R,W) S/U: () Linux kernel + +Okay, so we enter the unaligned access check: + + ![ 2.947680] initcall_start: func=check_unaligned_access_all_cpus+0x0/0x1d4 + $ + !OpenSBI: misaligned load$ + !OpenSBI: misaligned load$ + !OpenSBI: misaligned load$ + !OpenSBI: misaligned load$ + !OpenSBI: misaligned load$ + !OpenSBI: misaligned load$ + !OpenSBI: misaligned load$ + !OpenSBI: misaligned load$ + !OpenSBI: misaligned store$ + !OpenSBI: misaligned store$ + !OpenSBI: misaligned store$ + !OpenSBI: misaligned store$ + !OpenSBI: misaligned store$ + !OpenSBI: misaligned store$ + !OpenSBI: misaligned store$ + !OpenSBI: misaligned store$ + ... + !OpenSBI: misaligned load$ + !OpenSBI: misaligned load$ + !OpenSBI: misaligned load$ + !OpenSBI: misaligned load$ + !OpenSBI: misaligned load$ + !OpenSBI: misaligned load$ + !OpenSBI: misaligned load$ + !OpenSBI: misaligned load$ + !OpenSBI: misaligned store$ + !OpenSBI: misaligned store$ + !OpenSBI: misaligned store$ + !OpenSBI: misaligned store$ + !OpenSBI: misaligned store$ + !OpenSBI: misaligned store$ + !OpenSBI: misaligned store$ + !OpenSBI: misaligned store$ + +But we never arrive to anywhere else. + +Here are the current options that match ALIGN: + + hut% grep ALIGN /nix/store/c9jr35xnh2ffzjvkq8nvzj9i2siz1n4s-linux-config-riscv64-unknown-linux-gnu-6.9.7 + CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW=y + CONFIG_RISCV_MISALIGNED=y + CONFIG_RISCV_PROBE_UNALIGNED_ACCESS=y + # CONFIG_RISCV_EMULATED_UNALIGNED_ACCESS is not set + # CONFIG_RISCV_SLOW_UNALIGNED_ACCESS is not set + # CONFIG_RISCV_EFFICIENT_UNALIGNED_ACCESS is not set + CONFIG_HAVE_64BIT_ALIGNED_ACCESS=y + CONFIG_FUNCTION_ALIGNMENT=0 + CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC=y + CONFIG_CMA_ALIGNMENT=8 + # CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B is not set + +We may want to set these two: + + # CONFIG_RISCV_EMULATED_UNALIGNED_ACCESS is not set + # CONFIG_RISCV_SLOW_UNALIGNED_ACCESS is not set + +And disable + + CONFIG_RISCV_PROBE_UNALIGNED_ACCESS=y + +So we don't perform the probing. 
However, this may bite later, so it is probably a +better idea to debug it now. I could bound where it is failing as it doesn't +seem to be outside the speed check function. + + +Those 8 pairs of load and store calls seem to match this assembly function in +arch/riscv/kernel/copy-unaligned.S: + + /* void __riscv_copy_bytes_unaligned(void *, const void *, size_t) */ + /* Performs a memcpy without aligning buffers, using only byte accesses. */ + /* Note: The size is truncated to a multiple of 8 */ + SYM_FUNC_START(__riscv_copy_bytes_unaligned) + andi a4, a2, ~(8-1) + beqz a4, 2f + add a3, a1, a4 + 1: + lb a4, 0(a1) + lb a5, 1(a1) + lb a6, 2(a1) + lb a7, 3(a1) + lb t0, 4(a1) + lb t1, 5(a1) + lb t2, 6(a1) + lb t3, 7(a1) + sb a4, 0(a0) + sb a5, 1(a0) + sb a6, 2(a0) + sb a7, 3(a0) + sb t0, 4(a0) + sb t1, 5(a0) + sb t2, 6(a0) + sb t3, 7(a0) + addi a0, a0, 8 + addi a1, a1, 8 + bltu a1, a3, 1b + + 2: + ret + SYM_FUNC_END(__riscv_copy_bytes_unaligned) + +Booted again with simpler output: + + + > + + +So here is the simplified output using the format `` where X is one character +that differentiates the type of trap. + +Based on the code that performs the check: + + /* + * For a fixed amount of time, repeatedly try the function, and take + * the best time in cycles as the measurement. + */ + while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) { + start_cycles = get_cycles64(); + /* Ensure the CSR read can't reorder WRT to the copy. */ + mb(); + __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE); + /* Ensure the copy ends before the end time is snapped.
*/ + mb(); + end_cycles = get_cycles64(); + if ((end_cycles - start_cycles) < word_cycles) + word_cycles = end_cycles - start_cycles; + } + + byte_cycles = -1ULL; + __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE); + start_jiffies = jiffies; + while ((now = jiffies) == start_jiffies) + cpu_relax(); + + while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) { + start_cycles = get_cycles64(); + mb(); + __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE); + mb(); + end_cycles = get_cycles64(); + if ((end_cycles - start_cycles) < byte_cycles) + byte_cycles = end_cycles - start_cycles; + } + + preempt_enable(); + + /* Don't divide by zero. */ + if (!word_cycles || !byte_cycles) { + pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned access speed\n", + cpu); + + return 0; + } + + if (word_cycles < byte_cycles) + speed = RISCV_HWPROBE_MISALIGNED_FAST; + + ratio = div_u64((byte_cycles * 100), word_cycles); + pr_info("cpu%d: Ratio of byte access time to unaligned word access is %d.%02d, unaligned accesses are %s\n", + cpu, + ratio / 100, + ratio % 100, + (speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow"); + +The fact that the print line with the "Ratio of byte access..." doesn't appear, +puts the hang place at some point in between the check and the `pr_info()` call. + +If this is a problem on the OpenSBI side, we can bisect the code to find out +where the problem was introduced. But first, I would have to try OpenSBI 1.4 and +ensure we can reproduce it. + +Okay so with OpenSBI 1.4 we have a hang in the same place. 
+ +Let's compare the domain regions: + +With OpenSBI 1.4 `fpga/alveo_ox`: + + Domain0 Region00 : 0x0000000040000000-0x0000000040000fff M: (I,R,W) S/U: (R,W) + Domain0 Region01 : 0x0000000080040000-0x000000008004ffff M: (R,W) S/U: () + Domain0 Region02 : 0x0000000080000000-0x000000008003ffff M: (R,X) S/U: () + Domain0 Region03 : 0x0000000000000000-0xffffffffffffffff M: () S/U: (R,W,X) + Domain0 Next Address : 0x0000000080200000 + Domain0 Next Arg1 : 0x0000000080017000 + +With OpenSBI 1.4 `generic`: + + Domain0 Region00 : 0x0000000040001000-0x0000000040001fff M: (I,R,W) S/U: (R,W) + Domain0 Region01 : 0x0000000080040000-0x000000008005ffff M: (R,W) S/U: () + Domain0 Region02 : 0x0000000080000000-0x000000008003ffff M: (R,X) S/U: () + Domain0 Region03 : 0x0000000040800000-0x0000000040bfffff M: (I,R,W) S/U: (R,W) + Domain0 Region04 : 0x0000000000000000-0xffffffffffffffff M: () S/U: (R,W,X) + Domain0 Next Address : 0x0000000080200000 + Domain0 Next Arg1 : 0x0000000080100000 + +With OpenSBI 1.5 `generic`: + + Domain0 Region00 : 0x0000000040001000-0x0000000040001fff M: (I,R,W) S/U: (R,W) + Domain0 Region01 : 0x0000000080040000-0x000000008005ffff M: (R,W) S/U: () + Domain0 Region02 : 0x0000000080000000-0x000000008003ffff M: (R,X) S/U: () + Domain0 Region03 : 0x0000000040800000-0x0000000040bfffff M: (I,R,W) S/U: (R,W) + Domain0 Region04 : 0x0000000000000000-0xffffffffffffffff M: () S/U: (R,W,X) + Domain0 Next Address : 0x0000000080200000 + Domain0 Next Arg1 : 0x0000000080100000 + +So we have several changes. + +First, the PLIC has a new memory map. Let's comment it out in the device tree, +and see what happens.
+ +Hangs in the same place, but now we don't have the 0x40800000 region with +OpenSBI 1.4 generic: + + Domain0 Region00 : 0x0000000040001000-0x0000000040001fff M: (I,R,W) S/U: (R,W) + Domain0 Region01 : 0x0000000080040000-0x000000008005ffff M: (R,W) S/U: () + Domain0 Region02 : 0x0000000080000000-0x000000008003ffff M: (R,X) S/U: () + Domain0 Region03 : 0x0000000000000000-0xffffffffffffffff M: () S/U: (R,W,X) + Domain0 Next Address : 0x0000000080200000 + Domain0 Next Arg1 : 0x0000000080100000 + +Let's try to move the serial region to 0x40000000, although that seems to be +wrong. + +Yep, that causes OpenSBI to not emit any message in the console, so let's put it +back in 0x40001000. + +Next, we may want to place the FDT in the previous location, at 0x80017000. + + => setenv bootargs "root=/dev/ram0 loglevel=7 debug rw earlycon=sbi console=hvc0 trace_event=initcall:* trace_options=sym-addr tp_printk trace_buf_size=1M" + => setenv ramdisk_size 12614846 + => booti ${kernel_addr_r} ${ramdisk_addr_r}:${ramdisk_size} ${fdtcontroladdr} + +Hmm, continues to hang at the same point: + + [ 2.852200] initcall_start: func=check_unaligned_access_all_cpus+0x0/0x1d4 + +With these regions: + + OpenSBI v1.4 + ____ _____ ____ _____ + / __ \ / ____| _ \_ _| + | | | |_ __ ___ _ __ | (___ | |_) || | + | | | | '_ \ / _ \ '_ \ \___ \| _ < | | + | |__| | |_) | __/ | | |____) | |_) || |_ + \____/| .__/ \___|_| |_|_____/|____/_____| + | | + |_| + + Platform Name : Barcelona Supercomputing Center - Lagarto Ox (NixOS) + Platform Features : medeleg + Platform HART Count : 1 + Platform IPI Device : --- + Platform Timer Device : --- @ 0Hz + Platform Console Device : uart8250 + Platform HSM Device : --- + Platform PMU Device : --- + Platform Reboot Device : --- + Platform Shutdown Device : --- + Platform Suspend Device : --- + Platform CPPC Device : --- + Firmware Base : 0x80000000 + Firmware Size : 323 KB + Firmware RW Offset : 0x40000 + Firmware RW Size : 67 KB + Firmware Heap Offset : 0x48000 +
Firmware Heap Size : 35 KB (total), 2 KB (reserved), 8 KB (used), 24 KB (free) + Firmware Scratch Size : 4096 B (total), 288 B (used), 3808 B (free) + Runtime SBI Version : 2.0 + + Domain0 Name : root + Domain0 Boot HART : 0 + Domain0 HARTs : 0* + Domain0 Region00 : 0x0000000040001000-0x0000000040001fff M: (I,R,W) S/U: (R,W) + Domain0 Region01 : 0x0000000080040000-0x000000008005ffff M: (R,W) S/U: () + Domain0 Region02 : 0x0000000080000000-0x000000008003ffff M: (R,X) S/U: () + Domain0 Region03 : 0x0000000000000000-0xffffffffffffffff M: () S/U: (R,W,X) + Domain0 Next Address : 0x0000000080200000 + Domain0 Next Arg1 : 0x0000000080017000 + Domain0 Next Mode : S-mode + Domain0 SysReset : yes + Domain0 SysSuspend : yes + +I will also try 0x80013000 as address as I saw it being used before, but I don't +think it is the problem. It must be related with how the initialization is now +different. + + OpenSBI v1.4 + ____ _____ ____ _____ + / __ \ / ____| _ \_ _| + | | | |_ __ ___ _ __ | (___ | |_) || | + | | | | '_ \ / _ \ '_ \ \___ \| _ < | | + | |__| | |_) | __/ | | |____) | |_) || |_ + \____/| .__/ \___|_| |_|_____/|____/_____| + | | + |_| + + sbi_trap_error: hart0: illegal instruction handler failed (error -2) + sbi_trap_error: hart0: mcause=0x0000000000000002 mtval=0x0000000000000000 + sbi_trap_error: hart0: mepc=0x00000000800131a4 mstatus=0x8000000a00007800 + sbi_trap_error: hart0: ra=0x0000000080013bb0 sp=0x0000000080046e70 + sbi_trap_error: hart0: gp=0x0000000000000000 tp=0x0000000080047000 + sbi_trap_error: hart0: s0=0x0000000080046eb0 s1=0x0000000000000118 + sbi_trap_error: hart0: a0=0x0000000080013000 a1=0x0000000080046e74 + sbi_trap_error: hart0: a2=0x00000000800133c0 a3=0x0000000000000600 + sbi_trap_error: hart0: a4=0x000000000000062c a5=0x0000000000000284 + sbi_trap_error: hart0: a6=0x0000000000000000 a7=0x000000000000002c + sbi_trap_error: hart0: s2=0x0000000080013000 s3=0x0000000000000000 + sbi_trap_error: hart0: s4=0x0000000080047000 s5=0x0000000000000000 + 
sbi_trap_error: hart0: s6=0x0000000000000000 s7=0x0000000000000001 + sbi_trap_error: hart0: s8=0x0000000000002000 s9=0x00000000800436f0 + sbi_trap_error: hart0: s10=0x0000000000000000 s11=0x0000000000000000 + sbi_trap_error: hart0: t0=0x0000000080009796 t1=0x0000000000000268 + sbi_trap_error: hart0: t2=0x0000000000001000 t3=0x00000000000000d7 + sbi_trap_error: hart0: t4=0x000000007fffffff t5=0x0000000000000001 + sbi_trap_error: hart0: t6=0x0000000000000004 + +With 0x80013000 we seem to enter OpenSBI code region. + +Let's try with openpiton again, without the FDT address. We should disable the +secondary console from the DT too. + +No output, lets enable the PLIC again in the DT. Same. + +Probably we have a better chance to fix it in the generic platform. + +Let's use a very far away address for the FDT just in case: `0x0_c000_0000` + +This one doesn't work: + + => setenv bootargs "root=/dev/ram0 loglevel=7 debug rw earlycon=sbi console=hvc0 ftrace=function ftrace_filter=* tp_printk" + +This one yes: + + => setenv bootargs "root=/dev/ram0 loglevel=7 debug rw earlycon=sbi console=hvc0 trace_event=*:* tp_printk trace_buf_size=1M" + => setenv ramdisk_size 12614846 + => booti ${kernel_addr_r} ${ramdisk_addr_r}:${ramdisk_size} ${fdtcontroladdr} + + [ 164.849640] initcall_finish: func=vdso_init+0x0/0x4c ret=0 + [ 164.849960] console: initcall_finish: func=vdso_init+0x0/0x4c ret=0 + [ 164.862820] initcall_start: func=check_unaligned_access_all_cpus+0x0/0x1d4 + [ 164.863140] console: initcall_start: func=check_unaligned_access_all_cpus+0x0/0x1d4 + [ 164.878860] kmalloc: call_site=check_unaligned_access_all_cpus+0xa8/0x1d4 ptr=(____ptrval____) bytes_req=8 bytes_alloc=8 gfp_flags=GFP_KERNEL|__GFP_ZERO node=-1 accounted=false + [ 164.879200] console: kmalloc: call_site=check_unaligned_access_all_cpus+0xa8/0x1d4 ptr=(____ptrval____) bytes_req=8 bytes_alloc=8 gfp_flags=GFP_KERNEL|__GFP_ZERO node=-1 accounted=false + [ 164.912380] mm_page_alloc_zone_locked: page=(____ptrval____) 
pfn=0x83d28 order=2 migratetype=0 percpu_refill=1 + [ 164.912680] console: mm_page_alloc_zone_locked: page=(____ptrval____) pfn=0x83d28 order=2 migratetype=0 percpu_refill=1 + [ 164.934340] mm_page_alloc_zone_locked: page=(____ptrval____) pfn=0x83d2c order=2 migratetype=0 percpu_refill=1 + [ 164.934660] console: mm_page_alloc_zone_locked: page=(____ptrval____) pfn=0x83d2c order=2 migratetype=0 percpu_refill=1 + [ 164.956300] mm_page_alloc_zone_locked: page=(____ptrval____) pfn=0x83d30 order=2 migratetype=0 percpu_refill=1 + [ 164.956620] console: mm_page_alloc_zone_locked: page=(____ptrval____) pfn=0x83d30 order=2 migratetype=0 percpu_refill=1 + [ 164.978280] mm_page_alloc_zone_locked: page=(____ptrval____) pfn=0x83d34 order=2 migratetype=0 percpu_refill=1 + [ 164.978600] console: mm_page_alloc_zone_locked: page=(____ptrval____) pfn=0x83d34 order=2 migratetype=0 percpu_refill=1 + [ 165.000260] mm_page_alloc_zone_locked: page=(____ptrval____) pfn=0x83d38 order=2 migratetype=0 percpu_refill=1 + [ 165.000560] console: mm_page_alloc_zone_locked: page=(____ptrval____) pfn=0x83d38 order=2 migratetype=0 percpu_refill=1 + [ 165.022200] mm_page_alloc_zone_locked: page=(____ptrval____) pfn=0x83d3c order=2 migratetype=0 percpu_refill=1 + [ 165.022540] console: mm_page_alloc_zone_locked: page=(____ptrval____) pfn=0x83d3c order=2 migratetype=0 percpu_refill=1 + [ 165.044200] mm_page_alloc_zone_locked: page=(____ptrval____) pfn=0x83d40 order=2 migratetype=0 percpu_refill=1 + [ 165.044500] console: mm_page_alloc_zone_locked: page=(____ptrval____) pfn=0x83d40 order=2 migratetype=0 percpu_refill=1 + [ 165.066220] mm_page_alloc: page=(____ptrval____) pfn=0x83d28 order=2 migratetype=0 gfp_flags=GFP_KERNEL + [ 165.066540] console: mm_page_alloc: page=(____ptrval____) pfn=0x83d28 order=2 migratetype=0 gfp_flags=GFP_KERNEL + +## 2024-08-01 + +Now that we have a new bitstream with a CLINT connected to a PLIC input, we may +be able to generate an interrupt. 
+ +Here is the comment where I gather the pieces: + +---8<---{{{ + +From https://gitlab.bsc.es/hwdesign/rtl/core-tile/sa-fpga/ I can see that the +auxiliary timer [is in fact another +CLINT](https://gitlab.bsc.es/hwdesign/rtl/core-tile/sa-fpga/-/blob/10ba8b2a11ef105d7cda065e13838a3d28f3c951/fpga_core_bridge/rtl/fpga_core_bridge.sv#L685). + +I don't have access to the [hlib +repository](https://gitlab.bsc.es/hwdesign/hlib.git) (@jmendoza can I get access +to it?) to see the CLINT definition, but based on [this +CLINT](https://github.com/openhwgroup/cva6/blob/master/corev_apu/clint/clint.sv) +and [this one](https://github.com/pulp-platform/clint/blob/master/src/clint.sv) +I can estimate some of the previous information: + +> - The information on which port number of the PLIC the timer is connected to. + +https://gitlab.bsc.es/hwdesign/rtl/core-tile/sa-fpga/-/blob/main/fpga_core_bridge/rtl/fpga_core_bridge.sv#L1114 + +``` + plic #( + .PARAMETER_BITWIDTH (7), + .NUM_TARGETS (1), + .NUM_SOURCES (4) + ) plic_inst ( + .clk_i (clk_i), + .rstn_i (reset), + .irq_sources_i ({plic_timer_eirq,eth_irq,uart1_irq}), + .eip_targets_o (irq), +``` + +If I read it from right to left starting at 1, it should be **at 4**, as the +`eth_irq` has two "wires". + + +> - The memory address of the timer and the mapped registers, so I can see it +> increasing its value. I think the `aux_timer` you had in the past would be +> fine. + +https://gitlab.bsc.es/hwdesign/rtl/core-tile/sa-fpga/-/blob/main/fpga_core_bridge/rtl/local_includes/defines.svh#L33-36 + +``` +//Size: 64KB +`define AUX_TIMER_XBAR_ID 2 +`define AUX_TIMER_BASE_ADDR 64'h0000_0000_4001_0000 // Need to be this space because we use a clint as aux timer +`define AUX_TIMER_END_ADDR 64'h0000_0000_4001_FFFF +``` + +> - The specific operations I need to do in machine mode to configure the timer +> to fire at 1 Hz (probably setting two registers). 
+ +Based on the source of the CLINT, **only one interrupt will be generated** after +setting the mtimecmp register to something larger than the mtime register. Then +I suspect I would have to make the interrupt run some code to rearm it again by +modifying the mtimecmp register to some value in the future: + +``` +// ----------------------------- +// IRQ Generation +// ----------------------------- +// The mtime register has a 64-bit precision on all RV32, RV64, and RV128 systems. Platforms provide a 64-bit +// memory-mapped machine-mode timer compare register (mtimecmp), which causes a timer interrupt to be posted when the +// mtime register contains a value greater than or equal (mtime >= mtimecmp) to the value in the mtimecmp register. +// The interrupt remains posted until it is cleared by writing the mtimecmp register. The interrupt will only be taken +// if interrupts are enabled and the MTIE bit is set in the mie register. +always_comb begin : irq_gen + // check that the mtime cmp register is set to a meaningful value + for (int unsigned i = 0; i < NR_CORES; i++) begin + if (mtime_q >= mtimecmp_q[i]) begin + timer_irq_o[i] = 1'b1; + end else begin + timer_irq_o[i] = 1'b0; + end + end +end +``` + +I could ensure that an interrupt has been fired by reading the mtime and +mtimecmp values, and checking that mtime > mtimecmp. + +Now I only need to find a bitstream that has been generated with +https://gitlab.bsc.es/hwdesign/rtl/core-tile/sa-fpga/-/commit/10ba8b2a11ef105d7cda065e13838a3d28f3c951. 
+ + +This may work: + +https://gitlab.bsc.es/hwdesign/fpga/integration-lab/fpga-shell/-/jobs/968583/raw + +> Submodule path 'sa-fpga': checked out '12b77cb50cf1c416f107d4c7ab1c52d7b5e59056' + +Which is based on fpga-shell https://gitlab.bsc.es/hwdesign/fpga/integration-lab/fpga-shell/-/commit/01265d197f256bce2c7e82d21c7f4bf5dcb44e68 + +Here is the bitstream job: https://gitlab.bsc.es/hwdesign/fpga/integration-lab/fpga-shell/-/jobs/968585 + +And the bitstream: [artifacts.zip](/uploads/d8240a779cd485771b9e3d0147e342d1/artifacts.zip) + +And full log: [job.log](/uploads/a4215e4d039065b77f7a2d2b1403e475/job.log) + +The memory map would need a bit of adjustment in the device tree, but to play with the timer in machine mode not much is needed. + +I think I have all the pieces now. + +---8<---}}} + +I will try with the last bitstream that I already had compiled, as I will have +to rebuild the required packages in nix. + +To compute the memory position of the registers: + + `define AUX_TIMER_XBAR_ID 2 + `define AUX_TIMER_BASE_ADDR 64'h0000_0000_4001_0000 // Need to be this space because we use a clint as aux timer + `define AUX_TIMER_END_ADDR 64'h0000_0000_4001_FFFF + + localparam logic [15:0] MSIP_BASE = 16'h0; + localparam logic [15:0] MTIMECMP_BASE = 16'h4000; + localparam logic [15:0] MTIME_BASE = 16'hbff8; + +So, the base address 0x40010000 and the first MTIME at 0xbff8 would give us a +timer at 0x4001bff8. + +Here it is: + + => md 0x4001bff8 1 + 4001bff8: 006e65b8 .en. + => md 0x4001bff8 1 + 4001bff8: 006e9a26 &.n. + => md 0x4001bff8 1 + 4001bff8: 006ebae1 ..n. + => md 0x4001bff8 1 + 4001bff8: 006eda45 E.n. + => md 0x4001bff8 1 + 4001bff8: 006ef9d4 ..n. + => md 0x4001bff8 1 + 4001bff8: 006f1abb ..o. + +Now, the MTIMECMP should be at 0x40014000, which should be 0. + + => md 0x40014000 1 + 40014000: 00000000 .... + +Good. + +Now, I suspect the MSIP is not used, so it should be 0 at 0x40010000 too: + + => md 0x40010000 1 + 40010000: 00000000 .... + +Nice. 
+ +Just for testing, let's see if I can make the timer cause any change in the MSIP +register by setting the MTIMECMP to a value: + + => mw 0x40014000 0x01700000 # Write the MTIMECMP + => md 0x40014000 1 + 40014000: 01700000 ..p. + => md 0x4001bff8 1 + 4001bff8: 016da81a ..m. + => md 0x40010000 1 + 40010000: 00000000 .... + => md 0x4001bff8 1 + 4001bff8: 016f947c |.o. + => md 0x4001bff8 1 + 4001bff8: 016fff96 ..o. + => md 0x4001bff8 1 + 4001bff8: 01704367 gCp. # Now we passed it + => md 0x40010000 1 + 40010000: 00000000 .... # But MSIP is still 0 + +As expected, nothing happens. We cannot monitor the interrupt line from the +timer itself. + +Now, let's see if we can inspect the state of the PLIC. + +From the `plic_interface` I can see where the memory addresses of the +registers are exposed. + +The PLIC is mapped here: + + //Size: 4MB + `define PLIC_XBAR_ID 5 + `define PLIC_BASE_ADDR 64'h0000_0000_4080_0000 + `define PLIC_END_ADDR 64'h0000_0000_40BF_FFFF + +There are several ways in which the interrupts are not forwarded to the +destination, and several destinations. The PLIC specification is a good resource +to understand it: + + https://github.com/riscv/riscv-plic-spec + +This is important: + +> The interrupt gateways are responsible for converting global interrupt signals +> into a common interrupt request format, and for controlling the flow of +> interrupt requests to the PLIC core. At most one interrupt request per +> interrupt source can be pending in the PLIC core at any time, indicated by +> setting the source’s IP bit. The gateway only forwards a new interrupt request +> to the PLIC core after receiving notification that the interrupt handler +> servicing the previous interrupt request from the same source has completed. + +So, there cannot be any pending interrupt, otherwise no more interrupts will be +sent to the core.
+ +Assuming the PLIC uses the standard memory layout, we should find: + + base + 0x000000: Reserved (interrupt source 0 does not exist) + base + 0x000004: Interrupt source 1 priority + base + 0x000008: Interrupt source 2 priority + +These should begin at 0x40800000. + + => md 0x40800000 8 + 40800000: 00000000 00000000 00000000 00000000 ................ + 40800010: 00000000 00000000 00000000 00000000 ................ + +All the priorities are set to 0. + +Let's see the pending interrupts: + + base + 0x000FFC: Interrupt source 1023 priority + base + 0x001000: Interrupt Pending bit 0-31 + base + 0x00107C: Interrupt Pending bit 992-1023 + +They should be at 0x40801000: + + => md 0x40801000 8 + 40801000: 00000010 00000000 00000000 00000000 ................ + 40801010: 00000000 00000000 00000000 00000000 ................ + +Whoa, look at that. + + 4321 + 0x00000010 = 10000 + | | + | int 0 (reserved) + int 4 = timer + +We got the interrupt 4 pending in context 0! + +Other contexts don't seem to see anything: + + => md 0x40801080 1 + 40801080: 00000000 .... + => md 0x40801100 1 + 40801100: 00000000 .... + => md 0x40801180 1 + 40801180: 00000000 .... + => md 0x40801200 1 + 40801200: 00000000 .... + => md 0x40801280 1 + 40801280: 00000000 .... + => md 0x40801300 1 + 40801300: 00000000 .... + => md 0x40801380 1 + 40801380: 00000000 .... + +So, as the priority is 0, this means it is ignored: + +> If PLIC supports Interrupt Priorities, then each PLIC interrupt source can be +> assigned a priority by writing to its 32-bit memory-mapped priority register. +> A priority value of 0 is reserved to mean "never interrupt" and effectively +> disables the interrupt. Priority 1 is the lowest active priority while the +> maximum level of priority depends on PLIC implementation. Ties between global +> interrupts of the same priority are broken by the Interrupt ID; interrupts +> with the lowest ID have the highest effective priority.
+ +Let's claim the interrupt, by just performing a read from 0x40a00004: + + => md 0x40801000 1 + 40801000: 00000010 .... + => md 0x40a00004 1 + 40a00004: 00000000 .... + => md 0x40801000 1 + 40801000: 00000010 .... + +So, it continues to be pending. + +We have to write the completed interrupt, by writing the number 4 to the same +register: + + => mw 0x40a00004 4 + => md 0x40801000 1 + 40801000: 00000010 .... + +Still not cleared. + +Let's try making the MTIMECMP value much higher than MTIME: + + => md 0x40014000 1 + 40014000: 01700000 ..p. + => md 0x4001bff8 1 + 4001bff8: 03a4584b KX.. + => mw 0x40014000 0xaaaaaaaa + => md 0x40014000 1 + 40014000: aaaaaaaa .... + => md 0x4001bff8 1 + 4001bff8: 03abc84d M... + +So... the ID that must be written to the completion register is not the +interrupt number, but the value read from the claim register, which is 0. + + => mw 0x40a00004 0 + => md 0x40801000 1 + 40801000: 00000010 .... + +Still, nothing. + +All interrupts are disabled: + + => md 0x40802000 4 + 40802000: 00000000 00000000 00000000 00000000 ................ + +Let's try enabling the interrupt 4, by writing: + + => mw 0x40802000 0x10 + => md 0x40802000 1 + 40802000: 00000010 .... + => md 0x40801000 1 + 40801000: 00000010 .... + +Now, let's set the priority to something other than 0. + +First, let's make sure that the context 0 threshold priority is set to 0, so we +allow all interrupts: + + 0x200000: Priority threshold for context 0 + + => md 0x40a00000 1 + 40a00000: 00000007 .... + +Oh, so we are only receiving interrupts with priority higher than 7. But our +interrupt has priority 0! + + => md 0x40800004 1 + 40800004: 00000000 .... + +Let's make the threshold 0 and our interrupt have priority 1. + + => mw 0x40a00000 0 + => mw 0x40800004 1 + => md 0x40800004 1 + 40800004: 00000001 .... + => md 0x40a00000 + 40a00000: 00000000 .... + +Now let's see again the interrupt state: + + => md 0x40801000 1 + 40801000: 00000010 .... + +Still on.
+ +Let's read the claim register again. + + => md 0x40a00004 + 40a00004: 00000000 .... + +Still 0, let's try to complete it: + + => mw 0x40a00004 0 + => md 0x40801000 1 + 40801000: 00000010 .... + +Nope, still pending. + +What, what the hell. The threshold value has changed to 1: + + => md 0x40800004 1 + 40800004: 00000001 .... + => md 0x40a00000 1 + 40a00000: 00000001 .... <-- this was 0 + +Let's configure the interruption priority to something bigger than 1. + +Wait, I put the priority in the wrong source: + + 0x000000: Reserved (interrupt source 0 does not exist) + 0x000004: Interrupt source 1 priority + 0x000008: Interrupt source 2 priority + +Our timer should be the source 4, so 12 or 0xc: + + => md 0x4080000c 1 + 4080000c: 00000000 .... + +(This is wrong, should be 0x40800010, see below) + +Let's make it have priority 0xd: + + => mw 0x4080000c 0xd + => md 0x4080000c 1 + 4080000c: 0000000d .... + +Something weird is going on with the priority register? + + => md 0x40a00000 1 + 40a00000: 00000000 .... + => md 0x40a00000 1 + 40a00000: 0000000d .... + => md 0x40a00000 1 + 40a00000: 0000000d .... + => md 0x40a00000 1 + 40a00000: 0000000d .... + => md 0x40a00000 1 + 40a00000: 0000000d .... + +Let's see the claim register, which should be in the next word: + + => md 0x40a00004 1 + 40a00004: 00000004 .... + +Yes! Now I can see the claim register with a proper ID. Let's complete this +interrupt by writing the 4 back to that register: + + => mw 0x40a00004 4 + => md 0x40801000 1 + 40801000: 00000000 .... + +Perfect! It properly caused the pending interrupt to disappear. + +Let's try now setting the MTIMECMP to something smaller than the MTIME, so it +causes an interrupt. With a value 0 should always work, but lets choose a non +zero value: + + => md 0x40014000 + 40014000: aaaaaaaa .... + => mw 0x40014000 00aaaaaa + => md 0x40014000 + 40014000: 00aaaaaa .... + => md 0x4001bff8 + 4001bff8: 06211a0c ..!. + => md 0x40801000 1 + 40801000: 00000010 .... + +Perfect! 
It causes the interrupt to appear as pending. + +So, using the context 0, we can properly see the interrupt pending, claim it and +complete it. But the context 0 is not used in OpenSBI, only the 9 and 11: + +From `include/sbi/riscv_encoding.h`: + + #define IRQ_S_SOFT 1 + #define IRQ_VS_SOFT 2 + #define IRQ_M_SOFT 3 + #define IRQ_S_TIMER 5 + #define IRQ_VS_TIMER 6 + #define IRQ_M_TIMER 7 + #define IRQ_S_EXT 9 + #define IRQ_VS_EXT 10 + #define IRQ_M_EXT 11 + #define IRQ_S_GEXT 12 + #define IRQ_PMU_OVF 13 + +And from `lib/utils/irqchip/fdt_irqchip_plic.c`: + + static int irqchip_plic_update_hartid_table(void *fdt, int nodeoff, + struct plic_data *pd) + { + const fdt32_t *val; + u32 phandle, hwirq, hartid; + struct sbi_scratch *scratch; + int i, err, count, cpu_offset, cpu_intc_offset; + + val = fdt_getprop(fdt, nodeoff, "interrupts-extended", &count); + if (!val || count < sizeof(fdt32_t)) + return SBI_EINVAL; + count = count / sizeof(fdt32_t); + + for (i = 0; i < count; i += 2) { + phandle = fdt32_to_cpu(val[i]); + hwirq = fdt32_to_cpu(val[i + 1]); + + cpu_intc_offset = fdt_node_offset_by_phandle(fdt, phandle); + if (cpu_intc_offset < 0) + continue; + + cpu_offset = fdt_parent_offset(fdt, cpu_intc_offset); + if (cpu_offset < 0) + continue; + + err = fdt_parse_hart_id(fdt, cpu_offset, &hartid); + if (err) + continue; + + scratch = sbi_hartid_to_scratch(hartid); + if (!scratch) + continue; + + plic_set_hart_data_ptr(scratch, pd); + switch (hwirq) { + case IRQ_M_EXT: + plic_set_hart_mcontext(scratch, i / 2); + break; + case IRQ_S_EXT: + plic_set_hart_scontext(scratch, i / 2); + break; + } + } + + return 0; + } + +So, lets try to do the same, but with the context 11 for machine mode +`IRQ_M_EXT`. + +Let's compute the address of the input source for context 11: + + base + 0x002000: Enable bits for sources 0-31 on context 0 + base + 0x002004: Enable bits for sources 32-63 on context 0 + ... 
+ base + 0x00207C: Enable bits for sources 992-1023 on context 0 + base + 0x002080: Enable bits for sources 0-31 on context 1 + base + 0x002084: Enable bits for sources 32-63 on context 1 + ... + base + 0x0020FC: Enable bits for sources 992-1023 on context 1 + base + 0x002100: Enable bits for sources 0-31 on context 2 + base + 0x002104: Enable bits for sources 32-63 on context 2 + ... + base + 0x00217C: Enable bits for sources 992-1023 on context 2 + ... + base + 0x1F1F80: Enable bits for sources 0-31 on context 15871 + base + 0x1F1F84: Enable bits for sources 32-63 on context 15871 + base + 0x1F1FFC: Enable bits for sources 992-1023 on context 15871 + ... + +It should be: + + >>> hex(0x40800000 + 0x2000 + (11 * 0x80)) + '0x40802580' + +They are all disabled: + + => md 0x40802580 + 40802580: 00000000 .... + +So, let's enable the source 4 by writing 0x10 + + => mw 0x40802580 0x10 + => md 0x40801000 1 + 40801000: 00000010 .... + +Now, let's check the context 11 priority threshold: + + 0x200000: Priority threshold for context 0 + 0x201000: Priority threshold for context 1 + 0x202000: Priority threshold for context 2 + 0x203000: Priority threshold for context 3 + +The priority threshold for context 11 should be at: + + >>> hex(0x40800000 + 0x200000 + (11 * 0x1000)) + '0x40a0b000' + + => md 0x40a0b000 + 40a0b000: 00000000 .... + +It has value 0, so all interrupts with non-zero priority should pass: + +> For example, a threshold value of zero permits all interrupts with non-zero +> priority. + +Let's see the priority of source 4 in context 11: + + 0x000000: Reserved (interrupt source 0 does not exist) + 0x000004: Interrupt source 1 priority + 0x000008: Interrupt source 2 priority + ... + 0x000FFC: Interrupt source 1023 priority + +The address should be at: + + >>> hex(0x40800000 + (4 * 0x4)) + + => md 0x40800010 + 40800010: 00000000 .... + +It has priority 0, so it would never work. 
Let's make it priority 1: + + => mw 0x40800010 1 + => md 0x40800010 1 + 40800010: 00000001 + +Let's check the pending interrupts: + + => md 0x40801000 1 + 40801000: 00000010 .... + +It is still pending, so let's clear it by setting the MTIMECMP to a large value. + + => md 0x40014000 + 40014000: 00aaaaaa .... + => mw 0x40014000 0xaaaaaaaa + => md 0x40014000 + 40014000: aaaaaaaa .... + => md 0x4001bff8 + 4001bff8: 0e8e6066 f`.. + => md 0x4001bff8 + 4001bff8: 0e8ea4c9 .... + => md 0x4001bff8 + 4001bff8: 0e8ece24 $... + +Now, let's claim and complete it for the context 0 which was already enabled +from the test before. + + => md 0x40a00004 1 + 40a00004: 00000004 .... + => mw 0x40a00004 4 + => md 0x40801000 1 + 40801000: 00000000 .... + +Perfect, now it is not pending anymore. + +Now, the context 0 is still enabled, so the interrupts may be sent there +instead of context 11. So let's disable the context 0 first. + + => mw 0x40802000 0 + => md 0x40802000 1 + 40802000: 00000000 .... + +Now let's fire the MTIMECMP and see if OpenSBI sees a machine trap. + + => md 0x40014000 1 + 40014000: aaaaaaaa .... + => mw 0x40014000 00aaaaaa + => md 0x40014000 1 + 40014000: 00aaaaaa .... + +Nothing happened. + +The interrupt is pending: + + => md 0x40801000 1 + 40801000: 00000010 .... + +The claim on context 0 returns 0, so no interrupt there, which is expected: + + => md 0x40a00004 1 + 40a00004: 00000000 .... + +Let's compute the claim register on context 11: + + 0x200004: Interrupt Claim Process for context 0 + 0x201004: Interrupt Claim Process for context 1 + 0x202004: Interrupt Claim Process for context 2 + 0x203004: Interrupt Claim Process for context 3 + ... + + >>> hex(0x40800000 + 0x200004 + (11 * 0x1000)) + '0x40a0b004' + + => md 0x40a0b004 1 + 40a0b004: 00000000 .... + +Hmm, there is no claim ID. + +So, I checked again, and I cannot enable the interrupt on context 11: + + => md 0x40802580 1 + 40802580: 00000000 ....
+ => mw 0x40802580 0x10 + => md 0x40802580 1 + 40802580: 00000000 .... + + +Note, the first value is 0 and must be claimed: + + => md 0x40801000 1 + 40801000: 00000010 .... + => md 0x40802000 1 + 40802000: 00000010 + => md 0x40a00004 1 + 40a00004: 00000000 .... + => mw 0x40a00004 1 + => mw 0x40a00004 4 + => md 0x40a00004 1 + 40a00004: 00000004 .... + => mw 0x40a00004 4 + => md 0x40a00004 1 + 40a00004: 00000004 .... + => mw 0x40a00004 4 + => md 0x40a00004 1 + 40a00004: 00000004 .... + +## 2024-08-02 + +I see that the MIE sets the machine mode external interrupt enable in this way: + + int sbi_irqchip_init(struct sbi_scratch *scratch, bool cold_boot) + { + int rc; + const struct sbi_platform *plat = sbi_platform_ptr(scratch); + + rc = sbi_platform_irqchip_init(plat, cold_boot); + if (rc) + return rc; + + if (ext_irqfn != default_irqfn) + csr_set(CSR_MIE, MIP_MEIP); + + return 0; + } + +Only if the external interrupt function is not the default one. But for the +PLIC, it looks like the default one is being used. So let's enable the machine +mode interrupts unconditionally. + +Let's try to cause an interruption. I would need to list all the steps. + + mw 0x40014000 0xffffffff # Disable clock interrupt + md 0x40801000 1 # Show pending interrupts (should be 0x10) + mw 0x40802000 0x10 # Enable interrupt for source 4 (timer) + mw 0x40800010 0xff # Make source 4 priority large + md 0x40a00004 1 # Claim interrupt (should read 4) + mw 0x40a00004 4 # Complete 4 + md 0x40801000 1 # Show pending interrupts (should be 0x00) + mw 0x40014000 0x00000000 # Enable clock interrupt (should cause one) + md 0x40801000 1 # Show pending interrupts (should be 0x10) + +Nice, I can see the trap: + + Boot HART MIDELEG : 0x0000000000000022 + Boot HART MEDELEG : 0x000000000000b109 + ... + => mw 0x40802000 0x10 # Enable interrupt for source 4 (timer) + => mw 0x40800010 0xff # Make source 4 priority large + => md 0x40a00004 1 # Show which value should be claimed + 40a00004: 00000000 .... 
+ => mw 0x40a00004 0 # Claim 0 (weird) + mw 0x40014000 0xffffffff # Disable clock interrupt + => md 0x40801000 1 # Show pending interrupts (should be 0x10) + 40801000: 00000010 .... + => mw 0x40802000 0x10 # Enable interrupt for source 4 (timer) + => mw 0x40800010 0xff # Make source 4 priority large + => md 0x40a00004 1 # Claim interrupt (should read 4) + 40a00004: 00000004 .... + => mw 0x40a00004 4 # Complete 4 + => md 0x40801000 1 # Show pending interrupts (should be 0x00) + 40801000: 00000000 .... + => mw 0x40014000 0x00000000 # Enable clock interrupt (should cause one) + => md 0x40801000 1 # Show pending interrupts (should be 0x10) + 40801000: 00000010 + +I made a small subcommand U-Boot "exception sregs" to be able to dump the +supervisor registers, to check they have the proper values. + + Boot HART MIDELEG : 0x0000000000000222 + Boot HART MEDELEG : 0x000000000000b109 + >>Core: 11 devices, 8 uclasses, devicetree: board + Loading Environment from nowhere... OK + In: serial,usbkbd + Out: serial,vidconsole + Err: serial,vidconsole + No working controllers found + Net: No ethernet found. + Working FDT set to c0000000 + Hit any key to stop autoboot: 0 + => exception sregs + stvec : 0x00000000af6f4400 + sie : 0x0000000000000000 + sip : 0x0000000000000000 + sstatus : 0x8000000200006000 + => exception enable + => exception sregs + stvec : 0x00000000af6f4400 + sie : 0x0000000000000222 + sip : 0x0000000000000000 + sstatus : 0x8000000200006002 + => mw 0x40014000 0xffffffff # Disable clock interrupt + => md 0x40801000 1 # Show pending interrupts (should be 0x10) + 40801000: 00000010 .... + => mw 0x40802000 0x10 # Enable interrupt for source 4 (timer) + => mw 0x40800010 0xff # Make source 4 priority large + => md 0x40a00004 1 # Claim interrupt (should read 4) + 40a00004: 00000004 .... + => mw 0x40a00004 4 # Complete 4 + => md 0x40801000 1 # Show pending interrupts (should be 0x00) + 40801000: 00000000 .... 
+ => mw 0x40014000 0x00000000 # Enable clock interrupt (should cause one) + => md 0x40801000 1 # Show pending interrupts (should be 0x10) + 40801000: 00000010 .... + => exception sregs + stvec : 0x00000000af6f4400 + sie : 0x0000000000000222 + sip : 0x0000000000000200 + sstatus : 0x8000000200006002 + +Now I can see the external interrupt in supervisor arriving to the SIP. + +Let's try to cause an interrupt with the normal CLINT: + + `define CLINT_XBAR_ID 3 + `define CLINT_BASE_ADDR 64'h0000_0000_4010_0000 + `define CLINT_END_ADDR 64'h0000_0000_4010_FFFF + + `define AUX_TIMER_XBAR_ID 2 + `define AUX_TIMER_BASE_ADDR 64'h0000_0000_4001_0000 // Need to be this space because we use a clint as aux timer + `define AUX_TIMER_END_ADDR 64'h0000_0000_4001_FFFF + + => exception sregs + stvec : 0x00000000af6f4400 + sie : 0x0000000000000000 + sip : 0x0000000000000000 + sstatus : 0x8000000200006000 + => exception enable + => exception sregs + stvec : 0x00000000af6f4400 + sie : 0x0000000000000222 + sip : 0x0000000000000000 + sstatus : 0x8000000200006002 + => md 0x4010bff8 # Show normal CLINT mtime value + 4010bff8: 00c8159b .... + => md 0x4010bff8 # Show normal CLINT mtime value + 4010bff8: 00c84453 SD.. + => md 0x4010bff8 # Show normal CLINT mtime value + 4010bff8: 00c865b5 .e.. + => md 0x40104000 # Show normal CLINT mtimecmp value + 40104000: 00000000 .... + => mw 0x40104000 aaaaaaaa # Disable interrupt for CLINT + => md 0x40104000 # Show normal CLINT mtimecmp value + 40104000: aaaaaaaa .... + => exception sregs + stvec : 0x00000000af6f4400 + sie : 0x0000000000000222 + sip : 0x0000000000000000 + sstatus : 0x8000000200006002 + => mw 0x40104000 0 # Enable interrupt for CLINT + => exception sregs + stvec : 0x00000000af6f4400 + sie : 0x0000000000000222 + sip : 0x0000000000000000 <-- nothing here + sstatus : 0x8000000200006002 + => md 0x40104000 # Show normal CLINT mtimecmp value + 40104000: 00000000 + +No interrupts seem to arrive at the SIP register. 
+ +Let's set the stvec to zero, so it causes a machine exception. + +## 2024-08-21 + +I tried with the new bitstream (`ox_u55c_46619ef4.bit`) setting the stvec to zero +and I can see the OpenSBI handler stopping, probably due to the jump to zero +address: + + Boot HART MIDELEG : 0x0000000000000222 + Boot HART MEDELEG : 0x000000000000b109 + >>Core: 11 devices, 8 uclasses, devicetree: board + Loading Environment from nowhere... OK + In: serial,usbkbd + Out: serial,vidconsole + Err: serial,vidconsole + No working controllers found + Net: No ethernet found. + Working FDT set to c0000000 + Hit any key to stop autoboot: 0 + + Device 0: unknown device + + Device 1: unknown device + scanning bus for devices... + + Device 0: unknown device + starting USB... + No working controllers found + No ethernet found. + No ethernet found. + => exception sregs + stvec : 0x00000000af6f4400 + sie : 0x0000000000000000 + sip : 0x0000000000000000 + sstatus : 0x8000000200006000 + => exception enable + => exception sregs + stvec : 0x0000000000000000 + sie : 0x0000000000000222 + sip : 0x0000000000000000 + sstatus : 0x8000000200006002 + => mw 0x40014000 0xffffffff # Disable clock interrupt + => md 0x40801000 1 # Show pending interrupts (should be 0x10) + 40801000: 00000010 .... + => mw 0x40802000 0x10 # Enable interrupt for source 4 (timer) + => mw 0x40800010 0xff # Make source 4 priority large + >Core: 11 devices, 8 uclasses, devicetree: board + Loading Environment from nowhere... OK + In: serial,usbkbd + Out: serial,vidconsole + Err: serial,vidconsole + No working controllers found + Net: No ethernet found. 
+ Working FDT set to c0000000 + Hit any key to stop autoboot: 0 + => exception sregs + stvec : 0x00000000af6f4400 + sie : 0x0000000000000000 + sip : 0x0000000000000000 + sstatus : 0x8000000200006000 + => exception enable + => exception sregs + stvec : 0x00000000af6f4400 <-------- now stvec is left as-is + sie : 0x0000000000000222 + sip : 0x0000000000000000 + sstatus : 0x8000000200006002 + => mw 0x40014000 0xffffffff # Disable clock interrupt + => md 0x40801000 1 # Show pending interrupts (should be 0x10) + 40801000: 00000010 .... + => mw 0x40802000 0x10 # Enable interrupt for source 4 (timer) + => mw 0x40800010 0xff # Make source 4 priority large + >> + + + An error occurred in stage 1 of the boot process, which must mount the + root filesystem on `/mnt-root' and then start stage 2. Press one + of the following keys: + + i) to launch an interactive shell + f) to start an interactive shell having pid 1 (needed if you want to + start stage 2's init manually) + [ 22.365260] stage-1-init: [Thu Jan 1 00:00:22 UTC 1970] An error occurred in stage 1 of the boot process, which must mount the + r) to reboot immediately + *) to ignore the error and continue + [ 22.526780] stage-1-init: [Thu Jan 1 00:00:22 UTC 1970] root filesystem on `/mnt-root' and then start stage 2. Press one + [ 22.611640] stage-1-init: [Thu Jan 1 00:00:22 UTC 1970] of the following keys: + [ 22.697460] stage-1-init: [Thu Jan 1 00:00:22 UTC 1970] i) to launch an interactive shell + [ 22.788100] stage-1-init: [Thu Jan 1 00:00:22 UTC 1970] f) to start an interactive shell having pid 1 (needed if you want to + [ 22.874060] stage-1-init: [Thu Jan 1 00:00:22 UTC 1970] start stage 2's init manually) + [ 22.957940] stage-1-init: [Thu Jan 1 00:00:22 UTC 1970] r) to reboot immediately + [ 23.042520] stage-1-init: [Thu Jan 1 00:00:22 UTC 1970] *) to ignore the error and continue + iStarting interactive shell... + [ 32.314080] stage-1-init: [Thu Jan 1 00:00:32 UTC 1970] Starting interactive shell... 
+ ~ # cat /proc/sys/kernel/random + random/ randomize_va_space + ~ # cat /proc/sys/kernel/random/entropy_avail + 0 + +Let's see what happens with strace. + +## 2024-09-02 + +Interestingly, I managed to reach the login console and run some commands after +fully booting NixOS. I just needed to disable nscd and enable a daemon to fill +the entropy pool, which is depleted on boot. + +I'm now looking at the nscd binary, which works ok when running `nscd -V` but +hangs the CPU when running `nscd --invalidate group`. I tried executing it under +GDB and then ^C, but it doesn't respond. + +As always, we don't have JTAG support ready, so this is going to be an absolute +pain to debug. I could bisect the code and try to guess at which point it must +be failing. But each attempt will take around 30 minutes, so it is extremely +expensive. + +It would be nice to reproduce this in the initrd shell, which loads much faster. + + # not strictly required, but you'll likely want the log anyway + (gdb) set logging on + + # ask gdb to not stop every screen-full + (gdb) set height 0 + + (gdb) while 1 + > x/i $pc + > stepi + > end + +Interestingly, I can step up to the end of the program, but it seems to be +failing: + + (gdb) r + Starting program: /nix/store/nh1f85icnvlqs3dc8lv2ya0ylmljsfax-system-path/bin/nscd --invalidate group + [Thread debugging using libthread_db enabled] + Using host libthread_db library "/nix/store/47a03qi49pwlk3hxpfwx2vq671mlqn57-glibc-riscv64-unknown-linux-gnu-2.39-52/lib/libthread_db.so.1".
+ + Breakpoint 1, 0x0000002aaaaaf738 in main () + (gdb) while 1 + >x/i $pc + >nexti + >end + => 0x2aaaaaf738 : jal 0x2aaaaaf160 + 0x0000002aaaaaf73c in main () + => 0x2aaaaaf73c : auipc a0,0x19 + 0x0000002aaaaaf740 in main () + => 0x2aaaaaf740 : ld a0,1580(a0) + 0x0000002aaaaaf744 in main () + => 0x2aaaaaf744 : jal 0x2aaaaaf690 + 0x0000002aaaaaf748 in main () + => 0x2aaaaaf748 : li a5,0 + 0x0000002aaaaaf74c in main () + => 0x2aaaaaf74c : addi a4,sp,8 + 0x0000002aaaaaf750 in main () + => 0x2aaaaaf750 : li a3,0 + 0x0000002aaaaaf754 in main () + => 0x2aaaaaf754 : mv a2,s1 + 0x0000002aaaaaf758 in main () + => 0x2aaaaaf758 : mv a1,s0 + 0x0000002aaaaaf75c in main () + => 0x2aaaaaf75c : auipc a0,0x19 + 0x0000002aaaaaf760 in main () + => 0x2aaaaaf760 : addi a0,a0,-1852 + 0x0000002aaaaaf764 in main () + => 0x2aaaaaf764 : jal 0x2aaaaaef30 + [Inferior 1 (process 1962) exited with code 01] + No registers. + +Using passwd instead hangs the CPU before reaching the main: + + [root@nixos-riscv:~]# gdb --args $(which nscd) --invalidate passwd + GNU gdb (GDB) 14.2 + Copyright (C) 2023 Free Software Foundation, Inc. + License GPLv3+: GNU GPL version 3 or later + This is free software: you are free to change and redistribute it. + There is NO WARRANTY, to the extent permitted by law. + Type "show copying" and "show warranty" for details. + This GDB was configured as "riscv64-unknown-linux-gnu". + Type "show configuration" for configuration details. + For bug reporting instructions, please see: + . + Find the GDB manual and other documentation resources online at: + . + + For help, type "help". + Type "apropos word" to search for commands related to "word"... + Reading symbols from /run/current-system/sw/bin/nscd... + (No debugging symbols found in /run/current-system/sw/bin/nscd) + (gdb) b main + Breakpoint 1 at 0x5738 + (gdb) b argp_parse + Breakpoint 2 at 0x4f38 + (gdb) c + The program is not being run. 
+ (gdb) r + Starting program: /nix/store/nh1f85icnvlqs3dc8lv2ya0ylmljsfax-system-path/bin/nscd --invalidate passwd + +It's probably not deterministic. + +I have also disabled kaslr by adding nokaslr to the bootargs. + +Anyway, it doesn't make much sense to debug this in userland and pay 30 minutes for +each test. I'll wait until we have proper JTAG support or a similar way to dump +the registers on a crash. + +## 2024-09-03 + +Wrote a small tool `plictool` to dump the state of the PLIC: + + ~ # plictool -c 2 + plic=0x40800000 nsources=1024 ncontexts=2 + src=1 pend=0 prio=1 ctx=1 thre=1 + src=2 pend=0 prio=1 + src=3 pend=0 prio=1 + src=4 pend=1 prio=1 + src=33 pend=0 prio=0 ctx=1 thre=1 + +Interestingly, the auxiliary UART interrupts don't seem to be working very well. +Also, is there another source 33 enabled? + +I have also noticed that the clock is running with the wrong frequency. +Everything is about twice as slow. For example, `sleep 1` takes 2 seconds. +So I assume the kernel thinks the clock goes faster than it actually does. + +The timer is at 50 kHz: + + timebase-frequency = <50000>; /* 50 kHz */ + +So, the frequency of this "clock" is derived by using the "general" clock which +goes at 50 MHz, and then using a simple counter that counts up to 1525, so the +frequency is: + + 32786.88524590164 Hz + +Assuming that it actually works well. Let's try that and see if we can have a +more realistic reading for the CLINT interrupt frequency. + +Something is going on with the plictool: + + ~ # plictool + plic=0x40800000 nsources=1024 ncontexts=15872 + src=1 pend=1 prio=1 ctx=1 thre=0 <--- zero + src=2 pend=0 prio=1 + src=3 pend=0 prio=1 + src=4 pend=1 prio=1 + src=33 pend=0 prio=0 ctx=1 thre=2 + ~ # plictool + plic=0x40800000 nsources=1024 ncontexts=15872 + src=1 pend=1 prio=1 ctx=1 thre=2 <--- now changed to 2???
+ src=2 pend=0 prio=1 + src=3 pend=0 prio=1 + src=4 pend=1 prio=1 + src=33 pend=0 prio=0 ctx=1 thre=2 + +Either the register is changing its value on its own, or the tool is causing it. + +Let's revert the threshold to 0 by writing to the threshold address, which should be +at 0x40a01000. + + ~ # devmem 0x40a01000 + 0x00000002 + ~ # devmem 0x40a01000 + 0x00000002 + ~ # devmem 0x40a01000 + 0x00000002 + ~ # devmem 0x40a01000 8 0 + ~ # devmem 0x40a01000 + 0x00000000 + ~ # devmem 0x40a01000 + 0x00000000 + ~ # devmem 0x40a01000 + 0x00000000 + ~ # plictool -c 2 -s 5 <-- limiting the sources and contexts + plic=0x40800000 nsources=5 ncontexts=2 + src=1 pend=1 prio=1 ctx=1 thre=0 <-- good + src=2 pend=0 prio=1 + src=3 pend=0 prio=1 + src=4 pend=1 prio=1 + ~ # plictool -c 2 -s 5 + plic=0x40800000 nsources=5 ncontexts=2 + src=1 pend=1 prio=1 ctx=1 thre=0 + src=2 pend=0 prio=1 + src=3 pend=0 prio=1 + src=4 pend=1 prio=1 + ~ # plictool -c 2 -s 5 + plic=0x40800000 nsources=5 ncontexts=2 + src=1 pend=1 prio=1 ctx=1 thre=0 <--- stable + src=2 pend=0 prio=1 + src=3 pend=0 prio=1 + src=4 pend=1 prio=1 + ~ # cat /proc/interrupts + CPU0 + 1: 1 SiFive PLIC 1 Edge ttyS1 + 5: 195413 RISC-V INTC 5 Edge riscv-timer + ~ # cat /proc/int^Crupts + + ~ # echo a > /dev/ttyS1 + ~ # cat /proc/interrupts + CPU0 + 1: 1 SiFive PLIC 1 Edge ttyS1 + 5: 197102 RISC-V INTC 5 Edge riscv-timer + ~ # plictool -c 2 -s 5 + plic=0x40800000 nsources=5 ncontexts=2 + src=1 pend=1 <--- wtf prio=1 ctx=1 thre=0 + src=2 pend=0 prio=1 + src=3 pend=0 prio=1 + src=4 pend=1 prio=1 + ~ # devmem 0x40a01004 + 0x00000000 <--- the claim register continues to give 0 instead of 1 + ~ # devmem 0x40a01004 + 0x00000000 + ~ # devmem 0x40a01004 + 0x00000000 + ~ # devmem 0x40a01004 + 0x00000000 + ~ # devmem 0x40a01004 + 0x00000000 + ~ # devmem 0x40a01004 8 5 <-- try writing 5 + ~ # devmem 0x40a01004 + 0x00000000 <--- no change + ~ # devmem 0x40a01004 + 0x00000000 + ~ # devmem 0x40a01004 + 0x00000000 + ~ # devmem 0x40a01004 + 0x00000000 +
~ # devmem 0x40a01004 8 1 <-- try with 1 + ~ # devmem 0x40a01004 + 0x00000000 + ~ # devmem 0x40a01004 + 0x00000000 + ~ # devmem 0x40a01004 + 0x00000000 + ~ # plictool -c 2 -s 5 + plic=0x40800000 nsources=5 ncontexts=2 + src=1 pend=0 <--- now it is gone prio=1 ctx=1 thre=0 + src=2 pend=0 prio=1 + src=3 pend=0 prio=1 + src=4 pend=1 prio=1 + ~ # cat /proc/interrupts + CPU0 + 1: 3 <-- and we have >1 SiFive PLIC 1 Edge ttyS1 + 5: 213775 RISC-V INTC 5 Edge riscv-timer + ~ # echo a > /dev/ttyS1 + ~ # cat /proc/interrupts + CPU0 + 1: 3 <--- stuck again SiFive PLIC 1 Edge ttyS1 + 5: 214551 RISC-V INTC 5 Edge riscv-timer + ~ # plictool -c 2 -s 5 + plic=0x40800000 nsources=5 ncontexts=2 + src=1 pend=1 prio=1 ctx=1 thre=1 <--- again set to 1 + src=2 pend=0 prio=1 + src=3 pend=0 prio=1 + src=4 pend=1 prio=1 + +Clearly something is not working well in the PLIC. + +Let's try to dump the pending and priority registers of the PLIC with devmem, +just to check that the plictool is not doing something wrong. 
+ + devmem 0x40801000 # Dump pending bits of sources 0-31 + devmem 0x40801004 # Dump pending bits of sources 32-63 + devmem 0x40a01000 # Dump priority threshold of context 1 + devmem 0x40a00000 # Dump priority threshold of context 0 + +Interesting output: + + ~ # plictool -c2 + plictool v0.0.3 addr=0x40800000 nsrc=1024 nctx=2 + src=1 pend=0 prio=1 + src=2 pend=0 prio=1 + src=3 pend=0 prio=1 + src=4 pend=1 prio=1 + ~ # plictool -c2 + plictool v0.0.3 addr=0x40800000 nsrc=1024 nctx=2 + src=1 pend=0 prio=1 + src=2 pend=0 prio=1 + src=3 pend=0 prio=1 + src=4 pend=1 prio=1 + ~ # plictool -c2 + plictool v0.0.3 addr=0x40800000 nsrc=1024 nctx=2 + src=1 pend=0 prio=1 + src=2 pend=0 prio=1 + src=3 pend=0 prio=1 + src=4 pend=1 prio=1 + ~ # plictool -c2 + plictool v0.0.3 addr=0x40800000 nsrc=1024 nctx=2 + src=1 pend=0 prio=1 + src=2 pend=0 prio=1 + src=3 pend=0 prio=1 + src=4 pend=1 prio=1 + ~ # dd if=/dev/ttyS1 bs=1 count=1 of=/dev/null & + ~ # plictool -c2 + plictool v0.0.3 addr=0x40800000 nsrc=1024 nctx=2 + src=1 pend=1 prio=1 + ctx=1 thre=0 unmasked + src=2 pend=0 prio=1 + src=3 pend=0 prio=1 + src=4 pend=1 prio=1 + src=33 pend=0 prio=0 + ctx=1 thre=1 masked + ~ # plictool -c2 + plictool v0.0.3 addr=0x40800000 nsrc=1024 nctx=2 + src=1 pend=1 prio=1 + ctx=1 thre=1 masked + src=2 pend=0 prio=1 + src=3 pend=0 prio=1 + src=4 pend=1 prio=1 + src=33 pend=0 prio=0 + ctx=1 thre=1 masked + ~ # plictool -c2 + plictool v0.0.3 addr=0x40800000 nsrc=1024 nctx=2 + src=1 pend=1 prio=1 + ctx=1 thre=1 masked + src=2 pend=0 prio=1 + src=3 pend=0 prio=1 + src=4 pend=1 prio=1 + src=33 pend=0 prio=0 + ctx=1 thre=1 masked + ~ # plictool -c2 + plictool v0.0.3 addr=0x40800000 nsrc=1024 nctx=2 + src=1 pend=1 prio=1 + ctx=1 thre=1 masked + src=2 pend=0 prio=1 + src=3 pend=0 prio=1 + src=4 pend=1 prio=1 + src=33 pend=0 prio=0 + ctx=1 thre=1 masked + +## 2024-09-05 + + devmem 0x4080000C 32 127 # Write 127 to source 3 priority + devmem 0x40a01000 32 0 # Write context 1 threshold to 0 + + # devmem 
0x40a01004 # Read claim register context 1 + # devmem 0x40a01004 # Read claim register context 1 + # 0x201004 + + devmem 0x40800008 # Print source 3 priority + devmem 0x40a01000 # Print context 1 threshold + + ifconfig eth0 hw ether 02:05:00:01:00:02 + +- Report specific PLIC issues +- Enable aux timer +- Broken RTC frequency. + +Thu, 26 Sep 2024 10:26:26 +0200 + +## 2024-09-26 + +The seedrng tool can add some entropy in the kernel from a saved seed in the FS. +The source is available here: + +https://git.zx2c4.com/seedrng/tree/seedrng.c + +But it seems to be included with busybox, so it should be already included in +our initrd: + + ~ # which seedrng + /nix/store/8a4i33qxnpwn0q7hs1vx3q5h4y2cym7n-extra-utils/bin/seedrng + ~ # cat /proc/sys/kernel/random/entropy_avail + 0 + ~ # seedrng + seedrng: can't create directory '/var/lib/seedrng': No such file or directory + ~ # mkdir -p /var/lib/seedrng + ~ # seedrng + Saving 256 bits of non-creditable seed for next boot + ~ # cat /proc/sys/kernel/random/entropy_avail + 0 + ~ # ls /var/lib/seedrng + seed.no-credit + ~ # hexdump /var/lib/seedrng/seed.no-credit + 0000000 caba 9c5c c19a 5b1a 97fc 0a2e a805 9608 + 0000010 d860 b3b9 4ade b781 ce23 2fce 19a5 d1b1 + 0000020 + +Okay, so now I can magically convert it into creditable seed: + + ~ # mv /var/lib/seedrng/seed.no-credit /var/lib/seedrng/seed.credit + ~ # cat /proc/sys/kernel/random/entropy_avail + 0 + ~ # seedrng + Seeding 256 bits and crediting + [ 135.171201] random: crng init done + Saving 256 bits of creditable seed for next boot + ~ # cat /proc/sys/kernel/random/entropy_avail + 256 + +Nice. + +So, I can remove the jitter daemon and use this instead, assuming it won't go +down. 
It doesn't seem to go down: + + ~ # cat /proc/sys/kernel/random/entropy_avail + 256 + ~ # hexdump -n $((1024*1024)) /dev/random > /dev/null + ~ # cat /proc/sys/kernel/random/entropy_avail + 256 + +Another option may be to use a fake HW RNG by pointing to the address of some +timer register: + + https://github.com/torvalds/linux/blob/master/drivers/char/hw_random/timeriomem-rng.c + https://www.kernel.org/doc/Documentation/devicetree/bindings/rng/timeriomem_rng.txt + +Let's try the seedrng approach first. + + ~ # seedrng -h + seedrng: invalid option -- 'h' + BusyBox v1.36.1 () multi-call binary. + + Usage: seedrng [-d DIR] [-n] + + Seed the kernel RNG from seed files + + -d DIR Use seed files in DIR (default: /var/lib/seedrng) + -n Do not credit randomness, even if creditable + +I can work with this. + +It seems to be working. + +After correcting the RTC, now I get a more or less good boot time: + + [root@nixos-riscv:~]# systemd-analyze + Startup finished in 4min 49.469s (kernel) + 10min 48.260s (userspace) = 15min 37.729s + multi-user.target reached after 10min 48.024s in userspace. 
+ +Still, udevd is killing the CPU: + + [root@nixos-riscv:~]# vmstat 1 + procs -----------memory---------- ---swap-- -----io---- -system-- ------cpu----- + r b swpd free buff cache si so bi bo in cs us sy id wa st + 2 0 0 585072 16580 59200 0 0 0 12 164 142 49 51 0 0 0 + 4 0 0 585072 16580 59200 0 0 0 0 159 190 46 54 0 0 0 + 1 0 0 585072 16580 59200 0 0 0 0 157 66 49 51 0 0 0 + 5 0 0 585072 16580 59200 0 0 0 0 156 60 75 25 0 0 0 + 2 0 0 585072 16580 59200 0 0 0 0 165 200 30 70 0 0 0 + 2 0 0 585072 16580 59216 0 0 0 4 157 200 31 69 0 0 0 + 2 0 0 585072 16580 59216 0 0 0 0 157 194 39 61 0 0 0 + 2 0 0 585072 16580 59216 0 0 0 0 158 201 53 47 0 0 0 + 2 0 0 585072 16580 59216 0 0 0 0 156 173 62 38 0 0 0 + 3 0 0 585072 16580 59216 0 0 0 0 162 192 64 36 0 0 0 + 4 0 0 585072 16580 59232 0 0 0 0 163 205 35 65 0 0 0 + 1 0 0 585072 16580 59232 0 0 0 0 165 182 36 64 0 0 0 + 1 0 0 585072 16580 59232 0 0 0 0 155 44 76 24 0 0 0 + 3 0 0 585072 16580 59232 0 0 0 0 157 149 46 54 0 0 0 + 2 0 0 585072 16580 59232 0 0 0 0 158 197 34 66 0 0 0 + 2 0 0 585072 16580 59232 0 0 0 0 158 198 30 70 0 0 0 + 2 0 0 585072 16580 59232 0 0 0 0 156 196 48 52 0 0 0 + 2 0 0 585072 16580 59232 0 0 0 0 158 179 58 42 0 0 0 + 3 0 0 585072 16580 59232 0 0 0 0 158 176 67 33 0 0 0 + 2 0 0 585072 16580 59488 0 0 256 0 159 195 36 64 0 0 0 + 1 0 0 585072 16580 59620 0 0 128 0 156 134 55 45 0 0 0 + +Not sure which device is malfunctioning. 
+ + [root@nixos-riscv:~]# udevadm monitor + monitor will print the received events for: + UDEV - the event which udev sends out after rule processing + KERNEL - the kernel uevent + + UDEV [1738.570531] add /devices/virtual/tty/ptyaa (tty) + UDEV [1741.983817] add /devices/virtual/tty/ptyab (tty) + UDEV [1745.434070] add /devices/virtual/tty/ptyac (tty) + UDEV [1748.846075] add /devices/virtual/tty/ptyad (tty) + UDEV [1752.238682] add /devices/virtual/tty/ptyae (tty) + UDEV [1755.665632] add /devices/virtual/tty/ptyaf (tty) + UDEV [1759.047259] add /devices/virtual/tty/ptyb0 (tty) + +## 2024-09-27 + +Here is the FPGA startup services by duration: + + [root@nixos-riscv:~]# systemd-analyze blame + 3min 59.980s systemd-udev-trigger.service + 2min 6.780s suid-sgid-wrappers.service + 2min 2.677s mount-pstore.service + 1min 46.770s user@0.service + 1min 10.554s systemd-journald.service + 1min 5.538s resolvconf.service + 48.660s systemd-logind.service + 44.747s systemd-sysctl.service + 43.971s sys-fs-fuse-connections.mount + 43.358s dev-mqueue.mount + 43.197s dev-hugepages.mount + 43.043s sys-kernel-debug.mount + 42.959s kmod-static-nodes.service + 42.568s network-setup.service + 42.546s modprobe@configfs.service + 42.285s systemd-tmpfiles-setup-dev-early.service + 40.980s modprobe@efi_pstore.service + 40.510s sys-kernel-config.mount + 39.891s modprobe@fuse.service + 36.103s systemd-random-seed.service + 30.686s systemd-udevd.service + 30.059s systemd-modules-load.service + 29.682s systemd-journal-catalog-update.service + 29.091s systemd-remount-fs.service + 26.090s systemd-tmpfiles-setup.service + 23.570s systemd-journal-flush.service + 21.042s systemd-tmpfiles-setup-dev.service + 19.736s audit.service + 17.794s systemd-update-utmp.service + 15.329s dbus.service + 10.177s modprobe@drm.service + 9.866s systemd-update-done.service + 8.302s user-runtime-dir@0.service + 6.615s systemd-user-sessions.service + 5.384s network-local-commands.service + 4.933s getty@tty1.service + 
+It doesn't make much sense to have udev on the FPGA, as we are not going to +hotplug anything. Maybe I can just disable the service, and save 4 minutes (at +least). + +Disabling udev by just setting `services.udev.enable = false` doesn't work. It +still gets activated by the kernel socket. + +## 2024-09-30 + +Managed to launch an interactive shell from stage2, before we run systemd. It +boots in less than 5 minutes. + +Now I can run some benchmarks there. + +Let's see if I can enable flow control on the serial console. That would be +helpful. + +For that I need to first switch to the 8250/16550 driver. Let's try switching to +ttyS0 without enabling flow control yet. We probably need to change the +stage1 and stage2 scripts to follow the proper console device. + +It works, but very slowly: + + [ 0.000000] Kernel command line: root=/dev/ram0 loglevel=7 rw earlycon=sbi console=ttyS0,115200n8 debug2 ... + ... + [ 42.069358] Serial: 8250/16550 driver, 4 ports, IRQ sharing disabled + [ 42.473860] of_serial 40001000.serial: error -ENXIO: IRQ index 0 not found + [ 42.559263] printk: legacy console [ttyS0] disabled + [ 42.614225] 40001000.serial: ttyS0 at MMIO 0x40001000 (irq = 0, base_baud = 3125000) is a 16550 + [ 42.645519] printk: legacy console [ttyS0] enabled + [ 42.645519] printk: legacy console [ttyS0] enabled + [ 42.656865] printk: legacy bootconsole [sbi0] disabled + [ 42.656865] printk: legacy bootconsole [sbi0] disabled + [ 42.896358] 40003000.serial: ttyS1 at MMIO 0x40003000 (irq = 1, base_baud = 3125000) is a 16550 + [ 42.999450] SuperH (H)SCI(F) driver initialized + ...
+ ~ # stty -a + speed 115200 baud;stty: standard input + line = 0; + intr = ^C; quit = ^\; erase = ^?; kill = ^U; eof = ^D; eol = ; + eol2 = ; swtch = ; start = ^Q; stop = ^S; susp = ^Z; rprnt = ^R; + werase = ^W; lnext = ^V; flush = ^O; min = 1; time = 0; + -parenb -parodd -cmspar cs8 hupcl -cstopb cread clocal -crtscts + -ignbrk -brkint -ignpar -parmrk -inpck -istrip -inlcr -igncr icrnl ixon -ixoff + -iuclc -ixany -imaxbel -iutf8 + opost -olcuc -ocrnl onlcr -onocr -onlret -ofill -ofdel nl0 cr0 tab0 bs0 vt0 ff0 + isig icanon iexten echo echoe echok -echonl -noflsh -xcase -tostop -echoprt + echoctl echoke -flusho -extproc + +Let's add the ability to claim a context in the plictool, as I suspect the claim +may be broken. + +## 2024-10-02 + +Interesting output with ftrace: + + + <<< NixOS Stage 1 >>> + + + An error occurred in stage 1 of the boot process, which must mount the + root filesystem on `/mnt-root' and then start stage 2. Press one + of the following keys: + + i) to launch an interactive shell + f) to start an interactive shell having pid 1 (needed if you want to + start stage 2's init manually) + r) to reboot immediately + *) to ignore the error and continue + iStarting interactive shell... 
+ ~ # mount -t tracefs nodev /sys/kernel/tracing/ + ~ # cd /sys/kernel/tracing/ + /sys/kernel/tracing # cat current_tracer + function + /sys/kernel/tracing # cat enabled_functions + plic_irq_unmask (1) + plic_irq_mask (1) + plic_dying_cpu (1) + plic_starting_cpu (1) + plic_irq_domain_translate (1) + plic_irq_domain_alloc (1) + plic_parse_context_parent (1) + plic_irq_set_type (1) + plic_toggle.constprop.0 (1) + plic_irq_eoi (1) + plic_probe (1) + plic_irq_suspend (1) + plic_handle_irq (1) + plic_irq_enable (1) + plic_irq_disable (1) + plic_irq_resume (1) + /sys/kernel/tracing # cat tracing_on + 1 + /sys/kernel/tracing # cat trac + trace trace_options tracing_max_latency + trace_clock trace_pipe tracing_on + trace_marker trace_stat/ tracing_thresh + trace_marker_raw tracing_cpumask + /sys/kernel/tracing # cat trace + # tracer: function + # + # entries-in-buffer/entries-written: 1586/1586 #P:1 + ... + /sys/kernel/tracing # cat options/function-trace + 1 + /sys/kernel/tracing # plictool + Source Pend Prio C0(7) C1(0) + 1 - 1 - - + 2 - 1 - - + 3 - 1 - - + 4 yes 1 - - + /sys/kernel/tracing # cat tracing_on + 1 + /sys/kernel/tracing # echo 0>tracing_on + + /sys/kernel/tracing # cat tracing_on + 1 + /sys/kernel/tracing # echo 0 > tracing_on + /sys/kernel/tracing # cat tracing_on + 0 + /sys/kernel/tracing # echo plic_handle_irq > set_ftrace_filter + /sys/kernel/tracing # cat enabled_functions + plic_handle_irq (1) + /sys/kernel/tracing # echo plic_handle_irq,plic_irq_eoi,plic_irq_enable,plic_irq + _disable > set_ftrace_filter ^C + + /sys/kernel/tracing # echo plic_irq_eoi >> set_ftrace_filter + /sys/kernel/tracing # cat enabled_functions + plic_irq_eoi (1) + plic_handle_irq (1) + /sys/kernel/tracing # echo plic_irq_enable >> set_ftrace_filter + /sys/kernel/tracing # plictool + Source Pend Prio C0(7) C1(0) + 1 - 1 - - + 2 - 1 - - + 3 - 1 - - + 4 yes 1 - - + /sys/kernel/tracing # ^Ct enabled_functions + + /sys/kernel/tracing # echo hi > /dev/ttyS1 + ^Z^C + 
/sys/kernel/tracing # plictool + Source Pend Prio C0(7) C1(0) + 1 yes 1 - - + 2 - 1 - - + 3 - 1 - - + 4 yes 1 - - + /sys/kernel/tracing # echo plic_irq_enable >> set_f^Cace_filter + + /sys/kernel/tracing # cat tracing_on + 0 + /sys/kernel/tracing # echo 1 > tracing_on + /sys/kernel/tracing # cat tracing_on + 1 + /sys/kernel/tracing # cat trace_pipe & + /sys/kernel/tracing # fg + cat trace_pipe + ^Z[1]+ Stopped cat trace_pipe + /sys/kernel/tracing # bg + [1] cat trace_pipe + /sys/kernel/tracing # echo hi > /dev/ttyS1 + ash-91 [000] d..2. 669.966205: plic_irq_enable <-irq_enable + ash-91 [000] d..2. 669.966815: plic_irq_enable <-irq_enable + + *** local echo: yes *** + + *** local echo: no *** + ^C + /sys/kernel/tracing # echo 0 > tracing_on + /sys/kernel/tracing # cat tracing_on + 0 + /sys/kernel/tracing # echo 'plic_*' >> set_ftrace_filter + /sys/kernel/tracing # cat enabled_functions + plic_irq_unmask (1) + plic_irq_mask (1) + plic_dying_cpu (1) + plic_starting_cpu (1) + plic_irq_domain_translate (1) + plic_irq_domain_alloc (1) + plic_parse_context_parent (1) + plic_irq_set_type (1) + plic_toggle.constprop.0 (1) + plic_irq_eoi (1) + plic_probe (1) + plic_irq_suspend (1) + plic_handle_irq (1) + plic_irq_enable (1) + plic_irq_disable (1) + plic_irq_resume (1) + /sys/kernel/tracing # echo 1 > tracing_on + /sys/kernel/tracing # cat trace_pipe & + /sys/kernel/tracing # cat: can't open 'trace_pipe': Device or resource busy + + [2]+ Done(1) cat trace_pipe + /sys/kernel/tracing # ps + PID USER TIME COMMAND + 1 0 0:33 {init} /nix/store/y477q6jlbg9b53knnclgib65fbzpazkj-extra-u + 2 0 0:00 [kthreadd] + 3 0 0:00 [pool_workqueue_] + 4 0 0:00 [kworker/R-slub_] + 5 0 0:00 [kworker/R-netns] + 6 0 0:00 [kworker/0:0-eve] + 7 0 0:00 [kworker/0:0H] + 8 0 0:03 [kworker/u4:0-ev] + 9 0 0:00 [kworker/R-mm_pe] + 10 0 0:41 [kworker/u4:1-ev] + 11 0 0:00 [rcu_tasks_rude_] + 12 0 0:00 [rcu_tasks_trace] + 13 0 0:00 [ksoftirqd/0] + 14 0 0:00 [kworker/0:1-eve] + 15 0 0:01 [kdevtmpfs] + 16 0 0:00 
[kworker/R-inet_] + 17 0 0:00 [kauditd] + 18 0 0:00 [khungtaskd] + 20 0 0:00 [oom_reaper] + 21 0 0:00 [kworker/R-write] + 22 0 0:01 [kcompactd0] + 23 0 0:00 [ksmd] + 24 0 0:00 [kworker/R-kinte] + 25 0 0:00 [kworker/R-kbloc] + 26 0 0:00 [kworker/R-blkcg] + 27 0 0:00 [kworker/R-ata_s] + 28 0 0:00 [kworker/R-devfr] + 29 0 0:00 [watchdogd] + 30 0 0:01 [kworker/u4:3-ev] + 31 0 0:00 [kworker/R-rpcio] + 32 0 0:00 [kworker/R-xprti] + 33 0 0:00 [kswapd0] + 34 0 0:00 [kworker/R-nfsio] + 35 0 0:00 [kworker/R-kthro] + 36 0 0:09 [khvcd] + 37 0 0:00 [kworker/R-uas] + 38 0 0:00 [kworker/R-mld] + 39 0 0:00 [kworker/R-ipv6_] + 46 0 0:00 [kworker/R-kstrp] + 47 0 0:00 [kworker/u5:0] + 73 0 0:00 tee -i /proc/self/fd/8 + 74 0 0:00 {init} /nix/store/y477q6jlbg9b53knnclgib65fbzpazkj-extra-u + 91 0 0:46 /nix/store/y477q6jlbg9b53knnclgib65fbzpazkj-extra-utils/bi + 113 0 0:00 cat trace_pipe + 117 0 0:00 ps + /sys/kernel/tracing # fg + cat trace_pipe + ^C + /sys/kernel/tracing # cat trace_pipe & + /sys/kernel/tracing # echo 1 > tracing_on + /sys/kernel/tracing # echo hi > /dev/ttyS1 + ash-91 [000] d..2. 748.552248: plic_irq_enable <-irq_enable + ash-91 [000] d..2. 748.552370: plic_toggle.constprop.0 <-plic_irq_enable + ash-91 [000] d..2. 748.552797: plic_irq_disable <-irq_disable + ash-91 [000] d..2. 748.552858: plic_toggle.constprop.0 <-plic_irq_disable + ash-91 [000] d..2. 748.553102: plic_irq_enable <-irq_enable + ash-91 [000] d..2. 748.553163: plic_toggle.constprop.0 <-plic_irq_enable + ^C + /sys/kernel/tracing # ash-91 [000] d..2. 762.013481: plic_irq_disable <-__irq_disable + ash-91 [000] d..2. 
762.013603: plic_toggle.constprop.0 <-plic_irq_disable + + /sys/kernel/tracing # + /sys/kernel/tracing # plictool + Source Pend Prio C0(7) C1(0) + 1 yes 1 - - + 2 - 1 - - + 3 - 1 - - + 4 yes 1 - - + /sys/kernel/tracing # plictool -C 1 + 0 + /sys/kernel/tracing # plictool -C 1 -w 4 + /sys/kernel/tracing # plictool -C 1 + 0 + /sys/kernel/tracing # plictool + Source Pend Prio C0(7) C1(0) + 1 yes 1 - - + 2 - 1 - - + 3 - 1 - - + 4 yes 1 - - + /sys/kernel/tracing # devmem 0x40014000 32 0xffffffff + /sys/kernel/tracing # plictool + Source Pend Prio C0(7) C1(0) + 1 yes 1 - - + 2 - 1 - - + 3 - 1 - - + 4 yes 1 - - + /sys/kernel/tracing # plictool -C 1 -w 4 + /sys/kernel/tracing # plictool + Source Pend Prio C0(7) C1(0) + 1 yes 1 - - + 2 - 1 - - + 3 - 1 - - + 4 yes 1 - - + /sys/kernel/tracing # plictool -E + plictool: option requires an argument: E + plictool v0.0.5 -- Rodrigo Arias Mallo + Usage: + plictool [-a addr] [-L] [-n nsrc] [-x nctx] # List (default) + plictool [-a addr] -C ctx [-w value] # Claim + plictool [-a addr] -T ctx [-w value] # Threshold + plictool [-a addr] -I src [-w value] # Priority + plictool [-a addr] -P src [-w value] # Pending + plictool [-a addr] -E src -c ctx [-w value] # Enabled + plictool -v # Version + /sys/kernel/tracing # plictool -E 4 -c 0 -w 1 + /sys/kernel/tracing # plictool + Source Pend Prio C0(7) C1(0) + 1 yes 1 - - + 2 - 1 - - + 3 - 1 - - + 4 yes 1 masked - + 36 - 0 masked - + /sys/kernel/tracing # plictool -C 0 + 4 + /sys/kernel/tracing # plictool -C 0 -w 4 + /sys/kernel/tracing # plictool + Source Pend Prio C0(7) C1(0) + 1 yes 1 - - + 2 - 1 - - + 3 - 1 - - + 4 - 1 masked - + 36 - 0 masked - + /sys/kernel/tracing # plictool -E 4 -c 0 -w 0 + /sys/kernel/tracing # plictool + Source Pend Prio C0(7) C1(0) + 1 yes 1 - - + 2 - 1 - - + 3 - 1 - - + 4 - 1 - - + /sys/kernel/tracing # cat /dev/ttyS1 & + /sys/kernel/tracing # cat-136 [000] d..2. 887.106509: plic_irq_enable <-irq_enable + cat-136 [000] d..2. 
887.106631: plic_toggle.constprop.0 <-plic_irq_enable + cat-136 [000] d.h2. 887.106814: plic_handle_irq <-generic_handle_domain_irq + cat-136 [000] d.h3. 887.106997: plic_irq_eoi <-handle_fasteoi_irq + cat-136 [000] d..2. 887.107424: plic_irq_disable <-irq_disable + cat-136 [000] d..2. 887.107546: plic_toggle.constprop.0 <-plic_irq_disable + cat-136 [000] d..2. 887.107729: plic_irq_enable <-irq_enable + cat-136 [000] d..2. 887.107790: plic_toggle.constprop.0 <-plic_irq_enable + + /sys/kernel/tracing # plictool + Source Pend Prio C0(7) C1(0) + 1 yes 1 - firing + 2 - 1 - - + 3 - 1 - - + 4 - 1 - - + 33 - 0 - masked + +So, then an interrupt is fired, we disable the interrupts and then we enable +them again. Can this happen because the next interrupt fired when interrupts +were disabled? + +There are also other cases in which we already have an interrupt pending and we +enable the context 1. For example, by setting the enable bit, changing the prio, +or the threshold for that context or by removing the context 0 enable bit. + +I think I can do some tests with those. I'm not sure why the serial port is +firing an interrupt, but if this is the case, I can try to write a character in +the transmit register and wait for the pending bit, then try to claim the +interrupt for source 1. + +## 2024-10-03 + + /sys/kernel/tracing # plictool + Source Pend Prio C0(7) C1(0) + 1 yes 1 - - + 2 - 1 - - + 3 - 1 - - + 4 yes 1 - - + /sys/kernel/tracing # cat tracing_on + 1 + /sys/kernel/tracing # echo 0 > tracing_on + /sys/kernel/tracing # echo 1 > tracing_on + /sys/kernel/tracing # cat trace_pipe & + /sys/kernel/tracing # cat /dev/ttyS1 & + /sys/kernel/tracing # cat-122 [000] ...1. 2978.202587: serial8250_pm <-uart_port_startup + cat-122 [000] ...1. 2978.202648: serial8250_set_sleep <-serial8250_pm + cat-122 [000] ...1. 2978.202892: serial8250_startup <-uart_port_startup + cat-122 [000] ...1. 2978.203014: serial8250_do_startup <-serial8250_startup + cat-122 [000] d..2. 
2978.203319: plic_irq_enable <-irq_enable + cat-122 [000] d..2. 2978.203380: plic_toggle.constprop.0 <-plic_irq_enable + cat-122 [000] d.h2. 2978.203563: plic_handle_irq <-generic_handle_domain_irq + cat-122 [000] d.h2. 2978.203685: serial8250_interrupt <-__handle_irq_event_percpu + cat-122 [000] d.h3. 2978.203746: serial8250_default_handle_irq <-serial8250_interrupt + cat-122 [000] d.h3. 2978.203807: serial8250_handle_irq <-serial8250_default_handle_irq + cat-122 [000] d.h3. 2978.203990: plic_irq_eoi <-handle_fasteoi_irq + cat-122 [000] d..2. 2978.204783: plic_irq_disable <-irq_disable + cat-122 [000] d..2. 2978.204844: plic_toggle.constprop.0 <-plic_irq_disable + cat-122 [000] d..2. 2978.205088: plic_irq_enable <-irq_enable + cat-122 [000] d..2. 2978.205149: plic_toggle.constprop.0 <-plic_irq_enable + cat-122 [000] d..2. 2978.205271: serial8250_do_set_mctrl <-serial8250_do_startup + cat-122 [000] ...1. 2978.205393: serial8250_set_termios <-uart_change_line_settings + cat-122 [000] ...1. 2978.206796: serial8250_do_set_termios <-serial8250_set_termios + cat-122 [000] .n.1. 2978.206918: serial8250_get_divisor <-serial8250_do_set_termios + cat-122 [000] dn.2. 2978.207040: serial8250_do_set_divisor <-serial8250_do_set_termios + cat-122 [000] dn.2. 2978.207101: serial8250_do_set_mctrl <-serial8250_do_set_termios + cat-122 [000] dn.2. 2978.207345: serial8250_set_mctrl <-uart_update_mctrl + cat-122 [000] dn.2. 2978.207406: serial8250_do_set_mctrl <-serial8250_set_mctrl + + /sys/kernel/tracing # plictool + Source Pend Prio C0(7) C1(0) + 1 yes 1 - firing + 2 - 1 - - + 3 - 1 - - + 4 yes 1 - - + 33 - 0 - masked + /sys/kernel/tracing # plictool -C 1 + 0 + +Another test tracing the UART serial too. + +It would be nice to see where those calls are coming from. 
+ +Let's collect the commands I need to use here: + + mount -t tracefs nodev /sys/kernel/tracing/ + cd /sys/kernel/tracing/ + echo 'plic_*' >> set_ftrace_filter + echo 'serial8250_*' >> set_ftrace_filter + cat current_tracer + +Here is a trace with the stack trace printed: + + ~ # plictool + Source Pend Prio C0(7) C1(0) + 1 - 1 - - + 2 - 1 - - + 3 - 1 - - + 4 yes 1 - - + + ~ # + [...] + /sys/kernel/tracing # cat /dev/ttyS1 & + + cat-122 [000] ..... 591.693894: serial8250_pm <-uart_port_startup + cat-122 [000] ..... 591.694077: + => ftrace_call + => uart_port_activate + => tty_port_open + => uart_open + => tty_open + => chrdev_open + => do_dentry_open + => vfs_open + => path_openat + => do_filp_open + => do_sys_openat2 + => __riscv_sys_openat + => do_trap_ecall_u + => ret_from_exception + cat-122 [000] ..... 591.694138: serial8250_set_sleep <-serial8250_pm + cat-122 [000] ..... 591.694260: + => ftrace_call + => uart_port_startup + => uart_port_activate + => tty_port_open + => uart_open + => tty_open + => chrdev_open + => do_dentry_open + => vfs_open + => path_openat + => do_filp_open + => do_sys_openat2 + => __riscv_sys_openat + => do_trap_ecall_u + => ret_from_exception + cat-122 [000] ..... 591.694504: serial8250_startup <-uart_port_startup + cat-122 [000] ..... 591.694687: + => ftrace_call + => uart_port_activate + => tty_port_open + => uart_open + => tty_open + => chrdev_open + => do_dentry_open + => vfs_open + => path_openat + => do_filp_open + => do_sys_openat2 + => __riscv_sys_openat + => do_trap_ecall_u + => ret_from_exception + cat-122 [000] ..... 591.694748: serial8250_do_startup <-serial8250_startup + cat-122 [000] ..... 591.694870: + => ftrace_call + => uart_port_startup + => uart_port_activate + => tty_port_open + => uart_open + => tty_open + => chrdev_open + => do_dentry_open + => vfs_open + => path_openat + => do_filp_open + => do_sys_openat2 + => __riscv_sys_openat + => do_trap_ecall_u + => ret_from_exception + cat-122 [000] d..1. 
591.695236: plic_irq_enable <-irq_enable + cat-122 [000] d..1. 591.695358: + => ftrace_call + => irq_startup + => __setup_irq + => request_threaded_irq + => univ8250_setup_irq + => serial8250_do_startup + => serial8250_startup + => uart_port_startup + => uart_port_activate + => tty_port_open + => uart_open + => tty_open + => chrdev_open + => do_dentry_open + => vfs_open + => path_openat + => do_filp_open + => do_sys_openat2 + => __riscv_sys_openat + => do_trap_ecall_u + => ret_from_exception + cat-122 [000] d..1. 591.695480: plic_toggle.constprop.0 <-plic_irq_enable + cat-122 [000] d..1. 591.695602: + => ftrace_call + => irq_enable + => irq_startup + => __setup_irq + => request_threaded_irq + => univ8250_setup_irq + => serial8250_do_startup + => serial8250_startup + => uart_port_startup + => uart_port_activate + => tty_port_open + => uart_open + => tty_open + => chrdev_open + => do_dentry_open + => vfs_open + => path_openat + => do_filp_open + => do_sys_openat2 + => __riscv_sys_openat + => do_trap_ecall_u + => ret_from_exception + cat-122 [000] d..1. 591.696456: plic_irq_disable <-irq_disable + cat-122 [000] d..1. 591.696578: + => ftrace_call + => __disable_irq_nosync + => disable_irq_nosync + => serial8250_do_startup + => serial8250_startup + => uart_port_startup + => uart_port_activate + => tty_port_open + => uart_open + => tty_open + => chrdev_open + => do_dentry_open + => vfs_open + => path_openat + => do_filp_open + => do_sys_openat2 + => __riscv_sys_openat + => do_trap_ecall_u + => ret_from_exception + cat-122 [000] d..1. 591.696639: plic_toggle.constprop.0 <-plic_irq_disable + cat-122 [000] d..1. 
591.696822: + => ftrace_call + => irq_disable + => __disable_irq_nosync + => disable_irq_nosync + => serial8250_do_startup + => serial8250_startup + => uart_port_startup + => uart_port_activate + => tty_port_open + => uart_open + => tty_open + => chrdev_open + => do_dentry_open + => vfs_open + => path_openat + => do_filp_open + => do_sys_openat2 + => __riscv_sys_openat + => do_trap_ecall_u + => ret_from_exception + cat-122 [000] d..1. 591.697005: plic_irq_enable <-irq_enable + cat-122 [000] d..1. 591.697188: + => ftrace_call + => irq_startup + => __enable_irq + => enable_irq + => serial8250_do_startup + => serial8250_startup + => uart_port_startup + => uart_port_activate + => tty_port_open + => uart_open + => tty_open + => chrdev_open + => do_dentry_open + => vfs_open + => path_openat + => do_filp_open + => do_sys_openat2 + => __riscv_sys_openat + => do_trap_ecall_u + => ret_from_exception + cat-122 [000] d..1. 591.697249: plic_toggle.constprop.0 <-plic_irq_enable + cat-122 [000] d..1. 591.697432: + => ftrace_call + => irq_enable + => irq_startup + => __enable_irq + => enable_irq + => serial8250_do_startup + => serial8250_startup + => uart_port_startup + => uart_port_activate + => tty_port_open + => uart_open + => tty_open + => chrdev_open + => do_dentry_open + => vfs_open + => path_openat + => do_filp_open + => do_sys_openat2 + => __riscv_sys_openat + => do_trap_ecall_u + => ret_from_exception + cat-122 [000] d.h1. 591.697554: plic_handle_irq <-generic_handle_domain_irq + cat-122 [000] d.h1. 591.697676: + => ftrace_call + => riscv_intc_irq + => handle_riscv_irq + => call_on_irq_stack + => 0x1c003b9260 + cat-122 [000] d.h1. 591.697798: serial8250_interrupt <-__handle_irq_event_percpu + cat-122 [000] d.h1. 591.697920: + => ftrace_call + => handle_irq_event + => handle_fasteoi_irq + => generic_handle_domain_irq + => plic_handle_irq + => generic_handle_domain_irq + => riscv_intc_irq + => handle_riscv_irq + => call_on_irq_stack + => 0x1c001dc940 + cat-122 [000] d.h2. 
591.697981: serial8250_default_handle_irq <-serial8250_interrupt + cat-122 [000] d.h2. 591.698103: + => ftrace_call + => __handle_irq_event_percpu + => handle_irq_event + => handle_fasteoi_irq + => generic_handle_domain_irq + => plic_handle_irq + => generic_handle_domain_irq + => riscv_intc_irq + => handle_riscv_irq + => call_on_irq_stack + => 0x1c001dc940 + cat-122 [000] d.h2. 591.698164: serial8250_handle_irq <-serial8250_default_handle_irq + cat-122 [000] d.h2. 591.698286: + => ftrace_call + => serial8250_interrupt + => __handle_irq_event_percpu + => handle_irq_event + => handle_fasteoi_irq + => generic_handle_domain_irq + => plic_handle_irq + => generic_handle_domain_irq + => riscv_intc_irq + => handle_riscv_irq + => call_on_irq_stack + => 0x1c00595ba0 + cat-122 [000] d.h2. 591.698469: plic_irq_eoi <-handle_fasteoi_irq + cat-122 [000] d.h2. 591.698530: + => ftrace_call + => generic_handle_domain_irq + => plic_handle_irq + => generic_handle_domain_irq + => riscv_intc_irq + => handle_riscv_irq + => call_on_irq_stack + => 0x1c00595ba0 + cat-122 [000] d..1. 591.698713: serial8250_do_set_mctrl <-serial8250_do_startup + cat-122 [000] d..1. 591.698835: + => ftrace_call + => serial8250_startup + => uart_port_startup + => uart_port_activate + => tty_port_open + => uart_open + => tty_open + => chrdev_open + => do_dentry_open + => vfs_open + => path_openat + => do_filp_open + => do_sys_openat2 + => __riscv_sys_openat + => do_trap_ecall_u + => ret_from_exception + cat-122 [000] .n... 591.700177: serial8250_set_termios <-uart_change_line_settings + cat-122 [000] .n... 591.700299: + => ftrace_call + => uart_port_startup + => uart_port_activate + => tty_port_open + => uart_open + => tty_open + => chrdev_open + => do_dentry_open + => vfs_open + => path_openat + => do_filp_open + => do_sys_openat2 + => __riscv_sys_openat + => do_trap_ecall_u + => ret_from_exception + cat-122 [000] .n... 591.700421: serial8250_do_set_termios <-serial8250_set_termios + cat-122 [000] .n... 
591.700543: + => ftrace_call + => uart_change_line_settings + => uart_port_startup + => uart_port_activate + => tty_port_open + => uart_open + => tty_open + => chrdev_open + => do_dentry_open + => vfs_open + => path_openat + => do_filp_open + => do_sys_openat2 + => __riscv_sys_openat + => do_trap_ecall_u + => ret_from_exception + cat-122 [000] .n... 591.700604: serial8250_get_divisor <-serial8250_do_set_termios + cat-122 [000] .n... 591.700787: + => ftrace_call + => serial8250_set_termios + => uart_change_line_settings + => uart_port_startup + => uart_port_activate + => tty_port_open + => uart_open + => tty_open + => chrdev_open + => do_dentry_open + => vfs_open + => path_openat + => do_filp_open + => do_sys_openat2 + => __riscv_sys_openat + => do_trap_ecall_u + => ret_from_exception + cat-122 [000] dn.1. 591.700909: serial8250_do_set_divisor <-serial8250_do_set_termios + cat-122 [000] dn.1. 591.701031: + => ftrace_call + => serial8250_set_termios + => uart_change_line_settings + => uart_port_startup + => uart_port_activate + => tty_port_open + => uart_open + => tty_open + => chrdev_open + => do_dentry_open + => vfs_open + => path_openat + => do_filp_open + => do_sys_openat2 + => __riscv_sys_openat + => do_trap_ecall_u + => ret_from_exception + cat-122 [000] dn.1. 591.701092: serial8250_do_set_mctrl <-serial8250_do_set_termios + cat-122 [000] dn.1. 591.701214: + => ftrace_call + => serial8250_set_termios + => uart_change_line_settings + => uart_port_startup + => uart_port_activate + => tty_port_open + => uart_open + => tty_open + => chrdev_open + => do_dentry_open + => vfs_open + => path_openat + => do_filp_open + => do_sys_openat2 + => __riscv_sys_openat + => do_trap_ecall_u + => ret_from_exception + cat-122 [000] dn.1. 591.701458: serial8250_set_mctrl <-uart_update_mctrl + cat-122 [000] dn.1. 
591.701580: + => ftrace_call + => uart_dtr_rts + => tty_port_block_til_ready + => tty_port_open + => uart_open + => tty_open + => chrdev_open + => do_dentry_open + => vfs_open + => path_openat + => do_filp_open + => do_sys_openat2 + => __riscv_sys_openat + => do_trap_ecall_u + => ret_from_exception + cat-122 [000] dn.1. 591.701702: serial8250_do_set_mctrl <-serial8250_set_mctrl + cat-122 [000] dn.1. 591.701885: + => ftrace_call + => uart_update_mctrl + => uart_dtr_rts + => tty_port_block_til_ready + => tty_port_open + => uart_open + => tty_open + => chrdev_open + => do_dentry_open + => vfs_open + => path_openat + => do_filp_open + => do_sys_openat2 + => __riscv_sys_openat + => do_trap_ecall_u + => ret_from_exception + + /sys/kernel/tracing # plictool + Source Pend Prio C0(7) C1(0) + 1 yes 1 - firing + 2 - 1 - - + 3 - 1 - - + 4 yes 1 - - + 33 - 0 - masked + + /sys/kernel/tracing # plictool -C 1 + 0 + +## 2024-10-04 + +To trace which accesses are done to the PLIC, I can use the mmiotracer, which +will hopefully record how we configure the PLIC and lead to a reproducer. + + CONFIG_MMIOTRACE=y + +## 2024-10-07 + +Let's move on to the SPEC CPU benchmarks. I compiled them again with the current +nixpkgs, but they are too large. I made a "mini" version with only the "speed" +and "integer" variants and removing a couple of large benchmarks. + +Still, the closure is gigantic, as they are collecting the environment during +the build phase and that makes the result depend on the build packages. + +## 2024-10-09 + +One of the problems with the `speccmds.cmd` file is that it assumes that it can +write the output of the benchmarks in the same place that the binaries are +located.
+ + hut% cat benchspec/CPU/602.gcc_s/run/run_base_test_nix-m64.0000/speccmds.cmd + -r + -N C + -C /build/out/benchspec/CPU/602.gcc_s/run/run_base_test_nix-m64.0000 + -o t1.opts-O3_-finline-limit_50000.out -e t1.opts-O3_-finline-limit_50000.err ../run_base_test_nix-m64.0000/sgcc_base.nix-m64 t1.c -O3 -finline-limit=50000 -o t1.opts-O3_-finline-limit_50000.s > t1.opts-O3_-finline-limit_50000.out 2>> t1.opts-O3_-finline-limit_50000.err + +We can address this problem by modifying the `-C ...` command and just use `-C +602.gcc_s` (not sure if it creates it directly). Then we need to modify the +../run... part to use the full path of the binary. + + hut% cat speccmds.cmd | sed '/^-C/d' + -r + -N C + -o t1.opts-O3_-finline-limit_50000.out -e t1.opts-O3_-finline-limit_50000.err ../run_base_test_nix-m64.0000/sgcc_base.nix-m64 t1.c -O3 -finline-limit=50000 -o t1.opts-O3_-finline-limit_50000.s > t1.opts-O3_-finline-limit_50000.out 2>> t1.opts-O3_-finline-limit_50000.err + +I can create a symlink to the benchmark directory, so it finds it at +`../run_base_test_nix-m64.0000`. 
+ +## 2024-10-10 + +Managed to run some: + + + for srcbench in $SPEC/benchspec/CPU/* + ++ basename /nix/store/h2by3qxqpzy5b1zszz7wviphv6vy1pjs-spec-cpu-mini-riscv64-unknown-linux-gnu-1.1.7/benchspec/CPU/620.omnetpp_s + + name=620.omnetpp_s + + bench=/tmp/spec/620.omnetpp_s + + rm -rf /tmp/spec/620.omnetpp_s + + cp -r /nix/store/h2by3qxqpzy5b1zszz7wviphv6vy1pjs-spec-cpu-mini-riscv64-unknown-linux-gnu-1.1.7/benchspec/CPU/620.omnetpp_s /tmp/spec/620.omnetpp_s + + chmod +w -R /tmp/spec/620.omnetpp_s + + rundir=/tmp/spec/620.omnetpp_s/run/run_base_test_nix-m64.0000 + + sed -i '/^-C/d' /tmp/spec/620.omnetpp_s/run/run_base_test_nix-m64.0000/speccmds.cmd + + echo '--- Running 620.omnetpp_s for 1 iterations ---' + --- Running 620.omnetpp_s for 1 iterations --- + + cd /tmp/spec/620.omnetpp_s/run/run_base_test_nix-m64.0000 + + specinvoke -i 1 -E speccmds.cmd + + awk '/^run [0-9]* elapsed time/{printf \ + "%s\t%s\t%s\t%s\t%s\n", \ + "620.omnetpp_s","test","base",$2,$7}' /tmp/spec/620.omnetpp_s/run/run_base_test_nix-m64.0000/speccmds.out + + cat /tmp/spec/620.omnetpp_s/run/run_base_test_nix-m64.0000/time.csv + 620.omnetpp_s test base 1 1080.495394000 + + cat /tmp/spec/620.omnetpp_s/run/run_base_test_nix-m64.0000/time.csv + + for srcbench in $SPEC/benchspec/CPU/* + ++ basename /nix/store/h2by3qxqpzy5b1zszz7wviphv6vy1pjs-spec-cpu-mini-riscv64-unknown-linux-gnu-1.1.7/benchspec/CPU/631.deepsjeng_s + + name=631.deepsjeng_s + + bench=/tmp/spec/631.deepsjeng_s + + rm -rf /tmp/spec/631.deepsjeng_s + + cp -r /nix/store/h2by3qxqpzy5b1zszz7wviphv6vy1pjs-spec-cpu-mini-riscv64-unknown-linux-gnu-1.1.7/benchspec/CPU/631.deepsjeng_s /tmp/spec/631.deepsjeng_s + + chmod +w -R /tmp/spec/631.deepsjeng_s + + rundir=/tmp/spec/631.deepsjeng_s/run/run_base_test_nix-m64.0000 + + sed -i '/^-C/d' /tmp/spec/631.deepsjeng_s/run/run_base_test_nix-m64.0000/speccmds.cmd + + echo '--- Running 631.deepsjeng_s for 1 iterations ---' + --- Running 631.deepsjeng_s for 1 iterations --- + + cd 
/tmp/spec/631.deepsjeng_s/run/run_base_test_nix-m64.0000 + + specinvoke -i 1 -E speccmds.cmd + [12274.985482] __vm_enough_memory: pid: 661, comm: deepsjeng_s_bas, bytes: 7200002048 not enough memory for the allocation + [12274.998109] __vm_enough_memory: pid: 661, comm: deepsjeng_s_bas, bytes: 7200071680 not enough memory for the allocation + [12275.010615] __vm_enough_memory: pid: 661, comm: deepsjeng_s_bas, bytes: 7200133120 not enough memory for the allocation + bash-5.2# cat /tmp/spec/time.csv + benchmark size tune iter time_s + 600.perlbench_s test base 1 5326.027877000 + 602.gcc_s test base 1 2.515220000 + 605.mcf_s test base 1 1923.514976000 + 620.omnetpp_s test base 1 1080.495394000 + +After commenting `631.deepsjeng_s`: + + [12736.162509] 196608 pages RAM + [12736.165986] 0 pages HighMem/MovableOnly + [12736.171049] 18239 pages reserved + [12736.174892] 4096 pages cma reserved + [12736.179650] Tasks state (memory values in pages): + [12736.184896] [ pid ] uid tgid total_vm rss rss_anon rss_file rss_shmem pgtables_bytes swapents oom_score_adj name + [12736.197219] [ 221] 0 221 1090 126 125 1 0 28672 0 0 bash + [12736.209480] [ 236] 0 236 1057 84 83 1 0 28672 0 0 bash + [12736.221558] [ 642] 0 642 1057 83 83 0 0 28672 0 0 bash + [12736.233637] [ 643] 0 643 572 25 24 1 0 28672 0 0 specinvoke + [12736.246203] [ 644] 0 644 1057 70 69 1 0 32768 0 0 sh + [12736.258037] [ 645] 0 645 131277 105312 105279 1 32 872448 0 0 xz_s_base.nix-m + [12736.271031] oom-kill:constraint=CONSTRAINT_NONE,nodemask=(null),global_oom,task_memcg=/,task=xz_s_base.nix-m,pid=645,uid=0 + [12736.284939] Out of memory: Killed process 645 (xz_s_base.nix-m) total-vm:525108kB, anon-rss:421116kB, file-rss:4kB, shmem-rss:128kB, UID:0 pgtables:852kB oom_score_adj:0 + [12749.985238] oom_reaper: reaped process 645 (xz_s_base.nix-m), now anon-rss:0kB, file-rss:0kB, shmem-rss:0kB + bash-5.2# cat /tmp/spec/time.csv + benchmark size tune iter time_s + 600.perlbench_s test base 1 5338.291831000 + 
602.gcc_s test base 1 2.520710000 + 605.mcf_s test base 1 1923.825657000 + 620.omnetpp_s test base 1 1091.905020000 + 641.leela_s test base 1 767.870615000 + 648.exchange2_s test base 1 2815.577807000 + +Running out of memory on `657.xz_s`: + + bash-5.2# free -h + total used free shared buff/cache available + Mem: 696Mi 19Mi 405Mi 255Mi 271Mi 411Mi + Swap: 0B 0B 0B + + bash-5.2# df -h + Filesystem Size Used Avail Use% Mounted on + devtmpfs 35M 0 35M 0% /dev + tmpfs 349M 0 349M 0% /dev/shm + tmpfs 175M 56K 175M 1% /run + tmpfs 349M 0 349M 0% /run/wrappers + /dev/disk/by-label/NIXOS_SD 2.3G 1.9G 275M 88% / + nodev 349M 256M 93M 74% /tmp + + bash-5.2# du -sh /tmp/spec + 256M /tmp/spec + +Let's comment it for now. + +We may want to start running the tests in the CI, so I can read the logs there. +There are some operations we need to do on the FS before running the tests: + + bash-5.2# mkdir /tmp + bash-5.2# mount -t tmpfs nodev /tmp + bash-5.2# mkdir /bin + bash-5.2# mkdir /root + bash-5.2# ln -s $(which sh) /bin/sh + bash-5.2# export TMPDIR=/tmp + bash-5.2# speclaunch + +So, let's prepare a script that runs the SPEC mini. + +The first benchmark to run is `600.perlbench_s` which seems to take 5338 seconds +(1.5 h) to run. I configured the pipeline to stop as soon as we have 2 h of +silence, but after 150 minutes (2.5 h) of execution time it has not finished yet. +Not sure if something is wrong now. Maybe I can run vmstat a few times and see +the mount points to check everything is correct. + +I may want to also increase the RAM available, so we can potentially run other +benchmarks too. + +At some point we may want to be able to specify the bootcmd from fpgactl +directly. 
+ +## 2024-10-11 + +Another successful execution of SPEC mini: + + + + benchmark size tune iter time_s + 600.perlbench_s test base 1 5380.726590000 + 602.gcc_s test base 1 2.525468000 + 605.mcf_s test base 1 1927.921307000 + 620.omnetpp_s test base 1 1110.200756000 + 641.leela_s test base 1 779.333069000 + 648.exchange2_s test base 1 2916.464893000 diff --git a/README.md b/README.md index 3fe8665e02e0703f6516656bba64184c6021b98d..0728bf0c182d4a5fd55a202be241e5324a8e1b7c 100644 --- a/README.md +++ b/README.md @@ -2,45 +2,49 @@ This repository contains NixOS configurations for different RISC-V machines. -## QEMU +## Lagarto Ox on FPGA Alveo U55C -To boot the system in QEMU, first enter the development shell: +To build the system and boot it on an FPGA of the MEEP cluster, you can run the +following: ``` -$ nix develop +$ nix develop -L '.#lagarto-ox' --command fpga/run-remotely.sh fpgalogin1:ci ``` -Then run the boot script: +To do it manually, you can first enter the development shell: ``` -$ ./boot.sh +$ nix develop -L '.#lagarto-ox' ``` -To prevent the GC from erasing the system: +Then upload the files to the target machine (fpgalogin1 by default): ``` -$ nix build .#devShells.x86_64-linux.default --out-link result-env +$ fpga/upload.sh ``` -## Lagarto Hun on FPGA Alveo U55C - -First build required dependencies: +Then connect to the fpgalogin1 machine, allocate a FPGA node and load the +environment there: ``` -$ nix develop '.#lagarto-hun' +$ cd nixos +$ . env.sh ``` -Then upload to destination: +Flash the images to the FPGA: ``` -$ fpga/upload.sh +$ ./fpgactl -w bitstream.bit -b opensbi.bin -k kernel.bin -i initrd.bin -r rootfs.img ``` -And then boot the system there: +And monitor the serial line: ``` -cucu$ ./boot.sh -... +$ picocom -q -b 115200 $FPGACTL_UART ``` -In the U-Boot prompt, paste the commands of the `uboot.env` file. +It should boot without any user interaction. 
+ +## Lagarto Hun + +WIP diff --git a/bootrom/.gitignore b/bootrom/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..7cf238fbc442c19fc729bbe8d393527d5342ca8e --- /dev/null +++ b/bootrom/.gitignore @@ -0,0 +1,3 @@ +*.o +*.elf +*.bin diff --git a/bootrom/Makefile b/bootrom/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..8ac94ef1daafbee72342bb6ce111eae7d168c2d3 --- /dev/null +++ b/bootrom/Makefile @@ -0,0 +1,23 @@ +CC ?= riscv64-unknown-elf-gcc +OBJCOPY ?= riscv64-unknown-elf-objcopy +ROM_BASE ?= 0x00100 + +LDFLAGS=-Tlinker.ld -nostdlib -nostartfiles -nodefaultlibs -static -Wl,--no-gc-sections -Wl,--defsym=ROM_BASE=$(ROM_BASE) + +b=rbootrom + +all: $b.elf $b.bin + +$b.o: $b.S + $(CC) $(CFLAGS) -c -o $@ $< + +$b.elf: $b.o linker.ld + $(CC) $(LDFLAGS) -o $@ $< + +$b.bin: $b.elf + $(OBJCOPY) -O binary $b.elf $b.bin + +clean: + rm -f $b.elf $b.bin $b.o + +.PHONY: clean all diff --git a/bootrom/linker.ld b/bootrom/linker.ld new file mode 100644 index 0000000000000000000000000000000000000000..9a955e79f7dffc85decb9855f19b0759f8d00c8e --- /dev/null +++ b/bootrom/linker.ld @@ -0,0 +1,19 @@ +OUTPUT_ARCH( "riscv" ) +ENTRY(_start) + +SECTIONS +{ + ROM_BASE = DEFINED(ROM_BASE) ? ROM_BASE : 0x00100; /* Default to 0x00100 if ROM_BASE is not defined */ + + . = ROM_BASE; + .text.start : { *(.text.start) } + . = ROM_BASE + 0x80; + .text.hang : { *(.text.hang) } + /* + . = ROM_BASE + 0xC0; + .rodata.dtb : { *(.rodata.dtb) } + */ + + . 
= ALIGN(0x100); + .text : { *(.text) } +} diff --git a/bootrom/rbootrom.S b/bootrom/rbootrom.S new file mode 100644 index 0000000000000000000000000000000000000000..9a7f02bce6cdd756e4ad76461c71fe0e086732ad --- /dev/null +++ b/bootrom/rbootrom.S @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2024, Barcelona Supercomputing Center (BSC) + * SPDX-License-Identifier: MIT + * + * RBOOTROM v1.0 + * Modified by Rodrigo Arias Mallo + * + * This is a custom bootrom that prints some information to the UART when + * starting, as well as when it hangs. It assumes the UART is at UART_BASE and + * it will jump to DRAM_BASE to continue the boot. + */ + +#define DRAM_BASE 0x80000000 +#define UART_BASE 0x40001000 +#define UART_BAUDRATE 115200 +#define UART_CLOCK 50000000 + +#define UART_BDIV ((UART_CLOCK + 8 * UART_BAUDRATE) / (16 * UART_BAUDRATE)) +#define UART_SHIFT 2 + +#define UART_RBR_OFFSET (0<> 8) & 0xff // Set divisor high byte + sb t1, UART_DLM_OFFSET(t0) + + la t1, 0x03 // 8 bits, no parity, one stop bit + sb t1, UART_LCR_OFFSET(t0) + la t1, 0x01 // Enable FIFO + sb t1, UART_FCR_OFFSET(t0) + la t1, 0x00 // No modem control DTR RTS + sb t1, UART_MCR_OFFSET(t0) + + /* TODO: Clear line status */ + /* TODO: Read receive buffer */ + + la t1, 0x00 // Set scratchpad to 0 + sb t1, UART_SCR_OFFSET(t0) + + ret + +print_hello: + csrr t0, mhartid // Load HART ID into t0 + beq t0, zero, 1f // Print message on HART 0 only + ret +1: + mv s0, ra // Save return address + PUTC '\n' // Identify bootrom + PUTC '\r' + PUTC 'R' + PUTC 'B' + PUTC 'O' + PUTC 'O' + PUTC 'T' + PUTC 'R' + PUTC 'O' + PUTC 'M' + PUTC ' ' + PUTC 'v' + PUTC '1' + PUTC '.'
+ PUTC '0' + PUTC ' ' + PUTC ':' + PUTC '^' + PUTC ')' + PUTC '\n' + PUTC '\r' + + // Print jumping address + PUTC 'J' + PUTC 'u' + PUTC 'm' + PUTC 'p' + PUTC 'i' + PUTC 'n' + PUTC 'g' + PUTC ' ' + PUTC 't' + PUTC 'o' + PUTC ' ' + PUTC '0' // TODO: Compute from DRAM_BASE + PUTC 'x' + PUTC '8' + PUTC '0' + PUTC '0' + PUTC '0' + PUTC '_' + PUTC '0' + PUTC '0' + PUTC '0' + PUTC '0' + PUTC '.' + PUTC '.' + PUTC '.' + PUTC '\n' + PUTC '\r' + + mv ra, s0 // Restore return address + ret diff --git a/configuration.nix b/configuration.nix index 85296fa186d547f415768cc0ec18f5b02e2c3b89..e0794c5d7509dc167b530aed9f9283f3693b4fcb 100644 --- a/configuration.nix +++ b/configuration.nix @@ -1,13 +1,22 @@ -{ config, lib, pkgs, modulesPath, ... }: +{ config, lib, pkgs, modulesPath, self, ... }: { imports = [ #"${modulesPath}/profiles/base.nix" - #"${modulesPath}/profiles/minimal.nix" + "${modulesPath}/profiles/minimal.nix" ]; - nixpkgs.crossSystem = { - system = "riscv64-linux"; + nixpkgs = { + crossSystem = { + system = "riscv64-linux"; + }; + + overlays = [ + self.inputs.bscpkgs.bscOverlay + (import ./overlay.nix) + ]; + + config.allowUnsupportedSystem = true; }; networking.hostName = "nixos-riscv"; @@ -26,5 +35,33 @@ }; }; + systemd.oomd.enable = false; + networking.firewall.enable = false; + networking.dhcpcd.enable = false; + services.timesyncd.enable = false; + #environment.systemPackages = with pkgs; [ vim gdb neofetch gcc bintools ]; + environment.systemPackages = with pkgs; [ + rvb riscv-tools stream spec-cpu-mini + config.boot.kernelPackages.perf + ]; + + services.getty.helpLine = '' + + + __________________ + < Welcome to NixOS > + ------------------ + \ ^__^ + \ (oo)\_______ + (__)\ )\/\ + ||----w | + || || + + + If you can read this message then then you have + successfully booted NixOS into the login shell. 
+ + + ''; } diff --git a/dts/.gitignore b/dts/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..2008a4a58664a808e324524ed41deb34a47098d3 --- /dev/null +++ b/dts/.gitignore @@ -0,0 +1,2 @@ +*.pp.dts +*.dtb diff --git a/dts/Makefile b/dts/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..4a87c1e522f5d78e3af8824bdfd78d53941b6b6f --- /dev/null +++ b/dts/Makefile @@ -0,0 +1,18 @@ +CC?=gcc +DTC?=dtc + +CPPFLAGS=-E -C -P -nostdinc -undef -x assembler-with-cpp + +all: lagarto_ox.dtb + +clean: + rm -f *.pp.dts *.dtb + +%.pp.dts: %.dts *.h + $(CC) $(CPPFLAGS) $< -o $@ + sed -i 's/@0x0*/@/' $@ + +%.dtb: %.pp.dts + dtc -O dtb -o $@ $^ + +.PRECIOUS: %.pp.dts diff --git a/dts/lagarto_ox.dts b/dts/lagarto_ox.dts new file mode 100644 index 0000000000000000000000000000000000000000..d9c41ce6c254310ca56d72397bff4b4d39589ebd --- /dev/null +++ b/dts/lagarto_ox.dts @@ -0,0 +1,255 @@ +#include "lagarto_ox.h" + +/dts-v1/; +/ { + #address-cells = <2>; + #size-cells = <2>; /* 64 bits memory addresses */ + compatible = "riscv,rv64i"; + model = "Barcelona Supercomputing Center - Lagarto Ox (NixOS)"; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + timebase-frequency = ; + CPU0: cpu@0 { + clock-frequency = ; + device_type = "cpu"; + reg = <0>; + status = "okay"; + compatible = "riscv"; + riscv,isa = "rv64imafd"; + mmu-type = "riscv,sv39"; + tlb-split; + + i-cache-block-size = <64>; // Guess + i-cache-sets = <4>; + i-cache-size = <16384>; + i-tlb-sets = <1>; // Guess + i-tlb-size = <32>; // Guess + + d-cache-block-size = <64>; // Guess + d-cache-sets = <4>; + d-cache-size = <32768>; + d-tlb-sets = <1>; // Guess + d-tlb-size = <32>; // Guess + + /* Hart-Level Interrupt Controller: Every interrupt is + * ultimately routed through a hart's HLIC before it + * interrupts that hart. 
*/ + HLIC0: interrupt-controller { + #interrupt-cells = <1>; + interrupt-controller; /* Receives interrupts */ + compatible = "riscv,cpu-intc"; + }; + }; + cpu-map { + cluster0 { + core0 { + cpu = <&CPU0>; + }; + }; + }; + }; + memory@MEM_ADDR { + device_type = "memory"; + reg = /bits/ 64 ; + }; + reserved-memory { + #address-cells = <2>; /* Starting address and size */ + #size-cells = <2>; /* 64 bits memory addresses */ + ranges; + eth_pool: dma_pool@ETHPOOL_ADDR { + reg = /bits/ 64 ; + compatible = "shared-dma-pool"; + }; + onic_pool: dma_pool@ONICPOOL_ADDR { + reg = /bits/ 64 ; + compatible = "shared-dma-pool"; + }; + }; + pmem@PMEM_ADDR { + /* volatile; This property indicates that this region is + * actually backed by non-persistent memory. This lets the OS + * know that it may skip the cache flushes required to ensure + * data is made persistent after a write. */ + volatile; + compatible = "pmem-region"; + reg = /bits/ 64 ; + }; + soc { + #address-cells = <2>; + #size-cells = <2>; + compatible = "BSC,Lagarto-ox-soc", "simple-bus"; + ranges; + + /* For bitstream e97dd7b2-397f-11ef-abe0-bbd201a5a630 with two + * consoles */ + +#ifdef ENABLE_UART0 + /* The serial for the kernel console */ + uart_console: serial@UART0_ADDR { + compatible = "ns16550"; + reg = /bits/ 64 ; + reg-shift = <2>; + /* No interrupts for this UART, use console=hvc0 */ + /* This clock is the SERIAL_CLK */ + clock-frequency = ; + current-speed = ; + status = "okay"; + }; +#endif /* ENABLE_UART0 */ + +#ifdef ENABLE_UART1 + /* The serial for interrupt tests */ + uart_testing: serial@UART1_ADDR { + compatible = "ns16550"; + reg = /bits/ 64 ; + reg-shift = <2>; + /* Output interrupt 1 (the first one) */ + interrupts = <1>; + interrupt-parent = <&PLIC>; + clock-frequency = ; + current-speed = ; + status = "okay"; + }; +#endif /* ENABLE_UART1 */ + +#ifdef ENABLE_ETHERNET + ethernet0 { + xlnx,rxmem = <1522>; + carv,mtu = <1500>; + carv,no-mac; + device_type = "network"; + // 02:$node:00:01:00:$fpga 
-> 02:05:00:01:00:02 + // 10.5.1.$N/16 -> 10.5.1.184/16 + // N = 150 + ($node - 1) * 8 + $fpga + local-mac-address = [00 00 00 00 00 00]; + axistream-connected = <&axi_dma>; + compatible = "xlnx,xxv-ethernet-1.0-carv"; + memory-region = <&eth_pool>; + }; +#endif /* ENABLE_ETHERNET */ + +#ifdef ENABLE_AXIDMA + dma_clk: dma_clk { + compatible = "fixed-clock"; + #clock-cells = <0x0>; + clock-frequency = ; + }; + axi_dma: dma@AXIDMA_ADDR { + reg = /bits/ 64 ; + reg-shift = <2>; + #address-cells = <2>; + #size-cells = <2>; + xlnx,include-dre; + #dma-cells = <0x1>; + compatible = "xlnx,axi-dma-1.00.a"; + clock-names = "s_axi_lite_aclk", "m_axi_mm2s_aclk", + "m_axi_s2mm_aclk", "m_axi_sg_aclk"; + clocks = <&dma_clk>, <&dma_clk>, <&dma_clk>, <&dma_clk>; + interrupt-names = "mm2s_introut", "s2mm_introut"; + interrupt-parent = <&PLIC>; + interrupts = <2 3>; + xlnx,addrwidth = <0x28>; + xlnx,include-sg; + xlnx,sg-length-width = <0x17>; + dma-channel@AXIDMA_CH0 { + reg = /bits/ 64 ; + compatible = "xlnx,axi-dma-mm2s-channel"; + dma-channels = <0>; + interrupts = <2>; + xlnx,datawidth = <0x40>; + xlnx,device-id = <0x0>; + xlnx,include-dre; + }; + dma-channel@AXIDMA_CH1 { + reg = /bits/ 64 ; + compatible = "xlnx,axi-dma-s2mm-channel"; + dma-channels = <1>; + interrupts = <3>; + xlnx,datawidth = <0x40>; + xlnx,device-id = <0x0>; + xlnx,include-dre; + }; + }; +#endif /* ENABLE_AXIDMA */ + +#ifdef ENABLE_PLIC + /* Platform-Level Interrupt Controller: Delivers interrupts to + * HARTs. */ + PLIC: plic@PLIC_ADDR { + reg = /bits/ 64 ; + compatible = "riscv,plic0"; + interrupt-controller; /* Receives interrupts */ + #address-cells = <0>; + #interrupt-cells = <1>; + /* Sends interrupts to HART interrupt controllers.
+ * Configures two output targets or contexts: + * - context 0: machine mode external interrupt (11) + * - context 1: supervisor mode external interrupt (9) + */ + interrupts-extended = <&HLIC0 11>, <&HLIC0 9>; + riscv,ndev = ; + //riscv,max-priority = <0x7>; + }; +#endif /* ENABLE_PLIC */ + +#ifdef ENABLE_CLINT + /* Core Local Interruptor: It directly connects to the timer and + * inter-processor interrupt lines of various HARTs (or CPUs) so + * RISC-V per-HART (or per-CPU) local interrupt controller is + * the parent interrupt controller for CLINT device. The clock + * frequency of CLINT is specified via "timebase-frequency" DT + * property of "/cpus" DT node. The "timebase-frequency" DT + * property is described in + * Documentation/devicetree/bindings/riscv/cpus.yaml + */ + clint: clint@CLINT_ADDR { + reg = /bits/ 64 ; + reg-names = "control"; + interrupts-extended = <&HLIC0 3>, <&HLIC0 7>; + compatible = "riscv,clint0"; + }; +#endif /* ENABLE_CLINT */ + +#if 0 + /* There is another auxiliary clint (timer) at 40010000 for + * tests, but we don't tell the kernel so we can use it for + * testing interrupts manually.
*/ + aux_timer: clint@AUXTIMER_ADDR { + reg = /bits/ 64 ; + reg-names = "control"; + interrupts = <4>; /* PLIC input source 4 */ + interrupt-parent = <&PLIC>; + compatible = "riscv,clint0"; + }; +#endif + +#ifdef ENABLE_SPI + serial@UART2_ADDR { + compatible = "ns16750"; + reg = /bits/ 64 ; + interrupt-parent = <&PLIC>; + interrupts = <5>; + clock-frequency = ; + current-speed = ; + status = "okay"; + }; + spi@SPI_ADDR { + compatible = "ti,keystone-spi"; + reg = /bits/ 64 ; + #address-cells = <1>; + #size-cells = <0>; + interrupt-parent = <&PLIC>; + interrupt-names = "intvec0", "intvec1"; + interrupts = <6 0>, <7 0>; + ti,davinci-spi-intr-line = <0>; + spi-max-frequency = <24000000>; + loopback-mode = <1>; + status = "okay"; + }; +#endif /* ENABLE_SPI */ + + }; +}; diff --git a/dts/lagarto_ox.h b/dts/lagarto_ox.h new file mode 100644 index 0000000000000000000000000000000000000000..4892738d20c0623368cc94e75b9d6e8c216943ec --- /dev/null +++ b/dts/lagarto_ox.h @@ -0,0 +1,76 @@ +/* Toggles */ + +#define ENABLE_UART0 +#define ENABLE_UART1 +#define ENABLE_ETHERNET +#define ENABLE_AXIDMA +#define ENABLE_PLIC +#define ENABLE_CLINT +//#define ENABLE_SPI + +#define CPU_FREQ 50000000 /* 50 MHz */ +/* FIXME: The real RTC frequency is around half that, as the divider was wrongly + * configured. 
So for now lets use the real frequency: + * 50e6 / (1525*2) = 16393.44262295082 -> 16393 Hz */ +#define RTC_FREQ 16393 + +/* Memory layout: + * + * [0x0_4000_0000, 0x0_6000_0000) -> IO (512 MiB) + * [0x0_6000_0000, 0x0_7000_0000) -> DMA pool (256 MiB) + * [0x0_7000_0000, 0x0_8000_0000) -> DMA pool (256 MiB) + * [0x0_8000_0000, 0x1_8000_0000) -> RAM memory (4 GiB) + * [0x1_8000_0000, 0x1_c000_0000) -> Unused (1 GiB) + * [0x1_c000_0000, 0x2_8000_0000) -> PMEM (3 GiB) + */ + +#define UART0_SPEED 115200 +#define UART0_ADDR 0x40001000 +#define UART0_SIZE 0x00001000 + +#define UART1_SPEED UART0_SPEED +#define UART1_ADDR 0x40003000 +#define UART1_SIZE 0x00001000 + +/* UART2 via SPI */ +#define UART2_SPEED UART0_SPEED +#define UART2_ADDR 0x40005000 +#define UART2_SIZE 0x00001000 + +#define SPI_ADDR 0x40007000 +#define SPI_SIZE 0x00001000 + +#define AUXTIMER_ADDR 0x40010000 +#define AUXTIMER_SIZE 0x00010000 + +#define CLINT_ADDR 0x40100000 +#define CLINT_SIZE 0x00010000 + +#define AXIDMA_ADDR 0x40400000 +#define AXIDMA_SIZE 0x00400000 +#define AXIDMA_CH0 0x40400000 +#define AXIDMA_CH1 0x40400030 +#define AXIDMA_FREQ 156250000 + +#define PLIC_ADDR 0x40800000 +#define PLIC_SIZE 0x00400000 + +#ifdef ENABLE_SPI +# define PLIC_NDEV 7 /* extra UART2 + 2 x SPI */ +#else +# define PLIC_NDEV 4 +#endif + +#define ETHPOOL_ADDR 0x60000000 +#define ETHPOOL_SIZE 0x10000000 + +#define ONICPOOL_ADDR 0x70000000 +#define ONICPOOL_SIZE 0x10000000 + +/* Notice addresses > 32 bits from here */ + +#define MEM_ADDR 0x080000000 +#define MEM_SIZE 0x100000000 + +#define PMEM_ADDR 0x1c0000000 +#define PMEM_SIZE 0x0c0000000 diff --git a/flake.lock b/flake.lock index 1a56d894b2fe82ab92dd6fb967bf020627a5c003..410fe191a2879d45319d146b4b6b1d0a408ec0fe 100644 --- a/flake.lock +++ b/flake.lock @@ -1,24 +1,56 @@ { "nodes": { + "bscpkgs": { + "inputs": { + "nixpkgs": "nixpkgs" + }, + "locked": { + "lastModified": 1713974364, + "narHash": "sha256-ilZTVWSaNP1ibhQIIRXE+q9Lj2XOH+F9W3Co4QyY1eU=", + "ref": 
"refs/heads/master", + "rev": "de89197a4a7b162db7df9d41c9d07759d87c5709", + "revCount": 937, + "type": "git", + "url": "https://git.sr.ht/~rodarima/bscpkgs" + }, + "original": { + "type": "git", + "url": "https://git.sr.ht/~rodarima/bscpkgs" + } + }, "nixpkgs": { "locked": { - "lastModified": 1706092046, - "narHash": "sha256-Cbethl95Cu+WVIWfaAnRRBZiz5PmXxQvg4vXNqIZQUg=", - "owner": "rodarima", + "lastModified": 1700390070, + "narHash": "sha256-de9KYi8rSJpqvBfNwscWdalIJXPo8NjdIZcEJum1mH0=", + "path": "/nix/store/z7y28qzhk7driiwcw78k0mb24laknm0f-source", + "rev": "e4ad989506ec7d71f7302cc3067abd82730a4beb", + "type": "path" + }, + "original": { + "id": "nixpkgs", + "type": "indirect" + } + }, + "nixpkgs_2": { + "locked": { + "lastModified": 1720031269, + "narHash": "sha256-rwz8NJZV+387rnWpTYcXaRNvzUSnnF9aHONoJIYmiUQ=", + "owner": "NixOS", "repo": "nixpkgs", - "rev": "57e7c8fa4fdc414a936ce83afd0c70fb0a3a31d5", + "rev": "9f4128e00b0ae8ec65918efeba59db998750ead6", "type": "github" }, "original": { - "owner": "rodarima", - "ref": "fix-pkgs-static-gcc-march", + "owner": "NixOS", + "ref": "nixos-unstable", "repo": "nixpkgs", "type": "github" } }, "root": { "inputs": { - "nixpkgs": "nixpkgs" + "bscpkgs": "bscpkgs", + "nixpkgs": "nixpkgs_2" } } }, diff --git a/flake.nix b/flake.nix index baeeb6530bbdbc86662a974f290290d005af6287..f4ac6649907840d4a4d1bf3863625990cafcda5b 100644 --- a/flake.nix +++ b/flake.nix @@ -1,17 +1,20 @@ { - inputs.nixpkgs.url = "github:rodarima/nixpkgs/fix-pkgs-static-gcc-march"; + inputs.nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; + inputs.bscpkgs.url = "git+https://git.sr.ht/~rodarima/bscpkgs"; - outputs = { self, nixpkgs, ... 
}: + outputs = {self, nixpkgs, ...}@inputs: let system = "x86_64-linux"; - overlay = import ./overlay.nix; - pkgs = import nixpkgs { inherit system; }; + nixosSystem = import (nixpkgs + "/nixos/lib/eval-config.nix"); + mkRoots = pkgs: list: pkgs.writeText "gcroots.json" + (builtins.toJSON (map (x: { drv = x; attrs = x.drvAttrs; }) list)); in { - overlay = import ./overlay.nix; + #overlay = import ./overlay.nix; nixosConfigurations = { # The qemu configuration defines a system that runs in the RISC-V # architecture, but is build from an x86 host machine. - qemu = nixpkgs.lib.nixosSystem { + qemu = nixosSystem { + specialArgs = { inherit self; }; system = "${system}"; modules = [ ./configuration.nix @@ -20,7 +23,8 @@ }; # Same, but disable compressed instructions - qemu-nc = nixpkgs.lib.nixosSystem { + qemu-nc = nixosSystem { + specialArgs = { inherit self; }; system = "${system}"; modules = [ ./configuration.nix @@ -30,7 +34,8 @@ }; # FPGA Lagarto Hun CPU - lagarto-hun = nixpkgs.lib.nixosSystem { + lagarto-hun = nixosSystem { + specialArgs = { inherit self; }; system = "${system}"; modules = [ ./configuration.nix @@ -38,35 +43,105 @@ ./no-compressed.nix ]; }; + + # FPGA Lagarto Ox CPU + lagarto-ox = nixosSystem { + specialArgs = { inherit self; }; + system = "${system}"; + modules = [ + ./configuration.nix + ./lagarto-ox.nix + ./no-compressed.nix + ]; + }; }; # A development shell with QEMU ready to boot the RISC-V system in an x86 # machine. 
- devShells.x86_64-linux.default = + devShells.x86_64-linux.qemu-lagarto-hun = let nixosconf = self.nixosConfigurations.qemu-nc; syspkgs = nixosconf.pkgs; toplevel = nixosconf.config.system.build.toplevel; - in pkgs.mkShell { + in syspkgs.mkShell { pname = "qemu-shell"; - buildInputs = with pkgs; [ qemu e2fsprogs ]; + nativeBuildInputs = with syspkgs; [ qemu e2fsprogs ]; # Here we tell the run script where to find the system NIXOS_SYSTEM_TOPLEVEL = toplevel; OPENSBI = syspkgs.opensbi-uboot; }; + devShells.x86_64-linux.lagarto-hun = let nixosconf = self.nixosConfigurations.lagarto-hun; syspkgs = nixosconf.pkgs; build = nixosconf.config.system.build; - in pkgs.mkShell { + in syspkgs.mkShell { pname = "lagarto-hun-shell"; + COMMIT = if self ? rev then self.rev else "dirty"; TOPLEVEL = build.toplevel; OPENSBI = syspkgs.opensbi; KERNEL = build.kernel; INITRD = build.initialRamdisk; ROOTFS = build.sdImage; UBOOT_ENV = syspkgs.uboot-env; + shellHook = '' + echo "Here are the current system pieces:" + echo " COMMIT = $COMMIT" + echo " TOPLEVEL = $TOPLEVEL" + echo " KERNEL = $KERNEL" + echo " OPENSBI = $OPENSBI" + echo " INITRD = $INITRD" + echo " ROOTFS = $ROOTFS" + echo " UBOOT_ENV = $UBOOT_ENV" + ''; }; + + devShells.x86_64-linux.lagarto-ox = + let + nixosconf = self.nixosConfigurations.lagarto-ox; + syspkgs = nixosconf.pkgs; + build = nixosconf.config.system.build; + in syspkgs.mkShell rec { + pname = "lagarto-ox-shell"; + COMMIT = if self ? 
rev then self.rev else "dirty"; + TOPLEVEL = build.toplevel; + OPENSBI = syspkgs.opensbi; + KERNEL = build.kernel; + INITRD = build.initialRamdisk; + ROOTFS = build.sdImage; + UBOOT_ENV = syspkgs.uboot-env; + BITSTREAM = syspkgs.bitstream; + BOOTROM = syspkgs.bootrom; + GCROOT = mkRoots syspkgs [ + syspkgs.stdenv KERNEL OPENSBI syspkgs.riscv-tools + ]; + shellHook = '' + echo "Here are the current system pieces:" + echo " COMMIT = $COMMIT" + echo " TOPLEVEL = $TOPLEVEL" + echo " KERNEL = $KERNEL" + echo " OPENSBI = $OPENSBI" + echo " INITRD = $INITRD" + echo " ROOTFS = $ROOTFS" + echo " UBOOT_ENV = $UBOOT_ENV" + echo " BITSTREAM = $BITSTREAM" + echo " BOOTROM = $BOOTROM" + echo " GCROOT = $GCROOT" + ''; + }; + + devShells.x86_64-linux.lagarto-ox-rd = + let + nixosconf = self.nixosConfigurations.lagarto-ox; + syspkgs = nixosconf.pkgs; + in self.outputs.devShells.x86_64-linux.lagarto-ox.overrideAttrs (old:{ + TOPLEVEL = ""; + ROOTFS = ""; + GCROOT = mkRoots syspkgs [ syspkgs.stdenv old.OPENSBI ]; + }); + + devShells.x86_64-linux.default = + self.outputs.devShells.x86_64-linux.lagarto-ox-rd; }; } diff --git a/fpga/boot.sh b/fpga/boot.sh index 2de91b41055b47f3c7b08bf614e75d27bc76dac5..47f24e1213184fb3e9777211d203f68a57d62dfd 100755 --- a/fpga/boot.sh +++ b/fpga/boot.sh @@ -1,12 +1,12 @@ #!/usr/bin/bash -set -x +#set -x set -e source ./env.sh -#bitstream="-w system-acme_ea-4h2v.bit" +bitstream="-w bitstream.bit" -./fpgactl $bitstream -b opensbi.bin -k kernel.bin -i initrd.bin -r rootfs.img +./fpgactl $bitstream -R bootrom.bin -b opensbi.bin -k kernel.bin -i initrd.bin -r rootfs.img -picocom -b 115200 /dev/ttyUSB2 +picocom -b 115200 $FPGACTL_UART diff --git a/fpga/env.sh b/fpga/env.sh index 573d4f613ddb4e50a7577aded40afe085ac5e75c..6a11fffeff251e6dfd196f8835c1760f883b21f5 100644 --- a/fpga/env.sh +++ b/fpga/env.sh @@ -1,21 +1,56 @@ +#!/bin/bash # Source this file to setup the environment -INSTALL_PATH=/home/tools -LOAD_BITSTREAM=$INSTALL_PATH/scripts 
-HOSTNAME=$(hostname) +function setup_cucu() +{ + INSTALL_PATH=/home/tools + LOAD_BITSTREAM=$INSTALL_PATH/scripts + + export DMA_IP_DRIVERS="$INSTALL_PATH/drivers/$hostname/dma_ip_drivers-onic-gamma/xilinx_pcie_drivers" + + if [ ! -d $DMA_IP_DRIVERS ]; then + echo "error: DMA_IP_DRIVERS $DMA_IP_DRIVERS directory does not exist" >&2 + return + fi + + export PATH="$DMA_IP_DRIVERS/QDMA/linux-kernel/bin/:$PATH" + + if [ -x /opt/Xilinx/Vivado/2020.1/settings64.sh ]; then + source /opt/Xilinx/Vivado/2020.1/settings64.sh + elif [ -x /opt/Xilinx/Vivado/2021.2/settings64.sh ]; then + source /opt/Xilinx/Vivado/2021.2/settings64.sh + fi +} -export DMA_IP_DRIVERS="$INSTALL_PATH/drivers/$HOSTNAME/dma_ip_drivers-onic-gamma/xilinx_pcie_drivers" +function setup_meep() +{ + . /nfs/apps/XILINX/xilinx_22_env.sh + export PATH="$PATH:/apps/QDMA/meep-ionic/2022.1.4.4/linux-kernel/bin/" -if [ ! -d $DMA_IP_DRIVERS ]; then - echo "error: DMA_IP_DRIVERS $DMA_IP_DRIVERS directory does not exist" >&2 - return -fi + # Select the first FPGA in the node + local line=$(grep fpgan /etc/motd | sed -n 2p | tr -d ' ') + export FPGACTL_PCIDEV=$(echo "$line" | awk -F'|' '{print $5}') + export FPGACTL_SERIAL=$(echo "$line" | awk -F'|' '{print $4}') + export FPGACTL_UART=$(echo "$line" | awk -F'|' '{print "/dev/"$7}') -export PATH="$DMA_IP_DRIVERS/QDMA/linux-kernel/bin/:$PATH" + # Setup mappings -if [ -x /opt/Xilinx/Vivado/2020.1/settings64.sh ]; then - source /opt/Xilinx/Vivado/2020.1/settings64.sh -elif [ -x /opt/Xilinx/Vivado/2021.2/settings64.sh ]; then - source /opt/Xilinx/Vivado/2021.2/settings64.sh -fi + # Delta between where we load in the dma device and RAM + local delta_addr=-0x60000000 + + # See https://gitlab.bsc.es/hwdesign/fpga/integration-lab/fpga-tools/-/blob/6a63bcea6d1d59df3c7d62311aa4935efd54d3a3/boot_riscv/boot_sa.sh#L36-40 + export FPGACTL_BOOTLOADER_ADDR=$((0x80000000+$delta_addr)) + export FPGACTL_KERNEL_ADDR=$((0x84000000+$delta_addr)) + export 
FPGACTL_INITRD_ADDR=$((0x8c300000+$delta_addr)) + export FPGACTL_ROOTFS_ADDR=$((0x1c0000000+$delta_addr)) + export FPGACTL_BOOTROM_ADDR=$((0x00000100)) +} + +hostname=$(hostname) + +case "$hostname" in + cucu) setup_cucu ;; + fpgan*) setup_meep ;; + *) echo "ERROR: unknown host $hostname";; +esac diff --git a/fpga/fpgactl b/fpga/fpgactl index 2e0ac2ce6de5ee21e7f15c692b6c2648ba7ded9a..ef40daff2bd2e8d8255cdd29ebdf30f83d1297b1 100755 --- a/fpga/fpgactl +++ b/fpga/fpgactl @@ -27,9 +27,7 @@ function check_environment() # {{{ } # }}} function create_qdma_queue() # {{{ { - pcidir="/sys/bus/pci/devices/0000:08:00.0" - - if [ ! -d "$pcidir/qdma" ]; then + if [ ! -d "$pcidir" ]; then echo "missing pci directory: $pcidir" >&2 exit 1 fi @@ -44,48 +42,71 @@ function create_qdma_queue() # {{{ exit 1 fi - if [ ! -c "/dev/qdma08000-MM-1" ]; then - echo 2 | sudo dd of="$pcidir/qdma/qmax" - - dma-ctl qdma08000 q add mode mm idx 1 dir bi - dma-ctl qdma08000 q start idx 1 dir bi + if [ ! -r "$pcidir/qdma/qmax" ]; then + echo "cannot read qmax file: $pcidir/qdma/qmax" >&2 + exit 1 + fi - sudo chmod go+rw "/dev/qdma08000-MM-1" - sudo chmod go+rw "$pcidir/resource0" - sudo chmod go+rw "$pcidir/resource0_wc" - sudo chmod go+rw "$pcidir/resource2" - sudo chmod go+rw "$pcidir/resource2_wc" + # There should be two queues + local qmax=$(cat "$pcidir/qdma/qmax") + if [ "$qmax" != 2 ]; then + if [ -w "$pcidir/qdma/qmax" ]; then + echo 2 | dd of="$pcidir/qdma/qmax" + else + echo 2 | sudo dd of="$pcidir/qdma/qmax" + fi fi - if [ ! -c "/dev/qdma08000-MM-0" ]; then - dma-ctl qdma08000 q add mode mm idx 0 dir bi - dma-ctl qdma08000 q start idx 0 dir bi - sudo chmod go+rw "/dev/qdma08000-MM-0" + # Create the two queues if they don't exist + if [ ! -c "/dev/${qdmadev}-MM-1" ]; then + dma-ctl "${qdmadev}" q add mode mm idx 1 dir bi + dma-ctl "${qdmadev}" q start idx 1 dir bi fi + if [ ! 
-c "/dev/${qdmadev}-MM-0" ]; then + dma-ctl "${qdmadev}" q add mode mm idx 0 dir bi + dma-ctl "${qdmadev}" q start idx 0 dir bi + fi + + # Wait for udev to process the new devices + udevadm settle + + # Ensure we have write access. On some clusters this is automatically done + # by udev rules, on others we are expect to use sudo. + for f in /dev/${qdmadev}-MM-{0,1} ${pcidir}/resource{0,0_wc,2,2_wc}; do + test -w "$f" || sudo chmod go+rw "$f" + done sleep 2 } # }}} -function do_system_reset() # {{{ +function do_cpu_reset() # {{{ { - # UartBootEn (bit2) + system reset (bit0) - dma-ctl qdma08000 reg write bar 2 0x0 0x0 > /dev/null - sleep 0.2 - # Release system reset, we must wait until the memory is filled with 0s - dma-ctl qdma08000 reg write bar 2 0x0 0x1 > /dev/null - #sleep 5 + if [ "$model" == "hun" ]; then + # UartBootEn (bit2) + system reset (bit0) + dma-ctl "${qdmadev}" reg write bar 2 0x0 0x0 + sleep 0.2 + dma-ctl "${qdmadev}" reg write bar 2 0x0 0x1 + elif [ "$model" == "ox" ]; then + dma-ctl "${qdmadev}" reg write bar 2 0x0 0x0 + sleep 0.2 + fi } # }}} -function do_system_release() # {{{ +function do_cpu_release() # {{{ { - # Release Ariane's reset - dma-ctl qdma08000 reg write bar 2 0x0 0x3 > /dev/null + if [ "$model" == "hun" ]; then + # Release Ariane's reset + dma-ctl "${qdmadev}" reg write bar 2 0x0 0x3 + elif [ "$model" == "ox" ]; then + dma-ctl "${qdmadev}" reg write bar 2 0x0 0x1 + fi } # }}} function copy_by_dma() # {{{ { ifile="$1" address="$2" - ofile="/dev/qdma08000-MM-1" - bs=$((8*1024*1024)) # 8 MiB + ofile="/dev/${qdmadev}-MM-1" + #bs=$((8*1024*1024)) # 8 MiB + bs=$((1*1024*1024)) # 1 MiB total_size=$(stat --format "%s" "$ifile") @@ -101,6 +122,7 @@ function copy_by_dma() # {{{ dd if="$ifile" skip=$skip count=1 bs=$bs of="$ofile" seek=$dst oflag=seek_bytes status=none let skip=$skip+1 done + #dma-to-device -d "$ofile" -s "$total_size" -a "$address" -f "$ifile" } # }}} function load_file_in_memory() # {{{ @@ -110,23 +132,24 @@ function 
load_file_in_memory() # {{{ address=$(($address_hex)) total_size=$(stat --format "%s" "$file") + md5sum=$(md5sum "$file" | cut -d' ' -f1) # Previous tests... - #strace -f dma-to-device -d /dev/qdma08000-MM-1 -a "$address" -s $((8*1024*1024)) -f "$file" - #strace -f dd if="$file" bs=16M seek="${address}" oflag=seek_bytes of=/dev/qdma08000-MM-1 status=progress conv=sync - #strace -f fpgakit/fpgadd -i "$file" -a "$address" -d /dev/qdma08000-MM-1 -c 1024 -s 1024 + #strace -f dma-to-device -d /dev/${qdmadev}-MM-1 -a "$address" -s $((8*1024*1024)) -f "$file" + #strace -f dd if="$file" bs=16M seek="${address}" oflag=seek_bytes of=/dev/${qdmadev}-MM-1 status=progress conv=sync + #strace -f fpgakit/fpgadd -i "$file" -a "$address" -d /dev/${qdmadev}-MM-1 -c 1024 -s 1024 #ID=08 ./load_image.sh "$file" "$address" # Now dd seems to work fine, but I will leave this as fallback: copy_by_dma "$file" "$address" - #dd if="$file" bs=8M seek="${address}" oflag=seek_bytes of=/dev/qdma08000-MM-1 status=none + #dd if="$file" bs=8M seek="${address}" oflag=seek_bytes of=/dev/${qdmadev}-MM-1 status=none - printf "loaded '%s' at 0x%x with size %d\n" "$file" "$address" "$total_size" >&2 + printf "loaded '%s' at 0x%x with size %d and md5 %s\n" "$file" "$address" "$total_size" "$md5sum" >&2 } # }}} function do_boot_only() # {{{ { - do_system_reset + do_cpu_reset ./load_image.sh ${OSBI} $((0x80000000)) && @@ -135,7 +158,7 @@ function do_boot_only() # {{{ sleep 2 && # #Release Ariane's reset - dma-ctl qdma08000 reg write bar 2 0x0 0x3 && + dma-ctl "${qdmadev}" reg write bar 2 0x0 0x3 && sleep 10 && @@ -143,7 +166,7 @@ function do_boot_only() # {{{ echo mount -o nolock -o rw -o retrans=10 192.168.0.16:/media/sda2/scratch/xavim/point /root && - if [ ! -c /dev/qdma08000-MM-0 ] ; then + if [ ! 
-c "/dev/${qdmadev}-MM-0" ] ; then /home/tools/drivers/`/bin/hostname`/dma_ip_drivers-onic-gamma/create-queue-qdma.sh -2 fi && @@ -155,7 +178,7 @@ function do_boot_only() # {{{ } # }}} function do_reload_fs() # {{{ { - do_system_reset + do_cpu_reset #~xavim/LAGARTO_LINUX-4.1/./load_image.sh \ # /home/xavim/ARIANE_LINUX-3.0/recovery/fedora-fs-dx-java-cucu-0.108.raw.recovered \ @@ -172,7 +195,7 @@ function do_reload_fs() # {{{ sleep 2 - do_system_release + do_cpu_release create_qdma_queue # uncomment to enable eth-over-pcie @@ -180,23 +203,25 @@ function do_reload_fs() # {{{ } # }}} function upload_bitstream_file() # {{{ { - bitfile="$1" + if [ -z "$jtagserial" ]; then + >&2 echo "JTAG serial required" + usage + fi - fpgajtag=$(lsusb -vd 0403: 2>&1 | grep iSerial | awk ' { print $3; }') - if [ -z "$fpgajtag" ]; then - echo "error: cannot find JTAG serial" >&2 - exit 1 + if [ -z "$bitstream" ]; then + >&2 echo "bitstream file required" + usage fi script=$(mktemp vivado-XXXXXXXXXX.tcl) cat > "$script" <&2 echo -e "error: missing PCI device (hint: lspci -d 10ee:902f)" + usage + fi - unload_modules "xocl xclmgmt qdma_pf xdma" # qdma_vf not removable - remove_pci_devices + # Ensure it is ok + local matches=$(lspci -s "$pcidev") + if [ -z "$matches" ]; then + >&2 echo "no match for PCI device '$pcidev'" + exit 1 + fi - upload_bitstream_file "$bitstream" + local n="$(echo "$matches" | wc -l)" + if [ "$n" -gt 1 ]; then + >&2 echo "multiple matches for PCI device '$pcidev'" + exit 1 + fi + # Fill the PCI device with the domain + local fulldev=$(lspci -s "$pcidev" -D | cut -d' ' -f1) + pcidir="/sys/bus/pci/devices/$fulldev" + + if [ ! 
-d "$pcidir" ]; then + >&2 echo "cannot find PCI dir: $pcidir" + exit 1 + fi + + # Set the PCI device to the full device + pcidev="$fulldev" + + # Find slot + slot=$(lspci -s "$pcidev" -vm | grep PhySlot | cut -f2) + + if [ -z "$slot" ]; then + >&2 echo "cannot find physical slot for PCI '$pcidev'" + exit 1 + fi + + local devid=$(echo "$pcidev" | cut -d: -f2- | tr -d ':.') + qdmadev="qdma${devid}" + +} # }}} +function preload_hook() #{{{ +{ + case "$hostname" in + cucu) + unload_modules "xocl xclmgmt qdma_pf xdma" # qdma_vf not removable + remove_pci_devices + ;; + fpgan*) + ;; + *) + echo "hostname $hostname not known" + exit 1 + ;; + esac +} #}}} +function postload_hook() #{{{ +{ rescan_pci_devices - unload_modules "qdma_pf xdma" # qdma_vf not removable - remove_pci_devices - load_qdma_modules - rescan_pci_devices + + case "$hostname" in + cucu) + unload_modules "qdma_pf xdma" # qdma_vf not removable + remove_pci_devices + load_qdma_modules + rescan_pci_devices + ;; + fpgan*) + ;; + *) + echo "hostname $hostname not known" + exit 1 + ;; + esac + create_qdma_queue +} #}}} +function load_bitstream() # {{{ +{ + preload_hook + upload_bitstream_file + postload_hook } # }}} bitstream= @@ -267,34 +370,51 @@ bootloader= kernel= initrd= rootfs= +bootrom= resetcpu= verbose= - -bootloader_addr=0x80000000 -kernel_addr=0x84000000 -initrd_addr=0x8c300000 -rootfs_addr=0x180000000 +pcidev= +model=ox +# Internal +slot= +pcidir= +qdmadev= + +bootloader_addr="${FPGACTL_BOOTLOADER_ADDR:-0x80000000}" +kernel_addr="${FPGACTL_KERNEL_ADDR:-0x84000000}" +initrd_addr="${FPGACTL_INITRD_ADDR:-0x8c300000}" +rootfs_addr="${FPGACTL_ROOTFS_ADDR:-0x140000000}" +bootrom_addr="${FPGACTL_BOOTROM_ADDR:-0x60000100}" + +hostname="${hostname:-$(hostname)}" +echo "hostname=$hostname" function usage() { echo "" >&2 - echo "Usage: $0 [-v] [-w bitstream] [-b bootloader] [-k kernel] [-i initrd]" >&2 + echo "Usage: $0 [-p pcidev] [-v] [-w bitstream] [-j serial] [-b bootloader] [-k kernel] [-i initrd] [-R 
bootrom] " >&2 echo "" >&2 echo "First writes the bitstream if given. Then loads the rest of files" >&2 echo "into memory and restarts the CPU." >&2 echo "" >&2 echo "Options" >&2 + echo " -p pcidev Select PCI device (same format as lspci -s)." >&2 + echo " Read from \$FPGACTL_PCIDEV if not given." >&2 echo " -w bitstream Write the bitstream file to the FPGA" >&2 + echo " -j serial JTAG serial (can be found by lsusb -v)" >&2 + echo " Read from \$FPGACTL_SERIAL if not given." >&2 echo " -b bootloader Load the bootloader file in $bootloader_addr" >&2 echo " -k kernel Load the kernel file in $kernel_addr" >&2 echo " -i initrd Load the initrd file in $initrd_addr" >&2 echo " -r rootfs Load the rootfs file in $rootfs_addr" >&2 + echo " -R bootrom Load the bootrom file in $bootrom_addr" >&2 + echo " -m model CPU model: Either 'hun' or 'ox' (default ox)" >&2 echo " -v Be verbose" >&2 echo "" >&2 exit 1 } -while getopts "hvw:b:k:i:r:" opt; do +while getopts "hvw:b:k:i:r:p:j:m:R:" opt; do case "${opt}" in v) verbose=1 ;; w) bitstream="${OPTARG}" ;; @@ -302,21 +422,32 @@ while getopts "hvw:b:k:i:r:" opt; do k) kernel="${OPTARG}"; resetcpu=1 ;; i) initrd="${OPTARG}"; resetcpu=1 ;; r) rootfs="${OPTARG}"; resetcpu=1 ;; + R) bootrom="${OPTARG}"; resetcpu=1 ;; + p) pcidev="${OPTARG}" ;; + j) jtagserial="${OPTARG}" ;; + m) model="${OPTARG}" ;; h) usage ;; *) usage ;; esac done +jtagserial="${jtagserial:-$FPGACTL_SERIAL}" +pcidev="${pcidev:-$FPGACTL_PCIDEV}" + test "$verbose" && set -x check_environment +select_pcidev test "$bitstream" && load_bitstream "$bitstream" -test "$resetcpu" && do_system_reset +test "$resetcpu" && do_cpu_reset test "$bootloader" && load_file_in_memory "$bootloader" $bootloader_addr test "$kernel" && load_file_in_memory "$kernel" $kernel_addr test "$initrd" && load_file_in_memory "$initrd" $initrd_addr test "$rootfs" && load_file_in_memory "$rootfs" $rootfs_addr -test "$resetcpu" && do_system_release +test "$bootrom" && load_file_in_memory "$bootrom"
$bootrom_addr +test "$resetcpu" && do_cpu_release + +exit 0 # vim:ts=2:sw=2:ai:foldmethod=marker:foldlevel=0: diff --git a/fpga/run-login.sh b/fpga/run-login.sh new file mode 100755 index 0000000000000000000000000000000000000000..8720bf3c1962f3922235a228bd18dd9603045269 --- /dev/null +++ b/fpga/run-login.sh @@ -0,0 +1,54 @@ +#!/usr/bin/bash + +# There are several situations in which we may find the jobs: +# - There are no jobs queued or running +# - There is at least one job running +# - There is one job queued +# - There was a job running but ended and is now ending + +set -x +set -e + +path="$1" +allocated= + +# First determine if we already have jobs +n=$(squeue --me -lh | wc -l) + +if [ "$n" -gt 1 ]; then + echo "Too many jobs queued already" >&2 + exit 1 +fi + +if [ "$n" == 0 ]; then + # No running jobs, so allocate a new job + salloc -N 1 --constraint=dmaqdma --no-shell -t 1-00 + allocated=1 + + # Wait until the job is running + while [ "$n" != 1 ]; do + sleep 2 + n=$(squeue --me -lh | grep RUNNING | wc -l) + done +else + # There is one job, ensure it is running + n=$(squeue --me -lh | grep RUNNING | wc -l) + + if [ "$n" != 1 ]; then + echo "The job is not running, stopping" >&2 + exit 1 + fi +fi + +# If this point is reached there is one job running + +host=$(squeue --me -h -o %N) +echo "Switching to $host" + +# Continue the execution there +ssh "$host" "$path/run-node.sh" "$path" + +# Cancel our job if it was successful +if [ "$allocated" ]; then + scancel --me +fi diff --git a/fpga/run-node.sh b/fpga/run-node.sh new file mode 100755 index 0000000000000000000000000000000000000000..c234acb9f0b733f4cd5cb2fcbd8c4a4f5e55445b --- /dev/null +++ b/fpga/run-node.sh @@ -0,0 +1,38 @@ +#!/usr/bin/bash + +#set -x +set -e + +echo "Hello from $(hostname)" + +path="$1" + +cd "$path" + +# First kill any picocom instance +killall picocom || true + +# Setup the environment +.
env.sh + +set -x + +# Then perform the boot +./fpgactl -w bitstream.bit -b opensbi.bin -k kernel.bin -i initrd.bin -r rootfs.img + +# Normal timeouts +timeout=$((30 * 60)) # Always stop after 30 min +timeout_silent=$((3 * 60)) # Stop if 3 min without output + +# Timeouts for SPEC benchmarks +#timeout=$((12 * 60 * 60)) # Always stop after 12 h +#timeout_silent=$((4 * 60 * 60)) # Stop if 4 h without output (some benchmarks take 1.6h) + +# Set dead switch +sleep $timeout && killall picocom & + +# Note: --imap igncr is broken so we replace it with LF. +# See https://github.com/npat-efault/picocom/pull/114 +# It looks like picocom is abandoned, we may want to switch to minicom or +# stty+cat +picocom --imap crlf -q -x $(($timeout_silent*1000)) -b 115200 $FPGACTL_UART diff --git a/fpga/run-remotely.sh b/fpga/run-remotely.sh new file mode 100755 index 0000000000000000000000000000000000000000..550db92058e5cbc711ffd042dfb89de716c4e61b --- /dev/null +++ b/fpga/run-remotely.sh @@ -0,0 +1,22 @@ +#!/bin/sh + +# Executes a pipeline in a remote machine taking the values from the environment +# Usage: fpga/run-remotely.sh [host:path] + +set -e + +dst=fpgalogin1:nixos + +if [ "$1" != "" ]; then + dst="$1" +fi + +path=${dst#*:} + +# Copy all required elements to the destination machine +fpga/upload.sh "$dst" + +# Launch the pipeline from there +set +x +#ssh fpgalogin1 "$path/run-login.sh" "$path" | awk -f fpga/verify.awk +ssh "${dst%%:*}" "$path/run-login.sh" "$path" diff --git a/fpga/upload.sh b/fpga/upload.sh index 191e3ccb6d7e984189eb69472bb8e6b332320d36..6e4df7cc3c4f395bbfdedcf02aaa562fa2932a2c 100755 --- a/fpga/upload.sh +++ b/fpga/upload.sh @@ -3,15 +3,28 @@ set -e set -x -dst=femu:nixos/ +#dst=femu:nixos +dst=fpgalogin1:nixos -rsync -a fpga/fpgactl "$dst" -rsync -a fpga/boot.sh "$dst" -rsync -a fpga/env.sh "$dst" -rsync "$OPENSBI/share/opensbi/lp64/fpga/openpiton/firmware/fw_payload.bin" "$dst/opensbi.bin" +if [ "$1" != "" ]; then + dst="$1" +fi + +rsync -a fpga/run-login.sh "$dst/"
+rsync -a fpga/run-node.sh "$dst/" +rsync -a fpga/fpgactl "$dst/" +rsync -a fpga/boot.sh "$dst/" +rsync -a fpga/env.sh "$dst/" +rsync $(find "$OPENSBI" -name fw_payload.bin) "$dst/opensbi.bin" rsync "$KERNEL/Image" "$dst/kernel.bin" rsync "$INITRD/initrd" "$dst/initrd.bin" -rsync "$ROOTFS/sd-image/rootfs.img" "$dst/rootfs.img" +if [ -n "$ROOTFS" ]; then + rsync "$ROOTFS/sd-image/rootfs.img" "$dst/rootfs.img" +else + echo "Skipping rootfs" +fi +rsync "$BITSTREAM" "$dst/bitstream.bit" +rsync "$BOOTROM" "$dst/bootrom.bin" rsync "$UBOOT_ENV" "$dst/uboot.env" echo "Now go to $dst and run ./boot.sh" diff --git a/fpga/verify.awk b/fpga/verify.awk new file mode 100644 index 0000000000000000000000000000000000000000..1db795951a07950258de321f76b42146ab6ce6d0 --- /dev/null +++ b/fpga/verify.awk @@ -0,0 +1,20 @@ +BEGIN { + bootrom_ok = 0 + opensbi_ok = 0 + test_ok = 0 +} +/RBOOTROM/ { bootrom_ok = 1 } +/^OpenSBI v/ { opensbi_ok = 1 } +/^TEST-RESULT-OK/ { test_ok = 1 } + { printf "line> "; print } +END { + printf "Test summary:\n" + printf " Bootrom: %s\n", bootrom_ok ? "OK" : "FAIL"; + printf " OpenSBI: %s\n", opensbi_ok ? "OK" : "FAIL"; + printf " Result: %s\n", test_ok ? 
"OK" : "FAIL"; + + if (test_ok) + exit 0; + else + exit 1; +} diff --git a/lagarto-hun.nix b/lagarto-hun.nix index 0e866bac9f25596a36cca1db013a149c5dd74ec4..a1a98a869d59089f09d1c96ccca58ed7ac8bd647 100644 --- a/lagarto-hun.nix +++ b/lagarto-hun.nix @@ -57,6 +57,12 @@ serviceConfig.Restart = "always"; }; + # Disable hvc0 as it is racing for the same console + systemd.services."serial-getty@hvc0" = { + enable = lib.mkForce false; + wantedBy = lib.mkForce [ ]; + }; + sdImage = { # The image will be loaded as-is in memory, so no compression compressImage = false; @@ -96,13 +102,13 @@ initrd = "${config.system.build.initialRamdisk}/initrd"; in prev.runCommand "uboot.txt" {} '' cat > $out < + # Create pmem of 3 GiB [0x140000000, 0x200000000) + fdt mknode / pmem@0x140000000 + fdt set /pmem@0x140000000 compatible "pmem-region" + fdt set /pmem@0x140000000 reg <0x1 0x40000000 0x0 0xc0000000> - # Reduce memory - fdt set /memory@80000000 reg <0x00000000 0x80000000 0x00000001 0x00000000> + # Reduce memory to 3 GiB [0x80000000, 0x140000000) + fdt set /memory@80000000 reg <0x0 0x80000000 0x0 0xc0000000> # Set kernel options setenv bootargs "root=/dev/ram0 loglevel=7 debug rw earlycon=sbi boot.trace console=hvc0 init=${init}" @@ -120,6 +126,7 @@ "PLATFORM=fpga/openpiton" "FW_PAYLOAD_PATH=${final.uboot}/u-boot-nodtb.bin" ]; + patches = [ ./patches/opensbi-lagarto-hun.patch ]; }); }) ]; } diff --git a/lagarto-ox.nix b/lagarto-ox.nix new file mode 100644 index 0000000000000000000000000000000000000000..571cc1c6e20366a376cfb9b72c717bed96ddbbff --- /dev/null +++ b/lagarto-ox.nix @@ -0,0 +1,617 @@ +{ config, lib, utils, pkgs, modulesPath, self, ... }: + +{ + imports = [ + "${modulesPath}/installer/sd-card/sd-image.nix" + ]; + + #nixpkgs.crossSystem = { + # system = "riscv64-linux"; + # gcc.arch = "rv64imafd"; + # gcc.tune = "generic"; + #}; + + # We don't need any firmware + hardware.firmware = lib.mkForce []; + + # Doesn't work, it gets activated via the kernel socket. 
+ # # No need for udev + # services.udev.enable = false; + # systemd.suppressedSystemUnits = [ + # "systemd-udev-trigger.service" + # ]; + + # Output the unit name so we can remove it + systemd.extraConfig = '' + StatusUnitFormat=name + ''; + + # Prevent executing the nscd program as it seems to hang the CPU + system.activationScripts.users = lib.mkForce ( + let + cfg = config.users; + spec = pkgs.writeText "users-groups.json" (builtins.toJSON { + inherit (cfg) mutableUsers; + users = lib.mapAttrsToList (_: u: + { + inherit (u) + name uid group description home homeMode createHome isSystemUser + password hashedPasswordFile hashedPassword + autoSubUidGidRange subUidRanges subGidRanges + initialPassword initialHashedPassword expires; + shell = utils.toShellPath u.shell; + }) cfg.users; + groups = lib.attrValues cfg.groups; + }); + in + if !config.systemd.sysusers.enable then { + supportsDryActivation = true; + text = '' + install -m 0700 -d /root + install -m 0755 -d /home + + ${pkgs.perl.withPackages (p: [ p.FileSlurp p.JSON ])}/bin/perl \ + -w ${./patches/update-users-groups.pl} ${spec} + ''; + } else "" # keep around for backwards compatibility + ); + + # Also disable the nscd daemon + services.nscd.enable = false; + system.nssModules = lib.mkForce []; # Required + + system.build.bootStage2 = let + useHostResolvConf = config.networking.resolvconf.enable && config.networking.useHostResolvConf; + bootStage2 = pkgs.substituteAll { + src = ./patches/stage-2-init.sh; + shellDebug = "${pkgs.bashInteractive}/bin/bash"; + bashInteractive = "${pkgs.bashInteractive}"; + bench2 = "${pkgs.bench2}"; + shell = "${pkgs.bash}/bin/bash"; + inherit (config.boot) readOnlyNixStore systemdExecutable extraSystemdUnitPaths; + inherit (config.system.nixos) distroName; + isExecutable = true; + inherit useHostResolvConf; + inherit (config.system.build) earlyMountScript; + path = lib.makeBinPath ([ + pkgs.coreutils + pkgs.util-linux + pkgs.strace + ] ++ lib.optional useHostResolvConf 
pkgs.openresolv); + postBootCommands = pkgs.writeText "local-cmds" + '' + ${config.boot.postBootCommands} + ${config.powerManagement.powerUpCommands} + ''; + }; + in lib.mkForce bootStage2; + + boot.kernelPackages = pkgs.linuxPackages_latest; + boot = { + extraModulePackages = [ + # Add the custom Ethernet module + #pkgs.xilinx-axienet-carv + ]; + kernelModules = config.boot.initrd.kernelModules; + kernelPatches = [ + { + name = "sbi-early-console"; + patch = null; + extraConfig = + # Early console via SBI + '' + RISCV_SBI y + RISCV_SBI_V01 y + SERIAL_EARLYCON y + SERIAL_EARLYCON_RISCV_SBI y + HVC_DRIVER y + HVC_RISCV_SBI y + '' + # Enable console driver + +'' + SERIAL_8250 y + SERIAL_8250_CONSOLE y + SERIAL_OF_PLATFORM y + CONSOLE_POLL y + '' + # Allows regions of persistent memory to be described in the device-tree. + + '' + OF_PMEM y + '' + # Allow you to use a contiguous range of reserved memory as one or more + # persistent block devices (/dev/pmem0) + + '' + LIBNVDIMM y + BLK_DEV_PMEM y + '' + # No vector extensions + + '' + RISCV_ISA_V n + RISCV_ISA_V_DEFAULT_ENABLE n + '' + # Debugging + + '' + DEBUG_KERNEL y + DEBUG_MISC y + DEBUG_WX y + MAGIC_SYSRQ y + SYSRQ_SERIAL y + DEBUG_VM y + SOFTLOCKUP_DETECTOR y + SOFTLOCKUP_DETECTOR_INTR_STORM y + HARDLOCKUP_DETECTOR y + DETECT_HUNG_TASK y + WQ_WATCHDOG y + WQ_CPU_INTENSIVE_REPORT y + TRACING y + BOOTTIME_TRACING y + STRICT_DEVMEM n + MMIOTRACE y + '' + # Disable SMP so we don't have IPI + + '' + SMP n + '' + # SPI driver + + '' + COMPILE_TEST y + SPI y + SPI_DAVINCI m + '' + ; + } + ]; + + initrd = { + # Avoid zstd as we don't have the tools in "cucu" machine + compressor = "gzip"; + kernelModules = [ + # DMA for Ethernet + #"xilinx_dma" + # Load the Ethernet module by default + #"xxvnet_carv" + + # For SPI + "spi_davinci" + "spidev" + ]; + + # Custom init script + extraFiles = { + "/shell".source = pkgs.writeScript "shell" '' + #!${config.system.build.extraUtils}/bin/ash + + set -x + + export
PATH=${config.system.build.extraUtils}/bin + ash + ''; + + "/testplic".source = pkgs.writeScript "testplic" '' + #!${config.system.build.extraUtils}/bin/ash + export PATH=${config.system.build.extraUtils}/bin + + set -x + + ( + echo "--- Testing threshold register init value" + # Ensure that reading a few times the threshold value + # always gives the same initial value 0 + t1=$(devmem 0x40a00000) # Read context 1 threshold value + t2=$(devmem 0x40a00000) # Read context 1 threshold value + t3=$(devmem 0x40a00000) # Read context 1 threshold value + found="$t1 $t2 $t3" + expected="0x00000000 0x00000000 0x00000000" + if [ "$found" = "$expected" ]; then + echo "--- Threshold init value: OK" + else + echo "found =$found" + echo "expected=$expected" + echo "--- Threshold init value: FAIL" + fi + ) + + ( + echo "--- Testing threshold register stability" + # Write the priority register of an interrupt and ensure + # the threshold register didn't change + devmem 0x40a00000 32 0 # Write context 1 threshold value 0 + devmem 0x40800010 32 5 # Write source 4 priority value 5 + t1=$(devmem 0x40a00000) # Read context 1 threshold value + t2=$(devmem 0x40a00000) # Read context 1 threshold value + found="$t1 $t2" + expected="0x00000000 0x00000000" + if [ "$found" = "$expected" ]; then + echo "--- Threshold stability: OK" + else + echo "found =$found" + echo "expected=$expected" + echo "--- Threshold stability: FAIL" + fi + ) + + ( + echo "--- Testing claim register" + + # Use aux timer on source 4 for this one + pending=$(devmem 0x40801000) # Dump pending bits of sources 0-31 + + # Ensure the aux timer is pending + if [ "$pending" = "0x00000010" ]; then + + # Make sure the priority is higher than the threshold + devmem 0x40800010 32 0x10 # Write source 4 priority value 16 + devmem 0x40802080 32 0x10 # Enable source 4 in context 1 + # Writing the threshold has to be last, otherwise it will change + devmem 0x40a00000 32 0 # Write context 1 threshold value 0 + c1=$(devmem 0x40a01004) # 
Claim context 1 + c2=$(devmem 0x40a01004) # Claim context 1 + c3=$(devmem 0x40a01004) # Claim context 1 + found="$c1 $c2 $c3" + expected="0x00000004 0x00000004 0x00000004" + if [ "$found" = "$expected" ]; then + echo "--- Testing claim register: OK" + else + echo "found =$found" + echo "expected=$expected" + echo "--- Testing claim register: FAIL" + fi + else + echo "unknown pending bits: $pending" + echo "--- Testing claim register: SKIP" + fi + ) + set +x + #echo "all done, dropping to a shell..." + #ash + ''; + + "/preinit".source = pkgs.writeScript "preinit" '' + #!${config.system.build.extraUtils}/bin/ash + export PATH=${config.system.build.extraUtils}/bin + # csrtool all-in-order + + # ip addr + # cat /proc/interrupts + + # modprobe xxvnet_carv + + # plictool -c2 + # plictool -c2 + # plictool -c2 + + # ip addr + + exec /init + ''; + }; + + # Add riscv-tools to initrd + extraUtilsCommands = '' + cp -a ${pkgs.riscv-tools}/bin/* $out/bin + ''; + + # Write a counter to the DMA region, so we can check the kernel is not + # dead. Monitor from the host with: + # while [ 1 ]; do xxd -s $((0x1bfff0000 - 0x60000000)) \ + # -l 4 /dev/qdma34000-MM-1; sleep 0.2; done + preDeviceCommands = '' + # Seed RNG + seedrng -d /tmp || true + mv /tmp/seed.no-credit /tmp/seed.credit || true + seedrng -d /tmp || true + echo "Available entropy: $(cat /proc/sys/kernel/random/entropy_avail)" + + # Last chance to enter a shell + if read -t 3 -p 'Press enter for shell... '; then + allowShell=1 + fail + fi + + # echo "Running tests..." + # sh /testplic + + # echo "Creating a heartbeat counter at 0x1bfff0000" + # sh -c 'hb=0; while [ 1 ]; do let hb=$hb+1; devmem 0x1bfff0000 32 $hb; done' & + '' + + + # Disable proactive compaction. May be better to disable CONFIG_COMPACTION. + '' + echo 0 > /proc/sys/vm/compaction_proactiveness + '' +# + +# # Show stacktrace on calls to the hvc_remove function. 
+# '' +# echo "Mount debugfs" +# mkdir -p /sys/kernel/debug/ +# mount -t debugfs none /sys/kernel/debug/ +# td=/sys/kernel/debug/tracing +# echo hvc_remove > $td/set_ftrace_filter +# echo function > $td/current_tracer +# echo 1 > $td/options/func_stack_trace +# '' +# FIXME: Disable sched_switch for now, as it still hangs the boot... +# + +# # Exclude the second pid, which is the kthread that will dump the trace to +# # the console, otherwise we live lock the kernel. Then enable the +# # sched_switch events. +# '' +# echo "Mount debugfs" +# mkdir -p /sys/kernel/debug/ +# mount -t debugfs none /sys/kernel/debug/ +# echo "Exclude pid 2 from sched" +# echo '(prev_pid != 2 && next_pid != 2)' > /sys/kernel/debug/tracing/events/sched/filter +# echo "Enable sched_switch events" +# echo 1 > /sys/kernel/debug/tracing/events/sched/sched_switch/enable +# '' + ; + }; + + loader = { + grub.enable = false; + generic-extlinux-compatible.enable = true; + }; + }; + + # No network + services.openssh.enable = false; + networking.useDHCP = false; + + # Run getty on /dev/console and restart it until it works + systemd.services."serial-getty@console" = { + enable = true; + after = [ "network.target" ]; + wantedBy = [ "getty.target" ]; # to start at boot + serviceConfig.Restart = "always"; + }; + + # Disable hvc0 as it is racing for the same console + systemd.services."serial-getty@hvc0" = { + enable = lib.mkForce false; + wantedBy = lib.mkForce [ ]; + }; + + services.getty.autologinUser = lib.mkForce "root"; + + sdImage = { + # The image will be loaded as-is in memory, so no compression + compressImage = false; + imageName = "rootfs.img"; + # Not needed for now + expandOnBoot = false; + populateFirmwareCommands = ""; + populateRootCommands = '' + mkdir -p ./files/boot + ${config.boot.loader.generic-extlinux-compatible.populateCmd} \ + -c ${config.system.build.toplevel} \ + -d ./files/boot + ''; + }; + + nixpkgs.overlays = [ (final: prev: { + #busybox = prev.busybox.overrideAttrs (old: { + 
# # Print some debug lines on switch_root to see where it hangs. + # patches = (old.patches or []) ++ [ ./patches/busybox-debug.patch ]; + #}); + + linuxPackages_latest = prev.linuxPackages_latest; + #linuxPackages_latest = prev.linuxPackages_latest.extend (lib.const (ksuper: { + # kernel = ksuper.kernel.override { + # stdenv = prev.gcc8Stdenv; + # }; + #})); + + bench2 = final.writeShellScript "bench2" '' + # Performs minimal FS setup and runs the SPEC benchmark + mkdir /tmp /bin /root + mount -t tmpfs tmpfs /tmp + export TMPDIR=/tmp + # We need /bin/sh + ln -s $(which sh) /bin/sh + # Check CPU usage + vmstat 5 5 + bash -x speclaunch + cat /tmp/spec/time.csv + # Give me a shell at the end + bash -l + ''; + + ox-dtb = prev.stdenv.mkDerivation rec { + name = "ox.dtb"; + src = ./dts; + dontConfigure = true; + nativeBuildInputs = [ prev.buildPackages.dtc ]; + buildPhase = '' + make lagarto_ox.dtb + ''; + installPhase = '' + mkdir $out + cp lagarto_ox.* $out + ''; + dontFixup = true; + hardeningDisable = [ "all" ]; + }; + + #bitstream = "${final.bitstreams}/lagarto-3-ox/gold.bit"; + bitstream = "${final.bitstreams}/lagarto-3-ox/ox_u55c_87a14c32_fix_threshold.bit"; + + bootrom = "${final.rbootrom}/rbootrom.bin"; + + uboot = prev.ubootQemuRiscv64Smode.override { + filesToInstall = [ "u-boot-nodtb.bin" ]; + #version = "2023.07.02-print-cpu-probe"; + #src = builtins.fetchGit { + # url = "file:///home/Computational/rarias/riscv/u-boot"; + # rev = "f80a22a480f0e4157647bacf90e663be457c72c4"; + #}; + patches = [ + #./patches/u-boot-debug.patch + ./patches/uboot-debug-ext-interrupts.patch + ./patches/uboot-exception-extras.patch + ]; + # Copy our environment to board/emulation/qemu-riscv/environ.env + preConfigure = '' + cp ${final.uboot-env} board/emulation/qemu-riscv/environ.env + ''; + #postConfigure = '' + # echo --------------------------- generated config: + # cat .config + # echo --------------------------- + #''; + #postBuild = '' + # echo --------------------------- 
generated env starts + # cat include/generated/env.in + # echo --------------------------- generated env ends + #''; + # + # CONFIG_SERIAL_PRESENT=n + # CONFIG_SYS_NS16550=n + extraConfig = '' + CONFIG_RISCV_ISA_C=n + CONFIG_REQUIRE_SERIAL_CONSOLE=n + CONFIG_SERIAL=y + CONFIG_SERIAL_PUTS=y + CONFIG_SHOW_BOOT_PROGRESS=y + CONFIG_SHOW_REGS=y + CONFIG_LIBCOMMON_SUPPORT=y + CONFIG_SERIAL_SEARCH_ALL=n + CONFIG_SERIAL_PROBE_ALL=n + CONFIG_OF_CONTROL=y + CONFIG_OF_EMBED=y + CONFIG_OF_HAS_PRIOR_STAGE=y + CONFIG_BLKMAP=y + CONFIG_CMD_BLKMAP=y + CONFIG_SBI_V01=y + CONFIG_DEBUG_UART=y + CONFIG_DEBUG_UART_ANNOUNCE=y + CONFIG_DEBUG_SBI_CONSOLE=y + CONFIG_SMP=n + CONFIG_TRACE_EARLY=y + CONFIG_CMD_MEMTEST=y + CONFIG_CMD_EXCEPTION=y + CONFIG_CMD_TIMER=y + CONFIG_ENV_SOURCE_FILE="environ" + '' +# # Enable debug logs +# + +# '' +# CONFIG_LOG=y +# CONFIG_LOGLEVEL=9 +# CONFIG_LOG_MAX_LEVEL=9 +# CONFIG_LOG_DEFAULT_LEVEL=9 +# '' + ; + extraMakeFlags = [ + #"V=1" + #"KCPPFLAGS=-DLOG_DEBUG" + #"EXT_DTB=${final.ox-dtb}/lagarto_ox.dtb" + ]; + }; + + uboot-env = let + init = "${config.system.build.toplevel}/init"; + initrd = "${config.system.build.initialRamdisk}/initrd"; + # Create pmem of 3 GiB [0x140000000, 0x200000000) + #fdt mknode / pmem@0x140000000 + #fdt set /pmem@0x140000000 compatible "pmem-region" + #fdt set /pmem@0x140000000 reg <0x1 0x40000000 0x0 0xc0000000> + + # Reduce memory to 3 GiB [0x80000000, 0x140000000) + #fdt set /memory@80000000 reg <0x0 0x80000000 0x0 0xc0000000> + + # Set kernel bootcmd options. 
+ # rdinit=/preinit boot custom preinit script + # console=ttyS0,115200n8 use serial driver (slow) + # Systemd options + # systemd.log_level=debug + # systemd.log_target=console + # NixOS interesting options: + # debug1 enable debug shell in stage 1 + # debug2 enable debug shell in stage 2 (custom) + # bench2 run benchmark on stage 2 (custom) + # boot.trace enable set -x in stage 1 + # boot.tracedebug enable set -x in stage 2 + # Ftrace interesting options: + # trace_event=initcall:* trace the init function of drivers + # trace_options=sym-addr display function address + # tp_printk write ftrace events to console + # trace_buf_size=1M set ftrace buffer to 1M + # + in prev.runCommand "uboot.txt" {} '' + cat > $out < ++ ++int alveo_uart_init(unsigned long base, u32 in_freq, u32 baudrate, u32 reg_shift, ++ u32 reg_width, u32 reg_offset); ++ ++#endif +diff --git a/lib/utils/serial/Kconfig b/lib/utils/serial/Kconfig +index e3589ca..b754c7c 100644 +--- a/lib/utils/serial/Kconfig ++++ b/lib/utils/serial/Kconfig +@@ -84,6 +84,10 @@ config SERIAL_UART8250 + bool "8250 UART support" + default n + ++config SERIAL_ALVEO_UART ++ bool "ALveo UART support" ++ default n ++ + config SERIAL_XILINX_UARTLITE + bool "Xilinx UART Lite support" + default n +diff --git a/lib/utils/serial/alveo_uart.c b/lib/utils/serial/alveo_uart.c +new file mode 100644 +index 0000000..a351741 +--- /dev/null ++++ b/lib/utils/serial/alveo_uart.c +@@ -0,0 +1,123 @@ ++/* ++ * SPDX-License-Identifier: BSD-2-Clause ++ */ ++ ++#include ++#include ++#include ++ ++/* clang-format off */ ++ ++#define UART_RBR_OFFSET 0 /* In: Recieve Buffer Register */ ++#define UART_THR_OFFSET 0 /* Out: Transmitter Holding Register */ ++#define UART_DLL_OFFSET 0 /* Out: Divisor Latch Low */ ++#define UART_IER_OFFSET 1 /* I/O: Interrupt Enable Register */ ++#define UART_DLM_OFFSET 1 /* Out: Divisor Latch High */ ++#define UART_FCR_OFFSET 2 /* Out: FIFO Control Register */ ++#define UART_IIR_OFFSET 2 /* I/O: Interrupt Identification 
Register */ ++#define UART_LCR_OFFSET 3 /* Out: Line Control Register */ ++#define UART_MCR_OFFSET 4 /* Out: Modem Control Register */ ++#define UART_LSR_OFFSET 5 /* In: Line Status Register */ ++#define UART_MSR_OFFSET 6 /* In: Modem Status Register */ ++#define UART_SCR_OFFSET 7 /* I/O: Scratch Register */ ++#define UART_MDR1_OFFSET 8 /* I/O: Mode Register */ ++ ++#define UART_LSR_FIFOE 0x80 /* Fifo error */ ++#define UART_LSR_TEMT 0x40 /* Transmitter empty */ ++#define UART_LSR_THRE 0x20 /* Transmit-hold-register empty */ ++#define UART_LSR_BI 0x10 /* Break interrupt indicator */ ++#define UART_LSR_FE 0x08 /* Frame error indicator */ ++#define UART_LSR_PE 0x04 /* Parity error indicator */ ++#define UART_LSR_OE 0x02 /* Overrun error indicator */ ++#define UART_LSR_DR 0x01 /* Receiver data ready */ ++#define UART_LSR_BRK_ERROR_BITS 0x1E /* BI, FE, PE, OE bits */ ++ ++/* clang-format on */ ++ ++static volatile char *alveo_uart_base; ++static u32 alveo_uart_in_freq; ++static u32 alveo_uart_baudrate; ++static u32 alveo_uart_reg_width; ++static u32 alveo_uart_reg_shift; ++ ++static u32 get_reg(u32 num) ++{ ++ u32 offset = num << alveo_uart_reg_shift; ++ ++ if (alveo_uart_reg_width == 1) ++ return readb(alveo_uart_base + offset); ++ else if (alveo_uart_reg_width == 2) ++ return readw(alveo_uart_base + offset); ++ else ++ return readl(alveo_uart_base + offset); ++} ++ ++static void set_reg(u32 num, u32 val) ++{ ++ u32 offset = num << alveo_uart_reg_shift; ++ ++ if (alveo_uart_reg_width == 1) ++ writeb(val, alveo_uart_base + offset); ++ else if (alveo_uart_reg_width == 2) ++ writew(val, alveo_uart_base + offset); ++ else ++ writel(val, alveo_uart_base + offset); ++} ++ ++static void alveo_uart_putc(char ch) ++{ ++ while (!(get_reg(UART_LSR_OFFSET) & UART_LSR_TEMT)) ++ ; ++ ++ set_reg(UART_THR_OFFSET, ch); ++} ++ ++static int alveo_uart_getc(void) ++{ ++ if (get_reg(UART_LSR_OFFSET) & UART_LSR_DR) ++ return get_reg(UART_RBR_OFFSET); ++ return -1; ++} ++ ++static struct 
sbi_console_device alveo_uart_console = { ++ .name = "alveo_uart", ++ .console_putc = alveo_uart_putc, ++ .console_getc = alveo_uart_getc ++}; ++ ++int alveo_uart_init(unsigned long base, u32 in_freq, u32 baudrate, u32 reg_shift, ++ u32 reg_width, u32 reg_offset) ++{ ++ u16 bdiv = 0; ++ ++ alveo_uart_base = (volatile char *)base + reg_offset; ++ alveo_uart_reg_shift = reg_shift; ++ alveo_uart_reg_width = reg_width; ++ alveo_uart_in_freq = in_freq; ++ alveo_uart_baudrate = baudrate; ++ ++ if (alveo_uart_baudrate) { ++ bdiv = alveo_uart_in_freq / (16 * alveo_uart_baudrate); ++ } ++ ++ /* Disable all interrupts */ ++ set_reg(UART_IER_OFFSET, 0x00); ++ /* Enable DLAB */ ++ set_reg(UART_LCR_OFFSET, 0x80); ++ ++ if (bdiv) { ++ /* Set divisor low byte */ ++ set_reg(UART_DLL_OFFSET, bdiv & 0xff); ++ /* Set divisor high byte */ ++ set_reg(UART_DLM_OFFSET, (bdiv >> 8) & 0xff); ++ } ++ ++ /* 8 bits, no parity, one stop bit */ ++ set_reg(UART_LCR_OFFSET, 0x03); // previous was 0x0B ++ /* Disable all interrupts*/ ++ set_reg(UART_IER_OFFSET, 0x00); ++ ++ sbi_console_set_device(&alveo_uart_console); ++ ++ return 0; ++} +diff --git a/lib/utils/serial/objects.mk b/lib/utils/serial/objects.mk +index 1e6bd2e..0268bdf 100644 +--- a/lib/utils/serial/objects.mk ++++ b/lib/utils/serial/objects.mk +@@ -44,5 +44,6 @@ libsbiutils-objs-$(CONFIG_SERIAL_SHAKTI) += serial/shakti-uart.o + libsbiutils-objs-$(CONFIG_SERIAL_SIFIVE) += serial/sifive-uart.o + libsbiutils-objs-$(CONFIG_SERIAL_LITEX) += serial/litex-uart.o + libsbiutils-objs-$(CONFIG_SERIAL_UART8250) += serial/uart8250.o ++libsbiutils-objs-$(CONFIG_SERIAL_ALVEO_UART) += serial/alveo_uart.o + libsbiutils-objs-$(CONFIG_SERIAL_XILINX_UARTLITE) += serial/xlnx-uartlite.o + libsbiutils-objs-$(CONFIG_SERIAL_SEMIHOSTING) += serial/semihosting.o +diff --git a/platform/fpga/sargantana_alveo/Kconfig b/platform/fpga/sargantana_alveo/Kconfig +new file mode 100644 +index 0000000..bf3e7e6 +--- /dev/null ++++ b/platform/fpga/sargantana_alveo/Kconfig 
+@@ -0,0 +1,10 @@ ++# SPDX-License-Identifier: BSD-2-Clause ++ ++config PLATFORM_SARGANTANA_ALVEO_FPGA ++ bool ++ select FDT ++ select IPI_MSWI ++ select IRQCHIP_PLIC ++ select SERIAL_ALVEO_UART ++ select TIMER_MTIMER ++ default y +diff --git a/platform/fpga/sargantana_alveo/configs/defconfig b/platform/fpga/sargantana_alveo/configs/defconfig +new file mode 100644 +index 0000000..e69de29 +diff --git a/platform/fpga/sargantana_alveo/objects.mk b/platform/fpga/sargantana_alveo/objects.mk +new file mode 100644 +index 0000000..d444abe +--- /dev/null ++++ b/platform/fpga/sargantana_alveo/objects.mk +@@ -0,0 +1,87 @@ ++# ++# SPDX-License-Identifier: BSD-2-Clause ++# ++# Copyright (c) 2019 Western Digital Corporation or its affiliates. ++# ++ ++# Compiler pre-processor flags ++platform-cppflags-y = ++ ++# C Compiler and assembler flags. ++platform-cflags-y = ++platform-asflags-y = ++ ++# Linker flags: additional libraries and object files that the platform ++# code needs can be added here ++platform-ldflags-y = ++ ++# ++# Command for platform specific "make run" ++# Useful for development and debugging on plaftform simulator (such as QEMU) ++# ++# platform-runcmd = your_platform_run.sh ++ ++# ++# Platform RISC-V XLEN, ABI, ISA and Code Model configuration. ++# These are optional parameters but platforms can optionaly provide it. ++# Some of these are guessed based on GCC compiler capabilities ++# ++PLATFORM_RISCV_XLEN = 64 ++PLATFORM_RISCV_ABI = lp64d ++PLATFORM_RISCV_ISA = rv64imafd ++PLATFORM_RISCV_CODE_MODEL = medany ++ ++# Space separated list of object file names to be compiled for the platform ++platform-objs-y += platform.o ++ ++# ++# If the platform support requires a builtin device tree file, the name of ++# the device tree compiled file should be specified here. The device tree ++# source file be in the form
.dts ++# ++# platform-objs-y +=
.o ++ ++# Firmware load address configuration. This is mandatory. ++FW_TEXT_START=0x80000000 ++ ++# Optional parameter for path to external FDT ++# FW_FDT_PATH="path to platform flattened device tree file" ++ ++# ++# Dynamic firmware configuration. ++# Optional parameters are commented out. Uncomment and define these parameters ++# as needed. ++# ++FW_DYNAMIC=n ++ ++# ++# Jump firmware configuration. ++# Optional parameters are commented out. Uncomment and define these parameters ++# as needed. ++# ++FW_JUMP=n ++# This needs to be 4MB aligned for 32-bit support ++# This needs to be 2MB aligned for 64-bit support ++# ifeq ($(PLATFORM_RISCV_XLEN), 32) ++# FW_JUMP_ADDR=0x80400000 ++# else ++# FW_JUMP_ADDR=0x80200000 ++# endif ++# FW_JUMP_FDT_ADDR=0x82200000 ++ ++# ++# Firmware with payload configuration. ++# Optional parameters are commented out. Uncomment and define these parameters ++# as needed. ++# ++FW_PAYLOAD=y ++# This needs to be 4MB aligned for 32-bit support ++# This needs to be 2MB aligned for 64-bit support ++ifeq ($(PLATFORM_RISCV_XLEN), 32) ++FW_PAYLOAD_OFFSET=0x400000 ++else ++FW_PAYLOAD_OFFSET=0x200000 ++endif ++FW_PAYLOAD_ALIGN=0x1000 ++# FW_PAYLOAD_PATH="path to next boot stage binary image file" ++# FW_PAYLOAD_FDT_ADDR=0x82200000 +diff --git a/platform/fpga/sargantana_alveo/platform.c b/platform/fpga/sargantana_alveo/platform.c +new file mode 100644 +index 0000000..a359b34 +--- /dev/null ++++ b/platform/fpga/sargantana_alveo/platform.c +@@ -0,0 +1,146 @@ ++/* ++ * SPDX-License-Identifier: BSD-2-Clause ++ * ++ * Copyright (c) 2019 Western Digital Corporation or its affiliates. ++ */ ++ ++#include ++#include ++#include ++#include ++ ++/* ++ * Include these files as needed. ++ * See objects.mk SARGANTANA_ALVEO_xxx configuration parameters. 
++ */ ++ ++#include ++#include ++ ++#define SARGANTANA_ALVEO_HART_COUNT 1 ++ ++#define SARGANTANA_ALVEO_UART_BASE_ADDR 0x40000000 ++#define SARGANTANA_ALVEO_UART_OFFSET 0x1000 ++//#define SARGANTANA_ALVEO_UART_ADDR SARGANTANA_ALVEO_UART_BASE_ADDR + SARGANTANA_ALVEO_UART_XLNX_OFFSET ++#define SARGANTANA_ALVEO_UART_INPUT_FREQ 50000000 ++#define SARGANTANA_ALVEO_UART_BAUDRATE 115200 ++ ++#define SARGANTANA_ALVEO_TIMER_BASE 0x40170000 ++#define ADDR_TIME_L 0x0u // 32 lower bits of the time register ++#define ADDR_TIME_H 0x1u // 32 higher bits of the time register ++#define ADDR_TIMECMP_L 0x2u // 32 lower bits of the time comparator ++#define ADDR_TIMECMP_H 0x3u // 32 higher bits of the time comparator ++ ++volatile uint32_t *timer_base_ptr = (uint32_t *)(SARGANTANA_ALVEO_TIMER_BASE); ++ ++/* ++ * Platform early initialization. ++ */ ++static int sargantana_alveo_early_init(bool cold_boot) ++{ ++ return 0; ++} ++ ++/* ++ * Platform final initialization. ++ */ ++static int sargantana_alveo_final_init(bool cold_boot) ++{ ++ return 0; ++} ++ ++/* ++ * Initialize the platform console. ++ */ ++static int sargantana_alveo_console_init(void) ++{ ++ return alveo_uart_init(SARGANTANA_ALVEO_UART_BASE_ADDR, ++ SARGANTANA_ALVEO_UART_INPUT_FREQ, ++ SARGANTANA_ALVEO_UART_BAUDRATE, ++ 2, 4, ++ SARGANTANA_ALVEO_UART_OFFSET); ++} ++ ++/* ++ * Initialize the platform interrupt controller for current HART. ++ */ ++static int sargantana_alveo_irqchip_init(bool cold_boot) ++{ ++ u32 hartid = current_hartid(); ++ return hartid; ++} ++ ++/* ++ * Initialize IPI for current HART. ++ */ ++static int sargantana_alveo_ipi_init(bool cold_boot) ++{ ++ return 0; ++} ++ ++/* ++ * Get platform timer value. ++ */ ++static u64 sargantana_alveo_timer_value(void) ++{ ++ return ((u64)*(timer_base_ptr + ADDR_TIME_H) << 32) + *(timer_base_ptr + ADDR_TIME_L); ++} ++ ++/* ++ * Start platform timer event for current HART. 
++ */ ++static void sargantana_alveo_timer_event_start(u64 next_event) ++{ ++ *(timer_base_ptr + ADDR_TIMECMP_H) = next_event >> 32; ++ *(timer_base_ptr + ADDR_TIMECMP_L) = next_event; ++} ++ ++/* ++ * Stop platform timer event for current HART. ++ */ ++static void sargantana_alveo_timer_event_stop(void) ++{ ++ ++ *(timer_base_ptr + ADDR_TIMECMP_H) = 0; ++ *(timer_base_ptr + ADDR_TIMECMP_L) = 0; ++} ++ ++static struct sbi_timer_device mtimer = { ++ .name = "generic_timer", // TODO Where the timer comes from? I would prefer a better name :p ++ .timer_freq = SARGANTANA_ALVEO_UART_INPUT_FREQ, ++ .timer_value = sargantana_alveo_timer_value, ++ .timer_event_start = sargantana_alveo_timer_event_start, ++ .timer_event_stop = sargantana_alveo_timer_event_stop ++}; ++ ++/* ++ * Initialize platform timer for current HART. ++ */ ++static int sargantana_alveo_timer_init(bool cold_boot) ++{ ++ *(timer_base_ptr + ADDR_TIMECMP_H) = 0; ++ *(timer_base_ptr + ADDR_TIMECMP_L) = 0; ++ sbi_timer_set_device(&mtimer); ++ return 0; ++} ++ ++/* ++ * Platform descriptor. 
++ */ ++const struct sbi_platform_operations sargantana_alveo_ops = { ++ .early_init = sargantana_alveo_early_init, ++ .final_init = sargantana_alveo_final_init, ++ .console_init = sargantana_alveo_console_init, ++ .irqchip_init = sargantana_alveo_irqchip_init, ++ .ipi_init = sargantana_alveo_ipi_init, ++ .timer_init = sargantana_alveo_timer_init ++}; ++const struct sbi_platform platform = { ++ .opensbi_version = OPENSBI_VERSION, ++ .platform_version = SBI_PLATFORM_VERSION(0x0, 0x01), ++ .name = "Sargantana (for Xilinx Alveo FPGA)", ++ .features = SBI_PLATFORM_DEFAULT_FEATURES, ++ .hart_count = SARGANTANA_ALVEO_HART_COUNT, ++ .hart_stack_size = SBI_PLATFORM_DEFAULT_HART_STACK_SIZE, ++ .platform_ops_addr = (unsigned long)&sargantana_alveo_ops ++}; diff --git a/patches/busybox-debug.patch b/patches/busybox-debug.patch new file mode 100644 index 0000000000000000000000000000000000000000..7e171c3f164901ff7bd7072359e0590dec89a3c6 --- /dev/null +++ b/patches/busybox-debug.patch @@ -0,0 +1,68 @@ +Only in busybox-1.36.1-mod: tags +diff -up -r busybox-1.36.1/util-linux/switch_root.c busybox-1.36.1-mod/util-linux/switch_root.c +--- busybox-1.36.1/util-linux/switch_root.c 2021-09-30 00:04:47.000000000 +0200 ++++ busybox-1.36.1-mod/util-linux/switch_root.c 2024-07-01 16:08:28.336541504 +0200 +@@ -181,6 +181,8 @@ int switch_root_main(int argc UNUSED_PAR + unsigned dry_run = 0; + dev_t rootdev; + ++ printf("HELLO THIS IS SWITCH ROOT STARTING\n"); ++ + // Parse args. 
'+': stop at first non-option + if (ENABLE_SWITCH_ROOT && (!ENABLE_RUN_INIT || applet_name[0] == 's')) { + //usage:#define switch_root_trivial_usage +@@ -241,12 +243,15 @@ int switch_root_main(int argc UNUSED_PAR + if (stat("/init", &st) != 0 || !S_ISREG(st.st_mode)) { + bb_error_msg_and_die("'%s' is not a regular file", "/init"); + } ++ printf("SWITCH ROOT LINE %d OK\n", __LINE__); + statfs("/", &stfs); // this never fails ++ printf("SWITCH ROOT LINE %d OK\n", __LINE__); + if ((unsigned)stfs.f_type != RAMFS_MAGIC + && (unsigned)stfs.f_type != TMPFS_MAGIC + ) { + bb_simple_error_msg_and_die("root filesystem is not ramfs/tmpfs"); + } ++ printf("SWITCH ROOT LINE %d OK\n", __LINE__); + + if (!dry_run) { + // Zap everything out of rootdev +@@ -258,19 +263,26 @@ int switch_root_main(int argc UNUSED_PAR + bb_simple_perror_msg_and_die("error moving root"); + } + } ++ printf("SWITCH ROOT LINE %d OK\n", __LINE__); ++ printf("XCHROOT\n"); + xchroot("."); ++ printf("SWITCH ROOT LINE %d OK\n", __LINE__); + // The chdir is needed to recalculate "." and ".." links + /*xchdir("/"); - done in xchroot */ + + // If a new console specified, redirect stdin/stdout/stderr to it + if (console) { ++ printf("REDIRECTING CONSOLE\n"); ++ printf("SWITCH ROOT LINE %d OK\n", __LINE__); + int fd = open_or_warn(console, O_RDWR); + if (fd >= 0) { + xmove_fd(fd, 0); + xdup2(0, 1); + xdup2(0, 2); + } ++ printf("SWITCH ROOT LINE %d OK\n", __LINE__); + } ++ printf("SWITCH ROOT LINE %d OK\n", __LINE__); + + if (dry_run) { + // Does NEW_INIT look like it can be executed? 
+@@ -280,8 +292,11 @@ int switch_root_main(int argc UNUSED_PAR + if (access(argv[0], X_OK) == 0) + return 0; + } else { ++ printf("SWITCH ROOT LINE %d OK\n", __LINE__); ++ printf("LAUNCHING EXECV\n"); + // Exec NEW_INIT + execv(argv[0], argv); ++ printf("RETURNED FROM EXECV???\n"); + } + bb_perror_msg_and_die("can't execute '%s'", argv[0]); + } diff --git a/patches/ethernet-driver-build.patch b/patches/ethernet-driver-build.patch new file mode 100644 index 0000000000000000000000000000000000000000..a0736a25254a5cacf14545bf0dbe98262a771f1a --- /dev/null +++ b/patches/ethernet-driver-build.patch @@ -0,0 +1,21 @@ +diff --git a/drivers/xxvnet_carv.c b/drivers/xxvnet_carv.c +index d3f60f9..9fd4c21 100644 +--- a/drivers/xxvnet_carv.c ++++ b/drivers/xxvnet_carv.c +@@ -36,6 +36,7 @@ + #include + #include + #include ++#include + + #include "xxvnet_carv.h" + +@@ -1511,7 +1512,7 @@ axienet_dma_probe(struct platform_device *pdev, struct net_device *ndev) + spin_lock_init(&q->tx_lock); + spin_lock_init(&q->rx_lock); + +- netif_napi_add(ndev, &lp->napi, axienet_rx_poll, AXIENET_NAPI_WEIGHT); ++ netif_napi_add(ndev, &lp->napi, axienet_rx_poll); + + return 0; + } diff --git a/patches/ethernet-driver-kbuild.patch b/patches/ethernet-driver-kbuild.patch new file mode 100644 index 0000000000000000000000000000000000000000..32fa1af24d53dccdc4f0e33449dcb2ff0144a3ca --- /dev/null +++ b/patches/ethernet-driver-kbuild.patch @@ -0,0 +1,7 @@ +diff --git a/drivers/Kbuild b/drivers/Kbuild +index 28d6c0f..7f02860 100644 +--- a/drivers/Kbuild ++++ b/drivers/Kbuild +@@ -1,2 +1 @@ + obj-m := xxvnet_carv.o +-obj-m := xilinx_dma.o diff --git a/patches/ethernet-driver-poll.patch b/patches/ethernet-driver-poll.patch new file mode 100644 index 0000000000000000000000000000000000000000..db2b8a9c3aebfc653b132922354a2ba83241fea9 --- /dev/null +++ b/patches/ethernet-driver-poll.patch @@ -0,0 +1,23 @@ +diff --git a/drivers/xxvnet_carv.c b/drivers/xxvnet_carv.c +index eb664bb..d3f60f9 100644 +--- 
a/drivers/xxvnet_carv.c ++++ b/drivers/xxvnet_carv.c +@@ -1435,12 +1435,12 @@ static void axienet_poll_controller(struct net_device *ndev) + { + struct axienet_local *lp = netdev_priv(ndev); + +- disable_irq(lp->tx_irq); +- disable_irq(lp->rx_irq); +- axienet_rx_irq(lp->tx_irq, ndev); +- axienet_tx_irq(lp->rx_irq, ndev); +- enable_irq(lp->tx_irq); +- enable_irq(lp->rx_irq); ++ disable_irq(lp->dq->tx_irq); ++ disable_irq(lp->dq->rx_irq); ++ axienet_rx_irq(lp->dq->tx_irq, ndev); ++ axienet_tx_irq(lp->dq->rx_irq, ndev); ++ enable_irq(lp->dq->tx_irq); ++ enable_irq(lp->dq->rx_irq); + } + #endif + diff --git a/patches/opensbi-dont-delegate.patch b/patches/opensbi-dont-delegate.patch new file mode 100644 index 0000000000000000000000000000000000000000..343ca9370c8546c3cc73d440cb141224986c114e --- /dev/null +++ b/patches/opensbi-dont-delegate.patch @@ -0,0 +1,37 @@ +diff --git a/lib/sbi/sbi_hart.c b/lib/sbi/sbi_hart.c +index c366701..c5b5249 100644 +--- a/lib/sbi/sbi_hart.c ++++ b/lib/sbi/sbi_hart.c +@@ -199,7 +199,7 @@ static int delegate_traps(struct sbi_scratch *scratch) + return 0; + + /* Send M-mode interrupts and most exceptions to S-mode */ +- interrupts = MIP_SSIP | MIP_STIP | MIP_SEIP; ++ interrupts = MIP_SSIP | MIP_STIP; + interrupts |= sbi_pmu_irq_bit(); + + exceptions = (1U << CAUSE_MISALIGNED_FETCH) | (1U << CAUSE_BREAKPOINT) | +diff --git a/lib/sbi/sbi_irqchip.c b/lib/sbi/sbi_irqchip.c +index 0ae604a..dd4592a 100644 +--- a/lib/sbi/sbi_irqchip.c ++++ b/lib/sbi/sbi_irqchip.c +@@ -37,8 +37,7 @@ int sbi_irqchip_init(struct sbi_scratch *scratch, bool cold_boot) + if (rc) + return rc; + +- if (ext_irqfn != default_irqfn) +- csr_set(CSR_MIE, MIP_MEIP); ++ csr_set(CSR_MIE, MIP_MEIP); + + return 0; + } +@@ -47,8 +46,7 @@ void sbi_irqchip_exit(struct sbi_scratch *scratch) + { + const struct sbi_platform *plat = sbi_platform_ptr(scratch); + +- if (ext_irqfn != default_irqfn) +- csr_clear(CSR_MIE, MIP_MEIP); ++ csr_clear(CSR_MIE, MIP_MEIP); + + 
sbi_platform_irqchip_exit(plat); + } diff --git a/patches/opensbi-dump-mregs.patch b/patches/opensbi-dump-mregs.patch new file mode 100644 index 0000000000000000000000000000000000000000..3385d4f077c6bca3f435b50e2e1e22f5d44ad4aa --- /dev/null +++ b/patches/opensbi-dump-mregs.patch @@ -0,0 +1,19 @@ +diff --git a/lib/sbi/sbi_hart.c b/lib/sbi/sbi_hart.c +index c366701..1ef6145 100644 +--- a/lib/sbi/sbi_hart.c ++++ b/lib/sbi/sbi_hart.c +@@ -241,6 +241,14 @@ void sbi_hart_delegation_dump(struct sbi_scratch *scratch, + prefix, suffix, csr_read(CSR_MIDELEG)); + sbi_printf("%sMEDELEG%s: 0x%" PRILX "\n", + prefix, suffix, csr_read(CSR_MEDELEG)); ++ sbi_printf("%sMTVEC%s : 0x%" PRILX "\n", ++ prefix, suffix, csr_read(CSR_MTVEC)); ++ sbi_printf("%sMIE%s : 0x%" PRILX "\n", ++ prefix, suffix, csr_read(CSR_MIE)); ++ sbi_printf("%sMIP%s : 0x%" PRILX "\n", ++ prefix, suffix, csr_read(CSR_MIP)); ++ sbi_printf("%sMSTATUS%s: 0x%" PRILX "\n", ++ prefix, suffix, csr_read(CSR_MSTATUS)); + } + + unsigned int sbi_hart_mhpm_mask(struct sbi_scratch *scratch) diff --git a/patches/opensbi-enable-meip.patch b/patches/opensbi-enable-meip.patch new file mode 100644 index 0000000000000000000000000000000000000000..9121efccbefb453d3d40dc52e25fff1d92c8125e --- /dev/null +++ b/patches/opensbi-enable-meip.patch @@ -0,0 +1,24 @@ +diff --git a/lib/sbi/sbi_irqchip.c b/lib/sbi/sbi_irqchip.c +index 0ae604a..dd4592a 100644 +--- a/lib/sbi/sbi_irqchip.c ++++ b/lib/sbi/sbi_irqchip.c +@@ -37,8 +37,7 @@ int sbi_irqchip_init(struct sbi_scratch *scratch, bool cold_boot) + if (rc) + return rc; + +- if (ext_irqfn != default_irqfn) +- csr_set(CSR_MIE, MIP_MEIP); ++ csr_set(CSR_MIE, MIP_MEIP); + + return 0; + } +@@ -47,8 +46,7 @@ void sbi_irqchip_exit(struct sbi_scratch *scratch) + { + const struct sbi_platform *plat = sbi_platform_ptr(scratch); + +- if (ext_irqfn != default_irqfn) +- csr_clear(CSR_MIE, MIP_MEIP); ++ csr_clear(CSR_MIE, MIP_MEIP); + + sbi_platform_irqchip_exit(plat); + } diff --git 
a/patches/opensbi-enable-seip.patch b/patches/opensbi-enable-seip.patch new file mode 100644 index 0000000000000000000000000000000000000000..d1861e5745fe6ae2ba4a2ea3603a18080077039e --- /dev/null +++ b/patches/opensbi-enable-seip.patch @@ -0,0 +1,26 @@ +diff --git a/lib/sbi/sbi_irqchip.c b/lib/sbi/sbi_irqchip.c +index 0ae604a..94832c8 100644 +--- a/lib/sbi/sbi_irqchip.c ++++ b/lib/sbi/sbi_irqchip.c +@@ -37,8 +37,8 @@ int sbi_irqchip_init(struct sbi_scratch *scratch, bool cold_boot) + if (rc) + return rc; + +- if (ext_irqfn != default_irqfn) +- csr_set(CSR_MIE, MIP_MEIP); ++ csr_set(CSR_MIE, MIP_MEIP | MIP_SEIP); ++ csr_set(CSR_MSTATUS, MSTATUS_MIE | MSTATUS_SIE); + + return 0; + } +@@ -47,8 +47,8 @@ void sbi_irqchip_exit(struct sbi_scratch *scratch) + { + const struct sbi_platform *plat = sbi_platform_ptr(scratch); + +- if (ext_irqfn != default_irqfn) +- csr_clear(CSR_MIE, MIP_MEIP); ++ csr_clear(CSR_MIE, MIP_MEIP | MIP_SEIP); ++ csr_clear(CSR_MSTATUS, MSTATUS_MIE | MSTATUS_SIE); + + sbi_platform_irqchip_exit(plat); + } diff --git a/patches/opensbi-lagarto-hun.patch b/patches/opensbi-lagarto-hun.patch new file mode 100644 index 0000000000000000000000000000000000000000..f7e938c2aaeed5eca7c5db227fe78e00e02032fc --- /dev/null +++ b/patches/opensbi-lagarto-hun.patch @@ -0,0 +1,13 @@ +--- a/platform/fpga/openpiton/platform.c 2024-03-12 16:27:13.886525365 +0100 ++++ b/platform/fpga/openpiton/platform.c 2024-05-27 11:42:47.748244398 +0200 +@@ -24,8 +24,8 @@ + #define OPENPITON_DEFAULT_UART_REG_WIDTH 1 + #define OPENPITON_DEFAULT_UART_REG_OFFSET 0 + #define OPENPITON_DEFAULT_PLIC_ADDR 0xfff1100000 +-#define OPENPITON_DEFAULT_PLIC_NUM_SOURCES 2 +-#define OPENPITON_DEFAULT_HART_COUNT 3 ++#define OPENPITON_DEFAULT_PLIC_NUM_SOURCES 3 ++#define OPENPITON_DEFAULT_HART_COUNT 20 + #define OPENPITON_DEFAULT_CLINT_ADDR 0xfff1020000 + #define OPENPITON_DEFAULT_ACLINT_MTIMER_FREQ 1000000 + #define OPENPITON_DEFAULT_ACLINT_MSWI_ADDR \ diff --git a/patches/opensbi-test-plic.patch 
b/patches/opensbi-test-plic.patch new file mode 100644 index 0000000000000000000000000000000000000000..d85a3c1c93865d3d6657e363605070d9eff64a94 --- /dev/null +++ b/patches/opensbi-test-plic.patch @@ -0,0 +1,200 @@ +diff --git a/lib/sbi/sbi_irqchip.c b/lib/sbi/sbi_irqchip.c +index 0ae604a..e34e90c 100644 +--- a/lib/sbi/sbi_irqchip.c ++++ b/lib/sbi/sbi_irqchip.c +@@ -9,6 +9,9 @@ + + #include + #include ++#include ++ ++static void do_plic_test(void); + + static int default_irqfn(void) + { +@@ -37,8 +40,10 @@ int sbi_irqchip_init(struct sbi_scratch *scratch, bool cold_boot) + if (rc) + return rc; + +- if (ext_irqfn != default_irqfn) +- csr_set(CSR_MIE, MIP_MEIP); ++ //csr_set(CSR_MIE, MIP_SEIP); ++ //csr_set(CSR_MSTATUS, MSTATUS_SIE); ++ ++ do_plic_test(); + + return 0; + } +@@ -47,8 +52,170 @@ void sbi_irqchip_exit(struct sbi_scratch *scratch) + { + const struct sbi_platform *plat = sbi_platform_ptr(scratch); + +- if (ext_irqfn != default_irqfn) +- csr_clear(CSR_MIE, MIP_MEIP); ++ //csr_clear(CSR_MIE, MIP_SEIP); ++ //csr_clear(CSR_MSTATUS, MSTATUS_SIE); + + sbi_platform_irqchip_exit(plat); + } ++ ++ ++/* ----------------- PLIC tests ---------------- */ ++ ++ ++#define MIE_MEIE (1UL << 11) // Machine External Interrupt Enable ++#define SIE_SEIE (1UL << 9) ++#define MIDELEG_SEIE (1UL << 9) // Delegate Machine External Interrupt to Supervisor ++#define PLIC_TIMER_PORT 4 ++// Base address of PLIC ++#define PLIC_BASE 0x40800000UL ++#define PLIC_PRIORITY_OFFSET 0x0UL ++#define PLIC_PENDING_OFFSET 0x1000UL ++#define PLIC_ENABLE_OFFSET 0x2080UL ++#define PLIC_THRESHOLD_OFFSET 0x201000UL ++#define PLIC_CLAIM_OFFSET 0x201004UL ++ ++// Aux timer ++#define AUX_TIMER_BASE 0x40010000UL ++#define MTIMECMP_OFFSET 0x4000UL ++#define MTIME_OFFSET 0xBFF8UL ++ ++#define MSTATUS_MPP_MASK (3 << 11) ++#define MSTATUS_MPP_SUPERVISOR (1 << 11) ++ ++static volatile unsigned long *mtime = (unsigned long *)(AUX_TIMER_BASE + MTIME_OFFSET); ++static volatile unsigned long *mtimecmp = (unsigned 
long *)(AUX_TIMER_BASE + MTIMECMP_OFFSET); ++ ++ ++static void dumpregs(int machine) ++{ ++ char *prefix = "\t"; ++ char *suffix = "\t"; ++ sbi_printf("Registers:\n"); ++ if (machine) { ++ sbi_printf("%sMIE%s: 0x%" PRILX "\n", ++ prefix, suffix, csr_read(CSR_MIE)); ++ sbi_printf("%sMIP%s: 0x%" PRILX "\n", ++ prefix, suffix, csr_read(CSR_MIP)); ++ sbi_printf("%sMSTATUS%s: 0x%" PRILX "\n", ++ prefix, suffix, csr_read(CSR_MSTATUS)); ++ sbi_printf("%sMIDELEG%s: 0x%" PRILX "\n", ++ prefix, suffix, csr_read(CSR_MIDELEG)); ++ } ++ sbi_printf("%sSIE%s: 0x%" PRILX "\n", ++ prefix, suffix, csr_read(CSR_SIE)); ++ sbi_printf("%sSIP%s: 0x%" PRILX "\n", ++ prefix, suffix, csr_read(CSR_SIP)); ++ sbi_printf("%sSSTATUS%s: 0x%" PRILX "\n", ++ prefix, suffix, csr_read(CSR_SSTATUS)); ++ sbi_printf("%sSTVEC%s: 0x%" PRILX "\n", ++ prefix, suffix, csr_read(CSR_STVEC)); ++} ++ ++static void __attribute__((optimize("O0"))) switch_to_supervisor_mode(int (*target_address)(void)) ++{ ++ unsigned long mstatus; ++ ++ // Read the current mstatus ++ asm volatile("csrr %0, mstatus" : "=r"(mstatus)); ++ ++ // Set the MPP field to supervisor mode ++ mstatus = (mstatus & ~MSTATUS_MPP_MASK) | MSTATUS_MPP_SUPERVISOR; ++ ++ // Write back the modified mstatus ++ asm volatile("csrw mstatus, %0" : : "r"(mstatus)); ++ ++ // Set the mepc to the target address ++ asm volatile("csrw mepc, %0" : : "r"(target_address)); ++ ++ // Use mret to return to the specified address in supervisor mode ++ asm volatile("mret"); ++} ++ ++static int supervisor_mode_code(void) ++{ ++ sbi_printf("Hello from supervisor\n"); ++ dumpregs(0); ++ ++ /* Enable timer interrupt */ ++ *mtimecmp = *mtime + 10000; ++ ++ sbi_printf("Timer alarm programmed\n"); ++ sbi_printf("Waiting for interrupt...\n"); ++ int i = 0; ++ char *s = "-\\|/"; ++ while (1) { ++ for (volatile unsigned long j = 0; j < 100000; j++); ++ sbi_printf("\r%c", s[i++]); ++ if (i >= 4) ++ i = 0; ++ } ++ return 0; ++} ++ ++static void __attribute__((aligned(4))) 
__attribute__((interrupt ("supervisor"))) supervisor_trap_entry(void) ++{ ++ sbi_printf("\nSupervisor Trap Entry Reached!\n"); ++ sbi_printf("\nTEST-RESULT-OK\n"); ++ while (1) { ++ } ++} ++ ++static void do_plic_test(void) ++{ ++ sbi_printf("--- TESTING PLIC ---\n"); ++ ++ /* Disable auxiliary timer interrupt */ ++ *mtimecmp = ~0UL; ++ sbi_printf("Timer interrupt disabled\n"); ++ ++ ++ /* Enable supervisor interrupt delegation */ ++ ++ csr_set(CSR_SIE, SIE_SEIE); // Enable supervisor external interrupts ++ csr_set(CSR_SSTATUS, SSTATUS_SIE); // Enable global interrupts in supervisor mode ++ csr_set(CSR_MIDELEG, MIDELEG_SEIE); // Delegate machine interrupts to supervisor mode ++ csr_write(CSR_STVEC, &supervisor_trap_entry); ++ ++ sbi_printf("Enabled supervisor delegation:\n"); ++ ++ dumpregs(1); ++ ++ /* Configure PLIC aux timer input */ ++ volatile unsigned *plic_priority = (unsigned *)(PLIC_BASE + PLIC_PRIORITY_OFFSET + PLIC_TIMER_PORT * 4); ++ volatile unsigned *plic_enable = (unsigned *)(PLIC_BASE + PLIC_ENABLE_OFFSET); ++ volatile unsigned *plic_threshold = (unsigned *)(PLIC_BASE + PLIC_THRESHOLD_OFFSET); ++ volatile unsigned *plic_claim = (unsigned *)(PLIC_BASE + PLIC_CLAIM_OFFSET); ++ volatile unsigned *plic_pending = (unsigned *)(PLIC_BASE + PLIC_PENDING_OFFSET); ++ ++ sbi_printf("Enabling timer in PLIC\n"); ++ *plic_priority = PLIC_TIMER_PORT; ++ *plic_threshold = PLIC_TIMER_PORT - 1; ++ *plic_enable |= (1 << PLIC_TIMER_PORT); ++ ++ /* Clear interrupt */ ++ sbi_printf("Pending: %u\n", *plic_pending); ++ unsigned claim = *plic_claim; ++ sbi_printf("Claim: %u\n", claim); ++ *plic_claim = claim; ++ sbi_printf("Pending: %u\n", *plic_pending); ++ ++ sbi_printf("Clearing MIP\n"); ++ csr_write(CSR_MIP, 0); ++ ++ /* Enable external timer interrupts */ ++ //sbi_printf("Enabling MEIE in MIE register\n"); ++ //csr_set(CSR_MIE, MIE_MEIE); /* Needed? */ ++ //sbi_printf("Enabling MIE in MSTATUS register\n"); ++ //csr_set(CSR_MSTATUS, MSTATUS_MIE); /* Needed? 
*/ ++ ++ sbi_printf("Switching to supervisor\n"); ++ ++ dumpregs(1); ++ ++ switch_to_supervisor_mode(&supervisor_mode_code); ++ ++ /* Never reached */ ++ while (1); ++} ++ ++ diff --git a/patches/opensbi-timer-debug.patch b/patches/opensbi-timer-debug.patch new file mode 100644 index 0000000000000000000000000000000000000000..cc4723845f5ca50740e9d7406cc5810450e45b10 --- /dev/null +++ b/patches/opensbi-timer-debug.patch @@ -0,0 +1,229 @@ +diff --git a/lib/sbi/sbi_timer.c b/lib/sbi/sbi_timer.c +index 7b618de..65e42b0 100644 +--- a/lib/sbi/sbi_timer.c ++++ b/lib/sbi/sbi_timer.c +@@ -183,13 +183,17 @@ int sbi_timer_init(struct sbi_scratch *scratch, bool cold_boot) + u64 *time_delta; + const struct sbi_platform *plat = sbi_platform_ptr(scratch); + ++ sbi_printf("sbi_timer_init: begins\n"); ++ + if (cold_boot) { + time_delta_off = sbi_scratch_alloc_offset(sizeof(*time_delta)); + if (!time_delta_off) + return SBI_ENOMEM; + +- if (sbi_hart_has_extension(scratch, SBI_HART_EXT_ZICNTR)) ++ if (sbi_hart_has_extension(scratch, SBI_HART_EXT_ZICNTR)) { ++ sbi_printf("sbi_timer_init: got Zicntr extension\n"); + get_time_val = get_ticks; ++ } + } else { + if (!time_delta_off) + return SBI_ENOMEM; +@@ -198,7 +202,10 @@ int sbi_timer_init(struct sbi_scratch *scratch, bool cold_boot) + time_delta = sbi_scratch_offset_ptr(scratch, time_delta_off); + *time_delta = 0; + +- return sbi_platform_timer_init(plat, cold_boot); ++ int rc = sbi_platform_timer_init(plat, cold_boot); ++ if (rc) ++ sbi_printf("sbi_platform_timer: sbi_platform_timer_init failed (%d)\n", rc); ++ return rc; + } + + void sbi_timer_exit(struct sbi_scratch *scratch) +diff --git a/lib/sbi/sbi_trap.c b/lib/sbi/sbi_trap.c +index b4f3a17..cde2073 100644 +--- a/lib/sbi/sbi_trap.c ++++ b/lib/sbi/sbi_trap.c +@@ -283,6 +283,7 @@ static int sbi_trap_aia_irq(void) + */ + struct sbi_trap_context *sbi_trap_handler(struct sbi_trap_context *tcntx) + { ++ sbi_printf("<"); + int rc = SBI_ENOTSUPP; + const char *msg = "trap handler 
failed"; + struct sbi_scratch *scratch = sbi_scratch_thishart_ptr(); +@@ -295,6 +296,7 @@ struct sbi_trap_context *sbi_trap_handler(struct sbi_trap_context *tcntx) + sbi_trap_set_context(scratch, tcntx); + + if (mcause & MCAUSE_IRQ_MASK) { ++ sbi_printf("i(%lu)", mcause & ~MCAUSE_IRQ_MASK); + if (sbi_hart_has_extension(sbi_scratch_thishart_ptr(), + SBI_HART_EXT_SMAIA)) + rc = sbi_trap_aia_irq(); +@@ -306,35 +308,42 @@ struct sbi_trap_context *sbi_trap_handler(struct sbi_trap_context *tcntx) + + switch (mcause) { + case CAUSE_ILLEGAL_INSTRUCTION: ++ sbi_printf("I"); + rc = sbi_illegal_insn_handler(tcntx); + msg = "illegal instruction handler failed"; + break; + case CAUSE_MISALIGNED_LOAD: ++ sbi_printf("L"); + sbi_pmu_ctr_incr_fw(SBI_PMU_FW_MISALIGNED_LOAD); + rc = sbi_misaligned_load_handler(tcntx); + msg = "misaligned load handler failed"; + break; + case CAUSE_MISALIGNED_STORE: ++ sbi_printf("S"); + sbi_pmu_ctr_incr_fw(SBI_PMU_FW_MISALIGNED_STORE); + rc = sbi_misaligned_store_handler(tcntx); + msg = "misaligned store handler failed"; + break; + case CAUSE_SUPERVISOR_ECALL: + case CAUSE_MACHINE_ECALL: ++ sbi_printf("E"); + rc = sbi_ecall_handler(tcntx); + msg = "ecall handler failed"; + break; + case CAUSE_LOAD_ACCESS: ++ sbi_printf("l"); + sbi_pmu_ctr_incr_fw(SBI_PMU_FW_ACCESS_LOAD); + rc = sbi_load_access_handler(tcntx); + msg = "load fault handler failed"; + break; + case CAUSE_STORE_ACCESS: ++ sbi_printf("s"); + sbi_pmu_ctr_incr_fw(SBI_PMU_FW_ACCESS_STORE); + rc = sbi_store_access_handler(tcntx); + msg = "store fault handler failed"; + break; + default: ++ sbi_printf("R"); + /* If the trap came from S or U mode, redirect it there */ + msg = "trap redirect failed"; + rc = sbi_trap_redirect(regs, trap); +@@ -344,6 +353,8 @@ struct sbi_trap_context *sbi_trap_handler(struct sbi_trap_context *tcntx) + trap_done: + if (rc) + sbi_trap_error(msg, rc, tcntx); ++ else ++ sbi_printf(">"); + + if (((regs->mstatus & MSTATUS_MPP) >> MSTATUS_MPP_SHIFT) != PRV_M) + 
sbi_sse_process_pending_events(regs); +diff --git a/lib/utils/timer/fdt_timer.c b/lib/utils/timer/fdt_timer.c +index f468730..db20526 100644 +--- a/lib/utils/timer/fdt_timer.c ++++ b/lib/utils/timer/fdt_timer.c +@@ -7,6 +7,7 @@ + * Anup Patel + */ + ++#include + #include + #include + #include +@@ -39,19 +40,26 @@ static int fdt_timer_cold_init(void) + void *fdt = fdt_get_address(); + + for (pos = 0; pos < fdt_timer_drivers_size; pos++) { ++ sbi_printf("fdt_timer_cold_init: pos = %d\n", pos); + drv = fdt_timer_drivers[pos]; + + noff = -1; + while ((noff = fdt_find_match(fdt, noff, + drv->match_table, &match)) >= 0) { ++ ++ sbi_printf("fdt_timer_cold_init: got match, name = %s\n", match->compatible); + if (!fdt_node_is_enabled(fdt, noff)) + continue; + ++ sbi_printf("fdt_timer_cold_init: enabled\n"); ++ + /* drv->cold_init must not be NULL */ + if (drv->cold_init == NULL) + return SBI_EFAIL; + + rc = drv->cold_init(fdt, noff, match); ++ sbi_printf("fdt_timer_cold_init: drv->cold_init = %d\n", rc); ++ + if (rc == SBI_ENODEV) + continue; + if (rc) +@@ -69,6 +77,7 @@ static int fdt_timer_cold_init(void) + * We can't fail here since systems with Sstc might not provide + * mtimer/clint DT node in the device tree. 
+ */ ++ sbi_printf("fdt_timer_cold_init: returns 0\n"); + return 0; + } + +@@ -78,9 +87,15 @@ int fdt_timer_init(bool cold_boot) + + if (cold_boot) { + rc = fdt_timer_cold_init(); +- if (rc) ++ if (rc) { ++ sbi_printf("fdt_timer_init: fdt_timer_cold_init failed (%d)\n", rc); + return rc; ++ } + } + +- return fdt_timer_warm_init(); ++ rc = fdt_timer_warm_init(); ++ if (rc) ++ sbi_printf("fdt_timer_init: fdt_timer_warm_init failed (%d)\n", rc); ++ ++ return rc; + } +diff --git a/lib/utils/timer/fdt_timer_mtimer.c b/lib/utils/timer/fdt_timer_mtimer.c +index 9e27e3a..cef2ee6 100644 +--- a/lib/utils/timer/fdt_timer_mtimer.c ++++ b/lib/utils/timer/fdt_timer_mtimer.c +@@ -8,6 +8,7 @@ + */ + + #include ++#include + #include + #include + #include +@@ -33,6 +34,7 @@ static struct aclint_mtimer_data *mt_reference = NULL; + static int timer_mtimer_cold_init(void *fdt, int nodeoff, + const struct fdt_match *match) + { ++ + int rc; + unsigned long addr[2], size[2]; + struct timer_mtimer_node *mtn, *n; +@@ -40,6 +42,8 @@ static int timer_mtimer_cold_init(void *fdt, int nodeoff, + const struct timer_mtimer_quirks *quirks = match->data; + bool is_clint = quirks && quirks->is_clint; + ++ sbi_printf("timer_mtimer_cold_init: begins, is_clint = %d\n", (int) is_clint); ++ + mtn = sbi_zalloc(sizeof(*mtn)); + if (!mtn) + return SBI_ENOMEM; +@@ -49,6 +53,7 @@ static int timer_mtimer_cold_init(void *fdt, int nodeoff, + &addr[0], &size[0], &addr[1], &size[1], + &mt->first_hartid, &mt->hart_count); + if (rc) { ++ sbi_printf("timer_mtimer_cold_init: fdt_parse_aclint_node failed (%d)\n", rc); + sbi_free(mtn); + return rc; + } +@@ -57,6 +62,7 @@ static int timer_mtimer_cold_init(void *fdt, int nodeoff, + + rc = fdt_parse_timebase_frequency(fdt, &mt->mtime_freq); + if (rc) { ++ sbi_printf("timer_mtimer_cold_init: fdt_parse_timebase_frequency failed (%d)\n", rc); + sbi_free(mtn); + return rc; + } +@@ -83,6 +89,11 @@ static int timer_mtimer_cold_init(void *fdt, int nodeoff, + mt->mtimecmp_size = 
size[1]; + } + ++ sbi_printf("timer_mtimer_cold_init: mtime_addr = 0x%08lx\n", mt->mtime_addr); ++ sbi_printf("timer_mtimer_cold_init: mtime_size = 0x%08lx\n", mt->mtime_size); ++ sbi_printf("timer_mtimer_cold_init: mtimecmp_addr = 0x%08lx\n", mt->mtimecmp_addr); ++ sbi_printf("timer_mtimer_cold_init: mtimecmp_size = 0x%08lx\n", mt->mtimecmp_size); ++ + /* Apply additional quirks */ + if (quirks) { + mt->has_64bit_mmio = quirks->has_64bit_mmio; diff --git a/patches/ox-alveo-platform-plic.patch b/patches/ox-alveo-platform-plic.patch new file mode 100644 index 0000000000000000000000000000000000000000..cd5404793b52bd178e62ecf000040a7d0286438c --- /dev/null +++ b/patches/ox-alveo-platform-plic.patch @@ -0,0 +1,167 @@ +diff --git a/platform/fpga/ox_alveo/Kconfig b/platform/fpga/ox_alveo/Kconfig +new file mode 100644 +index 0000000..bf3e7e6 +--- /dev/null ++++ b/platform/fpga/ox_alveo/Kconfig +@@ -0,0 +1,5 @@ ++config PLATFORM_OX_ALVEO_FPGA ++ bool ++ select SERIAL_UART8250 ++ select IRQCHIP_PLIC ++ default y +diff --git a/platform/fpga/ox_alveo/configs/defconfig b/platform/fpga/ox_alveo/configs/defconfig +new file mode 100644 +index 0000000..e69de29 +diff --git a/platform/fpga/ox_alveo/objects.mk b/platform/fpga/ox_alveo/objects.mk +new file mode 100644 +index 0000000..d444abe +--- /dev/null ++++ b/platform/fpga/ox_alveo/objects.mk +@@ -0,0 +1,19 @@ ++platform-cppflags-y = ++platform-cflags-y = ++platform-asflags-y = ++platform-ldflags-y = ++PLATFORM_RISCV_XLEN = 64 ++PLATFORM_RISCV_ABI = lp64d ++PLATFORM_RISCV_ISA = rv64g ++PLATFORM_RISCV_CODE_MODEL = medany ++ ++platform-objs-y += platform.o ++ ++FW_TEXT_START=0x80000000 ++ ++FW_DYNAMIC=n ++FW_JUMP=n ++FW_PAYLOAD=y ++ ++FW_PAYLOAD_OFFSET=0x200000 ++FW_PAYLOAD_ALIGN=0x1000 +diff --git a/platform/fpga/ox_alveo/platform.c b/platform/fpga/ox_alveo/platform.c +new file mode 100644 +index 0000000..a359b34 +--- /dev/null ++++ b/platform/fpga/ox_alveo/platform.c +@@ -0,0 +1,122 @@ ++#include ++#include ++#include ++#include 
++#include ++#include ++#include ++ ++#define OX_ALVEO_HART_COUNT 1 ++ ++#define OX_ALVEO_UART_BASE_ADDR 0x40000000 ++#define OX_ALVEO_UART_OFFSET 0x1000 ++#define OX_ALVEO_UART_INPUT_FREQ 50000000 ++#define OX_ALVEO_UART_BAUDRATE 115200 ++#define OX_ALVEO_PLIC_ADDR 0x40800000 ++#define OX_ALVEO_PLIC_NUM_SOURCES 3 ++ ++#define OX_ALVEO_TIMER_BASE 0x40170000 ++#define ADDR_TIME_L 0x0u // 32 lower bits of the time register ++#define ADDR_TIME_H 0x1u // 32 higher bits of the time register ++#define ADDR_TIMECMP_L 0x2u // 32 lower bits of the time comparator ++#define ADDR_TIMECMP_H 0x3u // 32 higher bits of the time comparator ++ ++volatile uint32_t *timer_base_ptr = (uint32_t *)(OX_ALVEO_TIMER_BASE); ++ ++static struct plic_data plic = { ++ .addr = OX_ALVEO_PLIC_ADDR, ++ .num_src = OX_ALVEO_PLIC_NUM_SOURCES, ++}; ++ ++static int ox_alveo_early_init(bool cold_boot) // Platform early initialization. ++{ ++ return 0; ++} ++ ++static int ox_alveo_final_init(bool cold_boot) // Platform final initialization. ++{ ++ return 0; ++} ++ ++static int ox_alveo_console_init(void) // Initialize the platform console. ++{ ++ return uart8250_init(OX_ALVEO_UART_BASE_ADDR, ++ OX_ALVEO_UART_INPUT_FREQ, ++ OX_ALVEO_UART_BAUDRATE, ++ 2, 4, ++ OX_ALVEO_UART_OFFSET); ++} ++ ++static int ox_alveo_irqchip_init(bool cold_boot) // Initialize the platform interrupt controller for current HART. ++{ ++ u32 hartid = current_hartid(); ++ int ret; ++ ++ /* Example if the generic PLIC driver is used */ ++ if (cold_boot) { ++ ret = plic_cold_irqchip_init(&plic); ++ if (ret) ++ return ret; ++ } ++ ++ return plic_warm_irqchip_init(&plic, 2 * hartid, -1); ++} ++ ++static int ox_alveo_ipi_init(bool cold_boot) // Initialize IPI for current HART. ++{ ++ return 0; ++} ++ ++static u64 ox_alveo_timer_value(void) // Get platform timer value. 
++{ ++ return ((u64)*(timer_base_ptr + ADDR_TIME_H) << 32) + *(timer_base_ptr + ADDR_TIME_L); ++} ++ ++static void ox_alveo_timer_event_start(u64 next_event) // Start platform timer event for current HART. ++{ ++ *(timer_base_ptr + ADDR_TIMECMP_H) = next_event >> 32; ++ *(timer_base_ptr + ADDR_TIMECMP_L) = next_event; ++} ++ ++static void ox_alveo_timer_event_stop(void) // Stop platform timer event for current HART. ++{ ++ ++ *(timer_base_ptr + ADDR_TIMECMP_H) = 0; ++ *(timer_base_ptr + ADDR_TIMECMP_L) = 0; ++} ++ ++static struct sbi_timer_device mtimer = { ++ .name = "axi_timer", ++ .timer_freq = OX_ALVEO_UART_INPUT_FREQ, ++ .timer_value = ox_alveo_timer_value, ++ .timer_event_start = ox_alveo_timer_event_start, ++ .timer_event_stop = ox_alveo_timer_event_stop ++}; ++ ++static int ox_alveo_timer_init(bool cold_boot) // Initialize platform timer for current HART. ++{ ++ *(timer_base_ptr + ADDR_TIMECMP_H) = 0; ++ *(timer_base_ptr + ADDR_TIMECMP_L) = 0; ++ sbi_timer_set_device(&mtimer); ++ return 0; ++} ++ ++const struct sbi_platform_operations ox_alveo_ops = { // Platform descriptor. 
++ .early_init = ox_alveo_early_init, ++ .final_init = ox_alveo_final_init, ++ .console_init = ox_alveo_console_init, ++ .irqchip_init = ox_alveo_irqchip_init, ++ .ipi_init = ox_alveo_ipi_init, ++ .timer_init = ox_alveo_timer_init ++}; ++ ++const struct sbi_platform platform = { ++ .opensbi_version = OPENSBI_VERSION, ++ .platform_version = SBI_PLATFORM_VERSION(0x0, 0x01), ++ .name = "ox (Rodrigo NixOS version)", ++ .features = SBI_PLATFORM_DEFAULT_FEATURES, ++ .hart_count = OX_ALVEO_HART_COUNT, ++ .hart_stack_size = SBI_PLATFORM_DEFAULT_HART_STACK_SIZE, ++ .heap_size = SBI_PLATFORM_DEFAULT_HEAP_SIZE(OX_ALVEO_HART_COUNT), ++ .platform_ops_addr = (unsigned long)&ox_alveo_ops ++}; diff --git a/patches/ox-alveo-platform.patch b/patches/ox-alveo-platform.patch new file mode 100644 index 0000000000000000000000000000000000000000..347d0d46d62aa46de63e9ab71c84d098a37a64cd --- /dev/null +++ b/patches/ox-alveo-platform.patch @@ -0,0 +1,148 @@ +diff --git a/platform/fpga/ox_alveo/Kconfig b/platform/fpga/ox_alveo/Kconfig +new file mode 100644 +index 0000000..bf3e7e6 +--- /dev/null ++++ b/platform/fpga/ox_alveo/Kconfig +@@ -0,0 +1,4 @@ ++config PLATFORM_OX_ALVEO_FPGA ++ bool ++ select SERIAL_UART8250 ++ default y +diff --git a/platform/fpga/ox_alveo/configs/defconfig b/platform/fpga/ox_alveo/configs/defconfig +new file mode 100644 +index 0000000..e69de29 +diff --git a/platform/fpga/ox_alveo/objects.mk b/platform/fpga/ox_alveo/objects.mk +new file mode 100644 +index 0000000..d444abe +--- /dev/null ++++ b/platform/fpga/ox_alveo/objects.mk +@@ -0,0 +1,19 @@ ++platform-cppflags-y = ++platform-cflags-y = ++platform-asflags-y = ++platform-ldflags-y = ++PLATFORM_RISCV_XLEN = 64 ++PLATFORM_RISCV_ABI = lp64d ++PLATFORM_RISCV_ISA = rv64imafd ++PLATFORM_RISCV_CODE_MODEL = medany ++ ++platform-objs-y += platform.o ++ ++FW_TEXT_START=0x80000000 ++ ++FW_DYNAMIC=n ++FW_JUMP=n ++FW_PAYLOAD=y ++ ++FW_PAYLOAD_OFFSET=0x200000 ++FW_PAYLOAD_ALIGN=0x1000 +diff --git 
a/platform/fpga/ox_alveo/platform.c b/platform/fpga/ox_alveo/platform.c +new file mode 100644 +index 0000000..a359b34 +--- /dev/null ++++ b/platform/fpga/ox_alveo/platform.c +@@ -0,0 +1,104 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define OX_ALVEO_HART_COUNT 1 ++ ++#define OX_ALVEO_UART_BASE_ADDR 0x40000000 ++#define OX_ALVEO_UART_OFFSET 0x1000 ++#define OX_ALVEO_UART_INPUT_FREQ 50000000 ++#define OX_ALVEO_UART_BAUDRATE 115200 ++ ++#define OX_ALVEO_TIMER_BASE 0x40170000 ++#define ADDR_TIME_L 0x0u // 32 lower bits of the time register ++#define ADDR_TIME_H 0x1u // 32 higher bits of the time register ++#define ADDR_TIMECMP_L 0x2u // 32 lower bits of the time comparator ++#define ADDR_TIMECMP_H 0x3u // 32 higher bits of the time comparator ++ ++volatile uint32_t *timer_base_ptr = (uint32_t *)(OX_ALVEO_TIMER_BASE); ++ ++static int ox_alveo_early_init(bool cold_boot) // Platform early initialization. ++{ ++ return 0; ++} ++ ++static int ox_alveo_final_init(bool cold_boot) // Platform final initialization. ++{ ++ return 0; ++} ++ ++static int ox_alveo_console_init(void) // Initialize the platform console. ++{ ++ return uart8250_init(OX_ALVEO_UART_BASE_ADDR, ++ OX_ALVEO_UART_INPUT_FREQ, ++ OX_ALVEO_UART_BAUDRATE, ++ 2, 4, ++ OX_ALVEO_UART_OFFSET); ++} ++ ++static int ox_alveo_irqchip_init(bool cold_boot) // Initialize the platform interrupt controller for current HART. ++{ ++ u32 hartid = current_hartid(); ++ return hartid; ++} ++ ++static int ox_alveo_ipi_init(bool cold_boot) // Initialize IPI for current HART. ++{ ++ return 0; ++} ++ ++static u64 ox_alveo_timer_value(void) // Get platform timer value. ++{ ++ return ((u64)*(timer_base_ptr + ADDR_TIME_H) << 32) + *(timer_base_ptr + ADDR_TIME_L); ++} ++ ++static void ox_alveo_timer_event_start(u64 next_event) // Start platform timer event for current HART. 
++{ ++ *(timer_base_ptr + ADDR_TIMECMP_H) = next_event >> 32; ++ *(timer_base_ptr + ADDR_TIMECMP_L) = next_event; ++} ++ ++static void ox_alveo_timer_event_stop(void) // Stop platform timer event for current HART. ++{ ++ ++ *(timer_base_ptr + ADDR_TIMECMP_H) = 0; ++ *(timer_base_ptr + ADDR_TIMECMP_L) = 0; ++} ++ ++static struct sbi_timer_device mtimer = { ++ .name = "axi_timer", ++ .timer_freq = OX_ALVEO_UART_INPUT_FREQ, ++ .timer_value = ox_alveo_timer_value, ++ .timer_event_start = ox_alveo_timer_event_start, ++ .timer_event_stop = ox_alveo_timer_event_stop ++}; ++ ++static int ox_alveo_timer_init(bool cold_boot) // Initialize platform timer for current HART. ++{ ++ *(timer_base_ptr + ADDR_TIMECMP_H) = 0; ++ *(timer_base_ptr + ADDR_TIMECMP_L) = 0; ++ sbi_timer_set_device(&mtimer); ++ return 0; ++} ++ ++const struct sbi_platform_operations ox_alveo_ops = { // Platform descriptor. ++ .early_init = ox_alveo_early_init, ++ .final_init = ox_alveo_final_init, ++ .console_init = ox_alveo_console_init, ++ .irqchip_init = ox_alveo_irqchip_init, ++ .ipi_init = ox_alveo_ipi_init, ++ .timer_init = ox_alveo_timer_init ++}; ++ ++const struct sbi_platform platform = { ++ .opensbi_version = OPENSBI_VERSION, ++ .platform_version = SBI_PLATFORM_VERSION(0x0, 0x01), ++ .name = "ox (for Xilinx Alveo FPGA)", ++ .features = SBI_PLATFORM_DEFAULT_FEATURES, ++ .hart_count = OX_ALVEO_HART_COUNT, ++ .hart_stack_size = SBI_PLATFORM_DEFAULT_HART_STACK_SIZE, ++ .platform_ops_addr = (unsigned long)&ox_alveo_ops ++}; diff --git a/patches/sa-fpga-add-plic-claim-test.patch b/patches/sa-fpga-add-plic-claim-test.patch new file mode 100644 index 0000000000000000000000000000000000000000..b9b6cb422025bec8a1a4d912966e77892b2a293a --- /dev/null +++ b/patches/sa-fpga-add-plic-claim-test.patch @@ -0,0 +1,92 @@ +commit 1a2c5f12e7676930123cfe7853f1805cf3680c25 +Author: Rodrigo Arias Mallo +Date: Tue Oct 1 12:59:38 2024 +0200 + + Add claim test + +diff --git a/fpga_core_bridge/simulator/tests/c_tests/Makefile 
b/fpga_core_bridge/simulator/tests/c_tests/Makefile +index f744131..91d61b6 100644 +--- a/fpga_core_bridge/simulator/tests/c_tests/Makefile ++++ b/fpga_core_bridge/simulator/tests/c_tests/Makefile +@@ -23,6 +23,7 @@ bmarks = \ + plic \ + plic_supervisor \ + plic_threshold \ ++ plic_claim \ + clint_supervisor \ + uart + +diff --git a/fpga_core_bridge/simulator/tests/c_tests/plic_claim/plic_claim.c b/fpga_core_bridge/simulator/tests/c_tests/plic_claim/plic_claim.c +new file mode 100644 +index 0000000..352adb9 +--- /dev/null ++++ b/fpga_core_bridge/simulator/tests/c_tests/plic_claim/plic_claim.c +@@ -0,0 +1,68 @@ ++#include "util.h" ++ ++#define PLIC_BASE 0x40800000 ++ ++int main(void) ++{ ++ uart_init(); ++ ++ /* This test requires the auxiliar timer to cause a pending ++ * interrupt in the source 4. All the other pending bits must be ++ * zero. */ ++ ++ uint32_t src = 4; ++ uint32_t mask = 1 << src; ++ ++ /* 0x001000: Interrupt Source #0 to #31 Pending Bits */ ++ volatile uint32_t *pending = PLIC_BASE + 0x001000; ++ ++ /* Manually enable the pending register on both timer and serial */ ++ *pending = (1<<4) | (1<<1); ++ ++ uint32_t p; ++ /* Wait until exactly the timer (4) and serial (1) bits are pending */ ++ while ((p = *pending) != ((1<<4) | (1<<1))) { ++ printf("waiting, pending="); ++ printhex(p); ++ printf("\n"); ++ } ++ ++ /* 0x002080: Interrupt Source #0 to #31 Enable Bits on context 1 */ ++ volatile uint32_t *enable = PLIC_BASE + 0x002080; ++ *enable = (1<<4) | (1<<1); /* Enable source 4 (aux timer) and 1 ++ (serial) */ ++ ++ /* 0x000004: Interrupt source 1 priority */ ++ /* 0x000008: Interrupt source 2 priority */ ++ /* 0x00000c: Interrupt source 3 priority */ ++ /* 0x000010: Interrupt source 4 priority */ ++ for (uint32_t i = 1; i <= 4; i++) { ++ volatile uint32_t *priority = PLIC_BASE + i * 4; ++ *priority = 1; /* Make priority larger than threshold */ ++ } ++ ++ /* 0x201000: Priority threshold for context 1 */ ++ volatile uint32_t *threshold = PLIC_BASE + 0x201000; ++ *threshold = 
0; /* Make threshold small */ ++ ++ /* Now the context 1 must be receiving interrupts from the aux ++ * timer. Let's try to claim the interrupt. */ ++ ++ /* Read claim */ ++ ++ /* 0x201004: Interrupt Claim Process for context 1 */ ++ volatile uint32_t *claim = PLIC_BASE + 0x201004; ++ while (1) { ++ uint32_t c = *claim; ++ if (c == src) ++ break; ++ printf("ERROR: unexpected claim found, expecting 4: "); ++ printhex(c); ++ printf("\n"); ++ } ++ ++ while (1) ++ printf("SUCCESS: Claim test succeeded\n"); ++ ++ return 0; ++} diff --git a/patches/sa-fpga-crt.patch b/patches/sa-fpga-crt.patch new file mode 100644 index 0000000000000000000000000000000000000000..d3150307f3ad9f6b4a1f177a14ec905a6b6ca69f --- /dev/null +++ b/patches/sa-fpga-crt.patch @@ -0,0 +1,15 @@ +diff --git a/fpga_core_bridge/simulator/tests/c_tests/common/crt.S b/fpga_core_bridge/simulator/tests/c_tests/common/crt.S +index 3f5bb2c..bd738b1 100644 +--- a/fpga_core_bridge/simulator/tests/c_tests/common/crt.S ++++ b/fpga_core_bridge/simulator/tests/c_tests/common/crt.S +@@ -59,10 +59,6 @@ _start: + #else + bltz t0, 1f + #endif +-2: +- li a0, 1 +- sw a0, tohost, t0 +- j 2b + 1: + + #ifdef __riscv_flen diff --git a/patches/sa-fpga-plic-registers.patch b/patches/sa-fpga-plic-registers.patch new file mode 100644 index 0000000000000000000000000000000000000000..2ae848894cc1c0da3dbda89d538726489280b0c1 --- /dev/null +++ b/patches/sa-fpga-plic-registers.patch @@ -0,0 +1,92 @@ +diff --git a/fpga_core_bridge/simulator/tests/c_tests/plic_supervisor/plic_supervisor_test.c b/fpga_core_bridge/simulator/tests/c_tests/plic_supervisor/plic_supervisor_test.c +index 0cfa681..78d97cb 100644 +--- a/fpga_core_bridge/simulator/tests/c_tests/plic_supervisor/plic_supervisor_test.c ++++ b/fpga_core_bridge/simulator/tests/c_tests/plic_supervisor/plic_supervisor_test.c +@@ -68,6 +68,48 @@ uintptr_t handle_trap(uint64_t cause, uint64_t epc, uintptr_t regs[32]) + return epc; + } + ++static void dumpregs(int machine) ++{ ++ 
printf("Registers:"); ++ if (machine) { ++ uint64_t mie; ++ asm volatile("csrr %0, mie" : "=r"(mie)); ++ printf("\n MIE: "); ++ printhex(mie); ++ ++ uint64_t mip; ++ asm volatile("csrr %0, mip" : "=r"(mip)); ++ printf("\n MIP: "); ++ printhex(mip); ++ ++ uint64_t mstatus; ++ asm volatile("csrr %0, mstatus" : "=r"(mstatus)); ++ printf("\nMSTATUS: "); ++ printhex(mstatus); ++ ++ uint64_t mideleg; ++ asm volatile("csrr %0, mideleg" : "=r"(mideleg)); ++ printf("\nMIDELEG: "); ++ printhex(mideleg); ++ } ++ ++ uint64_t sie; ++ asm volatile("csrr %0, sie" : "=r"(sie)); ++ printf("\n SIE: "); ++ printhex(sie); ++ ++ uint64_t sip; ++ asm volatile("csrr %0, sip" : "=r"(sip)); ++ printf("\n SIP: "); ++ printhex(sip); ++ ++ uint64_t sstatus; ++ asm volatile("csrr %0, sstatus" : "=r"(sstatus)); ++ printf("\nSSTATUS: "); ++ printhex(sstatus); ++ printf("\n"); ++} ++ + // Define the bit positions for the external interrupt enable in mie and mideleg registers + #define SIE_SEIE (1 << 9) // Supervisor External Interrupt Enable + #define MIDELEG_MEIE (1 << 11) // Delegate Machine External Interrupt to Supervisor +@@ -156,16 +198,19 @@ void __attribute__((optimize("O0"))) switch_to_supervisor_mode(uint64_t* target_ + asm volatile("mret"); + } + +-uint64_t supervisor_mode_code() { +- int count = 0; +- while (1) { +- if (count == 10000) { +- uart_write_string("\nWaiting for interrupt in supervisor mode..."); +- count = 0; +- } +- count++; +- } +- return 0; ++uint64_t supervisor_mode_code() ++{ ++ uart_write_string("\nHello from supervisor mode..."); ++ dumpregs(0); ++ int count = 0; ++ while (1) { ++ if (count == 10000) { ++ uart_write_string("\nWaiting for interrupt in supervisor mode..."); ++ count = 0; ++ } ++ count++; ++ } ++ return 0; + } + + void main(void) { +@@ -181,6 +226,8 @@ void main(void) { + // Enable external timer interrupts + // enable_external_timer_interrupt(); + ++ dumpregs(1); ++ + // Switch to supervisor mode and execute supervisor_mode_code + 
switch_to_supervisor_mode(&supervisor_mode_code); + diff --git a/patches/sa-fpga-text-address.patch b/patches/sa-fpga-text-address.patch new file mode 100644 index 0000000000000000000000000000000000000000..151356176cf626cf2711f9b9a80329addb92c483 --- /dev/null +++ b/patches/sa-fpga-text-address.patch @@ -0,0 +1,13 @@ +diff --git a/fpga_core_bridge/simulator/tests/c_tests/common/test.ld b/fpga_core_bridge/simulator/tests/c_tests/common/test.ld +index 8321d86..f83a9ee 100644 +--- a/fpga_core_bridge/simulator/tests/c_tests/common/test.ld ++++ b/fpga_core_bridge/simulator/tests/c_tests/common/test.ld +@@ -26,7 +26,7 @@ SECTIONS + .tohost : { *(.tohost) } + + /* text: test code section */ +- . = 0x80000000; ++ . = 0x20020000; + .text.init : { *(.text.init) } + + . = ALIGN(0x1000); diff --git a/patches/sa-fpga-uart.patch b/patches/sa-fpga-uart.patch new file mode 100644 index 0000000000000000000000000000000000000000..eed755a1d688a44784be19327931fb90f24a9f7b --- /dev/null +++ b/patches/sa-fpga-uart.patch @@ -0,0 +1,31 @@ +diff --git a/fpga_core_bridge/simulator/tests/c_tests/common/syscalls.c b/fpga_core_bridge/simulator/tests/c_tests/common/syscalls.c +index 278ea97..287e5fc 100644 +--- a/fpga_core_bridge/simulator/tests/c_tests/common/syscalls.c ++++ b/fpga_core_bridge/simulator/tests/c_tests/common/syscalls.c +@@ -592,8 +592,18 @@ int uart_is_transmit_empty() { + + // Function to write a character to the UART + void uart_write_char(char c) { +- while (!uart_is_transmit_empty()); ++ //while (!uart_is_transmit_empty()); ++ ++ /* Delay it a bit, as checking the transmit holding register doesn't seem to ++ * work in the FPGA */ ++ for (volatile long i = 0; i < 10000; i++) ++ ; ++ + *(volatile uint8_t *)(UART_BASE + UART_THR) = c; ++ ++ /* Make new line go back to the start of the line */ ++ if (c == '\n') ++ uart_write_char('\r'); + } + + // Function to write a string to the UART +@@ -602,4 +612,4 @@ void uart_write_string(const char* str) { + uart_write_char(*str++); + 
asm("fence"); + } +-} +\ No newline at end of file ++} diff --git a/patches/stage-2-init.sh b/patches/stage-2-init.sh new file mode 100755 index 0000000000000000000000000000000000000000..66afd2d6b89a21f1b2caf6651eafe70360b136a7 --- /dev/null +++ b/patches/stage-2-init.sh @@ -0,0 +1,157 @@ +#! @shell@ + +#set -x + +systemConfig=@systemConfig@ + +export HOME=/root PATH="@path@" + + +if [ "${IN_NIXOS_SYSTEMD_STAGE1:-}" != true ]; then + # Print a greeting. + echo + echo -e "\e[1;32m<<< @distroName@ Stage 2 >>>\e[0m" + echo + + # Process the kernel command line. + for o in $(/dev/hvc0 2>/dev/hvc0" + ;; + bench2) + export PATH="@bashInteractive@/bin:@systemConfig@/sw/bin:$PATH" + setsid bash -c "exec @bench2@ /dev/hvc0 2>/dev/hvc0" + ;; + esac + done + + # Normally, stage 1 mounts the root filesystem read/writable. + # However, in some environments, stage 2 is executed directly, and the + # root is read-only. So make it writable here. + if [ -z "$container" ]; then + mount -n -o remount,rw none / + fi +fi + + +# Likewise, stage 1 mounts /proc, /dev and /sys, so if we don't have a +# stage 1, we need to do that here. +if [ ! -e /proc/1 ]; then + specialMount() { + local device="$1" + local mountPoint="$2" + local options="$3" + local fsType="$4" + + # We must not overwrite this mount because it's bind-mounted + # from stage 1's /run + if [ "${IN_NIXOS_SYSTEMD_STAGE1:-}" = true ] && [ "${mountPoint}" = /run ]; then + return + fi + + install -m 0755 -d "$mountPoint" + mount -n -t "$fsType" -o "$options" "$device" "$mountPoint" + } + source @earlyMountScript@ +fi + + +if [ "${IN_NIXOS_SYSTEMD_STAGE1:-}" = true ] || [ ! -c /dev/kmsg ] ; then + echo "booting system configuration ${systemConfig}" +else + echo "booting system configuration $systemConfig" > /dev/kmsg +fi + + +# Make /nix/store a read-only bind mount to enforce immutability of +# the Nix store. Note that we can't use "chown root:nixbld" here +# because users/groups might not exist yet. 
+# Silence chown/chmod to fail gracefully on a readonly filesystem +# like squashfs. +chown -f 0:30000 /nix/store +chmod -f 1775 /nix/store +if [ -n "@readOnlyNixStore@" ]; then + if ! [[ "$(findmnt --noheadings --output OPTIONS /nix/store)" =~ ro(,|$) ]]; then + if [ -z "$container" ]; then + mount --bind /nix/store /nix/store + else + mount --rbind /nix/store /nix/store + fi + mount -o remount,ro,bind /nix/store + fi +fi + + +if [ "${IN_NIXOS_SYSTEMD_STAGE1:-}" != true ]; then + # Use /etc/resolv.conf supplied by systemd-nspawn, if applicable. + if [ -n "@useHostResolvConf@" ] && [ -e /etc/resolv.conf ]; then + resolvconf -m 1000 -a host &1 {logErrFd}>&2 +# if test -w /dev/kmsg; then +# exec > >(tee -i /proc/self/fd/"$logOutFd" | while read -r line; do +# if test -n "$line"; then +# echo "<7>stage-2-init: $line" > /dev/kmsg +# fi +# done) 2>&1 +# else +# mkdir -p /run/log +# exec > >(tee -i /run/log/stage-2-init.log) 2>&1 +# fi +fi + + +# Required by the activation script +install -m 0755 -d /etc +if [ ! -h "/etc/nixos" ]; then + install -m 0755 -d /etc/nixos +fi +install -m 01777 -d /tmp + + +# Run the script that performs all configuration activation that does +# not have to be done at boot time. +echo "running activation script..." +#strace -f $systemConfig/activate +$systemConfig/activate + +echo "activation script OK" + +# Record the boot configuration. +ln -sfn "$systemConfig" /run/booted-system + + +# Run any user-specified commands. +@shell@ @postBootCommands@ + + +# Ensure systemd doesn't try to populate /etc, by forcing its first-boot +# heuristic off. It doesn't matter what's in /etc/machine-id for this purpose, +# and systemd will immediately fill in the file when it starts, so just +# creating it is enough. This `: >>` pattern avoids forking and avoids changing +# the mtime if the file already exists. 
+: >> /etc/machine-id + + +# No need to restore the stdout/stderr streams we never redirected and +# especially no need to start systemd +if [ "${IN_NIXOS_SYSTEMD_STAGE1:-}" != true ]; then + # Reset the logging file descriptors. + #exec 1>&$logOutFd 2>&$logErrFd + #exec {logOutFd}>&- {logErrFd}>&- + + + # Start systemd in a clean environment. + echo "starting systemd..." + exec @systemdExecutable@ "$@" +fi diff --git a/patches/u-boot-debug.patch b/patches/u-boot-debug.patch new file mode 100644 index 0000000000000000000000000000000000000000..2b0f46f33fbb567e6ac6b1cf2c1fd34a5ed3e422 --- /dev/null +++ b/patches/u-boot-debug.patch @@ -0,0 +1,83 @@ +diff --git a/common/board_f.c b/common/board_f.c +index 1688e27071..216839febb 100644 +--- a/common/board_f.c ++++ b/common/board_f.c +@@ -978,6 +978,8 @@ static const init_fnc_t init_sequence_f[] = { + + void board_init_f(ulong boot_flags) + { ++ puts("board_init_f() called\n"); ++ + gd->flags = boot_flags; + gd->have_console = 0; + +@@ -990,6 +992,7 @@ void board_init_f(ulong boot_flags) + /* NOTREACHED - jump_to_copy() does not return */ + hang(); + #endif ++ puts("board_init_f() ends ok\n"); + } + + #if defined(CONFIG_X86) || defined(CONFIG_ARC) +diff --git a/common/board_r.c b/common/board_r.c +index d798c00a80..cb8119a603 100644 +--- a/common/board_r.c ++++ b/common/board_r.c +@@ -786,6 +786,8 @@ static init_fnc_t init_sequence_r[] = { + + void board_init_r(gd_t *new_gd, ulong dest_addr) + { ++ puts("board_init_r called\n"); ++ + /* + * The pre-relocation drivers may be using memory that has now gone + * away. 
Mark serial as unavailable - this will fall back to the debug +diff --git a/drivers/cpu/riscv_cpu.c b/drivers/cpu/riscv_cpu.c +index d6484d7f4b..64a507248d 100644 +--- a/drivers/cpu/riscv_cpu.c ++++ b/drivers/cpu/riscv_cpu.c +@@ -92,10 +92,13 @@ static int riscv_cpu_get_count(const struct udevice *dev) + + static int riscv_cpu_bind(struct udevice *dev) + { ++ puts("riscv_cpu_bind called()\n"); ++ + struct cpu_plat *plat = dev_get_parent_plat(dev); + struct driver *drv; + int ret; + ++ puts("looking for timebase-frequency\n"); + /* save the hart id */ + plat->cpu_id = dev_read_addr(dev); + /* first examine the property in current cpu node */ +@@ -105,6 +108,8 @@ static int riscv_cpu_bind(struct udevice *dev) + dev_read_u32(dev->parent, "timebase-frequency", + &plat->timebase_freq); + ++ printf("timebase-frequency=%lu\n", (unsigned long) plat->timebase_freq); ++ + /* + * Bind riscv-timer driver on boot hart. + * +@@ -125,6 +130,8 @@ static int riscv_cpu_bind(struct udevice *dev) + device_bind_with_driver_data(dev, drv, "riscv_timer", + plat->timebase_freq, ofnode_null(), + NULL); ++ } else { ++ printf("ignoring cpu_id=%d\n", plat->cpu_id); + } + + return 0; +diff --git a/lib/hang.c b/lib/hang.c +index 2735774f9a..84eff21ffc 100644 +--- a/lib/hang.c ++++ b/lib/hang.c +@@ -22,6 +22,8 @@ + */ + void hang(void) + { ++ puts("oh no, we are in hang()\n"); ++ + #if !defined(CONFIG_SPL_BUILD) || \ + (CONFIG_IS_ENABLED(LIBCOMMON_SUPPORT) && \ + CONFIG_IS_ENABLED(SERIAL)) diff --git a/patches/uboot-debug-ext-interrupts.patch b/patches/uboot-debug-ext-interrupts.patch new file mode 100644 index 0000000000000000000000000000000000000000..140ec25fd26cd6c9de029a53d696851eba4d61d3 --- /dev/null +++ b/patches/uboot-debug-ext-interrupts.patch @@ -0,0 +1,31 @@ +diff --git a/arch/riscv/cpu/start.S b/arch/riscv/cpu/start.S +index 6cecadfac5..f649844b23 100644 +--- a/arch/riscv/cpu/start.S ++++ b/arch/riscv/cpu/start.S +@@ -81,7 +81,7 @@ _start: + #if CONFIG_IS_ENABLED(RISCV_MMODE) + li 
t0, MIE_MSIE + #else +- li t0, SIE_SSIE ++ li t0, (SIE_SSIE + SIE_SEIE + SIE_STIE) + #endif + csrs MODE_PREFIX(ie), t0 + #endif +diff --git a/arch/riscv/lib/interrupts.c b/arch/riscv/lib/interrupts.c +index a26ccc721f..b8d2a71223 100644 +--- a/arch/riscv/lib/interrupts.c ++++ b/arch/riscv/lib/interrupts.c +@@ -193,10 +193,13 @@ ulong handle_trap(ulong cause, ulong epc, ulong tval, struct pt_regs *regs) + switch (irq) { + case IRQ_M_EXT: + case IRQ_S_EXT: ++ printf("u-boot: got ext interrupt %lu\n", irq); ++ show_regs(regs); + external_interrupt(0); /* handle external interrupt */ + break; + case IRQ_M_TIMER: + case IRQ_S_TIMER: ++ printf("u-boot: got timer interrupt %lu\n", irq); + timer_interrupt(0); /* handle timer interrupt */ + break; + default: diff --git a/patches/uboot-exception-extras.patch b/patches/uboot-exception-extras.patch new file mode 100644 index 0000000000000000000000000000000000000000..b2511bab7c0818ffcf8ba97e059ecdb005c8cf37 --- /dev/null +++ b/patches/uboot-exception-extras.patch @@ -0,0 +1,65 @@ +diff --git a/cmd/riscv/exception.c b/cmd/riscv/exception.c +index f38f454a0b..9de4effe47 100644 +--- a/cmd/riscv/exception.c ++++ b/cmd/riscv/exception.c +@@ -56,6 +56,40 @@ static int do_undefined(struct cmd_tbl *cmdtp, int flag, int argc, + return CMD_RET_FAILURE; + } + ++static int do_sregs(struct cmd_tbl *cmdtp, int flag, int argc, ++ char *const argv[]) ++{ ++ ulong stvec, sie, sip, sstatus; ++ ++ asm volatile ("fence"); ++ asm volatile ("csrr %0, stvec" : "=r"(stvec) : ); ++ asm volatile ("csrr %0, sie" : "=r"(sie) : ); ++ asm volatile ("csrr %0, sip" : "=r"(sip) : ); ++ asm volatile ("csrr %0, sstatus" : "=r"(sstatus) : ); ++ ++ printf("stvec : 0x%016lx\n", stvec); ++ printf("sie : 0x%016lx\n", sie); ++ printf("sip : 0x%016lx\n", sip); ++ printf("sstatus : 0x%016lx\n", sstatus); ++ ++ return CMD_RET_SUCCESS; ++} ++ ++static int do_enable(struct cmd_tbl *cmdtp, int flag, int argc, ++ char *const argv[]) ++{ ++ ulong which = SIE_SSIE | SIE_SEIE | 
SIE_STIE; ++ ++ asm volatile ( ++ "csrsi sstatus, 2\n" /* Enable SIE */ ++ "csrs sie, %0\n" /* Enable selected interrupts */ ++ : /* no output */ ++ : "r" (which) ++ ); ++ ++ return CMD_RET_SUCCESS; ++} ++ + static struct cmd_tbl cmd_sub[] = { + U_BOOT_CMD_MKENT(compressed, CONFIG_SYS_MAXARGS, 1, do_compressed, + "", ""), +@@ -67,6 +101,10 @@ static struct cmd_tbl cmd_sub[] = { + "", ""), + U_BOOT_CMD_MKENT(undefined, CONFIG_SYS_MAXARGS, 1, do_undefined, + "", ""), ++ U_BOOT_CMD_MKENT(sregs, CONFIG_SYS_MAXARGS, 1, do_sregs, ++ "", ""), ++ U_BOOT_CMD_MKENT(enable, CONFIG_SYS_MAXARGS, 1, do_enable, ++ "", ""), + }; + + static char exception_help_text[] = +@@ -77,6 +115,8 @@ static char exception_help_text[] = + " ialign16 - 16 bit aligned instruction\n" + " undefined - illegal instruction\n" + " unaligned - load address misaligned\n" ++ " sregs - print supervisor registers\n" ++ " enable - enable supervisor interrupts\n" + ; + + #include diff --git a/patches/update-users-groups.pl b/patches/update-users-groups.pl new file mode 100644 index 0000000000000000000000000000000000000000..3f859ca92b957eeba5ff202dffb7be144f03ef50 --- /dev/null +++ b/patches/update-users-groups.pl @@ -0,0 +1,379 @@ +use strict; +use warnings; +use File::Path qw(make_path); +use File::Slurp; +use Getopt::Long; +use JSON; +use Time::Piece; + +# Keep track of deleted uids and gids. +my $uidMapFile = "/var/lib/nixos/uid-map"; +my $uidMap = -e $uidMapFile ? decode_json(read_file($uidMapFile)) : {}; + +my $gidMapFile = "/var/lib/nixos/gid-map"; +my $gidMap = -e $gidMapFile ? 
decode_json(read_file($gidMapFile)) : {}; + +my $is_dry = ($ENV{'NIXOS_ACTION'} // "") eq "dry-activate"; +GetOptions("dry-activate" => \$is_dry); +make_path("/var/lib/nixos", { mode => 0755 }) unless $is_dry; + +sub updateFile { + my ($path, $contents, $perms) = @_; + return if $is_dry; + write_file($path, { atomic => 1, binmode => ':utf8', perms => $perms // 0644 }, $contents) or die; +} + +# Converts an ISO date to number of days since 1970-01-01 +sub dateToDays { + my ($date) = @_; + my $time = Time::Piece->strptime($date, "%Y-%m-%d"); + return $time->epoch / 60 / 60 / 24; +} + +sub nscdInvalidate { + system("echo", "nscd", "--invalidate", $_[0]) unless $is_dry; +} + +sub hashPassword { + my ($password) = @_; + my $salt = ""; + my @chars = ('.', '/', 0..9, 'A'..'Z', 'a'..'z'); + $salt .= $chars[rand 64] for (1..8); + return crypt($password, '$6$' . $salt . '$'); +} + +sub dry_print { + if ($is_dry) { + print STDERR ("$_[1] $_[2]\n") + } else { + print STDERR ("$_[0] $_[2]\n") + } +} + + +# Functions for allocating free GIDs/UIDs. FIXME: respect ID ranges in +# /etc/login.defs. +sub allocId { + my ($used, $prevUsed, $idMin, $idMax, $up, $getid) = @_; + my $id = $up ? $idMin : $idMax; + while ($id >= $idMin && $id <= $idMax) { + if (!$used->{$id} && !$prevUsed->{$id} && !defined &$getid($id)) { + $used->{$id} = 1; + return $id; + } + $used->{$id} = 1; + if ($up) { $id++; } else { $id--; } + } + die "$0: out of free UIDs or GIDs\n"; +} + +my (%gidsUsed, %uidsUsed, %gidsPrevUsed, %uidsPrevUsed); + +sub allocGid { + my ($name) = @_; + my $prevGid = $gidMap->{$name}; + if (defined $prevGid && !defined $gidsUsed{$prevGid}) { + dry_print("reviving", "would revive", "group '$name' with GID $prevGid"); + $gidsUsed{$prevGid} = 1; + return $prevGid; + } + return allocId(\%gidsUsed, \%gidsPrevUsed, 400, 999, 0, sub { my ($gid) = @_; getgrgid($gid) }); +} + +sub allocUid { + my ($name, $isSystemUser) = @_; + my ($min, $max, $up) = $isSystemUser ? 
(400, 999, 0) : (1000, 29999, 1); + my $prevUid = $uidMap->{$name}; + if (defined $prevUid && $prevUid >= $min && $prevUid <= $max && !defined $uidsUsed{$prevUid}) { + dry_print("reviving", "would revive", "user '$name' with UID $prevUid"); + $uidsUsed{$prevUid} = 1; + return $prevUid; + } + return allocId(\%uidsUsed, \%uidsPrevUsed, $min, $max, $up, sub { my ($uid) = @_; getpwuid($uid) }); +} + +# Read the declared users/groups +my $spec = decode_json(read_file($ARGV[0])); + +# Don't allocate UIDs/GIDs that are manually assigned. +foreach my $g (@{$spec->{groups}}) { + $gidsUsed{$g->{gid}} = 1 if defined $g->{gid}; +} + +foreach my $u (@{$spec->{users}}) { + $uidsUsed{$u->{uid}} = 1 if defined $u->{uid}; +} + +# Likewise for previously used but deleted UIDs/GIDs. +$uidsPrevUsed{$_} = 1 foreach values %{$uidMap}; +$gidsPrevUsed{$_} = 1 foreach values %{$gidMap}; + + +# Read the current /etc/group. +sub parseGroup { + chomp; + my @f = split(':', $_, -4); + my $gid = $f[2] eq "" ? undef : int($f[2]); + $gidsUsed{$gid} = 1 if defined $gid; + return ($f[0], { name => $f[0], password => $f[1], gid => $gid, members => $f[3] }); +} + +my %groupsCur = -f "/etc/group" ? map { parseGroup } read_file("/etc/group", { binmode => ":utf8" }) : (); + +# Read the current /etc/passwd. +sub parseUser { + chomp; + my @f = split(':', $_, -7); + my $uid = $f[2] eq "" ? undef : int($f[2]); + $uidsUsed{$uid} = 1 if defined $uid; + return ($f[0], { name => $f[0], fakePassword => $f[1], uid => $uid, + gid => $f[3], description => $f[4], home => $f[5], shell => $f[6] }); +} +my %usersCur = -f "/etc/passwd" ? map { parseUser } read_file("/etc/passwd", { binmode => ":utf8" }) : (); + +# Read the groups that were created declaratively (i.e. not by groups) +# in the past. These must be removed if they are no longer in the +# current spec. +my $declGroupsFile = "/var/lib/nixos/declarative-groups"; +my %declGroups; +$declGroups{$_} = 1 foreach split / /, -e $declGroupsFile ? 
read_file($declGroupsFile, { binmode => ":utf8" }) : ""; + +# Idem for the users. +my $declUsersFile = "/var/lib/nixos/declarative-users"; +my %declUsers; +$declUsers{$_} = 1 foreach split / /, -e $declUsersFile ? read_file($declUsersFile, { binmode => ":utf8" }) : ""; + + +# Generate a new /etc/group containing the declared groups. +my %groupsOut; +foreach my $g (@{$spec->{groups}}) { + my $name = $g->{name}; + my $existing = $groupsCur{$name}; + + my %members = map { ($_, 1) } @{$g->{members}}; + + if (defined $existing) { + $g->{gid} = $existing->{gid} if !defined $g->{gid}; + if ($g->{gid} != $existing->{gid}) { + dry_print("warning: not applying", "warning: would not apply", "GID change of group ‘$name’ ($existing->{gid} -> $g->{gid}) in /etc/group"); + $g->{gid} = $existing->{gid}; + } + $g->{password} = $existing->{password}; # do we want this? + if ($spec->{mutableUsers}) { + # Merge in non-declarative group members. + foreach my $uname (split /,/, $existing->{members} // "") { + $members{$uname} = 1 if !defined $declUsers{$uname}; + } + } + } else { + $g->{gid} = allocGid($name) if !defined $g->{gid}; + $g->{password} = "x"; + } + + $g->{members} = join ",", sort(keys(%members)); + $groupsOut{$name} = $g; + + $gidMap->{$name} = $g->{gid}; +} + +# Update the persistent list of declarative groups. +updateFile($declGroupsFile, join(" ", sort(keys %groupsOut))); + +# Merge in the existing /etc/group. +foreach my $name (keys %groupsCur) { + my $g = $groupsCur{$name}; + next if defined $groupsOut{$name}; + if (!$spec->{mutableUsers} || defined $declGroups{$name}) { + dry_print("removing group", "would remove group", "‘$name’"); + } else { + $groupsOut{$name} = $g; + } +} + + +# Rewrite /etc/group. FIXME: acquire lock. +my @lines = map { join(":", $_->{name}, $_->{password}, $_->{gid}, $_->{members}) . 
"\n" } + (sort { $a->{gid} <=> $b->{gid} } values(%groupsOut)); +updateFile($gidMapFile, to_json($gidMap, {canonical => 1})); +updateFile("/etc/group", \@lines); +nscdInvalidate("group"); + +# Generate a new /etc/passwd containing the declared users. +my %usersOut; +foreach my $u (@{$spec->{users}}) { + my $name = $u->{name}; + + # Resolve the gid of the user: a group written as a decimal number (any number of digits) is used directly as the GID, otherwise it is looked up by name among the generated groups. + if ($u->{group} =~ /^[0-9]+$/) { + $u->{gid} = $u->{group}; + } elsif (defined $groupsOut{$u->{group}}) { + $u->{gid} = $groupsOut{$u->{group}}->{gid} // die; + } else { + warn "warning: user ‘$name’ has unknown group ‘$u->{group}’\n"; + $u->{gid} = 65534; + } + + my $existing = $usersCur{$name}; + if (defined $existing) { + $u->{uid} = $existing->{uid} if !defined $u->{uid}; + if ($u->{uid} != $existing->{uid}) { + dry_print("warning: not applying", "warning: would not apply", "UID change of user ‘$name’ ($existing->{uid} -> $u->{uid}) in /etc/passwd"); + $u->{uid} = $existing->{uid}; + } + } else { + $u->{uid} = allocUid($name, $u->{isSystemUser}) if !defined $u->{uid}; + + if (!defined $u->{hashedPassword}) { + if (defined $u->{initialPassword}) { + $u->{hashedPassword} = hashPassword($u->{initialPassword}); + } elsif (defined $u->{initialHashedPassword}) { + $u->{hashedPassword} = $u->{initialHashedPassword}; + } + } + } + + # Ensure home directory incl. ownership and permissions. + if ($u->{createHome} and !$is_dry) { + make_path($u->{home}, { mode => oct($u->{homeMode}) }) if ! 
-e $u->{home}; + chown $u->{uid}, $u->{gid}, $u->{home}; + chmod oct($u->{homeMode}), $u->{home}; + } + + if (defined $u->{hashedPasswordFile}) { + if (-e $u->{hashedPasswordFile}) { + $u->{hashedPassword} = read_file($u->{hashedPasswordFile}); + chomp $u->{hashedPassword}; + } else { + warn "warning: password file ‘$u->{hashedPasswordFile}’ does not exist\n"; + } + } elsif (defined $u->{password}) { + $u->{hashedPassword} = hashPassword($u->{password}); + } + + if (!defined $u->{shell}) { + if (defined $existing) { + $u->{shell} = $existing->{shell}; + } else { + warn "warning: no declarative or previous shell for ‘$name’, setting shell to nologin\n"; + $u->{shell} = "/run/current-system/sw/bin/nologin"; + } + } + + $u->{fakePassword} = $existing->{fakePassword} // "x"; + $usersOut{$name} = $u; + + $uidMap->{$name} = $u->{uid}; +} + +# Update the persistent list of declarative users. +updateFile($declUsersFile, join(" ", sort(keys %usersOut))); + +# Merge in the existing /etc/passwd. +foreach my $name (keys %usersCur) { + my $u = $usersCur{$name}; + next if defined $usersOut{$name}; + if (!$spec->{mutableUsers} || defined $declUsers{$name}) { + dry_print("removing user", "would remove user", "‘$name’"); + } else { + $usersOut{$name} = $u; + } +} + +# Rewrite /etc/passwd. FIXME: acquire lock. +@lines = map { join(":", $_->{name}, $_->{fakePassword}, $_->{uid}, $_->{gid}, $_->{description}, $_->{home}, $_->{shell}) . "\n" } + (sort { $a->{uid} <=> $b->{uid} } (values %usersOut)); +updateFile($uidMapFile, to_json($uidMap, {canonical => 1})); +updateFile("/etc/passwd", \@lines); +nscdInvalidate("passwd"); + + +# Rewrite /etc/shadow to add new accounts or remove dead ones. +my @shadowNew; +my %shadowSeen; + +foreach my $line (-f "/etc/shadow" ? 
read_file("/etc/shadow", { binmode => ":utf8" }) : ()) { + chomp $line; + # struct name copied from `man 3 shadow` + my ($sp_namp, $sp_pwdp, $sp_lstch, $sp_min, $sp_max, $sp_warn, $sp_inact, $sp_expire, $sp_flag) = split(':', $line, -9); + my $u = $usersOut{$sp_namp};; + next if !defined $u; + $sp_pwdp = "!" if !$spec->{mutableUsers}; + $sp_pwdp = $u->{hashedPassword} if defined $u->{hashedPassword} && !$spec->{mutableUsers}; # FIXME + $sp_expire = dateToDays($u->{expires}) if defined $u->{expires}; + chomp $sp_pwdp; + push @shadowNew, join(":", $sp_namp, $sp_pwdp, $sp_lstch, $sp_min, $sp_max, $sp_warn, $sp_inact, $sp_expire, $sp_flag) . "\n"; + $shadowSeen{$sp_namp} = 1; +} + +foreach my $u (values %usersOut) { + next if defined $shadowSeen{$u->{name}}; + my $hashedPassword = "!"; + $hashedPassword = $u->{hashedPassword} if defined $u->{hashedPassword}; + my $expires = ""; + $expires = dateToDays($u->{expires}) if defined $u->{expires}; + # FIXME: set correct value for sp_lstchg. + push @shadowNew, join(":", $u->{name}, $hashedPassword, "1::::", $expires, "") . "\n"; +} + +updateFile("/etc/shadow", \@shadowNew, 0640); +{ + my $uid = getpwnam "root"; + my $gid = getgrnam "shadow"; + my $path = "/etc/shadow"; + (chown($uid, $gid, $path) || die "Failed to change ownership of $path: $!") unless $is_dry; +} + +# Rewrite /etc/subuid & /etc/subgid to include default container mappings + +my $subUidMapFile = "/var/lib/nixos/auto-subuid-map"; +my $subUidMap = -e $subUidMapFile ? decode_json(read_file($subUidMapFile)) : {}; + +my (%subUidsUsed, %subUidsPrevUsed); + +$subUidsPrevUsed{$_} = 1 foreach values %{$subUidMap}; + +sub allocSubUid { + my ($name, @rest) = @_; + + # TODO: No upper bounds? 
+ my ($min, $max, $up) = (100000, 100000 * 100, 1); + my $prevId = $subUidMap->{$name}; + if (defined $prevId && !defined $subUidsUsed{$prevId}) { + $subUidsUsed{$prevId} = 1; + return $prevId; + } + + my $id = allocId(\%subUidsUsed, \%subUidsPrevUsed, $min, $max, $up, sub { my ($uid) = @_; getpwuid($uid) }); + my $offset = $id - 100000; + my $count = $offset * 65536; + my $subordinate = 100000 + $count; + return $subordinate; +} + +my @subGids; +my @subUids; +foreach my $u (values %usersOut) { + my $name = $u->{name}; + + foreach my $range (@{$u->{subUidRanges}}) { + my $value = join(":", ($name, $range->{startUid}, $range->{count})); + push @subUids, $value; + } + + foreach my $range (@{$u->{subGidRanges}}) { + my $value = join(":", ($name, $range->{startGid}, $range->{count})); + push @subGids, $value; + } + + if($u->{autoSubUidGidRange}) { + my $subordinate = allocSubUid($name); + $subUidMap->{$name} = $subordinate; + my $value = join(":", ($name, $subordinate, 65536)); + push @subUids, $value; + push @subGids, $value; + } +} + +updateFile("/etc/subuid", join("\n", @subUids) . "\n"); +updateFile("/etc/subgid", join("\n", @subGids) . "\n"); +updateFile($subUidMapFile, encode_json($subUidMap) . "\n"); diff --git a/pkgs/llvm-epi/clang.nix b/pkgs/llvm-epi/clang.nix new file mode 100644 index 0000000000000000000000000000000000000000..e3e85f1c5eec150ef2633121f9cc1b0bc3476df7 --- /dev/null +++ b/pkgs/llvm-epi/clang.nix @@ -0,0 +1,124 @@ +{ + stdenv +, llvmPackages_latest +, lib +, fetchFromGitHub +, cmake +, bash +, python3 +, perl +, which +, elfutils +, libffi +, zlib +, pkg-config +, enableDebug ? false +, gitUrl ? "https://repo.hca.bsc.es/gitlab/rferrer/llvm-epi.git" +, gitBranch ? "EPI-0.7" +, gitCommit ? 
"479518dc58dfceb23fc90667a5d6253e429f0fc2" +}: + +let + llvmPackages = llvmPackages_latest; + llvmStdenv = llvmPackages.stdenv; + # needed to set the rpath of libstdc++ for clang-tblgen + gcc = stdenv.cc; + + git = rec { + version = src.shortRev; + src = builtins.fetchGit { + url = gitUrl; + ref = gitBranch; + rev = gitCommit; + }; + }; + + source = git; + +in llvmStdenv.mkDerivation rec { + pname = "clang-epi"; + inherit (source) src version; + + enableParallelBuilding = true; + isClang = true; + + patches = if (gitBranch == "EPI-0.7") then [ + ./include-cstdint.patch + ] else [ + ]; + + # See https://reviews.llvm.org/D135402 + env.LDFLAGS = "-Wl,--undefined-version"; + + passthru = { + CC = "clang"; + CXX = "clang++"; + }; + + nativeBuildInputs = [ + zlib + gcc.cc.lib # Required for libstdc++.so.6 + ]; + + buildInputs = [ + which + bash + python3 + perl + cmake + llvmPackages.lld + elfutils + libffi + pkg-config + zlib + ]; + + # Error with -D_FORTIFY_SOURCE=2, see https://bugs.gentoo.org/636604: + # /build/source/compiler-rt/lib/tsan/dd/dd_interceptors.cpp:225:20: + # error: redefinition of 'realpath' + # Requires disabling the "fortify" set of flags, however, for performance we + # disable all: + hardeningDisable = [ "all" ]; + + cmakeBuildType = if enableDebug then "Debug" else "Release"; + + dontStrip = enableDebug; + + dontUseCmakeBuildDir = true; + + # Fix shebangs, /usr/bin/env doesn't exist + prePatch = '' + patchShebangs clang/utils/EPI/generate-epi-builtins-def.py + ''; + + # Fix the host triple, as it has changed in a newer config.guess: + # https://git.savannah.gnu.org/gitweb/?p=config.git;a=commitdiff;h=ca9bfb8cc75a2be1819d89c664a867785c96c9ba + preConfigure = '' + mkdir -p build + cd build + cmakeDir="../llvm" + cmakeFlagsArray=( + "-DLLVM_HOST_TRIPLE=${llvmStdenv.targetPlatform.config}" + "-DLLVM_DEFAULT_TARGET_TRIPLE=riscv64-unknown-linux-gnu" + "-DLLVM_TARGETS_TO_BUILD=RISCV" + "-DLLVM_BUILD_LLVM_DYLIB=ON" + "-DLLVM_LINK_LLVM_DYLIB=ON" + # 
Required to run clang-ast-dump and clang-tblgen during build + "-DCMAKE_BUILD_RPATH=$PWD/lib:${zlib}/lib:${gcc.cc.lib}/lib" + "-DLLVM_ENABLE_LLD=ON" + "-DCMAKE_CXX_FLAGS_DEBUG=-g -ggnu-pubnames" + "-DCMAKE_EXE_LINKER_FLAGS_DEBUG=-Wl,--gdb-index" + "-DLLVM_LIT_ARGS=-sv --xunit-xml-output=xunit.xml" + "-DLLVM_ENABLE_PROJECTS=clang;lld" + "-DLLVM_ENABLE_ASSERTIONS=ON" + "-DLLVM_INSTALL_TOOLCHAIN_ONLY=ON" + "-DCMAKE_INSTALL_BINDIR=bin" + "-DLLVM_ENABLE_ZLIB=FORCE_ON" + "-DLLVM_ENABLE_LIBXML2=OFF" + # Set the rpath to include external libraries (zlib) both on build and + # install + "-DCMAKE_INSTALL_RPATH_USE_LINK_PATH=ON" + "-DCMAKE_INSTALL_RPATH=${zlib}/lib:${gcc.cc.lib}/lib" + ) + ''; +} diff --git a/pkgs/llvm-epi/default.nix b/pkgs/llvm-epi/default.nix new file mode 100644 index 0000000000000000000000000000000000000000..9fe7c8edf6c03c51f9ea380709319953106f50d4 --- /dev/null +++ b/pkgs/llvm-epi/default.nix @@ -0,0 +1,46 @@ +{ + stdenv +, lib +, gcc +, clangEpiUnwrapped +, openmp ? null +, wrapCCWith +, llvmPackages_latest +, ompss2rt ? null +}: + +let + # We need to replace the lld linker from bintools with our linker just built, + # otherwise we run into incompatibility issues when mixing compiler and linker + # versions. 
+ bintools-unwrapped = llvmPackages_latest.tools.bintools-unwrapped.override { + lld = clangEpiUnwrapped; + }; + bintools = llvmPackages_latest.tools.bintools.override { + bintools = bintools-unwrapped; + }; + targetConfig = stdenv.targetPlatform.config; + inherit gcc; + cc = clangEpiUnwrapped; +in wrapCCWith { + inherit cc bintools; + # extraPackages adds packages to depsTargetTargetPropagated + extraPackages = lib.optional (openmp != null) openmp; + extraBuildCommands = '' + echo "-target ${targetConfig}" >> $out/nix-support/cc-cflags + echo "-B${gcc.cc}/lib/gcc/${targetConfig}/${gcc.version}" >> $out/nix-support/cc-cflags + echo "-L${gcc.cc}/lib/gcc/${targetConfig}/${gcc.version}" >> $out/nix-support/cc-ldflags + echo "-L${gcc.cc.lib}/lib" >> $out/nix-support/cc-ldflags + + for dir in ${gcc.cc}/include/c++/*; do + echo "-isystem $dir" >> $out/nix-support/libcxx-cxxflags + done + for dir in ${gcc.cc}/include/c++/*/${targetConfig}; do + echo "-isystem $dir" >> $out/nix-support/libcxx-cxxflags + done + + echo "--gcc-toolchain=${gcc}" >> $out/nix-support/cc-cflags + + wrap clang++ $wrapper $ccPath/clang++ + ''; +} diff --git a/pkgs/llvm-epi/include-cstdint.patch b/pkgs/llvm-epi/include-cstdint.patch new file mode 100644 index 0000000000000000000000000000000000000000..aed75d7da95e43ddf46bac0113af03c05ca093d7 --- /dev/null +++ b/pkgs/llvm-epi/include-cstdint.patch @@ -0,0 +1,10 @@ +--- a/llvm/include/llvm/Support/Signals.h 2024-09-25 08:34:21.257642944 +0200 ++++ b/llvm/include/llvm/Support/Signals.h 2024-09-25 08:35:12.593556793 +0200 +@@ -15,6 +15,7 @@ + #define LLVM_SUPPORT_SIGNALS_H + + #include ++#include + + namespace llvm { + class StringRef; diff --git a/pkgs/rvb/Makefile b/pkgs/rvb/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..39a3524e630ff1eb02e189c0811e239335edb0b4 --- /dev/null +++ b/pkgs/rvb/Makefile @@ -0,0 +1,79 @@ +include Makefile.in + +HPC_BENCHMARKS_DIRS=\ + axpy \ + jacobi-2d \ + somier \ +# Require submodules: +# 
+# fft \ +# fftp \ +# Require vehave: +# lulesh \ +# Missing compare_array_double: +# gemm \ +# Broken, not found: +# spmv + +DESKTOP_BENCHMARKS_DIRS=\ + blackscholes \ + canneal \ + particlefilter \ + streamcluster \ + swaptions + #pathfinder + +MICRO_BENCHMARKS_DIRS= + #BuffCopyUnit \ + #BuffCopyStrided \ + #BuffCopyIndexed \ + #FpuMicroKernel \ + #InstrNopBalance \ + #MemArithBalance \ + #LatencyVrgather + +.PHONY: default all all-types base install fftp spmv-ellpack clean $(HPC_BENCHMARKS_DIRS) $(MICRO_BENCHMARKS_DIRS) # command targets, never files + +all: base + +default: + @cd common; ${MAKE}; cd .. + @$(foreach dir,$(HPC_BENCHMARKS_DIRS),${MAKE} -C hpc_benchmarks/$(dir);) + @$(foreach dir,$(DESKTOP_BENCHMARKS_DIRS),${MAKE} -C desktop_benchmarks/$(dir);) + @$(foreach dir,$(MICRO_BENCHMARKS_DIRS),${MAKE} -C micro_benchmarks/$(dir);) + +all-types: + @cd common; ${MAKE} all; cd .. + @$(foreach dir,$(HPC_BENCHMARKS_DIRS),${MAKE} -C hpc_benchmarks/$(dir) all ;) + @$(foreach dir,$(DESKTOP_BENCHMARKS_DIRS),${MAKE} -C desktop_benchmarks/$(dir) all ;) + @$(foreach dir,$(MICRO_BENCHMARKS_DIRS),${MAKE} -C micro_benchmarks/$(dir) all ;) + +base: + @cd common; ${MAKE} all; cd .. 
+ @$(foreach dir,$(HPC_BENCHMARKS_DIRS),${MAKE} -C hpc_benchmarks/$(dir) base ;) + @$(foreach dir,$(DESKTOP_BENCHMARKS_DIRS),${MAKE} -C desktop_benchmarks/$(dir) base ;) + @$(foreach dir,$(MICRO_BENCHMARKS_DIRS),${MAKE} -C micro_benchmarks/$(dir) base ;) + +install: + @$(foreach dir,$(HPC_BENCHMARKS_DIRS),${MAKE} -C hpc_benchmarks/$(dir) install ;) + @$(foreach dir,$(DESKTOP_BENCHMARKS_DIRS),${MAKE} -C desktop_benchmarks/$(dir) install ;) + @$(foreach dir,$(MICRO_BENCHMARKS_DIRS),${MAKE} -C micro_benchmarks/$(dir) install ;) + +fftp: + ${MAKE} -C third_party fftw + ${MAKE} -C hpc_benchmarks/fftp all + ${MAKE} -C hpc_benchmarks/fftp/test all + +spmv-ellpack: + rm -rf hpc_benchmarks/spmv-ellpack/spmv/build + mkdir -p hpc_benchmarks/spmv-ellpack/spmv/build + cd hpc_benchmarks/spmv-ellpack/spmv/build;\ + ../configure riscv;\ + INDEX64=1 EPI_EXT=07 PATH=${EPI_LLVM_HOME}/bin:${PATH} make + +clean: + @cd common; make clean; cd .. + @$(foreach dir,$(HPC_BENCHMARKS_DIRS),${MAKE} -C hpc_benchmarks/$(dir) clean ;) + @$(foreach dir,$(DESKTOP_BENCHMARKS_DIRS),${MAKE} -C desktop_benchmarks/$(dir) clean ;) + @$(foreach dir,$(MICRO_BENCHMARKS_DIRS),${MAKE} -C micro_benchmarks/$(dir) clean ;) + @rm -rf hpc_benchmarks/spmv-ellpack/spmv/build + diff --git a/pkgs/rvb/Makefile.in b/pkgs/rvb/Makefile.in new file mode 100644 index 0000000000000000000000000000000000000000..8c8b4dc8cf8fc24bd376b29232cfe78725f9cfe0 --- /dev/null +++ b/pkgs/rvb/Makefile.in @@ -0,0 +1,83 @@ +#Compile all benchmarks with individual settings defined in their Makefiles + +_default-target: default + +# RVB_ROOT defined as argument + +# TODO: RVB_COMMON_DIR should substitute COMMON_DIR +COMMON_DIR=$(RVB_ROOT)/common +RVB_COMMON_DIR=$(RVB_ROOT)/common + +#CC=clang +#CXX=clang++ + +# Needs EPI clang +#MEPI=-mepi +MEPI= + +#VREPORT_FLAGS=-Rpass=loop-vectorize -Rpass-missed=loop-vectorize -Rpass-analysis=loop-vectorize +#Available CFLAGS conditional compilation: +# -DUSE_MALLOC_HP, redefines the use of malloc and free 
+ +#Makefile: General compiler flags: CFLAGS, CFLAGS_, CFLAGS_, ... +#CFLAGS=-B ${LLVM_BIN} -Wall -Wextra -march=rv64g -O2 -I${RVB_COMMON_DIR} +CFLAGS=-B ${LLVM_BIN} -Wall -Wextra -O2 -I${RVB_COMMON_DIR} +LDFLAGS= + +#Only scalar instructions +CFLAGS_SCALAR=${CFLAGS} -DRVB_USE_SCALAR +LDFLAGS_SCALAR= + +#NOVEC=-fno-vectorize +NOVEC= + +#Vector instructions using intrinsics +CFLAGS_VECTORIAL=${CFLAGS} ${NOVEC} $(MEPI) +LDFLAGS_VECTORIAL= + +# TODO: RVV should substitute VECTORIAL (?) +#Vector instructions using intrinsics +CFLAGS_RVV=${CFLAGS} -DRVB_USE_RVV ${NOVEC} $(MEPI) +LDFLAGS_RVV=${LDFLAGS} + +#Vector instructions only when code is annotated +CFLAGS_EXPLICIT_AUTOVECTORIZATION=${CFLAGS} -fopenmp-simd ${NOVEC} $(MEPI) ${VREPORT_FLAGS} +LDFLAGS_EXPLICIT_AUTOVECTORIZATION= + +# TODO: OMP substitutes EXPLICIT_AUTOVECTORIZATION +#Vector instructions only when code is annotated +CFLAGS_OMP=${CFLAGS} -DRVB_USE_OMP -fopenmp-simd $(MEPI) ${VREPORT_FLAGS} +LDFLAGS_OMP=${LDFLAGS} + +#Vector instructions when compiler decides +CFLAGS_AUTOVECTORIZATION=${CFLAGS} -fopenmp-simd $(MEPI) ${VREPORT_FLAGS} +LDFLAGS_AUTOVECTORIZATION= + +# TODO: AUTOVECT should substitute AUTOVECTORIZATION (?) 
+#Vector instructions when compiler decides +CFLAGS_AUTOVECT=${CFLAGS} -DRVB_USE_AUTOVECT -fopenmp-simd $(MEPI) ${VREPORT_FLAGS} +LDFLAGS_AUTOVECT=${LDFLAGS} + +# CBLAS library compile and link flags +CFLAGS_CBLAS=${CFLAGS} -DRVB_USE_CBLAS -I${CBLAS_INC} -I${LLVM_INC} +LDFLAGS_CBLAS=${LDFLAGS} -L${CBLAS_LIB} -lblis -Wl,-rpath,${CBLAS_LIB} -fopenmp + +# BARE-METAL compile and link flags +# You may consider to disable OpenPiton Stream: -DDISABLE_OPSTREAM +CFLAGS_BMETAL=${CFLAGS} -DRVB_USE_BMETAL -B ${RVB_BMETAL_DIR} \ + --target=riscv64-unknown-elf \ + -I../ \ + -I/apps/riscv/llvm/EPI-0.7/cross/development/riscv64-unknown-linux-gnu/sysroot/usr/include \ + -I/usr/include/riscv64-linux-gnu \ + -I/usr/include \ + -I${RVB_BMETAL_DIR}/env \ + -I${RVB_BMETAL_DIR} \ + -DPREALLOCATE=1 -mcmodel=medany \ + -static -std=gnu99 \ + -ffast-math \ + -fno-common \ + -fno-builtin-printf \ + ${RVB_BMETAL_DIR}/syscalls.c \ + ${RVB_BMETAL_DIR}/crt.S \ + -static -nostdlib -T \ + ${RVB_BMETAL_DIR}/test.ld diff --git a/pkgs/rvb/default.nix b/pkgs/rvb/default.nix new file mode 100644 index 0000000000000000000000000000000000000000..bf9362897d789ed570a1412c7045be4483f240ee --- /dev/null +++ b/pkgs/rvb/default.nix @@ -0,0 +1,35 @@ +{ + stdenv +, blis +, gitBranch ? "nix-fixes" +, gitURL ? "git@gitlab-internal.bsc.es:rarias/risc-v-benchmarks.git" +, gitCommit ? "da202d6f818421b72e06c39b5417ad2f8f6ca23c" +}: + +stdenv.mkDerivation rec { + pname = "rvb"; + version = "${src.shortRev}"; + + src = builtins.fetchGit { + url = gitURL; + ref = gitBranch; + rev = gitCommit; + submodules = true; + }; + + buildInputs = [ blis ]; + configurePhase = '' + export RVB_ROOT=$(readlink -f .) 
+ export CBLAS_HOME=${blis} + export CBLAS_INC=${blis}/include/blis + export CBLAS_LIB=${blis}/lib + rm Makefile.in + ln -s ${./Makefile.in} Makefile.in + rm Makefile + ln -s ${./Makefile} Makefile + ''; + enableParallelBuilding = false; + hardeningDisable = [ "all" ]; + installFlags = [ "DESTDIR=$(out)" ]; + dontStrip = true; +} diff --git a/pkgs/spec-cpu/Makefile b/pkgs/spec-cpu/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..5d6519ec416f2745dc6e34e1a50c1208ac842a38 --- /dev/null +++ b/pkgs/spec-cpu/Makefile @@ -0,0 +1,66 @@ +include Makefile.in + +HPC_BENCHMARKS_DIRS=\ + axpy \ + jacobi-2d \ + somier \ +# Require submodules: +# fft \ +# fftp \ +# Require vehave: +# lulesh \ +# Missing compare_array_double: +# gemm \ +# Broken, not found: +# spmv + +DESKTOP_BENCHMARKS_DIRS=\ + blackscholes \ + canneal \ + particlefilter \ + pathfinder \ + streamcluster \ + swaptions + +MICRO_BENCHMARKS_DIRS=\ + BuffCopyUnit \ + BuffCopyStrided \ + BuffCopyIndexed \ + FpuMicroKernel \ + InstrNopBalance \ + MemArithBalance \ + LatencyVrgather + +.PHONY: default all clean $(HPC_BENCHMARKS_DIRS) $(MICRO_BENCHMARKS_DIRS) + +default: + @cd common; make; cd .. + @$(foreach dir,$(HPC_BENCHMARKS_DIRS),${MAKE} -C hpc_benchmarks/$(dir);) + @$(foreach dir,$(DESKTOP_BENCHMARKS_DIRS),${MAKE} -C desktop_benchmarks/$(dir);) + @$(foreach dir,$(MICRO_BENCHMARKS_DIRS),${MAKE} -C micro_benchmarks/$(dir);) + +all: + @cd common; make all; cd .. 
+ @$(foreach dir,$(HPC_BENCHMARKS_DIRS),${MAKE} -C hpc_benchmarks/$(dir) all ;) + @$(foreach dir,$(DESKTOP_BENCHMARKS_DIRS),${MAKE} -C desktop_benchmarks/$(dir) all ;) + @$(foreach dir,$(MICRO_BENCHMARKS_DIRS),${MAKE} -C micro_benchmarks/$(dir) all ;) + +fftp: + ${MAKE} -C third_party fftw + ${MAKE} -C hpc_benchmarks/fftp all + ${MAKE} -C hpc_benchmarks/fftp/test all + +spmv-ellpack: + rm -rf hpc_benchmarks/spmv-ellpack/spmv/build + mkdir -p hpc_benchmarks/spmv-ellpack/spmv/build + cd hpc_benchmarks/spmv-ellpack/spmv/build;\ + ../configure riscv;\ + INDEX64=1 EPI_EXT=07 PATH=${EPI_LLVM_HOME}/bin:${PATH} make + +clean: + @cd common; make clean; cd .. + @$(foreach dir,$(HPC_BENCHMARKS_DIRS),${MAKE} -C hpc_benchmarks/$(dir) clean ;) + @$(foreach dir,$(DESKTOP_BENCHMARKS_DIRS),${MAKE} -C desktop_benchmarks/$(dir) clean ;) + @$(foreach dir,$(MICRO_BENCHMARKS_DIRS),${MAKE} -C micro_benchmarks/$(dir) clean ;) + @rm -rf hpc_benchmarks/spmv-ellpack/spmv/build + diff --git a/pkgs/spec-cpu/Makefile.in b/pkgs/spec-cpu/Makefile.in new file mode 100644 index 0000000000000000000000000000000000000000..b828f6e144ebbc84574d01810dbdd60470bb4b6e --- /dev/null +++ b/pkgs/spec-cpu/Makefile.in @@ -0,0 +1,80 @@ +#Compile all benchmarks with individual settings defined in their Makefiles + +_default-target: default + +# RVB_ROOT defined as argument + +# TODO: RVB_COMMON_DIR should substitute COMMON_DIR +COMMON_DIR=$(RVB_ROOT)/common +RVB_COMMON_DIR=$(RVB_ROOT)/common + +CC=clang +CXX=clang++ + +# Needs EPI clang +#MEPI=-mepi +MEPI= + +VREPORT_FLAGS=-Rpass=loop-vectorize -Rpass-missed=loop-vectorize -Rpass-analysis=loop-vectorize +#Available CFLAGS conditional compilation: +# -DUSE_MALLOC_HP, redefines the use of malloc and free + +#Makefile: General compiler flags: CFLAGS, CFLAGS_, CFLAGS_, ... 
+#CFLAGS=-B ${LLVM_BIN} -Wall -Wextra -march=rv64g -O2 -I${RVB_COMMON_DIR} +CFLAGS=-B ${LLVM_BIN} -Wall -Wextra -O2 -I${RVB_COMMON_DIR} +LDFLAGS= + +#Only scalar instructions +CFLAGS_SCALAR=${CFLAGS} -DRVB_USE_SCALAR +LDFLAGS_SCALAR= + +#Vector instructions using intrinsics +CFLAGS_VECTORIAL=${CFLAGS} -fno-vectorize $(MEPI) +LDFLAGS_VECTORIAL= + +# TODO: RVV should substitute VECTORIAL (?) +#Vector instructions using intrinsics +CFLAGS_RVV=${CFLAGS} -DRVB_USE_RVV -fno-vectorize $(MEPI) +LDFLAGS_RVV=${LDFLAGS} + +#Vector instructions only when code is annotate +CFLAGS_EXPLICIT_AUTOVECTORIZATION=${CFLAGS} -fopenmp-simd -fno-vectorize $(MEPI) ${VREPORT_FLAGS} +LDFLAGS_EXPLICIT_AUTOVECTORIZATION= + +# TODO: OMP substitutes EXPLICIT_AUTOVECTORIZATION +#Vector instructions only when code is annotate +CFLAGS_OMP=${CFLAGS} -DRVB_USE_OMP -fopenmp-simd -fno-vectorize $(MEPI) ${VREPORT_FLAGS} +LDFLAGS_OMP=${LDFLAGS} + +#Vector instructions when compiler decides +CFLAGS_AUTOVECTORIZATION=${CFLAGS} -fopenmp-simd $(MEPI) ${VREPORT_FLAGS} +LDFLAGS_AUTOVECTORIZATION= + +# TODO: AUTOVECT shoud substitue AUTOVECTORIZATION (?) 
+#Vector instructions when compiler decides +CFLAGS_AUTOVECT=${CFLAGS} -DRVB_USE_AUTOVECT -fopenmp-simd $(MEPI) ${VREPORT_FLAGS} +LDFLAGS_AUTOVECT=${LDFLAGS} + +# CBLAS library compile and link flags +CFLAGS_CBLAS=${CFLAGS} -DRVB_USE_CBLAS -I${CBLAS_INC} -I${LLVM_INC} +LDFLAGS_CBLAS=${LDFLAGS} -L${CBLAS_LIB} -lblis -Wl,-rpath,${CBLAS_LIB} -fopenmp + +# BARE-METAL compile and link flags +# You may consider to disable OpenPiton Stream: -DDISABLE_OPSTREAM +CFLAGS_BMETAL=${CFLAGS} -DRVB_USE_BMETAL -B ${RVB_BMETAL_DIR} \ + --target=riscv64-unknown-elf \ + -I../ \ + -I/apps/riscv/llvm/EPI-0.7/cross/development/riscv64-unknown-linux-gnu/sysroot/usr/include \ + -I/usr/include/riscv64-linux-gnu \ + -I/usr/include \ + -I${RVB_BMETAL_DIR}/env \ + -I${RVB_BMETAL_DIR} \ + -DPREALLOCATE=1 -mcmodel=medany \ + -static -std=gnu99 \ + -ffast-math \ + -fno-common \ + -fno-builtin-printf \ + ${RVB_BMETAL_DIR}/syscalls.c \ + ${RVB_BMETAL_DIR}/crt.S \ + -static -nostdlib -T \ + ${RVB_BMETAL_DIR}/test.ld diff --git a/pkgs/spec-cpu/default.nix b/pkgs/spec-cpu/default.nix new file mode 100644 index 0000000000000000000000000000000000000000..56cd2eb61feb8dbef252e01272048d1d0e207558 --- /dev/null +++ b/pkgs/spec-cpu/default.nix @@ -0,0 +1,61 @@ +{ + stdenv +, spec-cpu-tools +, libarchive +, xz +, gnutar +, gfortran +, coreutils +, requireFile +, autoPatchelfHook +, libxcrypt-legacy +, lib +, benchSize ? 
"test" +}: + +stdenv.mkDerivation rec { + pname = "spec-cpu"; + version = "1.1.7"; + src = null; + unpackPhase = "true"; + # We need a working specxz binary + config = ./gcc-linux-x86.cfg; + buildPhase = '' + cp ${config} config.cfg + chmod +w config.cfg + export SPEC_NOCHECK=1 + mkdir out + runcpu \ + --verbose=80 \ + --tune=base \ + --size=${benchSize} \ + --output_root=$PWD/out \ + --config=$PWD/config.cfg \ + --define build_ncpus=$NIX_BUILD_CORES \ + --action=runsetup \ + all + ''; + # 519.lbm_r + # 502.gcc_r + # all + + # We only need the run directories + installPhase = '' + # Remove build/ and exe/ directories, we only need run/ + rm -rf out/benchspec/CPU/*/build/ + rm -rf out/benchspec/CPU/*/exe/ + + mkdir -p $out + cp -a out/benchspec $out + ''; + + #buildInputs = [ ]; + nativeBuildInputs = [ spec-cpu-tools gfortran ]; + enableParallelBuilding = false; + hardeningDisable = [ "all" ]; + dontStrip = true; + + meta.broken = (stdenv.buildPlatform.config != "x86_64-unknown-linux-gnu") || + (stdenv.hostPlatform.config != "riscv64-unknown-linux-gnu") || + (stdenv.targetPlatform.config != "riscv64-unknown-linux-gnu"); +} diff --git a/pkgs/spec-cpu/gcc-linux-x86.cfg b/pkgs/spec-cpu/gcc-linux-x86.cfg new file mode 100644 index 0000000000000000000000000000000000000000..7a021825933eaf863b1036935ab8e94081c11035 --- /dev/null +++ b/pkgs/spec-cpu/gcc-linux-x86.cfg @@ -0,0 +1,391 @@ +#------------------------------------------------------------------------------ +# SPEC CPU(R) 2017 config for gcc/g++/gfortran on Linux x86 +#------------------------------------------------------------------------------ +# +# Usage: (1) Copy this to a new name +# cd $SPEC/config +# cp Example-x.cfg myname.cfg +# (2) Change items that are marked 'EDIT' (search for it) +# +# SPEC tested this config file with: +# Compiler version(s): Various. See note "Older GCC" below. 
+# Operating system(s): Oracle Linux Server 6, 7, 8 / +# Red Hat Enterprise Linux Server 6, 7, 8 +# SUSE Linux Enterprise Server 15 +# Ubuntu 19.04 +# Hardware: Xeon, EPYC +# +# If your system differs, this config file might not work. +# You might find a better config file at https://www.spec.org/cpu2017/results +# +# Note: Older GCC +# +# Please use the newest GCC that you can. The default version packaged with +# your operating system may be very old; look for alternate packages with a +# newer version. +# +# If you have no choice and must use an old version, here is what to expect: +# +# - "peak" tuning: Several benchmarks will fail at peak tuning if you use +# compilers older than GCC 7. +# In that case, please use base only. +# See: https://www.spec.org/cpu2017/Docs/overview.html#Q16 +# https://www.spec.org/cpu2017/Docs/config.html#tune +# Peak tuning is expected to work for all or nearly all +# benchmarks as of GCC 7 or later. +# Exception: +# - See topic "628.pop2_s basepeak", below. +# +# - "base" tuning: This config file is expected to work for base tuning with +# GCC 4.8.5 or later +# Exception: +# - Compilers vintage about 4.9 may need to turn off the +# tree vectorizer, by adding to the base OPTIMIZE flags: +# -fno-tree-loop-vectorize +# +# Unexpected errors? Try reducing the optimization level, or try removing: +# -march=native +# +# Compiler issues: Contact your compiler vendor, not SPEC. +# For SPEC help: https://www.spec.org/cpu2017/Docs/techsupport.html +#------------------------------------------------------------------------------ + + +#--------- Label -------------------------------------------------------------- +# Arbitrary string to tag binaries (no spaces allowed) +# Two Suggestions: # (1) EDIT this label as you try new ideas. +%ifndef %{label} +% define label "nix" # (2) Use a label meaningful to *you*. 
+%endif + + +#--------- Preprocessor ------------------------------------------------------- +%ifndef %{bits} # EDIT to control 32 or 64 bit compilation. Or, +% define bits 64 # you can set it on the command line using: +%endif # 'runcpu --define bits=nn' + +%ifndef %{build_ncpus} # EDIT to adjust number of simultaneous compiles. +% define build_ncpus 8 # Or, you can set it on the command line: +%endif # 'runcpu --define build_ncpus=nn' + +# Don't change this part. +%if %{bits} == 64 +% define model -m64 +%elif %{bits} == 32 +% define model -m32 +%else +% error Please define number of bits - see instructions in config file +%endif +%if %{label} =~ m/ / +% error Your label "%{label}" contains spaces. Please try underscores instead. +%endif +%if %{label} !~ m/^[a-zA-Z0-9._-]+$/ +% error Illegal character in label "%{label}". Please use only alphanumerics, underscore, hyphen, and period. +%endif + + +#--------- Global Settings ---------------------------------------------------- +# For info, see: +# https://www.spec.org/cpu2017/Docs/config.html#fieldname +# Example: https://www.spec.org/cpu2017/Docs/config.html#tune + +command_add_redirect = 1 +flagsurl = $[top]/config/flags/gcc.xml +ignore_errors = 1 +iterations = 1 +label = %{label}-m%{bits} +line_width = 1020 +log_line_width = 1020 +makeflags = --jobs=%{build_ncpus} +mean_anyway = 1 +output_format = txt,html,cfg,pdf,csv +preenv = 1 +reportable = 0 +tune = base,peak # EDIT if needed: set to "base" for old GCC. + # See note "Older GCC" above. + + +#--------- How Many CPUs? ----------------------------------------------------- +# Both SPECrate and SPECspeed can test multiple chips / cores / hw threads +# - For SPECrate, you set the number of copies. +# - For SPECspeed, you set the number of threads. +# See: https://www.spec.org/cpu2017/Docs/system-requirements.html#MultipleCPUs +# +# q. How many should I set? +# a. Unknown, you will have to try it and see! 
+# +# To get you started, some suggestions: +# +# copies - This config file defaults to testing only 1 copy. You might +# try changing it to match the number of cores on your system, +# or perhaps the number of virtual CPUs as reported by: +# grep -c processor /proc/cpuinfo +# Be sure you have enough memory. See: +# https://www.spec.org/cpu2017/Docs/system-requirements.html#memory +# +# threads - This config file sets a starting point. You could try raising +# it. A higher thread count is much more likely to be useful for +# fpspeed than for intspeed. +# +intrate,fprate: + copies = 1 # EDIT to change number of copies (see above) +intspeed,fpspeed: + threads = 4 # EDIT to change number of OpenMP threads (see above) + + +#------- Compilers ------------------------------------------------------------ +default: +# EDIT: The parent directory for your compiler. +# Do not include the trailing /bin/ +# Do not include a trailing slash +# Examples: +# 1 On a Red Hat system, you said: +# 'yum install devtoolset-9' +# Use: % define gcc_dir "/opt/rh/devtoolset-9/root/usr" +# +# 2 You built GCC in: /disk1/mybuild/gcc-10.1.0/bin/gcc +# Use: % define gcc_dir "/disk1/mybuild/gcc-10.1.0" +# +# 3 You want: /usr/bin/gcc +# Use: % define gcc_dir "/usr" +# WARNING: See section "Older GCC" above. +# +#%ifndef %{gcc_dir} +#% define gcc_dir "@GCC_DIR@" # EDIT (see above) +#%endif + +# EDIT: If your compiler version is 10 or greater, you must enable the next +# line to avoid compile errors for several FP benchmarks +# +%define GCCge10 # EDIT: remove the '#' from column 1 if using GCC 10 or later + +# EDIT if needed: the preENV line adds library directories to the runtime +# path. You can adjust it, or add lines for other environment variables. 
+# See: https://www.spec.org/cpu2017/Docs/config.html#preenv +# and: https://gcc.gnu.org/onlinedocs/gcc/Environment-Variables.html + #preENV_LD_LIBRARY_PATH = %{gcc_dir}/lib64/:%{gcc_dir}/lib/:/lib64 + #preENV_LD_LIBRARY_PATH = %{gcc_dir}/lib64/:%{gcc_dir}/lib/:/lib64:%{ENV_LD_LIBRARY_PATH} + #SPECLANG = %{gcc_dir}/bin/ + CC = %{ENV_CC} -std=c99 + CXX = %{ENV_CXX} -std=c++03 + FC = %{ENV_FC} + # How to say "Show me your version, please" + CC_VERSION_OPTION = --version + CXX_VERSION_OPTION = --version + FC_VERSION_OPTION = --version + +default: +%if %{bits} == 64 + sw_base_ptrsize = 64-bit + sw_peak_ptrsize = 64-bit +%else + sw_base_ptrsize = 32-bit + sw_peak_ptrsize = 32-bit +%endif + + +#--------- Portability -------------------------------------------------------- +default: # data model applies to all benchmarks +%if %{bits} == 32 + # Strongly recommended because at run-time, operations using modern file + # systems may fail spectacularly and frequently (or, worse, quietly and + # randomly) if a program does not accommodate 64-bit metadata. 
+ EXTRA_PORTABILITY = -D_FILE_OFFSET_BITS=64 +%else + EXTRA_PORTABILITY = -DSPEC_LP64 +%endif + +# Benchmark-specific portability (ordered by last 2 digits of bmark number) + +500.perlbench_r,600.perlbench_s: #lang='C' +%if %{bits} == 32 +% define suffix IA32 +%else +% define suffix X64 +%endif + PORTABILITY = -DSPEC_LINUX_%{suffix} + +521.wrf_r,621.wrf_s: #lang='F,C' + CPORTABILITY = -DSPEC_CASE_FLAG + FPORTABILITY = -fconvert=big-endian + +523.xalancbmk_r,623.xalancbmk_s: #lang='CXX' + PORTABILITY = -DSPEC_LINUX + +526.blender_r: #lang='CXX,C' + PORTABILITY = -funsigned-char -DSPEC_LINUX + +527.cam4_r,627.cam4_s: #lang='F,C' + PORTABILITY = -DSPEC_CASE_FLAG + +628.pop2_s: #lang='F,C' + CPORTABILITY = -DSPEC_CASE_FLAG + FPORTABILITY = -fconvert=big-endian + +#---------------------------------------------------------------------- +# GCC workarounds that do not count as PORTABILITY +#---------------------------------------------------------------------- +# The workarounds in this section would not qualify under the SPEC CPU +# PORTABILITY rule. +# - In peak, they can be set as needed for individual benchmarks. +# - In base, individual settings are not allowed; set for whole suite. 
+# See: +# https://www.spec.org/cpu2017/Docs/runrules.html#portability +# https://www.spec.org/cpu2017/Docs/runrules.html#BaseFlags +# +# Integer workarounds - peak +# + 500.perlbench_r,600.perlbench_s=peak: # https://www.spec.org/cpu2017/Docs/benchmarks/500.perlbench_r.html + EXTRA_CFLAGS = -fno-strict-aliasing -fno-unsafe-math-optimizations -fno-finite-math-only + 502.gcc_r,602.gcc_s=peak: # https://www.spec.org/cpu2017/Docs/benchmarks/502.gcc_r.html + EXTRA_CFLAGS = -fno-strict-aliasing -fgnu89-inline + 505.mcf_r,605.mcf_s=peak: # https://www.spec.org/cpu2017/Docs/benchmarks/505.mcf_r.html + EXTRA_CFLAGS = -fno-strict-aliasing + 525.x264_r,625.x264_s=peak: # https://www.spec.org/cpu2017/Docs/benchmarks/525.x264_r.html + EXTRA_CFLAGS = -fcommon +# +# Integer workarounds - base - combine the above - https://www.spec.org/cpu2017/Docs/runrules.html#BaseFlags +# + intrate,intspeed=base: + EXTRA_CFLAGS = -fno-strict-aliasing -fno-unsafe-math-optimizations -fno-finite-math-only -fgnu89-inline -fcommon +# +# Floating Point workarounds - peak +# + 511.povray_r=peak: # https://www.spec.org/cpu2017/Docs/benchmarks/511.povray_r.html + EXTRA_CFLAGS = -fno-strict-aliasing + 521.wrf_r,621.wrf_s=peak: # https://www.spec.org/cpu2017/Docs/benchmarks/521.wrf_r.html +% ifdef %{GCCge10} # workaround for GCC v10 (and presumably later) + EXTRA_FFLAGS = -fallow-argument-mismatch +% endif + 527.cam4_r,627.cam4_s=peak: # https://www.spec.org/cpu2017/Docs/benchmarks/527.cam4_r.html + EXTRA_CFLAGS = -fno-strict-aliasing +% ifdef %{GCCge10} # workaround for GCC v10 (and presumably later) + EXTRA_FFLAGS = -fallow-argument-mismatch +% endif + # See also topic "628.pop2_s basepeak" below + 628.pop2_s=peak: # https://www.spec.org/cpu2017/Docs/benchmarks/628.pop2_s.html +% ifdef %{GCCge10} # workaround for GCC v10 (and presumably later) + EXTRA_FFLAGS = -fallow-argument-mismatch +% endif +# +# FP workarounds - base - combine the above - https://www.spec.org/cpu2017/Docs/runrules.html#BaseFlags 
+# + fprate,fpspeed=base: + EXTRA_CFLAGS = -fno-strict-aliasing +% ifdef %{GCCge10} # workaround for GCC v10 (and presumably later) + EXTRA_FFLAGS = -fallow-argument-mismatch +% endif + + +#-------- Tuning Flags common to Base and Peak -------------------------------- +# +# Speed (OpenMP and Autopar allowed) +# +%if %{bits} == 32 + intspeed,fpspeed: + # + # Many of the speed benchmarks (6nn.benchmark_s) do not fit in 32 bits + # If you wish to run SPECint2017_speed or SPECfp2017_speed, please use + # + # runcpu --define bits=64 + # + fail_build = 1 +%else + intspeed,fpspeed: + EXTRA_OPTIMIZE = -fopenmp -DSPEC_OPENMP + fpspeed: + # + # 627.cam4 needs a big stack; the preENV will apply it to all + # benchmarks in the set, as required by the rules. + # + preENV_OMP_STACKSIZE = 120M +%endif + +#-------- Base Tuning Flags ---------------------------------------------- +# EDIT if needed -- If you run into errors, you may need to adjust the +# optimization - for example you may need to remove +# the -march=native. See topic "Older GCC" above. +# +default=base: # flags for all base + OPTIMIZE = -g -O3 -march=native + + +#-------- Peak Tuning Flags ---------------------------------------------- +default=peak: + OPTIMIZE = -g -Ofast -march=native -flto + PASS1_FLAGS = -fprofile-generate + PASS2_FLAGS = -fprofile-use + +# 628.pop2_s basepeak: Depending on the interplay of several optimizations, +# 628.pop2_s might not validate with peak tuning. Use the base +# version instead. 
See: +# https:// www.spec.org/cpu2017/Docs/benchmarks/628.pop2_s.html +628.pop2_s=peak: + basepeak = yes + + +#------------------------------------------------------------------------------ +# Tester and System Descriptions - EDIT all sections below this point +#------------------------------------------------------------------------------ +# For info about any field, see +# https://www.spec.org/cpu2017/Docs/config.html#fieldname +# Example: https://www.spec.org/cpu2017/Docs/config.html#hw_memory +#------------------------------------------------------------------------------- + +#--------- EDIT to match your version ----------------------------------------- +default: + sw_compiler001 = C/C++/Fortran: Version 10.1.0 of GCC, the + sw_compiler002 = GNU Compiler Collection + +#--------- EDIT info about you ------------------------------------------------ +# To understand the difference between hw_vendor/sponsor/tester, see: +# https://www.spec.org/cpu2017/Docs/config.html#test_sponsor +intrate,intspeed,fprate,fpspeed: # Important: keep this line + hw_vendor = My Corporation + tester = My Corporation + test_sponsor = My Corporation + license_num = nnn (Your SPEC license number) +# prepared_by = # Ima Pseudonym # Whatever you like: is never output + + +#--------- EDIT system availability dates ------------------------------------- +intrate,intspeed,fprate,fpspeed: # Important: keep this line + # Example # Brief info about field + hw_avail = # Nov-2099 # Date of LAST hardware component to ship + sw_avail = # Nov-2099 # Date of LAST software component to ship + fw_bios = # Version Mumble released May-2099 # Firmware information + +#--------- EDIT system information -------------------------------------------- +intrate,intspeed,fprate,fpspeed: # Important: keep this line + # Example # Brief info about field +# hw_cpu_name = # Intel Xeon E9-9999 v9 # chip name + hw_cpu_nominal_mhz = # 9999 # Nominal chip frequency, in MHz + hw_cpu_max_mhz = # 9999 # Max chip frequency, in 
MHz +# hw_disk = # 9 x 9 TB SATA III 9999 RPM # Size, type, other perf-relevant info + hw_model = # TurboBlaster 3000 # system model name +# hw_nchips = # 99 # number chips enabled + hw_ncores = # 9999 # number cores enabled + hw_ncpuorder = # 1-9 chips # Ordering options + hw_nthreadspercore = # 9 # number threads enabled per core + hw_other = # TurboNUMA Router 10 Gb # Other perf-relevant hw, or "None" + +# hw_memory001 = # 999 GB (99 x 9 GB 2Rx4 PC4-2133P-R, # The 'PCn-etc' is from the JEDEC +# hw_memory002 = # running at 1600 MHz) # label on the DIMM. + + hw_pcache = # 99 KB I + 99 KB D on chip per core # Primary cache size, type, location + hw_scache = # 99 KB I+D on chip per 9 cores # Second cache or "None" + hw_tcache = # 9 MB I+D on chip per chip # Third cache or "None" + hw_ocache = # 9 GB I+D off chip per system board # Other cache or "None" + +# sw_file = # ext99 # File system +# sw_os001 = # Linux Sailboat # Operating system +# sw_os002 = # Distribution 7.2 SP1 # and version + sw_other = # TurboHeap Library V8.1 # Other perf-relevant sw, or "None" +# sw_state = # Run level 99 # Software state. 
+ + power_management = # briefly summarize power settings + +# Note: Some commented-out fields above are automatically set to preliminary +# values by sysinfo +# https://www.spec.org/cpu2017/Docs/config.html#sysinfo +# Uncomment lines for which you already know a better answer than sysinfo diff --git a/pkgs/spec-cpu/launcher.sh b/pkgs/spec-cpu/launcher.sh new file mode 100644 index 0000000000000000000000000000000000000000..c43add8e657adf6df4cc662a2dc57f996f15dde7 --- /dev/null +++ b/pkgs/spec-cpu/launcher.sh @@ -0,0 +1,70 @@ +#!/bin/bash + +set -e + +if [ -z "$SPEC" ]; then + SPEC=$(spec-cpu-mini) +fi + +if [ -z "$SPEC" ]; then + echo "cannot find spec, set SPEC variable" + exit 1 +fi + +where=$TMPDIR +if [ -z "$where" ]; then + if [ -d /tmp ]; then + where=/tmp + else + where=$PWD + fi +fi + +cwd=$(readlink -f $where) +# Place the outcome here +wd="$cwd/spec" +mkdir -p "$wd" + +benchniter=1 +benchsize=test +benchtune=base + +echo "--- Placing output in $wd ---" + +printf 'benchmark\tsize\ttune\titer\ttime_s\n' > "$wd/time.csv" + +for srcbench in $SPEC/benchspec/CPU/*; do + name=$(basename $srcbench) + bench="$wd/$name" + bench_out="$wd/$name.csv" + rm -rf "$bench" + cp -r "$srcbench" "$bench" + chmod +w -R "$bench" + + rundir="$bench/run/run_${benchtune}_${benchsize}_nix-m64.0000" + sed -i '/^-C/d' "$rundir/speccmds.cmd" + echo "--- Running $name for $benchniter iterations ---" + ( + #set -x + cd $rundir + specinvoke -i $benchniter -E speccmds.cmd > /dev/null + #set +x + ) + # Print time + awk '/^run [0-9]* elapsed time/{printf \ + "%s\t%s\t%s\t%s\t%s\n", \ + "'$name'","'$benchsize'","'$benchtune'",$2,$7}' \ + "$rundir/speccmds.out" > "$bench_out" + + cat "$bench_out" + + # Accumulate in main CSV + cat "$bench_out" >> "$wd/time.csv" + + # Erase intermediate files as they occupy RAM. 
+ rm -rf "$bench" +done + +echo "--- RESULTS in $wd/time.csv ---" +cat "$wd/time.csv" +echo "---------------------------------------" diff --git a/pkgs/spec-cpu/mini.nix b/pkgs/spec-cpu/mini.nix new file mode 100644 index 0000000000000000000000000000000000000000..008a3d00efed1227655f74d70d6b8fb079e9f588 --- /dev/null +++ b/pkgs/spec-cpu/mini.nix @@ -0,0 +1,60 @@ +{ + stdenv +, spec-cpu +, spec-cpu-tools +, specinvoke +, speclaunch +}: + +stdenv.mkDerivation rec { + pname = "spec-cpu-mini"; + version = spec-cpu.version; + src = null; + unpackPhase = "true"; + + # Select only a subset of the benchmarks: + # https://www.spec.org/cpu2017/Docs/#benchdocs + benchList = [ + # -- SPECspeed 2017 Integer -- + "600.perlbench_s" + "602.gcc_s" + "605.mcf_s" + "620.omnetpp_s" + #"623.xalancbmk_s" # Big + #"625.x264_s" # Big + #"631.deepsjeng_s" # Requires 7 GiB of RAM + "641.leela_s" + "648.exchange2_s" + # "657.xz_s" # Runs out of memory with 700 MiB of RAM + ]; + + dontConfigure = true; + buildPhase = '' + pwd + mkdir -p benchspec/CPU + for bench in $benchList; do + cp -r ${spec-cpu}/benchspec/CPU/$bench benchspec/CPU/ + done + + # Make writable + chmod -R +w benchspec + + # Remove environment + find benchspec -name '*.cmd' | xargs sed -i '/^-E/d' + # Remove compare script as it refers to spec-cpu-tools + find benchspec -name 'compare.cmd' -delete + ''; + installPhase = '' + pwd + mkdir -p $out + cp -r benchspec/ $out + mkdir -p $out/bin + echo -e "#!$SHELL\necho $out" > $out/bin/spec-cpu-mini + chmod +x $out/bin/spec-cpu-mini + cp ${specinvoke}/bin/specinvoke $out/bin + cp ${speclaunch}/bin/speclaunch $out/bin + ''; + enableParallelBuilding = false; + hardeningDisable = [ "all" ]; + dontStrip = true; +} diff --git a/pkgs/spec-cpu/specinvoke-execve.patch b/pkgs/spec-cpu/specinvoke-execve.patch new file mode 100644 index 0000000000000000000000000000000000000000..a237b8abb599b4d1f18cbbad96434917ac1ae317 --- /dev/null +++ b/pkgs/spec-cpu/specinvoke-execve.patch @@ -0,0 +1,15 @@ 
+--- a/unix.c 2024-10-08 12:30:18.785111397 +0200 ++++ b/unix.c 2024-10-08 12:32:09.580923368 +0200 +@@ -165,7 +165,11 @@ pid_t invoke(copy_info_t *ui, command_in + /* We could redirect them here. This might be useful for VMS? */ + *(si->command_ptr) = cmd; + si->invoke_args[0] = si->shell; +- execve(si->shell, si->invoke_args, env); ++ if (execve(si->shell, si->invoke_args, env) != 0) { ++ fprintf (stderr, "Can't execute command: %s(%d)\n", ++ STRERROR(errno), errno); ++ specinvoke_exit (1, si); ++ } + } else { /* Parent */ + ui->pid = pid; + fprintf (si->outfp, diff --git a/pkgs/spec-cpu/specinvoke.nix b/pkgs/spec-cpu/specinvoke.nix new file mode 100644 index 0000000000000000000000000000000000000000..e61525ac0c2b505d1f7468fa243b8367cde981c7 --- /dev/null +++ b/pkgs/spec-cpu/specinvoke.nix @@ -0,0 +1,33 @@ +{ + stdenv +, libarchive +, spec-cpu-tools +, runCommandNoCC +}: + +let + version = spec-cpu-tools.version; + tar = runCommandNoCC "specinvoke-${version}.tar" { + src = spec-cpu-tools.src; + nativeBuildInputs = [ libarchive ]; + } '' + mkdir iso + bsdtar -C iso -xf $src + cp iso/install_archives/tools-src.tar $out + ''; +in stdenv.mkDerivation { + pname = "specinvoke"; + version = version; + src = tar; + sourceRoot = "tools/src/specinvoke/"; + patches = [ + ./specinvoke-execve.patch + ]; + # Almost no bugs + preInstall = '' + mkdir -p $out/bin + ''; + enableParallelBuilding = false; + hardeningDisable = [ "all" ]; + dontStrip = true; +} diff --git a/pkgs/spec-cpu/speclaunch.nix b/pkgs/spec-cpu/speclaunch.nix new file mode 100644 index 0000000000000000000000000000000000000000..cbbb95b5b82644b3b53b14de0f2abfdeee257ae1 --- /dev/null +++ b/pkgs/spec-cpu/speclaunch.nix @@ -0,0 +1,21 @@ +{ + stdenv +, bash +}: + +stdenv.mkDerivation { + name = "speclaunch"; + src = ./launcher.sh; + dontUnpack = true; + dontConfigure = true; + dontBuild = true; + installPhase = '' + mkdir -p $out/bin + cp $src $out/bin/speclaunch + chmod +x $out/bin/speclaunch + ''; + buildInputs = [ 
bash ]; + enableParallelBuilding = false; + hardeningDisable = [ "all" ]; + dontStrip = true; +} diff --git a/pkgs/spec-cpu/tools.nix b/pkgs/spec-cpu/tools.nix new file mode 100644 index 0000000000000000000000000000000000000000..e7cfd5fa569ce5687a92ca6e2c15ad8ba7575d6d --- /dev/null +++ b/pkgs/spec-cpu/tools.nix @@ -0,0 +1,106 @@ +{ + stdenv +, libarchive +, xz +, gnutar +, gfortran +, coreutils +, requireFile +, autoPatchelfHook +, libxcrypt-legacy +, glibc +, lib +}: + +stdenv.mkDerivation rec { + pname = "spec-cpu-tools"; + version = "1.1.7"; + + src = requireFile { + name = "cpu2017-1.1.7.iso"; + sha256 = "02630819h64dyy57wkj33fhwwqgbw6mqc5awh1zm48pkvvl0l600"; + message = '' + Missing SPEC CPU 2017 1.1.7. + + Add it to the store with: + + $ nix-prefetch-url file:/path/to/cpu2017-1.1.7.iso + /nix/store/mk4hr8xwd62akp7iw5khq638ssba8qz0-cpu2017-1.1.7.iso + + Notice that the name must match exactly "cpu2017-1.1.7.iso". + ''; + }; + + unpackPhase = '' + set -x + mkdir iso + bsdtar -C iso -xf $src + chmod +w -R iso + + #for f in iso/install_archives/benchball/*; do + # bsdtar -xf $f + #done + #mkdir src + #bsdtar -C src -xf iso/install_archives/benchball/cpu2017-1.1.7.base.tar.xz + #bsdtar -C src -xf iso/install_archives/benchball/519.lbm_r-1.000503.tar.xz + + sourceRoot="$PWD/iso" + set +x + ''; + + # We need a working specxz binary + configurePhase = '' + patchShebangs install.sh + + set -x + + # Replace "spec*" tools by symlinks to working binaries + pushd tools/bin/linux-x86_64 + ln -fs ${xz}/bin/xz specxz + ln -fs ${gnutar}/bin/tar spectar + patchelf \ + --set-interpreter "$(cat $NIX_CC/nix-support/dynamic-linker)" \ + specsha512sum + ls -l spec* + ./specxz -h + ./spectar --help + ldd ./specsha512sum + ./specsha512sum --help + popd + + # sha512sum requires -e, not provided by coreutils + + export SPEC="$sourceRoot" + + # Don't run the tests + sed -i 's/^.*shrc will.*$/exit 0/g' install.sh + + set +x + ''; + + installPhase = '' + bash -x install.sh -f -d $out -u 
linux-x86_64 + ''; + + preFixup = '' + # Fix temporary directory creation + sed -i '/^sub get_tmp_directory/a\ return tempdir(CLEANUP => 1);' \ + $out/bin/common/util_common.pl + + sed -i '/my $dir = jp($top, $config->resultdir, $subdir);/c\ my $dir = ::get_tmp_directory($config, 1);' \ + $out/bin/harness/log.pl + ''; + + # Missing libdb-4.7.so + autoPatchelfIgnoreMissingDeps = true; + + nativeBuildInputs = [ libxcrypt-legacy libarchive autoPatchelfHook ]; + #buildInputs = [ libxcrypt-legacy ]; + + enableParallelBuilding = false; + hardeningDisable = [ "all" ]; + dontStrip = true; + meta.broken = (stdenv.buildPlatform.config != "x86_64-unknown-linux-gnu") || + (stdenv.hostPlatform.config != "x86_64-unknown-linux-gnu") || + (stdenv.targetPlatform.config != "riscv64-unknown-linux-gnu"); +} diff --git a/pkgs/stream/default.nix b/pkgs/stream/default.nix new file mode 100644 index 0000000000000000000000000000000000000000..84671c6c1c4e166dbbffcb2534acdbf265ba81d6 --- /dev/null +++ b/pkgs/stream/default.nix @@ -0,0 +1,33 @@ +{ + stdenv +, fetchFromGitHub +}: + +stdenv.mkDerivation rec { + pname = "stream"; + version = "4dbce1d0"; + + src = fetchFromGitHub { + owner = "jeffhammond"; + repo = "STREAM"; + rev = "4dbce1d0fdb7410e8f21b48f3381bc0a1341967f"; + sha256 = "sha256-sBwdPeaMyI/wH1Nq0yQtb/kvi5913e0azXaulOJIG3A="; + }; + + # CFLAGS='-O2 -fopenmp -DSTREAM_ARRAY_SIZE=40000000' + buildPhase = '' + set -x + make stream_c.exe CC=$CC FC=$FC CFLAGS=-O2 + set +x + ''; + + #nativeBuildInputs = [ gfortran ]; + dontStrip = true; + dontConfigure = true; + enableParallelBuilding = false; + hardeningDisable = [ "all" ]; + installPhase = '' + mkdir -p $out/bin + cp -a stream_c.exe $out/bin/stream + ''; +} diff --git a/tools/.gitignore b/tools/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..c79a82f85eed0c4e1e7128316c6af6501ca28f5b --- /dev/null +++ b/tools/.gitignore @@ -0,0 +1,4 @@ +plictool +csrtool +memtool +*.bin diff --git a/tools/Makefile 
b/tools/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..fd445cbf2d525a8d8e7c611b87afa1b58c514eab --- /dev/null +++ b/tools/Makefile @@ -0,0 +1,20 @@ +CFLAGS=-Wall -static +PREFIX?=/usr/local +bin=plictool csrtool memtool unalign + +all: $(bin) + +clean: + rm -f $(bin) + +install: + mkdir -p $(PREFIX)/bin + cp -a $(bin) $(PREFIX)/bin + +plictool: plictool.c + +csrtool: csrtool.c + +memtool: memtool.c + +unalign: unalign.c diff --git a/tools/csrtool.c b/tools/csrtool.c new file mode 100644 index 0000000000000000000000000000000000000000..1ae6cee9022ffc5b821b86ff111f0bcc128295ef --- /dev/null +++ b/tools/csrtool.c @@ -0,0 +1,34 @@ +#include +#include +#include + +int main(int argc, char *argv[]) +{ + /* Print */ + if (argc > 1) { + // Wait for all memory operations to finish + __asm__ volatile ("fence"); + + if (strcmp(argv[1], "mem-in-order") == 0) { + __asm__ volatile ("fence"); + __asm__ volatile ("csrwi 0x801, 2"); + } else if (strcmp(argv[1], "all-in-order") == 0) { + __asm__ volatile ("fence"); + __asm__ volatile ("csrwi 0x801, 7"); + } else if (strcmp(argv[1], "all-out-of-order") == 0) { + __asm__ volatile ("fence"); + __asm__ volatile ("csrwi 0x801, 0"); + } else { + fprintf(stderr, "unknown '%s', use: mem-in-order, all-in-order or all-out-of-order\n", argv[1]); + exit(1); + } + } + + // Wait for all memory operations to finish + __asm__ volatile ("fence"); + unsigned result; + asm("csrr %0, 0x801" : "=r"(result) : ); + printf("CSR 0x801 = %xu\n", result); + + return 0; +} diff --git a/tools/memtool.c b/tools/memtool.c new file mode 100644 index 0000000000000000000000000000000000000000..f961c711f3af7c808d68953f603b15a9e97c61e5 --- /dev/null +++ b/tools/memtool.c @@ -0,0 +1,214 @@ +/* Copyright (c) 2024 Barcelona Supercomputing Center (BSC) + * SPDX-License-Identifier: MIT + * Author: Rodrigo Arias Mallo */ + +/* This is just a small tool to exercise the memory which attempts to + * stress the virtual memory, in a crude attempt to 
reproduce the hangs
+ * that we were observing while booting NixOS. */
+
+/* Changelog:
+ * v0.0.1 (2024-07-10): Start version with "chain" and "fill" tests.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define MAX_SIZE (1024L * 1024L) /* Default block size limit: 1 MiB */
+
+struct block { /* One node of the singly linked chain */
+	struct block *next;
+	size_t size; /* Bytes allocated for this node, header included */
+	uint32_t data[];
+};
+
+struct chain { /* FIFO of blocks: appended at tail, freed from front */
+	struct block *front;
+	struct block *tail;
+	long maxsize; /* Exclusive upper bound, in bytes, for a block's data */
+	long nbytes; /* Sum of the size field of the live blocks */
+	long nblocks; /* Number of live blocks */
+};
+/* Appends a randomly sized block filled with rand() data; returns -1 if malloc fails, 0 otherwise. */
+static int
+allocate(struct chain *chain)
+{
+	/* Constrain the number of elements based on the maxsize */
+	long maxn = chain->maxsize / (long) sizeof(uint32_t);
+	long n = (maxn > 0) ? (long) rand() % maxn : 0; /* never "% 0" when maxsize < 4 */
+
+	size_t size = sizeof(struct block) + n * sizeof(uint32_t);
+
+	printf("allocating...\n");
+
+	struct block *b = malloc(size);
+
+	/* No mem */
+	if (b == NULL)
+		return -1;
+
+	b->size = size;
+	b->next = NULL;
+
+	/* Populate the block with some data */
+	printf("filling...\n");
+	for (long i = 0; i < n; i++)
+		b->data[i] = rand();
+
+	/* Add it to the chain */
+	if (chain->tail)
+		chain->tail->next = b;
+
+	chain->tail = b;
+
+	/* And to the front if it is the first */
+	if (!chain->front)
+		chain->front = b;
+
+	chain->nblocks++;
+	chain->nbytes += size;
+
+	return 0;
+}
+/* Frees the block at the front of the chain; returns -1 if the chain is empty, 0 otherwise. */
+static int
+deallocate(struct chain *chain)
+{
+	/* May run out of blocks */
+	if (!chain->front)
+		return -1;
+
+	struct block *b = chain->front;
+
+	chain->front = b->next;
+
+	/* Last block */
+	if (chain->tail == b)
+		chain->tail = NULL;
+
+	chain->nblocks--;
+	chain->nbytes -= b->size;
+
+	printf("deallocating...\n");
+	free(b);
+
+	return 0;
+}
+/* "chain" mode: loops forever allocating or freeing blocks; argv[0], if present, overrides maxsize. */
+static void
+do_chain(int argc, char *argv[])
+{
+	struct chain chain = {0};
+
+	/* Default 1 MiB */
+	chain.maxsize = MAX_SIZE;
+
+	if (argc > 0)
+		chain.maxsize = atol(argv[0]);
+
+	printf("mode chain: maxsize=%ldK\n", chain.maxsize / 1024);
+
+	srand(123);
+
+	for (long iter = 0; ; iter++) {
+		int p = rand() % 100;
+		int is_alloc = (p >= 10); /* 10 of the 100 values of p free a block */
+		/* c reports the outcome: A=allocated, D=deallocated, -=failed */
+		char c;
+		if (is_alloc) {
+			if (allocate(&chain) == 0)
+				c = 'A';
+			else
+				c = '-';
+		} else {
+			if (deallocate(&chain) == 0)
+				c = 'D';
+			else
+				c = '-';
+		}
+
+		printf("iter=%ld nblocks=%ld allocated=%ldK (%c)\n",
+				iter, chain.nblocks, chain.nbytes / 1024,
+				c);
+	}
+}
+/* "fill" mode: allocates one buffer of nbytes (argv[0] or 256 MiB) and writes every int in it. */
+static void
+do_fill(int argc, char *argv[])
+{
+	/* Default: 256 MiB */
+	long nbytes = 256L * 1024L * 1024L;
+
+	if (argc > 0)
+		nbytes = atol(argv[0]);
+
+	long n = (nbytes > 0) ? nbytes / (long) sizeof(int) : 0; /* signed division */
+
+	printf("mode fill: nbytes=%ldM, n=%ld\n",
+			nbytes / (1024L * 1024L), n);
+
+	int *buf = malloc(nbytes);
+
+	if (!buf) {
+		perror("malloc failed");
+		exit(1);
+	}
+
+	for (long i = 0; i < n; i++) {
+		buf[i] = i;
+		if ((i % (1024L * 1024L)) == 0)
+			printf("written=%zuK, addr=%p OK\n",
+					(size_t) i * sizeof(int) / 1024,
+					(void *) &buf[i]);
+	}
+
+	free(buf);
+
+	printf("fill test OK\n");
+}
+/* Prints the help text and exits with status 1. The text is a printf format: a literal '%' must be "%%". */
+static void
+usage(void)
+{
+	printf(
+"Usage: memtool <command> [...]\n"
+"\n"
+"Available commands:\n"
+" chain [<maxsize>]\n"
+" Creates a chain of blocks of random size, each up to maxsize\n"
+" or 1MiB if not given. Blocks are freed with 10%% probability\n"
+" starting from the oldest.\n"
+"\n"
+" fill [<nbytes>]\n"
+" Allocates a vector of the given size (or 256 MiB if not given)\n"
+" and initializes it with a increasing value per element.\n"
+"\n");
+
+	exit(1);
+}
+/* Entry point: argv[1] selects the mode, the remaining arguments are handed to it. */
+int main(int argc, char *argv[])
+{
+	printf("memtool v0.0.1 - Rodrigo Arias Mallo \n");
+
+	if (argc < 2)
+		usage();
+
+	/* Skip program name */
+	argc--; argv++;
+
+	const char *mode = argv[0];
+
+	/* Skip mode */
+	argc--; argv++;
+
+	if (strcmp(mode, "chain") == 0)
+		do_chain(argc, argv);
+	else if (strcmp(mode, "fill") == 0)
+		do_fill(argc, argv);
+	else
+		usage();
+
+	return 0;
+}
diff --git a/tools/plictool.c b/tools/plictool.c
new file mode 100644
index 0000000000000000000000000000000000000000..e1caa1b734c217a77cc3495b6354cad84781fa2e
--- /dev/null
+++ b/tools/plictool.c
@@ -0,0 +1,388 @@
+/* Copyright (c) 2024 Barcelona Supercomputing Center (BSC)
+ * SPDX-License-Identifier: MIT
+ * Author: Rodrigo Arias Mallo */
+
+/* Small utility to
manage the PLIC. */
+
+/* Changelog:
+ * v0.0.1 (2024-09-03): Initial version.
+ * v0.0.2 (2024-09-04): Print contexts in another line and masked information.
+ * v0.0.3 (2024-09-04): Make output format more clear and add manual.
+ * v0.0.4 (2024-09-30): Implement support for claiming an interrupt.
+ * v0.0.5 (2024-10-02): Support other read/write operations.
+ */
+
+#define VERSION "v0.0.5"
+
+#include <stdbool.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+int operation;
+const char *plic_address_str = "0x40800000";
+long ncontexts = 2L;
+long maxsources = 1024L;
+long nsources = 1024L;
+
+long context = -1;
+long source = -1;
+long value = -1;
+bool value_set = false;
+
+struct ctx {
+	uint32_t threshold;
+};
+
+struct source_ctx { /* State of one source as seen from one context */
+	bool enabled;
+	bool masked; /* Source priority <= context threshold */
+	uint32_t threshold;
+	const char *state; /* "-", "masked" or "firing" */
+};
+
+struct source { /* Snapshot of one interrupt source */
+	bool pending;
+	bool show; /* Something is non-zero, so list_sources() prints it */
+	uint32_t priority;
+	struct source_ctx *ctx; /* Array of ncontexts entries, owned */
+	long ncontexts;
+};
+/* Byte offsets used below: priority 0x0 + 4*src, pending bits 0x1000, enable bits 0x2000 + 0x80*ctx, threshold 0x200000 + 0x1000*ctx, claim 0x200004 + 0x1000*ctx. */
+static uint32_t
+read_reg(void *base, size_t offset)
+{
+	volatile uint32_t *p = (volatile uint32_t *) ((char *) base + offset);
+	return *p;
+}
+
+static void
+write_reg(void *base, size_t offset, uint32_t value)
+{
+	volatile uint32_t *p = (volatile uint32_t *) ((char *) base + offset);
+	*p = value;
+}
+
+static uint32_t
+claim_get(void *base, uint32_t ctx)
+{
+	return read_reg(base, 0x200004L + (ctx * 0x1000L));
+}
+
+static void
+claim_set(void *base, uint32_t ctx, uint32_t value)
+{
+	write_reg(base, 0x200004L + (ctx * 0x1000L), value);
+}
+
+static uint32_t
+thre_get(void *base, uint32_t ctx)
+{
+	return read_reg(base, 0x200000L + (ctx * 0x1000L));
+}
+
+static void
+thre_set(void *base, uint32_t ctx, uint32_t value)
+{
+	write_reg(base, 0x200000L + (ctx * 0x1000L), value);
+}
+
+static uint32_t
+prio_get(void *base, uint32_t s)
+{
+	return read_reg(base, s * 4L);
+}
+
+static void
+prio_set(void *base, uint32_t s, uint32_t value)
+{
+	write_reg(base, s * 4L, value);
+}
+/* Returns the pending bit (0 or 1) of source s. */
+static uint32_t
+pending_get(void *base, uint32_t s)
+{
+	uint32_t offset = 0x1000L + (s / 32L) * 4L;
+	uint32_t pending = read_reg(base, offset);
+	long shift = s % 32L;
+
+	return (pending >> shift) & 1;
+}
+/* Read-modify-write of the 32-bit pending word that holds source s. */
+static void
+pending_set(void *base, uint32_t s, uint32_t value)
+{
+	uint32_t offset = 0x1000L + (s / 32L) * 4L;
+	uint32_t pending = read_reg(base, offset);
+	long shift = s % 32L;
+
+	if (value)
+		pending |= (UINT32_C(1) << shift);
+	else
+		pending &= ~(UINT32_C(1) << shift);
+
+	write_reg(base, offset, pending);
+}
+/* Returns the enable bit (0 or 1) of source s in context c. */
+static uint32_t
+enable_get(void *base, uint32_t c, uint32_t s)
+{
+	size_t off_en = 0x2000L + 0x80L * c + (s / 32L) * 4L;
+	uint32_t enabled_reg = read_reg(base, off_en);
+	long shift = s % 32L;
+	return (enabled_reg >> shift) & 1;
+}
+/* Read-modify-write of the 32-bit enable word of context c that holds source s. */
+static void
+enable_set(void *base, uint32_t c, uint32_t s, uint32_t value)
+{
+	size_t off_en = 0x2000L + 0x80L * c + (s / 32L) * 4L;
+	uint32_t enabled_reg = read_reg(base, off_en);
+	long shift = s % 32L;
+
+	if (value)
+		enabled_reg |= (UINT32_C(1) << shift);
+	else
+		enabled_reg &= ~(UINT32_C(1) << shift);
+
+	write_reg(base, off_en, enabled_reg);
+}
+/* Zeroes src and allocates its per-context array; exits the program if calloc fails. */
+static void
+source_init(struct source *src, long ncontexts)
+{
+	memset(src, 0, sizeof(struct source));
+	src->ctx = calloc(ncontexts, sizeof(struct source_ctx));
+	src->ncontexts = ncontexts;
+	if (src->ctx == NULL) {
+		perror("calloc failed");
+		exit(1);
+	}
+}
+/* Clears the snapshot but keeps the per-context array for reuse. */
+static void
+source_reset(struct source *src)
+{
+	src->pending = false;
+	src->show = false;
+	src->priority = 0;
+
+	memset(src->ctx, 0, src->ncontexts * sizeof(struct source_ctx));
+}
+
+/* Releases the per-context array allocated by source_init(). */
+static void
+source_free(struct source *src)
+{
+	free(src->ctx);
+}
+/* Fills src with the pending bit, the priority and the per-context state of source s. */
+static void
+source_read(struct source *src, void *base, long s)
+{
+	uint32_t pending_reg = read_reg(base, 0x1000L + (s / 32L) * 4L);
+	long shift = s % 32L;
+	src->pending = (pending_reg >> shift) & 1;
+	src->priority = read_reg(base, 0x0000L + (s * 4L));
+
+	bool ctx_show = 0;
+	for (long c = 0; c < src->ncontexts; c++) {
+		struct source_ctx *ctx = &src->ctx[c];
+
+		size_t off_en = 0x2000L + 0x80L * c + (s / 32L) * 4L;
+		uint32_t enabled_reg = read_reg(base, off_en);
+		ctx->enabled = (enabled_reg >> shift) & 1;
+		ctx->threshold = read_reg(base, 0x200000L + (c * 0x1000L));
+		ctx->masked = src->priority <= ctx->threshold;
+		ctx_show = ctx_show || ctx->enabled;
+
+		if (!ctx->enabled)
+			ctx->state = "-";
+		else if (ctx->masked)
+			ctx->state = "masked";
+		else
+			ctx->state = "firing";
+	}
+
+	/* Show the source if it has some bit to non-zero */
+	src->show = src->pending || src->priority || ctx_show;
+}
+/* Prints a table with one row per source that has any non-zero state and one column per context. */
+static void
+list_sources(void *base)
+{
+	printf("Source\tPend\tPrio");
+	for (long i = 0; i < ncontexts; i++) {
+		uint32_t threshold = read_reg(base, 0x200000L + (i * 0x1000L));
+		printf("\tC%ld(%u)", i, threshold);
+	}
+	printf("\n");
+
+	struct source s;
+	source_init(&s, ncontexts);
+	for (long i = 0; i < nsources && i < maxsources; i++) { /* stay inside the priority bank */
+		source_reset(&s);
+		source_read(&s, base, i);
+
+		if (!s.show)
+			continue;
+
+		printf("%ld\t%s\t%u", i, s.pending ? "yes" : "-", s.priority);
+
+		for (long j = 0; j < ncontexts; j++)
+			printf("\t%s", s.ctx[j].state);
+
+		printf("\n");
+	}
+
+	source_free(&s);
+}
+/* Prints the banner to stdout and the synopsis to stderr, then exits with status 1. */
+static void usage(void)
+{
+	printf("plictool "VERSION" -- Rodrigo Arias Mallo \n");
+
+	fprintf(stderr,
+"Usage:\n"
+" plictool [-a addr] [-L] [-n nsrc] [-x nctx] # List (default)\n"
+" plictool [-a addr] -C ctx [-w value] # Claim\n"
+" plictool [-a addr] -T ctx [-w value] # Threshold\n"
+" plictool [-a addr] -I src [-w value] # Priority\n"
+" plictool [-a addr] -P src [-w value] # Pending\n"
+" plictool [-a addr] -E src -c ctx [-w value] # Enabled\n"
+" plictool -v # Version\n"
+);
+	exit(1);
+}
+/* Parses the options, maps the PLIC through memfile (/dev/mem unless -f is given) and runs the selected operation. */
+int main(int argc, char *argv[])
+{
+	const char *memfile = "/dev/mem";
+	int opt;
+
+	while ((opt = getopt(argc, argv, "f:a:LC:T:P:I:E:n:x:c:w:vh")) != -1) {
+		switch (opt) {
+		/* Common flags */
+		case 'f':
+			memfile = optarg;
+			break;
+		case 'a':
+			plic_address_str = optarg;
+			break;
+		case 'n':
+			nsources = atol(optarg);
+			break;
+		case 'x':
+			ncontexts = atol(optarg);
+			break;
+		case 'C': /* claim */
+		case 'T': /* threshold */
+			operation = opt;
+			context = atol(optarg);
+			break;
+		case 'P': /* pending */
+		case 'I': /* priority */
+		case 'E': /* enable */
+			operation = opt;
+			source = atol(optarg);
+			break;
+		case 'L': /* list */
+			operation = opt;
+			break;
+		case 'c':
+			context = atol(optarg);
+			break;
+		case 'w':
+			value = atol(optarg);
+			value_set = true;
+			break;
+		case 'v':
+			printf("plictool "VERSION"\n");
+			exit(0);
+		case 'h':
+		default: /* '?' */
+			usage();
+			break;
+		}
+	}
+
+	if (operation == 'P' || operation == 'I' || operation == 'E') {
+		if (source < 0 || source >= maxsources) {
+			fprintf(stderr, "missing or out of range source\n");
+			exit(1);
+		}
+	}
+
+	if (operation == 'C' || operation == 'T' || operation == 'E') {
+		if (context < 0) {
+			fprintf(stderr, "missing context\n");
+			exit(1);
+		}
+	}
+
+	unsigned long long plic_address = strtoull(plic_address_str, NULL, 16);
+
+	//printf("plictool "VERSION" addr=0x%08llx nsrc=%ld nctx=%ld\n",
+	//		plic_address, nsources, ncontexts);
+
+	int fd = open(memfile, O_RDWR | O_SYNC);
+
+	if (fd == -1) {
+		fprintf(stderr, "cannot open %s: %s\n", memfile, strerror(errno));
+		exit(1);
+	}
+
+	size_t map_size = 0x4000000UL;
+	void *map_base = mmap(0, map_size, PROT_READ | PROT_WRITE, MAP_SHARED,
+			fd, plic_address);
+
+	if (map_base == MAP_FAILED) {
+		const int mmap_errno = errno; /* perror() may overwrite errno */
+		perror("mmap failed");
+		if (mmap_errno == EPERM)
+			fprintf(stderr, "Have you disabled 'CONFIG_STRICT_DEVMEM' and "
+					"'CONFIG_IO_STRICT_DEVMEM' in the kernel config?\n"
+					"Hint: zgrep STRICT_DEVMEM /proc/config.gz\n");
+		exit(1);
+	}
+
+	if (operation == 'C') { /* claim */
+		if (value_set)
+			claim_set(map_base, context, value);
+		else
+			printf("%u\n", claim_get(map_base, context));
+	} else if (operation == 'T') { /* threshold */
+		if (value_set)
+			thre_set(map_base, context, value);
+		else
+			printf("%u\n", thre_get(map_base, context));
+	} else if (operation == 'I') { /* priority */
+		if (value_set)
+			prio_set(map_base, source, value);
+		else
+			printf("%u\n", prio_get(map_base, source));
+	} else if (operation == 'P') { /* pending */
+		if (value_set)
+			pending_set(map_base, source, value);
+		else
+			printf("%u\n", pending_get(map_base, source));
+	} else if (operation == 'E') { /* enable */
+		if (value_set)
+			enable_set(map_base, context, source, value);
+		else
+			printf("%u\n", enable_get(map_base, context, source));
+	} else /* list */ {
+		list_sources(map_base);
+	}
+
+	munmap(map_base, map_size);
+
+	close(fd);
+
+	return 0;
+}
diff --git a/tools/unalign.c b/tools/unalign.c
new file mode 100644
index 0000000000000000000000000000000000000000..8e575cdcdc2fa16f8985ea11f8af39e81b65dca3
--- /dev/null
+++ b/tools/unalign.c
@@ -0,0 +1,239 @@
+/*
+ * unalign_check - check the CPU behaviour on different alignments
+ * Copyright (C) 2021 Matteo Croce
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <time.h>
+#include <unistd.h>
+
+#define ACT_READ 0
+#define ACT_WRITE 1
+#define ACT_XOR 2
+#define ACT_COPY 3
+/* Each macro below expands to one `case` of a switch on the access size in bytes and walks `count` elements of that width through a volatile pointer. */
+#define READ(SIZE) \
+	case SIZE / 8: { \
+		volatile uint##SIZE##_t *buf2 = (uint##SIZE##_t *)buf; \
+		size_t i; \
+		for (i = 0; i < count; i++) \
+			(void)buf2[i]; \
+		break; \
+	}
+
+#define WRITE(SIZE) \
+	case SIZE / 8: { \
+		volatile uint##SIZE##_t *buf2 = (uint##SIZE##_t *)buf; \
+		size_t i; \
+		for (i = 0; i < count; i++) \
+			buf2[i] = (uint##SIZE##_t)0xaabbccdd11223344; \
+		break; \
+	}
+
+#define XOR(SIZE) \
+	case SIZE / 8: { \
+		volatile uint##SIZE##_t *buf2 = (uint##SIZE##_t *)buf; \
+		size_t i; \
+		for (i = 0; i < count; i++) \
+			buf2[i] = ~buf2[i]; \
+		break; \
+	}
+
+#define COPY(SIZE) \
+	case SIZE / 8: { \
+		volatile uint##SIZE##_t *buf2 = (uint##SIZE##_t *)buf; \
+		size_t i; \
+		for (i = 0; i < count / 2; i++) \
+			buf2[i] = buf2[i + count / 2]; \
+		for (i = count / 2; i < count; i++) \
+			buf2[i] = buf2[i - count / 2]; \
+		break; \
+	}
+/* The four helpers below do nothing when size is not 1, 2, 4 or 8. */
+static void do_read(void *buf, size_t count, int size)
+{
+	switch (size) {
+	READ(8);
+	READ(16);
+	READ(32);
+	READ(64);
+	}
+}
+
+static void do_write(void *buf, size_t count, int size)
+{
+	switch (size) {
+	WRITE(8);
+	WRITE(16);
+	WRITE(32);
+	WRITE(64);
+	}
+}
+
+static void do_xor(void *buf, size_t count, int size)
+{
+	switch (size) {
+	XOR(8);
+	XOR(16);
+	XOR(32);
+	XOR(64);
+	}
+}
+
+static void do_copy(void *buf, size_t count, int size)
+{
+	switch (size) {
+	COPY(8);
+	COPY(16);
+	COPY(32);
+	COPY(64);
+	}
+}
+/* Returns to - since in nanoseconds; the multiply is done in 64 bits whatever the width of time_t. */
+static uint64_t time_sub(struct timespec *since, struct timespec *to)
+{
+	if (to->tv_sec == since->tv_sec)
+		return to->tv_nsec - since->tv_nsec;
+
+	return (uint64_t) (to->tv_sec - since->tv_sec) * UINT64_C(1000000000) + to->tv_nsec - since->tv_nsec;
+}
+/* Prints the help to stderr when ret is non-zero, to stdout otherwise, and exits with status ret. */
+static void __attribute__ ((noreturn)) usage(char *argv0, int ret)
+{
+	fprintf(ret ? stderr : stdout,
+		"usage: %s [-rwxc1248h] [-l length] [-u unalignment]\n"
+		"\n"
+		"Options (default access size is sizeof(long) bytes):\n"
+		" -r read memory (default)\n"
+		" -w write memory\n"
+		" -x xor memory\n"
+		" -c copy memory\n"
+		" -l SIZE use SIZE Mb for the test (default 100)\n"
+		" -u BYTE unalign buffer by BYTE bytes (default 0)\n"
+		" -1 read 1 byte at time\n"
+		" -2 read 2 bytes at time\n"
+		" -4 read 4 bytes at time\n"
+		" -8 read 8 bytes at time\n"
+		" -h this help\n",
+		argv0);
+	exit(ret);
+}
+/* Printable names, indexed by the ACT_* constants. */
+static const char *actions[] = {
+	"read",
+	"write",
+	"xor",
+	"copy",
+};
+/* Times one pass of the selected action over an mlock()ed buffer and prints the throughput. */
+int main(int argc, char *argv[])
+{
+	struct timespec before, after;
+	uint64_t elapsed;
+	int action = ACT_READ;
+	size_t len = 100 * 1024 * 1024;
+	int shift = 0;
+	int size = sizeof(long);
+	char *buf;
+	int c;
+
+	while((c = getopt(argc, argv, "hrwxc1248l:u:")) != -1) {
+		switch (c) {
+		case 'r':
+			action = ACT_READ;
+			break;
+		case 'w':
+			action = ACT_WRITE;
+			break;
+		case 'x':
+			action = ACT_XOR;
+			break;
+		case 'c':
+			action = ACT_COPY;
+			break;
+		case 'l':
+			len = (size_t) atol(optarg) * 1024 * 1024;
+			if (atol(optarg) <= 0) { /* len is unsigned: test the parsed value */
+				fprintf(stderr, "Invalid size %s\n", optarg);
+				return 1;
+			}
+			break;
+		case 'u':
+			shift = atoi(optarg);
+			break;
+		case '1':
+		case '2':
+		case '4':
+		case '8':
+			size = c - '0';
+			break;
+		case 'h':
+		default:
+			usage(argv[0], c != 'h');
+		}
+	}
+
+	shift = ((shift % size) + size) % size; /* also maps a negative -u into [0, size) */
+
+	if (optind != argc)
+		usage(argv[0], 1);
+
+	buf = malloc(len);
+	if (!buf) {
+		perror("malloc");
+		return 1;
+	}
+
+	if (mlock(buf, len)) {
+		perror("mlock");
+		return 1;
+	}
+
+	clock_gettime(CLOCK_MONOTONIC, &before);
+	switch (action) {
+	case ACT_READ:
+		do_read(buf + shift, (len - shift) / size, size);
+		break;
+	case ACT_WRITE:
+		do_write(buf + shift, (len - shift) / size, size);
+		break;
+	case ACT_XOR:
+		do_xor(buf + shift, (len - shift) / size, size);
+		break;
+	case ACT_COPY:
+		do_copy(buf + shift, (len - shift) / size, size);
+		break;
+	}
+	clock_gettime(CLOCK_MONOTONIC, &after);
+
+	elapsed = time_sub(&before, &after);
+
+	printf( "size: %zu Mb\n"
+		"%s size: %d bit\n"
+		"unalignment: %d byte\n"
+		"elapsed time: %.2f sec\n"
+		"throughput: %.2f Mb/s\n",
+		len / 1024 / 1024,
+		actions[action], size * 8,
+		shift,
+		elapsed / 1E9,
+		(len / 1024 / 1024) / (elapsed / 1E9));
+
+	return 0;
+}
diff --git a/vm.nix b/vm.nix index ff03e65866c4f40958459f47921fd1a1667374bb..b6a356f19c4632e234d5e71828b57c99923807e9 100644 --- a/vm.nix +++ b/vm.nix @@ -47,8 +47,8 @@ INET y NETWORK_FILESYSTEMS y OVERLAY_FS y - "9P_FS" y - "9P_FS_POSIX_ACL" y + #"9P_FS" y + #"9P_FS_POSIX_ACL" y PCI y VIRTIO_PCI y PCI_HOST_GENERIC y @@ -63,7 +63,15 @@ nixpkgs.overlays = [ (final: prev: { - qemu = prev.qemu.override { rutabagaSupport = false; }; + qemu = prev.qemu.override { + pulseSupport = false; + pipewireSupport = false; + sdlSupport = false; + jackSupport = false; + gtkSupport = false; + vncSupport = false; + smartcardSupport = false; + }; uboot-custom = prev.ubootQemuRiscv64Smode.override { # Override preboot to set 'bootcmd' directly to the kernel address in RAM