Compare commits

..

No commits in common. 'master' and 'fpu-init' have entirely different histories.

@ -103,8 +103,14 @@ sudo dnf install fusesoc


``` ```
fusesoc init fusesoc init
fusesoc fetch uart16550 ```
fusesoc library add microwatt /path/to/microwatt
- Create a working directory and point FuseSoC at microwatt:

```
mkdir microwatt-fusesoc
cd microwatt-fusesoc
fusesoc library add microwatt /path/to/microwatt/
``` ```


- Build using FuseSoC. For hello world (Replace nexys_video with your FPGA board such as --target=arty_a7-100): - Build using FuseSoC. For hello world (Replace nexys_video with your FPGA board such as --target=arty_a7-100):
@ -122,68 +128,6 @@ You should then be able to see output via the serial port of the board (/dev/tty
fusesoc run --target=nexys_video microwatt fusesoc run --target=nexys_video microwatt
``` ```


## Linux on Microwatt

Mainline Linux supports Microwatt as of v5.14. The Arty A7 is the best tested
platform, but it's also been tested on the OrangeCrab and ButterStick.

1. Use buildroot to create a userspace

A small change is required to glibc in order to support the VMX/AltiVec-less
Microwatt, as float128 support is mandiatory and for this in GCC requires
VSX/AltiVec. This change is included in Joel's buildroot fork, along with a
defconfig:
```
git clone -b microwatt https://github.com/shenki/buildroot
cd buildroot
make ppc64le_microwatt_defconfig
make
```

The output is `output/images/rootfs.cpio`.

2. Build the Linux kernel
```
git clone https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
cd linux
make ARCH=powerpc microwatt_defconfig
make ARCH=powerpc CROSS_COMPILE=powerpc64le-linux-gnu- \
CONFIG_INITRAMFS_SOURCE=/buildroot/output/images/rootfs.cpio -j`nproc`
```

The output is `arch/powerpc/boot/dtbImage.microwatt.elf`.

3. Build gateware using FuseSoC

First configure FuseSoC as above.
```
fusesoc run --build --target=arty_a7-100 microwatt --no_bram --memory_size=0
```

The output is `build/microwatt_0/arty_a7-100-vivado/microwatt_0.bit`.

4. Program the flash

This operation will overwrite the contents of your flash.

For the Arty A7 A100, set `FLASH_ADDRESS` to `0x400000` and pass `-f a100`.

For the Arty A7 A35, set `FLASH_ADDRESS` to `0x300000` and pass `-f a35`.
```
microwatt/openocd/flash-arty -f a100 build/microwatt_0/arty_a7-100-vivado/microwatt_0.bit
microwatt/openocd/flash-arty -f a100 dtbImage.microwatt.elf -t bin -a $FLASH_ADDRESS
```

5. Connect to the second USB TTY device exposed by the FPGA

```
minicom -D /dev/ttyUSB1
```

The gateware has firmware that will look at `FLASH_ADDRESS` and attempt to
parse an ELF there, loading it to the address specified in the ELF header
and jumping to it.

## Testing ## Testing


- A simple test suite containing random execution test cases and a couple of - A simple test suite containing random execution test cases and a couple of

@ -117,6 +117,7 @@ architecture behave of core is
signal complete: instr_tag_t; signal complete: instr_tag_t;
signal terminate: std_ulogic; signal terminate: std_ulogic;
signal core_rst: std_ulogic; signal core_rst: std_ulogic;
signal icache_inv: std_ulogic;
signal do_interrupt: std_ulogic; signal do_interrupt: std_ulogic;


-- Delayed/Latched resets and alt_reset -- Delayed/Latched resets and alt_reset

@ -1121,6 +1121,7 @@ begin
rams: for i in 0 to NUM_WAYS-1 generate rams: for i in 0 to NUM_WAYS-1 generate
signal do_read : std_ulogic; signal do_read : std_ulogic;
signal rd_addr : std_ulogic_vector(ROW_BITS-1 downto 0); signal rd_addr : std_ulogic_vector(ROW_BITS-1 downto 0);
signal do_write : std_ulogic;
signal wr_addr : std_ulogic_vector(ROW_BITS-1 downto 0); signal wr_addr : std_ulogic_vector(ROW_BITS-1 downto 0);
signal wr_data : std_ulogic_vector(wishbone_data_bits-1 downto 0); signal wr_data : std_ulogic_vector(wishbone_data_bits-1 downto 0);
signal wr_sel : std_ulogic_vector(ROW_SIZE-1 downto 0); signal wr_sel : std_ulogic_vector(ROW_SIZE-1 downto 0);

@ -42,8 +42,6 @@ begin
quot <= (others => '0'); quot <= (others => '0');
running <= '0'; running <= '0';
count <= "0000000"; count <= "0000000";
is_32bit <= '0';
overflow <= '0';
elsif d_in.valid = '1' then elsif d_in.valid = '1' then
if d_in.is_extended = '1' then if d_in.is_extended = '1' then
dend <= '0' & d_in.dividend & x"0000000000000000"; dend <= '0' & d_in.dividend & x"0000000000000000";

@ -113,6 +113,8 @@ architecture behaviour of execute1 is
signal misc_result: std_ulogic_vector(63 downto 0); signal misc_result: std_ulogic_vector(63 downto 0);
signal muldiv_result: std_ulogic_vector(63 downto 0); signal muldiv_result: std_ulogic_vector(63 downto 0);
signal spr_result: std_ulogic_vector(63 downto 0); signal spr_result: std_ulogic_vector(63 downto 0);
signal result_mux_sel: std_ulogic_vector(2 downto 0);
signal sub_mux_sel: std_ulogic_vector(2 downto 0);
signal next_nia : std_ulogic_vector(63 downto 0); signal next_nia : std_ulogic_vector(63 downto 0);
signal current: Decode2ToExecute1Type; signal current: Decode2ToExecute1Type;



@ -555,11 +555,7 @@ begin
-- I prefer not to do just yet as it would force fetch2 to know about -- I prefer not to do just yet as it would force fetch2 to know about
-- some of the cache geometry information. -- some of the cache geometry information.
-- --
if r.hit_valid = '1' then
i_out.insn <= read_insn_word(r.hit_nia, cache_out(r.hit_way)); i_out.insn <= read_insn_word(r.hit_nia, cache_out(r.hit_way));
else
i_out.insn <= (others => '0');
end if;
i_out.valid <= r.hit_valid; i_out.valid <= r.hit_valid;
i_out.nia <= r.hit_nia; i_out.nia <= r.hit_nia;
i_out.stop_mark <= r.hit_smark; i_out.stop_mark <= r.hit_smark;
@ -824,7 +820,4 @@ begin
end process; end process;
log_out <= log_data; log_out <= log_data;
end generate; end generate;

events <= ev;

end; end;

@ -74,9 +74,6 @@ begin
i_out.req <= '0'; i_out.req <= '0';
i_out.nia <= (others => '0'); i_out.nia <= (others => '0');
i_out.stop_mark <= '0'; i_out.stop_mark <= '0';
i_out.priv_mode <= '1';
i_out.virt_mode <= '0';
i_out.big_endian <= '0';


m_out.tlbld <= '0'; m_out.tlbld <= '0';
m_out.tlbie <= '0'; m_out.tlbie <= '0';

@ -275,27 +275,10 @@ begin
if rising_edge(clk) then if rising_edge(clk) then
if rst = '1' then if rst = '1' then
r1.req.valid <= '0'; r1.req.valid <= '0';
r1.req.tlbie <= '0';
r1.req.is_slbia <= '0';
r1.req.instr_fault <= '0';
r1.req.load <= '0';
r1.req.priv_mode <= '0';
r1.req.sprn <= (others => '0');
r1.req.xerc <= xerc_init;

r2.req.valid <= '0'; r2.req.valid <= '0';
r2.req.tlbie <= '0';
r2.req.is_slbia <= '0';
r2.req.instr_fault <= '0';
r2.req.load <= '0';
r2.req.priv_mode <= '0';
r2.req.sprn <= (others => '0');
r2.req.xerc <= xerc_init;

r2.wait_dc <= '0'; r2.wait_dc <= '0';
r2.wait_mmu <= '0'; r2.wait_mmu <= '0';
r2.one_cycle <= '0'; r2.one_cycle <= '0';

r3.dar <= (others => '0'); r3.dar <= (others => '0');
r3.dsisr <= (others => '0'); r3.dsisr <= (others => '0');
r3.state <= IDLE; r3.state <= IDLE;

Loading…
Cancel
Save