diff --git a/countbits.vhdl b/countbits.vhdl index 87417a9..4f40134 100644 --- a/countbits.vhdl +++ b/countbits.vhdl @@ -50,9 +50,11 @@ architecture behaviour of bit_counter is begin countzero_r: process(clk) begin - if rising_edge(clk) and stall = '0' then - inp_r <= inp; - sum_r <= sum; + if rising_edge(clk) then + if stall = '0' then + inp_r <= inp; + sum_r <= sum; + end if; end if; end process; diff --git a/fetch1.vhdl b/fetch1.vhdl index 677fa27..96c16fb 100644 --- a/fetch1.vhdl +++ b/fetch1.vhdl @@ -102,9 +102,6 @@ architecture behaviour of fetch1 is signal itlb_pte : tlb_pte_t; signal itlb_hit : std_ulogic; - -- Privilege bit from PTE EAA field - signal eaa_priv : std_ulogic; - -- Simple hash for direct-mapped TLB index function hash_ea(addr: std_ulogic_vector(63 downto 0)) return std_ulogic_vector is variable hash : std_ulogic_vector(TLB_BITS - 1 downto 0); @@ -155,7 +152,7 @@ begin attribute ram_style of btc_memory : signal is "block"; signal btc_valids : std_ulogic_vector(BTC_SIZE - 1 downto 0); - attribute ram_style of btc_valids : signal is "distributed"; + -- attribute ram_style of btc_valids : signal is "distributed"; signal btc_wr : std_ulogic; signal btc_wr_data : std_ulogic_vector(BTC_WIDTH - 1 downto 0); diff --git a/fpga/arty_a7.xdc b/fpga/arty_a7.xdc index 622b24d..dd35252 100644 --- a/fpga/arty_a7.xdc +++ b/fpga/arty_a7.xdc @@ -171,15 +171,15 @@ set_property -dict { PACKAGE_PIN R15 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_po set_property -dict { PACKAGE_PIN P15 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io33 }]; set_property -dict { PACKAGE_PIN R16 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io34 }]; set_property -dict { PACKAGE_PIN N16 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io35 }]; -set_property -dict { PACKAGE_PIN N14 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io36 }]; -set_property -dict { PACKAGE_PIN U17 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io37 }]; -set_property 
-dict { PACKAGE_PIN T18 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io38 }]; -set_property -dict { PACKAGE_PIN R18 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io39 }]; -set_property -dict { PACKAGE_PIN P18 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io40 }]; -set_property -dict { PACKAGE_PIN N17 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io41 }]; -set_property -dict { PACKAGE_PIN M17 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io42 }]; # A -set_property -dict { PACKAGE_PIN L18 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io43 }]; # SCL -set_property -dict { PACKAGE_PIN M18 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io44 }]; # SDA +#set_property -dict { PACKAGE_PIN N14 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io36 }]; +#set_property -dict { PACKAGE_PIN U17 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io37 }]; +#set_property -dict { PACKAGE_PIN T18 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io38 }]; +#set_property -dict { PACKAGE_PIN R18 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io39 }]; +#set_property -dict { PACKAGE_PIN P18 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io40 }]; +#set_property -dict { PACKAGE_PIN N17 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io41 }]; +#set_property -dict { PACKAGE_PIN M17 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io42 }]; # A +#set_property -dict { PACKAGE_PIN L18 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io43 }]; # SCL +#set_property -dict { PACKAGE_PIN M18 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io44 }]; # SDA #set_property -dict { PACKAGE_PIN C2 IOSTANDARD LVCMOS33 } [get_ports { shield_rst }]; #set_property -dict { PACKAGE_PIN C1 IOSTANDARD LVCMOS33 } [get_ports { spi_hdr_ss }]; diff --git a/fpga/top-arty.vhdl b/fpga/top-arty.vhdl index dc5a0fe..508202c 100644 --- a/fpga/top-arty.vhdl +++ b/fpga/top-arty.vhdl @@ -206,6 +206,9 
@@ architecture behaviour of toplevel is signal ddram_clk_p_vec : std_logic_vector(0 downto 0); signal ddram_clk_n_vec : std_logic_vector(0 downto 0); + signal uart1_rxd : std_ulogic; + signal uart1_txd : std_ulogic; + -- Fixup various memory sizes based on generics function get_bram_size return natural is begin @@ -266,8 +269,8 @@ begin uart0_rxd => uart_main_rx, -- UART1 signals - --uart1_txd => uart_pmod_tx, - --uart1_rxd => uart_pmod_rx, + uart1_txd => uart1_txd, + uart1_rxd => uart1_rxd, -- SPI signals spi_flash_sck => spi_sck, @@ -302,7 +305,7 @@ begin wishbone_dma_out => wb_sddma_out ); - --uart_pmod_rts_n <= '0'; + uart1_rxd <= '1'; -- UART1 is not wired to pins: hold its receive input at the idle (high) level; uart1_txd is driven by the SoC -- SPI Flash -- @@ -415,8 +418,9 @@ begin ); -- Generate SoC reset - soc_rst_gen: process(system_clk) + soc_rst_gen: process(system_clk, ext_rst_n) begin + -- XXX why does this need to be an asynchronous reset? if ext_rst_n = '0' then soc_rst <= '1'; elsif rising_edge(system_clk) then diff --git a/fpu.vhdl b/fpu.vhdl index 3b5dfcb..f07f9d1 100644 --- a/fpu.vhdl +++ b/fpu.vhdl @@ -953,7 +953,6 @@ begin v.denorm := '0'; v.is_subtract := '0'; v.add_bsmall := '0'; - v.doing_ftdiv := "00"; v.int_ovf := '0'; v.div_close := '0'; @@ -1007,7 +1006,7 @@ begin elsif new_exp < min_exp then exp_tiny := '1'; end if; - if is_X(new_exp) or is_X(min_exp) then + if is_X(new_exp) or is_X(max_exp) then exp_huge := 'X'; elsif new_exp > max_exp then exp_huge := '1'; @@ -1038,6 +1037,7 @@ begin v.update_fprf := '0'; v.first := '0'; + v.doing_ftdiv := "00"; v.opsel_a := AIN_R; opsel_ainv <= '0'; opsel_mask <= '0'; @@ -1147,8 +1147,10 @@ begin v.instr_done := '1'; when DO_FTDIV => - v.instr_done := '1'; v.cr_result := "0000"; + -- set result_exp to the exponent of B + re_sel2 <= REXP2_B; + re_set_result <= '1'; if r.a.class = INFINITY or r.b.class = ZERO or r.b.class = INFINITY or (r.b.class = FINITE and r.b.mantissa(UNIT_BIT) = '0') then v.cr_result(2) := '1'; @@ -1157,6 +1159,7 @@ begin r.b.class = NAN or r.b.class = ZERO or r.b.class = INFINITY or 
(r.a.class = FINITE and r.a.exponent <= to_signed(-970, EXP_BITS)) then v.cr_result(1) := '1'; + v.instr_done := '1'; else v.doing_ftdiv := "11"; v.first := '1'; @@ -1173,7 +1176,7 @@ begin end if; if r.b.class = NAN or r.b.class = INFINITY or r.b.class = ZERO or r.b.negative = '1' or r.b.exponent <= to_signed(-970, EXP_BITS) then - v.cr_result(1) := '0'; + v.cr_result(1) := '1'; end if; when DO_FCMP => @@ -2148,6 +2151,9 @@ begin v.state := NORMALIZE; when FTDIV_1 => + -- We go through this state up to two times; the first sees if + -- B.exponent is in the range [-1021,1020], and the second tests + -- whether B.exp - A.exp is in the range [-1022,1020]. v.cr_result(1) := exp_tiny or exp_huge; -- set shift to a.exp rs_sel2 <= RSH2_A; diff --git a/icache.vhdl b/icache.vhdl index 8dfbd86..7b0ae59 100644 --- a/icache.vhdl +++ b/icache.vhdl @@ -403,12 +403,12 @@ begin variable snoop_addr : real_addr_t; variable next_raddr : real_addr_t; begin - replace_way := to_unsigned(0, WAY_BITS); - if NUM_WAYS > 1 then - -- Get victim way from plru - replace_way := plru_victim; - end if; if rising_edge(clk) then + replace_way := to_unsigned(0, WAY_BITS); + if NUM_WAYS > 1 then + -- Get victim way from plru + replace_way := plru_victim; + end if; -- Read tags using NIA for next cycle if flush_in = '1' or i_in.req = '0' or (stall_in = '0' and stall_out = '0') then next_raddr := i_in.next_rpn & i_in.next_nia(MIN_LG_PGSZ - 1 downto 0); @@ -649,6 +649,7 @@ begin begin if rising_edge(clk) then ev.icache_miss <= '0'; + ev.itlb_miss_resolved <= '0'; r.recv_valid <= '0'; -- On reset, clear all valid bits to force misses if rst = '1' then diff --git a/microwatt.core b/microwatt.core index 3e65325..508b346 100644 --- a/microwatt.core +++ b/microwatt.core @@ -62,14 +62,13 @@ filesets: - fpga/pp_soc_uart.vhd - fpga/pp_utilities.vhd - fpga/firmware.hex : {copyto : firmware.hex, file_type : user} + - nonrandom.vhdl file_type : vhdlSource-2008 xilinx_specific: files: - xilinx-mult.vhdl : 
{file_type : vhdlSource-2008} - xilinx-mult-32s.vhdl : {file_type : vhdlSource-2008} - - fpga/fpga-random.vhdl : {file_type : vhdlSource-2008} - - fpga/fpga-random.xdc : {file_type : xdc} debug_xilinx: files: diff --git a/tests/fpu/fpu.c b/tests/fpu/fpu.c index 059d83b..79ba7fa 100644 --- a/tests/fpu/fpu.c +++ b/tests/fpu/fpu.c @@ -1665,6 +1665,65 @@ int fpu_test_25(void) return 0; } +struct ftvals { + unsigned long val_a; + unsigned long val_b; + int cr_ftdiv; + int cr_ftsqrt; +} ftvals[] = { + { 0x3ff0000000000000, 0x3ff0000000000000, 0, 0 }, + { 0x0000000000000000, 0x3ff0000000000000, 0, 6 }, + { 0xfff0000000000000, 0x3ff0000000000000, 6, 6 }, + { 0x7ff1234560000000, 0x3ff0000000000000, 2, 2 }, + { 0x3ff0000000000000, 0xfff0000000000000, 6, 0 }, + { 0x3ff0000000000000, 0x8000000000000000, 6, 0 }, + { 0x3ff0000000000000, 0x7ff9234560000000, 2, 0 }, + { 0x3ff0000000000000, 0x0020000000000000, 0, 0 }, + { 0x3ff0000000000000, 0x0010000000000000, 2, 0 }, + { 0x3ff0000000000000, 0x0001000000000000, 6, 0 }, + { 0x3ff0000000000000, 0x7fb1234500000000, 0, 0 }, + { 0x3ff0000000000000, 0x7fc1234500000000, 2, 0 }, + { 0x3ff0000000000000, 0x7fd1234500000000, 2, 0 }, + { 0x3ff0000000000000, 0x7fe1234500000000, 2, 0 }, + { 0x6000000000000000, 0x2000000000000000, 2, 0 }, + { 0x5ff0000000000000, 0x2000000000000000, 2, 0 }, + { 0x5fe0000000000000, 0x2000000000000000, 0, 0 }, + { 0x2000000000000000, 0x5fc0000000000000, 0, 0 }, + { 0x2000000000000000, 0x5fd0000000000000, 2, 0 }, + { 0x0360000000000000, 0x4320000000000000, 0, 0 }, + { 0x0350000000000000, 0x4310000000000000, 2, 2 }, + { 0x0010000000000000, 0x3fd0000000000000, 2, 2 }, + { 0x0001000000000000, 0x3fd0000000000000, 2, 6 }, + { 0xbff0000000000000, 0x3ff0000000000000, 0, 2 }, + { 0x3fd0000000000000, 0x0001000000000000, 6, 0 }, +}; + +int test26(long arg) +{ + long i; + int cr; + struct ftvals *vp = ftvals; + + set_fpscr(FPS_RN_NEAR); + for (i = 0; i < sizeof(ftvals) / sizeof(ftvals[0]); ++i, ++vp) { + asm("lfd 5,0(%1); lfd 
6,8(%1); ftdiv 5,5,6; ftsqrt 4,5; mfcr %0" : + "=r" (cr) : "b" (&vp->val_a) : "cr4", "cr5"); + if (((cr >> 8) & 0xf) != vp->cr_ftdiv || + ((cr >> 12) & 0xf) != vp->cr_ftsqrt) { /* cr5 = ftdiv result, cr4 = ftsqrt result; each CR field is 4 bits */ + print_hex(i, 2, " "); + print_hex(cr, 8, " "); + return i + 1; + } + } + return 0; +} + +int fpu_test_26(void) +{ + enable_fp(); + return trapit(0, test26); +} + int fail = 0; void do_test(int num, int (*test)(void)) @@ -1715,6 +1774,7 @@ int main(void) do_test(23, fpu_test_23); do_test(24, fpu_test_24); do_test(25, fpu_test_25); + do_test(26, fpu_test_26); return fail; } diff --git a/tests/test_fpu.bin b/tests/test_fpu.bin index e4e2116..cc6c1cc 100755 Binary files a/tests/test_fpu.bin and b/tests/test_fpu.bin differ diff --git a/tests/test_fpu.console_out b/tests/test_fpu.console_out index 3ec9480..987a633 100644 --- a/tests/test_fpu.console_out +++ b/tests/test_fpu.console_out @@ -23,3 +23,4 @@ test 22:PASS test 23:PASS test 24:PASS test 25:PASS +test 26:PASS diff --git a/xics.vhdl b/xics.vhdl index d4adc1e..62faf77 100644 --- a/xics.vhdl +++ b/xics.vhdl @@ -386,15 +386,14 @@ begin reg_write: process(clk) variable be_in : std_ulogic_vector(31 downto 0); begin - -- Byteswapped input - be_in := bswap(wb_in.dat); - if rising_edge(clk) then if rst = '1' then for i in 0 to SRC_NUM - 1 loop xives(i) <= (pri => pri_masked); end loop; elsif wb_valid = '1' and wb_in.we = '1' then + -- Byteswapped input + be_in := bswap(wb_in.dat); if reg_is_xive then -- TODO: When adding support for other bits, make sure to -- properly implement wb_in.sel to allow partial writes. 
diff --git a/xilinx-mult-32s.vhdl b/xilinx-mult-32s.vhdl index cacc22d..fc2bf76 100644 --- a/xilinx-mult-32s.vhdl +++ b/xilinx-mult-32s.vhdl @@ -286,9 +286,11 @@ begin process(clk) begin - if rising_edge(clk) and stall = '0' then - m_out.valid <= m_in.valid; - product_lo <= m01_p(5 downto 0) & m00_p(16 downto 0); + if rising_edge(clk) then + if stall = '0' then + m_out.valid <= m_in.valid; + product_lo <= m01_p(5 downto 0) & m00_p(16 downto 0); + end if; end if; end process;