diff --git a/common.vhdl b/common.vhdl index 0349a6e..4d6cb91 100644 --- a/common.vhdl +++ b/common.vhdl @@ -280,6 +280,9 @@ package common is reg_1_addr : gspr_index_t; reg_2_addr : gspr_index_t; reg_3_addr : gspr_index_t; + read_1_enable : std_ulogic; + read_2_enable : std_ulogic; + read_3_enable : std_ulogic; end record; type bypass_data_t is record diff --git a/decode1.vhdl b/decode1.vhdl index 36d511b..cc93dfc 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -641,6 +641,7 @@ begin variable bv : br_predictor_t; variable fprs, fprabc : std_ulogic; variable in3rc : std_ulogic; + variable may_read_rb : std_ulogic; begin v := Decode1ToDecode2Init; vi := reg_internal_t_init; @@ -654,6 +655,7 @@ begin fprs := '0'; fprabc := '0'; in3rc := '0'; + may_read_rb := '0'; if f_in.valid = '1' then report "Decode insn " & to_hstring(f_in.insn) & " at " & to_hstring(f_in.nia); @@ -675,10 +677,16 @@ begin vi.override := not decode_op_4_valid(to_integer(unsigned(minor4op))); v.decode := decode_op_4_array(to_integer(unsigned(f_in.insn(5 downto 0)))); in3rc := '1'; + may_read_rb := '1'; + + when 23 => + -- rlwnm[.] + may_read_rb := '1'; when 31 => -- major opcode 31, lots of things v.decode := decode_op_31_array(to_integer(unsigned(f_in.insn(10 downto 1)))); + may_read_rb := '1'; if std_match(f_in.insn(10 downto 1), "01-1010011") then -- mfspr or mtspr @@ -728,6 +736,7 @@ begin when 30 => v.decode := decode_op_30_array(to_integer(unsigned(f_in.insn(4 downto 1)))); + may_read_rb := f_in.insn(4); when 52 | 53 | 54 | 55 => -- stfd[u] and stfs[u] @@ -748,6 +757,7 @@ begin in3rc := '1'; fprabc := '1'; fprs := '1'; + may_read_rb := '1'; end if; when 62 => @@ -764,6 +774,7 @@ begin in3rc := '1'; fprabc := '1'; fprs := '1'; + may_read_rb := '1'; end if; when others => @@ -777,6 +788,9 @@ begin else vr.reg_3_addr := fprs & insn_rs(f_in.insn); end if; + vr.read_1_enable := f_in.valid and not f_in.fetch_failed; + vr.read_2_enable := f_in.valid and not f_in.fetch_failed and may_read_rb; + vr.read_3_enable := f_in.valid and not f_in.fetch_failed; if f_in.fetch_failed = '1' then v.valid := '1'; diff --git a/register_file.vhdl b/register_file.vhdl index bc40c3f..a8ddee2 100644 --- a/register_file.vhdl +++ b/register_file.vhdl @@ -38,17 +38,27 @@ end entity register_file; architecture behaviour of register_file is type regfile is array(0 to 63) of std_ulogic_vector(63 downto 0); signal registers : regfile := (others => (others => '0')); - signal rd_port_b : std_ulogic_vector(63 downto 0); signal dbg_data : std_ulogic_vector(63 downto 0); signal dbg_ack : std_ulogic; + signal dbg_gpr_done : std_ulogic; signal addr_1_reg : gspr_index_t; signal addr_2_reg : gspr_index_t; signal addr_3_reg : gspr_index_t; + signal rd_2 : std_ulogic; + signal fwd_1 : std_ulogic; + signal fwd_2 : std_ulogic; + signal fwd_3 : std_ulogic; + signal data_1 : std_ulogic_vector(63 downto 0); + signal data_2 : std_ulogic_vector(63 downto 0); + signal data_3 : std_ulogic_vector(63 downto 0); + signal prev_write_data : std_ulogic_vector(63 downto 0); + begin - -- synchronous writes + -- synchronous reads and writes register_write_0: process(clk) variable a_addr, b_addr, c_addr : gspr_index_t; variable w_addr : gspr_index_t; + variable b_enable : std_ulogic; begin if rising_edge(clk) then if w_in.write_enable = '1' then @@ -66,57 +76,94 @@ begin a_addr := d1_in.reg_1_addr; b_addr := d1_in.reg_2_addr; c_addr := d1_in.reg_3_addr; - - if stall = '0' then + b_enable := d1_in.read_2_enable; + if stall = '1' then + a_addr := addr_1_reg; + b_addr := addr_2_reg; + c_addr := addr_3_reg; + b_enable := rd_2; + else addr_1_reg <= a_addr; addr_2_reg <= b_addr; addr_3_reg <= c_addr; + rd_2 <= b_enable; end if; + + fwd_1 <= '0'; + fwd_2 <= '0'; + fwd_3 <= '0'; + if w_in.write_enable = '1' then + if w_addr = a_addr then + fwd_1 <= '1'; + end if; + if w_addr = b_addr then + fwd_2 <= '1'; + end if; + if w_addr = c_addr then + fwd_3 <= '1'; + end if; + end if; + + -- Do debug reads to GPRs and FPRs using the B port when it is not in use + if dbg_gpr_req = '1' then + if b_enable = '0' then + b_addr := dbg_gpr_addr(5 downto 0); + dbg_gpr_done <= '1'; + end if; + else + dbg_gpr_done <= '0'; + end if; + + if not HAS_FPU then + -- Make it obvious that we only want 32 GSPRs for a no-FPU implementation + a_addr(5) := '0'; + b_addr(5) := '0'; + c_addr(5) := '0'; + end if; + data_1 <= registers(to_integer(unsigned(a_addr))); + data_2 <= registers(to_integer(unsigned(b_addr))); + data_3 <= registers(to_integer(unsigned(c_addr))); + + prev_write_data <= w_in.write_data; + assert (d_in.read1_enable = '0') or (d_in.read1_reg = addr_1_reg) severity failure; assert (d_in.read2_enable = '0') or (d_in.read2_reg = addr_2_reg) severity failure; assert (d_in.read3_enable = '0') or (d_in.read3_reg = addr_3_reg) severity failure; end if; end process register_write_0; - -- asynchronous reads + -- asynchronous forwarding of write data register_read_0: process(all) - variable a_addr, b_addr, c_addr : gspr_index_t; - variable w_addr : gspr_index_t; + variable out_data_1 : std_ulogic_vector(63 downto 0); + variable out_data_2 : std_ulogic_vector(63 downto 0); + variable out_data_3 : std_ulogic_vector(63 downto 0); begin - a_addr := d_in.read1_reg; - b_addr := d_in.read2_reg; - c_addr := d_in.read3_reg; - w_addr := w_in.write_reg; - if not HAS_FPU then - -- Make it obvious that we only want 32 GSPRs for a no-FPU implementation - a_addr(5) := '0'; - b_addr(5) := '0'; - c_addr(5) := '0'; - w_addr(5) := '0'; + out_data_1 := data_1; + out_data_2 := data_2; + out_data_3 := data_3; + if fwd_1 = '1' then + out_data_1 := prev_write_data; end if; + if fwd_2 = '1' then + out_data_2 := prev_write_data; + end if; + if fwd_3 = '1' then + out_data_3 := prev_write_data; + end if; + if d_in.read1_enable = '1' then - report "Reading GPR " & to_hstring(a_addr) & " " & to_hstring(registers(to_integer(unsigned(a_addr)))); + report "Reading GPR " & to_hstring(addr_1_reg) & " " & to_hstring(out_data_1); end if; if d_in.read2_enable = '1' then - report "Reading GPR " & to_hstring(b_addr) & " " & to_hstring(registers(to_integer(unsigned(b_addr)))); + report "Reading GPR " & to_hstring(addr_2_reg) & " " & to_hstring(out_data_2); end if; if d_in.read3_enable = '1' then - report "Reading GPR " & to_hstring(c_addr) & " " & to_hstring(registers(to_integer(unsigned(c_addr)))); - end if; - d_out.read1_data <= registers(to_integer(unsigned(a_addr))); - -- B read port is multiplexed with reads from the debug circuitry - if d_in.read2_enable = '0' and dbg_gpr_req = '1' and dbg_ack = '0' then - b_addr := dbg_gpr_addr; - if not HAS_FPU then - b_addr(5) := '0'; - end if; + report "Reading GPR " & to_hstring(addr_3_reg) & " " & to_hstring(out_data_3); end if; - rd_port_b <= registers(to_integer(unsigned(b_addr))); - d_out.read2_data <= rd_port_b; - d_out.read3_data <= registers(to_integer(unsigned(c_addr))); - -- Forwarding of written data is now done explicitly with a bypass path - -- from writeback to decode2. + d_out.read1_data <= out_data_1; + d_out.read2_data <= out_data_2; + d_out.read3_data <= out_data_3; end process register_read_0; -- Latch read data and ack if dbg read requested and B port not busy @@ -124,8 +171,8 @@ begin begin if rising_edge(clk) then if dbg_gpr_req = '1' then - if d_in.read2_enable = '0' and dbg_ack = '0' then - dbg_data <= rd_port_b; + if dbg_ack = '0' and dbg_gpr_done = '1' then + dbg_data <= data_2; dbg_ack <= '1'; end if; else