From d358981d4340ce1f3f4008e43852a9da5e1be217 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 23 Dec 2024 22:07:07 +1100 Subject: [PATCH] Generate doubled instructions in decode1 rather than decode2 This will allow us to read different source registers for the two pieces, which will be needed for instructions like stq. Signed-off-by: Paul Mackerras --- common.vhdl | 3 +- decode1.vhdl | 78 ++++++++++++++++++++++++++++++++-------------------- decode2.vhdl | 27 +++++++++++------- 3 files changed, 67 insertions(+), 41 deletions(-) diff --git a/common.vhdl b/common.vhdl index 4f7fced..7c79ccf 100644 --- a/common.vhdl +++ b/common.vhdl @@ -318,6 +318,7 @@ package common is type Decode1ToDecode2Type is record valid: std_ulogic; stop_mark : std_ulogic; + second : std_ulogic; nia: std_ulogic_vector(63 downto 0); prefixed: std_ulogic; prefix: std_ulogic_vector(25 downto 0); @@ -334,7 +335,7 @@ package common is reg_c : gspr_index_t; end record; constant Decode1ToDecode2Init : Decode1ToDecode2Type := - (valid => '0', stop_mark => '0', nia => (others => '0'), + (valid => '0', stop_mark => '0', second => '0', nia => (others => '0'), prefixed => '0', prefix => (others => '0'), insn => (others => '0'), illegal_suffix => '0', misaligned_prefix => '0', decode => decode_rom_init, br_pred => '0', big_endian => '0', diff --git a/decode1.vhdl b/decode1.vhdl index 86fb5cf..ebc5993 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -44,6 +44,8 @@ architecture behaviour of decode1 is signal decode_rom_addr : insn_code; signal decode : decode_rom_t; + signal double : std_ulogic; + type prefix_state_t is record prefixed : std_ulogic; prefix : std_ulogic_vector(25 downto 0); @@ -485,6 +487,8 @@ architecture behaviour of decode1 is end; begin + double <= not r.second when (r.valid = '1' and decode.repeat /= NONE) else '0'; + decode1_0: process(clk) begin if rising_edge(clk) then @@ -497,10 +501,14 @@ begin fetch_failed <= '0'; pr <= prefix_state_init; elsif stall_in = '0' then - r <= rin; - fetch_failed <= f_in.fetch_failed; - if f_in.valid = '1' then - pr <= pr_in; + if double = '0' then + r <= rin; + fetch_failed <= f_in.fetch_failed; + if f_in.valid = '1' then + pr <= pr_in; + end if; + else + r.second <= '1'; end if; end if; if rst = '1' then @@ -511,12 +519,12 @@ begin end if; end process; - busy_out <= stall_in; + busy_out <= stall_in or double; decode1_rom: process(clk) begin if rising_edge(clk) then - if stall_in = '0' then + if stall_in = '0' and double = '0' then decode <= decode_rom(decode_rom_addr); end if; end if; @@ -646,33 +654,43 @@ begin -- Work out GPR/FPR read addresses -- Note that for prefixed instructions we are working this out based -- only on the suffix. - maybe_rb := '0'; - vr.reg_1_addr := '0' & insn_ra(f_in.insn); - vr.reg_2_addr := '0' & insn_rb(f_in.insn); - vr.reg_3_addr := '0' & insn_rs(f_in.insn); - if icode >= INSN_first_rb then - maybe_rb := '1'; - if icode < INSN_first_frs then - if icode >= INSN_first_rc then - vr.reg_3_addr := '0' & insn_rcreg(f_in.insn); - end if; - else - -- access FRS operand - vr.reg_3_addr(5) := '1'; - if icode >= INSN_first_frab then - -- access FRA and/or FRB operands - vr.reg_1_addr(5) := '1'; - vr.reg_2_addr(5) := '1'; - end if; - if icode >= INSN_first_frabc then - -- access FRC operand - vr.reg_3_addr := '1' & insn_rcreg(f_in.insn); + if double = '0' then + maybe_rb := '0'; + vr.reg_1_addr := '0' & insn_ra(f_in.insn); + vr.reg_2_addr := '0' & insn_rb(f_in.insn); + vr.reg_3_addr := '0' & insn_rs(f_in.insn); + if icode >= INSN_first_rb then + maybe_rb := '1'; + if icode < INSN_first_frs then + if icode >= INSN_first_rc then + vr.reg_3_addr := '0' & insn_rcreg(f_in.insn); + end if; + else + -- access FRS operand + vr.reg_3_addr(5) := '1'; + if icode >= INSN_first_frab then + -- access FRA and/or FRB operands + vr.reg_1_addr(5) := '1'; + vr.reg_2_addr(5) := '1'; + end if; + if icode >= INSN_first_frabc then + -- access FRC operand + vr.reg_3_addr := '1' & insn_rcreg(f_in.insn); + end if; end if; end if; + vr.read_1_enable := f_in.valid; + vr.read_2_enable := f_in.valid and maybe_rb; + vr.read_3_enable := f_in.valid; + else + -- second instance of a doubled instruction + vr.reg_1_addr := r.reg_a; + vr.reg_2_addr := r.reg_b; + vr.reg_3_addr := r.reg_c; + vr.read_1_enable := '0'; -- (not actually used) + vr.read_2_enable := '0'; + vr.read_3_enable := '1'; -- (not actually used) end if; - vr.read_1_enable := f_in.valid; - vr.read_2_enable := f_in.valid and maybe_rb; - vr.read_3_enable := f_in.valid; v.reg_a := vr.reg_1_addr; v.reg_b := vr.reg_2_addr; diff --git a/decode2.vhdl b/decode2.vhdl index a747495..4a020da 100644 --- a/decode2.vhdl +++ b/decode2.vhdl @@ -377,6 +377,21 @@ begin dec_b := decode_input_reg_b (d_in.decode.input_reg_b, d_in.insn, d_in.prefix); dec_c := decode_input_reg_c (d_in.decode.input_reg_c, d_in.insn); dec_o := decode_output_reg (d_in.decode.output_reg_a, d_in.insn); + case d_in.decode.repeat is + when DUPD => + if d_in.second = '1' then + -- update-form loads, 2nd instruction writes RA + dec_o.reg := dec_a.reg; + end if; + when others => + end case; + -- For the second instance of a doubled instruction, we ignore the RA + -- and RB operands, in order to avoid false dependencies on the output + -- of the first instance. + if d_in.second = '1' then + dec_a.reg_valid := '0'; + dec_b.reg_valid := '0'; + end if; if d_in.valid = '0' or d_in.illegal_suffix = '1' then dec_a.reg_valid := '0'; dec_b.reg_valid := '0'; @@ -512,10 +527,10 @@ begin end if; v.e.dec_ctr := decctr; - v.repeat := d_in.decode.repeat; if d_in.decode.repeat /= NONE then v.e.repeat := '1'; end if; + v.e.second := d_in.second; if decctr = '1' then -- read and write CTR @@ -627,14 +642,6 @@ begin v.e.prefix := d_in.prefix; v.e.illegal_suffix := d_in.illegal_suffix; v.e.misaligned_prefix := d_in.misaligned_prefix; - - elsif dc2.e.valid = '1' then - -- dc2.busy = 1 and dc2.e.valid = 1, thus this must be a repeated instruction. - -- Set up for the second iteration (if deferred = 1 this will all be ignored) - v.e.second := '1'; - -- DUPD is the only possibility here: - -- update-form loads, 2nd instruction writes RA - v.e.write_reg := dc2.e.read_reg1; end if; -- issue control @@ -723,7 +730,7 @@ begin v.e.valid := control_valid_out; v.e.instr_tag := instr_tag; - v.busy := valid_in and (not control_valid_out or (v.e.repeat and not v.e.second)); + v.busy := valid_in and not control_valid_out; stall_out <= dc2.busy or deferred;