From d1850fea29a88bcb4f7789da1e4e50550c2eb9ec Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sat, 9 Jul 2022 18:29:48 +1000 Subject: [PATCH] Track hazards explicitly for XER overflow bits This provides a mechanism for tracking updates to the XER overflow bits (SO, OV, OV32) and stalling instructions which need current values of those bits (mfxer, integer compare instructions, integer Rc=1 instructions, addex) or which write carry bits (since all the XER common bits are written together, if we are writing CA/CA32 we need up-to-date values of SO/OV/OV32). This will enable updates to SO/OV/OV32 to be done at other places besides the ex1 stage. Signed-off-by: Paul Mackerras --- control.vhdl | 26 +++++++++++++++++++++++++- decode2.vhdl | 36 +++++++++++++++++++++++++++++++++--- execute1.vhdl | 20 +++++++++++++------- 3 files changed, 71 insertions(+), 11 deletions(-) diff --git a/control.vhdl b/control.vhdl index e5ad1c7..e8c8068 100644 --- a/control.vhdl +++ b/control.vhdl @@ -39,6 +39,8 @@ entity control is cr_read_in : in std_ulogic; cr_write_in : in std_ulogic; + ov_read_in : in std_ulogic; + ov_write_in : in std_ulogic; valid_out : out std_ulogic; stopped_out : out std_ulogic; @@ -55,12 +57,14 @@ end entity control; architecture rtl of control is signal gpr_write_valid : std_ulogic; signal cr_write_valid : std_ulogic; + signal ov_write_valid : std_ulogic; type tag_register is record wr_gpr : std_ulogic; reg : gspr_index_t; recent : std_ulogic; wr_cr : std_ulogic; + wr_ov : std_ulogic; valid : std_ulogic; end record; @@ -71,12 +75,14 @@ architecture rtl of control is signal gpr_tag_stall : std_ulogic; signal cr_tag_stall : std_ulogic; + signal ov_tag_stall : std_ulogic; signal serial_stall : std_ulogic; signal curr_tag : tag_number_t; signal next_tag : tag_number_t; signal curr_cr_tag : tag_number_t; + signal curr_ov_tag : tag_number_t; signal prev_tag : tag_number_t; begin @@ -87,12 +93,14 @@ begin if rst = '1' or flush_in = '1' then tag_regs(i).wr_gpr <= '0'; 
tag_regs(i).wr_cr <= '0'; + tag_regs(i).wr_ov <= '0'; tag_regs(i).valid <= '0'; else if complete_in.valid = '1' and i = complete_in.tag then assert tag_regs(i).valid = '1' report "spurious completion" severity failure; tag_regs(i).wr_gpr <= '0'; tag_regs(i).wr_cr <= '0'; + tag_regs(i).wr_ov <= '0'; tag_regs(i).valid <= '0'; report "tag " & integer'image(i) & " not valid"; end if; @@ -108,6 +116,7 @@ begin tag_regs(i).reg <= gpr_write_in; tag_regs(i).recent <= gpr_write_valid; tag_regs(i).wr_cr <= cr_write_valid; + tag_regs(i).wr_ov <= ov_write_valid; tag_regs(i).valid <= '1'; if gpr_write_valid = '1' then report "tag " & integer'image(i) & " valid for gpr " & to_hstring(gpr_write_in); @@ -118,12 +127,16 @@ begin if rst = '1' then curr_tag <= 0; curr_cr_tag <= 0; + curr_ov_tag <= 0; prev_tag <= 0; else curr_tag <= next_tag; if instr_tag.valid = '1' and cr_write_valid = '1' then curr_cr_tag <= instr_tag.tag; end if; + if instr_tag.valid = '1' and ov_write_valid = '1' then + curr_ov_tag <= instr_tag.tag; + end if; if valid_out = '1' then prev_tag <= instr_tag.tag; end if; @@ -144,6 +157,7 @@ begin variable byp_c : std_ulogic_vector(1 downto 0); variable tag_cr : instr_tag_t; variable byp_cr : std_ulogic_vector(1 downto 0); + variable tag_ov : instr_tag_t; variable tag_prev : instr_tag_t; begin tag_a := instr_tag_init; @@ -226,6 +240,14 @@ begin cr_bypass <= byp_cr; cr_tag_stall <= tag_cr.valid and not byp_cr(1); + -- OV hazards + tag_ov.tag := curr_ov_tag; + tag_ov.valid := ov_read_in and tag_regs(curr_ov_tag).wr_ov; + if tag_match(tag_ov, complete_in) then + tag_ov.valid := '0'; + end if; + ov_tag_stall <= tag_ov.valid; + tag_prev.tag := prev_tag; tag_prev.valid := tag_regs(prev_tag).valid; if tag_match(tag_prev, complete_in) then @@ -251,12 +273,14 @@ begin -- Don't let it go out if there are GPR or CR hazards -- or we are waiting for the previous instruction to complete - if (gpr_tag_stall or cr_tag_stall or (serialize and serial_stall)) = '1' then + if 
(gpr_tag_stall or cr_tag_stall or ov_tag_stall or + (serialize and serial_stall)) = '1' then valid_tmp := '0'; end if; gpr_write_valid <= gpr_write_valid_in and valid_tmp; cr_write_valid <= cr_write_in and valid_tmp; + ov_write_valid <= ov_write_in and valid_tmp; -- update outputs valid_out <= valid_tmp; diff --git a/decode2.vhdl b/decode2.vhdl index 500e4f5..a043ef9 100644 --- a/decode2.vhdl +++ b/decode2.vhdl @@ -58,6 +58,8 @@ architecture behaviour of decode2 is reg_b_valid : std_ulogic; reg_c_valid : std_ulogic; reg_o_valid : std_ulogic; + input_ov : std_ulogic; + output_ov : std_ulogic; end record; constant reg_type_init : reg_type := (e => Decode2ToExecute1Init, repeat => NONE, others => '0'); @@ -303,6 +305,9 @@ architecture behaviour of decode2 is signal cr_write_valid : std_ulogic; signal cr_bypass : std_ulogic_vector(1 downto 0); + signal ov_read_valid : std_ulogic; + signal ov_write_valid : std_ulogic; + signal instr_tag : instr_tag_t; begin @@ -342,6 +347,9 @@ begin cr_write_in => cr_write_valid, cr_bypass => cr_bypass, + ov_read_in => ov_read_valid, + ov_write_in => ov_write_valid, + valid_out => control_valid_out, stopped_out => stopped_out, @@ -414,19 +422,39 @@ begin v.e.input_cr := d_in.decode.input_cr; v.e.output_cr := d_in.decode.output_cr; - -- Work out whether XER common bits are set + -- Work out whether XER SO/OV/OV32 bits are set + -- or used by this instruction + v.e.rc := decode_rc(d_in.decode.rc, d_in.insn); v.e.output_xer := d_in.decode.output_carry; + v.input_ov := d_in.decode.output_carry; + v.output_ov := '0'; + if d_in.decode.input_carry = OV then + v.input_ov := '1'; + v.output_ov := '1'; + end if; + if v.e.rc = '1' and d_in.decode.facility /= FPU then + v.input_ov := '1'; + end if; case d_in.decode.insn_type is when OP_ADD | OP_MUL_L64 | OP_DIV | OP_DIVE => -- OE field is valid in OP_ADD/OP_MUL_L64 with major opcode 31 only if d_in.insn(31 downto 26) = "011111" and insn_oe(d_in.insn) = '1' then v.e.oe := '1'; v.e.output_xer := '1'; 
+ v.output_ov := '1'; + v.input_ov := '1'; -- need SO state if setting OV to 0 + end if; + when OP_MFSPR => + if decode_spr_num(d_in.insn) = SPR_XER then + v.input_ov := '1'; end if; when OP_MTSPR => if decode_spr_num(d_in.insn) = SPR_XER then v.e.output_xer := '1'; + v.output_ov := '1'; end if; + when OP_CMP | OP_MCRXRX => + v.input_ov := '1'; when others => end case; @@ -474,8 +502,6 @@ begin v.e.read_reg3 := decoded_reg_c.reg; v.e.write_reg := decoded_reg_o.reg; v.e.write_reg_enable := decoded_reg_o.reg_valid; - v.e.rc := decode_rc(d_in.decode.rc, d_in.insn); - v.e.xerc := c_in.read_xerc_data; v.e.invert_a := d_in.decode.invert_a; v.e.addm1 := '0'; v.e.insn_type := op; @@ -550,6 +576,9 @@ begin -- any op that writes CR effectively also reads it. cr_read_valid <= cr_write_valid or v.e.input_cr; + ov_read_valid <= v.input_ov; + ov_write_valid <= v.output_ov; + -- See if any of the operands can get their value via the bypass path. if dc2.busy = '0' or gpr_a_bypass /= "00" then case gpr_a_bypass is @@ -608,6 +637,7 @@ begin when others => v.e.cr := c_in.read_cr_data; end case; + v.e.xerc := c_in.read_xerc_data; v.e.valid := control_valid_out; v.e.instr_tag := instr_tag; diff --git a/execute1.vhdl b/execute1.vhdl index 57f90b0..6fadc8c 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -435,12 +435,18 @@ begin x_to_pmu.spr_val <= ex1.e.write_data; x_to_pmu.run <= '1'; - -- XER forwarding. To avoid having to track XER hazards, we use - -- the previously latched value. Since the XER common bits - -- (SO, OV[32] and CA[32]) are only modified by instructions that are - -- handled here, we can just use the result most recently sent to - -- writeback, unless a pipeline flush has happened in the meantime. - xerc_in <= ex1.xerc when ex1.xerc_valid = '1' else e_in.xerc; + -- XER forwarding. 
The CA and CA32 bits are only modified by instructions + -- that are handled here, so for them we can just use the result most + -- recently sent to writeback, unless a pipeline flush has happened in the + -- meantime. + -- Hazards for SO/OV/OV32 are handled by control.vhdl as there may be other + -- units writing to them. No forwarding is done because performance of + -- instructions that alter them is not considered significant. + xerc_in.so <= e_in.xerc.so; + xerc_in.ov <= e_in.xerc.ov; + xerc_in.ov32 <= e_in.xerc.ov32; + xerc_in.ca <= ex1.xerc.ca when ex1.xerc_valid = '1' else e_in.xerc.ca; + xerc_in.ca32 <= ex1.xerc.ca32 when ex1.xerc_valid = '1' else e_in.xerc.ca32; -- N.B. the busy signal from each source includes the -- stage2 stall from that source in it. @@ -1561,7 +1567,7 @@ begin cr_res(31) := sign; cr_res(30) := not (sign or zero); cr_res(29) := zero; - cr_res(28) := ex1.xerc.so; + cr_res(28) := ex1.e.xerc.so; cr_mask(7) := '1'; end if;