From 34cf092bf6da6e1156c4fa8d9b20d02d0e758e76 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 22 Sep 2025 09:15:11 +1000 Subject: [PATCH] control: Fix forwarding when previous result write is suppressed If we have two successive instructions that write the same result register and then a third that uses the same register as an input, and the second instruction suppresses the write of its result, we can currently end up with the third instruction using the wrong value, because it uses the register value from before the first instruction rather than the result of the first instruction. (An example of an instruction suppressing the write of its result is a floating-point instruction that generates an enabled invalid operation exception but not an interrupt.) To fix this, the control module now uses any forwarded value for the register we want, not just the most recent value, but still stalls until it has the most recent value, or the previous instruction completes. Thus in the case described above, decode2 will have latched the value from the first instruction and so the third instruction gets the correct value. Signed-off-by: Paul Mackerras --- control.vhdl | 112 ++++++++++++++++++++++++++++++++++----------------- decode2.vhdl | 1 + 2 files changed, 77 insertions(+), 36 deletions(-) diff --git a/control.vhdl b/control.vhdl index b75fcc1..c34184a 100644 --- a/control.vhdl +++ b/control.vhdl @@ -36,6 +36,7 @@ entity control is execute_next_cr_tag : in instr_tag_t; execute2_next_tag : in instr_tag_t; execute2_next_cr_tag : in instr_tag_t; + writeback_tag : in instr_tag_t; cr_read_in : in std_ulogic; cr_write_in : in std_ulogic; @@ -163,70 +164,109 @@ begin variable byp_cr : std_ulogic_vector(1 downto 0); variable tag_ov : instr_tag_t; variable tag_prev : instr_tag_t; + variable rma : std_ulogic_vector(TAG_COUNT-1 downto 0); + variable rmb : std_ulogic_vector(TAG_COUNT-1 downto 0); + variable rmc : std_ulogic_vector(TAG_COUNT-1 downto 0); + variable tag_a_stall : std_ulogic; + variable tag_b_stall : std_ulogic; + variable tag_c_stall : std_ulogic; begin tag_a := instr_tag_init; + tag_a_stall := '0'; + rma := (others => '0'); for i in tag_number_t loop - if tag_regs(i).wr_gpr = '1' and tag_regs(i).recent = '1' and tag_regs(i).reg = gpr_a_read_in then - tag_a.valid := '1'; - tag_a.tag := i; - end if; - end loop; - tag_b := instr_tag_init; - for i in tag_number_t loop - if tag_regs(i).wr_gpr = '1' and tag_regs(i).recent = '1' and tag_regs(i).reg = gpr_b_read_in then - tag_b.valid := '1'; - tag_b.tag := i; - end if; - end loop; - tag_c := instr_tag_init; - for i in tag_number_t loop - if tag_regs(i).wr_gpr = '1' and tag_regs(i).recent = '1' and tag_regs(i).reg = gpr_c_read_in then - tag_c.valid := '1'; - tag_c.tag := i; + if tag_regs(i).valid = '1' and tag_regs(i).wr_gpr = '1' and + tag_regs(i).reg = gpr_a_read_in and gpr_a_read_valid_in = '1' then + rma(i) := '1'; + if tag_regs(i).recent = '1' then + tag_a_stall := '1'; + end if; end if; end loop; - byp_a := "0000"; - if EX1_BYPASS and tag_match(execute_next_tag, tag_a) then + if EX1_BYPASS and execute_next_tag.valid = '1' and + rma(execute_next_tag.tag) = '1' then byp_a(1) := '1'; - end if; - if EX1_BYPASS and tag_match(execute2_next_tag, tag_a) then + tag_a := execute_next_tag; + elsif EX1_BYPASS and execute2_next_tag.valid = '1' and + rma(execute2_next_tag.tag) = '1' then byp_a(2) := '1'; - end if; - if tag_match(complete_in, tag_a) then + tag_a := execute2_next_tag; + elsif writeback_tag.valid = '1' and rma(writeback_tag.tag) = '1' then byp_a(3) := '1'; + tag_a := writeback_tag; end if; byp_a(0) := gpr_a_read_valid_in and (byp_a(1) or byp_a(2) or byp_a(3)); + if tag_a.valid = '1' and tag_regs(tag_a.tag).valid = '1' and + tag_regs(tag_a.tag).recent = '1' then + tag_a_stall := '0'; + end if; + + tag_b := instr_tag_init; + tag_b_stall := '0'; + rmb := (others => '0'); + for i in tag_number_t loop + if tag_regs(i).valid = '1' and tag_regs(i).wr_gpr = '1' and + tag_regs(i).reg = gpr_b_read_in and gpr_b_read_valid_in = '1' then + rmb(i) := '1'; + if tag_regs(i).recent = '1' then + tag_b_stall := '1'; + end if; + end if; + end loop; byp_b := "0000"; - if EX1_BYPASS and tag_match(execute_next_tag, tag_b) then + if EX1_BYPASS and execute_next_tag.valid = '1' and + rmb(execute_next_tag.tag) = '1' then byp_b(1) := '1'; - end if; - if EX1_BYPASS and tag_match(execute2_next_tag, tag_b) then + tag_b := execute_next_tag; + elsif EX1_BYPASS and execute2_next_tag.valid = '1' and + rmb(execute2_next_tag.tag) = '1' then byp_b(2) := '1'; - end if; - if tag_match(complete_in, tag_b) then + tag_b := execute2_next_tag; + elsif writeback_tag.valid = '1' and rmb(writeback_tag.tag) = '1' then byp_b(3) := '1'; + tag_b := writeback_tag; end if; byp_b(0) := gpr_b_read_valid_in and (byp_b(1) or byp_b(2) or byp_b(3)); + if tag_b.valid = '1' and tag_regs(tag_b.tag).valid = '1' and + tag_regs(tag_b.tag).recent = '1' then + tag_b_stall := '0'; + end if; + + tag_c := instr_tag_init; + tag_c_stall := '0'; + rmc := (others => '0'); + for i in tag_number_t loop + if tag_regs(i).valid = '1' and tag_regs(i).wr_gpr = '1' and + tag_regs(i).reg = gpr_c_read_in and gpr_c_read_valid_in = '1' then + rmc(i) := '1'; + if tag_regs(i).recent = '1' then + tag_c_stall := '1'; + end if; + end if; + end loop; byp_c := "0000"; - if EX1_BYPASS and tag_match(execute_next_tag, tag_c) then + if EX1_BYPASS and execute_next_tag.valid = '1' and rmc(execute_next_tag.tag) = '1' then byp_c(1) := '1'; - end if; - if EX1_BYPASS and tag_match(execute2_next_tag, tag_c) then + tag_c := execute_next_tag; + elsif EX1_BYPASS and execute2_next_tag.valid = '1' and rmc(execute2_next_tag.tag) = '1' then byp_c(2) := '1'; - end if; - if tag_match(complete_in, tag_c) then + tag_c := execute2_next_tag; + elsif writeback_tag.valid = '1' and rmc(writeback_tag.tag) = '1' then byp_c(3) := '1'; + tag_c := writeback_tag; end if; byp_c(0) := gpr_c_read_valid_in and (byp_c(1) or byp_c(2) or byp_c(3)); + if tag_c.valid = '1' and tag_regs(tag_c.tag).valid = '1' and + tag_regs(tag_c.tag).recent = '1' then + tag_c_stall := '0'; + end if; gpr_bypass_a <= byp_a; gpr_bypass_b <= byp_b; gpr_bypass_c <= byp_c; - gpr_tag_stall <= (tag_a.valid and gpr_a_read_valid_in and not byp_a(0)) or - (tag_b.valid and gpr_b_read_valid_in and not byp_b(0)) or - (tag_c.valid and gpr_c_read_valid_in and not byp_c(0)); + gpr_tag_stall <= tag_a_stall or tag_b_stall or tag_c_stall; incr_tag := curr_tag; instr_tag.tag <= curr_tag; diff --git a/decode2.vhdl b/decode2.vhdl index 0290840..eb701e3 100644 --- a/decode2.vhdl +++ b/decode2.vhdl @@ -283,6 +283,7 @@ begin execute_next_cr_tag => execute_cr_bypass.tag, execute2_next_tag => execute2_bypass.tag, execute2_next_cr_tag => execute2_cr_bypass.tag, + writeback_tag => writeback_bypass.tag, cr_read_in => cr_read_valid, cr_write_in => cr_write_valid,