From 9fbaea6f0819bec3f3f7697469c157c80c8332e4 Mon Sep 17 00:00:00 2001
From: Anton Blanchard
Date: Mon, 9 Sep 2019 09:32:08 +1000
Subject: [PATCH] Rework CR file and add forwarding

Handle the CR as a single field with per nibble enables. Forward any
writes in the same cycle. Nibbles that are not selected by the write
mask are taken from the stored CR value.

If this proves to be an issue for timing, we may want to revisit
this in the future. For now, it keeps things simple.

Signed-off-by: Anton Blanchard
---
 common.vhdl    |  5 +----
 cr_file.vhdl   | 68 +++++++++++++++++++++++++++++------------------------
 decode1.vhdl   | 18 +++++++-------
 decode2.vhdl   |  2 ++
 writeback.vhdl |  2 --
 5 files changed, 49 insertions(+), 46 deletions(-)

diff --git a/common.vhdl b/common.vhdl
index 7455bf3..06ef571 100644
--- a/common.vhdl
+++ b/common.vhdl
@@ -79,14 +79,11 @@ package common is
 	end record;
 
 	type Decode2ToCrFileType is record
-		read_cr_nr_1 : integer;
-		read_cr_nr_2 : integer;
+		read : std_ulogic;
 	end record;
 
 	type CrFileToDecode2Type is record
 		read_cr_data : std_ulogic_vector(31 downto 0);
-		read_cr_data_1 : std_ulogic_vector(3 downto 0);
-		read_cr_data_2 : std_ulogic_vector(3 downto 0);
 	end record;
 
 	type Execute1ToFetch1Type is record
diff --git a/cr_file.vhdl b/cr_file.vhdl
index 31c66e2..bf1378e 100644
--- a/cr_file.vhdl
+++ b/cr_file.vhdl
@@ -18,48 +18,54 @@ end entity cr_file;
 
 architecture behaviour of cr_file is
 	signal crs : std_ulogic_vector(31 downto 0) := (others => '0');
+	signal crs_updated : std_ulogic_vector(31 downto 0) := (others => '0');
 begin
+	-- Combinatorially merge the nibbles selected by the write mask into
+	-- the current CR value
+	cr_create_0: process(all)
+		variable hi, lo : integer := 0;
+		variable cr_tmp : std_ulogic_vector(31 downto 0) := (others => '0');
+	begin
+		-- Start from the stored CR so that nibbles that are not being
+		-- written keep their value and no latches are inferred
+		cr_tmp := crs;
+
+		for i in 0 to 7 loop
+			if w_in.write_cr_mask(i) = '1' then
+				lo := i*4;
+				hi := lo + 3;
+				cr_tmp(hi downto lo) := w_in.write_cr_data(hi downto lo);
+			end if;
+		end loop;
+
+		crs_updated <= cr_tmp;
+	end process;
+
 	-- synchronous writes
 	cr_write_0: process(clk)
-		variable hi, lo : integer := 0;
 	begin
 		if rising_edge(clk) then
 			if w_in.write_cr_enable = '1' then
 				report "Writing " & to_hstring(w_in.write_cr_data) & " to CR mask " & to_hstring(w_in.write_cr_mask);
-
-				for i in 0 to 7 loop
-					if w_in.write_cr_mask(i) = '1' then
-						lo := i*4;
-						hi := lo + 3;
-						crs(hi downto lo) <= w_in.write_cr_data(hi downto lo);
-					end if;
-				end loop;
+				crs <= crs_updated;
 			end if;
 		end if;
-	end process cr_write_0;
+	end process;
 
 	-- asynchronous reads
 	cr_read_0: process(all)
-		variable hi, lo : integer := 0;
 	begin
-		--lo := (7-d_in.read_cr_nr_1)*4;
-		--hi := lo + 3;
-
-		--report "read " & integer'image(d_in.read_cr_nr_1) & " from CR " & to_hstring(crs(hi downto lo));
-		--d_out.read_cr_data_1 <= crs(hi downto lo);
-
-		-- Also return the entire CR to make mfcrf easier for now
-		report "read CR " & to_hstring(crs);
-		d_out.read_cr_data <= crs;
-
---		-- Forward any written data
---		if w_in.write_cr_enable = '1' then
---			if d_in.read_cr_nr_1 = w_in.write_cr_nr then
---				d_out.read_cr_data_1 <= w_in.write_cr_data;
---			end if;
---			if d_in.read_cr_nr_2 = w_in.write_cr_nr then
---				d_out.read_cr_data_2 <= w_in.write_cr_data;
---			end if;
---		end if;
-	end process cr_read_0;
+		-- just return the entire CR to make mfcrf easier for now
+		if d_in.read = '1' then
+			report "Reading CR " & to_hstring(crs_updated);
+		end if;
+
+		-- Forward a write happening in this cycle, otherwise return
+		-- the stored value
+		if w_in.write_cr_enable = '1' then
+			d_out.read_cr_data <= crs_updated;
+		else
+			d_out.read_cr_data <= crs;
+		end if;
+	end process;
 end architecture behaviour;
diff --git a/decode1.vhdl b/decode1.vhdl
index b3c5a0c..394489f 100644
--- a/decode1.vhdl
+++ b/decode1.vhdl
@@ -43,19 +43,19 @@ architecture behaviour of decode1 is
 		PPC_ATTN => (ALU, OP_ILLEGAL, NONE, NONE, NONE, NONE, NONE, NONE, NONE, '0', '0', '0', '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0'),
 		PPC_B => (ALU, OP_B, NONE, CONST_LI, NONE, NONE, NONE, NONE, NONE, '0', '0', '0', '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1'),
 		--PPC_BA
-		PPC_BC => (ALU, OP_BC, NONE, CONST_BD, NONE, NONE, BO, BI, NONE, '0', '0', '0', '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1'),
+		PPC_BC => (ALU, OP_BC, NONE, CONST_BD, NONE, NONE, BO, BI, NONE, '1', '0', '0', '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1'),
 		--PPC_BCA
-		PPC_BCCTR => (ALU, OP_BCCTR, NONE, NONE, NONE, NONE, BO, BI, BH, '0', '0', '0', '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1'),
+		PPC_BCCTR => (ALU, OP_BCCTR, NONE, NONE, NONE, NONE, BO, BI, BH, '1', '0', '0', '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1'),
 		--PPC_BCLA
-		PPC_BCLR => (ALU, OP_BCLR, NONE, NONE, NONE, NONE, BO, BI, BH, '0', '0', '0', '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1'),
+		PPC_BCLR => (ALU, OP_BCLR, NONE, NONE, NONE, NONE, BO, BI, BH, '1', '0', '0', '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1'),
 		--PPC_BCTAR
 		--PPC_BPERM
-		PPC_CMP => (ALU, OP_CMP, RA, RB, NONE, NONE, BF, L, NONE, '0', '0', '0', '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0'),
-		PPC_CMPB => (ALU, OP_CMPB, RS, RB, NONE, RA, NONE, NONE, NONE, '0', '0', '0', '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0'),
+		PPC_CMP => (ALU, OP_CMP, RA, RB, NONE, NONE, BF, L, NONE, '0', '1', '0', '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0'),
+		PPC_CMPB => (ALU, OP_CMPB, RS, RB, NONE, RA, NONE, NONE, NONE, '0', '1', '0', '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0'),
 		--PPC_CMPEQB
-		PPC_CMPI => (ALU, OP_CMP, RA, CONST_SI, NONE, NONE, BF, L, NONE, '0', '0', '0', '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0'),
-		PPC_CMPL => (ALU, OP_CMPL, RA, RB, NONE, NONE, BF, L, NONE, '0', '0', '0', '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0'),
-		PPC_CMPLI => (ALU, OP_CMPL, RA, CONST_UI, NONE, NONE, BF, L, NONE, '0', '0', '0', '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0'),
+		PPC_CMPI => (ALU, OP_CMP, RA, CONST_SI, NONE, NONE, BF, L, NONE, '0', '1', '0', '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0'),
+		PPC_CMPL => (ALU, OP_CMPL, RA, RB, NONE, NONE, BF, L, NONE, '0', '1', '0', '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0'),
+		PPC_CMPLI => (ALU, OP_CMPL, RA, CONST_UI, NONE, NONE, BF, L, NONE, '0', '1', '0', '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0'),
 		--PPC_CMPRB
 		PPC_CNTLZD => (ALU, OP_CNTLZD, RS, NONE, NONE, RA, NONE, NONE, NONE, '0', '0', '0', '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0'),
 		PPC_CNTLZW => (ALU, OP_CNTLZW, RS, NONE, NONE, RA, NONE, NONE, NONE, '0', '0', '0', '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0'),
@@ -90,7 +90,7 @@ architecture behaviour of decode1 is
 		--PPC_EXTSWSLI
 		--PPC_ICBI
 		PPC_ICBT => (ALU, OP_NOP, NONE, NONE, NONE, NONE, NONE, NONE, NONE, '0', '0', '0', '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0'),
-		PPC_ISEL => (ALU, OP_ISEL, RA_OR_ZERO, RB, NONE, RT, BC, NONE, NONE, '0', '0', '0', '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0'),
+		PPC_ISEL => (ALU, OP_ISEL, RA_OR_ZERO, RB, NONE, RT, BC, NONE, NONE, '1', '0', '0', '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0'),
 		PPC_ISYNC => (ALU, OP_NOP, NONE, NONE, NONE, NONE, NONE, NONE, NONE, '0', '0', '0', '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0'),
 		PPC_LBARX => (LDST, OP_LOAD, RA, RB, NONE, RT, NONE, NONE, NONE, '0', '0', '0', '0', is1B, '0', '0', '0', '1', '0', '0', NONE, '0'),
 		--CONST_LI matches CONST_SI, so reuse it
diff --git a/decode2.vhdl b/decode2.vhdl
index bb25230..5b76269 100644
--- a/decode2.vhdl
+++ b/decode2.vhdl
@@ -184,6 +184,8 @@ begin
 	r_out.read3_reg <= insn_rs(d.insn) when d.decode.input_reg_c = RS else
 			(others => '0');
 
+	c_out.read <= d.decode.input_cr;
+
 	decode2_1: process(all)
 		variable mul_a : std_ulogic_vector(63 downto 0);
 		variable mul_b : std_ulogic_vector(63 downto 0);
diff --git a/writeback.vhdl b/writeback.vhdl
index a14d3e0..03174f7 100644
--- a/writeback.vhdl
+++ b/writeback.vhdl
@@ -56,7 +56,6 @@ begin
 			end if;
 
 			if e.write_cr_enable = '1' then
-				report "Writing CR ";
 				c_tmp.write_cr_enable <= '1';
 				c_tmp.write_cr_mask <= e.write_cr_mask;
 				c_tmp.write_cr_data <= e.write_cr_data;
@@ -81,7 +80,6 @@ begin
 				w_tmp.write_data <= m.write_reg_data;
 			end if;
 			if m.write_cr_enable = '1' then
-				report "Writing CR ";
 				c_tmp.write_cr_enable <= '1';
 				c_tmp.write_cr_mask <= m.write_cr_mask;
 				c_tmp.write_cr_data <= m.write_cr_data;