From fdb3ef6874fb34e67e8d6f136440378c706069e9 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sat, 19 Feb 2022 19:03:49 +1100 Subject: [PATCH] Finish off taking SPRs out of register file With this, the register file now contains 64 entries, for 32 GPRs and 32 FPRs, rather than the 128 it had previously. Several things get simplified - decode1 no longer has to work out the ispr{1,2,o} values, decode_input_reg_{a,b,c} no longer have the t = SPR case, etc. Signed-off-by: Paul Mackerras --- common.vhdl | 45 ++++++------------------------------------- decode1.vhdl | 10 +++------- decode2.vhdl | 48 ++++++++++------------------------------------ decode_types.vhdl | 6 +++--- execute1.vhdl | 5 ++--- loadstore1.vhdl | 2 +- logical.vhdl | 2 +- register_file.vhdl | 26 +++++++++++-------------- 8 files changed, 37 insertions(+), 107 deletions(-) diff --git a/common.vhdl b/common.vhdl index 7df451b..06b62e0 100644 --- a/common.vhdl +++ b/common.vhdl @@ -86,30 +86,19 @@ package common is -- GPR indices in the register file (GPR only) subtype gpr_index_t is std_ulogic_vector(4 downto 0); - -- Extended GPR index (can hold an SPR or a FPR) - subtype gspr_index_t is std_ulogic_vector(6 downto 0); + -- Extended GPR index (can hold a GPR or a FPR) + subtype gspr_index_t is std_ulogic_vector(5 downto 0); -- FPR indices subtype fpr_index_t is std_ulogic_vector(4 downto 0); - -- Some SPRs are stored in the register file, they use the magic - -- GPR numbers above 31. + -- FPRs are stored in the register file, using GSPR + -- numbers from 32 to 63. -- - -- The function fast_spr_num() returns the corresponding fast - -- pseudo-GPR number for a given SPR number. The result MSB - -- indicates if this is indeed a fast SPR. If clear, then - -- the SPR is not stored in the GPR file. - -- - -- FPRs are also stored in the register file, using GSPR - -- numbers from 64 to 95. - -- - function fast_spr_num(spr: spr_num_t) return gspr_index_t; -- Indices conversion functions function gspr_to_gpr(i: gspr_index_t) return gpr_index_t; function gpr_to_gspr(i: gpr_index_t) return gspr_index_t; - function gpr_or_spr_to_gspr(g: gpr_index_t; s: gspr_index_t) return gspr_index_t; - function is_fast_spr(s: gspr_index_t) return std_ulogic; function fpr_to_gspr(f: fpr_index_t) return gspr_index_t; -- The XER is split: the common bits (CA, OV, SO, OV32 and CA32) are @@ -271,9 +260,6 @@ package common is stop_mark : std_ulogic; nia: std_ulogic_vector(63 downto 0); insn: std_ulogic_vector(31 downto 0); - ispr1: gspr_index_t; -- (G)SPR used for branch condition (CTR) or mfspr - ispr2: gspr_index_t; -- (G)SPR used for branch target (CTR, LR, TAR) - ispro: gspr_index_t; -- (G)SPR written with LR or CTR decode: decode_rom_t; br_pred: std_ulogic; -- Branch was predicted to be taken big_endian: std_ulogic; @@ -282,7 +268,6 @@ package common is end record; constant Decode1ToDecode2Init : Decode1ToDecode2Type := (valid => '0', stop_mark => '0', nia => (others => '0'), insn => (others => '0'), - ispr1 => (others => '0'), ispr2 => (others => '0'), ispro => (others => '0'), decode => decode_rom_init, br_pred => '0', big_endian => '0', spr_info => spr_id_init, ram_spr => ram_spr_info_init); @@ -783,10 +768,6 @@ package body common is begin return to_integer(unsigned(insn(15 downto 11) & insn(20 downto 16))); end; - function fast_spr_num(spr: spr_num_t) return gspr_index_t is - begin - return "0000000"; - end; function gspr_to_gpr(i: gspr_index_t) return gpr_index_t is begin @@ -795,26 +776,12 @@ package body common is function gpr_to_gspr(i: gpr_index_t) return gspr_index_t is begin - return "00" & i; - end; - - function gpr_or_spr_to_gspr(g: gpr_index_t; s: gspr_index_t) return gspr_index_t is - begin - if s(5) = '1' then - return s; - else - return gpr_to_gspr(g); - end if; - end; - - function is_fast_spr(s: gspr_index_t) return std_ulogic is - begin - return s(5); + return "0" & i; end; function fpr_to_gspr(f: fpr_index_t) return gspr_index_t is begin - return "10" & f; + return "1" & f; end; function tag_match(tag1 : instr_tag_t; tag2 : instr_tag_t) return boolean is diff --git a/decode1.vhdl b/decode1.vhdl index b6cea31..af8cd6c 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -177,7 +177,7 @@ architecture behaviour of decode1 is -- addpcis 2#001# => (ALU, NONE, OP_ADD, CIA, CONST_DXHI4, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- bclr, bcctr, bctar - 2#100# => (ALU, NONE, OP_BCREG, NONE, NONE, NONE, SPR, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE), + 2#100# => (ALU, NONE, OP_BCREG, NONE, NONE, NONE, NONE, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE), -- isync 2#111# => (ALU, NONE, OP_ISYNC, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- rfid @@ -329,7 +329,7 @@ architecture behaviour of decode1 is 2#1001000000# => (ALU, NONE, OP_MCRXRX, NONE, NONE, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- mcrxrx 2#0000010011# => (ALU, NONE, OP_MFCR, NONE, NONE, NONE, RT, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- mfcr/mfocrf 2#0001010011# => (ALU, NONE, OP_MFMSR, NONE, NONE, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE), -- mfmsr - 2#0101010011# => (ALU, NONE, OP_MFSPR, SPR, NONE, RS, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- mfspr + 2#0101010011# => (ALU, NONE, OP_MFSPR, NONE, NONE, RS, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- mfspr 2#0100001001# => (DVU, NONE, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- modud 2#0100001011# => (DVU, NONE, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', NONE, '0', '0', NONE), -- moduw 2#1100001001# => (DVU, NONE, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE), -- modsd @@ -337,7 +337,7 @@ architecture behaviour of decode1 is 2#0010010000# => (ALU, NONE, OP_MTCRF, NONE, NONE, RS, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- mtcrf/mtocrf 2#0010010010# => (ALU, NONE, OP_MTMSRD, NONE, NONE, RS, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', NONE, '0', '0', NONE), -- mtmsr 2#0010110010# => (ALU, NONE, OP_MTMSRD, NONE, NONE, RS, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- mtmsrd # ignore top bits and d - 2#0111010011# => (ALU, NONE, OP_MTSPR, NONE, NONE, RS, SPR, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- mtspr + 2#0111010011# => (ALU, NONE, OP_MTSPR, NONE, NONE, RS, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- mtspr 2#0001001001# => (ALU, NONE, OP_MUL_H64, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0', NONE), -- mulhd 2#0000001001# => (ALU, NONE, OP_MUL_H64, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), -- mulhdu 2#0001001011# => (ALU, NONE, OP_MUL_H32, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0', NONE), -- mulhw @@ -670,10 +670,6 @@ begin -- major opcode 31, lots of things v.decode := decode_op_31_array(to_integer(unsigned(f_in.insn(10 downto 1)))); - -- Work out ispr1/ispro independent of v.decode since they seem to be critical path - v.ispr1 := fast_spr_num(sprn); - v.ispro := fast_spr_num(sprn); - if std_match(f_in.insn(10 downto 1), "01-1010011") then -- mfspr or mtspr -- Make mtspr to slow SPRs single issue diff --git a/decode2.vhdl b/decode2.vhdl index 928ec94..5a8c2b7 100644 --- a/decode2.vhdl +++ b/decode2.vhdl @@ -82,21 +82,11 @@ architecture behaviour of decode2 is constant decode_output_reg_init : decode_output_reg_t := ('0', (others => '0')); function decode_input_reg_a (t : input_reg_a_t; insn_in : std_ulogic_vector(31 downto 0); - ispr : gspr_index_t; instr_addr : std_ulogic_vector(63 downto 0)) return decode_input_reg_t is begin if t = RA or (t = RA_OR_ZERO and insn_ra(insn_in) /= "00000") then return ('1', gpr_to_gspr(insn_ra(insn_in)), (others => '0')); - elsif t = SPR then - -- ISPR must be either a valid fast SPR number or all 0 for a slow SPR. - -- If it's all 0, we don't treat it as a dependency as slow SPRs - -- operations are single issue. - -- - assert is_fast_spr(ispr) = '1' or ispr = "0000000" - report "Decode A says SPR but ISPR is invalid:" & - to_hstring(ispr) severity failure; - return (is_fast_spr(ispr), ispr, (others => '0')); elsif t = CIA then return ('0', (others => '0'), instr_addr); elsif HAS_FPU and t = FRA then @@ -106,8 +96,8 @@ architecture behaviour of decode2 is end if; end; - function decode_input_reg_b (t : input_reg_b_t; insn_in : std_ulogic_vector(31 downto 0); - ispr : gspr_index_t) return decode_input_reg_t is + function decode_input_reg_b (t : input_reg_b_t; insn_in : std_ulogic_vector(31 downto 0)) + return decode_input_reg_t is variable ret : decode_input_reg_t; begin case t is @@ -143,14 +133,6 @@ architecture behaviour of decode2 is ret := ('0', (others => '0'), x"00000000000000" & "00" & insn_in(1) & insn_in(15 downto 11)); when CONST_SH32 => ret := ('0', (others => '0'), x"00000000000000" & "000" & insn_in(15 downto 11)); - when SPR => - -- ISPR must be either a valid fast SPR number or all 0 for a slow SPR. - -- If it's all 0, we don't treat it as a dependency as slow SPRs - -- operations are single issue. - assert is_fast_spr(ispr) = '1' or ispr = "0000000" - report "Decode B says SPR but ISPR is invalid:" & - to_hstring(ispr) severity failure; - ret := (is_fast_spr(ispr), ispr, (others => '0')); when NONE => ret := ('0', (others => '0'), (others => '0')); end case; @@ -183,8 +165,8 @@ architecture behaviour of decode2 is end case; end; - function decode_output_reg (t : output_reg_a_t; insn_in : std_ulogic_vector(31 downto 0); - ispr : gspr_index_t) return decode_output_reg_t is + function decode_output_reg (t : output_reg_a_t; insn_in : std_ulogic_vector(31 downto 0)) + return decode_output_reg_t is begin case t is when RT => @@ -195,18 +177,10 @@ architecture behaviour of decode2 is if HAS_FPU then return ('1', fpr_to_gspr(insn_frt(insn_in))); else - return ('0', "0000000"); + return ('0', "000000"); end if; - when SPR => - -- ISPR must be either a valid fast SPR number or all 0 for a slow SPR. - -- If it's all 0, we don't treat it as a dependency as slow SPRs - -- operations are single issue. - assert is_fast_spr(ispr) = '1' or ispr = "0000000" - report "Decode B says SPR but ISPR is invalid:" & - to_hstring(ispr) severity failure; - return (is_fast_spr(ispr), ispr); when NONE => - return ('0', "0000000"); + return ('0', "000000"); end case; end; @@ -386,10 +360,10 @@ begin decoded_reg_c <= decode_input_reg_init; decoded_reg_o <= decode_output_reg_init; if d_in.valid = '1' then - decoded_reg_a <= decode_input_reg_a (d_in.decode.input_reg_a, d_in.insn, d_in.ispr1, d_in.nia); - decoded_reg_b <= decode_input_reg_b (d_in.decode.input_reg_b, d_in.insn, d_in.ispr2); + decoded_reg_a <= decode_input_reg_a (d_in.decode.input_reg_a, d_in.insn, d_in.nia); + decoded_reg_b <= decode_input_reg_b (d_in.decode.input_reg_b, d_in.insn); decoded_reg_c <= decode_input_reg_c (d_in.decode.input_reg_c, d_in.insn); - decoded_reg_o <= decode_output_reg (d_in.decode.output_reg_a, d_in.insn, d_in.ispro); + decoded_reg_o <= decode_output_reg (d_in.decode.output_reg_a, d_in.insn); end if; r_out.read1_enable <= decoded_reg_a.reg_valid; @@ -565,9 +539,7 @@ begin v.e.result_sel := result_select(op); v.e.sub_select := subresult_select(op); if op = OP_MFSPR then - if is_fast_spr(d_in.ispr1) = '1' then - v.e.result_sel := "000"; -- adder_result, effectively a_in - elsif d_in.ram_spr.valid = '1' then + if d_in.ram_spr.valid = '1' then v.e.result_sel := "101"; -- ramspr_result elsif d_in.spr_info.valid = '0' then -- Privileged mfspr to invalid/unimplemented SPR numbers diff --git a/decode_types.vhdl b/decode_types.vhdl index 514bc08..9ee329d 100644 --- a/decode_types.vhdl +++ b/decode_types.vhdl @@ -22,11 +22,11 @@ package decode_types is OP_BCD, OP_ADDG6S, OP_FETCH_FAILED ); - type input_reg_a_t is (NONE, RA, RA_OR_ZERO, SPR, CIA, FRA); + type input_reg_a_t is (NONE, RA, RA_OR_ZERO, CIA, FRA); type input_reg_b_t is (NONE, RB, CONST_UI, CONST_SI, CONST_SI_HI, CONST_UI_HI, CONST_LI, CONST_BD, - CONST_DXHI4, CONST_DS, CONST_DQ, CONST_M1, CONST_SH, CONST_SH32, SPR, FRB); + CONST_DXHI4, CONST_DS, CONST_DQ, CONST_M1, CONST_SH, CONST_SH32, FRB); type input_reg_c_t is (NONE, RS, RCR, FRC, FRS); - type output_reg_a_t is (NONE, RT, RA, SPR, FRT); + type output_reg_a_t is (NONE, RT, RA, FRT); type rc_t is (NONE, ONE, RC); type carry_in_t is (ZERO, CA, OV, ONE); diff --git a/execute1.vhdl b/execute1.vhdl index 5ee830b..dc68806 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -1135,7 +1135,7 @@ begin when OP_DARN => when OP_MFMSR => when OP_MFSPR => - if is_fast_spr(e_in.read_reg1) = '1' or e_in.spr_is_ram = '1' then + if e_in.spr_is_ram = '1' then if e_in.valid = '1' then report "MFSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) & "=" & to_hstring(alu_result); @@ -1216,8 +1216,7 @@ begin when others => end case; end if; - if e_in.spr_select.valid = '0' and is_fast_spr(e_in.write_reg) = '0' and - e_in.spr_is_ram = '0' then + if e_in.spr_select.valid = '0' and e_in.spr_is_ram = '0' then -- mtspr to unimplemented SPRs should be a nop in -- supervisor mode and a program interrupt for user mode if ex1.msr(MSR_PR) = '1' then diff --git a/loadstore1.vhdl b/loadstore1.vhdl index b556211..9dab15b 100644 --- a/loadstore1.vhdl +++ b/loadstore1.vhdl @@ -97,7 +97,7 @@ architecture behave of loadstore1 is mode_32bit => '0', addr => (others => '0'), byte_sel => x"00", second_bytes => x"00", store_data => (others => '0'), instr_tag => instr_tag_init, - write_reg => 7x"00", length => x"0", + write_reg => 6x"00", length => x"0", elt_length => x"0", byte_reverse => '0', brev_mask => "000", sign_extend => '0', update => '0', xerc => xerc_init, reserve => '0', diff --git a/logical.vhdl b/logical.vhdl index 60309ac..77ef29c 100644 --- a/logical.vhdl +++ b/logical.vhdl @@ -167,7 +167,7 @@ begin end if; tmp(7 downto 0) := rs(7 downto 0); when others => - -- e.g. OP_MTSPR + -- e.g. OP_MFSPR tmp := rs; end case; diff --git a/register_file.vhdl b/register_file.vhdl index ed856cb..dcce0a4 100644 --- a/register_file.vhdl +++ b/register_file.vhdl @@ -34,7 +34,7 @@ entity register_file is end entity register_file; architecture behaviour of register_file is - type regfile is array(0 to 127) of std_ulogic_vector(63 downto 0); + type regfile is array(0 to 63) of std_ulogic_vector(63 downto 0); signal registers : regfile := (others => (others => '0')); signal rd_port_b : std_ulogic_vector(63 downto 0); signal dbg_data : std_ulogic_vector(63 downto 0); @@ -47,15 +47,11 @@ begin if rising_edge(clk) then if w_in.write_enable = '1' then w_addr := w_in.write_reg; - if HAS_FPU and w_addr(6) = '1' then + if HAS_FPU and w_addr(5) = '1' then report "Writing FPR " & to_hstring(w_addr(4 downto 0)) & " " & to_hstring(w_in.write_data); else - w_addr(6) := '0'; - if w_addr(5) = '0' then - report "Writing GPR " & to_hstring(w_addr) & " " & to_hstring(w_in.write_data); - else - report "Writing GSPR " & to_hstring(w_addr) & " " & to_hstring(w_in.write_data); - end if; + w_addr(5) := '0'; + report "Writing GPR " & to_hstring(w_addr) & " " & to_hstring(w_in.write_data); end if; assert not(is_x(w_in.write_data)) and not(is_x(w_in.write_reg)) severity failure; registers(to_integer(unsigned(w_addr))) <= w_in.write_data; @@ -73,11 +69,11 @@ begin c_addr := d_in.read3_reg; w_addr := w_in.write_reg; if not HAS_FPU then - -- Make it obvious that we only want 64 GSPRs for a no-FPU implementation - a_addr(6) := '0'; - b_addr(6) := '0'; - c_addr(6) := '0'; - w_addr(6) := '0'; + -- Make it obvious that we only want 32 GSPRs for a no-FPU implementation + a_addr(5) := '0'; + b_addr(5) := '0'; + c_addr(5) := '0'; + w_addr(5) := '0'; end if; if d_in.read1_enable = '1' then report "Reading GPR " & to_hstring(a_addr) & " " & to_hstring(registers(to_integer(unsigned(a_addr)))); @@ -93,7 +89,7 @@ begin if d_in.read2_enable = '0' and dbg_gpr_req = '1' and dbg_ack = '0' then b_addr := dbg_gpr_addr; if not HAS_FPU then - b_addr(6) := '0'; + b_addr(5) := '0'; end if; end if; rd_port_b <= registers(to_integer(unsigned(b_addr))); @@ -150,7 +146,7 @@ begin if rising_edge(clk) then log_data <= w_in.write_data & w_in.write_enable & - w_in.write_reg; + '0' & w_in.write_reg; end if; end process; log_out <= log_data;