register_file: Make read access to register file synchronous

With this, the register RAM is read synchronously using the addresses
supplied by decode1.  That means the register RAM can now be block RAM
rather than LUT RAM.

Debug accesses are done via the B port on cycles when decode1
indicates that there is no valid instruction or the instruction
doesn't use a [F]RB operand.

We latch the addresses being read in each cycle and reuse the same
addresses in the next cycle if stalled.  Data that is being written is
also latched, and a multiplexer on each read port then supplies the
latched write data in the following cycle if the read address for that
port equalled the write address in the cycle of the write.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/379/head
Paul Mackerras 2 years ago
parent 06c13d4988
commit 1d7de2f1da

@ -280,6 +280,9 @@ package common is
reg_1_addr : gspr_index_t; reg_1_addr : gspr_index_t;
reg_2_addr : gspr_index_t; reg_2_addr : gspr_index_t;
reg_3_addr : gspr_index_t; reg_3_addr : gspr_index_t;
read_1_enable : std_ulogic;
read_2_enable : std_ulogic;
read_3_enable : std_ulogic;
end record; end record;


type bypass_data_t is record type bypass_data_t is record

@ -641,6 +641,7 @@ begin
variable bv : br_predictor_t; variable bv : br_predictor_t;
variable fprs, fprabc : std_ulogic; variable fprs, fprabc : std_ulogic;
variable in3rc : std_ulogic; variable in3rc : std_ulogic;
variable may_read_rb : std_ulogic;
begin begin
v := Decode1ToDecode2Init; v := Decode1ToDecode2Init;
vi := reg_internal_t_init; vi := reg_internal_t_init;
@ -654,6 +655,7 @@ begin
fprs := '0'; fprs := '0';
fprabc := '0'; fprabc := '0';
in3rc := '0'; in3rc := '0';
may_read_rb := '0';


if f_in.valid = '1' then if f_in.valid = '1' then
report "Decode insn " & to_hstring(f_in.insn) & " at " & to_hstring(f_in.nia); report "Decode insn " & to_hstring(f_in.insn) & " at " & to_hstring(f_in.nia);
@ -675,10 +677,16 @@ begin
vi.override := not decode_op_4_valid(to_integer(unsigned(minor4op))); vi.override := not decode_op_4_valid(to_integer(unsigned(minor4op)));
v.decode := decode_op_4_array(to_integer(unsigned(f_in.insn(5 downto 0)))); v.decode := decode_op_4_array(to_integer(unsigned(f_in.insn(5 downto 0))));
in3rc := '1'; in3rc := '1';
may_read_rb := '1';

when 23 =>
-- rlwnm[.]
may_read_rb := '1';


when 31 => when 31 =>
-- major opcode 31, lots of things -- major opcode 31, lots of things
v.decode := decode_op_31_array(to_integer(unsigned(f_in.insn(10 downto 1)))); v.decode := decode_op_31_array(to_integer(unsigned(f_in.insn(10 downto 1))));
may_read_rb := '1';


if std_match(f_in.insn(10 downto 1), "01-1010011") then if std_match(f_in.insn(10 downto 1), "01-1010011") then
-- mfspr or mtspr -- mfspr or mtspr
@ -728,6 +736,7 @@ begin


when 30 => when 30 =>
v.decode := decode_op_30_array(to_integer(unsigned(f_in.insn(4 downto 1)))); v.decode := decode_op_30_array(to_integer(unsigned(f_in.insn(4 downto 1))));
may_read_rb := f_in.insn(4);


when 52 | 53 | 54 | 55 => when 52 | 53 | 54 | 55 =>
-- stfd[u] and stfs[u] -- stfd[u] and stfs[u]
@ -748,6 +757,7 @@ begin
in3rc := '1'; in3rc := '1';
fprabc := '1'; fprabc := '1';
fprs := '1'; fprs := '1';
may_read_rb := '1';
end if; end if;


when 62 => when 62 =>
@ -764,6 +774,7 @@ begin
in3rc := '1'; in3rc := '1';
fprabc := '1'; fprabc := '1';
fprs := '1'; fprs := '1';
may_read_rb := '1';
end if; end if;


when others => when others =>
@ -777,6 +788,9 @@ begin
else else
vr.reg_3_addr := fprs & insn_rs(f_in.insn); vr.reg_3_addr := fprs & insn_rs(f_in.insn);
end if; end if;
vr.read_1_enable := f_in.valid and not f_in.fetch_failed;
vr.read_2_enable := f_in.valid and not f_in.fetch_failed and may_read_rb;
vr.read_3_enable := f_in.valid and not f_in.fetch_failed;


if f_in.fetch_failed = '1' then if f_in.fetch_failed = '1' then
v.valid := '1'; v.valid := '1';

@ -38,17 +38,27 @@ end entity register_file;
architecture behaviour of register_file is architecture behaviour of register_file is
type regfile is array(0 to 63) of std_ulogic_vector(63 downto 0); type regfile is array(0 to 63) of std_ulogic_vector(63 downto 0);
signal registers : regfile := (others => (others => '0')); signal registers : regfile := (others => (others => '0'));
signal rd_port_b : std_ulogic_vector(63 downto 0);
signal dbg_data : std_ulogic_vector(63 downto 0); signal dbg_data : std_ulogic_vector(63 downto 0);
signal dbg_ack : std_ulogic; signal dbg_ack : std_ulogic;
signal dbg_gpr_done : std_ulogic;
signal addr_1_reg : gspr_index_t; signal addr_1_reg : gspr_index_t;
signal addr_2_reg : gspr_index_t; signal addr_2_reg : gspr_index_t;
signal addr_3_reg : gspr_index_t; signal addr_3_reg : gspr_index_t;
signal rd_2 : std_ulogic;
signal fwd_1 : std_ulogic;
signal fwd_2 : std_ulogic;
signal fwd_3 : std_ulogic;
signal data_1 : std_ulogic_vector(63 downto 0);
signal data_2 : std_ulogic_vector(63 downto 0);
signal data_3 : std_ulogic_vector(63 downto 0);
signal prev_write_data : std_ulogic_vector(63 downto 0);

begin begin
-- synchronous writes -- synchronous reads and writes
register_write_0: process(clk) register_write_0: process(clk)
variable a_addr, b_addr, c_addr : gspr_index_t; variable a_addr, b_addr, c_addr : gspr_index_t;
variable w_addr : gspr_index_t; variable w_addr : gspr_index_t;
variable b_enable : std_ulogic;
begin begin
if rising_edge(clk) then if rising_edge(clk) then
if w_in.write_enable = '1' then if w_in.write_enable = '1' then
@ -66,57 +76,94 @@ begin
a_addr := d1_in.reg_1_addr; a_addr := d1_in.reg_1_addr;
b_addr := d1_in.reg_2_addr; b_addr := d1_in.reg_2_addr;
c_addr := d1_in.reg_3_addr; c_addr := d1_in.reg_3_addr;

b_enable := d1_in.read_2_enable;
if stall = '0' then if stall = '1' then
a_addr := addr_1_reg;
b_addr := addr_2_reg;
c_addr := addr_3_reg;
b_enable := rd_2;
else
addr_1_reg <= a_addr; addr_1_reg <= a_addr;
addr_2_reg <= b_addr; addr_2_reg <= b_addr;
addr_3_reg <= c_addr; addr_3_reg <= c_addr;
rd_2 <= b_enable;
end if; end if;

fwd_1 <= '0';
fwd_2 <= '0';
fwd_3 <= '0';
if w_in.write_enable = '1' then
if w_addr = a_addr then
fwd_1 <= '1';
end if;
if w_addr = b_addr then
fwd_2 <= '1';
end if;
if w_addr = c_addr then
fwd_3 <= '1';
end if;
end if;

-- Do debug reads to GPRs and FPRs using the B port when it is not in use
if dbg_gpr_req = '1' then
if b_enable = '0' then
b_addr := dbg_gpr_addr(5 downto 0);
dbg_gpr_done <= '1';
end if;
else
dbg_gpr_done <= '0';
end if;

if not HAS_FPU then
-- Make it obvious that we only want 32 GSPRs for a no-FPU implementation
a_addr(5) := '0';
b_addr(5) := '0';
c_addr(5) := '0';
end if;
data_1 <= registers(to_integer(unsigned(a_addr)));
data_2 <= registers(to_integer(unsigned(b_addr)));
data_3 <= registers(to_integer(unsigned(c_addr)));

prev_write_data <= w_in.write_data;

assert (d_in.read1_enable = '0') or (d_in.read1_reg = addr_1_reg) severity failure; assert (d_in.read1_enable = '0') or (d_in.read1_reg = addr_1_reg) severity failure;
assert (d_in.read2_enable = '0') or (d_in.read2_reg = addr_2_reg) severity failure; assert (d_in.read2_enable = '0') or (d_in.read2_reg = addr_2_reg) severity failure;
assert (d_in.read3_enable = '0') or (d_in.read3_reg = addr_3_reg) severity failure; assert (d_in.read3_enable = '0') or (d_in.read3_reg = addr_3_reg) severity failure;
end if; end if;
end process register_write_0; end process register_write_0;


-- asynchronous reads -- asynchronous forwarding of write data
register_read_0: process(all) register_read_0: process(all)
variable a_addr, b_addr, c_addr : gspr_index_t; variable out_data_1 : std_ulogic_vector(63 downto 0);
variable w_addr : gspr_index_t; variable out_data_2 : std_ulogic_vector(63 downto 0);
variable out_data_3 : std_ulogic_vector(63 downto 0);
begin begin
a_addr := d_in.read1_reg; out_data_1 := data_1;
b_addr := d_in.read2_reg; out_data_2 := data_2;
c_addr := d_in.read3_reg; out_data_3 := data_3;
w_addr := w_in.write_reg; if fwd_1 = '1' then
if not HAS_FPU then out_data_1 := prev_write_data;
-- Make it obvious that we only want 32 GSPRs for a no-FPU implementation
a_addr(5) := '0';
b_addr(5) := '0';
c_addr(5) := '0';
w_addr(5) := '0';
end if; end if;
if fwd_2 = '1' then
out_data_2 := prev_write_data;
end if;
if fwd_3 = '1' then
out_data_3 := prev_write_data;
end if;

if d_in.read1_enable = '1' then if d_in.read1_enable = '1' then
report "Reading GPR " & to_hstring(a_addr) & " " & to_hstring(registers(to_integer(unsigned(a_addr)))); report "Reading GPR " & to_hstring(addr_1_reg) & " " & to_hstring(out_data_1);
end if; end if;
if d_in.read2_enable = '1' then if d_in.read2_enable = '1' then
report "Reading GPR " & to_hstring(b_addr) & " " & to_hstring(registers(to_integer(unsigned(b_addr)))); report "Reading GPR " & to_hstring(addr_2_reg) & " " & to_hstring(out_data_2);
end if; end if;
if d_in.read3_enable = '1' then if d_in.read3_enable = '1' then
report "Reading GPR " & to_hstring(c_addr) & " " & to_hstring(registers(to_integer(unsigned(c_addr)))); report "Reading GPR " & to_hstring(addr_3_reg) & " " & to_hstring(out_data_3);
end if;
d_out.read1_data <= registers(to_integer(unsigned(a_addr)));
-- B read port is multiplexed with reads from the debug circuitry
if d_in.read2_enable = '0' and dbg_gpr_req = '1' and dbg_ack = '0' then
b_addr := dbg_gpr_addr;
if not HAS_FPU then
b_addr(5) := '0';
end if;
end if; end if;
rd_port_b <= registers(to_integer(unsigned(b_addr)));
d_out.read2_data <= rd_port_b;
d_out.read3_data <= registers(to_integer(unsigned(c_addr)));


-- Forwarding of written data is now done explicitly with a bypass path d_out.read1_data <= out_data_1;
-- from writeback to decode2. d_out.read2_data <= out_data_2;
d_out.read3_data <= out_data_3;
end process register_read_0; end process register_read_0;


-- Latch read data and ack if dbg read requested and B port not busy -- Latch read data and ack if dbg read requested and B port not busy
@ -124,8 +171,8 @@ begin
begin begin
if rising_edge(clk) then if rising_edge(clk) then
if dbg_gpr_req = '1' then if dbg_gpr_req = '1' then
if d_in.read2_enable = '0' and dbg_ack = '0' then if dbg_ack = '0' and dbg_gpr_done = '1' then
dbg_data <= rd_port_b; dbg_data <= data_2;
dbg_ack <= '1'; dbg_ack <= '1';
end if; end if;
else else

Loading…
Cancel
Save