register_file: Make read access to register file synchronous

With this, the register RAM is read synchronously using the addresses
supplied by decode1.  That means the register RAM can now be block RAM
rather than LUT RAM.

Debug accesses are done via the B port on cycles when decode1
indicates that there is no valid instruction or the instruction
doesn't use a [F]RB operand.

We latch the addresses being read in each cycle and use the same
address next cycle if stalled.  Data that is being written is latched
and a multiplexer on each read port then supplies the latched write
data if the read address for that port equals the write address.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/379/head
Paul Mackerras 2 years ago
parent 06c13d4988
commit 1d7de2f1da

@ -280,6 +280,9 @@ package common is
reg_1_addr : gspr_index_t;
reg_2_addr : gspr_index_t;
reg_3_addr : gspr_index_t;
read_1_enable : std_ulogic;
read_2_enable : std_ulogic;
read_3_enable : std_ulogic;
end record;

type bypass_data_t is record

@ -641,6 +641,7 @@ begin
variable bv : br_predictor_t;
variable fprs, fprabc : std_ulogic;
variable in3rc : std_ulogic;
variable may_read_rb : std_ulogic;
begin
v := Decode1ToDecode2Init;
vi := reg_internal_t_init;
@ -654,6 +655,7 @@ begin
fprs := '0';
fprabc := '0';
in3rc := '0';
may_read_rb := '0';

if f_in.valid = '1' then
report "Decode insn " & to_hstring(f_in.insn) & " at " & to_hstring(f_in.nia);
@ -675,10 +677,16 @@ begin
vi.override := not decode_op_4_valid(to_integer(unsigned(minor4op)));
v.decode := decode_op_4_array(to_integer(unsigned(f_in.insn(5 downto 0))));
in3rc := '1';
may_read_rb := '1';

when 23 =>
-- rlwnm[.]
may_read_rb := '1';

when 31 =>
-- major opcode 31, lots of things
v.decode := decode_op_31_array(to_integer(unsigned(f_in.insn(10 downto 1))));
may_read_rb := '1';

if std_match(f_in.insn(10 downto 1), "01-1010011") then
-- mfspr or mtspr
@ -728,6 +736,7 @@ begin

when 30 =>
v.decode := decode_op_30_array(to_integer(unsigned(f_in.insn(4 downto 1))));
may_read_rb := f_in.insn(4);

when 52 | 53 | 54 | 55 =>
-- stfd[u] and stfs[u]
@ -748,6 +757,7 @@ begin
in3rc := '1';
fprabc := '1';
fprs := '1';
may_read_rb := '1';
end if;

when 62 =>
@ -764,6 +774,7 @@ begin
in3rc := '1';
fprabc := '1';
fprs := '1';
may_read_rb := '1';
end if;

when others =>
@ -777,6 +788,9 @@ begin
else
vr.reg_3_addr := fprs & insn_rs(f_in.insn);
end if;
vr.read_1_enable := f_in.valid and not f_in.fetch_failed;
vr.read_2_enable := f_in.valid and not f_in.fetch_failed and may_read_rb;
vr.read_3_enable := f_in.valid and not f_in.fetch_failed;

if f_in.fetch_failed = '1' then
v.valid := '1';

@ -38,17 +38,27 @@ end entity register_file;
architecture behaviour of register_file is
type regfile is array(0 to 63) of std_ulogic_vector(63 downto 0);
signal registers : regfile := (others => (others => '0'));
signal rd_port_b : std_ulogic_vector(63 downto 0);
signal dbg_data : std_ulogic_vector(63 downto 0);
signal dbg_ack : std_ulogic;
signal dbg_gpr_done : std_ulogic;
signal addr_1_reg : gspr_index_t;
signal addr_2_reg : gspr_index_t;
signal addr_3_reg : gspr_index_t;
signal rd_2 : std_ulogic;
signal fwd_1 : std_ulogic;
signal fwd_2 : std_ulogic;
signal fwd_3 : std_ulogic;
signal data_1 : std_ulogic_vector(63 downto 0);
signal data_2 : std_ulogic_vector(63 downto 0);
signal data_3 : std_ulogic_vector(63 downto 0);
signal prev_write_data : std_ulogic_vector(63 downto 0);

begin
-- synchronous writes
-- synchronous reads and writes
register_write_0: process(clk)
variable a_addr, b_addr, c_addr : gspr_index_t;
variable w_addr : gspr_index_t;
variable b_enable : std_ulogic;
begin
if rising_edge(clk) then
if w_in.write_enable = '1' then
@ -66,57 +76,94 @@ begin
a_addr := d1_in.reg_1_addr;
b_addr := d1_in.reg_2_addr;
c_addr := d1_in.reg_3_addr;

if stall = '0' then
b_enable := d1_in.read_2_enable;
if stall = '1' then
a_addr := addr_1_reg;
b_addr := addr_2_reg;
c_addr := addr_3_reg;
b_enable := rd_2;
else
addr_1_reg <= a_addr;
addr_2_reg <= b_addr;
addr_3_reg <= c_addr;
rd_2 <= b_enable;
end if;

fwd_1 <= '0';
fwd_2 <= '0';
fwd_3 <= '0';
if w_in.write_enable = '1' then
if w_addr = a_addr then
fwd_1 <= '1';
end if;
if w_addr = b_addr then
fwd_2 <= '1';
end if;
if w_addr = c_addr then
fwd_3 <= '1';
end if;
end if;

-- Do debug reads to GPRs and FPRs using the B port when it is not in use
if dbg_gpr_req = '1' then
if b_enable = '0' then
b_addr := dbg_gpr_addr(5 downto 0);
dbg_gpr_done <= '1';
end if;
else
dbg_gpr_done <= '0';
end if;

if not HAS_FPU then
-- Make it obvious that we only want 32 GSPRs for a no-FPU implementation
a_addr(5) := '0';
b_addr(5) := '0';
c_addr(5) := '0';
end if;
data_1 <= registers(to_integer(unsigned(a_addr)));
data_2 <= registers(to_integer(unsigned(b_addr)));
data_3 <= registers(to_integer(unsigned(c_addr)));

prev_write_data <= w_in.write_data;

assert (d_in.read1_enable = '0') or (d_in.read1_reg = addr_1_reg) severity failure;
assert (d_in.read2_enable = '0') or (d_in.read2_reg = addr_2_reg) severity failure;
assert (d_in.read3_enable = '0') or (d_in.read3_reg = addr_3_reg) severity failure;
end if;
end process register_write_0;

-- asynchronous reads
-- asynchronous forwarding of write data
register_read_0: process(all)
variable a_addr, b_addr, c_addr : gspr_index_t;
variable w_addr : gspr_index_t;
variable out_data_1 : std_ulogic_vector(63 downto 0);
variable out_data_2 : std_ulogic_vector(63 downto 0);
variable out_data_3 : std_ulogic_vector(63 downto 0);
begin
a_addr := d_in.read1_reg;
b_addr := d_in.read2_reg;
c_addr := d_in.read3_reg;
w_addr := w_in.write_reg;
if not HAS_FPU then
-- Make it obvious that we only want 32 GSPRs for a no-FPU implementation
a_addr(5) := '0';
b_addr(5) := '0';
c_addr(5) := '0';
w_addr(5) := '0';
out_data_1 := data_1;
out_data_2 := data_2;
out_data_3 := data_3;
if fwd_1 = '1' then
out_data_1 := prev_write_data;
end if;
if fwd_2 = '1' then
out_data_2 := prev_write_data;
end if;
if fwd_3 = '1' then
out_data_3 := prev_write_data;
end if;

if d_in.read1_enable = '1' then
report "Reading GPR " & to_hstring(a_addr) & " " & to_hstring(registers(to_integer(unsigned(a_addr))));
report "Reading GPR " & to_hstring(addr_1_reg) & " " & to_hstring(out_data_1);
end if;
if d_in.read2_enable = '1' then
report "Reading GPR " & to_hstring(b_addr) & " " & to_hstring(registers(to_integer(unsigned(b_addr))));
report "Reading GPR " & to_hstring(addr_2_reg) & " " & to_hstring(out_data_2);
end if;
if d_in.read3_enable = '1' then
report "Reading GPR " & to_hstring(c_addr) & " " & to_hstring(registers(to_integer(unsigned(c_addr))));
end if;
d_out.read1_data <= registers(to_integer(unsigned(a_addr)));
-- B read port is multiplexed with reads from the debug circuitry
if d_in.read2_enable = '0' and dbg_gpr_req = '1' and dbg_ack = '0' then
b_addr := dbg_gpr_addr;
if not HAS_FPU then
b_addr(5) := '0';
end if;
report "Reading GPR " & to_hstring(addr_3_reg) & " " & to_hstring(out_data_3);
end if;
rd_port_b <= registers(to_integer(unsigned(b_addr)));
d_out.read2_data <= rd_port_b;
d_out.read3_data <= registers(to_integer(unsigned(c_addr)));

-- Forwarding of written data is now done explicitly with a bypass path
-- from writeback to decode2.
d_out.read1_data <= out_data_1;
d_out.read2_data <= out_data_2;
d_out.read3_data <= out_data_3;
end process register_read_0;

-- Latch read data and ack if dbg read requested and B port not busy
@ -124,8 +171,8 @@ begin
begin
if rising_edge(clk) then
if dbg_gpr_req = '1' then
if d_in.read2_enable = '0' and dbg_ack = '0' then
dbg_data <= rd_port_b;
if dbg_ack = '0' and dbg_gpr_done = '1' then
dbg_data <= data_2;
dbg_ack <= '1';
end if;
else

Loading…
Cancel
Save