Start removing SPRs from register file

This starts the process of removing SPRs from the register file by
moving SRR0/1, SPRG0-3, HSRR0/1 and HSPRG0/1 out of the register file
and putting them into execute1.  They are stored in a pair of small
RAM arrays, referred to as "even" and "odd".  The reason for having
two arrays is so that two values can be read and written in each
cycle.  For example, SRR0 and SRR1 can be written in parallel by an
interrupt and read in parallel by the rfid instruction.

The addresses in the RAM which will be accessed are determined in the
decode2 stage.  We have one write address for both sides, but two read
addresses, since in future we will want to be able to read CTR at the
same time as either LR or TAR.

We now have a connection from writeback to execute1 which carries the
partial SRR1 value for an interrupt.  SRR0 comes from the execute
pipeline; we no longer need to carry instruction addresses along the
LSU and FPU pipelines.  Since SRR0 and SRR1 can be written in the same
cycle now, we don't need the little state machine in writeback any
more.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/379/head
Paul Mackerras 2 years ago
parent 73cc5167ec
commit bc4d02cb0d

@ -124,6 +124,28 @@ package common is
end record; end record;
constant xerc_init : xer_common_t := (others => '0'); constant xerc_init : xer_common_t := (others => '0');


-- Some SPRs are stored in a pair of small RAMs in execute1, referred to
-- as the "even" and "odd" halves, so that two SPRs can be read and
-- written in each cycle.
-- ramspr_index addresses one entry within either half; the same index
-- value therefore names one SPR in the even half and a different SPR in
-- the odd half.  Paired registers (e.g. SRR0/SRR1) share an index so
-- that both can be written using a single write address.
subtype ramspr_index is natural range 0 to 7;
-- Even half:
constant RAMSPR_SRR0 : ramspr_index := 0;
constant RAMSPR_HSRR0 : ramspr_index := 1;
constant RAMSPR_SPRG0 : ramspr_index := 2;
constant RAMSPR_SPRG2 : ramspr_index := 3;
constant RAMSPR_HSPRG0 : ramspr_index := 4;
-- Odd half:
constant RAMSPR_SRR1 : ramspr_index := 0;
constant RAMSPR_HSRR1 : ramspr_index := 1;
constant RAMSPR_SPRG1 : ramspr_index := 2;
constant RAMSPR_SPRG3 : ramspr_index := 3;
constant RAMSPR_HSPRG1 : ramspr_index := 4;

-- Location of an SPR within the execute1 SPR RAMs, as produced by
-- decoding the SPR number of an instruction.
type ram_spr_info is record
-- Entry number within the selected half
index : ramspr_index;
-- '1' if the SPR is held in the odd half, '0' for the even half
isodd : std_ulogic;
-- '1' if the SPR number corresponds to an SPR held in the RAMs
valid : std_ulogic;
end record;
-- Default value: index 0, even half, not valid
constant ram_spr_info_init: ram_spr_info := (index => 0, others => '0');

subtype spr_selector is std_ulogic_vector(2 downto 0); subtype spr_selector is std_ulogic_vector(2 downto 0);
type spr_id is record type spr_id is record
sel : spr_selector; sel : spr_selector;
@ -253,12 +275,13 @@ package common is
br_pred: std_ulogic; -- Branch was predicted to be taken br_pred: std_ulogic; -- Branch was predicted to be taken
big_endian: std_ulogic; big_endian: std_ulogic;
spr_info : spr_id; spr_info : spr_id;
ram_spr : ram_spr_info;
end record; end record;
constant Decode1ToDecode2Init : Decode1ToDecode2Type := constant Decode1ToDecode2Init : Decode1ToDecode2Type :=
(valid => '0', stop_mark => '0', nia => (others => '0'), insn => (others => '0'), (valid => '0', stop_mark => '0', nia => (others => '0'), insn => (others => '0'),
ispr1 => (others => '0'), ispr2 => (others => '0'), ispro => (others => '0'), ispr1 => (others => '0'), ispr2 => (others => '0'), ispro => (others => '0'),
decode => decode_rom_init, br_pred => '0', big_endian => '0', decode => decode_rom_init, br_pred => '0', big_endian => '0',
spr_info => spr_id_init); spr_info => spr_id_init, ram_spr => ram_spr_info_init);


type Decode1ToFetch1Type is record type Decode1ToFetch1Type is record
redirect : std_ulogic; redirect : std_ulogic;
@ -320,6 +343,13 @@ package common is
repeat : std_ulogic; -- set if instruction is cracked into two ops repeat : std_ulogic; -- set if instruction is cracked into two ops
second : std_ulogic; -- set if this is the second op second : std_ulogic; -- set if this is the second op
spr_select : spr_id; spr_select : spr_id;
spr_is_ram : std_ulogic;
ramspr_even_rdaddr : ramspr_index;
ramspr_odd_rdaddr : ramspr_index;
ramspr_rd_odd : std_ulogic;
ramspr_wraddr : ramspr_index;
ramspr_write_even : std_ulogic;
ramspr_write_odd : std_ulogic;
end record; end record;
constant Decode2ToExecute1Init : Decode2ToExecute1Type := constant Decode2ToExecute1Init : Decode2ToExecute1Type :=
(valid => '0', unit => NONE, fac => NONE, insn_type => OP_ILLEGAL, instr_tag => instr_tag_init, (valid => '0', unit => NONE, fac => NONE, insn_type => OP_ILLEGAL, instr_tag => instr_tag_init,
@ -333,6 +363,9 @@ package common is
cr => (others => '0'), insn => (others => '0'), data_len => (others => '0'), cr => (others => '0'), insn => (others => '0'), data_len => (others => '0'),
result_sel => "000", sub_select => "000", result_sel => "000", sub_select => "000",
repeat => '0', second => '0', spr_select => spr_id_init, repeat => '0', second => '0', spr_select => spr_id_init,
spr_is_ram => '0',
ramspr_even_rdaddr => 0, ramspr_odd_rdaddr => 0, ramspr_rd_odd => '0',
ramspr_wraddr => 0, ramspr_write_even => '0', ramspr_write_odd => '0',
others => (others => '0')); others => (others => '0'));


type MultiplyInputType is record type MultiplyInputType is record
@ -574,7 +607,6 @@ package common is
store_done : std_ulogic; store_done : std_ulogic;
interrupt : std_ulogic; interrupt : std_ulogic;
intr_vec : intr_vector_t; intr_vec : intr_vector_t;
srr0: std_ulogic_vector(63 downto 0);
srr1: std_ulogic_vector(15 downto 0); srr1: std_ulogic_vector(15 downto 0);
end record; end record;
constant Loadstore1ToWritebackInit : Loadstore1ToWritebackType := constant Loadstore1ToWritebackInit : Loadstore1ToWritebackType :=
@ -582,7 +614,7 @@ package common is
write_reg => (others => '0'), write_data => (others => '0'), write_reg => (others => '0'), write_data => (others => '0'),
xerc => xerc_init, rc => '0', store_done => '0', xerc => xerc_init, rc => '0', store_done => '0',
interrupt => '0', intr_vec => 0, interrupt => '0', intr_vec => 0,
srr0 => (others => '0'), srr1 => (others => '0')); srr1 => (others => '0'));


type Loadstore1EventType is record type Loadstore1EventType is record
load_complete : std_ulogic; load_complete : std_ulogic;
@ -675,7 +707,6 @@ package common is
write_xerc : std_ulogic; write_xerc : std_ulogic;
xerc : xer_common_t; xerc : xer_common_t;
intr_vec : intr_vector_t; intr_vec : intr_vector_t;
srr0 : std_ulogic_vector(63 downto 0);
srr1 : std_ulogic_vector(15 downto 0); srr1 : std_ulogic_vector(15 downto 0);
end record; end record;
constant FPUToWritebackInit : FPUToWritebackType := constant FPUToWritebackInit : FPUToWritebackType :=
@ -731,6 +762,11 @@ package common is
write_cr_mask => (others => '0'), write_cr_mask => (others => '0'),
write_cr_data => (others => '0')); write_cr_data => (others => '0'));


-- Interrupt notification sent from writeback to execute1.
type WritebackToExecute1Type is record
-- '1' in a cycle where writeback is taking an interrupt
intr : std_ulogic;
-- Partial SRR1 value (interrupt-specific flag bits) supplied by the
-- unit that raised the interrupt; execute1 combines it with the MSR
-- to form the full SRR1 value.
srr1 : std_ulogic_vector(15 downto 0);
end record;

type WritebackEventType is record type WritebackEventType is record
instr_complete : std_ulogic; instr_complete : std_ulogic;
fp_complete : std_ulogic; fp_complete : std_ulogic;
@ -755,26 +791,6 @@ package body common is
n := 0; -- N.B. decode2 relies on this specific value n := 0; -- N.B. decode2 relies on this specific value
when SPR_CTR => when SPR_CTR =>
n := 1; -- N.B. decode2 relies on this specific value n := 1; -- N.B. decode2 relies on this specific value
when SPR_SRR0 =>
n := 2;
when SPR_SRR1 =>
n := 3;
when SPR_HSRR0 =>
n := 4;
when SPR_HSRR1 =>
n := 5;
when SPR_SPRG0 =>
n := 6;
when SPR_SPRG1 =>
n := 7;
when SPR_SPRG2 =>
n := 8;
when SPR_SPRG3 | SPR_SPRG3U =>
n := 9;
when SPR_HSPRG0 =>
n := 10;
when SPR_HSPRG1 =>
n := 11;
when SPR_TAR => when SPR_TAR =>
n := 13; n := 13;
when others => when others =>

@ -102,6 +102,7 @@ architecture behave of core is


-- Writeback signals -- Writeback signals
signal writeback_bypass: bypass_data_t; signal writeback_bypass: bypass_data_t;
signal wb_interrupt: WritebackToExecute1Type;


-- local signals -- local signals
signal fetch1_stall_in : std_ulogic; signal fetch1_stall_in : std_ulogic;
@ -122,7 +123,6 @@ architecture behave of core is
signal complete: instr_tag_t; signal complete: instr_tag_t;
signal terminate: std_ulogic; signal terminate: std_ulogic;
signal core_rst: std_ulogic; signal core_rst: std_ulogic;
signal do_interrupt: std_ulogic;


-- Delayed/Latched resets and alt_reset -- Delayed/Latched resets and alt_reset
signal rst_fetch1 : std_ulogic; signal rst_fetch1 : std_ulogic;
@ -361,7 +361,7 @@ begin
l_in => loadstore1_to_execute1, l_in => loadstore1_to_execute1,
fp_in => fpu_to_execute1, fp_in => fpu_to_execute1,
ext_irq_in => ext_irq, ext_irq_in => ext_irq,
interrupt_in => do_interrupt, interrupt_in => wb_interrupt,
l_out => execute1_to_loadstore1, l_out => execute1_to_loadstore1,
fp_out => execute1_to_fpu, fp_out => execute1_to_fpu,
e_out => execute1_to_writeback, e_out => execute1_to_writeback,
@ -469,7 +469,7 @@ begin
f_out => writeback_to_fetch1, f_out => writeback_to_fetch1,
wb_bypass => writeback_bypass, wb_bypass => writeback_bypass,
events => writeback_events, events => writeback_events,
interrupt_out => do_interrupt, interrupt_out => wb_interrupt,
complete_out => complete complete_out => complete
); );



@ -181,7 +181,7 @@ architecture behaviour of decode1 is
-- isync -- isync
2#111# => (ALU, NONE, OP_ISYNC, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), 2#111# => (ALU, NONE, OP_ISYNC, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
-- rfid -- rfid
2#101# => (ALU, NONE, OP_RFID, SPR, SPR, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), 2#101# => (ALU, NONE, OP_RFID, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
others => illegal_inst others => illegal_inst
); );


@ -525,6 +525,42 @@ architecture behaviour of decode1 is
constant nop_instr : decode_rom_t := (ALU, NONE, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE); constant nop_instr : decode_rom_t := (ALU, NONE, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE);
constant fetch_fail_inst: decode_rom_t := (LDST, NONE, OP_FETCH_FAILED, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE); constant fetch_fail_inst: decode_rom_t := (LDST, NONE, OP_FETCH_FAILED, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE);


-- Translate an SPR number into its location in the execute1 SPR RAMs.
-- The result has valid = '1' and the index/half of the SPR when sprn is
-- one of the RAM-resident SPRs; otherwise valid = '0' (with index 0 and
-- the even half selected).
function decode_ram_spr(sprn : spr_num_t) return ram_spr_info is
    variable info : ram_spr_info;
begin
    case sprn is
        -- SPRs held in the even half
        when SPR_SRR0 =>
            info := (index => RAMSPR_SRR0, isodd => '0', valid => '1');
        when SPR_HSRR0 =>
            info := (index => RAMSPR_HSRR0, isodd => '0', valid => '1');
        when SPR_SPRG0 =>
            info := (index => RAMSPR_SPRG0, isodd => '0', valid => '1');
        when SPR_SPRG2 =>
            info := (index => RAMSPR_SPRG2, isodd => '0', valid => '1');
        when SPR_HSPRG0 =>
            info := (index => RAMSPR_HSPRG0, isodd => '0', valid => '1');

        -- SPRs held in the odd half
        when SPR_SRR1 =>
            info := (index => RAMSPR_SRR1, isodd => '1', valid => '1');
        when SPR_HSRR1 =>
            info := (index => RAMSPR_HSRR1, isodd => '1', valid => '1');
        when SPR_SPRG1 =>
            info := (index => RAMSPR_SPRG1, isodd => '1', valid => '1');
        when SPR_SPRG3 | SPR_SPRG3U =>
            info := (index => RAMSPR_SPRG3, isodd => '1', valid => '1');
        when SPR_HSPRG1 =>
            info := (index => RAMSPR_HSPRG1, isodd => '1', valid => '1');

        -- Anything else is not stored in the SPR RAMs
        when others =>
            info := (index => 0, isodd => '0', valid => '0');
    end case;
    return info;
end;

function map_spr(sprn : spr_num_t) return spr_id is function map_spr(sprn : spr_num_t) return spr_id is
variable i : spr_id; variable i : spr_id;
begin begin
@ -614,6 +650,7 @@ begin


sprn := decode_spr_num(f_in.insn); sprn := decode_spr_num(f_in.insn);
v.spr_info := map_spr(sprn); v.spr_info := map_spr(sprn);
v.ram_spr := decode_ram_spr(sprn);


case to_integer(unsigned(majorop)) is case to_integer(unsigned(majorop)) is
when 4 => when 4 =>
@ -632,9 +669,10 @@ begin


if std_match(f_in.insn(10 downto 1), "01-1010011") then if std_match(f_in.insn(10 downto 1), "01-1010011") then
-- mfspr or mtspr -- mfspr or mtspr
if is_fast_spr(v.ispr1) = '0' then
-- Make mtspr to slow SPRs single issue -- Make mtspr to slow SPRs single issue
if v.spr_info.valid = '1' then
vi.force_single := f_in.insn(8); vi.force_single := f_in.insn(8);
end if;
-- send MMU-related SPRs to loadstore1 -- send MMU-related SPRs to loadstore1
case sprn is case sprn is
when SPR_DAR | SPR_DSISR | SPR_PID | SPR_PTCR => when SPR_DAR | SPR_DSISR | SPR_PID | SPR_PTCR =>
@ -643,7 +681,6 @@ begin
when others => when others =>
end case; end case;
end if; end if;
end if;


when 16 => when 16 =>
-- CTR may be needed as input to bc -- CTR may be needed as input to bc
@ -690,10 +727,6 @@ begin
else else
v.ispr2 := fast_spr_num(SPR_TAR); v.ispr2 := fast_spr_num(SPR_TAR);
end if; end if;
else
-- Could be OP_RFID
v.ispr1 := fast_spr_num(SPR_SRR1);
v.ispr2 := fast_spr_num(SPR_SRR0);
end if; end if;


when 24 => when 24 =>

@ -480,6 +480,23 @@ begin


v.e.spr_select := d_in.spr_info; v.e.spr_select := d_in.spr_info;


case op is
when OP_MFSPR =>
v.e.ramspr_even_rdaddr := d_in.ram_spr.index;
v.e.ramspr_odd_rdaddr := d_in.ram_spr.index;
v.e.ramspr_rd_odd := d_in.ram_spr.isodd;
v.e.spr_is_ram := d_in.ram_spr.valid;
when OP_MTSPR =>
v.e.ramspr_wraddr := d_in.ram_spr.index;
v.e.ramspr_write_even := d_in.ram_spr.valid and not d_in.ram_spr.isodd;
v.e.ramspr_write_odd := d_in.ram_spr.valid and d_in.ram_spr.isodd;
v.e.spr_is_ram := d_in.ram_spr.valid;
when OP_RFID =>
v.e.ramspr_even_rdaddr := RAMSPR_SRR0;
v.e.ramspr_odd_rdaddr := RAMSPR_SRR1;
when others =>
end case;

case d_in.decode.length is case d_in.decode.length is
when is1B => when is1B =>
length := "0001"; length := "0001";
@ -530,6 +547,8 @@ begin
if op = OP_MFSPR then if op = OP_MFSPR then
if is_fast_spr(d_in.ispr1) = '1' then if is_fast_spr(d_in.ispr1) = '1' then
v.e.result_sel := "000"; -- adder_result, effectively a_in v.e.result_sel := "000"; -- adder_result, effectively a_in
elsif d_in.ram_spr.valid = '1' then
v.e.result_sel := "101"; -- ramspr_result
elsif d_in.spr_info.valid = '0' then elsif d_in.spr_info.valid = '0' then
-- Privileged mfspr to invalid/unimplemented SPR numbers -- Privileged mfspr to invalid/unimplemented SPR numbers
-- writes the contents of RT back to RT (i.e. it's a no-op) -- writes the contents of RT back to RT (i.e. it's a no-op)

@ -31,7 +31,7 @@ entity execute1 is
fp_in : in FPUToExecute1Type; fp_in : in FPUToExecute1Type;


ext_irq_in : std_ulogic; ext_irq_in : std_ulogic;
interrupt_in : std_ulogic; interrupt_in : WritebackToExecute1Type;


-- asynchronous -- asynchronous
l_out : out Execute1ToLoadstore1Type; l_out : out Execute1ToLoadstore1Type;
@ -72,6 +72,8 @@ architecture behaviour of execute1 is
write_loga : std_ulogic; write_loga : std_ulogic;
inc_loga : std_ulogic; inc_loga : std_ulogic;
write_pmuspr : std_ulogic; write_pmuspr : std_ulogic;
ramspr_write_even : std_ulogic;
ramspr_write_odd : std_ulogic;
end record; end record;
constant side_effect_init : side_effect_type := (others => '0'); constant side_effect_init : side_effect_type := (others => '0');


@ -119,6 +121,7 @@ architecture behaviour of execute1 is
msr : std_ulogic_vector(63 downto 0); msr : std_ulogic_vector(63 downto 0);
xerc : xer_common_t; xerc : xer_common_t;
xerc_valid : std_ulogic; xerc_valid : std_ulogic;
ramspr_wraddr : ramspr_index;
end record; end record;
constant reg_stage1_type_init : reg_stage1_type := constant reg_stage1_type_init : reg_stage1_type :=
(e => Execute1ToWritebackInit, se => side_effect_init, (e => Execute1ToWritebackInit, se => side_effect_init,
@ -130,7 +133,8 @@ architecture behaviour of execute1 is
no_instr_avail => '0', instr_dispatch => '0', ext_interrupt => '0', no_instr_avail => '0', instr_dispatch => '0', ext_interrupt => '0',
taken_branch_event => '0', br_mispredict => '0', taken_branch_event => '0', br_mispredict => '0',
msr => 64x"0", msr => 64x"0",
xerc => xerc_init, xerc_valid => '0'); xerc => xerc_init, xerc_valid => '0',
ramspr_wraddr => 0);


type reg_stage2_type is record type reg_stage2_type is record
e : Execute1ToWritebackType; e : Execute1ToWritebackType;
@ -203,6 +207,20 @@ architecture behaviour of execute1 is
signal exception_log : std_ulogic; signal exception_log : std_ulogic;
signal irq_valid_log : std_ulogic; signal irq_valid_log : std_ulogic;


-- SPR-related signals
type ramspr_half_t is array(ramspr_index) of std_ulogic_vector(63 downto 0);
signal even_sprs : ramspr_half_t := (others => (others => '0'));
signal odd_sprs : ramspr_half_t := (others => (others => '0'));
signal ramspr_even : std_ulogic_vector(63 downto 0);
signal ramspr_odd : std_ulogic_vector(63 downto 0);
signal ramspr_result : std_ulogic_vector(63 downto 0);
signal ramspr_rd_odd : std_ulogic;
signal ramspr_wr_addr : ramspr_index;
signal ramspr_even_wr_data : std_ulogic_vector(63 downto 0);
signal ramspr_even_wr_enab : std_ulogic;
signal ramspr_odd_wr_data : std_ulogic_vector(63 downto 0);
signal ramspr_odd_wr_enab : std_ulogic;

signal stage2_stall : std_ulogic; signal stage2_stall : std_ulogic;


type privilege_level is (USER, SUPER); type privilege_level is (USER, SUPER);
@ -289,6 +307,18 @@ architecture behaviour of execute1 is
return msr_out; return msr_out;
end; end;


-- Build the 64-bit SRR1 value to be saved when an interrupt is taken.
--   msr   : current MSR; bits 63..31, 26..22 and 15..0 are copied through
--   flags : interrupt-specific bits; flags(14 downto 11) are placed in
--           SRR1 bits 30..27 and flags(5 downto 0) in SRR1 bits 21..16
-- Both arguments are sliced with "downto" ranges, so they must be
-- descending vectors covering the bit positions used here.
function intr_srr1(msr: std_ulogic_vector; flags: std_ulogic_vector)
    return std_ulogic_vector is
    variable image : std_ulogic_vector(63 downto 0);
begin
    -- Fields listed from most significant to least significant
    image := msr(63 downto 31) &
             flags(14 downto 11) &
             msr(26 downto 22) &
             flags(5 downto 0) &
             msr(15 downto 0);
    return image;
end;

-- Work out whether a signed value fits into n bits, -- Work out whether a signed value fits into n bits,
-- that is, see if it is in the range -2^(n-1) .. 2^(n-1) - 1 -- that is, see if it is in the range -2^(n-1) .. 2^(n-1) - 1
function fits_in_n_bits(val: std_ulogic_vector; n: integer) return boolean is function fits_in_n_bits(val: std_ulogic_vector; n: integer) return boolean is
@ -456,6 +486,78 @@ begin


valid_in <= e_in.valid and not (busy_out or flush_in or ex1.e.redirect or ex1.e.interrupt); valid_in <= e_in.valid and not (busy_out or flush_in or ex1.e.redirect or ex1.e.interrupt);


-- SPRs stored in two small RAM arrays (two so that we can read and write
-- two SPRs in each cycle).

ramspr_read: process(all)
    variable rd_even, rd_odd : std_ulogic_vector(63 downto 0);
    variable commit : std_ulogic;
begin
    -- Asynchronous read of both halves at the addresses supplied with
    -- the incoming instruction
    rd_even := even_sprs(e_in.ramspr_even_rdaddr);
    rd_odd := odd_sprs(e_in.ramspr_odd_rdaddr);

    -- A write requested by the instruction in ex1 only happens if that
    -- instruction is valid and is neither stalled nor flushed; an
    -- interrupt always writes both halves.
    commit := ex1.e.valid and not stage2_stall and not flush_in;
    ramspr_even_wr_enab <= (ex1.se.ramspr_write_even and commit) or interrupt_in.intr;
    ramspr_odd_wr_enab <= (ex1.se.ramspr_write_odd and commit) or interrupt_in.intr;

    -- Write address and data selection
    if interrupt_in.intr = '1' then
        -- Interrupt: SRR0 (even half) and SRR1 (odd half) share one
        -- index, so both are written through the same address
        ramspr_wr_addr <= RAMSPR_SRR0;
        ramspr_even_wr_data <= ex2.e.last_nia;
        ramspr_odd_wr_data <= intr_srr1(ctrl.msr, interrupt_in.srr1);
    else
        -- Normal write: both halves see the ex1 result; the write
        -- enables select which half actually stores it
        ramspr_wr_addr <= ex1.ramspr_wraddr;
        ramspr_even_wr_data <= ex1.e.write_data;
        ramspr_odd_wr_data <= ex1.e.write_data;
    end if;

    -- SPR RAM read with write data bypass: if the instruction in ex1 is
    -- writing the entry being read, forward its data instead of the RAM
    -- contents.
    -- We assume no instruction executes in the cycle immediately following
    -- an interrupt, so we don't need to bypass interrupt data
    ramspr_even <= rd_even;
    if ex1.se.ramspr_write_even = '1' and e_in.ramspr_even_rdaddr = ex1.ramspr_wraddr then
        ramspr_even <= ex1.e.write_data;
    end if;

    ramspr_odd <= rd_odd;
    if ex1.se.ramspr_write_odd = '1' and e_in.ramspr_odd_rdaddr = ex1.ramspr_wraddr then
        ramspr_odd <= ex1.e.write_data;
    end if;

    -- Select which half supplies the single-value result
    ramspr_result <= ramspr_odd;
    if e_in.ramspr_rd_odd = '0' then
        ramspr_result <= ramspr_even;
    end if;
end process;

-- Clocked write port for the two SPR RAM halves.  Both halves use the
-- same write address; each has its own data and enable.  Every write is
-- also logged with a report message.
ramspr_write: process(clk)
begin
    if rising_edge(clk) then
        -- Even half
        if ramspr_even_wr_enab = '1' then
            report "writing even spr " & integer'image(ramspr_wr_addr) &
                " data=" & to_hstring(ramspr_even_wr_data);
            even_sprs(ramspr_wr_addr) <= ramspr_even_wr_data;
        end if;

        -- Odd half
        if ramspr_odd_wr_enab = '1' then
            report "writing odd spr " & integer'image(ramspr_wr_addr) &
                " data=" & to_hstring(ramspr_odd_wr_data);
            odd_sprs(ramspr_wr_addr) <= ramspr_odd_wr_data;
        end if;
    end if;
end process;

-- First stage result mux -- First stage result mux
s1_sel <= e_in.result_sel when ex1.busy = '0' else "100"; s1_sel <= e_in.result_sel when ex1.busy = '0' else "100";
with s1_sel select alu_result <= with s1_sel select alu_result <=
@ -464,6 +566,7 @@ begin
rotator_result when "010", rotator_result when "010",
shortmul_result when "011", shortmul_result when "011",
muldiv_result when "100", muldiv_result when "100",
ramspr_result when "101",
next_nia when "110", next_nia when "110",
misc_result when others; misc_result when others;


@ -830,6 +933,7 @@ begin
variable privileged : std_ulogic; variable privileged : std_ulogic;
variable slow_op : std_ulogic; variable slow_op : std_ulogic;
variable owait : std_ulogic; variable owait : std_ulogic;
variable srr1 : std_ulogic_vector(63 downto 0);
begin begin
v := actions_type_init; v := actions_type_init;
v.e.write_data := alu_result; v.e.write_data := alu_result;
@ -850,6 +954,9 @@ begin
v.e.last_nia := e_in.nia; v.e.last_nia := e_in.nia;
v.e.br_offset := 64x"4"; v.e.br_offset := 64x"4";


v.se.ramspr_write_even := e_in.ramspr_write_even;
v.se.ramspr_write_odd := e_in.ramspr_write_odd;

-- Note the difference between v.exception and v.trap: -- Note the difference between v.exception and v.trap:
-- v.exception signals a condition that prevents execution of the -- v.exception signals a condition that prevents execution of the
-- instruction, and hence shouldn't depend on operand data, so as to -- instruction, and hence shouldn't depend on operand data, so as to
@ -1009,26 +1116,27 @@ begin
end if; end if;


when OP_RFID => when OP_RFID =>
v.e.redir_mode := (a_in(MSR_IR) or a_in(MSR_PR)) & not a_in(MSR_PR) & srr1 := ramspr_odd;
not a_in(MSR_LE) & not a_in(MSR_SF); v.e.redir_mode := (srr1(MSR_IR) or srr1(MSR_PR)) & not srr1(MSR_PR) &
not srr1(MSR_LE) & not srr1(MSR_SF);
-- Can't use msr_copy here because the partial function MSR -- Can't use msr_copy here because the partial function MSR
-- bits should be left unchanged, not zeroed. -- bits should be left unchanged, not zeroed.
v.new_msr(63 downto 31) := a_in(63 downto 31); v.new_msr(63 downto 31) := srr1(63 downto 31);
v.new_msr(26 downto 22) := a_in(26 downto 22); v.new_msr(26 downto 22) := srr1(26 downto 22);
v.new_msr(15 downto 0) := a_in(15 downto 0); v.new_msr(15 downto 0) := srr1(15 downto 0);
if a_in(MSR_PR) = '1' then if srr1(MSR_PR) = '1' then
v.new_msr(MSR_EE) := '1'; v.new_msr(MSR_EE) := '1';
v.new_msr(MSR_IR) := '1'; v.new_msr(MSR_IR) := '1';
v.new_msr(MSR_DR) := '1'; v.new_msr(MSR_DR) := '1';
end if; end if;
v.se.write_msr := '1'; v.se.write_msr := '1';
v.e.br_offset := b_in; v.e.br_offset := ramspr_even;
v.e.abs_br := '1'; v.e.abs_br := '1';
v.e.redirect := '1'; v.e.redirect := '1';
v.se.write_cfar := '1'; v.se.write_cfar := '1';
if HAS_FPU then if HAS_FPU then
v.fp_intr := fp_in.exception and v.fp_intr := fp_in.exception and
(a_in(MSR_FE0) or a_in(MSR_FE1)); (srr1(MSR_FE0) or srr1(MSR_FE1));
end if; end if;
v.do_trace := '0'; v.do_trace := '0';


@ -1041,10 +1149,10 @@ begin
when OP_DARN => when OP_DARN =>
when OP_MFMSR => when OP_MFMSR =>
when OP_MFSPR => when OP_MFSPR =>
if is_fast_spr(e_in.read_reg1) = '1' then if is_fast_spr(e_in.read_reg1) = '1' or e_in.spr_is_ram = '1' then
if e_in.valid = '1' then if e_in.valid = '1' then
report "MFSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) & report "MFSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) &
"=" & to_hstring(a_in); "=" & to_hstring(alu_result);
end if; end if;
elsif e_in.spr_select.valid = '1' then elsif e_in.spr_select.valid = '1' then
if e_in.valid = '1' then if e_in.valid = '1' then
@ -1121,7 +1229,9 @@ begin
v.se.write_loga := '1'; v.se.write_loga := '1';
when others => when others =>
end case; end case;
elsif is_fast_spr(e_in.write_reg) = '0' then end if;
if e_in.spr_select.valid = '0' and is_fast_spr(e_in.write_reg) = '0' and
e_in.spr_is_ram = '0' then
-- mtspr to unimplemented SPRs should be a nop in -- mtspr to unimplemented SPRs should be a nop in
-- supervisor mode and a program interrupt for user mode -- supervisor mode and a program interrupt for user mode
if ex1.msr(MSR_PR) = '1' then if ex1.msr(MSR_PR) = '1' then
@ -1232,6 +1342,7 @@ begin
v.pmu_spr_num := e_in.insn(20 downto 16); v.pmu_spr_num := e_in.insn(20 downto 16);
v.mul_select := e_in.sub_select(1 downto 0); v.mul_select := e_in.sub_select(1 downto 0);
v.se := side_effect_init; v.se := side_effect_init;
v.ramspr_wraddr := e_in.ramspr_wraddr;
end if; end if;


lv := Execute1ToLoadstore1Init; lv := Execute1ToLoadstore1Init;
@ -1402,10 +1513,10 @@ begin
v.mul_finish := '0'; v.mul_finish := '0';
v.xerc_valid := '0'; v.xerc_valid := '0';
end if; end if;
if flush_in = '1' or interrupt_in = '1' then if flush_in = '1' or interrupt_in.intr = '1' then
v.msr := ctrl_tmp.msr; v.msr := ctrl_tmp.msr;
end if; end if;
if interrupt_in = '1' then if interrupt_in.intr = '1' then
v.trace_next := '0'; v.trace_next := '0';
v.fp_exception_next := '0'; v.fp_exception_next := '0';
end if; end if;
@ -1449,7 +1560,6 @@ begin


-- Outputs to FPU -- Outputs to FPU
fv.op := e_in.insn_type; fv.op := e_in.insn_type;
fv.nia := e_in.nia;
fv.insn := e_in.insn; fv.insn := e_in.insn;
fv.itag := e_in.instr_tag; fv.itag := e_in.instr_tag;
fv.single := e_in.is_32bit; fv.single := e_in.is_32bit;
@ -1607,7 +1717,7 @@ begin
x_to_pmu.mtspr <= ex1.se.write_pmuspr; x_to_pmu.mtspr <= ex1.se.write_pmuspr;
end if; end if;


if interrupt_in = '1' then if interrupt_in.intr = '1' then
ctrl_tmp.msr(MSR_SF) <= '1'; ctrl_tmp.msr(MSR_SF) <= '1';
ctrl_tmp.msr(MSR_EE) <= '0'; ctrl_tmp.msr(MSR_EE) <= '0';
ctrl_tmp.msr(MSR_PR) <= '0'; ctrl_tmp.msr(MSR_PR) <= '0';
@ -1659,7 +1769,7 @@ begin
ctrl.msr(MSR_IR) & ctrl.msr(MSR_DR) & ctrl.msr(MSR_IR) & ctrl.msr(MSR_DR) &
exception_log & exception_log &
irq_valid_log & irq_valid_log &
interrupt_in & interrupt_in.intr &
"000" & "000" &
ex2.e.write_enable & ex2.e.write_enable &
ex2.e.valid & ex2.e.valid &

@ -99,7 +99,6 @@ architecture behaviour of fpu is
illegal : std_ulogic; illegal : std_ulogic;
op : insn_type_t; op : insn_type_t;
insn : std_ulogic_vector(31 downto 0); insn : std_ulogic_vector(31 downto 0);
nia : std_ulogic_vector(63 downto 0);
instr_tag : instr_tag_t; instr_tag : instr_tag_t;
dest_fpr : gspr_index_t; dest_fpr : gspr_index_t;
fe_mode : std_ulogic; fe_mode : std_ulogic;
@ -669,7 +668,6 @@ begin
w_out.xerc <= r.xerc_result; w_out.xerc <= r.xerc_result;
w_out.interrupt <= r.do_intr; w_out.interrupt <= r.do_intr;
w_out.intr_vec <= 16#700#; w_out.intr_vec <= 16#700#;
w_out.srr0 <= r.nia;
w_out.srr1 <= (47-44 => r.illegal, 47-43 => not r.illegal, others => '0'); w_out.srr1 <= (47-44 => r.illegal, 47-43 => not r.illegal, others => '0');


fpu_1: process(all) fpu_1: process(all)
@ -756,7 +754,6 @@ begin
-- capture incoming instruction -- capture incoming instruction
if e_in.valid = '1' then if e_in.valid = '1' then
v.insn := e_in.insn; v.insn := e_in.insn;
v.nia := e_in.nia;
v.op := e_in.op; v.op := e_in.op;
v.instr_tag := e_in.itag; v.instr_tag := e_in.itag;
v.fe_mode := or (e_in.fe_mode); v.fe_mode := or (e_in.fe_mode);

@ -90,7 +90,6 @@ architecture behave of loadstore1 is
dword_index : std_ulogic; dword_index : std_ulogic;
two_dwords : std_ulogic; two_dwords : std_ulogic;
incomplete : std_ulogic; incomplete : std_ulogic;
nia : std_ulogic_vector(63 downto 0);
end record; end record;
constant request_init : request_t := (valid => '0', dc_req => '0', load => '0', store => '0', tlbie => '0', constant request_init : request_t := (valid => '0', dc_req => '0', load => '0', store => '0', tlbie => '0',
dcbz => '0', read_spr => '0', write_spr => '0', mmu_op => '0', dcbz => '0', read_spr => '0', write_spr => '0', mmu_op => '0',
@ -105,8 +104,7 @@ architecture behave of loadstore1 is
atomic => '0', atomic_last => '0', rc => '0', nc => '0', atomic => '0', atomic_last => '0', rc => '0', nc => '0',
virt_mode => '0', priv_mode => '0', load_sp => '0', virt_mode => '0', priv_mode => '0', load_sp => '0',
sprn => 10x"0", is_slbia => '0', align_intr => '0', sprn => 10x"0", is_slbia => '0', align_intr => '0',
dword_index => '0', two_dwords => '0', incomplete => '0', dword_index => '0', two_dwords => '0', incomplete => '0');
nia => (others => '0'));


type reg_stage1_t is record type reg_stage1_t is record
req : request_t; req : request_t;
@ -146,7 +144,6 @@ architecture behave of loadstore1 is
stage1_en : std_ulogic; stage1_en : std_ulogic;
interrupt : std_ulogic; interrupt : std_ulogic;
intr_vec : integer range 0 to 16#fff#; intr_vec : integer range 0 to 16#fff#;
nia : std_ulogic_vector(63 downto 0);
srr1 : std_ulogic_vector(15 downto 0); srr1 : std_ulogic_vector(15 downto 0);
events : Loadstore1EventType; events : Loadstore1EventType;
end record; end record;
@ -412,7 +409,6 @@ begin
v.virt_mode := l_in.virt_mode; v.virt_mode := l_in.virt_mode;
v.priv_mode := l_in.priv_mode; v.priv_mode := l_in.priv_mode;
v.sprn := sprn; v.sprn := sprn;
v.nia := l_in.nia;


lsu_sum := std_ulogic_vector(unsigned(l_in.addr1) + unsigned(l_in.addr2)); lsu_sum := std_ulogic_vector(unsigned(l_in.addr1) + unsigned(l_in.addr2));


@ -866,7 +862,6 @@ begin
-- or ISI or ISegI for instruction fetch exceptions -- or ISI or ISegI for instruction fetch exceptions
v.interrupt := exception; v.interrupt := exception;
if exception = '1' then if exception = '1' then
v.nia := r2.req.nia;
if r2.req.align_intr = '1' then if r2.req.align_intr = '1' then
v.intr_vec := 16#600#; v.intr_vec := 16#600#;
v.dar := r2.req.addr; v.dar := r2.req.addr;
@ -962,7 +957,6 @@ begin
l_out.store_done <= d_in.store_done; l_out.store_done <= d_in.store_done;
l_out.interrupt <= r3.interrupt; l_out.interrupt <= r3.interrupt;
l_out.intr_vec <= r3.intr_vec; l_out.intr_vec <= r3.intr_vec;
l_out.srr0 <= r3.nia;
l_out.srr1 <= r3.srr1; l_out.srr1 <= r3.srr1;


-- update busy signal back to execute1 -- update busy signal back to execute1

@ -25,20 +25,12 @@ entity writeback is
events : out WritebackEventType; events : out WritebackEventType;


flush_out : out std_ulogic; flush_out : out std_ulogic;
interrupt_out: out std_ulogic; interrupt_out: out WritebackToExecute1Type;
complete_out : out instr_tag_t complete_out : out instr_tag_t
); );
end entity writeback; end entity writeback;


architecture behaviour of writeback is architecture behaviour of writeback is
type irq_state_t is (WRITE_SRR0, WRITE_SRR1);

type reg_type is record
state : irq_state_t;
srr1 : std_ulogic_vector(63 downto 0);
end record;

signal r, rin : reg_type;


begin begin
writeback_0: process(clk) writeback_0: process(clk)
@ -47,13 +39,6 @@ begin
variable w : std_ulogic_vector(0 downto 0); variable w : std_ulogic_vector(0 downto 0);
begin begin
if rising_edge(clk) then if rising_edge(clk) then
if rst = '1' then
r.state <= WRITE_SRR0;
r.srr1 <= (others => '0');
else
r <= rin;
end if;

-- Do consistency checks only on the clock edge -- Do consistency checks only on the clock edge
x(0) := e_in.valid; x(0) := e_in.valid;
y(0) := l_in.valid; y(0) := l_in.valid;
@ -82,7 +67,6 @@ begin
end process; end process;


writeback_1: process(all) writeback_1: process(all)
variable v : reg_type;
variable f : WritebackToFetch1Type; variable f : WritebackToFetch1Type;
variable scf : std_ulogic_vector(3 downto 0); variable scf : std_ulogic_vector(3 downto 0);
variable vec : integer range 0 to 16#fff#; variable vec : integer range 0 to 16#fff#;
@ -92,9 +76,7 @@ begin
w_out <= WritebackToRegisterFileInit; w_out <= WritebackToRegisterFileInit;
c_out <= WritebackToCrFileInit; c_out <= WritebackToCrFileInit;
f := WritebackToFetch1Init; f := WritebackToFetch1Init;
interrupt_out <= '0';
vec := 0; vec := 0;
v := r;


complete_out <= instr_tag_init; complete_out <= instr_tag_init;
if e_in.valid = '1' then if e_in.valid = '1' then
@ -108,37 +90,21 @@ begin
events.fp_complete <= fp_in.valid; events.fp_complete <= fp_in.valid;


intr := e_in.interrupt or l_in.interrupt or fp_in.interrupt; intr := e_in.interrupt or l_in.interrupt or fp_in.interrupt;
interrupt_out.intr <= intr;


if r.state = WRITE_SRR1 then if intr = '1' then
w_out.write_reg <= fast_spr_num(SPR_SRR1);
w_out.write_data <= r.srr1;
w_out.write_enable <= '1';
interrupt_out <= '1';
v.state := WRITE_SRR0;

elsif intr = '1' then
w_out.write_reg <= fast_spr_num(SPR_SRR0);
w_out.write_enable <= '1';
v.state := WRITE_SRR1;
srr1 := (others => '0'); srr1 := (others => '0');
if e_in.interrupt = '1' then if e_in.interrupt = '1' then
vec := e_in.intr_vec; vec := e_in.intr_vec;
w_out.write_data <= e_in.last_nia;
srr1 := e_in.srr1; srr1 := e_in.srr1;
elsif l_in.interrupt = '1' then elsif l_in.interrupt = '1' then
vec := l_in.intr_vec; vec := l_in.intr_vec;
w_out.write_data <= l_in.srr0;
srr1 := l_in.srr1; srr1 := l_in.srr1;
elsif fp_in.interrupt = '1' then elsif fp_in.interrupt = '1' then
vec := fp_in.intr_vec; vec := fp_in.intr_vec;
w_out.write_data <= fp_in.srr0;
srr1 := fp_in.srr1; srr1 := fp_in.srr1;
end if; end if;
v.srr1(63 downto 31) := e_in.msr(63 downto 31); interrupt_out.srr1 <= srr1;
v.srr1(30 downto 27) := srr1(14 downto 11);
v.srr1(26 downto 22) := e_in.msr(26 downto 22);
v.srr1(21 downto 16) := srr1(5 downto 0);
v.srr1(15 downto 0) := e_in.msr(15 downto 0);


else else
if e_in.write_enable = '1' then if e_in.write_enable = '1' then
@ -229,6 +195,5 @@ begin
wb_bypass.tag.valid <= complete_out.valid and w_out.write_enable; wb_bypass.tag.valid <= complete_out.valid and w_out.write_enable;
wb_bypass.data <= w_out.write_data; wb_bypass.data <= w_out.write_data;


rin <= v;
end process; end process;
end; end;

Loading…
Cancel
Save