core: Add support for floating-point loads and stores

This extends the register file so it can hold FPR values, and
implements the FP loads and stores that do not require conversion
between single and double precision.

We now have the FP, FE0 and FE1 bits in MSR.  FP loads and stores
cause an FP unavailable interrupt if MSR[FP] = 0.

The FPU facilities are optional and their presence is controlled by
the HAS_FPU generic passed down from the top-level board file.  It
defaults to true for all except the A7-35 boards.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
jtag-port
Paul Mackerras 4 years ago
parent e1672ea709
commit 45cd8f4fc3

@ -13,8 +13,11 @@ package common is
constant MSR_SF : integer := (63 - 0); -- Sixty-Four bit mode
constant MSR_EE : integer := (63 - 48); -- External interrupt Enable
constant MSR_PR : integer := (63 - 49); -- PRoblem state
constant MSR_FP : integer := (63 - 50); -- Floating Point available
constant MSR_FE0 : integer := (63 - 52); -- Floating Exception mode
constant MSR_SE : integer := (63 - 53); -- Single-step bit of TE field
constant MSR_BE : integer := (63 - 54); -- Branch trace bit of TE field
constant MSR_FE1 : integer := (63 - 55); -- Floating Exception mode
constant MSR_IR : integer := (63 - 58); -- Instruction Relocation
constant MSR_DR : integer := (63 - 59); -- Data Relocation
constant MSR_RI : integer := (63 - 62); -- Recoverable Interrupt
@ -53,8 +56,11 @@ package common is
-- GPR indices in the register file (GPR only)
subtype gpr_index_t is std_ulogic_vector(4 downto 0);

-- Extended GPR indice (can hold an SPR)
subtype gspr_index_t is std_ulogic_vector(5 downto 0);
-- Extended GPR index (can hold an SPR or a FPR)
subtype gspr_index_t is std_ulogic_vector(6 downto 0);

-- FPR indices
subtype fpr_index_t is std_ulogic_vector(4 downto 0);

-- Some SPRs are stored in the register file, they use the magic
-- GPR numbers above 31.
@ -64,6 +70,9 @@ package common is
-- indicates if this is indeed a fast SPR. If clear, then
-- the SPR is not stored in the GPR file.
--
-- FPRs are also stored in the register file, using GSPR
-- numbers from 64 to 95.
--
function fast_spr_num(spr: spr_num_t) return gspr_index_t;

-- Indices conversion functions
@ -71,6 +80,7 @@ package common is
function gpr_to_gspr(i: gpr_index_t) return gspr_index_t;
function gpr_or_spr_to_gspr(g: gpr_index_t; s: gspr_index_t) return gspr_index_t;
function is_fast_spr(s: gspr_index_t) return std_ulogic;
function fpr_to_gspr(f: fpr_index_t) return gspr_index_t;

-- The XER is split: the common bits (CA, OV, SO, OV32 and CA32) are
-- in the CR file as a kind of CR extension (with a separate write
@ -226,7 +236,7 @@ package common is
read2_enable : std_ulogic;
read2_reg : gspr_index_t;
read3_enable : std_ulogic;
read3_reg : gpr_index_t;
read3_reg : gspr_index_t;
end record;

type RegisterFileToDecode2Type is record
@ -264,7 +274,7 @@ package common is
addr1 : std_ulogic_vector(63 downto 0);
addr2 : std_ulogic_vector(63 downto 0);
data : std_ulogic_vector(63 downto 0); -- data to write, unused for read
write_reg : gpr_index_t;
write_reg : gspr_index_t;
length : std_ulogic_vector(3 downto 0);
ci : std_ulogic; -- cache-inhibited load/store
byte_reverse : std_ulogic;
@ -282,7 +292,8 @@ package common is
sign_extend => '0', update => '0', xerc => xerc_init,
reserve => '0', rc => '0', virt_mode => '0', priv_mode => '0',
nia => (others => '0'), insn => (others => '0'),
addr1 => (others => '0'), addr2 => (others => '0'), data => (others => '0'), length => (others => '0'),
addr1 => (others => '0'), addr2 => (others => '0'), data => (others => '0'),
write_reg => (others => '0'), length => (others => '0'),
mode_32bit => '0', others => (others => '0'));

type Loadstore1ToExecute1Type is record
@ -369,7 +380,7 @@ package common is
type Loadstore1ToWritebackType is record
valid : std_ulogic;
write_enable: std_ulogic;
write_reg : gpr_index_t;
write_reg : gspr_index_t;
write_data : std_ulogic_vector(63 downto 0);
xerc : xer_common_t;
rc : std_ulogic;
@ -473,10 +484,10 @@ package body common is
n := 13;
when others =>
n := 0;
return "000000";
return "0000000";
end case;
tmp := std_ulogic_vector(to_unsigned(n, 5));
return "1" & tmp;
return "01" & tmp;
end;

function gspr_to_gpr(i: gspr_index_t) return gpr_index_t is
@ -486,7 +497,7 @@ package body common is

function gpr_to_gspr(i: gpr_index_t) return gspr_index_t is
begin
return "0" & i;
return "00" & i;
end;

function gpr_or_spr_to_gspr(g: gpr_index_t; s: gspr_index_t) return gspr_index_t is
@ -502,4 +513,9 @@ package body common is
begin
return s(5);
end;

-- Convert an FPR index into the extended register-file index space.
-- FPRs occupy GSPR numbers 64 to 95, so the two top index bits are "10"
-- and the low five bits are the FPR number itself.
function fpr_to_gspr(f: fpr_index_t) return gspr_index_t is
    variable gspr : gspr_index_t;
begin
    gspr(6 downto 5) := "10";
    gspr(4 downto 0) := f;
    return gspr;
end;
end common;

@ -34,7 +34,7 @@ entity control is
gpr_b_read_in : in gspr_index_t;

gpr_c_read_valid_in : in std_ulogic;
gpr_c_read_in : in gpr_index_t;
gpr_c_read_in : in gspr_index_t;

cr_read_in : in std_ulogic;
cr_write_in : in std_ulogic;
@ -70,7 +70,6 @@ architecture rtl of control is
signal gpr_write_valid : std_ulogic := '0';
signal cr_write_valid : std_ulogic := '0';

signal gpr_c_read_in_fmt : std_ulogic_vector(5 downto 0);
begin
gpr_hazard0: entity work.gpr_hazard
generic map (
@ -122,8 +121,6 @@ begin
use_bypass => gpr_bypass_b
);

gpr_c_read_in_fmt <= "0" & gpr_c_read_in;

gpr_hazard2: entity work.gpr_hazard
generic map (
PIPELINE_DEPTH => PIPELINE_DEPTH
@ -140,7 +137,7 @@ begin
gpr_write_in => gpr_write_in,
bypass_avail => gpr_bypassable,
gpr_read_valid_in => gpr_c_read_valid_in,
gpr_read_in => gpr_c_read_in_fmt,
gpr_read_in => gpr_c_read_in,

ugpr_write_valid => update_gpr_write_valid,
ugpr_write_reg => update_gpr_write_reg,

@ -11,6 +11,7 @@ entity core is
SIM : boolean := false;
DISABLE_FLATTEN : boolean := false;
EX1_BYPASS : boolean := true;
HAS_FPU : boolean := true;
ALT_RESET_ADDRESS : std_ulogic_vector(63 downto 0) := (others => '0');
LOG_LENGTH : natural := 512
);
@ -244,6 +245,7 @@ begin
decode2_0: entity work.decode2
generic map (
EX1_BYPASS => EX1_BYPASS,
HAS_FPU => HAS_FPU,
LOG_LENGTH => LOG_LENGTH
)
port map (
@ -267,6 +269,7 @@ begin
register_file_0: entity work.register_file
generic map (
SIM => SIM,
HAS_FPU => HAS_FPU,
LOG_LENGTH => LOG_LENGTH
)
port map (
@ -280,7 +283,7 @@ begin
dbg_gpr_data => dbg_gpr_data,
sim_dump => terminate,
sim_dump_done => sim_cr_dump,
log_out => log_data(255 downto 185)
log_out => log_data(255 downto 184)
);

cr_file_0: entity work.cr_file
@ -294,12 +297,13 @@ begin
d_out => cr_file_to_decode2,
w_in => writeback_to_cr_file,
sim_dump => sim_cr_dump,
log_out => log_data(184 downto 172)
log_out => log_data(183 downto 171)
);

execute1_0: entity work.execute1
generic map (
EX1_BYPASS => EX1_BYPASS,
HAS_FPU => HAS_FPU,
LOG_LENGTH => LOG_LENGTH
)
port map (
@ -324,6 +328,7 @@ begin

loadstore1_0: entity work.loadstore1
generic map (
HAS_FPU => HAS_FPU,
LOG_LENGTH => LOG_LENGTH
)
port map (
@ -368,7 +373,7 @@ begin
stall_out => dcache_stall_out,
wishbone_in => wishbone_data_in,
wishbone_out => wishbone_data_out,
log_out => log_data(171 downto 152)
log_out => log_data(170 downto 151)
);

writeback_0: entity work.writeback
@ -381,7 +386,7 @@ begin
complete_out => complete
);

log_data(151 downto 150) <= "00";
log_data(150) <= '0';
log_data(139 downto 135) <= "00000";

debug_0: entity work.core_debug

@ -72,6 +72,10 @@ architecture behaviour of decode1 is
10 => (ALU, OP_CMP, RA, CONST_UI, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmpli
34 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- lbz
35 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- lbzu
50 => (LDST, OP_FPLOAD, RA_OR_ZERO, CONST_SI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- lfd
51 => (LDST, OP_FPLOAD, RA_OR_ZERO, CONST_SI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- lfdu
-- 48 => (LDST, OP_FPLOAD, RA_OR_ZERO, CONST_SI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0'), -- lfs
-- 49 => (LDST, OP_FPLOAD, RA_OR_ZERO, CONST_SI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0'), -- lfsu
42 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '0', '0', '0', '0', NONE, '0', '0'), -- lha
43 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '1', '0', '0', '0', NONE, '0', '0'), -- lhau
40 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- lhz
@ -87,6 +91,10 @@ architecture behaviour of decode1 is
17 => (ALU, OP_SC, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- sc
38 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stb
39 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- stbu
54 => (LDST, OP_FPSTORE, RA_OR_ZERO, CONST_SI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stfd
55 => (LDST, OP_FPSTORE, RA_OR_ZERO, CONST_SI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- stfdu
-- 52 => (LDST, OP_FPSTORE, RA_OR_ZERO, CONST_SI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0'), -- stfs
-- 53 => (LDST, OP_FPSTORE, RA_OR_ZERO, CONST_SI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0'), -- stfsu
44 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- sth
45 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- sthu
36 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stw
@ -272,6 +280,12 @@ architecture behaviour of decode1 is
2#1101110101# => (LDST, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- ldcix
2#0000110101# => (LDST, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- ldux
2#0000010101# => (LDST, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- ldx
2#1001010111# => (LDST, OP_FPLOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- lfdx
2#1001110111# => (LDST, OP_FPLOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- lfdux
2#1101010111# => (LDST, OP_FPLOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '1', '0', '0', '0', '0', NONE, '0', '0'), -- lfiwax
2#1101110111# => (LDST, OP_FPLOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- lfiwzx
-- 2#1000010111# => (LDST, OP_FPLOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0'), -- lfsx
-- 2#1000110111# => (LDST, OP_FPLOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0'), -- lfsux
2#0001110100# => (LDST, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '1', '0', '0', NONE, '0', '0'), -- lharx
2#0101110111# => (LDST, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '1', '0', '0', '0', NONE, '0', '0'), -- lhaux
2#0101010111# => (LDST, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '0', '0', '0', '0', NONE, '0', '0'), -- lhax
@ -350,6 +364,11 @@ architecture behaviour of decode1 is
2#0011010110# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '1', '0', '0', ONE, '0', '0'), -- stdcx
2#0010110101# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- stdux
2#0010010101# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stdx
2#1011010111# => (LDST, OP_FPSTORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stfdx
2#1011110111# => (LDST, OP_FPSTORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- stfdux
2#1111010111# => (LDST, OP_FPSTORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stfiwx
-- 2#1010010111# => (LDST, OP_FPSTORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0'), -- stfsx
-- 2#1010110111# => (LDST, OP_FPSTORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0'), -- stfsux
2#1110010110# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '1', '0', '0', '0', '0', '0', NONE, '0', '0'), -- sthbrx
2#1110110101# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- sthcix
2#1011010110# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '1', '0', '0', ONE, '0', '0'), -- sthcx

@ -11,6 +11,7 @@ use work.insn_helpers.all;
entity decode2 is
generic (
EX1_BYPASS : boolean := true;
HAS_FPU : boolean := true;
-- Non-zero to enable log data collection
LOG_LENGTH : natural := 0
);
@ -73,7 +74,7 @@ architecture behaviour of decode2 is
-- If it's all 0, we don't treat it as a dependency as slow SPRs
-- operations are single issue.
--
assert is_fast_spr(ispr) = '1' or ispr = "000000"
assert is_fast_spr(ispr) = '1' or ispr = "0000000"
report "Decode A says SPR but ISPR is invalid:" &
to_hstring(ispr) severity failure;
return (is_fast_spr(ispr), ispr, reg_data);
@ -118,7 +119,7 @@ architecture behaviour of decode2 is
-- ISPR must be either a valid fast SPR number or all 0 for a slow SPR.
-- If it's all 0, we don't treat it as a dependency as slow SPRs
-- operations are single issue.
assert is_fast_spr(ispr) = '1' or ispr = "000000"
assert is_fast_spr(ispr) = '1' or ispr = "0000000"
report "Decode B says SPR but ISPR is invalid:" &
to_hstring(ispr) severity failure;
ret := (is_fast_spr(ispr), ispr, reg_data);
@ -137,6 +138,12 @@ architecture behaviour of decode2 is
return ('1', gpr_to_gspr(insn_rs(insn_in)), reg_data);
when RCR =>
return ('1', gpr_to_gspr(insn_rcreg(insn_in)), reg_data);
when FRS =>
if HAS_FPU then
return ('1', fpr_to_gspr(insn_frt(insn_in)), reg_data);
else
return ('0', (others => '0'), (others => '0'));
end if;
when NONE =>
return ('0', (others => '0'), (others => '0'));
end case;
@ -150,16 +157,22 @@ architecture behaviour of decode2 is
return ('1', gpr_to_gspr(insn_rt(insn_in)));
when RA =>
return ('1', gpr_to_gspr(insn_ra(insn_in)));
when FRT =>
if HAS_FPU then
return ('1', fpr_to_gspr(insn_frt(insn_in)));
else
return ('0', "0000000");
end if;
when SPR =>
-- ISPR must be either a valid fast SPR number or all 0 for a slow SPR.
-- If it's all 0, we don't treat it as a dependency as slow SPRs
-- operations are single issue.
assert is_fast_spr(ispr) = '1' or ispr = "000000"
assert is_fast_spr(ispr) = '1' or ispr = "0000000"
report "Decode B says SPR but ISPR is invalid:" &
to_hstring(ispr) severity failure;
return (is_fast_spr(ispr), ispr);
when NONE =>
return ('0', "000000");
return ('0', "0000000");
end case;
end;

@ -212,7 +225,7 @@ architecture behaviour of decode2 is
signal gpr_b_bypass : std_ulogic;

signal gpr_c_read_valid : std_ulogic;
signal gpr_c_read : gpr_index_t;
signal gpr_c_read : gspr_index_t;
signal gpr_c_bypass : std_ulogic;

signal cr_write_valid : std_ulogic;
@ -284,8 +297,9 @@ begin
else gpr_to_gspr(insn_ra(d_in.insn));
r_out.read2_reg <= d_in.ispr2 when d_in.decode.input_reg_b = SPR
else gpr_to_gspr(insn_rb(d_in.insn));
r_out.read3_reg <= insn_rcreg(d_in.insn) when d_in.decode.input_reg_c = RCR
else insn_rs(d_in.insn);
r_out.read3_reg <= gpr_to_gspr(insn_rcreg(d_in.insn)) when d_in.decode.input_reg_c = RCR
else fpr_to_gspr(insn_frt(d_in.insn)) when d_in.decode.input_reg_c = FRS and HAS_FPU
else gpr_to_gspr(insn_rs(d_in.insn));

c_out.read <= d_in.decode.input_cr;

@ -394,7 +408,7 @@ begin
gpr_b_read <= decoded_reg_b.reg;

gpr_c_read_valid <= decoded_reg_c.reg_valid;
gpr_c_read <= gspr_to_gpr(decoded_reg_c.reg);
gpr_c_read <= decoded_reg_c.reg;

cr_write_valid <= d_in.decode.output_cr or decode_rc(d_in.decode.rc, d_in.insn);
cr_bypass_avail <= '0';

@ -10,6 +10,7 @@ package decode_types is
OP_DCBZ, OP_DIV, OP_DIVE, OP_EXTS,
OP_EXTSWSLI, OP_ICBI, OP_ICBT, OP_ISEL, OP_ISYNC,
OP_LOAD, OP_STORE,
OP_FPLOAD, OP_FPSTORE,
OP_MCRXRX, OP_MFCR, OP_MFMSR, OP_MFSPR, OP_MOD,
OP_MTCRF, OP_MTMSRD, OP_MTSPR, OP_MUL_L64,
OP_MUL_H64, OP_MUL_H32, OP_OR,
@ -24,8 +25,8 @@ package decode_types is
type input_reg_a_t is (NONE, RA, RA_OR_ZERO, SPR, CIA);
type input_reg_b_t is (NONE, RB, CONST_UI, CONST_SI, CONST_SI_HI, CONST_UI_HI, CONST_LI, CONST_BD,
CONST_DXHI4, CONST_DS, CONST_M1, CONST_SH, CONST_SH32, SPR);
type input_reg_c_t is (NONE, RS, RCR);
type output_reg_a_t is (NONE, RT, RA, SPR);
type input_reg_c_t is (NONE, RS, RCR, FRS);
type output_reg_a_t is (NONE, RT, RA, SPR, FRT);
type rc_t is (NONE, ONE, RC);
type carry_in_t is (ZERO, CA, OV, ONE);


@ -13,6 +13,7 @@ use work.ppc_fx_insns.all;
entity execute1 is
generic (
EX1_BYPASS : boolean := true;
HAS_FPU : boolean := true;
-- Non-zero to enable log data collection
LOG_LENGTH : natural := 0
);
@ -542,6 +543,9 @@ begin
ctrl_tmp.msr(MSR_PR) <= '0';
ctrl_tmp.msr(MSR_SE) <= '0';
ctrl_tmp.msr(MSR_BE) <= '0';
ctrl_tmp.msr(MSR_FP) <= '0';
ctrl_tmp.msr(MSR_FE0) <= '0';
ctrl_tmp.msr(MSR_FE1) <= '0';
ctrl_tmp.msr(MSR_IR) <= '0';
ctrl_tmp.msr(MSR_DR) <= '0';
ctrl_tmp.msr(MSR_RI) <= '0';
@ -579,6 +583,18 @@ begin
ctrl_tmp.srr1(63 - 45) <= '1';
report "privileged instruction";

elsif not HAS_FPU and valid_in = '1' and
(e_in.insn_type = OP_FPLOAD or e_in.insn_type = OP_FPSTORE) then
-- make lfd/stfd/lfs/stfs etc. illegal in no-FPU implementations
illegal := '1';

elsif HAS_FPU and valid_in = '1' and ctrl.msr(MSR_FP) = '0' and
(e_in.insn_type = OP_FPLOAD or e_in.insn_type = OP_FPSTORE) then
-- generate a floating-point unavailable interrupt
exception := '1';
v.f.redirect_nia := std_logic_vector(to_unsigned(16#800#, 64));
report "FP unavailable interrupt";

elsif valid_in = '1' and e_in.unit = ALU then

report "execute nia " & to_hstring(e_in.nia);
@ -1225,7 +1241,7 @@ begin
lv.addr1 := a_in;
lv.addr2 := b_in;
lv.data := c_in;
lv.write_reg := gspr_to_gpr(e_in.write_reg);
lv.write_reg := e_in.write_reg;
lv.length := e_in.data_len;
lv.byte_reverse := e_in.byte_reverse xnor ctrl.msr(MSR_LE);
lv.sign_extend := e_in.sign_extend;

@ -14,6 +14,7 @@ entity toplevel is
RAM_INIT_FILE : string := "firmware.hex";
RESET_LOW : boolean := true;
CLK_FREQUENCY : positive := 100000000;
HAS_FPU : boolean := true;
USE_LITEDRAM : boolean := false;
NO_BRAM : boolean := false;
DISABLE_FLATTEN_CORE : boolean := false;
@ -168,6 +169,7 @@ begin
RAM_INIT_FILE => RAM_INIT_FILE,
SIM => false,
CLK_FREQ => CLK_FREQUENCY,
HAS_FPU => HAS_FPU,
HAS_DRAM => USE_LITEDRAM,
DRAM_SIZE => 256 * 1024 * 1024,
DRAM_INIT_SIZE => PAYLOAD_SIZE,

@ -11,6 +11,7 @@ entity toplevel is
RESET_LOW : boolean := true;
CLK_INPUT : positive := 100000000;
CLK_FREQUENCY : positive := 100000000;
HAS_FPU : boolean := true;
DISABLE_FLATTEN_CORE : boolean := false;
UART_IS_16550 : boolean := true
);
@ -68,6 +69,7 @@ begin
RAM_INIT_FILE => RAM_INIT_FILE,
SIM => false,
CLK_FREQ => CLK_FREQUENCY,
HAS_FPU => HAS_FPU,
DISABLE_FLATTEN_CORE => DISABLE_FLATTEN_CORE,
UART0_IS_16550 => UART_IS_16550
)

@ -14,6 +14,7 @@ entity toplevel is
RAM_INIT_FILE : string := "firmware.hex";
RESET_LOW : boolean := true;
CLK_FREQUENCY : positive := 100000000;
HAS_FPU : boolean := true;
USE_LITEDRAM : boolean := false;
NO_BRAM : boolean := false;
DISABLE_FLATTEN_CORE : boolean := false;
@ -120,6 +121,7 @@ begin
RAM_INIT_FILE => RAM_INIT_FILE,
SIM => false,
CLK_FREQ => CLK_FREQUENCY,
HAS_FPU => HAS_FPU,
HAS_DRAM => USE_LITEDRAM,
DRAM_SIZE => 512 * 1024 * 1024,
DRAM_INIT_SIZE => PAYLOAD_SIZE,

@ -2,6 +2,9 @@ library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.common.all;

entity gpr_hazard is
generic (
PIPELINE_DEPTH : natural := 1
@ -15,13 +18,13 @@ entity gpr_hazard is
issuing : in std_ulogic;

gpr_write_valid_in : in std_ulogic;
gpr_write_in : in std_ulogic_vector(5 downto 0);
gpr_write_in : in gspr_index_t;
bypass_avail : in std_ulogic;
gpr_read_valid_in : in std_ulogic;
gpr_read_in : in std_ulogic_vector(5 downto 0);
gpr_read_in : in gspr_index_t;

ugpr_write_valid : in std_ulogic;
ugpr_write_reg : in std_ulogic_vector(5 downto 0);
ugpr_write_reg : in gspr_index_t;

stall_out : out std_ulogic;
use_bypass : out std_ulogic
@ -31,9 +34,9 @@ architecture behaviour of gpr_hazard is
type pipeline_entry_type is record
valid : std_ulogic;
bypass : std_ulogic;
gpr : std_ulogic_vector(5 downto 0);
gpr : gspr_index_t;
ugpr_valid : std_ulogic;
ugpr : std_ulogic_vector(5 downto 0);
ugpr : gspr_index_t;
end record;
constant pipeline_entry_init : pipeline_entry_type := (valid => '0', bypass => '0', gpr => (others => '0'),
ugpr_valid => '0', ugpr => (others => '0'));

@ -37,6 +37,10 @@ package insn_helpers is
function insn_sh (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_me (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_mb (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_frt (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_fra (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_frb (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_frc (insn_in : std_ulogic_vector) return std_ulogic_vector;
end package insn_helpers;

package body insn_helpers is
@ -214,4 +218,24 @@ package body insn_helpers is
begin
return insn_in(5) & insn_in(10 downto 6);
end;

-- Return bits 25 downto 21 of the instruction word.
-- decode2 feeds this slice to fpr_to_gspr for both the FRT (FP load
-- target) and FRS (FP store source) register numbers.
function insn_frt(insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(25 downto 21);
end;

-- Return bits 20 downto 16 of the instruction word.
-- NOTE(review): presumably the FRA operand field; no caller is visible
-- in this change, so confirm against the instruction formats.
function insn_fra(insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(20 downto 16);
end;

-- Return bits 15 downto 11 of the instruction word.
-- NOTE(review): presumably the FRB operand field; no caller is visible
-- in this change, so confirm against the instruction formats.
function insn_frb(insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(15 downto 11);
end;

-- Return bits 10 downto 6 of the instruction word.
-- NOTE(review): presumably the FRC operand field; no caller is visible
-- in this change, so confirm against the instruction formats.
function insn_frc(insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(10 downto 6);
end;
end package body insn_helpers;

@ -5,12 +5,15 @@ use ieee.numeric_std.all;
library work;
use work.decode_types.all;
use work.common.all;
use work.insn_helpers.all;
use work.helpers.all;

-- 2 cycle LSU
-- We calculate the address in the first cycle

entity loadstore1 is
generic (
HAS_FPU : boolean := true;
-- Non-zero to enable log data collection
LOG_LENGTH : natural := 0
);
@ -58,7 +61,7 @@ architecture behave of loadstore1 is
addr : std_ulogic_vector(63 downto 0);
store_data : std_ulogic_vector(63 downto 0);
load_data : std_ulogic_vector(63 downto 0);
write_reg : gpr_index_t;
write_reg : gspr_index_t;
length : std_ulogic_vector(3 downto 0);
byte_reverse : std_ulogic;
sign_extend : std_ulogic;
@ -431,6 +434,17 @@ begin
v.align_intr := v.nc;
req := '1';
v.dcbz := '1';
when OP_FPSTORE =>
if HAS_FPU then
req := '1';
end if;
when OP_FPLOAD =>
if HAS_FPU then
v.load := '1';
req := '1';
-- Allow an extra cycle for RA update
v.extra_cycle := l_in.update;
end if;
when OP_TLBIE =>
mmureq := '1';
v.tlbie := '1';
@ -523,7 +537,7 @@ begin
l_out.write_data <= r.sprval;
elsif do_update = '1' then
l_out.write_enable <= '1';
l_out.write_reg <= r.update_reg;
l_out.write_reg <= gpr_to_gspr(r.update_reg);
l_out.write_data <= r.addr;
else
l_out.write_enable <= write_enable;

@ -132,6 +132,7 @@ targets:
- disable_flatten_core
- log_length=2048
- uart_is_16550
- has_fpu
tools:
vivado: {part : xc7a100tcsg324-1}
toplevel : toplevel
@ -215,6 +216,7 @@ targets:
- spi_flash_offset=10485760
- log_length=2048
- uart_is_16550
- has_fpu
tools:
vivado: {part : xc7a200tsbg484-1}
toplevel : toplevel
@ -231,6 +233,7 @@ targets:
- spi_flash_offset=10485760
- log_length=2048
- uart_is_16550
- has_fpu
generate: [litedram_nexys_video]
tools:
vivado: {part : xc7a200tsbg484-1}
@ -249,6 +252,7 @@ targets:
- log_length=512
- uart_is_16550
- has_uart1
- has_fpu=false
tools:
vivado: {part : xc7a35ticsg324-1L}
toplevel : toplevel
@ -267,6 +271,7 @@ targets:
- log_length=512
- uart_is_16550
- has_uart1
- has_fpu=false
generate: [litedram_arty, liteeth_arty]
tools:
vivado: {part : xc7a35ticsg324-1L}
@ -285,6 +290,7 @@ targets:
- log_length=2048
- uart_is_16550
- has_uart1
- has_fpu
tools:
vivado: {part : xc7a100ticsg324-1L}
toplevel : toplevel
@ -303,6 +309,7 @@ targets:
- log_length=2048
- uart_is_16550
- has_uart1
- has_fpu
generate: [litedram_arty, liteeth_arty]
tools:
vivado: {part : xc7a100ticsg324-1L}
@ -320,6 +327,7 @@ targets:
- disable_flatten_core
- log_length=512
- uart_is_16550
- has_fpu=false
tools:
vivado: {part : xc7a35tcpg236-1}
toplevel : toplevel
@ -380,6 +388,12 @@ parameters:
paramtype : generic
default : 100000000

has_fpu:
datatype : bool
description : Include a floating-point unit in the core
paramtype : generic
default : true

disable_flatten_core:
datatype : bool
description : Prevent Vivado from flattening the main core components

@ -8,6 +8,7 @@ use work.common.all;
entity register_file is
generic (
SIM : boolean := false;
HAS_FPU : boolean := true;
-- Non-zero to enable log data collection
LOG_LENGTH : natural := 0
);
@ -28,12 +29,12 @@ entity register_file is
sim_dump : in std_ulogic;
sim_dump_done : out std_ulogic;

log_out : out std_ulogic_vector(70 downto 0)
log_out : out std_ulogic_vector(71 downto 0)
);
end entity register_file;

architecture behaviour of register_file is
type regfile is array(0 to 63) of std_ulogic_vector(63 downto 0);
type regfile is array(0 to 127) of std_ulogic_vector(63 downto 0);
signal registers : regfile := (others => (others => '0'));
signal rd_port_b : std_ulogic_vector(63 downto 0);
signal dbg_data : std_ulogic_vector(63 downto 0);
@ -41,53 +42,73 @@ architecture behaviour of register_file is
begin
-- synchronous writes
register_write_0: process(clk)
variable w_addr : gspr_index_t;
begin
if rising_edge(clk) then
if w_in.write_enable = '1' then
if w_in.write_reg(5) = '0' then
report "Writing GPR " & to_hstring(w_in.write_reg) & " " & to_hstring(w_in.write_data);
w_addr := w_in.write_reg;
if HAS_FPU and w_addr(6) = '1' then
report "Writing FPR " & to_hstring(w_addr(4 downto 0)) & " " & to_hstring(w_in.write_data);
else
report "Writing GSPR " & to_hstring(w_in.write_reg) & " " & to_hstring(w_in.write_data);
w_addr(6) := '0';
if w_addr(5) = '0' then
report "Writing GPR " & to_hstring(w_addr) & " " & to_hstring(w_in.write_data);
else
report "Writing GSPR " & to_hstring(w_addr) & " " & to_hstring(w_in.write_data);
end if;
end if;
assert not(is_x(w_in.write_data)) and not(is_x(w_in.write_reg)) severity failure;
registers(to_integer(unsigned(w_in.write_reg))) <= w_in.write_data;
registers(to_integer(unsigned(w_addr))) <= w_in.write_data;
end if;
end if;
end process register_write_0;

-- asynchronous reads
register_read_0: process(all)
variable b_addr : gspr_index_t;
variable a_addr, b_addr, c_addr : gspr_index_t;
variable w_addr : gspr_index_t;
begin
a_addr := d_in.read1_reg;
b_addr := d_in.read2_reg;
c_addr := d_in.read3_reg;
w_addr := w_in.write_reg;
if not HAS_FPU then
-- Make it obvious that we only want 64 GSPRs for a no-FPU implementation
a_addr(6) := '0';
b_addr(6) := '0';
c_addr(6) := '0';
w_addr(6) := '0';
end if;
if d_in.read1_enable = '1' then
report "Reading GPR " & to_hstring(d_in.read1_reg) & " " & to_hstring(registers(to_integer(unsigned(d_in.read1_reg))));
report "Reading GPR " & to_hstring(a_addr) & " " & to_hstring(registers(to_integer(unsigned(a_addr))));
end if;
if d_in.read2_enable = '1' then
report "Reading GPR " & to_hstring(d_in.read2_reg) & " " & to_hstring(registers(to_integer(unsigned(d_in.read2_reg))));
report "Reading GPR " & to_hstring(b_addr) & " " & to_hstring(registers(to_integer(unsigned(b_addr))));
end if;
if d_in.read3_enable = '1' then
report "Reading GPR " & to_hstring(d_in.read3_reg) & " " & to_hstring(registers(to_integer(unsigned(d_in.read3_reg))));
report "Reading GPR " & to_hstring(c_addr) & " " & to_hstring(registers(to_integer(unsigned(c_addr))));
end if;
d_out.read1_data <= registers(to_integer(unsigned(d_in.read1_reg)));
d_out.read1_data <= registers(to_integer(unsigned(a_addr)));
-- B read port is multiplexed with reads from the debug circuitry
if d_in.read2_enable = '0' and dbg_gpr_req = '1' and dbg_ack = '0' then
b_addr := dbg_gpr_addr;
else
b_addr := d_in.read2_reg;
if not HAS_FPU then
b_addr(6) := '0';
end if;
end if;
rd_port_b <= registers(to_integer(unsigned(b_addr)));
d_out.read2_data <= rd_port_b;
d_out.read3_data <= registers(to_integer(unsigned(gpr_to_gspr(d_in.read3_reg))));
d_out.read3_data <= registers(to_integer(unsigned(c_addr)));

-- Forward any written data
if w_in.write_enable = '1' then
if d_in.read1_reg = w_in.write_reg then
if a_addr = w_addr then
d_out.read1_data <= w_in.write_data;
end if;
if d_in.read2_reg = w_in.write_reg then
if b_addr = w_addr then
d_out.read2_data <= w_in.write_data;
end if;
if gpr_to_gspr(d_in.read3_reg) = w_in.write_reg then
if c_addr = w_addr then
d_out.read3_data <= w_in.write_data;
end if;
end if;
@ -136,7 +157,7 @@ begin
end generate;

rf_log: if LOG_LENGTH > 0 generate
signal log_data : std_ulogic_vector(70 downto 0);
signal log_data : std_ulogic_vector(71 downto 0);
begin
reg_log: process(clk)
begin

@ -58,7 +58,7 @@ struct log_entry {
u64 ls_lo_valid: 1;
u64 ls_eo_except: 1;
u64 ls_stall_out: 1;
u64 pad2: 2;
u64 pad2: 1;
u64 dc_state: 3;
u64 dc_ra_valid: 1;
u64 dc_tlb_way: 3;
@ -74,7 +74,7 @@ struct log_entry {
u64 cr_wr_mask: 8;
u64 cr_wr_data: 4;
u64 cr_wr_enable: 1;
u64 reg_wr_reg: 6;
u64 reg_wr_reg: 7;
u64 reg_wr_enable: 1;

u64 reg_wr_data;
@ -90,11 +90,11 @@ const char *ops[64] =
"illegal", "nop ", "add ", "and ", "attn ", "b ", "bc ", "bcreg ",
"bperm ", "cmp ", "cmpb ", "cmpeqb ", "cmprb ", "cntz ", "crop ", "darn ",
"dcbf ", "dcbst ", "dcbt ", "dcbtst ", "dcbz ", "div ", "dive ", "exts ",
"extswsl", "icbi ", "icbt ", "isel ", "isync ", "ld ", "st ", "mcrxrx ",
"mfcr ", "mfmsr ", "mfspr ", "mod ", "mtcrf ", "mtmsr ", "mtspr ", "mull64 ",
"mulh64 ", "mulh32 ", "or ", "popcnt ", "prty ", "rfid ", "rlc ", "rlcl ",
"rlcr ", "sc ", "setb ", "shl ", "shr ", "sync ", "tlbie ", "trap ",
"xor ", "bcd ", "addg6s ", "ffail ", "?60 ", "?61 ", "?62 ", "?63 "
"extswsl", "icbi ", "icbt ", "isel ", "isync ", "ld ", "st ", "fpload ",
"fpstore", "mcrxrx ", "mfcr ", "mfmsr ", "mfspr ", "mod ", "mtcrf ", "mtmsr ",
"mtspr ", "mull64 ", "mulh64 ", "mulh32 ", "or ", "popcnt ", "prty ", "rfid ",
"rlc ", "rlcl ", "rlcr ", "sc ", "setb ", "shl ", "shr ", "sync ",
"tlbie ", "trap ", "xor ", "bcd ", "addg6s ", "ffail ", "?62 ", "?63 "
};

const char *spr_names[13] =

@ -52,6 +52,7 @@ entity soc is
RAM_INIT_FILE : string;
CLK_FREQ : positive;
SIM : boolean;
HAS_FPU : boolean := true;
DISABLE_FLATTEN_CORE : boolean := false;
HAS_DRAM : boolean := false;
DRAM_SIZE : integer := 0;
@ -253,6 +254,7 @@ begin
processor: entity work.core
generic map(
SIM => SIM,
HAS_FPU => HAS_FPU,
DISABLE_FLATTEN => DISABLE_FLATTEN_CORE,
ALT_RESET_ADDRESS => (23 downto 0 => '0', others => '1'),
LOG_LENGTH => LOG_LENGTH

@ -80,7 +80,7 @@ begin
end if;

if l_in.write_enable = '1' then
w_out.write_reg <= gpr_to_gspr(l_in.write_reg);
w_out.write_reg <= l_in.write_reg;
w_out.write_data <= l_in.write_data;
w_out.write_enable <= '1';
end if;

Loading…
Cancel
Save