Merge pull request #245 from paulusmack/fpu

Add a simple FPU
pull/251/head
Michael Neuling 4 years ago committed by GitHub
commit e40e752b9a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -48,7 +48,7 @@ core_files = decode_types.vhdl common.vhdl wishbone_types.vhdl fetch1.vhdl \
cr_file.vhdl crhelpers.vhdl ppc_fx_insns.vhdl rotator.vhdl \
logical.vhdl countzero.vhdl multiply.vhdl divider.vhdl execute1.vhdl \
loadstore1.vhdl mmu.vhdl dcache.vhdl writeback.vhdl core_debug.vhdl \
core.vhdl
core.vhdl fpu.vhdl

soc_files = $(core_files) wishbone_arbiter.vhdl wishbone_bram_wrapper.vhdl sync_fifo.vhdl \
wishbone_debug_master.vhdl xics.vhdl syscon.vhdl soc.vhdl \

@ -13,8 +13,11 @@ package common is
constant MSR_SF : integer := (63 - 0); -- Sixty-Four bit mode
constant MSR_EE : integer := (63 - 48); -- External interrupt Enable
constant MSR_PR : integer := (63 - 49); -- PRoblem state
constant MSR_FP : integer := (63 - 50); -- Floating Point available
constant MSR_FE0 : integer := (63 - 52); -- Floating Exception mode
constant MSR_SE : integer := (63 - 53); -- Single-step bit of TE field
constant MSR_BE : integer := (63 - 54); -- Branch trace bit of TE field
constant MSR_FE1 : integer := (63 - 55); -- Floating Exception mode
constant MSR_IR : integer := (63 - 58); -- Instruction Relocation
constant MSR_DR : integer := (63 - 59); -- Data Relocation
constant MSR_RI : integer := (63 - 62); -- Recoverable Interrupt
@ -53,8 +56,11 @@ package common is
-- GPR indices in the register file (GPR only)
subtype gpr_index_t is std_ulogic_vector(4 downto 0);

-- Extended GPR indice (can hold an SPR)
subtype gspr_index_t is std_ulogic_vector(5 downto 0);
-- Extended GPR index (can hold an SPR or a FPR)
subtype gspr_index_t is std_ulogic_vector(6 downto 0);

-- FPR indices
subtype fpr_index_t is std_ulogic_vector(4 downto 0);

-- Some SPRs are stored in the register file, they use the magic
-- GPR numbers above 31.
@ -64,6 +70,9 @@ package common is
-- indicates if this is indeed a fast SPR. If clear, then
-- the SPR is not stored in the GPR file.
--
-- FPRs are also stored in the register file, using GSPR
-- numbers from 64 to 95.
--
function fast_spr_num(spr: spr_num_t) return gspr_index_t;

-- Indices conversion functions
@ -71,6 +80,7 @@ package common is
function gpr_to_gspr(i: gpr_index_t) return gspr_index_t;
function gpr_or_spr_to_gspr(g: gpr_index_t; s: gspr_index_t) return gspr_index_t;
function is_fast_spr(s: gspr_index_t) return std_ulogic;
function fpr_to_gspr(f: fpr_index_t) return gspr_index_t;

-- The XER is split: the common bits (CA, OV, SO, OV32 and CA32) are
-- in the CR file as a kind of CR extension (with a separate write
@ -84,6 +94,38 @@ package common is
end record;
constant xerc_init : xer_common_t := (others => '0');

-- FPSCR bit numbers
constant FPSCR_FX : integer := 63 - 32;
constant FPSCR_FEX : integer := 63 - 33;
constant FPSCR_VX : integer := 63 - 34;
constant FPSCR_OX : integer := 63 - 35;
constant FPSCR_UX : integer := 63 - 36;
constant FPSCR_ZX : integer := 63 - 37;
constant FPSCR_XX : integer := 63 - 38;
constant FPSCR_VXSNAN : integer := 63 - 39;
constant FPSCR_VXISI : integer := 63 - 40;
constant FPSCR_VXIDI : integer := 63 - 41;
constant FPSCR_VXZDZ : integer := 63 - 42;
constant FPSCR_VXIMZ : integer := 63 - 43;
constant FPSCR_VXVC : integer := 63 - 44;
constant FPSCR_FR : integer := 63 - 45;
constant FPSCR_FI : integer := 63 - 46;
constant FPSCR_C : integer := 63 - 47;
constant FPSCR_FL : integer := 63 - 48;
constant FPSCR_FG : integer := 63 - 49;
constant FPSCR_FE : integer := 63 - 50;
constant FPSCR_FU : integer := 63 - 51;
constant FPSCR_VXSOFT : integer := 63 - 53;
constant FPSCR_VXSQRT : integer := 63 - 54;
constant FPSCR_VXCVI : integer := 63 - 55;
constant FPSCR_VE : integer := 63 - 56;
constant FPSCR_OE : integer := 63 - 57;
constant FPSCR_UE : integer := 63 - 58;
constant FPSCR_ZE : integer := 63 - 59;
constant FPSCR_XE : integer := 63 - 60;
constant FPSCR_NI : integer := 63 - 61;
constant FPSCR_RN : integer := 63 - 63;

type irq_state_t is (WRITE_SRR0, WRITE_SRR1);

-- For now, fixed 16 sources, make this either a parametric
@ -226,7 +268,7 @@ package common is
read2_enable : std_ulogic;
read2_reg : gspr_index_t;
read3_enable : std_ulogic;
read3_reg : gpr_index_t;
read3_reg : gspr_index_t;
end record;

type RegisterFileToDecode2Type is record
@ -264,7 +306,7 @@ package common is
addr1 : std_ulogic_vector(63 downto 0);
addr2 : std_ulogic_vector(63 downto 0);
data : std_ulogic_vector(63 downto 0); -- data to write, unused for read
write_reg : gpr_index_t;
write_reg : gspr_index_t;
length : std_ulogic_vector(3 downto 0);
ci : std_ulogic; -- cache-inhibited load/store
byte_reverse : std_ulogic;
@ -277,13 +319,15 @@ package common is
virt_mode : std_ulogic; -- do translation through TLB
priv_mode : std_ulogic; -- privileged mode (MSR[PR] = 0)
mode_32bit : std_ulogic; -- trim addresses to 32 bits
is_32bit : std_ulogic;
end record;
constant Execute1ToLoadstore1Init : Execute1ToLoadstore1Type := (valid => '0', op => OP_ILLEGAL, ci => '0', byte_reverse => '0',
sign_extend => '0', update => '0', xerc => xerc_init,
reserve => '0', rc => '0', virt_mode => '0', priv_mode => '0',
nia => (others => '0'), insn => (others => '0'),
addr1 => (others => '0'), addr2 => (others => '0'), data => (others => '0'), length => (others => '0'),
mode_32bit => '0', others => (others => '0'));
addr1 => (others => '0'), addr2 => (others => '0'), data => (others => '0'),
write_reg => (others => '0'), length => (others => '0'),
mode_32bit => '0', is_32bit => '0', others => (others => '0'));

type Loadstore1ToExecute1Type is record
busy : std_ulogic;
@ -369,7 +413,7 @@ package common is
type Loadstore1ToWritebackType is record
valid : std_ulogic;
write_enable: std_ulogic;
write_reg : gpr_index_t;
write_reg : gspr_index_t;
write_data : std_ulogic_vector(63 downto 0);
xerc : xer_common_t;
rc : std_ulogic;
@ -401,6 +445,43 @@ package common is
write_cr_data => (others => '0'), write_reg => (others => '0'),
exc_write_reg => (others => '0'), exc_write_data => (others => '0'));

type Execute1ToFPUType is record
valid : std_ulogic;
op : insn_type_t;
nia : std_ulogic_vector(63 downto 0);
insn : std_ulogic_vector(31 downto 0);
single : std_ulogic;
fe_mode : std_ulogic_vector(1 downto 0);
fra : std_ulogic_vector(63 downto 0);
frb : std_ulogic_vector(63 downto 0);
frc : std_ulogic_vector(63 downto 0);
frt : gspr_index_t;
rc : std_ulogic;
out_cr : std_ulogic;
end record;
constant Execute1ToFPUInit : Execute1ToFPUType := (valid => '0', op => OP_ILLEGAL, nia => (others => '0'),
insn => (others => '0'), fe_mode => "00", rc => '0',
fra => (others => '0'), frb => (others => '0'),
frc => (others => '0'), frt => (others => '0'),
single => '0', out_cr => '0');

type FPUToExecute1Type is record
busy : std_ulogic;
exception : std_ulogic;
interrupt : std_ulogic;
illegal : std_ulogic;
end record;

type FPUToWritebackType is record
valid : std_ulogic;
write_enable : std_ulogic;
write_reg : gspr_index_t;
write_data : std_ulogic_vector(63 downto 0);
write_cr_enable : std_ulogic;
write_cr_mask : std_ulogic_vector(7 downto 0);
write_cr_data : std_ulogic_vector(31 downto 0);
end record;

type DividerToExecute1Type is record
valid: std_ulogic;
write_reg_data: std_ulogic_vector(63 downto 0);
@ -473,10 +554,10 @@ package body common is
n := 13;
when others =>
n := 0;
return "000000";
return "0000000";
end case;
tmp := std_ulogic_vector(to_unsigned(n, 5));
return "1" & tmp;
return "01" & tmp;
end;

function gspr_to_gpr(i: gspr_index_t) return gpr_index_t is
@ -486,7 +567,7 @@ package body common is

function gpr_to_gspr(i: gpr_index_t) return gspr_index_t is
begin
return "0" & i;
return "00" & i;
end;

function gpr_or_spr_to_gspr(g: gpr_index_t; s: gspr_index_t) return gspr_index_t is
@ -502,4 +583,9 @@ package body common is
begin
return s(5);
end;

function fpr_to_gspr(f: fpr_index_t) return gspr_index_t is
begin
return "10" & f;
end;
end common;

@ -34,7 +34,7 @@ entity control is
gpr_b_read_in : in gspr_index_t;

gpr_c_read_valid_in : in std_ulogic;
gpr_c_read_in : in gpr_index_t;
gpr_c_read_in : in gspr_index_t;

cr_read_in : in std_ulogic;
cr_write_in : in std_ulogic;
@ -70,7 +70,6 @@ architecture rtl of control is
signal gpr_write_valid : std_ulogic := '0';
signal cr_write_valid : std_ulogic := '0';

signal gpr_c_read_in_fmt : std_ulogic_vector(5 downto 0);
begin
gpr_hazard0: entity work.gpr_hazard
generic map (
@ -122,8 +121,6 @@ begin
use_bypass => gpr_bypass_b
);

gpr_c_read_in_fmt <= "0" & gpr_c_read_in;

gpr_hazard2: entity work.gpr_hazard
generic map (
PIPELINE_DEPTH => PIPELINE_DEPTH
@ -140,7 +137,7 @@ begin
gpr_write_in => gpr_write_in,
bypass_avail => gpr_bypassable,
gpr_read_valid_in => gpr_c_read_valid_in,
gpr_read_in => gpr_c_read_in_fmt,
gpr_read_in => gpr_c_read_in,

ugpr_write_valid => update_gpr_write_valid,
ugpr_write_reg => update_gpr_write_reg,

@ -11,6 +11,7 @@ entity core is
SIM : boolean := false;
DISABLE_FLATTEN : boolean := false;
EX1_BYPASS : boolean := true;
HAS_FPU : boolean := true;
ALT_RESET_ADDRESS : std_ulogic_vector(63 downto 0) := (others => '0');
LOG_LENGTH : natural := 512
);
@ -79,6 +80,11 @@ architecture behave of core is
signal mmu_to_dcache: MmuToDcacheType;
signal dcache_to_mmu: DcacheToMmuType;

-- FPU signals
signal execute1_to_fpu: Execute1ToFPUType;
signal fpu_to_execute1: FPUToExecute1Type;
signal fpu_to_writeback: FPUToWritebackType;

-- local signals
signal fetch1_stall_in : std_ulogic;
signal icache_stall_out : std_ulogic;
@ -108,6 +114,7 @@ architecture behave of core is
signal rst_dec1 : std_ulogic := '1';
signal rst_dec2 : std_ulogic := '1';
signal rst_ex1 : std_ulogic := '1';
signal rst_fpu : std_ulogic := '1';
signal rst_ls1 : std_ulogic := '1';
signal rst_dbg : std_ulogic := '1';
signal alt_reset_d : std_ulogic;
@ -170,6 +177,7 @@ begin
rst_dec1 <= core_rst;
rst_dec2 <= core_rst;
rst_ex1 <= core_rst;
rst_fpu <= core_rst;
rst_ls1 <= core_rst;
rst_dbg <= rst;
alt_reset_d <= alt_reset;
@ -224,6 +232,7 @@ begin

decode1_0: entity work.decode1
generic map(
HAS_FPU => HAS_FPU,
LOG_LENGTH => LOG_LENGTH
)
port map (
@ -244,6 +253,7 @@ begin
decode2_0: entity work.decode2
generic map (
EX1_BYPASS => EX1_BYPASS,
HAS_FPU => HAS_FPU,
LOG_LENGTH => LOG_LENGTH
)
port map (
@ -267,6 +277,7 @@ begin
register_file_0: entity work.register_file
generic map (
SIM => SIM,
HAS_FPU => HAS_FPU,
LOG_LENGTH => LOG_LENGTH
)
port map (
@ -280,7 +291,7 @@ begin
dbg_gpr_data => dbg_gpr_data,
sim_dump => terminate,
sim_dump_done => sim_cr_dump,
log_out => log_data(255 downto 185)
log_out => log_data(255 downto 184)
);

cr_file_0: entity work.cr_file
@ -294,12 +305,13 @@ begin
d_out => cr_file_to_decode2,
w_in => writeback_to_cr_file,
sim_dump => sim_cr_dump,
log_out => log_data(184 downto 172)
log_out => log_data(183 downto 171)
);

execute1_0: entity work.execute1
generic map (
EX1_BYPASS => EX1_BYPASS,
HAS_FPU => HAS_FPU,
LOG_LENGTH => LOG_LENGTH
)
port map (
@ -309,9 +321,11 @@ begin
busy_out => ex1_busy_out,
e_in => decode2_to_execute1,
l_in => loadstore1_to_execute1,
fp_in => fpu_to_execute1,
ext_irq_in => ext_irq,
l_out => execute1_to_loadstore1,
f_out => execute1_to_fetch1,
fp_out => execute1_to_fpu,
e_out => execute1_to_writeback,
icache_inval => ex1_icache_inval,
dbg_msr_out => msr,
@ -322,8 +336,32 @@ begin
log_wr_addr => log_wr_addr
);

with_fpu: if HAS_FPU generate
begin
fpu_0: entity work.fpu
port map (
clk => clk,
rst => rst_fpu,
e_in => execute1_to_fpu,
e_out => fpu_to_execute1,
w_out => fpu_to_writeback
);
end generate;

no_fpu: if not HAS_FPU generate
begin
fpu_to_execute1.busy <= '0';
fpu_to_execute1.exception <= '0';
fpu_to_execute1.interrupt <= '0';
fpu_to_execute1.illegal <= '0';
fpu_to_writeback.valid <= '0';
fpu_to_writeback.write_enable <= '0';
fpu_to_writeback.write_cr_enable <= '0';
end generate;

loadstore1_0: entity work.loadstore1
generic map (
HAS_FPU => HAS_FPU,
LOG_LENGTH => LOG_LENGTH
)
port map (
@ -368,7 +406,7 @@ begin
stall_out => dcache_stall_out,
wishbone_in => wishbone_data_in,
wishbone_out => wishbone_data_out,
log_out => log_data(171 downto 152)
log_out => log_data(170 downto 151)
);

writeback_0: entity work.writeback
@ -376,12 +414,13 @@ begin
clk => clk,
e_in => execute1_to_writeback,
l_in => loadstore1_to_writeback,
fp_in => fpu_to_writeback,
w_out => writeback_to_register_file,
c_out => writeback_to_cr_file,
complete_out => complete
);

log_data(151 downto 150) <= "00";
log_data(150) <= '0';
log_data(139 downto 135) <= "00000";

debug_0: entity work.core_debug

@ -3,6 +3,7 @@ use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.helpers.all;

entity zero_counter is
port (
@ -15,42 +16,6 @@ entity zero_counter is
end entity zero_counter;

architecture behaviour of zero_counter is
-- Reverse the order of bits in a word
function bit_reverse(a: std_ulogic_vector) return std_ulogic_vector is
variable ret: std_ulogic_vector(a'left downto a'right);
begin
for i in a'right to a'left loop
ret(a'left + a'right - i) := a(i);
end loop;
return ret;
end;

-- If there is only one bit set in a doubleword, return its bit number
-- (counting from the right). Each bit of the result is obtained by
-- ORing together 32 bits of the input:
-- bit 0 = a[1] or a[3] or a[5] or ...
-- bit 1 = a[2] or a[3] or a[6] or a[7] or ...
-- bit 2 = a[4..7] or a[12..15] or ...
-- bit 5 = a[32..63] ORed together
function bit_number(a: std_ulogic_vector(63 downto 0)) return std_ulogic_vector is
variable ret: std_ulogic_vector(5 downto 0);
variable stride: natural;
variable bit: std_ulogic;
variable k: natural;
begin
stride := 2;
for i in 0 to 5 loop
bit := '0';
for j in 0 to (64 / stride) - 1 loop
k := j * stride;
bit := bit or (or a(k + stride - 1 downto k + (stride / 2)));
end loop;
ret(i) := bit;
stride := stride * 2;
end loop;
return ret;
end;

signal inp : std_ulogic_vector(63 downto 0);
signal sum : std_ulogic_vector(64 downto 0);
signal msb_r : std_ulogic;

@ -8,6 +8,7 @@ use work.decode_types.all;

entity decode1 is
generic (
HAS_FPU : boolean := true;
-- Non-zero to enable log data collection
LOG_LENGTH : natural := 0
);
@ -54,7 +55,10 @@ architecture behaviour of decode1 is
type op_19_subop_array_t is array(0 to 7) of decode_rom_t;
type op_30_subop_array_t is array(0 to 15) of decode_rom_t;
type op_31_subop_array_t is array(0 to 1023) of decode_rom_t;
type op_59_subop_array_t is array(0 to 31) of decode_rom_t;
type minor_rom_array_2_t is array(0 to 3) of decode_rom_t;
type op_63_subop_array_0_t is array(0 to 511) of decode_rom_t;
type op_63_subop_array_1_t is array(0 to 16) of decode_rom_t;

constant major_decode_rom_array : major_rom_array_t := (
-- unit internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl
@ -72,6 +76,10 @@ architecture behaviour of decode1 is
10 => (ALU, OP_CMP, RA, CONST_UI, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmpli
34 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- lbz
35 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- lbzu
50 => (LDST, OP_FPLOAD, RA_OR_ZERO, CONST_SI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- lfd
51 => (LDST, OP_FPLOAD, RA_OR_ZERO, CONST_SI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- lfdu
48 => (LDST, OP_FPLOAD, RA_OR_ZERO, CONST_SI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0'), -- lfs
49 => (LDST, OP_FPLOAD, RA_OR_ZERO, CONST_SI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0'), -- lfsu
42 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '0', '0', '0', '0', NONE, '0', '0'), -- lha
43 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '1', '0', '0', '0', NONE, '0', '0'), -- lhau
40 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- lhz
@ -87,6 +95,10 @@ architecture behaviour of decode1 is
17 => (ALU, OP_SC, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- sc
38 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stb
39 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- stbu
54 => (LDST, OP_FPSTORE, RA_OR_ZERO, CONST_SI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stfd
55 => (LDST, OP_FPSTORE, RA_OR_ZERO, CONST_SI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- stfdu
52 => (LDST, OP_FPSTORE, RA_OR_ZERO, CONST_SI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0'), -- stfs
53 => (LDST, OP_FPSTORE, RA_OR_ZERO, CONST_SI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0'), -- stfsu
44 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- sth
45 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- sthu
36 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stw
@ -272,6 +284,12 @@ architecture behaviour of decode1 is
2#1101110101# => (LDST, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- ldcix
2#0000110101# => (LDST, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- ldux
2#0000010101# => (LDST, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- ldx
2#1001010111# => (LDST, OP_FPLOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- lfdx
2#1001110111# => (LDST, OP_FPLOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- lfdux
2#1101010111# => (LDST, OP_FPLOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '1', '0', '0', '0', '0', NONE, '0', '0'), -- lfiwax
2#1101110111# => (LDST, OP_FPLOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- lfiwzx
2#1000010111# => (LDST, OP_FPLOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0'), -- lfsx
2#1000110111# => (LDST, OP_FPLOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0'), -- lfsux
2#0001110100# => (LDST, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '1', '0', '0', NONE, '0', '0'), -- lharx
2#0101110111# => (LDST, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '1', '0', '0', '0', NONE, '0', '0'), -- lhaux
2#0101010111# => (LDST, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '0', '0', '0', '0', NONE, '0', '0'), -- lhax
@ -350,6 +368,11 @@ architecture behaviour of decode1 is
2#0011010110# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '1', '0', '0', ONE, '0', '0'), -- stdcx
2#0010110101# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- stdux
2#0010010101# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stdx
2#1011010111# => (LDST, OP_FPSTORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stfdx
2#1011110111# => (LDST, OP_FPSTORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- stfdux
2#1111010111# => (LDST, OP_FPSTORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stfiwx
2#1010010111# => (LDST, OP_FPSTORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0'), -- stfsx
2#1010110111# => (LDST, OP_FPSTORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0'), -- stfsux
2#1110010110# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '1', '0', '0', '0', '0', '0', NONE, '0', '0'), -- sthbrx
2#1110110101# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- sthcix
2#1011010110# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '1', '0', '0', ONE, '0', '0'), -- sthcx
@ -389,6 +412,24 @@ architecture behaviour of decode1 is
others => decode_rom_init
);

constant decode_op_59_array : op_59_subop_array_t := (
-- unit internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl
-- op in out A out in out len ext pipe
2#01110# => (FPU, OP_FPOP_I, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fcfid[u]s
2#10010# => (FPU, OP_FPOP, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fdivs
2#10100# => (FPU, OP_FPOP, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fsubs
2#10101# => (FPU, OP_FPOP, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fadds
2#10110# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fsqrts
2#11000# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fres
2#11001# => (FPU, OP_FPOP, FRA, NONE, FRC, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fmuls
2#11010# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- frsqrtes
2#11100# => (FPU, OP_FPOP, FRA, FRB, FRC, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fmsubs
2#11101# => (FPU, OP_FPOP, FRA, FRB, FRC, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fmadds
2#11110# => (FPU, OP_FPOP, FRA, FRB, FRC, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fnmsubs
2#11111# => (FPU, OP_FPOP, FRA, FRB, FRC, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fnmadds
others => illegal_inst
);

constant decode_op_62_array : minor_rom_array_2_t := (
-- unit internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl
-- op in out A out in out len ext pipe
@ -397,6 +438,64 @@ architecture behaviour of decode1 is
others => decode_rom_init
);

-- indexed by bits 4..1 and 10..6 of instruction word
constant decode_op_63l_array : op_63_subop_array_0_t := (
-- unit internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl
-- op in out A out in out len ext pipe
2#000000000# => (FPU, OP_FPOP, FRA, FRB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- 0/0=fcmpu
2#000000001# => (FPU, OP_FPOP, FRA, FRB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- 1/0=fcmpo
2#000000010# => (FPU, OP_FPOP, NONE, NONE, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- 2/0=mcrfs
2#000000100# => (FPU, OP_FPOP, FRA, FRB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- 4/0=ftdiv
2#000000101# => (FPU, OP_FPOP, NONE, FRB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- 5/0=ftsqrt
2#011000001# => (FPU, OP_FPOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 1/6=mtfsb1
2#011000010# => (FPU, OP_FPOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 2/6=mtfsb0
2#011000100# => (FPU, OP_FPOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 4/6=mtfsfi
2#011011010# => (FPU, OP_FPOP_I, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- 26/6=fmrgow
2#011011110# => (FPU, OP_FPOP_I, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- 30/6=fmrgew
2#011110010# => (FPU, OP_FPOP_I, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 18/7=mffs family
2#011110110# => (FPU, OP_FPOP_I, NONE, FRB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 22/7=mtfsf
2#100000000# => (FPU, OP_FPOP, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 0/8=fcpsgn
2#100000001# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 1/8=fneg
2#100000010# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 2/8=fmr
2#100000100# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 4/8=fnabs
2#100001000# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 8/8=fabs
2#100001100# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 12/8=frin
2#100001101# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 13/8=friz
2#100001110# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 14/8=frip
2#100001111# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 15/8=frim
2#110000000# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- 0/12=frsp
2#111000000# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 0/14=fctiw
2#111000100# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 4/14=fctiwu
2#111011001# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 25/14=fctid
2#111011010# => (FPU, OP_FPOP_I, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 26/14=fcfid
2#111011101# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 29/14=fctidu
2#111011110# => (FPU, OP_FPOP_I, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 30/14=fcfidu
2#111100000# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 0/15=fctiwz
2#111100100# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 4/15=fctiwuz
2#111111001# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 25/15=fctidz
2#111111101# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 29/15=fctiduz
others => illegal_inst
);

-- indexed by bits 4..1 of instruction word
constant decode_op_63h_array : op_63_subop_array_1_t := (
-- unit internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl
-- op in out A out in out len ext pipe
2#0010# => (FPU, OP_FPOP, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- fdiv
2#0100# => (FPU, OP_FPOP, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- fsub
2#0101# => (FPU, OP_FPOP, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- fadd
2#0110# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- fsqrt
2#0111# => (FPU, OP_FPOP, FRA, FRB, FRC, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- fsel
2#1000# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- fre
2#1001# => (FPU, OP_FPOP, FRA, NONE, FRC, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- fmul
2#1010# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- frsqrte
2#1100# => (FPU, OP_FPOP, FRA, FRB, FRC, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- fmsub
2#1101# => (FPU, OP_FPOP, FRA, FRB, FRC, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- fmadd
2#1110# => (FPU, OP_FPOP, FRA, FRB, FRC, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- fnmsub
2#1111# => (FPU, OP_FPOP, FRA, FRB, FRC, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- fnmadd
others => illegal_inst
);

-- unit internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl
-- op in out A out in out len ext pipe
constant nop_instr : decode_rom_t := (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0');
@ -547,9 +646,28 @@ begin
when 58 =>
v.decode := decode_op_58_array(to_integer(unsigned(f_in.insn(1 downto 0))));

when 59 =>
if HAS_FPU then
-- floating point operations, mostly single-precision
v.decode := decode_op_59_array(to_integer(unsigned(f_in.insn(5 downto 1))));
if f_in.insn(5) = '0' and not std_match(f_in.insn(10 downto 1), "11-1001110") then
vi.override := '1';
end if;
end if;

when 62 =>
v.decode := decode_op_62_array(to_integer(unsigned(f_in.insn(1 downto 0))));

when 63 =>
if HAS_FPU then
-- floating point operations, general and double-precision
if f_in.insn(5) = '0' then
v.decode := decode_op_63l_array(to_integer(unsigned(f_in.insn(4 downto 1) & f_in.insn(10 downto 6))));
else
v.decode := decode_op_63h_array(to_integer(unsigned(f_in.insn(4 downto 1))));
end if;
end if;

when others =>
end case;


@ -11,6 +11,7 @@ use work.insn_helpers.all;
entity decode2 is
generic (
EX1_BYPASS : boolean := true;
HAS_FPU : boolean := true;
-- Non-zero to enable log data collection
LOG_LENGTH : natural := 0
);
@ -73,12 +74,14 @@ architecture behaviour of decode2 is
-- If it's all 0, we don't treat it as a dependency as slow SPRs
-- operations are single issue.
--
assert is_fast_spr(ispr) = '1' or ispr = "000000"
assert is_fast_spr(ispr) = '1' or ispr = "0000000"
report "Decode A says SPR but ISPR is invalid:" &
to_hstring(ispr) severity failure;
return (is_fast_spr(ispr), ispr, reg_data);
elsif t = CIA then
return ('0', (others => '0'), instr_addr);
elsif HAS_FPU and t = FRA then
return ('1', fpr_to_gspr(insn_fra(insn_in)), reg_data);
else
return ('0', (others => '0'), (others => '0'));
end if;
@ -92,6 +95,12 @@ architecture behaviour of decode2 is
case t is
when RB =>
ret := ('1', gpr_to_gspr(insn_rb(insn_in)), reg_data);
when FRB =>
if HAS_FPU then
ret := ('1', fpr_to_gspr(insn_frb(insn_in)), reg_data);
else
ret := ('0', (others => '0'), (others => '0'));
end if;
when CONST_UI =>
ret := ('0', (others => '0'), std_ulogic_vector(resize(unsigned(insn_ui(insn_in)), 64)));
when CONST_SI =>
@ -118,7 +127,7 @@ architecture behaviour of decode2 is
-- ISPR must be either a valid fast SPR number or all 0 for a slow SPR.
-- If it's all 0, we don't treat it as a dependency as slow SPRs
-- operations are single issue.
assert is_fast_spr(ispr) = '1' or ispr = "000000"
assert is_fast_spr(ispr) = '1' or ispr = "0000000"
report "Decode B says SPR but ISPR is invalid:" &
to_hstring(ispr) severity failure;
ret := (is_fast_spr(ispr), ispr, reg_data);
@ -137,6 +146,18 @@ architecture behaviour of decode2 is
return ('1', gpr_to_gspr(insn_rs(insn_in)), reg_data);
when RCR =>
return ('1', gpr_to_gspr(insn_rcreg(insn_in)), reg_data);
when FRS =>
if HAS_FPU then
return ('1', fpr_to_gspr(insn_frt(insn_in)), reg_data);
else
return ('0', (others => '0'), (others => '0'));
end if;
when FRC =>
if HAS_FPU then
return ('1', fpr_to_gspr(insn_frc(insn_in)), reg_data);
else
return ('0', (others => '0'), (others => '0'));
end if;
when NONE =>
return ('0', (others => '0'), (others => '0'));
end case;
@ -150,16 +171,22 @@ architecture behaviour of decode2 is
return ('1', gpr_to_gspr(insn_rt(insn_in)));
when RA =>
return ('1', gpr_to_gspr(insn_ra(insn_in)));
when FRT =>
if HAS_FPU then
return ('1', fpr_to_gspr(insn_frt(insn_in)));
else
return ('0', "0000000");
end if;
when SPR =>
-- ISPR must be either a valid fast SPR number or all 0 for a slow SPR.
-- If it's all 0, we don't treat it as a dependency as slow SPRs
-- operations are single issue.
assert is_fast_spr(ispr) = '1' or ispr = "000000"
assert is_fast_spr(ispr) = '1' or ispr = "0000000"
report "Decode B says SPR but ISPR is invalid:" &
to_hstring(ispr) severity failure;
return (is_fast_spr(ispr), ispr);
when NONE =>
return ('0', "000000");
return ('0', "0000000");
end case;
end;

@ -212,7 +239,7 @@ architecture behaviour of decode2 is
signal gpr_b_bypass : std_ulogic;

signal gpr_c_read_valid : std_ulogic;
signal gpr_c_read : gpr_index_t;
signal gpr_c_read : gspr_index_t;
signal gpr_c_bypass : std_ulogic;

signal cr_write_valid : std_ulogic;
@ -281,11 +308,15 @@ begin
end process;

r_out.read1_reg <= d_in.ispr1 when d_in.decode.input_reg_a = SPR
else fpr_to_gspr(insn_fra(d_in.insn)) when d_in.decode.input_reg_a = FRA and HAS_FPU
else gpr_to_gspr(insn_ra(d_in.insn));
r_out.read2_reg <= d_in.ispr2 when d_in.decode.input_reg_b = SPR
else fpr_to_gspr(insn_frb(d_in.insn)) when d_in.decode.input_reg_b = FRB and HAS_FPU
else gpr_to_gspr(insn_rb(d_in.insn));
r_out.read3_reg <= insn_rcreg(d_in.insn) when d_in.decode.input_reg_c = RCR
else insn_rs(d_in.insn);
r_out.read3_reg <= gpr_to_gspr(insn_rcreg(d_in.insn)) when d_in.decode.input_reg_c = RCR
else fpr_to_gspr(insn_frc(d_in.insn)) when d_in.decode.input_reg_c = FRC and HAS_FPU
else fpr_to_gspr(insn_frt(d_in.insn)) when d_in.decode.input_reg_c = FRS and HAS_FPU
else gpr_to_gspr(insn_rs(d_in.insn));

c_out.read <= d_in.decode.input_cr;

@ -307,7 +338,7 @@ begin
mul_b := (others => '0');

--v.e.input_cr := d_in.decode.input_cr;
--v.e.output_cr := d_in.decode.output_cr;
v.e.output_cr := d_in.decode.output_cr;
decoded_reg_a := decode_input_reg_a (d_in.decode.input_reg_a, d_in.insn, r_in.read1_data, d_in.ispr1,
d_in.nia);
@ -394,11 +425,11 @@ begin
gpr_b_read <= decoded_reg_b.reg;

gpr_c_read_valid <= decoded_reg_c.reg_valid;
gpr_c_read <= gspr_to_gpr(decoded_reg_c.reg);
gpr_c_read <= decoded_reg_c.reg;

cr_write_valid <= d_in.decode.output_cr or decode_rc(d_in.decode.rc, d_in.insn);
cr_bypass_avail <= '0';
if EX1_BYPASS then
if EX1_BYPASS and d_in.decode.unit = ALU then
cr_bypass_avail <= d_in.decode.output_cr;
end if;


@ -7,9 +7,11 @@ package decode_types is
OP_BPERM, OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPRB,
OP_CNTZ, OP_CROP,
OP_DARN, OP_DCBF, OP_DCBST, OP_DCBT, OP_DCBTST,
OP_DCBZ, OP_DIV, OP_DIVE, OP_EXTS,
OP_EXTSWSLI, OP_ICBI, OP_ICBT, OP_ISEL, OP_ISYNC,
OP_DCBZ, OP_DIV, OP_DIVE, OP_EXTS, OP_EXTSWSLI,
OP_FPOP, OP_FPOP_I,
OP_ICBI, OP_ICBT, OP_ISEL, OP_ISYNC,
OP_LOAD, OP_STORE,
OP_FPLOAD, OP_FPSTORE,
OP_MCRXRX, OP_MFCR, OP_MFMSR, OP_MFSPR, OP_MOD,
OP_MTCRF, OP_MTMSRD, OP_MTSPR, OP_MUL_L64,
OP_MUL_H64, OP_MUL_H32, OP_OR,
@ -21,11 +23,11 @@ package decode_types is
OP_BCD, OP_ADDG6S,
OP_FETCH_FAILED
);
type input_reg_a_t is (NONE, RA, RA_OR_ZERO, SPR, CIA);
type input_reg_a_t is (NONE, RA, RA_OR_ZERO, SPR, CIA, FRA);
type input_reg_b_t is (NONE, RB, CONST_UI, CONST_SI, CONST_SI_HI, CONST_UI_HI, CONST_LI, CONST_BD,
CONST_DXHI4, CONST_DS, CONST_M1, CONST_SH, CONST_SH32, SPR);
type input_reg_c_t is (NONE, RS, RCR);
type output_reg_a_t is (NONE, RT, RA, SPR);
CONST_DXHI4, CONST_DS, CONST_M1, CONST_SH, CONST_SH32, SPR, FRB);
type input_reg_c_t is (NONE, RS, RCR, FRC, FRS);
type output_reg_a_t is (NONE, RT, RA, SPR, FRT);
type rc_t is (NONE, ONE, RC);
type carry_in_t is (ZERO, CA, OV, ONE);

@ -47,7 +49,7 @@ package decode_types is

constant TOO_OFFSET : integer := 0;

type unit_t is (NONE, ALU, LDST);
type unit_t is (NONE, ALU, LDST, FPU);
type length_t is (NONE, is1B, is2B, is4B, is8B);

type decode_rom_t is record

@ -13,6 +13,7 @@ use work.ppc_fx_insns.all;
entity execute1 is
generic (
EX1_BYPASS : boolean := true;
HAS_FPU : boolean := true;
-- Non-zero to enable log data collection
LOG_LENGTH : natural := 0
);
@ -26,12 +27,14 @@ entity execute1 is

e_in : in Decode2ToExecute1Type;
l_in : in Loadstore1ToExecute1Type;
fp_in : in FPUToExecute1Type;

ext_irq_in : std_ulogic;

-- asynchronous
l_out : out Execute1ToLoadstore1Type;
f_out : out Execute1ToFetch1Type;
fp_out : out Execute1ToFPUType;

e_out : out Execute1ToWritebackType;

@ -53,6 +56,7 @@ architecture behaviour of execute1 is
f : Execute1ToFetch1Type;
busy: std_ulogic;
terminate: std_ulogic;
fp_exception_next : std_ulogic;
trace_next : std_ulogic;
prev_op : insn_type_t;
lr_update : std_ulogic;
@ -71,7 +75,8 @@ architecture behaviour of execute1 is
end record;
constant reg_type_init : reg_type :=
(e => Execute1ToWritebackInit, f => Execute1ToFetch1Init,
busy => '0', lr_update => '0', terminate => '0', trace_next => '0', prev_op => OP_ILLEGAL,
busy => '0', lr_update => '0', terminate => '0',
fp_exception_next => '0', trace_next => '0', prev_op => OP_ILLEGAL,
mul_in_progress => '0', mul_finish => '0', div_in_progress => '0', cntz_in_progress => '0',
slow_op_insn => OP_ILLEGAL, slow_op_rc => '0', slow_op_oe => '0', slow_op_xerc => xerc_init,
next_lr => (others => '0'), last_nia => (others => '0'), others => (others => '0'));
@ -267,7 +272,7 @@ begin
b_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data2 = '1' else e_in.read_data2;
c_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data3 = '1' else e_in.read_data3;

busy_out <= l_in.busy or r.busy;
busy_out <= l_in.busy or r.busy or fp_in.busy;
valid_in <= e_in.valid and not busy_out;

terminate_out <= r.terminate;
@ -333,6 +338,7 @@ begin
variable spr_val : std_ulogic_vector(63 downto 0);
variable addend : std_ulogic_vector(127 downto 0);
variable do_trace : std_ulogic;
variable fv : Execute1ToFPUType;
begin
result := (others => '0');
sum_with_carry := (others => '0');
@ -346,6 +352,7 @@ begin
v.e := Execute1ToWritebackInit;
lv := Execute1ToLoadstore1Init;
v.f.redirect := '0';
fv := Execute1ToFPUInit;

-- XER forwarding. To avoid having to track XER hazards, we
-- use the previously latched value.
@ -521,9 +528,11 @@ begin
exception_nextpc := '0';
v.e.exc_write_enable := '0';
v.e.exc_write_reg := fast_spr_num(SPR_SRR0);
v.e.exc_write_data := e_in.nia;
if valid_in = '1' then
v.e.exc_write_data := e_in.nia;
v.last_nia := e_in.nia;
else
v.e.exc_write_data := r.last_nia;
end if;

v.e.mode_32bit := not ctrl.msr(MSR_SF);
@ -542,24 +551,36 @@ begin
ctrl_tmp.msr(MSR_PR) <= '0';
ctrl_tmp.msr(MSR_SE) <= '0';
ctrl_tmp.msr(MSR_BE) <= '0';
ctrl_tmp.msr(MSR_FP) <= '0';
ctrl_tmp.msr(MSR_FE0) <= '0';
ctrl_tmp.msr(MSR_FE1) <= '0';
ctrl_tmp.msr(MSR_IR) <= '0';
ctrl_tmp.msr(MSR_DR) <= '0';
ctrl_tmp.msr(MSR_RI) <= '0';
ctrl_tmp.msr(MSR_LE) <= '1';
v.e.valid := '1';
v.trace_next := '0';
v.fp_exception_next := '0';
report "Writing SRR1: " & to_hstring(ctrl.srr1);

elsif r.trace_next = '1' and valid_in = '1' then
-- Generate a trace interrupt rather than executing the next instruction
-- or taking any asynchronous interrupt
v.f.redirect_nia := std_logic_vector(to_unsigned(16#d00#, 64));
ctrl_tmp.srr1(63 - 33) <= '1';
if r.prev_op = OP_LOAD or r.prev_op = OP_ICBI or r.prev_op = OP_ICBT or
r.prev_op = OP_DCBT or r.prev_op = OP_DCBST or r.prev_op = OP_DCBF then
ctrl_tmp.srr1(63 - 35) <= '1';
elsif r.prev_op = OP_STORE or r.prev_op = OP_DCBZ or r.prev_op = OP_DCBTST then
ctrl_tmp.srr1(63 - 36) <= '1';
elsif valid_in = '1' and ((HAS_FPU and r.fp_exception_next = '1') or r.trace_next = '1') then
if HAS_FPU and r.fp_exception_next = '1' then
-- This is used for FP-type program interrupts that
-- become pending due to MSR[FE0,FE1] changing from 00 to non-zero.
v.f.redirect_nia := std_logic_vector(to_unsigned(16#700#, 64));
ctrl_tmp.srr1(63 - 43) <= '1';
ctrl_tmp.srr1(63 - 47) <= '1';
else
-- Generate a trace interrupt rather than executing the next instruction
-- or taking any asynchronous interrupt
v.f.redirect_nia := std_logic_vector(to_unsigned(16#d00#, 64));
ctrl_tmp.srr1(63 - 33) <= '1';
if r.prev_op = OP_LOAD or r.prev_op = OP_ICBI or r.prev_op = OP_ICBT or
r.prev_op = OP_DCBT or r.prev_op = OP_DCBST or r.prev_op = OP_DCBF then
ctrl_tmp.srr1(63 - 35) <= '1';
elsif r.prev_op = OP_STORE or r.prev_op = OP_DCBZ or r.prev_op = OP_DCBTST then
ctrl_tmp.srr1(63 - 36) <= '1';
end if;
end if;
exception := '1';

@ -578,7 +599,19 @@ begin
-- set bit 45 to indicate privileged instruction type interrupt
ctrl_tmp.srr1(63 - 45) <= '1';
report "privileged instruction";

elsif not HAS_FPU and valid_in = '1' and
(e_in.insn_type = OP_FPLOAD or e_in.insn_type = OP_FPSTORE) then
-- make lfd/stfd/lfs/stfs etc. illegal in no-FPU implementations
illegal := '1';

elsif HAS_FPU and valid_in = '1' and ctrl.msr(MSR_FP) = '0' and
(e_in.unit = FPU or e_in.insn_type = OP_FPLOAD or e_in.insn_type = OP_FPSTORE) then
-- generate a floating-point unavailable interrupt
exception := '1';
v.f.redirect_nia := std_logic_vector(to_unsigned(16#800#, 64));
report "FP unavailable interrupt";

elsif valid_in = '1' and e_in.unit = ALU then

report "execute nia " & to_hstring(e_in.nia);
@ -793,6 +826,10 @@ begin
is_branch := '1';
taken_branch := '1';
abs_branch := '1';
if HAS_FPU then
v.fp_exception_next := fp_in.exception and
(a_in(MSR_FE0) or a_in(MSR_FE1));
end if;
do_trace := '0';

when OP_CNTZ =>
@ -964,6 +1001,10 @@ begin
ctrl_tmp.msr(MSR_IR) <= '1';
ctrl_tmp.msr(MSR_DR) <= '1';
end if;
if HAS_FPU then
v.fp_exception_next := fp_in.exception and
(c_in(MSR_FE0) or c_in(MSR_FE1));
end if;
end if;
when OP_MTSPR =>
report "MTSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) &
@ -1080,6 +1121,8 @@ begin
lv.valid := '1';
elsif e_in.unit = NONE then
illegal := '1';
elsif HAS_FPU and e_in.unit = FPU then
fv.valid := '1';
end if;

elsif r.f.redirect = '1' then
@ -1154,7 +1197,17 @@ begin
v.e.valid := '1';
end if;

if illegal = '1' then
-- Generate FP-type program interrupt. fp_in.interrupt will only
-- be set during the execution of a FP instruction.
-- The case where MSR[FE0,FE1] goes from zero to non-zero is
-- handled above by mtmsrd and rfid setting v.fp_exception_next.
if HAS_FPU and fp_in.interrupt = '1' then
v.f.redirect_nia := std_logic_vector(to_unsigned(16#700#, 64));
ctrl_tmp.srr1(63 - 43) <= '1';
exception := '1';
end if;

if illegal = '1' or (HAS_FPU and fp_in.illegal = '1') then
exception := '1';
v.f.redirect_nia := std_logic_vector(to_unsigned(16#700#, 64));
-- Since we aren't doing Hypervisor emulation assist (0xe40) we
@ -1200,7 +1253,6 @@ begin
end if;
v.e.exc_write_enable := '1';
v.e.exc_write_reg := fast_spr_num(SPR_SRR0);
v.e.exc_write_data := r.last_nia;
report "ldst exception writing srr0=" & to_hstring(r.last_nia);
end if;

@ -1225,7 +1277,7 @@ begin
lv.addr1 := a_in;
lv.addr2 := b_in;
lv.data := c_in;
lv.write_reg := gspr_to_gpr(e_in.write_reg);
lv.write_reg := e_in.write_reg;
lv.length := e_in.data_len;
lv.byte_reverse := e_in.byte_reverse xnor ctrl.msr(MSR_LE);
lv.sign_extend := e_in.sign_extend;
@ -1243,6 +1295,20 @@ begin
lv.virt_mode := ctrl.msr(MSR_DR);
lv.priv_mode := not ctrl.msr(MSR_PR);
lv.mode_32bit := not ctrl.msr(MSR_SF);
lv.is_32bit := e_in.is_32bit;

-- Outputs to FPU
fv.op := e_in.insn_type;
fv.nia := e_in.nia;
fv.insn := e_in.insn;
fv.single := e_in.is_32bit;
fv.fe_mode := ctrl.msr(MSR_FE0) & ctrl.msr(MSR_FE1);
fv.fra := a_in;
fv.frb := b_in;
fv.frc := c_in;
fv.frt := e_in.write_reg;
fv.rc := e_in.rc;
fv.out_cr := e_in.output_cr;

-- Update registers
rin <= v;
@ -1251,6 +1317,7 @@ begin
f_out <= r.f;
l_out <= lv;
e_out <= r.e;
fp_out <= fv;
flush_out <= f_out.redirect;

exception_log <= exception;

@ -14,6 +14,7 @@ entity toplevel is
RAM_INIT_FILE : string := "firmware.hex";
RESET_LOW : boolean := true;
CLK_FREQUENCY : positive := 100000000;
HAS_FPU : boolean := true;
USE_LITEDRAM : boolean := false;
NO_BRAM : boolean := false;
DISABLE_FLATTEN_CORE : boolean := false;
@ -168,6 +169,7 @@ begin
RAM_INIT_FILE => RAM_INIT_FILE,
SIM => false,
CLK_FREQ => CLK_FREQUENCY,
HAS_FPU => HAS_FPU,
HAS_DRAM => USE_LITEDRAM,
DRAM_SIZE => 256 * 1024 * 1024,
DRAM_INIT_SIZE => PAYLOAD_SIZE,

@ -11,6 +11,7 @@ entity toplevel is
RESET_LOW : boolean := true;
CLK_INPUT : positive := 100000000;
CLK_FREQUENCY : positive := 100000000;
HAS_FPU : boolean := true;
DISABLE_FLATTEN_CORE : boolean := false;
UART_IS_16550 : boolean := true
);
@ -68,6 +69,7 @@ begin
RAM_INIT_FILE => RAM_INIT_FILE,
SIM => false,
CLK_FREQ => CLK_FREQUENCY,
HAS_FPU => HAS_FPU,
DISABLE_FLATTEN_CORE => DISABLE_FLATTEN_CORE,
UART0_IS_16550 => UART_IS_16550
)

@ -14,6 +14,7 @@ entity toplevel is
RAM_INIT_FILE : string := "firmware.hex";
RESET_LOW : boolean := true;
CLK_FREQUENCY : positive := 100000000;
HAS_FPU : boolean := true;
USE_LITEDRAM : boolean := false;
NO_BRAM : boolean := false;
DISABLE_FLATTEN_CORE : boolean := false;
@ -120,6 +121,7 @@ begin
RAM_INIT_FILE => RAM_INIT_FILE,
SIM => false,
CLK_FREQ => CLK_FREQUENCY,
HAS_FPU => HAS_FPU,
HAS_DRAM => USE_LITEDRAM,
DRAM_SIZE => 512 * 1024 * 1024,
DRAM_INIT_SIZE => PAYLOAD_SIZE,

2568
fpu.vhdl

File diff suppressed because it is too large Load Diff

@ -2,6 +2,9 @@ library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.common.all;

entity gpr_hazard is
generic (
PIPELINE_DEPTH : natural := 1
@ -15,13 +18,13 @@ entity gpr_hazard is
issuing : in std_ulogic;

gpr_write_valid_in : in std_ulogic;
gpr_write_in : in std_ulogic_vector(5 downto 0);
gpr_write_in : in gspr_index_t;
bypass_avail : in std_ulogic;
gpr_read_valid_in : in std_ulogic;
gpr_read_in : in std_ulogic_vector(5 downto 0);
gpr_read_in : in gspr_index_t;

ugpr_write_valid : in std_ulogic;
ugpr_write_reg : in std_ulogic_vector(5 downto 0);
ugpr_write_reg : in gspr_index_t;

stall_out : out std_ulogic;
use_bypass : out std_ulogic
@ -31,9 +34,9 @@ architecture behaviour of gpr_hazard is
type pipeline_entry_type is record
valid : std_ulogic;
bypass : std_ulogic;
gpr : std_ulogic_vector(5 downto 0);
gpr : gspr_index_t;
ugpr_valid : std_ulogic;
ugpr : std_ulogic_vector(5 downto 0);
ugpr : gspr_index_t;
end record;
constant pipeline_entry_init : pipeline_entry_type := (valid => '0', bypass => '0', gpr => (others => '0'),
ugpr_valid => '0', ugpr => (others => '0'));

@ -25,6 +25,10 @@ package helpers is
function byte_reverse(val: std_ulogic_vector(63 downto 0); size: integer) return std_ulogic_vector;

function sign_extend(val: std_ulogic_vector(63 downto 0); size: natural) return std_ulogic_vector;

function bit_reverse(a: std_ulogic_vector) return std_ulogic_vector;
function bit_number(a: std_ulogic_vector(63 downto 0)) return std_ulogic_vector;
function count_left_zeroes(val: std_ulogic_vector) return std_ulogic_vector;
end package helpers;

package body helpers is
@ -206,4 +210,53 @@ package body helpers is
return std_ulogic_vector(ret);

end;

-- Reverse the order of bits in a word
function bit_reverse(a: std_ulogic_vector) return std_ulogic_vector is
variable ret: std_ulogic_vector(a'left downto a'right);
begin
for i in a'right to a'left loop
ret(a'left + a'right - i) := a(i);
end loop;
return ret;
end;

-- If there is only one bit set in a doubleword, return its bit number
-- (counting from the right). Each bit of the result is obtained by
-- ORing together 32 bits of the input:
-- bit 0 = a[1] or a[3] or a[5] or ...
-- bit 1 = a[2] or a[3] or a[6] or a[7] or ...
-- bit 2 = a[4..7] or a[12..15] or ...
-- bit 5 = a[32..63] ORed together
function bit_number(a: std_ulogic_vector(63 downto 0)) return std_ulogic_vector is
variable ret: std_ulogic_vector(5 downto 0);
variable stride: natural;
variable bit: std_ulogic;
variable k: natural;
begin
stride := 2;
for i in 0 to 5 loop
bit := '0';
for j in 0 to (64 / stride) - 1 loop
k := j * stride;
bit := bit or (or a(k + stride - 1 downto k + (stride / 2)));
end loop;
ret(i) := bit;
stride := stride * 2;
end loop;
return ret;
end;

-- Count leading zeroes operation
-- Assumes the value passed in is not zero (if it is, zero is returned)
function count_left_zeroes(val: std_ulogic_vector) return std_ulogic_vector is
variable rev: std_ulogic_vector(val'left downto val'right);
variable sum: std_ulogic_vector(val'left downto val'right);
variable onehot: std_ulogic_vector(val'left downto val'right);
begin
rev := bit_reverse(val);
sum := std_ulogic_vector(- signed(rev));
onehot := sum and rev;
return bit_number(std_ulogic_vector(resize(unsigned(onehot), 64)));
end;
end package body helpers;

@ -37,6 +37,11 @@ package insn_helpers is
function insn_sh (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_me (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_mb (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_frt (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_fra (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_frb (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_frc (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_u (insn_in : std_ulogic_vector) return std_ulogic_vector;
end package insn_helpers;

package body insn_helpers is
@ -214,4 +219,29 @@ package body insn_helpers is
begin
return insn_in(5) & insn_in(10 downto 6);
end;

function insn_frt(insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(25 downto 21);
end;

function insn_fra(insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(20 downto 16);
end;

function insn_frb(insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(15 downto 11);
end;

function insn_frc(insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(10 downto 6);
end;

function insn_u(insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(15 downto 12);
end;
end package body insn_helpers;

@ -5,12 +5,15 @@ use ieee.numeric_std.all;
library work;
use work.decode_types.all;
use work.common.all;
use work.insn_helpers.all;
use work.helpers.all;

-- 2 cycle LSU
-- We calculate the address in the first cycle

entity loadstore1 is
generic (
HAS_FPU : boolean := true;
-- Non-zero to enable log data collection
LOG_LENGTH : natural := 0
);
@ -42,10 +45,12 @@ architecture behave of loadstore1 is

-- State machine for unaligned loads/stores
type state_t is (IDLE, -- ready for instruction
FPR_CONV, -- converting double to float for store
SECOND_REQ, -- send 2nd request of unaligned xfer
ACK_WAIT, -- waiting for ack from dcache
MMU_LOOKUP, -- waiting for MMU to look up translation
TLBIE_WAIT, -- waiting for MMU to finish doing a tlbie
FINISH_LFS, -- write back converted SP data for lfs*
COMPLETE -- extra cycle to complete an operation
);

@ -58,7 +63,7 @@ architecture behave of loadstore1 is
addr : std_ulogic_vector(63 downto 0);
store_data : std_ulogic_vector(63 downto 0);
load_data : std_ulogic_vector(63 downto 0);
write_reg : gpr_index_t;
write_reg : gspr_index_t;
length : std_ulogic_vector(3 downto 0);
byte_reverse : std_ulogic;
sign_extend : std_ulogic;
@ -86,6 +91,11 @@ architecture behave of loadstore1 is
do_update : std_ulogic;
extra_cycle : std_ulogic;
mode_32bit : std_ulogic;
load_sp : std_ulogic;
ld_sp_data : std_ulogic_vector(31 downto 0);
ld_sp_nz : std_ulogic;
ld_sp_lz : std_ulogic_vector(5 downto 0);
st_sp_data : std_ulogic_vector(31 downto 0);
end record;

type byte_sel_t is array(0 to 7) of std_ulogic;
@ -95,6 +105,9 @@ architecture behave of loadstore1 is
signal r, rin : reg_stage_t;
signal lsu_sum : std_ulogic_vector(63 downto 0);

signal store_sp_data : std_ulogic_vector(31 downto 0);
signal load_dp_data : std_ulogic_vector(63 downto 0);

-- Generate byte enables from sizes
function length_to_sel(length : in std_logic_vector(3 downto 0)) return std_ulogic_vector is
begin
@ -125,6 +138,72 @@ architecture behave of loadstore1 is
to_integer(unsigned(address))));
end function xfer_data_sel;

-- 23-bit right shifter for DP -> SP float conversions
function shifter_23r(frac: std_ulogic_vector(22 downto 0); shift: unsigned(4 downto 0))
return std_ulogic_vector is
variable fs1 : std_ulogic_vector(22 downto 0);
variable fs2 : std_ulogic_vector(22 downto 0);
begin
case shift(1 downto 0) is
when "00" =>
fs1 := frac;
when "01" =>
fs1 := '0' & frac(22 downto 1);
when "10" =>
fs1 := "00" & frac(22 downto 2);
when others =>
fs1 := "000" & frac(22 downto 3);
end case;
case shift(4 downto 2) is
when "000" =>
fs2 := fs1;
when "001" =>
fs2 := x"0" & fs1(22 downto 4);
when "010" =>
fs2 := x"00" & fs1(22 downto 8);
when "011" =>
fs2 := x"000" & fs1(22 downto 12);
when "100" =>
fs2 := x"0000" & fs1(22 downto 16);
when others =>
fs2 := x"00000" & fs1(22 downto 20);
end case;
return fs2;
end;

-- 23-bit left shifter for SP -> DP float conversions
function shifter_23l(frac: std_ulogic_vector(22 downto 0); shift: unsigned(4 downto 0))
return std_ulogic_vector is
variable fs1 : std_ulogic_vector(22 downto 0);
variable fs2 : std_ulogic_vector(22 downto 0);
begin
case shift(1 downto 0) is
when "00" =>
fs1 := frac;
when "01" =>
fs1 := frac(21 downto 0) & '0';
when "10" =>
fs1 := frac(20 downto 0) & "00";
when others =>
fs1 := frac(19 downto 0) & "000";
end case;
case shift(4 downto 2) is
when "000" =>
fs2 := fs1;
when "001" =>
fs2 := fs1(18 downto 0) & x"0" ;
when "010" =>
fs2 := fs1(14 downto 0) & x"00";
when "011" =>
fs2 := fs1(10 downto 0) & x"000";
when "100" =>
fs2 := fs1(6 downto 0) & x"0000";
when others =>
fs2 := fs1(2 downto 0) & x"00000";
end case;
return fs2;
end;

begin
-- Calculate the address in the first cycle
lsu_sum <= std_ulogic_vector(unsigned(l_in.addr1) + unsigned(l_in.addr2)) when l_in.valid = '1' else (others => '0');
@ -142,6 +221,59 @@ begin
end if;
end process;

ls_fp_conv: if HAS_FPU generate
-- Convert DP data to SP for stfs
dp_to_sp: process(all)
variable exp : unsigned(10 downto 0);
variable frac : std_ulogic_vector(22 downto 0);
variable shift : unsigned(4 downto 0);
begin
store_sp_data(31) <= l_in.data(63);
store_sp_data(30 downto 0) <= (others => '0');
exp := unsigned(l_in.data(62 downto 52));
if exp > 896 then
store_sp_data(30) <= l_in.data(62);
store_sp_data(29 downto 0) <= l_in.data(58 downto 29);
elsif exp >= 874 then
-- denormalization required
frac := '1' & l_in.data(51 downto 30);
shift := 0 - exp(4 downto 0);
store_sp_data(22 downto 0) <= shifter_23r(frac, shift);
end if;
end process;

-- Convert SP data to DP for lfs
sp_to_dp: process(all)
variable exp : unsigned(7 downto 0);
variable exp_dp : unsigned(10 downto 0);
variable exp_nz : std_ulogic;
variable exp_ao : std_ulogic;
variable frac : std_ulogic_vector(22 downto 0);
variable frac_shift : unsigned(4 downto 0);
begin
frac := r.ld_sp_data(22 downto 0);
exp := unsigned(r.ld_sp_data(30 downto 23));
exp_nz := or (r.ld_sp_data(30 downto 23));
exp_ao := and (r.ld_sp_data(30 downto 23));
frac_shift := (others => '0');
if exp_ao = '1' then
exp_dp := to_unsigned(2047, 11); -- infinity or NaN
elsif exp_nz = '1' then
exp_dp := 896 + resize(exp, 11); -- finite normalized value
elsif r.ld_sp_nz = '0' then
exp_dp := to_unsigned(0, 11); -- zero
else
-- denormalized SP operand, need to normalize
exp_dp := 896 - resize(unsigned(r.ld_sp_lz), 11);
frac_shift := unsigned(r.ld_sp_lz(4 downto 0)) + 1;
end if;
load_dp_data(63) <= r.ld_sp_data(31);
load_dp_data(62 downto 52) <= std_ulogic_vector(exp_dp);
load_dp_data(51 downto 29) <= shifter_23l(frac, frac_shift);
load_dp_data(28 downto 0) <= (others => '0');
end process;
end generate;

loadstore1_1: process(all)
variable v : reg_stage_t;
variable brev_lenm1 : unsigned(2 downto 0);
@ -162,6 +294,9 @@ begin
variable data_permuted : std_ulogic_vector(63 downto 0);
variable data_trimmed : std_ulogic_vector(63 downto 0);
variable store_data : std_ulogic_vector(63 downto 0);
variable data_in : std_ulogic_vector(63 downto 0);
variable byte_rev : std_ulogic;
variable length : std_ulogic_vector(3 downto 0);
variable use_second : byte_sel_t;
variable trim_ctl : trim_ctl_t;
variable negative : std_ulogic;
@ -173,6 +308,8 @@ begin
variable mmu_mtspr : std_ulogic;
variable itlb_fault : std_ulogic;
variable misaligned : std_ulogic;
variable fp_reg_conv : std_ulogic;
variable lfs_done : std_ulogic;
begin
v := r;
req := '0';
@ -182,8 +319,10 @@ begin
sprn := std_ulogic_vector(to_unsigned(decode_spr_num(l_in.insn), 10));
dsisr := (others => '0');
mmureq := '0';
fp_reg_conv := '0';

write_enable := '0';
lfs_done := '0';

do_update := r.do_update;
v.do_update := '0';
@ -242,19 +381,38 @@ begin
end case;
end loop;

-- Byte reversing and rotating for stores
-- Done in the first cycle (when l_in.valid = 1)
if HAS_FPU then
-- Single-precision FP conversion
v.st_sp_data := store_sp_data;
v.ld_sp_data := data_trimmed(31 downto 0);
v.ld_sp_nz := or (data_trimmed(22 downto 0));
v.ld_sp_lz := count_left_zeroes(data_trimmed(22 downto 0));
end if;

-- Byte reversing and rotating for stores.
-- Done in the first cycle (when l_in.valid = 1) for integer stores
-- and DP float stores, and in the second cycle for SP float stores.
store_data := r.store_data;
if l_in.valid = '1' then
byte_offset := unsigned(lsu_sum(2 downto 0));
if l_in.valid = '1' or (HAS_FPU and r.state = FPR_CONV) then
if HAS_FPU and r.state = FPR_CONV then
data_in := x"00000000" & r.st_sp_data;
byte_offset := unsigned(r.addr(2 downto 0));
byte_rev := r.byte_reverse;
length := r.length;
else
data_in := l_in.data;
byte_offset := unsigned(lsu_sum(2 downto 0));
byte_rev := l_in.byte_reverse;
length := l_in.length;
end if;
brev_lenm1 := "000";
if l_in.byte_reverse = '1' then
brev_lenm1 := unsigned(l_in.length(2 downto 0)) - 1;
if byte_rev = '1' then
brev_lenm1 := unsigned(length(2 downto 0)) - 1;
end if;
for i in 0 to 7 loop
k := (to_unsigned(i, 3) - byte_offset) xor brev_lenm1;
j := to_integer(k) * 8;
store_data(i * 8 + 7 downto i * 8) := l_in.data(j + 7 downto j);
store_data(i * 8 + 7 downto i * 8) := data_in(j + 7 downto j);
end loop;
end if;
v.store_data := store_data;
@ -289,6 +447,14 @@ begin
case r.state is
when IDLE =>

when FPR_CONV =>
req := '1';
if r.second_bytes /= "00000000" then
v.state := SECOND_REQ;
else
v.state := ACK_WAIT;
end if;

when SECOND_REQ =>
req := '1';
v.state := ACK_WAIT;
@ -320,8 +486,13 @@ begin
v.load_data := data_permuted;
end if;
else
write_enable := r.load;
if r.extra_cycle = '1' then
write_enable := r.load and not r.load_sp;
if HAS_FPU and r.load_sp = '1' then
-- SP to DP conversion takes a cycle
-- Write back rA update in this cycle if needed
do_update := r.update;
v.state := FINISH_LFS;
elsif r.extra_cycle = '1' then
-- loads with rA update need an extra cycle
v.state := COMPLETE;
v.do_update := r.update;
@ -359,6 +530,9 @@ begin

when TLBIE_WAIT =>

when FINISH_LFS =>
lfs_done := '1';

when COMPLETE =>
exception := r.align_intr;

@ -392,6 +566,7 @@ begin
v.nc := l_in.ci;
v.virt_mode := l_in.virt_mode;
v.priv_mode := l_in.priv_mode;
v.load_sp := '0';
v.wait_dcache := '0';
v.wait_mmu := '0';
v.do_update := '0';
@ -431,6 +606,27 @@ begin
v.align_intr := v.nc;
req := '1';
v.dcbz := '1';
when OP_FPSTORE =>
if HAS_FPU then
if l_in.is_32bit = '1' then
v.state := FPR_CONV;
fp_reg_conv := '1';
else
req := '1';
end if;
end if;
when OP_FPLOAD =>
if HAS_FPU then
v.load := '1';
req := '1';
-- Allow an extra cycle for SP->DP precision conversion
-- or RA update
v.extra_cycle := l_in.update;
if l_in.is_32bit = '1' then
v.load_sp := '1';
v.extra_cycle := '1';
end if;
end if;
when OP_TLBIE =>
mmureq := '1';
v.tlbie := '1';
@ -486,7 +682,7 @@ begin
end if;
end if;

v.busy := req or mmureq or mmu_mtspr;
v.busy := req or mmureq or mmu_mtspr or fp_reg_conv;
end if;

-- Update outputs to dcache
@ -523,8 +719,12 @@ begin
l_out.write_data <= r.sprval;
elsif do_update = '1' then
l_out.write_enable <= '1';
l_out.write_reg <= r.update_reg;
l_out.write_reg <= gpr_to_gspr(r.update_reg);
l_out.write_data <= r.addr;
elsif lfs_done = '1' then
l_out.write_enable <= '1';
l_out.write_reg <= r.write_reg;
l_out.write_data <= load_dp_data;
else
l_out.write_enable <= write_enable;
l_out.write_reg <= r.write_reg;

@ -23,6 +23,7 @@ filesets:
- cr_hazard.vhdl
- control.vhdl
- execute1.vhdl
- fpu.vhdl
- loadstore1.vhdl
- mmu.vhdl
- dcache.vhdl
@ -132,6 +133,7 @@ targets:
- disable_flatten_core
- log_length=2048
- uart_is_16550
- has_fpu
tools:
vivado: {part : xc7a100tcsg324-1}
toplevel : toplevel
@ -215,6 +217,7 @@ targets:
- spi_flash_offset=10485760
- log_length=2048
- uart_is_16550
- has_fpu
tools:
vivado: {part : xc7a200tsbg484-1}
toplevel : toplevel
@ -231,6 +234,7 @@ targets:
- spi_flash_offset=10485760
- log_length=2048
- uart_is_16550
- has_fpu
generate: [litedram_nexys_video]
tools:
vivado: {part : xc7a200tsbg484-1}
@ -249,6 +253,7 @@ targets:
- log_length=512
- uart_is_16550
- has_uart1
- has_fpu=false
tools:
vivado: {part : xc7a35ticsg324-1L}
toplevel : toplevel
@ -267,6 +272,7 @@ targets:
- log_length=512
- uart_is_16550
- has_uart1
- has_fpu=false
generate: [litedram_arty, liteeth_arty]
tools:
vivado: {part : xc7a35ticsg324-1L}
@ -285,6 +291,7 @@ targets:
- log_length=2048
- uart_is_16550
- has_uart1
- has_fpu
tools:
vivado: {part : xc7a100ticsg324-1L}
toplevel : toplevel
@ -303,6 +310,7 @@ targets:
- log_length=2048
- uart_is_16550
- has_uart1
- has_fpu
generate: [litedram_arty, liteeth_arty]
tools:
vivado: {part : xc7a100ticsg324-1L}
@ -320,6 +328,7 @@ targets:
- disable_flatten_core
- log_length=512
- uart_is_16550
- has_fpu=false
tools:
vivado: {part : xc7a35tcpg236-1}
toplevel : toplevel
@ -380,6 +389,12 @@ parameters:
paramtype : generic
default : 100000000

has_fpu:
datatype : bool
description : Include a floating-point unit in the core
paramtype : generic
default : true

disable_flatten_core:
datatype : bool
description : Prevent Vivado from flattening the main core components

@ -8,6 +8,7 @@ use work.common.all;
entity register_file is
generic (
SIM : boolean := false;
HAS_FPU : boolean := true;
-- Non-zero to enable log data collection
LOG_LENGTH : natural := 0
);
@ -28,12 +29,12 @@ entity register_file is
sim_dump : in std_ulogic;
sim_dump_done : out std_ulogic;

log_out : out std_ulogic_vector(70 downto 0)
log_out : out std_ulogic_vector(71 downto 0)
);
end entity register_file;

architecture behaviour of register_file is
type regfile is array(0 to 63) of std_ulogic_vector(63 downto 0);
type regfile is array(0 to 127) of std_ulogic_vector(63 downto 0);
signal registers : regfile := (others => (others => '0'));
signal rd_port_b : std_ulogic_vector(63 downto 0);
signal dbg_data : std_ulogic_vector(63 downto 0);
@ -41,53 +42,73 @@ architecture behaviour of register_file is
begin
-- synchronous writes
register_write_0: process(clk)
variable w_addr : gspr_index_t;
begin
if rising_edge(clk) then
if w_in.write_enable = '1' then
if w_in.write_reg(5) = '0' then
report "Writing GPR " & to_hstring(w_in.write_reg) & " " & to_hstring(w_in.write_data);
else
report "Writing GSPR " & to_hstring(w_in.write_reg) & " " & to_hstring(w_in.write_data);
end if;
w_addr := w_in.write_reg;
if HAS_FPU and w_addr(6) = '1' then
report "Writing FPR " & to_hstring(w_addr(4 downto 0)) & " " & to_hstring(w_in.write_data);
else
w_addr(6) := '0';
if w_addr(5) = '0' then
report "Writing GPR " & to_hstring(w_addr) & " " & to_hstring(w_in.write_data);
else
report "Writing GSPR " & to_hstring(w_addr) & " " & to_hstring(w_in.write_data);
end if;
end if;
assert not(is_x(w_in.write_data)) and not(is_x(w_in.write_reg)) severity failure;
registers(to_integer(unsigned(w_in.write_reg))) <= w_in.write_data;
registers(to_integer(unsigned(w_addr))) <= w_in.write_data;
end if;
end if;
end process register_write_0;

-- asynchronous reads
register_read_0: process(all)
variable b_addr : gspr_index_t;
variable a_addr, b_addr, c_addr : gspr_index_t;
variable w_addr : gspr_index_t;
begin
a_addr := d_in.read1_reg;
b_addr := d_in.read2_reg;
c_addr := d_in.read3_reg;
w_addr := w_in.write_reg;
if not HAS_FPU then
-- Make it obvious that we only want 64 GSPRs for a no-FPU implementation
a_addr(6) := '0';
b_addr(6) := '0';
c_addr(6) := '0';
w_addr(6) := '0';
end if;
if d_in.read1_enable = '1' then
report "Reading GPR " & to_hstring(d_in.read1_reg) & " " & to_hstring(registers(to_integer(unsigned(d_in.read1_reg))));
report "Reading GPR " & to_hstring(a_addr) & " " & to_hstring(registers(to_integer(unsigned(a_addr))));
end if;
if d_in.read2_enable = '1' then
report "Reading GPR " & to_hstring(d_in.read2_reg) & " " & to_hstring(registers(to_integer(unsigned(d_in.read2_reg))));
report "Reading GPR " & to_hstring(b_addr) & " " & to_hstring(registers(to_integer(unsigned(b_addr))));
end if;
if d_in.read3_enable = '1' then
report "Reading GPR " & to_hstring(d_in.read3_reg) & " " & to_hstring(registers(to_integer(unsigned(d_in.read3_reg))));
report "Reading GPR " & to_hstring(c_addr) & " " & to_hstring(registers(to_integer(unsigned(c_addr))));
end if;
d_out.read1_data <= registers(to_integer(unsigned(d_in.read1_reg)));
d_out.read1_data <= registers(to_integer(unsigned(a_addr)));
-- B read port is multiplexed with reads from the debug circuitry
if d_in.read2_enable = '0' and dbg_gpr_req = '1' and dbg_ack = '0' then
b_addr := dbg_gpr_addr;
else
b_addr := d_in.read2_reg;
if not HAS_FPU then
b_addr(6) := '0';
end if;
end if;
rd_port_b <= registers(to_integer(unsigned(b_addr)));
d_out.read2_data <= rd_port_b;
d_out.read3_data <= registers(to_integer(unsigned(gpr_to_gspr(d_in.read3_reg))));
d_out.read3_data <= registers(to_integer(unsigned(c_addr)));

-- Forward any written data
if w_in.write_enable = '1' then
if d_in.read1_reg = w_in.write_reg then
if a_addr = w_addr then
d_out.read1_data <= w_in.write_data;
end if;
if d_in.read2_reg = w_in.write_reg then
if b_addr = w_addr then
d_out.read2_data <= w_in.write_data;
end if;
if gpr_to_gspr(d_in.read3_reg) = w_in.write_reg then
if c_addr = w_addr then
d_out.read3_data <= w_in.write_data;
end if;
end if;
@ -136,7 +157,7 @@ begin
end generate;

rf_log: if LOG_LENGTH > 0 generate
signal log_data : std_ulogic_vector(70 downto 0);
signal log_data : std_ulogic_vector(71 downto 0);
begin
reg_log: process(clk)
begin

@ -58,7 +58,7 @@ struct log_entry {
u64 ls_lo_valid: 1;
u64 ls_eo_except: 1;
u64 ls_stall_out: 1;
u64 pad2: 2;
u64 pad2: 1;
u64 dc_state: 3;
u64 dc_ra_valid: 1;
u64 dc_tlb_way: 3;
@ -74,7 +74,7 @@ struct log_entry {
u64 cr_wr_mask: 8;
u64 cr_wr_data: 4;
u64 cr_wr_enable: 1;
u64 reg_wr_reg: 6;
u64 reg_wr_reg: 7;
u64 reg_wr_enable: 1;

u64 reg_wr_data;
@ -84,17 +84,17 @@ struct log_entry {
#define FLGA(i, y, z) (log.i? y: z)
#define PNIA(f) (full_nia[log.f] & 0xff)

const char *units[4] = { "--", "al", "ls", "?3" };
const char *units[4] = { "--", "al", "ls", "fp" };
const char *ops[64] =
{
"illegal", "nop ", "add ", "and ", "attn ", "b ", "bc ", "bcreg ",
"bperm ", "cmp ", "cmpb ", "cmpeqb ", "cmprb ", "cntz ", "crop ", "darn ",
"dcbf ", "dcbst ", "dcbt ", "dcbtst ", "dcbz ", "div ", "dive ", "exts ",
"extswsl", "icbi ", "icbt ", "isel ", "isync ", "ld ", "st ", "mcrxrx ",
"mfcr ", "mfmsr ", "mfspr ", "mod ", "mtcrf ", "mtmsr ", "mtspr ", "mull64 ",
"mulh64 ", "mulh32 ", "or ", "popcnt ", "prty ", "rfid ", "rlc ", "rlcl ",
"rlcr ", "sc ", "setb ", "shl ", "shr ", "sync ", "tlbie ", "trap ",
"xor ", "bcd ", "addg6s ", "ffail ", "?60 ", "?61 ", "?62 ", "?63 "
"extswsl", "fpop ", "fpopi ", "icbi ", "icbt ", "isel ", "isync ", "ld ",
"st ", "fpload ", "fpstore", "mcrxrx ", "mfcr ", "mfmsr ", "mfspr ", "mod ",
"mtcrf ", "mtmsr ", "mtspr ", "mull64 ", "mulh64 ", "mulh32 ", "or ", "popcnt ",
"prty ", "rfid ", "rlc ", "rlcl ", "rlcr ", "sc ", "setb ", "shl ",
"shr ", "sync ", "tlbie ", "trap ", "xor ", "bcd ", "addg6s ", "ffail ",
};

const char *spr_names[13] =

@ -52,6 +52,7 @@ entity soc is
RAM_INIT_FILE : string;
CLK_FREQ : positive;
SIM : boolean;
HAS_FPU : boolean := true;
DISABLE_FLATTEN_CORE : boolean := false;
HAS_DRAM : boolean := false;
DRAM_SIZE : integer := 0;
@ -253,6 +254,7 @@ begin
processor: entity work.core
generic map(
SIM => SIM,
HAS_FPU => HAS_FPU,
DISABLE_FLATTEN => DISABLE_FLATTEN_CORE,
ALT_RESET_ADDRESS => (23 downto 0 => '0', others => '1'),
LOG_LENGTH => LOG_LENGTH

@ -0,0 +1,3 @@
TEST=fpu

include ../Makefile.test

File diff suppressed because it is too large Load Diff

@ -0,0 +1,132 @@
/* Copyright 2013-2014 IBM Corp.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/* Load an immediate 64-bit value into a register */
#define LOAD_IMM64(r, e) \
lis r,(e)@highest; \
ori r,r,(e)@higher; \
rldicr r,r, 32, 31; \
oris r,r, (e)@h; \
ori r,r, (e)@l;

.section ".head","ax"

/*
* Microwatt currently enters in LE mode at 0x0, so we don't need to
* do any endian fix ups
*/
. = 0
.global _start
_start:
LOAD_IMM64(%r10,__bss_start)
LOAD_IMM64(%r11,__bss_end)
subf %r11,%r10,%r11
addi %r11,%r11,63
srdi. %r11,%r11,6
beq 2f
mtctr %r11
1: dcbz 0,%r10
addi %r10,%r10,64
bdnz 1b

2: LOAD_IMM64(%r1,__stack_top)
li %r0,0
stdu %r0,-16(%r1)
LOAD_IMM64(%r10, die)
mtsprg0 %r10
LOAD_IMM64(%r12, main)
mtctr %r12
bctrl
die: attn // terminate on exit
b .

.global trapit
trapit:
mflr %r0
std %r0,16(%r1)
stdu %r1,-256(%r1)
mtsprg1 %r1
r = 14
.rept 18
std r,r*8(%r1)
r = r + 1
.endr
mfcr %r0
stw %r0,13*8(%r1)
LOAD_IMM64(%r10, ret)
mtsprg0 %r10
mr %r12,%r4
mtctr %r4
bctrl
ret:
mfsprg1 %r1
LOAD_IMM64(%r10, die)
mtsprg0 %r10
r = 14
.rept 18
ld r,r*8(%r1)
r = r + 1
.endr
lwz %r0,13*8(%r1)
mtcr %r0
ld %r0,256+16(%r1)
addi %r1,%r1,256
mtlr %r0
blr

.global do_rfid
do_rfid:
mtsrr1 %r3
LOAD_IMM64(%r4, do_blr)
mtsrr0 %r4
rfid
blr

.global do_blr
do_blr:
blr

#define EXCEPTION(nr) \
.= nr ;\
mfsprg0 %r0 ;\
mtctr %r0 ;\
li %r3,nr ;\
bctr

EXCEPTION(0x300)
EXCEPTION(0x380)
EXCEPTION(0x400)
EXCEPTION(0x480)
EXCEPTION(0x500)
EXCEPTION(0x600)
EXCEPTION(0x700)
EXCEPTION(0x800)
EXCEPTION(0x900)
EXCEPTION(0x980)
EXCEPTION(0xa00)
EXCEPTION(0xb00)
EXCEPTION(0xc00)
EXCEPTION(0xd00)
EXCEPTION(0xe00)
EXCEPTION(0xe20)
EXCEPTION(0xe40)
EXCEPTION(0xe60)
EXCEPTION(0xe80)
EXCEPTION(0xf00)
EXCEPTION(0xf20)
EXCEPTION(0xf40)
EXCEPTION(0xf60)
EXCEPTION(0xf80)

@ -0,0 +1,27 @@
SECTIONS
{
. = 0;
_start = .;
.head : {
KEEP(*(.head))
}
. = ALIGN(0x1000);
.text : { *(.text) *(.text.*) *(.rodata) *(.rodata.*) }
. = ALIGN(0x1000);
.data : { *(.data) *(.data.*) *(.got) *(.toc) }
. = ALIGN(0x80);
__bss_start = .;
.bss : {
*(.dynsbss)
*(.sbss)
*(.scommon)
*(.dynbss)
*(.bss)
*(.common)
*(.bss.*)
}
. = ALIGN(0x80);
__bss_end = .;
. = . + 0x4000;
__stack_top = .;
}

Binary file not shown.

@ -0,0 +1,23 @@
test 01:PASS
test 02:PASS
test 03:PASS
test 04:PASS
test 05:PASS
test 06:PASS
test 07:PASS
test 08:PASS
test 09:PASS
test 10:PASS
test 11:PASS
test 12:PASS
test 13:PASS
test 14:PASS
test 15:PASS
test 16:PASS
test 17:PASS
test 18:PASS
test 19:PASS
test 20:PASS
test 21:PASS
test 22:PASS
test 23:PASS

@ -3,7 +3,7 @@
# Script to update console related tests from source
#

for i in sc illegal decrementer xics privileged mmu misc modes reservation trace ; do
for i in sc illegal decrementer xics privileged mmu misc modes reservation trace fpu ; do
cd $i
make
cd -

@ -12,6 +12,7 @@ entity writeback is

e_in : in Execute1ToWritebackType;
l_in : in Loadstore1ToWritebackType;
fp_in : in FPUToWritebackType;

w_out : out WritebackToRegisterFileType;
c_out : out WritebackToCrFileType;
@ -31,15 +32,21 @@ begin
-- Do consistency checks only on the clock edge
x(0) := e_in.valid;
y(0) := l_in.valid;
assert (to_integer(unsigned(x)) + to_integer(unsigned(y))) <= 1 severity failure;
w(0) := fp_in.valid;
assert (to_integer(unsigned(x)) + to_integer(unsigned(y)) +
to_integer(unsigned(w))) <= 1 severity failure;

x(0) := e_in.write_enable or e_in.exc_write_enable;
y(0) := l_in.write_enable;
assert (to_integer(unsigned(x)) + to_integer(unsigned(y))) <= 1 severity failure;
w(0) := fp_in.write_enable;
assert (to_integer(unsigned(x)) + to_integer(unsigned(y)) +
to_integer(unsigned(w))) <= 1 severity failure;

w(0) := e_in.write_cr_enable;
x(0) := (e_in.write_enable and e_in.rc);
assert (to_integer(unsigned(w)) + to_integer(unsigned(x))) <= 1 severity failure;
y(0) := fp_in.write_cr_enable;
assert (to_integer(unsigned(w)) + to_integer(unsigned(x)) +
to_integer(unsigned(y))) <= 1 severity failure;
end if;
end process;

@ -53,7 +60,7 @@ begin
c_out <= WritebackToCrFileInit;

complete_out <= '0';
if e_in.valid = '1' or l_in.valid = '1' then
if e_in.valid = '1' or l_in.valid = '1' or fp_in.valid = '1' then
complete_out <= '1';
end if;

@ -79,8 +86,20 @@ begin
c_out.write_xerc_data <= e_in.xerc;
end if;

if fp_in.write_enable = '1' then
w_out.write_reg <= fp_in.write_reg;
w_out.write_data <= fp_in.write_data;
w_out.write_enable <= '1';
end if;

if fp_in.write_cr_enable = '1' then
c_out.write_cr_enable <= '1';
c_out.write_cr_mask <= fp_in.write_cr_mask;
c_out.write_cr_data <= fp_in.write_cr_data;
end if;

if l_in.write_enable = '1' then
w_out.write_reg <= gpr_to_gspr(l_in.write_reg);
w_out.write_reg <= l_in.write_reg;
w_out.write_data <= l_in.write_data;
w_out.write_enable <= '1';
end if;

Loading…
Cancel
Save