Merge pull request #245 from paulusmack/fpu

Add a simple FPU
jtag-port
Michael Neuling 4 years ago committed by GitHub
commit e40e752b9a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -48,7 +48,7 @@ core_files = decode_types.vhdl common.vhdl wishbone_types.vhdl fetch1.vhdl \
cr_file.vhdl crhelpers.vhdl ppc_fx_insns.vhdl rotator.vhdl \ cr_file.vhdl crhelpers.vhdl ppc_fx_insns.vhdl rotator.vhdl \
logical.vhdl countzero.vhdl multiply.vhdl divider.vhdl execute1.vhdl \ logical.vhdl countzero.vhdl multiply.vhdl divider.vhdl execute1.vhdl \
loadstore1.vhdl mmu.vhdl dcache.vhdl writeback.vhdl core_debug.vhdl \ loadstore1.vhdl mmu.vhdl dcache.vhdl writeback.vhdl core_debug.vhdl \
core.vhdl core.vhdl fpu.vhdl


soc_files = $(core_files) wishbone_arbiter.vhdl wishbone_bram_wrapper.vhdl sync_fifo.vhdl \ soc_files = $(core_files) wishbone_arbiter.vhdl wishbone_bram_wrapper.vhdl sync_fifo.vhdl \
wishbone_debug_master.vhdl xics.vhdl syscon.vhdl soc.vhdl \ wishbone_debug_master.vhdl xics.vhdl syscon.vhdl soc.vhdl \

@ -13,8 +13,11 @@ package common is
constant MSR_SF : integer := (63 - 0); -- Sixty-Four bit mode constant MSR_SF : integer := (63 - 0); -- Sixty-Four bit mode
constant MSR_EE : integer := (63 - 48); -- External interrupt Enable constant MSR_EE : integer := (63 - 48); -- External interrupt Enable
constant MSR_PR : integer := (63 - 49); -- PRoblem state constant MSR_PR : integer := (63 - 49); -- PRoblem state
constant MSR_FP : integer := (63 - 50); -- Floating Point available
constant MSR_FE0 : integer := (63 - 52); -- Floating Exception mode
constant MSR_SE : integer := (63 - 53); -- Single-step bit of TE field constant MSR_SE : integer := (63 - 53); -- Single-step bit of TE field
constant MSR_BE : integer := (63 - 54); -- Branch trace bit of TE field constant MSR_BE : integer := (63 - 54); -- Branch trace bit of TE field
constant MSR_FE1 : integer := (63 - 55); -- Floating Exception mode
constant MSR_IR : integer := (63 - 58); -- Instruction Relocation constant MSR_IR : integer := (63 - 58); -- Instruction Relocation
constant MSR_DR : integer := (63 - 59); -- Data Relocation constant MSR_DR : integer := (63 - 59); -- Data Relocation
constant MSR_RI : integer := (63 - 62); -- Recoverable Interrupt constant MSR_RI : integer := (63 - 62); -- Recoverable Interrupt
@ -53,8 +56,11 @@ package common is
-- GPR indices in the register file (GPR only) -- GPR indices in the register file (GPR only)
subtype gpr_index_t is std_ulogic_vector(4 downto 0); subtype gpr_index_t is std_ulogic_vector(4 downto 0);


-- Extended GPR indice (can hold an SPR) -- Extended GPR index (can hold an SPR or a FPR)
subtype gspr_index_t is std_ulogic_vector(5 downto 0); subtype gspr_index_t is std_ulogic_vector(6 downto 0);

-- FPR indices
subtype fpr_index_t is std_ulogic_vector(4 downto 0);


-- Some SPRs are stored in the register file, they use the magic -- Some SPRs are stored in the register file, they use the magic
-- GPR numbers above 31. -- GPR numbers above 31.
@ -64,6 +70,9 @@ package common is
-- indicates if this is indeed a fast SPR. If clear, then -- indicates if this is indeed a fast SPR. If clear, then
-- the SPR is not stored in the GPR file. -- the SPR is not stored in the GPR file.
-- --
-- FPRs are also stored in the register file, using GSPR
-- numbers from 64 to 95.
--
function fast_spr_num(spr: spr_num_t) return gspr_index_t; function fast_spr_num(spr: spr_num_t) return gspr_index_t;


-- Indices conversion functions -- Indices conversion functions
@ -71,6 +80,7 @@ package common is
function gpr_to_gspr(i: gpr_index_t) return gspr_index_t; function gpr_to_gspr(i: gpr_index_t) return gspr_index_t;
function gpr_or_spr_to_gspr(g: gpr_index_t; s: gspr_index_t) return gspr_index_t; function gpr_or_spr_to_gspr(g: gpr_index_t; s: gspr_index_t) return gspr_index_t;
function is_fast_spr(s: gspr_index_t) return std_ulogic; function is_fast_spr(s: gspr_index_t) return std_ulogic;
function fpr_to_gspr(f: fpr_index_t) return gspr_index_t;


-- The XER is split: the common bits (CA, OV, SO, OV32 and CA32) are -- The XER is split: the common bits (CA, OV, SO, OV32 and CA32) are
-- in the CR file as a kind of CR extension (with a separate write -- in the CR file as a kind of CR extension (with a separate write
@ -84,6 +94,38 @@ package common is
end record; end record;
constant xerc_init : xer_common_t := (others => '0'); constant xerc_init : xer_common_t := (others => '0');


-- FPSCR bit numbers
constant FPSCR_FX : integer := 63 - 32;
constant FPSCR_FEX : integer := 63 - 33;
constant FPSCR_VX : integer := 63 - 34;
constant FPSCR_OX : integer := 63 - 35;
constant FPSCR_UX : integer := 63 - 36;
constant FPSCR_ZX : integer := 63 - 37;
constant FPSCR_XX : integer := 63 - 38;
constant FPSCR_VXSNAN : integer := 63 - 39;
constant FPSCR_VXISI : integer := 63 - 40;
constant FPSCR_VXIDI : integer := 63 - 41;
constant FPSCR_VXZDZ : integer := 63 - 42;
constant FPSCR_VXIMZ : integer := 63 - 43;
constant FPSCR_VXVC : integer := 63 - 44;
constant FPSCR_FR : integer := 63 - 45;
constant FPSCR_FI : integer := 63 - 46;
constant FPSCR_C : integer := 63 - 47;
constant FPSCR_FL : integer := 63 - 48;
constant FPSCR_FG : integer := 63 - 49;
constant FPSCR_FE : integer := 63 - 50;
constant FPSCR_FU : integer := 63 - 51;
constant FPSCR_VXSOFT : integer := 63 - 53;
constant FPSCR_VXSQRT : integer := 63 - 54;
constant FPSCR_VXCVI : integer := 63 - 55;
constant FPSCR_VE : integer := 63 - 56;
constant FPSCR_OE : integer := 63 - 57;
constant FPSCR_UE : integer := 63 - 58;
constant FPSCR_ZE : integer := 63 - 59;
constant FPSCR_XE : integer := 63 - 60;
constant FPSCR_NI : integer := 63 - 61;
constant FPSCR_RN : integer := 63 - 63;

type irq_state_t is (WRITE_SRR0, WRITE_SRR1); type irq_state_t is (WRITE_SRR0, WRITE_SRR1);


-- For now, fixed 16 sources, make this either a parametric -- For now, fixed 16 sources, make this either a parametric
@ -226,7 +268,7 @@ package common is
read2_enable : std_ulogic; read2_enable : std_ulogic;
read2_reg : gspr_index_t; read2_reg : gspr_index_t;
read3_enable : std_ulogic; read3_enable : std_ulogic;
read3_reg : gpr_index_t; read3_reg : gspr_index_t;
end record; end record;


type RegisterFileToDecode2Type is record type RegisterFileToDecode2Type is record
@ -264,7 +306,7 @@ package common is
addr1 : std_ulogic_vector(63 downto 0); addr1 : std_ulogic_vector(63 downto 0);
addr2 : std_ulogic_vector(63 downto 0); addr2 : std_ulogic_vector(63 downto 0);
data : std_ulogic_vector(63 downto 0); -- data to write, unused for read data : std_ulogic_vector(63 downto 0); -- data to write, unused for read
write_reg : gpr_index_t; write_reg : gspr_index_t;
length : std_ulogic_vector(3 downto 0); length : std_ulogic_vector(3 downto 0);
ci : std_ulogic; -- cache-inhibited load/store ci : std_ulogic; -- cache-inhibited load/store
byte_reverse : std_ulogic; byte_reverse : std_ulogic;
@ -277,13 +319,15 @@ package common is
virt_mode : std_ulogic; -- do translation through TLB virt_mode : std_ulogic; -- do translation through TLB
priv_mode : std_ulogic; -- privileged mode (MSR[PR] = 0) priv_mode : std_ulogic; -- privileged mode (MSR[PR] = 0)
mode_32bit : std_ulogic; -- trim addresses to 32 bits mode_32bit : std_ulogic; -- trim addresses to 32 bits
is_32bit : std_ulogic;
end record; end record;
constant Execute1ToLoadstore1Init : Execute1ToLoadstore1Type := (valid => '0', op => OP_ILLEGAL, ci => '0', byte_reverse => '0', constant Execute1ToLoadstore1Init : Execute1ToLoadstore1Type := (valid => '0', op => OP_ILLEGAL, ci => '0', byte_reverse => '0',
sign_extend => '0', update => '0', xerc => xerc_init, sign_extend => '0', update => '0', xerc => xerc_init,
reserve => '0', rc => '0', virt_mode => '0', priv_mode => '0', reserve => '0', rc => '0', virt_mode => '0', priv_mode => '0',
nia => (others => '0'), insn => (others => '0'), nia => (others => '0'), insn => (others => '0'),
addr1 => (others => '0'), addr2 => (others => '0'), data => (others => '0'), length => (others => '0'), addr1 => (others => '0'), addr2 => (others => '0'), data => (others => '0'),
mode_32bit => '0', others => (others => '0')); write_reg => (others => '0'), length => (others => '0'),
mode_32bit => '0', is_32bit => '0', others => (others => '0'));


type Loadstore1ToExecute1Type is record type Loadstore1ToExecute1Type is record
busy : std_ulogic; busy : std_ulogic;
@ -369,7 +413,7 @@ package common is
type Loadstore1ToWritebackType is record type Loadstore1ToWritebackType is record
valid : std_ulogic; valid : std_ulogic;
write_enable: std_ulogic; write_enable: std_ulogic;
write_reg : gpr_index_t; write_reg : gspr_index_t;
write_data : std_ulogic_vector(63 downto 0); write_data : std_ulogic_vector(63 downto 0);
xerc : xer_common_t; xerc : xer_common_t;
rc : std_ulogic; rc : std_ulogic;
@ -401,6 +445,43 @@ package common is
write_cr_data => (others => '0'), write_reg => (others => '0'), write_cr_data => (others => '0'), write_reg => (others => '0'),
exc_write_reg => (others => '0'), exc_write_data => (others => '0')); exc_write_reg => (others => '0'), exc_write_data => (others => '0'));


type Execute1ToFPUType is record
valid : std_ulogic;
op : insn_type_t;
nia : std_ulogic_vector(63 downto 0);
insn : std_ulogic_vector(31 downto 0);
single : std_ulogic;
fe_mode : std_ulogic_vector(1 downto 0);
fra : std_ulogic_vector(63 downto 0);
frb : std_ulogic_vector(63 downto 0);
frc : std_ulogic_vector(63 downto 0);
frt : gspr_index_t;
rc : std_ulogic;
out_cr : std_ulogic;
end record;
constant Execute1ToFPUInit : Execute1ToFPUType := (valid => '0', op => OP_ILLEGAL, nia => (others => '0'),
insn => (others => '0'), fe_mode => "00", rc => '0',
fra => (others => '0'), frb => (others => '0'),
frc => (others => '0'), frt => (others => '0'),
single => '0', out_cr => '0');

type FPUToExecute1Type is record
busy : std_ulogic;
exception : std_ulogic;
interrupt : std_ulogic;
illegal : std_ulogic;
end record;

type FPUToWritebackType is record
valid : std_ulogic;
write_enable : std_ulogic;
write_reg : gspr_index_t;
write_data : std_ulogic_vector(63 downto 0);
write_cr_enable : std_ulogic;
write_cr_mask : std_ulogic_vector(7 downto 0);
write_cr_data : std_ulogic_vector(31 downto 0);
end record;

type DividerToExecute1Type is record type DividerToExecute1Type is record
valid: std_ulogic; valid: std_ulogic;
write_reg_data: std_ulogic_vector(63 downto 0); write_reg_data: std_ulogic_vector(63 downto 0);
@ -473,10 +554,10 @@ package body common is
n := 13; n := 13;
when others => when others =>
n := 0; n := 0;
return "000000"; return "0000000";
end case; end case;
tmp := std_ulogic_vector(to_unsigned(n, 5)); tmp := std_ulogic_vector(to_unsigned(n, 5));
return "1" & tmp; return "01" & tmp;
end; end;


function gspr_to_gpr(i: gspr_index_t) return gpr_index_t is function gspr_to_gpr(i: gspr_index_t) return gpr_index_t is
@ -486,7 +567,7 @@ package body common is


function gpr_to_gspr(i: gpr_index_t) return gspr_index_t is function gpr_to_gspr(i: gpr_index_t) return gspr_index_t is
begin begin
return "0" & i; return "00" & i;
end; end;


function gpr_or_spr_to_gspr(g: gpr_index_t; s: gspr_index_t) return gspr_index_t is function gpr_or_spr_to_gspr(g: gpr_index_t; s: gspr_index_t) return gspr_index_t is
@ -502,4 +583,9 @@ package body common is
begin begin
return s(5); return s(5);
end; end;

function fpr_to_gspr(f: fpr_index_t) return gspr_index_t is
begin
return "10" & f;
end;
end common; end common;

@ -34,7 +34,7 @@ entity control is
gpr_b_read_in : in gspr_index_t; gpr_b_read_in : in gspr_index_t;


gpr_c_read_valid_in : in std_ulogic; gpr_c_read_valid_in : in std_ulogic;
gpr_c_read_in : in gpr_index_t; gpr_c_read_in : in gspr_index_t;


cr_read_in : in std_ulogic; cr_read_in : in std_ulogic;
cr_write_in : in std_ulogic; cr_write_in : in std_ulogic;
@ -70,7 +70,6 @@ architecture rtl of control is
signal gpr_write_valid : std_ulogic := '0'; signal gpr_write_valid : std_ulogic := '0';
signal cr_write_valid : std_ulogic := '0'; signal cr_write_valid : std_ulogic := '0';


signal gpr_c_read_in_fmt : std_ulogic_vector(5 downto 0);
begin begin
gpr_hazard0: entity work.gpr_hazard gpr_hazard0: entity work.gpr_hazard
generic map ( generic map (
@ -122,8 +121,6 @@ begin
use_bypass => gpr_bypass_b use_bypass => gpr_bypass_b
); );


gpr_c_read_in_fmt <= "0" & gpr_c_read_in;

gpr_hazard2: entity work.gpr_hazard gpr_hazard2: entity work.gpr_hazard
generic map ( generic map (
PIPELINE_DEPTH => PIPELINE_DEPTH PIPELINE_DEPTH => PIPELINE_DEPTH
@ -140,7 +137,7 @@ begin
gpr_write_in => gpr_write_in, gpr_write_in => gpr_write_in,
bypass_avail => gpr_bypassable, bypass_avail => gpr_bypassable,
gpr_read_valid_in => gpr_c_read_valid_in, gpr_read_valid_in => gpr_c_read_valid_in,
gpr_read_in => gpr_c_read_in_fmt, gpr_read_in => gpr_c_read_in,


ugpr_write_valid => update_gpr_write_valid, ugpr_write_valid => update_gpr_write_valid,
ugpr_write_reg => update_gpr_write_reg, ugpr_write_reg => update_gpr_write_reg,

@ -11,6 +11,7 @@ entity core is
SIM : boolean := false; SIM : boolean := false;
DISABLE_FLATTEN : boolean := false; DISABLE_FLATTEN : boolean := false;
EX1_BYPASS : boolean := true; EX1_BYPASS : boolean := true;
HAS_FPU : boolean := true;
ALT_RESET_ADDRESS : std_ulogic_vector(63 downto 0) := (others => '0'); ALT_RESET_ADDRESS : std_ulogic_vector(63 downto 0) := (others => '0');
LOG_LENGTH : natural := 512 LOG_LENGTH : natural := 512
); );
@ -79,6 +80,11 @@ architecture behave of core is
signal mmu_to_dcache: MmuToDcacheType; signal mmu_to_dcache: MmuToDcacheType;
signal dcache_to_mmu: DcacheToMmuType; signal dcache_to_mmu: DcacheToMmuType;


-- FPU signals
signal execute1_to_fpu: Execute1ToFPUType;
signal fpu_to_execute1: FPUToExecute1Type;
signal fpu_to_writeback: FPUToWritebackType;

-- local signals -- local signals
signal fetch1_stall_in : std_ulogic; signal fetch1_stall_in : std_ulogic;
signal icache_stall_out : std_ulogic; signal icache_stall_out : std_ulogic;
@ -108,6 +114,7 @@ architecture behave of core is
signal rst_dec1 : std_ulogic := '1'; signal rst_dec1 : std_ulogic := '1';
signal rst_dec2 : std_ulogic := '1'; signal rst_dec2 : std_ulogic := '1';
signal rst_ex1 : std_ulogic := '1'; signal rst_ex1 : std_ulogic := '1';
signal rst_fpu : std_ulogic := '1';
signal rst_ls1 : std_ulogic := '1'; signal rst_ls1 : std_ulogic := '1';
signal rst_dbg : std_ulogic := '1'; signal rst_dbg : std_ulogic := '1';
signal alt_reset_d : std_ulogic; signal alt_reset_d : std_ulogic;
@ -170,6 +177,7 @@ begin
rst_dec1 <= core_rst; rst_dec1 <= core_rst;
rst_dec2 <= core_rst; rst_dec2 <= core_rst;
rst_ex1 <= core_rst; rst_ex1 <= core_rst;
rst_fpu <= core_rst;
rst_ls1 <= core_rst; rst_ls1 <= core_rst;
rst_dbg <= rst; rst_dbg <= rst;
alt_reset_d <= alt_reset; alt_reset_d <= alt_reset;
@ -224,6 +232,7 @@ begin


decode1_0: entity work.decode1 decode1_0: entity work.decode1
generic map( generic map(
HAS_FPU => HAS_FPU,
LOG_LENGTH => LOG_LENGTH LOG_LENGTH => LOG_LENGTH
) )
port map ( port map (
@ -244,6 +253,7 @@ begin
decode2_0: entity work.decode2 decode2_0: entity work.decode2
generic map ( generic map (
EX1_BYPASS => EX1_BYPASS, EX1_BYPASS => EX1_BYPASS,
HAS_FPU => HAS_FPU,
LOG_LENGTH => LOG_LENGTH LOG_LENGTH => LOG_LENGTH
) )
port map ( port map (
@ -267,6 +277,7 @@ begin
register_file_0: entity work.register_file register_file_0: entity work.register_file
generic map ( generic map (
SIM => SIM, SIM => SIM,
HAS_FPU => HAS_FPU,
LOG_LENGTH => LOG_LENGTH LOG_LENGTH => LOG_LENGTH
) )
port map ( port map (
@ -280,7 +291,7 @@ begin
dbg_gpr_data => dbg_gpr_data, dbg_gpr_data => dbg_gpr_data,
sim_dump => terminate, sim_dump => terminate,
sim_dump_done => sim_cr_dump, sim_dump_done => sim_cr_dump,
log_out => log_data(255 downto 185) log_out => log_data(255 downto 184)
); );


cr_file_0: entity work.cr_file cr_file_0: entity work.cr_file
@ -294,12 +305,13 @@ begin
d_out => cr_file_to_decode2, d_out => cr_file_to_decode2,
w_in => writeback_to_cr_file, w_in => writeback_to_cr_file,
sim_dump => sim_cr_dump, sim_dump => sim_cr_dump,
log_out => log_data(184 downto 172) log_out => log_data(183 downto 171)
); );


execute1_0: entity work.execute1 execute1_0: entity work.execute1
generic map ( generic map (
EX1_BYPASS => EX1_BYPASS, EX1_BYPASS => EX1_BYPASS,
HAS_FPU => HAS_FPU,
LOG_LENGTH => LOG_LENGTH LOG_LENGTH => LOG_LENGTH
) )
port map ( port map (
@ -309,9 +321,11 @@ begin
busy_out => ex1_busy_out, busy_out => ex1_busy_out,
e_in => decode2_to_execute1, e_in => decode2_to_execute1,
l_in => loadstore1_to_execute1, l_in => loadstore1_to_execute1,
fp_in => fpu_to_execute1,
ext_irq_in => ext_irq, ext_irq_in => ext_irq,
l_out => execute1_to_loadstore1, l_out => execute1_to_loadstore1,
f_out => execute1_to_fetch1, f_out => execute1_to_fetch1,
fp_out => execute1_to_fpu,
e_out => execute1_to_writeback, e_out => execute1_to_writeback,
icache_inval => ex1_icache_inval, icache_inval => ex1_icache_inval,
dbg_msr_out => msr, dbg_msr_out => msr,
@ -322,8 +336,32 @@ begin
log_wr_addr => log_wr_addr log_wr_addr => log_wr_addr
); );


with_fpu: if HAS_FPU generate
begin
fpu_0: entity work.fpu
port map (
clk => clk,
rst => rst_fpu,
e_in => execute1_to_fpu,
e_out => fpu_to_execute1,
w_out => fpu_to_writeback
);
end generate;

no_fpu: if not HAS_FPU generate
begin
fpu_to_execute1.busy <= '0';
fpu_to_execute1.exception <= '0';
fpu_to_execute1.interrupt <= '0';
fpu_to_execute1.illegal <= '0';
fpu_to_writeback.valid <= '0';
fpu_to_writeback.write_enable <= '0';
fpu_to_writeback.write_cr_enable <= '0';
end generate;

loadstore1_0: entity work.loadstore1 loadstore1_0: entity work.loadstore1
generic map ( generic map (
HAS_FPU => HAS_FPU,
LOG_LENGTH => LOG_LENGTH LOG_LENGTH => LOG_LENGTH
) )
port map ( port map (
@ -368,7 +406,7 @@ begin
stall_out => dcache_stall_out, stall_out => dcache_stall_out,
wishbone_in => wishbone_data_in, wishbone_in => wishbone_data_in,
wishbone_out => wishbone_data_out, wishbone_out => wishbone_data_out,
log_out => log_data(171 downto 152) log_out => log_data(170 downto 151)
); );


writeback_0: entity work.writeback writeback_0: entity work.writeback
@ -376,12 +414,13 @@ begin
clk => clk, clk => clk,
e_in => execute1_to_writeback, e_in => execute1_to_writeback,
l_in => loadstore1_to_writeback, l_in => loadstore1_to_writeback,
fp_in => fpu_to_writeback,
w_out => writeback_to_register_file, w_out => writeback_to_register_file,
c_out => writeback_to_cr_file, c_out => writeback_to_cr_file,
complete_out => complete complete_out => complete
); );


log_data(151 downto 150) <= "00"; log_data(150) <= '0';
log_data(139 downto 135) <= "00000"; log_data(139 downto 135) <= "00000";


debug_0: entity work.core_debug debug_0: entity work.core_debug

@ -3,6 +3,7 @@ use ieee.std_logic_1164.all;
use ieee.numeric_std.all; use ieee.numeric_std.all;


library work; library work;
use work.helpers.all;


entity zero_counter is entity zero_counter is
port ( port (
@ -15,42 +16,6 @@ entity zero_counter is
end entity zero_counter; end entity zero_counter;


architecture behaviour of zero_counter is architecture behaviour of zero_counter is
-- Reverse the order of bits in a word
function bit_reverse(a: std_ulogic_vector) return std_ulogic_vector is
variable ret: std_ulogic_vector(a'left downto a'right);
begin
for i in a'right to a'left loop
ret(a'left + a'right - i) := a(i);
end loop;
return ret;
end;

-- If there is only one bit set in a doubleword, return its bit number
-- (counting from the right). Each bit of the result is obtained by
-- ORing together 32 bits of the input:
-- bit 0 = a[1] or a[3] or a[5] or ...
-- bit 1 = a[2] or a[3] or a[6] or a[7] or ...
-- bit 2 = a[4..7] or a[12..15] or ...
-- bit 5 = a[32..63] ORed together
function bit_number(a: std_ulogic_vector(63 downto 0)) return std_ulogic_vector is
variable ret: std_ulogic_vector(5 downto 0);
variable stride: natural;
variable bit: std_ulogic;
variable k: natural;
begin
stride := 2;
for i in 0 to 5 loop
bit := '0';
for j in 0 to (64 / stride) - 1 loop
k := j * stride;
bit := bit or (or a(k + stride - 1 downto k + (stride / 2)));
end loop;
ret(i) := bit;
stride := stride * 2;
end loop;
return ret;
end;

signal inp : std_ulogic_vector(63 downto 0); signal inp : std_ulogic_vector(63 downto 0);
signal sum : std_ulogic_vector(64 downto 0); signal sum : std_ulogic_vector(64 downto 0);
signal msb_r : std_ulogic; signal msb_r : std_ulogic;

@ -8,6 +8,7 @@ use work.decode_types.all;


entity decode1 is entity decode1 is
generic ( generic (
HAS_FPU : boolean := true;
-- Non-zero to enable log data collection -- Non-zero to enable log data collection
LOG_LENGTH : natural := 0 LOG_LENGTH : natural := 0
); );
@ -54,7 +55,10 @@ architecture behaviour of decode1 is
type op_19_subop_array_t is array(0 to 7) of decode_rom_t; type op_19_subop_array_t is array(0 to 7) of decode_rom_t;
type op_30_subop_array_t is array(0 to 15) of decode_rom_t; type op_30_subop_array_t is array(0 to 15) of decode_rom_t;
type op_31_subop_array_t is array(0 to 1023) of decode_rom_t; type op_31_subop_array_t is array(0 to 1023) of decode_rom_t;
type op_59_subop_array_t is array(0 to 31) of decode_rom_t;
type minor_rom_array_2_t is array(0 to 3) of decode_rom_t; type minor_rom_array_2_t is array(0 to 3) of decode_rom_t;
type op_63_subop_array_0_t is array(0 to 511) of decode_rom_t;
type op_63_subop_array_1_t is array(0 to 16) of decode_rom_t;


constant major_decode_rom_array : major_rom_array_t := ( constant major_decode_rom_array : major_rom_array_t := (
-- unit internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl -- unit internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl
@ -72,6 +76,10 @@ architecture behaviour of decode1 is
10 => (ALU, OP_CMP, RA, CONST_UI, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmpli 10 => (ALU, OP_CMP, RA, CONST_UI, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmpli
34 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- lbz 34 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- lbz
35 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- lbzu 35 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- lbzu
50 => (LDST, OP_FPLOAD, RA_OR_ZERO, CONST_SI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- lfd
51 => (LDST, OP_FPLOAD, RA_OR_ZERO, CONST_SI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- lfdu
48 => (LDST, OP_FPLOAD, RA_OR_ZERO, CONST_SI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0'), -- lfs
49 => (LDST, OP_FPLOAD, RA_OR_ZERO, CONST_SI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0'), -- lfsu
42 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '0', '0', '0', '0', NONE, '0', '0'), -- lha 42 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '0', '0', '0', '0', NONE, '0', '0'), -- lha
43 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '1', '0', '0', '0', NONE, '0', '0'), -- lhau 43 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '1', '0', '0', '0', NONE, '0', '0'), -- lhau
40 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- lhz 40 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- lhz
@ -87,6 +95,10 @@ architecture behaviour of decode1 is
17 => (ALU, OP_SC, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- sc 17 => (ALU, OP_SC, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- sc
38 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stb 38 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stb
39 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- stbu 39 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- stbu
54 => (LDST, OP_FPSTORE, RA_OR_ZERO, CONST_SI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stfd
55 => (LDST, OP_FPSTORE, RA_OR_ZERO, CONST_SI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- stfdu
52 => (LDST, OP_FPSTORE, RA_OR_ZERO, CONST_SI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0'), -- stfs
53 => (LDST, OP_FPSTORE, RA_OR_ZERO, CONST_SI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0'), -- stfsu
44 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- sth 44 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- sth
45 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- sthu 45 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- sthu
36 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stw 36 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stw
@ -272,6 +284,12 @@ architecture behaviour of decode1 is
2#1101110101# => (LDST, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- ldcix 2#1101110101# => (LDST, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- ldcix
2#0000110101# => (LDST, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- ldux 2#0000110101# => (LDST, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- ldux
2#0000010101# => (LDST, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- ldx 2#0000010101# => (LDST, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- ldx
2#1001010111# => (LDST, OP_FPLOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- lfdx
2#1001110111# => (LDST, OP_FPLOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- lfdux
2#1101010111# => (LDST, OP_FPLOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '1', '0', '0', '0', '0', NONE, '0', '0'), -- lfiwax
2#1101110111# => (LDST, OP_FPLOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- lfiwzx
2#1000010111# => (LDST, OP_FPLOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0'), -- lfsx
2#1000110111# => (LDST, OP_FPLOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0'), -- lfsux
2#0001110100# => (LDST, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '1', '0', '0', NONE, '0', '0'), -- lharx 2#0001110100# => (LDST, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '1', '0', '0', NONE, '0', '0'), -- lharx
2#0101110111# => (LDST, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '1', '0', '0', '0', NONE, '0', '0'), -- lhaux 2#0101110111# => (LDST, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '1', '0', '0', '0', NONE, '0', '0'), -- lhaux
2#0101010111# => (LDST, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '0', '0', '0', '0', NONE, '0', '0'), -- lhax 2#0101010111# => (LDST, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '0', '0', '0', '0', NONE, '0', '0'), -- lhax
@ -350,6 +368,11 @@ architecture behaviour of decode1 is
2#0011010110# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '1', '0', '0', ONE, '0', '0'), -- stdcx 2#0011010110# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '1', '0', '0', ONE, '0', '0'), -- stdcx
2#0010110101# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- stdux 2#0010110101# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- stdux
2#0010010101# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stdx 2#0010010101# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stdx
2#1011010111# => (LDST, OP_FPSTORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stfdx
2#1011110111# => (LDST, OP_FPSTORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- stfdux
2#1111010111# => (LDST, OP_FPSTORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stfiwx
2#1010010111# => (LDST, OP_FPSTORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0'), -- stfsx
2#1010110111# => (LDST, OP_FPSTORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0'), -- stfsux
2#1110010110# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '1', '0', '0', '0', '0', '0', NONE, '0', '0'), -- sthbrx 2#1110010110# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '1', '0', '0', '0', '0', '0', NONE, '0', '0'), -- sthbrx
2#1110110101# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- sthcix 2#1110110101# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- sthcix
2#1011010110# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '1', '0', '0', ONE, '0', '0'), -- sthcx 2#1011010110# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '1', '0', '0', ONE, '0', '0'), -- sthcx
@ -389,6 +412,24 @@ architecture behaviour of decode1 is
others => decode_rom_init others => decode_rom_init
); );


constant decode_op_59_array : op_59_subop_array_t := (
-- unit internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl
-- op in out A out in out len ext pipe
2#01110# => (FPU, OP_FPOP_I, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fcfid[u]s
2#10010# => (FPU, OP_FPOP, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fdivs
2#10100# => (FPU, OP_FPOP, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fsubs
2#10101# => (FPU, OP_FPOP, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fadds
2#10110# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fsqrts
2#11000# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fres
2#11001# => (FPU, OP_FPOP, FRA, NONE, FRC, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fmuls
2#11010# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- frsqrtes
2#11100# => (FPU, OP_FPOP, FRA, FRB, FRC, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fmsubs
2#11101# => (FPU, OP_FPOP, FRA, FRB, FRC, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fmadds
2#11110# => (FPU, OP_FPOP, FRA, FRB, FRC, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fnmsubs
2#11111# => (FPU, OP_FPOP, FRA, FRB, FRC, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fnmadds
others => illegal_inst
);

constant decode_op_62_array : minor_rom_array_2_t := ( constant decode_op_62_array : minor_rom_array_2_t := (
-- unit internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl -- unit internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl
-- op in out A out in out len ext pipe -- op in out A out in out len ext pipe
@ -397,6 +438,64 @@ architecture behaviour of decode1 is
others => decode_rom_init others => decode_rom_init
); );


-- indexed by bits 4..1 and 10..6 of instruction word
constant decode_op_63l_array : op_63_subop_array_0_t := (
-- unit internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl
-- op in out A out in out len ext pipe
2#000000000# => (FPU, OP_FPOP, FRA, FRB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- 0/0=fcmpu
2#000000001# => (FPU, OP_FPOP, FRA, FRB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- 1/0=fcmpo
2#000000010# => (FPU, OP_FPOP, NONE, NONE, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- 2/0=mcrfs
2#000000100# => (FPU, OP_FPOP, FRA, FRB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- 4/0=ftdiv
2#000000101# => (FPU, OP_FPOP, NONE, FRB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- 5/0=ftsqrt
2#011000001# => (FPU, OP_FPOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 1/6=mtfsb1
2#011000010# => (FPU, OP_FPOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 2/6=mtfsb0
2#011000100# => (FPU, OP_FPOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 4/6=mtfsfi
2#011011010# => (FPU, OP_FPOP_I, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- 26/6=fmrgow
2#011011110# => (FPU, OP_FPOP_I, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- 30/6=fmrgew
2#011110010# => (FPU, OP_FPOP_I, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 18/7=mffs family
2#011110110# => (FPU, OP_FPOP_I, NONE, FRB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 22/7=mtfsf
2#100000000# => (FPU, OP_FPOP, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 0/8=fcpsgn
2#100000001# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 1/8=fneg
2#100000010# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 2/8=fmr
2#100000100# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 4/8=fnabs
2#100001000# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 8/8=fabs
2#100001100# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 12/8=frin
2#100001101# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 13/8=friz
2#100001110# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 14/8=frip
2#100001111# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 15/8=frim
2#110000000# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- 0/12=frsp
2#111000000# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 0/14=fctiw
2#111000100# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 4/14=fctiwu
2#111011001# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 25/14=fctid
2#111011010# => (FPU, OP_FPOP_I, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 26/14=fcfid
2#111011101# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 29/14=fctidu
2#111011110# => (FPU, OP_FPOP_I, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 30/14=fcfidu
2#111100000# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 0/15=fctiwz
2#111100100# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 4/15=fctiwuz
2#111111001# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 25/15=fctidz
2#111111101# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 29/15=fctiduz
others => illegal_inst
);

-- indexed by bits 4..1 of instruction word
constant decode_op_63h_array : op_63_subop_array_1_t := (
-- unit internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl
-- op in out A out in out len ext pipe
2#0010# => (FPU, OP_FPOP, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- fdiv
2#0100# => (FPU, OP_FPOP, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- fsub
2#0101# => (FPU, OP_FPOP, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- fadd
2#0110# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- fsqrt
2#0111# => (FPU, OP_FPOP, FRA, FRB, FRC, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- fsel
2#1000# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- fre
2#1001# => (FPU, OP_FPOP, FRA, NONE, FRC, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- fmul
2#1010# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- frsqrte
2#1100# => (FPU, OP_FPOP, FRA, FRB, FRC, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- fmsub
2#1101# => (FPU, OP_FPOP, FRA, FRB, FRC, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- fmadd
2#1110# => (FPU, OP_FPOP, FRA, FRB, FRC, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- fnmsub
2#1111# => (FPU, OP_FPOP, FRA, FRB, FRC, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- fnmadd
others => illegal_inst
);

-- unit internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl -- unit internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl
-- op in out A out in out len ext pipe -- op in out A out in out len ext pipe
constant nop_instr : decode_rom_t := (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'); constant nop_instr : decode_rom_t := (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0');
@ -547,9 +646,28 @@ begin
when 58 => when 58 =>
v.decode := decode_op_58_array(to_integer(unsigned(f_in.insn(1 downto 0)))); v.decode := decode_op_58_array(to_integer(unsigned(f_in.insn(1 downto 0))));


when 59 =>
if HAS_FPU then
-- floating point operations, mostly single-precision
v.decode := decode_op_59_array(to_integer(unsigned(f_in.insn(5 downto 1))));
if f_in.insn(5) = '0' and not std_match(f_in.insn(10 downto 1), "11-1001110") then
vi.override := '1';
end if;
end if;

when 62 => when 62 =>
v.decode := decode_op_62_array(to_integer(unsigned(f_in.insn(1 downto 0)))); v.decode := decode_op_62_array(to_integer(unsigned(f_in.insn(1 downto 0))));


when 63 =>
if HAS_FPU then
-- floating point operations, general and double-precision
if f_in.insn(5) = '0' then
v.decode := decode_op_63l_array(to_integer(unsigned(f_in.insn(4 downto 1) & f_in.insn(10 downto 6))));
else
v.decode := decode_op_63h_array(to_integer(unsigned(f_in.insn(4 downto 1))));
end if;
end if;

when others => when others =>
end case; end case;



@ -11,6 +11,7 @@ use work.insn_helpers.all;
entity decode2 is entity decode2 is
generic ( generic (
EX1_BYPASS : boolean := true; EX1_BYPASS : boolean := true;
HAS_FPU : boolean := true;
-- Non-zero to enable log data collection -- Non-zero to enable log data collection
LOG_LENGTH : natural := 0 LOG_LENGTH : natural := 0
); );
@ -73,12 +74,14 @@ architecture behaviour of decode2 is
-- If it's all 0, we don't treat it as a dependency as slow SPRs -- If it's all 0, we don't treat it as a dependency as slow SPRs
-- operations are single issue. -- operations are single issue.
-- --
assert is_fast_spr(ispr) = '1' or ispr = "000000" assert is_fast_spr(ispr) = '1' or ispr = "0000000"
report "Decode A says SPR but ISPR is invalid:" & report "Decode A says SPR but ISPR is invalid:" &
to_hstring(ispr) severity failure; to_hstring(ispr) severity failure;
return (is_fast_spr(ispr), ispr, reg_data); return (is_fast_spr(ispr), ispr, reg_data);
elsif t = CIA then elsif t = CIA then
return ('0', (others => '0'), instr_addr); return ('0', (others => '0'), instr_addr);
elsif HAS_FPU and t = FRA then
return ('1', fpr_to_gspr(insn_fra(insn_in)), reg_data);
else else
return ('0', (others => '0'), (others => '0')); return ('0', (others => '0'), (others => '0'));
end if; end if;
@ -92,6 +95,12 @@ architecture behaviour of decode2 is
case t is case t is
when RB => when RB =>
ret := ('1', gpr_to_gspr(insn_rb(insn_in)), reg_data); ret := ('1', gpr_to_gspr(insn_rb(insn_in)), reg_data);
when FRB =>
if HAS_FPU then
ret := ('1', fpr_to_gspr(insn_frb(insn_in)), reg_data);
else
ret := ('0', (others => '0'), (others => '0'));
end if;
when CONST_UI => when CONST_UI =>
ret := ('0', (others => '0'), std_ulogic_vector(resize(unsigned(insn_ui(insn_in)), 64))); ret := ('0', (others => '0'), std_ulogic_vector(resize(unsigned(insn_ui(insn_in)), 64)));
when CONST_SI => when CONST_SI =>
@ -118,7 +127,7 @@ architecture behaviour of decode2 is
-- ISPR must be either a valid fast SPR number or all 0 for a slow SPR. -- ISPR must be either a valid fast SPR number or all 0 for a slow SPR.
-- If it's all 0, we don't treat it as a dependency as slow SPRs -- If it's all 0, we don't treat it as a dependency as slow SPRs
-- operations are single issue. -- operations are single issue.
assert is_fast_spr(ispr) = '1' or ispr = "000000" assert is_fast_spr(ispr) = '1' or ispr = "0000000"
report "Decode B says SPR but ISPR is invalid:" & report "Decode B says SPR but ISPR is invalid:" &
to_hstring(ispr) severity failure; to_hstring(ispr) severity failure;
ret := (is_fast_spr(ispr), ispr, reg_data); ret := (is_fast_spr(ispr), ispr, reg_data);
@ -137,6 +146,18 @@ architecture behaviour of decode2 is
return ('1', gpr_to_gspr(insn_rs(insn_in)), reg_data); return ('1', gpr_to_gspr(insn_rs(insn_in)), reg_data);
when RCR => when RCR =>
return ('1', gpr_to_gspr(insn_rcreg(insn_in)), reg_data); return ('1', gpr_to_gspr(insn_rcreg(insn_in)), reg_data);
when FRS =>
if HAS_FPU then
return ('1', fpr_to_gspr(insn_frt(insn_in)), reg_data);
else
return ('0', (others => '0'), (others => '0'));
end if;
when FRC =>
if HAS_FPU then
return ('1', fpr_to_gspr(insn_frc(insn_in)), reg_data);
else
return ('0', (others => '0'), (others => '0'));
end if;
when NONE => when NONE =>
return ('0', (others => '0'), (others => '0')); return ('0', (others => '0'), (others => '0'));
end case; end case;
@ -150,16 +171,22 @@ architecture behaviour of decode2 is
return ('1', gpr_to_gspr(insn_rt(insn_in))); return ('1', gpr_to_gspr(insn_rt(insn_in)));
when RA => when RA =>
return ('1', gpr_to_gspr(insn_ra(insn_in))); return ('1', gpr_to_gspr(insn_ra(insn_in)));
when FRT =>
if HAS_FPU then
return ('1', fpr_to_gspr(insn_frt(insn_in)));
else
return ('0', "0000000");
end if;
when SPR => when SPR =>
-- ISPR must be either a valid fast SPR number or all 0 for a slow SPR. -- ISPR must be either a valid fast SPR number or all 0 for a slow SPR.
-- If it's all 0, we don't treat it as a dependency as slow SPRs -- If it's all 0, we don't treat it as a dependency as slow SPRs
-- operations are single issue. -- operations are single issue.
assert is_fast_spr(ispr) = '1' or ispr = "000000" assert is_fast_spr(ispr) = '1' or ispr = "0000000"
report "Decode B says SPR but ISPR is invalid:" & report "Decode B says SPR but ISPR is invalid:" &
to_hstring(ispr) severity failure; to_hstring(ispr) severity failure;
return (is_fast_spr(ispr), ispr); return (is_fast_spr(ispr), ispr);
when NONE => when NONE =>
return ('0', "000000"); return ('0', "0000000");
end case; end case;
end; end;


@ -212,7 +239,7 @@ architecture behaviour of decode2 is
signal gpr_b_bypass : std_ulogic; signal gpr_b_bypass : std_ulogic;


signal gpr_c_read_valid : std_ulogic; signal gpr_c_read_valid : std_ulogic;
signal gpr_c_read : gpr_index_t; signal gpr_c_read : gspr_index_t;
signal gpr_c_bypass : std_ulogic; signal gpr_c_bypass : std_ulogic;


signal cr_write_valid : std_ulogic; signal cr_write_valid : std_ulogic;
@ -281,11 +308,15 @@ begin
end process; end process;


r_out.read1_reg <= d_in.ispr1 when d_in.decode.input_reg_a = SPR r_out.read1_reg <= d_in.ispr1 when d_in.decode.input_reg_a = SPR
else fpr_to_gspr(insn_fra(d_in.insn)) when d_in.decode.input_reg_a = FRA and HAS_FPU
else gpr_to_gspr(insn_ra(d_in.insn)); else gpr_to_gspr(insn_ra(d_in.insn));
r_out.read2_reg <= d_in.ispr2 when d_in.decode.input_reg_b = SPR r_out.read2_reg <= d_in.ispr2 when d_in.decode.input_reg_b = SPR
else fpr_to_gspr(insn_frb(d_in.insn)) when d_in.decode.input_reg_b = FRB and HAS_FPU
else gpr_to_gspr(insn_rb(d_in.insn)); else gpr_to_gspr(insn_rb(d_in.insn));
r_out.read3_reg <= insn_rcreg(d_in.insn) when d_in.decode.input_reg_c = RCR r_out.read3_reg <= gpr_to_gspr(insn_rcreg(d_in.insn)) when d_in.decode.input_reg_c = RCR
else insn_rs(d_in.insn); else fpr_to_gspr(insn_frc(d_in.insn)) when d_in.decode.input_reg_c = FRC and HAS_FPU
else fpr_to_gspr(insn_frt(d_in.insn)) when d_in.decode.input_reg_c = FRS and HAS_FPU
else gpr_to_gspr(insn_rs(d_in.insn));


c_out.read <= d_in.decode.input_cr; c_out.read <= d_in.decode.input_cr;


@ -307,7 +338,7 @@ begin
mul_b := (others => '0'); mul_b := (others => '0');


--v.e.input_cr := d_in.decode.input_cr; --v.e.input_cr := d_in.decode.input_cr;
--v.e.output_cr := d_in.decode.output_cr; v.e.output_cr := d_in.decode.output_cr;
decoded_reg_a := decode_input_reg_a (d_in.decode.input_reg_a, d_in.insn, r_in.read1_data, d_in.ispr1, decoded_reg_a := decode_input_reg_a (d_in.decode.input_reg_a, d_in.insn, r_in.read1_data, d_in.ispr1,
d_in.nia); d_in.nia);
@ -394,11 +425,11 @@ begin
gpr_b_read <= decoded_reg_b.reg; gpr_b_read <= decoded_reg_b.reg;


gpr_c_read_valid <= decoded_reg_c.reg_valid; gpr_c_read_valid <= decoded_reg_c.reg_valid;
gpr_c_read <= gspr_to_gpr(decoded_reg_c.reg); gpr_c_read <= decoded_reg_c.reg;


cr_write_valid <= d_in.decode.output_cr or decode_rc(d_in.decode.rc, d_in.insn); cr_write_valid <= d_in.decode.output_cr or decode_rc(d_in.decode.rc, d_in.insn);
cr_bypass_avail <= '0'; cr_bypass_avail <= '0';
if EX1_BYPASS then if EX1_BYPASS and d_in.decode.unit = ALU then
cr_bypass_avail <= d_in.decode.output_cr; cr_bypass_avail <= d_in.decode.output_cr;
end if; end if;



@ -7,9 +7,11 @@ package decode_types is
OP_BPERM, OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPRB, OP_BPERM, OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPRB,
OP_CNTZ, OP_CROP, OP_CNTZ, OP_CROP,
OP_DARN, OP_DCBF, OP_DCBST, OP_DCBT, OP_DCBTST, OP_DARN, OP_DCBF, OP_DCBST, OP_DCBT, OP_DCBTST,
OP_DCBZ, OP_DIV, OP_DIVE, OP_EXTS, OP_DCBZ, OP_DIV, OP_DIVE, OP_EXTS, OP_EXTSWSLI,
OP_EXTSWSLI, OP_ICBI, OP_ICBT, OP_ISEL, OP_ISYNC, OP_FPOP, OP_FPOP_I,
OP_ICBI, OP_ICBT, OP_ISEL, OP_ISYNC,
OP_LOAD, OP_STORE, OP_LOAD, OP_STORE,
OP_FPLOAD, OP_FPSTORE,
OP_MCRXRX, OP_MFCR, OP_MFMSR, OP_MFSPR, OP_MOD, OP_MCRXRX, OP_MFCR, OP_MFMSR, OP_MFSPR, OP_MOD,
OP_MTCRF, OP_MTMSRD, OP_MTSPR, OP_MUL_L64, OP_MTCRF, OP_MTMSRD, OP_MTSPR, OP_MUL_L64,
OP_MUL_H64, OP_MUL_H32, OP_OR, OP_MUL_H64, OP_MUL_H32, OP_OR,
@ -21,11 +23,11 @@ package decode_types is
OP_BCD, OP_ADDG6S, OP_BCD, OP_ADDG6S,
OP_FETCH_FAILED OP_FETCH_FAILED
); );
type input_reg_a_t is (NONE, RA, RA_OR_ZERO, SPR, CIA); type input_reg_a_t is (NONE, RA, RA_OR_ZERO, SPR, CIA, FRA);
type input_reg_b_t is (NONE, RB, CONST_UI, CONST_SI, CONST_SI_HI, CONST_UI_HI, CONST_LI, CONST_BD, type input_reg_b_t is (NONE, RB, CONST_UI, CONST_SI, CONST_SI_HI, CONST_UI_HI, CONST_LI, CONST_BD,
CONST_DXHI4, CONST_DS, CONST_M1, CONST_SH, CONST_SH32, SPR); CONST_DXHI4, CONST_DS, CONST_M1, CONST_SH, CONST_SH32, SPR, FRB);
type input_reg_c_t is (NONE, RS, RCR); type input_reg_c_t is (NONE, RS, RCR, FRC, FRS);
type output_reg_a_t is (NONE, RT, RA, SPR); type output_reg_a_t is (NONE, RT, RA, SPR, FRT);
type rc_t is (NONE, ONE, RC); type rc_t is (NONE, ONE, RC);
type carry_in_t is (ZERO, CA, OV, ONE); type carry_in_t is (ZERO, CA, OV, ONE);


@ -47,7 +49,7 @@ package decode_types is


constant TOO_OFFSET : integer := 0; constant TOO_OFFSET : integer := 0;


type unit_t is (NONE, ALU, LDST); type unit_t is (NONE, ALU, LDST, FPU);
type length_t is (NONE, is1B, is2B, is4B, is8B); type length_t is (NONE, is1B, is2B, is4B, is8B);


type decode_rom_t is record type decode_rom_t is record

@ -13,6 +13,7 @@ use work.ppc_fx_insns.all;
entity execute1 is entity execute1 is
generic ( generic (
EX1_BYPASS : boolean := true; EX1_BYPASS : boolean := true;
HAS_FPU : boolean := true;
-- Non-zero to enable log data collection -- Non-zero to enable log data collection
LOG_LENGTH : natural := 0 LOG_LENGTH : natural := 0
); );
@ -26,12 +27,14 @@ entity execute1 is


e_in : in Decode2ToExecute1Type; e_in : in Decode2ToExecute1Type;
l_in : in Loadstore1ToExecute1Type; l_in : in Loadstore1ToExecute1Type;
fp_in : in FPUToExecute1Type;


ext_irq_in : std_ulogic; ext_irq_in : std_ulogic;


-- asynchronous -- asynchronous
l_out : out Execute1ToLoadstore1Type; l_out : out Execute1ToLoadstore1Type;
f_out : out Execute1ToFetch1Type; f_out : out Execute1ToFetch1Type;
fp_out : out Execute1ToFPUType;


e_out : out Execute1ToWritebackType; e_out : out Execute1ToWritebackType;


@ -53,6 +56,7 @@ architecture behaviour of execute1 is
f : Execute1ToFetch1Type; f : Execute1ToFetch1Type;
busy: std_ulogic; busy: std_ulogic;
terminate: std_ulogic; terminate: std_ulogic;
fp_exception_next : std_ulogic;
trace_next : std_ulogic; trace_next : std_ulogic;
prev_op : insn_type_t; prev_op : insn_type_t;
lr_update : std_ulogic; lr_update : std_ulogic;
@ -71,7 +75,8 @@ architecture behaviour of execute1 is
end record; end record;
constant reg_type_init : reg_type := constant reg_type_init : reg_type :=
(e => Execute1ToWritebackInit, f => Execute1ToFetch1Init, (e => Execute1ToWritebackInit, f => Execute1ToFetch1Init,
busy => '0', lr_update => '0', terminate => '0', trace_next => '0', prev_op => OP_ILLEGAL, busy => '0', lr_update => '0', terminate => '0',
fp_exception_next => '0', trace_next => '0', prev_op => OP_ILLEGAL,
mul_in_progress => '0', mul_finish => '0', div_in_progress => '0', cntz_in_progress => '0', mul_in_progress => '0', mul_finish => '0', div_in_progress => '0', cntz_in_progress => '0',
slow_op_insn => OP_ILLEGAL, slow_op_rc => '0', slow_op_oe => '0', slow_op_xerc => xerc_init, slow_op_insn => OP_ILLEGAL, slow_op_rc => '0', slow_op_oe => '0', slow_op_xerc => xerc_init,
next_lr => (others => '0'), last_nia => (others => '0'), others => (others => '0')); next_lr => (others => '0'), last_nia => (others => '0'), others => (others => '0'));
@ -267,7 +272,7 @@ begin
b_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data2 = '1' else e_in.read_data2; b_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data2 = '1' else e_in.read_data2;
c_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data3 = '1' else e_in.read_data3; c_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data3 = '1' else e_in.read_data3;


busy_out <= l_in.busy or r.busy; busy_out <= l_in.busy or r.busy or fp_in.busy;
valid_in <= e_in.valid and not busy_out; valid_in <= e_in.valid and not busy_out;


terminate_out <= r.terminate; terminate_out <= r.terminate;
@ -333,6 +338,7 @@ begin
variable spr_val : std_ulogic_vector(63 downto 0); variable spr_val : std_ulogic_vector(63 downto 0);
variable addend : std_ulogic_vector(127 downto 0); variable addend : std_ulogic_vector(127 downto 0);
variable do_trace : std_ulogic; variable do_trace : std_ulogic;
variable fv : Execute1ToFPUType;
begin begin
result := (others => '0'); result := (others => '0');
sum_with_carry := (others => '0'); sum_with_carry := (others => '0');
@ -346,6 +352,7 @@ begin
v.e := Execute1ToWritebackInit; v.e := Execute1ToWritebackInit;
lv := Execute1ToLoadstore1Init; lv := Execute1ToLoadstore1Init;
v.f.redirect := '0'; v.f.redirect := '0';
fv := Execute1ToFPUInit;


-- XER forwarding. To avoid having to track XER hazards, we -- XER forwarding. To avoid having to track XER hazards, we
-- use the previously latched value. -- use the previously latched value.
@ -521,9 +528,11 @@ begin
exception_nextpc := '0'; exception_nextpc := '0';
v.e.exc_write_enable := '0'; v.e.exc_write_enable := '0';
v.e.exc_write_reg := fast_spr_num(SPR_SRR0); v.e.exc_write_reg := fast_spr_num(SPR_SRR0);
v.e.exc_write_data := e_in.nia;
if valid_in = '1' then if valid_in = '1' then
v.e.exc_write_data := e_in.nia;
v.last_nia := e_in.nia; v.last_nia := e_in.nia;
else
v.e.exc_write_data := r.last_nia;
end if; end if;


v.e.mode_32bit := not ctrl.msr(MSR_SF); v.e.mode_32bit := not ctrl.msr(MSR_SF);
@ -542,24 +551,36 @@ begin
ctrl_tmp.msr(MSR_PR) <= '0'; ctrl_tmp.msr(MSR_PR) <= '0';
ctrl_tmp.msr(MSR_SE) <= '0'; ctrl_tmp.msr(MSR_SE) <= '0';
ctrl_tmp.msr(MSR_BE) <= '0'; ctrl_tmp.msr(MSR_BE) <= '0';
ctrl_tmp.msr(MSR_FP) <= '0';
ctrl_tmp.msr(MSR_FE0) <= '0';
ctrl_tmp.msr(MSR_FE1) <= '0';
ctrl_tmp.msr(MSR_IR) <= '0'; ctrl_tmp.msr(MSR_IR) <= '0';
ctrl_tmp.msr(MSR_DR) <= '0'; ctrl_tmp.msr(MSR_DR) <= '0';
ctrl_tmp.msr(MSR_RI) <= '0'; ctrl_tmp.msr(MSR_RI) <= '0';
ctrl_tmp.msr(MSR_LE) <= '1'; ctrl_tmp.msr(MSR_LE) <= '1';
v.e.valid := '1'; v.e.valid := '1';
v.trace_next := '0'; v.trace_next := '0';
v.fp_exception_next := '0';
report "Writing SRR1: " & to_hstring(ctrl.srr1); report "Writing SRR1: " & to_hstring(ctrl.srr1);


elsif r.trace_next = '1' and valid_in = '1' then elsif valid_in = '1' and ((HAS_FPU and r.fp_exception_next = '1') or r.trace_next = '1') then
-- Generate a trace interrupt rather than executing the next instruction if HAS_FPU and r.fp_exception_next = '1' then
-- or taking any asynchronous interrupt -- This is used for FP-type program interrupts that
v.f.redirect_nia := std_logic_vector(to_unsigned(16#d00#, 64)); -- become pending due to MSR[FE0,FE1] changing from 00 to non-zero.
ctrl_tmp.srr1(63 - 33) <= '1'; v.f.redirect_nia := std_logic_vector(to_unsigned(16#700#, 64));
if r.prev_op = OP_LOAD or r.prev_op = OP_ICBI or r.prev_op = OP_ICBT or ctrl_tmp.srr1(63 - 43) <= '1';
r.prev_op = OP_DCBT or r.prev_op = OP_DCBST or r.prev_op = OP_DCBF then ctrl_tmp.srr1(63 - 47) <= '1';
ctrl_tmp.srr1(63 - 35) <= '1'; else
elsif r.prev_op = OP_STORE or r.prev_op = OP_DCBZ or r.prev_op = OP_DCBTST then -- Generate a trace interrupt rather than executing the next instruction
ctrl_tmp.srr1(63 - 36) <= '1'; -- or taking any asynchronous interrupt
v.f.redirect_nia := std_logic_vector(to_unsigned(16#d00#, 64));
ctrl_tmp.srr1(63 - 33) <= '1';
if r.prev_op = OP_LOAD or r.prev_op = OP_ICBI or r.prev_op = OP_ICBT or
r.prev_op = OP_DCBT or r.prev_op = OP_DCBST or r.prev_op = OP_DCBF then
ctrl_tmp.srr1(63 - 35) <= '1';
elsif r.prev_op = OP_STORE or r.prev_op = OP_DCBZ or r.prev_op = OP_DCBTST then
ctrl_tmp.srr1(63 - 36) <= '1';
end if;
end if; end if;
exception := '1'; exception := '1';


@ -578,7 +599,19 @@ begin
-- set bit 45 to indicate privileged instruction type interrupt -- set bit 45 to indicate privileged instruction type interrupt
ctrl_tmp.srr1(63 - 45) <= '1'; ctrl_tmp.srr1(63 - 45) <= '1';
report "privileged instruction"; report "privileged instruction";

elsif not HAS_FPU and valid_in = '1' and
(e_in.insn_type = OP_FPLOAD or e_in.insn_type = OP_FPSTORE) then
-- make lfd/stfd/lfs/stfs etc. illegal in no-FPU implementations
illegal := '1';

elsif HAS_FPU and valid_in = '1' and ctrl.msr(MSR_FP) = '0' and
(e_in.unit = FPU or e_in.insn_type = OP_FPLOAD or e_in.insn_type = OP_FPSTORE) then
-- generate a floating-point unavailable interrupt
exception := '1';
v.f.redirect_nia := std_logic_vector(to_unsigned(16#800#, 64));
report "FP unavailable interrupt";

elsif valid_in = '1' and e_in.unit = ALU then elsif valid_in = '1' and e_in.unit = ALU then


report "execute nia " & to_hstring(e_in.nia); report "execute nia " & to_hstring(e_in.nia);
@ -793,6 +826,10 @@ begin
is_branch := '1'; is_branch := '1';
taken_branch := '1'; taken_branch := '1';
abs_branch := '1'; abs_branch := '1';
if HAS_FPU then
v.fp_exception_next := fp_in.exception and
(a_in(MSR_FE0) or a_in(MSR_FE1));
end if;
do_trace := '0'; do_trace := '0';


when OP_CNTZ => when OP_CNTZ =>
@ -964,6 +1001,10 @@ begin
ctrl_tmp.msr(MSR_IR) <= '1'; ctrl_tmp.msr(MSR_IR) <= '1';
ctrl_tmp.msr(MSR_DR) <= '1'; ctrl_tmp.msr(MSR_DR) <= '1';
end if; end if;
if HAS_FPU then
v.fp_exception_next := fp_in.exception and
(c_in(MSR_FE0) or c_in(MSR_FE1));
end if;
end if; end if;
when OP_MTSPR => when OP_MTSPR =>
report "MTSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) & report "MTSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) &
@ -1080,6 +1121,8 @@ begin
lv.valid := '1'; lv.valid := '1';
elsif e_in.unit = NONE then elsif e_in.unit = NONE then
illegal := '1'; illegal := '1';
elsif HAS_FPU and e_in.unit = FPU then
fv.valid := '1';
end if; end if;


elsif r.f.redirect = '1' then elsif r.f.redirect = '1' then
@ -1154,7 +1197,17 @@ begin
v.e.valid := '1'; v.e.valid := '1';
end if; end if;


if illegal = '1' then -- Generate FP-type program interrupt. fp_in.interrupt will only
-- be set during the execution of a FP instruction.
-- The case where MSR[FE0,FE1] goes from zero to non-zero is
-- handled above by mtmsrd and rfid setting v.fp_exception_next.
if HAS_FPU and fp_in.interrupt = '1' then
v.f.redirect_nia := std_logic_vector(to_unsigned(16#700#, 64));
ctrl_tmp.srr1(63 - 43) <= '1';
exception := '1';
end if;

if illegal = '1' or (HAS_FPU and fp_in.illegal = '1') then
exception := '1'; exception := '1';
v.f.redirect_nia := std_logic_vector(to_unsigned(16#700#, 64)); v.f.redirect_nia := std_logic_vector(to_unsigned(16#700#, 64));
-- Since we aren't doing Hypervisor emulation assist (0xe40) we -- Since we aren't doing Hypervisor emulation assist (0xe40) we
@ -1200,7 +1253,6 @@ begin
end if; end if;
v.e.exc_write_enable := '1'; v.e.exc_write_enable := '1';
v.e.exc_write_reg := fast_spr_num(SPR_SRR0); v.e.exc_write_reg := fast_spr_num(SPR_SRR0);
v.e.exc_write_data := r.last_nia;
report "ldst exception writing srr0=" & to_hstring(r.last_nia); report "ldst exception writing srr0=" & to_hstring(r.last_nia);
end if; end if;


@ -1225,7 +1277,7 @@ begin
lv.addr1 := a_in; lv.addr1 := a_in;
lv.addr2 := b_in; lv.addr2 := b_in;
lv.data := c_in; lv.data := c_in;
lv.write_reg := gspr_to_gpr(e_in.write_reg); lv.write_reg := e_in.write_reg;
lv.length := e_in.data_len; lv.length := e_in.data_len;
lv.byte_reverse := e_in.byte_reverse xnor ctrl.msr(MSR_LE); lv.byte_reverse := e_in.byte_reverse xnor ctrl.msr(MSR_LE);
lv.sign_extend := e_in.sign_extend; lv.sign_extend := e_in.sign_extend;
@ -1243,6 +1295,20 @@ begin
lv.virt_mode := ctrl.msr(MSR_DR); lv.virt_mode := ctrl.msr(MSR_DR);
lv.priv_mode := not ctrl.msr(MSR_PR); lv.priv_mode := not ctrl.msr(MSR_PR);
lv.mode_32bit := not ctrl.msr(MSR_SF); lv.mode_32bit := not ctrl.msr(MSR_SF);
lv.is_32bit := e_in.is_32bit;

-- Outputs to FPU
fv.op := e_in.insn_type;
fv.nia := e_in.nia;
fv.insn := e_in.insn;
fv.single := e_in.is_32bit;
fv.fe_mode := ctrl.msr(MSR_FE0) & ctrl.msr(MSR_FE1);
fv.fra := a_in;
fv.frb := b_in;
fv.frc := c_in;
fv.frt := e_in.write_reg;
fv.rc := e_in.rc;
fv.out_cr := e_in.output_cr;


-- Update registers -- Update registers
rin <= v; rin <= v;
@ -1251,6 +1317,7 @@ begin
f_out <= r.f; f_out <= r.f;
l_out <= lv; l_out <= lv;
e_out <= r.e; e_out <= r.e;
fp_out <= fv;
flush_out <= f_out.redirect; flush_out <= f_out.redirect;


exception_log <= exception; exception_log <= exception;

@ -14,6 +14,7 @@ entity toplevel is
RAM_INIT_FILE : string := "firmware.hex"; RAM_INIT_FILE : string := "firmware.hex";
RESET_LOW : boolean := true; RESET_LOW : boolean := true;
CLK_FREQUENCY : positive := 100000000; CLK_FREQUENCY : positive := 100000000;
HAS_FPU : boolean := true;
USE_LITEDRAM : boolean := false; USE_LITEDRAM : boolean := false;
NO_BRAM : boolean := false; NO_BRAM : boolean := false;
DISABLE_FLATTEN_CORE : boolean := false; DISABLE_FLATTEN_CORE : boolean := false;
@ -168,6 +169,7 @@ begin
RAM_INIT_FILE => RAM_INIT_FILE, RAM_INIT_FILE => RAM_INIT_FILE,
SIM => false, SIM => false,
CLK_FREQ => CLK_FREQUENCY, CLK_FREQ => CLK_FREQUENCY,
HAS_FPU => HAS_FPU,
HAS_DRAM => USE_LITEDRAM, HAS_DRAM => USE_LITEDRAM,
DRAM_SIZE => 256 * 1024 * 1024, DRAM_SIZE => 256 * 1024 * 1024,
DRAM_INIT_SIZE => PAYLOAD_SIZE, DRAM_INIT_SIZE => PAYLOAD_SIZE,

@ -11,6 +11,7 @@ entity toplevel is
RESET_LOW : boolean := true; RESET_LOW : boolean := true;
CLK_INPUT : positive := 100000000; CLK_INPUT : positive := 100000000;
CLK_FREQUENCY : positive := 100000000; CLK_FREQUENCY : positive := 100000000;
HAS_FPU : boolean := true;
DISABLE_FLATTEN_CORE : boolean := false; DISABLE_FLATTEN_CORE : boolean := false;
UART_IS_16550 : boolean := true UART_IS_16550 : boolean := true
); );
@ -68,6 +69,7 @@ begin
RAM_INIT_FILE => RAM_INIT_FILE, RAM_INIT_FILE => RAM_INIT_FILE,
SIM => false, SIM => false,
CLK_FREQ => CLK_FREQUENCY, CLK_FREQ => CLK_FREQUENCY,
HAS_FPU => HAS_FPU,
DISABLE_FLATTEN_CORE => DISABLE_FLATTEN_CORE, DISABLE_FLATTEN_CORE => DISABLE_FLATTEN_CORE,
UART0_IS_16550 => UART_IS_16550 UART0_IS_16550 => UART_IS_16550
) )

@ -14,6 +14,7 @@ entity toplevel is
RAM_INIT_FILE : string := "firmware.hex"; RAM_INIT_FILE : string := "firmware.hex";
RESET_LOW : boolean := true; RESET_LOW : boolean := true;
CLK_FREQUENCY : positive := 100000000; CLK_FREQUENCY : positive := 100000000;
HAS_FPU : boolean := true;
USE_LITEDRAM : boolean := false; USE_LITEDRAM : boolean := false;
NO_BRAM : boolean := false; NO_BRAM : boolean := false;
DISABLE_FLATTEN_CORE : boolean := false; DISABLE_FLATTEN_CORE : boolean := false;
@ -120,6 +121,7 @@ begin
RAM_INIT_FILE => RAM_INIT_FILE, RAM_INIT_FILE => RAM_INIT_FILE,
SIM => false, SIM => false,
CLK_FREQ => CLK_FREQUENCY, CLK_FREQ => CLK_FREQUENCY,
HAS_FPU => HAS_FPU,
HAS_DRAM => USE_LITEDRAM, HAS_DRAM => USE_LITEDRAM,
DRAM_SIZE => 512 * 1024 * 1024, DRAM_SIZE => 512 * 1024 * 1024,
DRAM_INIT_SIZE => PAYLOAD_SIZE, DRAM_INIT_SIZE => PAYLOAD_SIZE,

2568
fpu.vhdl

File diff suppressed because it is too large Load Diff

@ -2,6 +2,9 @@ library ieee;
use ieee.std_logic_1164.all; use ieee.std_logic_1164.all;
use ieee.numeric_std.all; use ieee.numeric_std.all;


library work;
use work.common.all;

entity gpr_hazard is entity gpr_hazard is
generic ( generic (
PIPELINE_DEPTH : natural := 1 PIPELINE_DEPTH : natural := 1
@ -15,13 +18,13 @@ entity gpr_hazard is
issuing : in std_ulogic; issuing : in std_ulogic;


gpr_write_valid_in : in std_ulogic; gpr_write_valid_in : in std_ulogic;
gpr_write_in : in std_ulogic_vector(5 downto 0); gpr_write_in : in gspr_index_t;
bypass_avail : in std_ulogic; bypass_avail : in std_ulogic;
gpr_read_valid_in : in std_ulogic; gpr_read_valid_in : in std_ulogic;
gpr_read_in : in std_ulogic_vector(5 downto 0); gpr_read_in : in gspr_index_t;


ugpr_write_valid : in std_ulogic; ugpr_write_valid : in std_ulogic;
ugpr_write_reg : in std_ulogic_vector(5 downto 0); ugpr_write_reg : in gspr_index_t;


stall_out : out std_ulogic; stall_out : out std_ulogic;
use_bypass : out std_ulogic use_bypass : out std_ulogic
@ -31,9 +34,9 @@ architecture behaviour of gpr_hazard is
type pipeline_entry_type is record type pipeline_entry_type is record
valid : std_ulogic; valid : std_ulogic;
bypass : std_ulogic; bypass : std_ulogic;
gpr : std_ulogic_vector(5 downto 0); gpr : gspr_index_t;
ugpr_valid : std_ulogic; ugpr_valid : std_ulogic;
ugpr : std_ulogic_vector(5 downto 0); ugpr : gspr_index_t;
end record; end record;
constant pipeline_entry_init : pipeline_entry_type := (valid => '0', bypass => '0', gpr => (others => '0'), constant pipeline_entry_init : pipeline_entry_type := (valid => '0', bypass => '0', gpr => (others => '0'),
ugpr_valid => '0', ugpr => (others => '0')); ugpr_valid => '0', ugpr => (others => '0'));

@ -25,6 +25,10 @@ package helpers is
function byte_reverse(val: std_ulogic_vector(63 downto 0); size: integer) return std_ulogic_vector; function byte_reverse(val: std_ulogic_vector(63 downto 0); size: integer) return std_ulogic_vector;


function sign_extend(val: std_ulogic_vector(63 downto 0); size: natural) return std_ulogic_vector; function sign_extend(val: std_ulogic_vector(63 downto 0); size: natural) return std_ulogic_vector;

function bit_reverse(a: std_ulogic_vector) return std_ulogic_vector;
function bit_number(a: std_ulogic_vector(63 downto 0)) return std_ulogic_vector;
function count_left_zeroes(val: std_ulogic_vector) return std_ulogic_vector;
end package helpers; end package helpers;


package body helpers is package body helpers is
@ -206,4 +210,53 @@ package body helpers is
return std_ulogic_vector(ret); return std_ulogic_vector(ret);


end; end;

-- Reverse the order of bits in a word
function bit_reverse(a: std_ulogic_vector) return std_ulogic_vector is
variable ret: std_ulogic_vector(a'left downto a'right);
begin
for i in a'right to a'left loop
ret(a'left + a'right - i) := a(i);
end loop;
return ret;
end;

-- If there is only one bit set in a doubleword, return its bit number
-- (counting from the right). Each bit of the result is obtained by
-- ORing together 32 bits of the input:
-- bit 0 = a[1] or a[3] or a[5] or ...
-- bit 1 = a[2] or a[3] or a[6] or a[7] or ...
-- bit 2 = a[4..7] or a[12..15] or ...
-- bit 5 = a[32..63] ORed together
function bit_number(a: std_ulogic_vector(63 downto 0)) return std_ulogic_vector is
variable ret: std_ulogic_vector(5 downto 0);
variable stride: natural;
variable bit: std_ulogic;
variable k: natural;
begin
stride := 2;
for i in 0 to 5 loop
bit := '0';
for j in 0 to (64 / stride) - 1 loop
k := j * stride;
bit := bit or (or a(k + stride - 1 downto k + (stride / 2)));
end loop;
ret(i) := bit;
stride := stride * 2;
end loop;
return ret;
end;

-- Count leading zeroes operation
-- Assumes the value passed in is not zero (if it is, zero is returned)
function count_left_zeroes(val: std_ulogic_vector) return std_ulogic_vector is
variable rev: std_ulogic_vector(val'left downto val'right);
variable sum: std_ulogic_vector(val'left downto val'right);
variable onehot: std_ulogic_vector(val'left downto val'right);
begin
rev := bit_reverse(val);
sum := std_ulogic_vector(- signed(rev));
onehot := sum and rev;
return bit_number(std_ulogic_vector(resize(unsigned(onehot), 64)));
end;
end package body helpers; end package body helpers;

@ -37,6 +37,11 @@ package insn_helpers is
function insn_sh (insn_in : std_ulogic_vector) return std_ulogic_vector; function insn_sh (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_me (insn_in : std_ulogic_vector) return std_ulogic_vector; function insn_me (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_mb (insn_in : std_ulogic_vector) return std_ulogic_vector; function insn_mb (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_frt (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_fra (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_frb (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_frc (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_u (insn_in : std_ulogic_vector) return std_ulogic_vector;
end package insn_helpers; end package insn_helpers;


package body insn_helpers is package body insn_helpers is
@ -214,4 +219,29 @@ package body insn_helpers is
begin begin
return insn_in(5) & insn_in(10 downto 6); return insn_in(5) & insn_in(10 downto 6);
end; end;

function insn_frt(insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(25 downto 21);
end;

function insn_fra(insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(20 downto 16);
end;

function insn_frb(insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(15 downto 11);
end;

function insn_frc(insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(10 downto 6);
end;

function insn_u(insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(15 downto 12);
end;
end package body insn_helpers; end package body insn_helpers;

@ -5,12 +5,15 @@ use ieee.numeric_std.all;
library work; library work;
use work.decode_types.all; use work.decode_types.all;
use work.common.all; use work.common.all;
use work.insn_helpers.all;
use work.helpers.all;


-- 2 cycle LSU -- 2 cycle LSU
-- We calculate the address in the first cycle -- We calculate the address in the first cycle


entity loadstore1 is entity loadstore1 is
generic ( generic (
HAS_FPU : boolean := true;
-- Non-zero to enable log data collection -- Non-zero to enable log data collection
LOG_LENGTH : natural := 0 LOG_LENGTH : natural := 0
); );
@ -42,10 +45,12 @@ architecture behave of loadstore1 is


-- State machine for unaligned loads/stores -- State machine for unaligned loads/stores
type state_t is (IDLE, -- ready for instruction type state_t is (IDLE, -- ready for instruction
FPR_CONV, -- converting double to float for store
SECOND_REQ, -- send 2nd request of unaligned xfer SECOND_REQ, -- send 2nd request of unaligned xfer
ACK_WAIT, -- waiting for ack from dcache ACK_WAIT, -- waiting for ack from dcache
MMU_LOOKUP, -- waiting for MMU to look up translation MMU_LOOKUP, -- waiting for MMU to look up translation
TLBIE_WAIT, -- waiting for MMU to finish doing a tlbie TLBIE_WAIT, -- waiting for MMU to finish doing a tlbie
FINISH_LFS, -- write back converted SP data for lfs*
COMPLETE -- extra cycle to complete an operation COMPLETE -- extra cycle to complete an operation
); );


@ -58,7 +63,7 @@ architecture behave of loadstore1 is
addr : std_ulogic_vector(63 downto 0); addr : std_ulogic_vector(63 downto 0);
store_data : std_ulogic_vector(63 downto 0); store_data : std_ulogic_vector(63 downto 0);
load_data : std_ulogic_vector(63 downto 0); load_data : std_ulogic_vector(63 downto 0);
write_reg : gpr_index_t; write_reg : gspr_index_t;
length : std_ulogic_vector(3 downto 0); length : std_ulogic_vector(3 downto 0);
byte_reverse : std_ulogic; byte_reverse : std_ulogic;
sign_extend : std_ulogic; sign_extend : std_ulogic;
@ -86,6 +91,11 @@ architecture behave of loadstore1 is
do_update : std_ulogic; do_update : std_ulogic;
extra_cycle : std_ulogic; extra_cycle : std_ulogic;
mode_32bit : std_ulogic; mode_32bit : std_ulogic;
load_sp : std_ulogic;
ld_sp_data : std_ulogic_vector(31 downto 0);
ld_sp_nz : std_ulogic;
ld_sp_lz : std_ulogic_vector(5 downto 0);
st_sp_data : std_ulogic_vector(31 downto 0);
end record; end record;


type byte_sel_t is array(0 to 7) of std_ulogic; type byte_sel_t is array(0 to 7) of std_ulogic;
@ -95,6 +105,9 @@ architecture behave of loadstore1 is
signal r, rin : reg_stage_t; signal r, rin : reg_stage_t;
signal lsu_sum : std_ulogic_vector(63 downto 0); signal lsu_sum : std_ulogic_vector(63 downto 0);


signal store_sp_data : std_ulogic_vector(31 downto 0);
signal load_dp_data : std_ulogic_vector(63 downto 0);

-- Generate byte enables from sizes -- Generate byte enables from sizes
function length_to_sel(length : in std_logic_vector(3 downto 0)) return std_ulogic_vector is function length_to_sel(length : in std_logic_vector(3 downto 0)) return std_ulogic_vector is
begin begin
@ -125,6 +138,72 @@ architecture behave of loadstore1 is
to_integer(unsigned(address)))); to_integer(unsigned(address))));
end function xfer_data_sel; end function xfer_data_sel;


-- 23-bit right shifter for DP -> SP float conversions
function shifter_23r(frac: std_ulogic_vector(22 downto 0); shift: unsigned(4 downto 0))
return std_ulogic_vector is
variable fs1 : std_ulogic_vector(22 downto 0);
variable fs2 : std_ulogic_vector(22 downto 0);
begin
case shift(1 downto 0) is
when "00" =>
fs1 := frac;
when "01" =>
fs1 := '0' & frac(22 downto 1);
when "10" =>
fs1 := "00" & frac(22 downto 2);
when others =>
fs1 := "000" & frac(22 downto 3);
end case;
case shift(4 downto 2) is
when "000" =>
fs2 := fs1;
when "001" =>
fs2 := x"0" & fs1(22 downto 4);
when "010" =>
fs2 := x"00" & fs1(22 downto 8);
when "011" =>
fs2 := x"000" & fs1(22 downto 12);
when "100" =>
fs2 := x"0000" & fs1(22 downto 16);
when others =>
fs2 := x"00000" & fs1(22 downto 20);
end case;
return fs2;
end;

-- 23-bit left shifter for SP -> DP float conversions
function shifter_23l(frac: std_ulogic_vector(22 downto 0); shift: unsigned(4 downto 0))
return std_ulogic_vector is
variable fs1 : std_ulogic_vector(22 downto 0);
variable fs2 : std_ulogic_vector(22 downto 0);
begin
case shift(1 downto 0) is
when "00" =>
fs1 := frac;
when "01" =>
fs1 := frac(21 downto 0) & '0';
when "10" =>
fs1 := frac(20 downto 0) & "00";
when others =>
fs1 := frac(19 downto 0) & "000";
end case;
case shift(4 downto 2) is
when "000" =>
fs2 := fs1;
when "001" =>
fs2 := fs1(18 downto 0) & x"0" ;
when "010" =>
fs2 := fs1(14 downto 0) & x"00";
when "011" =>
fs2 := fs1(10 downto 0) & x"000";
when "100" =>
fs2 := fs1(6 downto 0) & x"0000";
when others =>
fs2 := fs1(2 downto 0) & x"00000";
end case;
return fs2;
end;

begin begin
-- Calculate the address in the first cycle -- Calculate the address in the first cycle
lsu_sum <= std_ulogic_vector(unsigned(l_in.addr1) + unsigned(l_in.addr2)) when l_in.valid = '1' else (others => '0'); lsu_sum <= std_ulogic_vector(unsigned(l_in.addr1) + unsigned(l_in.addr2)) when l_in.valid = '1' else (others => '0');
@ -142,6 +221,59 @@ begin
end if; end if;
end process; end process;


ls_fp_conv: if HAS_FPU generate
-- Convert DP data to SP for stfs
dp_to_sp: process(all)
variable exp : unsigned(10 downto 0);
variable frac : std_ulogic_vector(22 downto 0);
variable shift : unsigned(4 downto 0);
begin
store_sp_data(31) <= l_in.data(63);
store_sp_data(30 downto 0) <= (others => '0');
exp := unsigned(l_in.data(62 downto 52));
if exp > 896 then
store_sp_data(30) <= l_in.data(62);
store_sp_data(29 downto 0) <= l_in.data(58 downto 29);
elsif exp >= 874 then
-- denormalization required
frac := '1' & l_in.data(51 downto 30);
shift := 0 - exp(4 downto 0);
store_sp_data(22 downto 0) <= shifter_23r(frac, shift);
end if;
end process;

-- Convert SP data to DP for lfs
sp_to_dp: process(all)
variable exp : unsigned(7 downto 0);
variable exp_dp : unsigned(10 downto 0);
variable exp_nz : std_ulogic;
variable exp_ao : std_ulogic;
variable frac : std_ulogic_vector(22 downto 0);
variable frac_shift : unsigned(4 downto 0);
begin
frac := r.ld_sp_data(22 downto 0);
exp := unsigned(r.ld_sp_data(30 downto 23));
exp_nz := or (r.ld_sp_data(30 downto 23));
exp_ao := and (r.ld_sp_data(30 downto 23));
frac_shift := (others => '0');
if exp_ao = '1' then
exp_dp := to_unsigned(2047, 11); -- infinity or NaN
elsif exp_nz = '1' then
exp_dp := 896 + resize(exp, 11); -- finite normalized value
elsif r.ld_sp_nz = '0' then
exp_dp := to_unsigned(0, 11); -- zero
else
-- denormalized SP operand, need to normalize
exp_dp := 896 - resize(unsigned(r.ld_sp_lz), 11);
frac_shift := unsigned(r.ld_sp_lz(4 downto 0)) + 1;
end if;
load_dp_data(63) <= r.ld_sp_data(31);
load_dp_data(62 downto 52) <= std_ulogic_vector(exp_dp);
load_dp_data(51 downto 29) <= shifter_23l(frac, frac_shift);
load_dp_data(28 downto 0) <= (others => '0');
end process;
end generate;

loadstore1_1: process(all) loadstore1_1: process(all)
variable v : reg_stage_t; variable v : reg_stage_t;
variable brev_lenm1 : unsigned(2 downto 0); variable brev_lenm1 : unsigned(2 downto 0);
@ -162,6 +294,9 @@ begin
variable data_permuted : std_ulogic_vector(63 downto 0); variable data_permuted : std_ulogic_vector(63 downto 0);
variable data_trimmed : std_ulogic_vector(63 downto 0); variable data_trimmed : std_ulogic_vector(63 downto 0);
variable store_data : std_ulogic_vector(63 downto 0); variable store_data : std_ulogic_vector(63 downto 0);
variable data_in : std_ulogic_vector(63 downto 0);
variable byte_rev : std_ulogic;
variable length : std_ulogic_vector(3 downto 0);
variable use_second : byte_sel_t; variable use_second : byte_sel_t;
variable trim_ctl : trim_ctl_t; variable trim_ctl : trim_ctl_t;
variable negative : std_ulogic; variable negative : std_ulogic;
@ -173,6 +308,8 @@ begin
variable mmu_mtspr : std_ulogic; variable mmu_mtspr : std_ulogic;
variable itlb_fault : std_ulogic; variable itlb_fault : std_ulogic;
variable misaligned : std_ulogic; variable misaligned : std_ulogic;
variable fp_reg_conv : std_ulogic;
variable lfs_done : std_ulogic;
begin begin
v := r; v := r;
req := '0'; req := '0';
@ -182,8 +319,10 @@ begin
sprn := std_ulogic_vector(to_unsigned(decode_spr_num(l_in.insn), 10)); sprn := std_ulogic_vector(to_unsigned(decode_spr_num(l_in.insn), 10));
dsisr := (others => '0'); dsisr := (others => '0');
mmureq := '0'; mmureq := '0';
fp_reg_conv := '0';


write_enable := '0'; write_enable := '0';
lfs_done := '0';


do_update := r.do_update; do_update := r.do_update;
v.do_update := '0'; v.do_update := '0';
@ -242,19 +381,38 @@ begin
end case; end case;
end loop; end loop;


-- Byte reversing and rotating for stores if HAS_FPU then
-- Done in the first cycle (when l_in.valid = 1) -- Single-precision FP conversion
v.st_sp_data := store_sp_data;
v.ld_sp_data := data_trimmed(31 downto 0);
v.ld_sp_nz := or (data_trimmed(22 downto 0));
v.ld_sp_lz := count_left_zeroes(data_trimmed(22 downto 0));
end if;

-- Byte reversing and rotating for stores.
-- Done in the first cycle (when l_in.valid = 1) for integer stores
-- and DP float stores, and in the second cycle for SP float stores.
store_data := r.store_data; store_data := r.store_data;
if l_in.valid = '1' then if l_in.valid = '1' or (HAS_FPU and r.state = FPR_CONV) then
byte_offset := unsigned(lsu_sum(2 downto 0)); if HAS_FPU and r.state = FPR_CONV then
data_in := x"00000000" & r.st_sp_data;
byte_offset := unsigned(r.addr(2 downto 0));
byte_rev := r.byte_reverse;
length := r.length;
else
data_in := l_in.data;
byte_offset := unsigned(lsu_sum(2 downto 0));
byte_rev := l_in.byte_reverse;
length := l_in.length;
end if;
brev_lenm1 := "000"; brev_lenm1 := "000";
if l_in.byte_reverse = '1' then if byte_rev = '1' then
brev_lenm1 := unsigned(l_in.length(2 downto 0)) - 1; brev_lenm1 := unsigned(length(2 downto 0)) - 1;
end if; end if;
for i in 0 to 7 loop for i in 0 to 7 loop
k := (to_unsigned(i, 3) - byte_offset) xor brev_lenm1; k := (to_unsigned(i, 3) - byte_offset) xor brev_lenm1;
j := to_integer(k) * 8; j := to_integer(k) * 8;
store_data(i * 8 + 7 downto i * 8) := l_in.data(j + 7 downto j); store_data(i * 8 + 7 downto i * 8) := data_in(j + 7 downto j);
end loop; end loop;
end if; end if;
v.store_data := store_data; v.store_data := store_data;
@ -289,6 +447,14 @@ begin
case r.state is case r.state is
when IDLE => when IDLE =>


when FPR_CONV =>
req := '1';
if r.second_bytes /= "00000000" then
v.state := SECOND_REQ;
else
v.state := ACK_WAIT;
end if;

when SECOND_REQ => when SECOND_REQ =>
req := '1'; req := '1';
v.state := ACK_WAIT; v.state := ACK_WAIT;
@ -320,8 +486,13 @@ begin
v.load_data := data_permuted; v.load_data := data_permuted;
end if; end if;
else else
write_enable := r.load; write_enable := r.load and not r.load_sp;
if r.extra_cycle = '1' then if HAS_FPU and r.load_sp = '1' then
-- SP to DP conversion takes a cycle
-- Write back rA update in this cycle if needed
do_update := r.update;
v.state := FINISH_LFS;
elsif r.extra_cycle = '1' then
-- loads with rA update need an extra cycle -- loads with rA update need an extra cycle
v.state := COMPLETE; v.state := COMPLETE;
v.do_update := r.update; v.do_update := r.update;
@ -359,6 +530,9 @@ begin


when TLBIE_WAIT => when TLBIE_WAIT =>


when FINISH_LFS =>
lfs_done := '1';

when COMPLETE => when COMPLETE =>
exception := r.align_intr; exception := r.align_intr;


@ -392,6 +566,7 @@ begin
v.nc := l_in.ci; v.nc := l_in.ci;
v.virt_mode := l_in.virt_mode; v.virt_mode := l_in.virt_mode;
v.priv_mode := l_in.priv_mode; v.priv_mode := l_in.priv_mode;
v.load_sp := '0';
v.wait_dcache := '0'; v.wait_dcache := '0';
v.wait_mmu := '0'; v.wait_mmu := '0';
v.do_update := '0'; v.do_update := '0';
@ -431,6 +606,27 @@ begin
v.align_intr := v.nc; v.align_intr := v.nc;
req := '1'; req := '1';
v.dcbz := '1'; v.dcbz := '1';
when OP_FPSTORE =>
if HAS_FPU then
if l_in.is_32bit = '1' then
v.state := FPR_CONV;
fp_reg_conv := '1';
else
req := '1';
end if;
end if;
when OP_FPLOAD =>
if HAS_FPU then
v.load := '1';
req := '1';
-- Allow an extra cycle for SP->DP precision conversion
-- or RA update
v.extra_cycle := l_in.update;
if l_in.is_32bit = '1' then
v.load_sp := '1';
v.extra_cycle := '1';
end if;
end if;
when OP_TLBIE => when OP_TLBIE =>
mmureq := '1'; mmureq := '1';
v.tlbie := '1'; v.tlbie := '1';
@ -486,7 +682,7 @@ begin
end if; end if;
end if; end if;


v.busy := req or mmureq or mmu_mtspr; v.busy := req or mmureq or mmu_mtspr or fp_reg_conv;
end if; end if;


-- Update outputs to dcache -- Update outputs to dcache
@ -523,8 +719,12 @@ begin
l_out.write_data <= r.sprval; l_out.write_data <= r.sprval;
elsif do_update = '1' then elsif do_update = '1' then
l_out.write_enable <= '1'; l_out.write_enable <= '1';
l_out.write_reg <= r.update_reg; l_out.write_reg <= gpr_to_gspr(r.update_reg);
l_out.write_data <= r.addr; l_out.write_data <= r.addr;
elsif lfs_done = '1' then
l_out.write_enable <= '1';
l_out.write_reg <= r.write_reg;
l_out.write_data <= load_dp_data;
else else
l_out.write_enable <= write_enable; l_out.write_enable <= write_enable;
l_out.write_reg <= r.write_reg; l_out.write_reg <= r.write_reg;

@ -23,6 +23,7 @@ filesets:
- cr_hazard.vhdl - cr_hazard.vhdl
- control.vhdl - control.vhdl
- execute1.vhdl - execute1.vhdl
- fpu.vhdl
- loadstore1.vhdl - loadstore1.vhdl
- mmu.vhdl - mmu.vhdl
- dcache.vhdl - dcache.vhdl
@ -132,6 +133,7 @@ targets:
- disable_flatten_core - disable_flatten_core
- log_length=2048 - log_length=2048
- uart_is_16550 - uart_is_16550
- has_fpu
tools: tools:
vivado: {part : xc7a100tcsg324-1} vivado: {part : xc7a100tcsg324-1}
toplevel : toplevel toplevel : toplevel
@ -215,6 +217,7 @@ targets:
- spi_flash_offset=10485760 - spi_flash_offset=10485760
- log_length=2048 - log_length=2048
- uart_is_16550 - uart_is_16550
- has_fpu
tools: tools:
vivado: {part : xc7a200tsbg484-1} vivado: {part : xc7a200tsbg484-1}
toplevel : toplevel toplevel : toplevel
@ -231,6 +234,7 @@ targets:
- spi_flash_offset=10485760 - spi_flash_offset=10485760
- log_length=2048 - log_length=2048
- uart_is_16550 - uart_is_16550
- has_fpu
generate: [litedram_nexys_video] generate: [litedram_nexys_video]
tools: tools:
vivado: {part : xc7a200tsbg484-1} vivado: {part : xc7a200tsbg484-1}
@ -249,6 +253,7 @@ targets:
- log_length=512 - log_length=512
- uart_is_16550 - uart_is_16550
- has_uart1 - has_uart1
- has_fpu=false
tools: tools:
vivado: {part : xc7a35ticsg324-1L} vivado: {part : xc7a35ticsg324-1L}
toplevel : toplevel toplevel : toplevel
@ -267,6 +272,7 @@ targets:
- log_length=512 - log_length=512
- uart_is_16550 - uart_is_16550
- has_uart1 - has_uart1
- has_fpu=false
generate: [litedram_arty, liteeth_arty] generate: [litedram_arty, liteeth_arty]
tools: tools:
vivado: {part : xc7a35ticsg324-1L} vivado: {part : xc7a35ticsg324-1L}
@ -285,6 +291,7 @@ targets:
- log_length=2048 - log_length=2048
- uart_is_16550 - uart_is_16550
- has_uart1 - has_uart1
- has_fpu
tools: tools:
vivado: {part : xc7a100ticsg324-1L} vivado: {part : xc7a100ticsg324-1L}
toplevel : toplevel toplevel : toplevel
@ -303,6 +310,7 @@ targets:
- log_length=2048 - log_length=2048
- uart_is_16550 - uart_is_16550
- has_uart1 - has_uart1
- has_fpu
generate: [litedram_arty, liteeth_arty] generate: [litedram_arty, liteeth_arty]
tools: tools:
vivado: {part : xc7a100ticsg324-1L} vivado: {part : xc7a100ticsg324-1L}
@ -320,6 +328,7 @@ targets:
- disable_flatten_core - disable_flatten_core
- log_length=512 - log_length=512
- uart_is_16550 - uart_is_16550
- has_fpu=false
tools: tools:
vivado: {part : xc7a35tcpg236-1} vivado: {part : xc7a35tcpg236-1}
toplevel : toplevel toplevel : toplevel
@ -380,6 +389,12 @@ parameters:
paramtype : generic paramtype : generic
default : 100000000 default : 100000000


has_fpu:
datatype : bool
description : Include a floating-point unit in the core
paramtype : generic
default : true

disable_flatten_core: disable_flatten_core:
datatype : bool datatype : bool
description : Prevent Vivado from flattening the main core components description : Prevent Vivado from flattening the main core components

@ -8,6 +8,7 @@ use work.common.all;
entity register_file is entity register_file is
generic ( generic (
SIM : boolean := false; SIM : boolean := false;
HAS_FPU : boolean := true;
-- Non-zero to enable log data collection -- Non-zero to enable log data collection
LOG_LENGTH : natural := 0 LOG_LENGTH : natural := 0
); );
@ -28,12 +29,12 @@ entity register_file is
sim_dump : in std_ulogic; sim_dump : in std_ulogic;
sim_dump_done : out std_ulogic; sim_dump_done : out std_ulogic;


log_out : out std_ulogic_vector(70 downto 0) log_out : out std_ulogic_vector(71 downto 0)
); );
end entity register_file; end entity register_file;


architecture behaviour of register_file is architecture behaviour of register_file is
type regfile is array(0 to 63) of std_ulogic_vector(63 downto 0); type regfile is array(0 to 127) of std_ulogic_vector(63 downto 0);
signal registers : regfile := (others => (others => '0')); signal registers : regfile := (others => (others => '0'));
signal rd_port_b : std_ulogic_vector(63 downto 0); signal rd_port_b : std_ulogic_vector(63 downto 0);
signal dbg_data : std_ulogic_vector(63 downto 0); signal dbg_data : std_ulogic_vector(63 downto 0);
@ -41,53 +42,73 @@ architecture behaviour of register_file is
begin begin
-- synchronous writes -- synchronous writes
register_write_0: process(clk) register_write_0: process(clk)
variable w_addr : gspr_index_t;
begin begin
if rising_edge(clk) then if rising_edge(clk) then
if w_in.write_enable = '1' then if w_in.write_enable = '1' then
if w_in.write_reg(5) = '0' then w_addr := w_in.write_reg;
report "Writing GPR " & to_hstring(w_in.write_reg) & " " & to_hstring(w_in.write_data); if HAS_FPU and w_addr(6) = '1' then
else report "Writing FPR " & to_hstring(w_addr(4 downto 0)) & " " & to_hstring(w_in.write_data);
report "Writing GSPR " & to_hstring(w_in.write_reg) & " " & to_hstring(w_in.write_data); else
end if; w_addr(6) := '0';
if w_addr(5) = '0' then
report "Writing GPR " & to_hstring(w_addr) & " " & to_hstring(w_in.write_data);
else
report "Writing GSPR " & to_hstring(w_addr) & " " & to_hstring(w_in.write_data);
end if;
end if;
assert not(is_x(w_in.write_data)) and not(is_x(w_in.write_reg)) severity failure; assert not(is_x(w_in.write_data)) and not(is_x(w_in.write_reg)) severity failure;
registers(to_integer(unsigned(w_in.write_reg))) <= w_in.write_data; registers(to_integer(unsigned(w_addr))) <= w_in.write_data;
end if; end if;
end if; end if;
end process register_write_0; end process register_write_0;


-- asynchronous reads -- asynchronous reads
register_read_0: process(all) register_read_0: process(all)
variable b_addr : gspr_index_t; variable a_addr, b_addr, c_addr : gspr_index_t;
variable w_addr : gspr_index_t;
begin begin
a_addr := d_in.read1_reg;
b_addr := d_in.read2_reg;
c_addr := d_in.read3_reg;
w_addr := w_in.write_reg;
if not HAS_FPU then
-- Make it obvious that we only want 64 GSPRs for a no-FPU implementation
a_addr(6) := '0';
b_addr(6) := '0';
c_addr(6) := '0';
w_addr(6) := '0';
end if;
if d_in.read1_enable = '1' then if d_in.read1_enable = '1' then
report "Reading GPR " & to_hstring(d_in.read1_reg) & " " & to_hstring(registers(to_integer(unsigned(d_in.read1_reg)))); report "Reading GPR " & to_hstring(a_addr) & " " & to_hstring(registers(to_integer(unsigned(a_addr))));
end if; end if;
if d_in.read2_enable = '1' then if d_in.read2_enable = '1' then
report "Reading GPR " & to_hstring(d_in.read2_reg) & " " & to_hstring(registers(to_integer(unsigned(d_in.read2_reg)))); report "Reading GPR " & to_hstring(b_addr) & " " & to_hstring(registers(to_integer(unsigned(b_addr))));
end if; end if;
if d_in.read3_enable = '1' then if d_in.read3_enable = '1' then
report "Reading GPR " & to_hstring(d_in.read3_reg) & " " & to_hstring(registers(to_integer(unsigned(d_in.read3_reg)))); report "Reading GPR " & to_hstring(c_addr) & " " & to_hstring(registers(to_integer(unsigned(c_addr))));
end if; end if;
d_out.read1_data <= registers(to_integer(unsigned(d_in.read1_reg))); d_out.read1_data <= registers(to_integer(unsigned(a_addr)));
-- B read port is multiplexed with reads from the debug circuitry -- B read port is multiplexed with reads from the debug circuitry
if d_in.read2_enable = '0' and dbg_gpr_req = '1' and dbg_ack = '0' then if d_in.read2_enable = '0' and dbg_gpr_req = '1' and dbg_ack = '0' then
b_addr := dbg_gpr_addr; b_addr := dbg_gpr_addr;
else if not HAS_FPU then
b_addr := d_in.read2_reg; b_addr(6) := '0';
end if;
end if; end if;
rd_port_b <= registers(to_integer(unsigned(b_addr))); rd_port_b <= registers(to_integer(unsigned(b_addr)));
d_out.read2_data <= rd_port_b; d_out.read2_data <= rd_port_b;
d_out.read3_data <= registers(to_integer(unsigned(gpr_to_gspr(d_in.read3_reg)))); d_out.read3_data <= registers(to_integer(unsigned(c_addr)));


-- Forward any written data -- Forward any written data
if w_in.write_enable = '1' then if w_in.write_enable = '1' then
if d_in.read1_reg = w_in.write_reg then if a_addr = w_addr then
d_out.read1_data <= w_in.write_data; d_out.read1_data <= w_in.write_data;
end if; end if;
if d_in.read2_reg = w_in.write_reg then if b_addr = w_addr then
d_out.read2_data <= w_in.write_data; d_out.read2_data <= w_in.write_data;
end if; end if;
if gpr_to_gspr(d_in.read3_reg) = w_in.write_reg then if c_addr = w_addr then
d_out.read3_data <= w_in.write_data; d_out.read3_data <= w_in.write_data;
end if; end if;
end if; end if;
@ -136,7 +157,7 @@ begin
end generate; end generate;


rf_log: if LOG_LENGTH > 0 generate rf_log: if LOG_LENGTH > 0 generate
signal log_data : std_ulogic_vector(70 downto 0); signal log_data : std_ulogic_vector(71 downto 0);
begin begin
reg_log: process(clk) reg_log: process(clk)
begin begin

@ -58,7 +58,7 @@ struct log_entry {
u64 ls_lo_valid: 1; u64 ls_lo_valid: 1;
u64 ls_eo_except: 1; u64 ls_eo_except: 1;
u64 ls_stall_out: 1; u64 ls_stall_out: 1;
u64 pad2: 2; u64 pad2: 1;
u64 dc_state: 3; u64 dc_state: 3;
u64 dc_ra_valid: 1; u64 dc_ra_valid: 1;
u64 dc_tlb_way: 3; u64 dc_tlb_way: 3;
@ -74,7 +74,7 @@ struct log_entry {
u64 cr_wr_mask: 8; u64 cr_wr_mask: 8;
u64 cr_wr_data: 4; u64 cr_wr_data: 4;
u64 cr_wr_enable: 1; u64 cr_wr_enable: 1;
u64 reg_wr_reg: 6; u64 reg_wr_reg: 7;
u64 reg_wr_enable: 1; u64 reg_wr_enable: 1;


u64 reg_wr_data; u64 reg_wr_data;
@ -84,17 +84,17 @@ struct log_entry {
#define FLGA(i, y, z) (log.i? y: z) #define FLGA(i, y, z) (log.i? y: z)
#define PNIA(f) (full_nia[log.f] & 0xff) #define PNIA(f) (full_nia[log.f] & 0xff)


const char *units[4] = { "--", "al", "ls", "?3" }; const char *units[4] = { "--", "al", "ls", "fp" };
const char *ops[64] = const char *ops[64] =
{ {
"illegal", "nop ", "add ", "and ", "attn ", "b ", "bc ", "bcreg ", "illegal", "nop ", "add ", "and ", "attn ", "b ", "bc ", "bcreg ",
"bperm ", "cmp ", "cmpb ", "cmpeqb ", "cmprb ", "cntz ", "crop ", "darn ", "bperm ", "cmp ", "cmpb ", "cmpeqb ", "cmprb ", "cntz ", "crop ", "darn ",
"dcbf ", "dcbst ", "dcbt ", "dcbtst ", "dcbz ", "div ", "dive ", "exts ", "dcbf ", "dcbst ", "dcbt ", "dcbtst ", "dcbz ", "div ", "dive ", "exts ",
"extswsl", "icbi ", "icbt ", "isel ", "isync ", "ld ", "st ", "mcrxrx ", "extswsl", "fpop ", "fpopi ", "icbi ", "icbt ", "isel ", "isync ", "ld ",
"mfcr ", "mfmsr ", "mfspr ", "mod ", "mtcrf ", "mtmsr ", "mtspr ", "mull64 ", "st ", "fpload ", "fpstore", "mcrxrx ", "mfcr ", "mfmsr ", "mfspr ", "mod ",
"mulh64 ", "mulh32 ", "or ", "popcnt ", "prty ", "rfid ", "rlc ", "rlcl ", "mtcrf ", "mtmsr ", "mtspr ", "mull64 ", "mulh64 ", "mulh32 ", "or ", "popcnt ",
"rlcr ", "sc ", "setb ", "shl ", "shr ", "sync ", "tlbie ", "trap ", "prty ", "rfid ", "rlc ", "rlcl ", "rlcr ", "sc ", "setb ", "shl ",
"xor ", "bcd ", "addg6s ", "ffail ", "?60 ", "?61 ", "?62 ", "?63 " "shr ", "sync ", "tlbie ", "trap ", "xor ", "bcd ", "addg6s ", "ffail ",
}; };


const char *spr_names[13] = const char *spr_names[13] =

@ -52,6 +52,7 @@ entity soc is
RAM_INIT_FILE : string; RAM_INIT_FILE : string;
CLK_FREQ : positive; CLK_FREQ : positive;
SIM : boolean; SIM : boolean;
HAS_FPU : boolean := true;
DISABLE_FLATTEN_CORE : boolean := false; DISABLE_FLATTEN_CORE : boolean := false;
HAS_DRAM : boolean := false; HAS_DRAM : boolean := false;
DRAM_SIZE : integer := 0; DRAM_SIZE : integer := 0;
@ -253,6 +254,7 @@ begin
processor: entity work.core processor: entity work.core
generic map( generic map(
SIM => SIM, SIM => SIM,
HAS_FPU => HAS_FPU,
DISABLE_FLATTEN => DISABLE_FLATTEN_CORE, DISABLE_FLATTEN => DISABLE_FLATTEN_CORE,
ALT_RESET_ADDRESS => (23 downto 0 => '0', others => '1'), ALT_RESET_ADDRESS => (23 downto 0 => '0', others => '1'),
LOG_LENGTH => LOG_LENGTH LOG_LENGTH => LOG_LENGTH

@ -0,0 +1,3 @@
TEST=fpu

include ../Makefile.test

File diff suppressed because it is too large Load Diff

@ -0,0 +1,132 @@
/* Copyright 2013-2014 IBM Corp.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/* Load an immediate 64-bit value into a register */
#define LOAD_IMM64(r, e) \
lis r,(e)@highest; \
ori r,r,(e)@higher; \
rldicr r,r, 32, 31; \
oris r,r, (e)@h; \
ori r,r, (e)@l;

.section ".head","ax"

/*
* Microwatt currently enters in LE mode at 0x0, so we don't need to
* do any endian fix ups
*/
. = 0
.global _start
_start:
LOAD_IMM64(%r10,__bss_start)
LOAD_IMM64(%r11,__bss_end)
subf %r11,%r10,%r11
addi %r11,%r11,63
srdi. %r11,%r11,6
beq 2f
mtctr %r11
1: dcbz 0,%r10
addi %r10,%r10,64
bdnz 1b

2: LOAD_IMM64(%r1,__stack_top)
li %r0,0
stdu %r0,-16(%r1)
LOAD_IMM64(%r10, die)
mtsprg0 %r10
LOAD_IMM64(%r12, main)
mtctr %r12
bctrl
die: attn // terminate on exit
b .

.global trapit
trapit:
mflr %r0
std %r0,16(%r1)
stdu %r1,-256(%r1)
mtsprg1 %r1
r = 14
.rept 18
std r,r*8(%r1)
r = r + 1
.endr
mfcr %r0
stw %r0,13*8(%r1)
LOAD_IMM64(%r10, ret)
mtsprg0 %r10
mr %r12,%r4
mtctr %r4
bctrl
ret:
mfsprg1 %r1
LOAD_IMM64(%r10, die)
mtsprg0 %r10
r = 14
.rept 18
ld r,r*8(%r1)
r = r + 1
.endr
lwz %r0,13*8(%r1)
mtcr %r0
ld %r0,256+16(%r1)
addi %r1,%r1,256
mtlr %r0
blr

.global do_rfid
do_rfid:
mtsrr1 %r3
LOAD_IMM64(%r4, do_blr)
mtsrr0 %r4
rfid
blr

.global do_blr
do_blr:
blr

#define EXCEPTION(nr) \
.= nr ;\
mfsprg0 %r0 ;\
mtctr %r0 ;\
li %r3,nr ;\
bctr

EXCEPTION(0x300)
EXCEPTION(0x380)
EXCEPTION(0x400)
EXCEPTION(0x480)
EXCEPTION(0x500)
EXCEPTION(0x600)
EXCEPTION(0x700)
EXCEPTION(0x800)
EXCEPTION(0x900)
EXCEPTION(0x980)
EXCEPTION(0xa00)
EXCEPTION(0xb00)
EXCEPTION(0xc00)
EXCEPTION(0xd00)
EXCEPTION(0xe00)
EXCEPTION(0xe20)
EXCEPTION(0xe40)
EXCEPTION(0xe60)
EXCEPTION(0xe80)
EXCEPTION(0xf00)
EXCEPTION(0xf20)
EXCEPTION(0xf40)
EXCEPTION(0xf60)
EXCEPTION(0xf80)

@ -0,0 +1,27 @@
SECTIONS
{
. = 0;
_start = .;
.head : {
KEEP(*(.head))
}
. = ALIGN(0x1000);
.text : { *(.text) *(.text.*) *(.rodata) *(.rodata.*) }
. = ALIGN(0x1000);
.data : { *(.data) *(.data.*) *(.got) *(.toc) }
. = ALIGN(0x80);
__bss_start = .;
.bss : {
*(.dynsbss)
*(.sbss)
*(.scommon)
*(.dynbss)
*(.bss)
*(.common)
*(.bss.*)
}
. = ALIGN(0x80);
__bss_end = .;
. = . + 0x4000;
__stack_top = .;
}

Binary file not shown.

@ -0,0 +1,23 @@
test 01:PASS
test 02:PASS
test 03:PASS
test 04:PASS
test 05:PASS
test 06:PASS
test 07:PASS
test 08:PASS
test 09:PASS
test 10:PASS
test 11:PASS
test 12:PASS
test 13:PASS
test 14:PASS
test 15:PASS
test 16:PASS
test 17:PASS
test 18:PASS
test 19:PASS
test 20:PASS
test 21:PASS
test 22:PASS
test 23:PASS

@ -3,7 +3,7 @@
# Script to update console related tests from source # Script to update console related tests from source
# #


for i in sc illegal decrementer xics privileged mmu misc modes reservation trace ; do for i in sc illegal decrementer xics privileged mmu misc modes reservation trace fpu ; do
cd $i cd $i
make make
cd - cd -

@ -12,6 +12,7 @@ entity writeback is


e_in : in Execute1ToWritebackType; e_in : in Execute1ToWritebackType;
l_in : in Loadstore1ToWritebackType; l_in : in Loadstore1ToWritebackType;
fp_in : in FPUToWritebackType;


w_out : out WritebackToRegisterFileType; w_out : out WritebackToRegisterFileType;
c_out : out WritebackToCrFileType; c_out : out WritebackToCrFileType;
@ -31,15 +32,21 @@ begin
-- Do consistency checks only on the clock edge -- Do consistency checks only on the clock edge
x(0) := e_in.valid; x(0) := e_in.valid;
y(0) := l_in.valid; y(0) := l_in.valid;
assert (to_integer(unsigned(x)) + to_integer(unsigned(y))) <= 1 severity failure; w(0) := fp_in.valid;
assert (to_integer(unsigned(x)) + to_integer(unsigned(y)) +
to_integer(unsigned(w))) <= 1 severity failure;


x(0) := e_in.write_enable or e_in.exc_write_enable; x(0) := e_in.write_enable or e_in.exc_write_enable;
y(0) := l_in.write_enable; y(0) := l_in.write_enable;
assert (to_integer(unsigned(x)) + to_integer(unsigned(y))) <= 1 severity failure; w(0) := fp_in.write_enable;
assert (to_integer(unsigned(x)) + to_integer(unsigned(y)) +
to_integer(unsigned(w))) <= 1 severity failure;


w(0) := e_in.write_cr_enable; w(0) := e_in.write_cr_enable;
x(0) := (e_in.write_enable and e_in.rc); x(0) := (e_in.write_enable and e_in.rc);
assert (to_integer(unsigned(w)) + to_integer(unsigned(x))) <= 1 severity failure; y(0) := fp_in.write_cr_enable;
assert (to_integer(unsigned(w)) + to_integer(unsigned(x)) +
to_integer(unsigned(y))) <= 1 severity failure;
end if; end if;
end process; end process;


@ -53,7 +60,7 @@ begin
c_out <= WritebackToCrFileInit; c_out <= WritebackToCrFileInit;


complete_out <= '0'; complete_out <= '0';
if e_in.valid = '1' or l_in.valid = '1' then if e_in.valid = '1' or l_in.valid = '1' or fp_in.valid = '1' then
complete_out <= '1'; complete_out <= '1';
end if; end if;


@ -79,8 +86,20 @@ begin
c_out.write_xerc_data <= e_in.xerc; c_out.write_xerc_data <= e_in.xerc;
end if; end if;


if fp_in.write_enable = '1' then
w_out.write_reg <= fp_in.write_reg;
w_out.write_data <= fp_in.write_data;
w_out.write_enable <= '1';
end if;

if fp_in.write_cr_enable = '1' then
c_out.write_cr_enable <= '1';
c_out.write_cr_mask <= fp_in.write_cr_mask;
c_out.write_cr_data <= fp_in.write_cr_data;
end if;

if l_in.write_enable = '1' then if l_in.write_enable = '1' then
w_out.write_reg <= gpr_to_gspr(l_in.write_reg); w_out.write_reg <= l_in.write_reg;
w_out.write_data <= l_in.write_data; w_out.write_data <= l_in.write_data;
w_out.write_enable <= '1'; w_out.write_enable <= '1';
end if; end if;

Loading…
Cancel
Save