core: Add support for single-precision FP loads and stores

This adds code to loadstore1 to convert between single-precision and
double-precision formats, and implements the lfs* and stfs*
instructions.  The conversion processes are described in Power ISA
v3.1 Book 1 sections 4.6.2 and 4.6.3.

These conversions take one cycle, so lfs* and stfs* are one cycle
slower than lfd* and stfd*.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/245/head
Paul Mackerras 4 years ago
parent bcac4b9b2f
commit 9d285a265c

@ -287,6 +287,7 @@ package common is
virt_mode : std_ulogic; -- do translation through TLB virt_mode : std_ulogic; -- do translation through TLB
priv_mode : std_ulogic; -- privileged mode (MSR[PR] = 0) priv_mode : std_ulogic; -- privileged mode (MSR[PR] = 0)
mode_32bit : std_ulogic; -- trim addresses to 32 bits mode_32bit : std_ulogic; -- trim addresses to 32 bits
is_32bit : std_ulogic;
end record; end record;
constant Execute1ToLoadstore1Init : Execute1ToLoadstore1Type := (valid => '0', op => OP_ILLEGAL, ci => '0', byte_reverse => '0', constant Execute1ToLoadstore1Init : Execute1ToLoadstore1Type := (valid => '0', op => OP_ILLEGAL, ci => '0', byte_reverse => '0',
sign_extend => '0', update => '0', xerc => xerc_init, sign_extend => '0', update => '0', xerc => xerc_init,
@ -294,7 +295,7 @@ package common is
nia => (others => '0'), insn => (others => '0'), nia => (others => '0'), insn => (others => '0'),
addr1 => (others => '0'), addr2 => (others => '0'), data => (others => '0'), addr1 => (others => '0'), addr2 => (others => '0'), data => (others => '0'),
write_reg => (others => '0'), length => (others => '0'), write_reg => (others => '0'), length => (others => '0'),
mode_32bit => '0', others => (others => '0')); mode_32bit => '0', is_32bit => '0', others => (others => '0'));


type Loadstore1ToExecute1Type is record type Loadstore1ToExecute1Type is record
busy : std_ulogic; busy : std_ulogic;

@ -3,6 +3,7 @@ use ieee.std_logic_1164.all;
use ieee.numeric_std.all; use ieee.numeric_std.all;


library work; library work;
use work.helpers.all;


entity zero_counter is entity zero_counter is
port ( port (
@ -15,42 +16,6 @@ entity zero_counter is
end entity zero_counter; end entity zero_counter;


architecture behaviour of zero_counter is architecture behaviour of zero_counter is
-- Reverse the order of bits in a word
function bit_reverse(a: std_ulogic_vector) return std_ulogic_vector is
variable ret: std_ulogic_vector(a'left downto a'right);
begin
for i in a'right to a'left loop
ret(a'left + a'right - i) := a(i);
end loop;
return ret;
end;

-- If there is only one bit set in a doubleword, return its bit number
-- (counting from the right). Each bit of the result is obtained by
-- ORing together 32 bits of the input:
-- bit 0 = a[1] or a[3] or a[5] or ...
-- bit 1 = a[2] or a[3] or a[6] or a[7] or ...
-- bit 2 = a[4..7] or a[12..15] or ...
-- bit 5 = a[32..63] ORed together
function bit_number(a: std_ulogic_vector(63 downto 0)) return std_ulogic_vector is
variable ret: std_ulogic_vector(5 downto 0);
variable stride: natural;
variable bit: std_ulogic;
variable k: natural;
begin
stride := 2;
for i in 0 to 5 loop
bit := '0';
for j in 0 to (64 / stride) - 1 loop
k := j * stride;
bit := bit or (or a(k + stride - 1 downto k + (stride / 2)));
end loop;
ret(i) := bit;
stride := stride * 2;
end loop;
return ret;
end;

signal inp : std_ulogic_vector(63 downto 0); signal inp : std_ulogic_vector(63 downto 0);
signal sum : std_ulogic_vector(64 downto 0); signal sum : std_ulogic_vector(64 downto 0);
signal msb_r : std_ulogic; signal msb_r : std_ulogic;

@ -74,8 +74,8 @@ architecture behaviour of decode1 is
35 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- lbzu 35 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- lbzu
50 => (LDST, OP_FPLOAD, RA_OR_ZERO, CONST_SI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- lfd 50 => (LDST, OP_FPLOAD, RA_OR_ZERO, CONST_SI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- lfd
51 => (LDST, OP_FPLOAD, RA_OR_ZERO, CONST_SI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- lfdu 51 => (LDST, OP_FPLOAD, RA_OR_ZERO, CONST_SI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- lfdu
-- 48 => (LDST, OP_FPLOAD, RA_OR_ZERO, CONST_SI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0'), -- lfs 48 => (LDST, OP_FPLOAD, RA_OR_ZERO, CONST_SI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0'), -- lfs
-- 49 => (LDST, OP_FPLOAD, RA_OR_ZERO, CONST_SI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0'), -- lfsu 49 => (LDST, OP_FPLOAD, RA_OR_ZERO, CONST_SI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0'), -- lfsu
42 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '0', '0', '0', '0', NONE, '0', '0'), -- lha 42 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '0', '0', '0', '0', NONE, '0', '0'), -- lha
43 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '1', '0', '0', '0', NONE, '0', '0'), -- lhau 43 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '1', '0', '0', '0', NONE, '0', '0'), -- lhau
40 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- lhz 40 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- lhz
@ -93,8 +93,8 @@ architecture behaviour of decode1 is
39 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- stbu 39 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- stbu
54 => (LDST, OP_FPSTORE, RA_OR_ZERO, CONST_SI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stfd 54 => (LDST, OP_FPSTORE, RA_OR_ZERO, CONST_SI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stfd
55 => (LDST, OP_FPSTORE, RA_OR_ZERO, CONST_SI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- stfdu 55 => (LDST, OP_FPSTORE, RA_OR_ZERO, CONST_SI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- stfdu
-- 52 => (LDST, OP_FPSTORE, RA_OR_ZERO, CONST_SI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0'), -- stfs 52 => (LDST, OP_FPSTORE, RA_OR_ZERO, CONST_SI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0'), -- stfs
-- 53 => (LDST, OP_FPSTORE, RA_OR_ZERO, CONST_SI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0'), -- stfsu 53 => (LDST, OP_FPSTORE, RA_OR_ZERO, CONST_SI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0'), -- stfsu
44 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- sth 44 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- sth
45 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- sthu 45 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- sthu
36 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stw 36 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stw
@ -284,8 +284,8 @@ architecture behaviour of decode1 is
2#1001110111# => (LDST, OP_FPLOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- lfdux 2#1001110111# => (LDST, OP_FPLOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- lfdux
2#1101010111# => (LDST, OP_FPLOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '1', '0', '0', '0', '0', NONE, '0', '0'), -- lfiwax 2#1101010111# => (LDST, OP_FPLOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '1', '0', '0', '0', '0', NONE, '0', '0'), -- lfiwax
2#1101110111# => (LDST, OP_FPLOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- lfiwzx 2#1101110111# => (LDST, OP_FPLOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- lfiwzx
-- 2#1000010111# => (LDST, OP_FPLOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0'), -- lfsx 2#1000010111# => (LDST, OP_FPLOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0'), -- lfsx
-- 2#1000110111# => (LDST, OP_FPLOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0'), -- lfsux 2#1000110111# => (LDST, OP_FPLOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0'), -- lfsux
2#0001110100# => (LDST, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '1', '0', '0', NONE, '0', '0'), -- lharx 2#0001110100# => (LDST, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '1', '0', '0', NONE, '0', '0'), -- lharx
2#0101110111# => (LDST, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '1', '0', '0', '0', NONE, '0', '0'), -- lhaux 2#0101110111# => (LDST, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '1', '0', '0', '0', NONE, '0', '0'), -- lhaux
2#0101010111# => (LDST, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '0', '0', '0', '0', NONE, '0', '0'), -- lhax 2#0101010111# => (LDST, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '0', '0', '0', '0', NONE, '0', '0'), -- lhax
@ -367,8 +367,8 @@ architecture behaviour of decode1 is
2#1011010111# => (LDST, OP_FPSTORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stfdx 2#1011010111# => (LDST, OP_FPSTORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stfdx
2#1011110111# => (LDST, OP_FPSTORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- stfdux 2#1011110111# => (LDST, OP_FPSTORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- stfdux
2#1111010111# => (LDST, OP_FPSTORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stfiwx 2#1111010111# => (LDST, OP_FPSTORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stfiwx
-- 2#1010010111# => (LDST, OP_FPSTORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0'), -- stfsx 2#1010010111# => (LDST, OP_FPSTORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0'), -- stfsx
-- 2#1010110111# => (LDST, OP_FPSTORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0'), -- stfsux 2#1010110111# => (LDST, OP_FPSTORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0'), -- stfsux
2#1110010110# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '1', '0', '0', '0', '0', '0', NONE, '0', '0'), -- sthbrx 2#1110010110# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '1', '0', '0', '0', '0', '0', NONE, '0', '0'), -- sthbrx
2#1110110101# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- sthcix 2#1110110101# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- sthcix
2#1011010110# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '1', '0', '0', ONE, '0', '0'), -- sthcx 2#1011010110# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '1', '0', '0', ONE, '0', '0'), -- sthcx

@ -1259,6 +1259,7 @@ begin
lv.virt_mode := ctrl.msr(MSR_DR); lv.virt_mode := ctrl.msr(MSR_DR);
lv.priv_mode := not ctrl.msr(MSR_PR); lv.priv_mode := not ctrl.msr(MSR_PR);
lv.mode_32bit := not ctrl.msr(MSR_SF); lv.mode_32bit := not ctrl.msr(MSR_SF);
lv.is_32bit := e_in.is_32bit;


-- Update registers -- Update registers
rin <= v; rin <= v;

@ -25,6 +25,10 @@ package helpers is
function byte_reverse(val: std_ulogic_vector(63 downto 0); size: integer) return std_ulogic_vector; function byte_reverse(val: std_ulogic_vector(63 downto 0); size: integer) return std_ulogic_vector;


function sign_extend(val: std_ulogic_vector(63 downto 0); size: natural) return std_ulogic_vector; function sign_extend(val: std_ulogic_vector(63 downto 0); size: natural) return std_ulogic_vector;

function bit_reverse(a: std_ulogic_vector) return std_ulogic_vector;
function bit_number(a: std_ulogic_vector(63 downto 0)) return std_ulogic_vector;
function count_left_zeroes(val: std_ulogic_vector) return std_ulogic_vector;
end package helpers; end package helpers;


package body helpers is package body helpers is
@ -206,4 +210,53 @@ package body helpers is
return std_ulogic_vector(ret); return std_ulogic_vector(ret);


end; end;

-- Reverse the order of bits in a word
function bit_reverse(a: std_ulogic_vector) return std_ulogic_vector is
variable ret: std_ulogic_vector(a'left downto a'right);
begin
for i in a'right to a'left loop
ret(a'left + a'right - i) := a(i);
end loop;
return ret;
end;

-- If there is only one bit set in a doubleword, return its bit number
-- (counting from the right). Each bit of the result is obtained by
-- ORing together 32 bits of the input:
-- bit 0 = a[1] or a[3] or a[5] or ...
-- bit 1 = a[2] or a[3] or a[6] or a[7] or ...
-- bit 2 = a[4..7] or a[12..15] or ...
-- bit 5 = a[32..63] ORed together
function bit_number(a: std_ulogic_vector(63 downto 0)) return std_ulogic_vector is
variable ret: std_ulogic_vector(5 downto 0);
variable stride: natural;
variable bit: std_ulogic;
variable k: natural;
begin
stride := 2;
for i in 0 to 5 loop
bit := '0';
for j in 0 to (64 / stride) - 1 loop
k := j * stride;
bit := bit or (or a(k + stride - 1 downto k + (stride / 2)));
end loop;
ret(i) := bit;
stride := stride * 2;
end loop;
return ret;
end;

-- Count leading zeroes operation
-- Assumes the value passed in is not zero (if it is, zero is returned)
function count_left_zeroes(val: std_ulogic_vector) return std_ulogic_vector is
variable rev: std_ulogic_vector(val'left downto val'right);
variable sum: std_ulogic_vector(val'left downto val'right);
variable onehot: std_ulogic_vector(val'left downto val'right);
begin
rev := bit_reverse(val);
sum := std_ulogic_vector(- signed(rev));
onehot := sum and rev;
return bit_number(std_ulogic_vector(resize(unsigned(onehot), 64)));
end;
end package body helpers; end package body helpers;

@ -45,10 +45,12 @@ architecture behave of loadstore1 is


-- State machine for unaligned loads/stores -- State machine for unaligned loads/stores
type state_t is (IDLE, -- ready for instruction type state_t is (IDLE, -- ready for instruction
FPR_CONV, -- converting double to float for store
SECOND_REQ, -- send 2nd request of unaligned xfer SECOND_REQ, -- send 2nd request of unaligned xfer
ACK_WAIT, -- waiting for ack from dcache ACK_WAIT, -- waiting for ack from dcache
MMU_LOOKUP, -- waiting for MMU to look up translation MMU_LOOKUP, -- waiting for MMU to look up translation
TLBIE_WAIT, -- waiting for MMU to finish doing a tlbie TLBIE_WAIT, -- waiting for MMU to finish doing a tlbie
FINISH_LFS, -- write back converted SP data for lfs*
COMPLETE -- extra cycle to complete an operation COMPLETE -- extra cycle to complete an operation
); );


@ -89,6 +91,11 @@ architecture behave of loadstore1 is
do_update : std_ulogic; do_update : std_ulogic;
extra_cycle : std_ulogic; extra_cycle : std_ulogic;
mode_32bit : std_ulogic; mode_32bit : std_ulogic;
load_sp : std_ulogic;
ld_sp_data : std_ulogic_vector(31 downto 0);
ld_sp_nz : std_ulogic;
ld_sp_lz : std_ulogic_vector(5 downto 0);
st_sp_data : std_ulogic_vector(31 downto 0);
end record; end record;


type byte_sel_t is array(0 to 7) of std_ulogic; type byte_sel_t is array(0 to 7) of std_ulogic;
@ -98,6 +105,9 @@ architecture behave of loadstore1 is
signal r, rin : reg_stage_t; signal r, rin : reg_stage_t;
signal lsu_sum : std_ulogic_vector(63 downto 0); signal lsu_sum : std_ulogic_vector(63 downto 0);


signal store_sp_data : std_ulogic_vector(31 downto 0);
signal load_dp_data : std_ulogic_vector(63 downto 0);

-- Generate byte enables from sizes -- Generate byte enables from sizes
function length_to_sel(length : in std_logic_vector(3 downto 0)) return std_ulogic_vector is function length_to_sel(length : in std_logic_vector(3 downto 0)) return std_ulogic_vector is
begin begin
@ -128,6 +138,72 @@ architecture behave of loadstore1 is
to_integer(unsigned(address)))); to_integer(unsigned(address))));
end function xfer_data_sel; end function xfer_data_sel;


-- 23-bit right shifter for DP -> SP float conversions
function shifter_23r(frac: std_ulogic_vector(22 downto 0); shift: unsigned(4 downto 0))
return std_ulogic_vector is
variable fs1 : std_ulogic_vector(22 downto 0);
variable fs2 : std_ulogic_vector(22 downto 0);
begin
case shift(1 downto 0) is
when "00" =>
fs1 := frac;
when "01" =>
fs1 := '0' & frac(22 downto 1);
when "10" =>
fs1 := "00" & frac(22 downto 2);
when others =>
fs1 := "000" & frac(22 downto 3);
end case;
case shift(4 downto 2) is
when "000" =>
fs2 := fs1;
when "001" =>
fs2 := x"0" & fs1(22 downto 4);
when "010" =>
fs2 := x"00" & fs1(22 downto 8);
when "011" =>
fs2 := x"000" & fs1(22 downto 12);
when "100" =>
fs2 := x"0000" & fs1(22 downto 16);
when others =>
fs2 := x"00000" & fs1(22 downto 20);
end case;
return fs2;
end;

-- 23-bit left shifter for SP -> DP float conversions
function shifter_23l(frac: std_ulogic_vector(22 downto 0); shift: unsigned(4 downto 0))
return std_ulogic_vector is
variable fs1 : std_ulogic_vector(22 downto 0);
variable fs2 : std_ulogic_vector(22 downto 0);
begin
case shift(1 downto 0) is
when "00" =>
fs1 := frac;
when "01" =>
fs1 := frac(21 downto 0) & '0';
when "10" =>
fs1 := frac(20 downto 0) & "00";
when others =>
fs1 := frac(19 downto 0) & "000";
end case;
case shift(4 downto 2) is
when "000" =>
fs2 := fs1;
when "001" =>
fs2 := fs1(18 downto 0) & x"0" ;
when "010" =>
fs2 := fs1(14 downto 0) & x"00";
when "011" =>
fs2 := fs1(10 downto 0) & x"000";
when "100" =>
fs2 := fs1(6 downto 0) & x"0000";
when others =>
fs2 := fs1(2 downto 0) & x"00000";
end case;
return fs2;
end;

begin begin
-- Calculate the address in the first cycle -- Calculate the address in the first cycle
lsu_sum <= std_ulogic_vector(unsigned(l_in.addr1) + unsigned(l_in.addr2)) when l_in.valid = '1' else (others => '0'); lsu_sum <= std_ulogic_vector(unsigned(l_in.addr1) + unsigned(l_in.addr2)) when l_in.valid = '1' else (others => '0');
@ -145,6 +221,59 @@ begin
end if; end if;
end process; end process;


ls_fp_conv: if HAS_FPU generate
-- Convert DP data to SP for stfs
dp_to_sp: process(all)
variable exp : unsigned(10 downto 0);
variable frac : std_ulogic_vector(22 downto 0);
variable shift : unsigned(4 downto 0);
begin
store_sp_data(31) <= l_in.data(63);
store_sp_data(30 downto 0) <= (others => '0');
exp := unsigned(l_in.data(62 downto 52));
if exp > 896 then
store_sp_data(30) <= l_in.data(62);
store_sp_data(29 downto 0) <= l_in.data(58 downto 29);
elsif exp >= 874 then
-- denormalization required
frac := '1' & l_in.data(51 downto 30);
shift := 0 - exp(4 downto 0);
store_sp_data(22 downto 0) <= shifter_23r(frac, shift);
end if;
end process;

-- Convert SP data to DP for lfs
sp_to_dp: process(all)
variable exp : unsigned(7 downto 0);
variable exp_dp : unsigned(10 downto 0);
variable exp_nz : std_ulogic;
variable exp_ao : std_ulogic;
variable frac : std_ulogic_vector(22 downto 0);
variable frac_shift : unsigned(4 downto 0);
begin
frac := r.ld_sp_data(22 downto 0);
exp := unsigned(r.ld_sp_data(30 downto 23));
exp_nz := or (r.ld_sp_data(30 downto 23));
exp_ao := and (r.ld_sp_data(30 downto 23));
frac_shift := (others => '0');
if exp_ao = '1' then
exp_dp := to_unsigned(2047, 11); -- infinity or NaN
elsif exp_nz = '1' then
exp_dp := 896 + resize(exp, 11); -- finite normalized value
elsif r.ld_sp_nz = '0' then
exp_dp := to_unsigned(0, 11); -- zero
else
-- denormalized SP operand, need to normalize
exp_dp := 896 - resize(unsigned(r.ld_sp_lz), 11);
frac_shift := unsigned(r.ld_sp_lz(4 downto 0)) + 1;
end if;
load_dp_data(63) <= r.ld_sp_data(31);
load_dp_data(62 downto 52) <= std_ulogic_vector(exp_dp);
load_dp_data(51 downto 29) <= shifter_23l(frac, frac_shift);
load_dp_data(28 downto 0) <= (others => '0');
end process;
end generate;

loadstore1_1: process(all) loadstore1_1: process(all)
variable v : reg_stage_t; variable v : reg_stage_t;
variable brev_lenm1 : unsigned(2 downto 0); variable brev_lenm1 : unsigned(2 downto 0);
@ -165,6 +294,9 @@ begin
variable data_permuted : std_ulogic_vector(63 downto 0); variable data_permuted : std_ulogic_vector(63 downto 0);
variable data_trimmed : std_ulogic_vector(63 downto 0); variable data_trimmed : std_ulogic_vector(63 downto 0);
variable store_data : std_ulogic_vector(63 downto 0); variable store_data : std_ulogic_vector(63 downto 0);
variable data_in : std_ulogic_vector(63 downto 0);
variable byte_rev : std_ulogic;
variable length : std_ulogic_vector(3 downto 0);
variable use_second : byte_sel_t; variable use_second : byte_sel_t;
variable trim_ctl : trim_ctl_t; variable trim_ctl : trim_ctl_t;
variable negative : std_ulogic; variable negative : std_ulogic;
@ -176,6 +308,8 @@ begin
variable mmu_mtspr : std_ulogic; variable mmu_mtspr : std_ulogic;
variable itlb_fault : std_ulogic; variable itlb_fault : std_ulogic;
variable misaligned : std_ulogic; variable misaligned : std_ulogic;
variable fp_reg_conv : std_ulogic;
variable lfs_done : std_ulogic;
begin begin
v := r; v := r;
req := '0'; req := '0';
@ -185,8 +319,10 @@ begin
sprn := std_ulogic_vector(to_unsigned(decode_spr_num(l_in.insn), 10)); sprn := std_ulogic_vector(to_unsigned(decode_spr_num(l_in.insn), 10));
dsisr := (others => '0'); dsisr := (others => '0');
mmureq := '0'; mmureq := '0';
fp_reg_conv := '0';


write_enable := '0'; write_enable := '0';
lfs_done := '0';


do_update := r.do_update; do_update := r.do_update;
v.do_update := '0'; v.do_update := '0';
@ -245,19 +381,38 @@ begin
end case; end case;
end loop; end loop;


-- Byte reversing and rotating for stores if HAS_FPU then
-- Done in the first cycle (when l_in.valid = 1) -- Single-precision FP conversion
v.st_sp_data := store_sp_data;
v.ld_sp_data := data_trimmed(31 downto 0);
v.ld_sp_nz := or (data_trimmed(22 downto 0));
v.ld_sp_lz := count_left_zeroes(data_trimmed(22 downto 0));
end if;

-- Byte reversing and rotating for stores.
-- Done in the first cycle (when l_in.valid = 1) for integer stores
-- and DP float stores, and in the second cycle for SP float stores.
store_data := r.store_data; store_data := r.store_data;
if l_in.valid = '1' then if l_in.valid = '1' or (HAS_FPU and r.state = FPR_CONV) then
byte_offset := unsigned(lsu_sum(2 downto 0)); if HAS_FPU and r.state = FPR_CONV then
data_in := x"00000000" & r.st_sp_data;
byte_offset := unsigned(r.addr(2 downto 0));
byte_rev := r.byte_reverse;
length := r.length;
else
data_in := l_in.data;
byte_offset := unsigned(lsu_sum(2 downto 0));
byte_rev := l_in.byte_reverse;
length := l_in.length;
end if;
brev_lenm1 := "000"; brev_lenm1 := "000";
if l_in.byte_reverse = '1' then if byte_rev = '1' then
brev_lenm1 := unsigned(l_in.length(2 downto 0)) - 1; brev_lenm1 := unsigned(length(2 downto 0)) - 1;
end if; end if;
for i in 0 to 7 loop for i in 0 to 7 loop
k := (to_unsigned(i, 3) - byte_offset) xor brev_lenm1; k := (to_unsigned(i, 3) - byte_offset) xor brev_lenm1;
j := to_integer(k) * 8; j := to_integer(k) * 8;
store_data(i * 8 + 7 downto i * 8) := l_in.data(j + 7 downto j); store_data(i * 8 + 7 downto i * 8) := data_in(j + 7 downto j);
end loop; end loop;
end if; end if;
v.store_data := store_data; v.store_data := store_data;
@ -292,6 +447,14 @@ begin
case r.state is case r.state is
when IDLE => when IDLE =>


when FPR_CONV =>
req := '1';
if r.second_bytes /= "00000000" then
v.state := SECOND_REQ;
else
v.state := ACK_WAIT;
end if;

when SECOND_REQ => when SECOND_REQ =>
req := '1'; req := '1';
v.state := ACK_WAIT; v.state := ACK_WAIT;
@ -323,8 +486,13 @@ begin
v.load_data := data_permuted; v.load_data := data_permuted;
end if; end if;
else else
write_enable := r.load; write_enable := r.load and not r.load_sp;
if r.extra_cycle = '1' then if HAS_FPU and r.load_sp = '1' then
-- SP to DP conversion takes a cycle
-- Write back rA update in this cycle if needed
do_update := r.update;
v.state := FINISH_LFS;
elsif r.extra_cycle = '1' then
-- loads with rA update need an extra cycle -- loads with rA update need an extra cycle
v.state := COMPLETE; v.state := COMPLETE;
v.do_update := r.update; v.do_update := r.update;
@ -362,6 +530,9 @@ begin


when TLBIE_WAIT => when TLBIE_WAIT =>


when FINISH_LFS =>
lfs_done := '1';

when COMPLETE => when COMPLETE =>
exception := r.align_intr; exception := r.align_intr;


@ -395,6 +566,7 @@ begin
v.nc := l_in.ci; v.nc := l_in.ci;
v.virt_mode := l_in.virt_mode; v.virt_mode := l_in.virt_mode;
v.priv_mode := l_in.priv_mode; v.priv_mode := l_in.priv_mode;
v.load_sp := '0';
v.wait_dcache := '0'; v.wait_dcache := '0';
v.wait_mmu := '0'; v.wait_mmu := '0';
v.do_update := '0'; v.do_update := '0';
@ -436,14 +608,24 @@ begin
v.dcbz := '1'; v.dcbz := '1';
when OP_FPSTORE => when OP_FPSTORE =>
if HAS_FPU then if HAS_FPU then
req := '1'; if l_in.is_32bit = '1' then
v.state := FPR_CONV;
fp_reg_conv := '1';
else
req := '1';
end if;
end if; end if;
when OP_FPLOAD => when OP_FPLOAD =>
if HAS_FPU then if HAS_FPU then
v.load := '1'; v.load := '1';
req := '1'; req := '1';
-- Allow an extra cycle for RA update -- Allow an extra cycle for SP->DP precision conversion
-- or RA update
v.extra_cycle := l_in.update; v.extra_cycle := l_in.update;
if l_in.is_32bit = '1' then
v.load_sp := '1';
v.extra_cycle := '1';
end if;
end if; end if;
when OP_TLBIE => when OP_TLBIE =>
mmureq := '1'; mmureq := '1';
@ -500,7 +682,7 @@ begin
end if; end if;
end if; end if;


v.busy := req or mmureq or mmu_mtspr; v.busy := req or mmureq or mmu_mtspr or fp_reg_conv;
end if; end if;


-- Update outputs to dcache -- Update outputs to dcache
@ -539,6 +721,10 @@ begin
l_out.write_enable <= '1'; l_out.write_enable <= '1';
l_out.write_reg <= gpr_to_gspr(r.update_reg); l_out.write_reg <= gpr_to_gspr(r.update_reg);
l_out.write_data <= r.addr; l_out.write_data <= r.addr;
elsif lfs_done = '1' then
l_out.write_enable <= '1';
l_out.write_reg <= r.write_reg;
l_out.write_data <= load_dp_data;
else else
l_out.write_enable <= write_enable; l_out.write_enable <= write_enable;
l_out.write_reg <= r.write_reg; l_out.write_reg <= r.write_reg;

Loading…
Cancel
Save