execute1: Move EXTS* instruction back into execute1

This moves the sign extension done by the extsb, extsh and extsw
instructions back into execute1.  This means that we no longer need
any data formatting in writeback for results coming from execute1,
so this modifies writeback so the data formatter inputs come
directly from the loadstore unit output.  The condition code
updates for RC=1 form instructions are now done on the value from
execute1 rather than the output of the data formatter, which should
help timing.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/134/head
Paul Mackerras 4 years ago
parent c9a2076dd3
commit d956846667

@ -240,16 +240,14 @@ package common is
write_enable : std_ulogic;
write_reg: gspr_index_t;
write_data: std_ulogic_vector(63 downto 0);
write_len : std_ulogic_vector(3 downto 0);
write_cr_enable : std_ulogic;
write_cr_mask : std_ulogic_vector(7 downto 0);
write_cr_data : std_ulogic_vector(31 downto 0);
write_xerc_enable : std_ulogic;
xerc : xer_common_t;
sign_extend: std_ulogic;
end record;
constant Execute1ToWritebackInit : Execute1ToWritebackType := (valid => '0', rc => '0', write_enable => '0',
write_cr_enable => '0', sign_extend => '0',
write_cr_enable => '0',
write_xerc_enable => '0', xerc => xerc_init,
others => (others => '0'));


@ -192,6 +192,7 @@ begin
variable sign1, sign2 : std_ulogic;
variable abs1, abs2 : signed(63 downto 0);
variable overflow : std_ulogic;
variable negative : std_ulogic;
begin
result := (others => '0');
result_with_carry := (others => '0');
@ -335,8 +336,6 @@ begin

v.e.valid := '1';
v.e.write_reg := e_in.write_reg;
v.e.write_len := x"8";
v.e.sign_extend := '0';
v.slow_op_dest := gspr_to_gpr(e_in.write_reg);
v.slow_op_rc := e_in.rc;
v.slow_op_oe := e_in.oe;
@ -438,10 +437,19 @@ begin
when OP_CNTZ =>
result := countzero_result;
result_en := '1';
when OP_EXTS =>
v.e.write_len := e_in.data_len;
v.e.sign_extend := '1';
result := e_in.read_data3;
when OP_EXTS =>
-- note data_len is a 1-hot encoding
negative := (e_in.data_len(0) and e_in.read_data3(7)) or
(e_in.data_len(1) and e_in.read_data3(15)) or
(e_in.data_len(2) and e_in.read_data3(31));
result := (others => negative);
if e_in.data_len(2) = '1' then
result(31 downto 16) := e_in.read_data3(31 downto 16);
end if;
if e_in.data_len(2) = '1' or e_in.data_len(1) = '1' then
result(15 downto 8) := e_in.read_data3(15 downto 8);
end if;
result(7 downto 0) := e_in.read_data3(7 downto 0);
result_en := '1';
when OP_ISEL =>
crbit := to_integer(unsigned(insn_bc(e_in.insn)));
@ -660,8 +668,6 @@ begin
result_en := '1';
result := r.next_lr;
v.e.write_reg := fast_spr_num(SPR_LR);
v.e.write_len := x"8";
v.e.sign_extend := '0';
v.e.valid := '1';
elsif r.mul_in_progress = '1' or r.div_in_progress = '1' then
if (r.mul_in_progress = '1' and multiply_to_x.valid = '1') or
@ -687,8 +693,6 @@ begin
v.e.xerc.so := v.slow_op_xerc.so or overflow;
end if;
v.e.valid := '1';
v.e.write_len := x"8";
v.e.sign_extend := '0';
else
stall_out <= '1';
v.mul_in_progress := r.mul_in_progress;

@ -42,7 +42,6 @@ architecture behaviour of writeback is
signal sign_extend : std_ulogic;
signal negative : std_ulogic;
signal second_word : std_ulogic;
signal zero : std_ulogic;
begin
writeback_0: process(clk)
begin
@ -62,6 +61,8 @@ begin
variable k : unsigned(3 downto 0);
variable cf: std_ulogic_vector(3 downto 0);
variable xe: xer_common_t;
variable zero : std_ulogic;
variable sign : std_ulogic;
begin
x := "" & e_in.valid;
y := "" & l_in.valid;
@ -85,10 +86,7 @@ begin

rc <= '0';
brev_lenm1 <= "000";
byte_offset <= "000";
data_len <= x"8";
partial_write <= '0';
sign_extend <= '0';
second_word <= '0';
xe := e_in.xerc;
data_in <= (others => '0');
@ -96,9 +94,6 @@ begin
if e_in.write_enable = '1' then
w_out.write_reg <= e_in.write_reg;
w_out.write_enable <= '1';
data_in <= e_in.write_data;
data_len <= unsigned(e_in.write_len);
sign_extend <= e_in.sign_extend;
rc <= e_in.rc;
end if;

@ -113,12 +108,11 @@ begin
c_out.write_xerc_data <= e_in.xerc;
end if;

sign_extend <= l_in.sign_extend;
data_len <= unsigned(l_in.write_len);
byte_offset <= unsigned(l_in.write_shift);
if l_in.write_enable = '1' then
w_out.write_reg <= gpr_to_gspr(l_in.write_reg);
data_in <= l_in.write_data;
data_len <= unsigned(l_in.write_len);
byte_offset <= unsigned(l_in.write_shift);
sign_extend <= l_in.sign_extend;
if l_in.byte_reverse = '1' then
brev_lenm1 <= unsigned(l_in.write_len(2 downto 0)) - 1;
end if;
@ -138,7 +132,7 @@ begin
end loop;
for i in 0 to 7 loop
j := to_integer(perm(i)) * 8;
data_permuted(i * 8 + 7 downto i * 8) <= data_in(j + 7 downto j);
data_permuted(i * 8 + 7 downto i * 8) <= l_in.write_data(j + 7 downto j);
end loop;

-- If the data can arrive split over two cycles, this will be correct
@ -160,16 +154,12 @@ begin
trim_ctl(i) <= '0' & (negative and sign_extend);
end if;
end loop;
zero <= not negative;
for i in 0 to 7 loop
case trim_ctl(i) is
when "11" =>
data_trimmed(i * 8 + 7 downto i * 8) <= data_latched(i * 8 + 7 downto i * 8);
when "10" =>
data_trimmed(i * 8 + 7 downto i * 8) <= data_permuted(i * 8 + 7 downto i * 8);
if or data_permuted(i * 8 + 7 downto i * 8) /= '0' then
zero <= '0';
end if;
when "01" =>
data_trimmed(i * 8 + 7 downto i * 8) <= x"FF";
when others =>
@ -178,14 +168,21 @@ begin
end loop;

-- deliver to regfile
w_out.write_data <= data_trimmed;
if l_in.write_enable = '1' then
w_out.write_data <= data_trimmed;
else
w_out.write_data <= e_in.write_data;
end if;

-- Perform CR0 update for RC forms
-- Note that loads never have a form with an RC bit, therefore this can test e_in.write_data
if rc = '1' then
sign := e_in.write_data(63);
zero := not (or e_in.write_data);
c_out.write_cr_enable <= '1';
c_out.write_cr_mask <= num_to_fxm(0);
cf(3) := negative;
cf(2) := not negative and not zero;
cf(3) := sign;
cf(2) := not sign and not zero;
cf(1) := zero;
cf(0) := xe.so;
c_out.write_cr_data(31 downto 28) <= cf;

Loading…
Cancel
Save