From d956846667ef558e51705c0d22152aa912629454 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 12 Dec 2019 15:25:45 +1100 Subject: [PATCH] execute1: Move EXTS* instruction back into execute1 This moves the sign extension done by the extsb, extsh and extsw instructions back into execute1. This means that we no longer need any data formatting in writeback for results coming from execute1, so this modifies writeback so the data formatter inputs come directly from the loadstore unit output. The condition code updates for RC=1 form instructions are now done on the value from execute1 rather than the output of the data formatter, which should help timing. Signed-off-by: Paul Mackerras --- common.vhdl | 4 +--- execute1.vhdl | 24 ++++++++++++++---------- writeback.vhdl | 35 ++++++++++++++++------------------- 3 files changed, 31 insertions(+), 32 deletions(-) diff --git a/common.vhdl b/common.vhdl index 639f0f7..8612389 100644 --- a/common.vhdl +++ b/common.vhdl @@ -240,16 +240,14 @@ package common is write_enable : std_ulogic; write_reg: gspr_index_t; write_data: std_ulogic_vector(63 downto 0); - write_len : std_ulogic_vector(3 downto 0); write_cr_enable : std_ulogic; write_cr_mask : std_ulogic_vector(7 downto 0); write_cr_data : std_ulogic_vector(31 downto 0); write_xerc_enable : std_ulogic; xerc : xer_common_t; - sign_extend: std_ulogic; end record; constant Execute1ToWritebackInit : Execute1ToWritebackType := (valid => '0', rc => '0', write_enable => '0', - write_cr_enable => '0', sign_extend => '0', + write_cr_enable => '0', write_xerc_enable => '0', xerc => xerc_init, others => (others => '0')); diff --git a/execute1.vhdl b/execute1.vhdl index 94845d8..1991009 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -192,6 +192,7 @@ begin variable sign1, sign2 : std_ulogic; variable abs1, abs2 : signed(63 downto 0); variable overflow : std_ulogic; + variable negative : std_ulogic; begin result := (others => '0'); result_with_carry := (others => '0'); @@ -335,8 +336,6 @@ begin v.e.valid := '1'; v.e.write_reg := e_in.write_reg; - v.e.write_len := x"8"; - v.e.sign_extend := '0'; v.slow_op_dest := gspr_to_gpr(e_in.write_reg); v.slow_op_rc := e_in.rc; v.slow_op_oe := e_in.oe; @@ -438,10 +437,19 @@ begin when OP_CNTZ => result := countzero_result; result_en := '1'; - when OP_EXTS => - v.e.write_len := e_in.data_len; - v.e.sign_extend := '1'; - result := e_in.read_data3; + when OP_EXTS => + -- note data_len is a 1-hot encoding + negative := (e_in.data_len(0) and e_in.read_data3(7)) or + (e_in.data_len(1) and e_in.read_data3(15)) or + (e_in.data_len(2) and e_in.read_data3(31)); + result := (others => negative); + if e_in.data_len(2) = '1' then + result(31 downto 16) := e_in.read_data3(31 downto 16); + end if; + if e_in.data_len(2) = '1' or e_in.data_len(1) = '1' then + result(15 downto 8) := e_in.read_data3(15 downto 8); + end if; + result(7 downto 0) := e_in.read_data3(7 downto 0); result_en := '1'; when OP_ISEL => crbit := to_integer(unsigned(insn_bc(e_in.insn))); @@ -660,8 +668,6 @@ begin result_en := '1'; result := r.next_lr; v.e.write_reg := fast_spr_num(SPR_LR); - v.e.write_len := x"8"; - v.e.sign_extend := '0'; v.e.valid := '1'; elsif r.mul_in_progress = '1' or r.div_in_progress = '1' then if (r.mul_in_progress = '1' and multiply_to_x.valid = '1') or @@ -687,8 +693,6 @@ begin v.e.xerc.so := v.slow_op_xerc.so or overflow; end if; v.e.valid := '1'; - v.e.write_len := x"8"; - v.e.sign_extend := '0'; else stall_out <= '1'; v.mul_in_progress := r.mul_in_progress; diff --git a/writeback.vhdl b/writeback.vhdl index 08efe91..e53f46b 100644 --- a/writeback.vhdl +++ b/writeback.vhdl @@ -42,7 +42,6 @@ architecture behaviour of writeback is signal sign_extend : std_ulogic; signal negative : std_ulogic; signal second_word : std_ulogic; - signal zero : std_ulogic; begin writeback_0: process(clk) begin @@ -62,6 +61,8 @@ begin variable k : unsigned(3 downto 0); variable cf: std_ulogic_vector(3 downto 0); variable xe: xer_common_t; + variable zero : std_ulogic; + variable sign : std_ulogic; begin x := "" & e_in.valid; y := "" & l_in.valid; @@ -85,10 +86,7 @@ begin rc <= '0'; brev_lenm1 <= "000"; - byte_offset <= "000"; - data_len <= x"8"; partial_write <= '0'; - sign_extend <= '0'; second_word <= '0'; xe := e_in.xerc; data_in <= (others => '0'); @@ -96,9 +94,6 @@ begin if e_in.write_enable = '1' then w_out.write_reg <= e_in.write_reg; w_out.write_enable <= '1'; - data_in <= e_in.write_data; - data_len <= unsigned(e_in.write_len); - sign_extend <= e_in.sign_extend; rc <= e_in.rc; end if; @@ -113,12 +108,11 @@ begin c_out.write_xerc_data <= e_in.xerc; end if; + sign_extend <= l_in.sign_extend; + data_len <= unsigned(l_in.write_len); + byte_offset <= unsigned(l_in.write_shift); if l_in.write_enable = '1' then w_out.write_reg <= gpr_to_gspr(l_in.write_reg); - data_in <= l_in.write_data; - data_len <= unsigned(l_in.write_len); - byte_offset <= unsigned(l_in.write_shift); - sign_extend <= l_in.sign_extend; if l_in.byte_reverse = '1' then brev_lenm1 <= unsigned(l_in.write_len(2 downto 0)) - 1; end if; @@ -138,7 +132,7 @@ begin end loop; for i in 0 to 7 loop j := to_integer(perm(i)) * 8; - data_permuted(i * 8 + 7 downto i * 8) <= data_in(j + 7 downto j); + data_permuted(i * 8 + 7 downto i * 8) <= l_in.write_data(j + 7 downto j); end loop; -- If the data can arrive split over two cycles, this will be correct @@ -160,16 +154,12 @@ begin trim_ctl(i) <= '0' & (negative and sign_extend); end if; end loop; - zero <= not negative; for i in 0 to 7 loop case trim_ctl(i) is when "11" => data_trimmed(i * 8 + 7 downto i * 8) <= data_latched(i * 8 + 7 downto i * 8); when "10" => data_trimmed(i * 8 + 7 downto i * 8) <= data_permuted(i * 8 + 7 downto i * 8); - if or data_permuted(i * 8 + 7 downto i * 8) /= '0' then - zero <= '0'; - end if; when "01" => data_trimmed(i * 8 + 7 downto i * 8) <= x"FF"; when others => @@ -178,14 +168,21 @@ begin end loop; -- deliver to regfile - w_out.write_data <= data_trimmed; + if l_in.write_enable = '1' then + w_out.write_data <= data_trimmed; + else + w_out.write_data <= e_in.write_data; + end if; -- Perform CR0 update for RC forms + -- Note that loads never have a form with an RC bit, therefore this can test e_in.write_data if rc = '1' then + sign := e_in.write_data(63); + zero := not (or e_in.write_data); c_out.write_cr_enable <= '1'; c_out.write_cr_mask <= num_to_fxm(0); - cf(3) := negative; - cf(2) := not negative and not zero; + cf(3) := sign; + cf(2) := not sign and not zero; cf(1) := zero; cf(0) := xe.so; c_out.write_cr_data(31 downto 28) <= cf;