execute1: Reduce width of the result mux to help timing

This reduces the number of distinct sources that are assigned to
the 'result' variable in execute1, which narrows the result mux.

- The computations for the popcnt, prty, cmpb and exts instruction
  families are moved into the logical unit.
- The result of mfspr from the slow SPRs is computed in 'spr_val'
  before being assigned to 'result'.
- Writes to LR as a result of a branch-and-link instruction (e.g. bl
  or bclrl) are done through the exc_write path to writeback.

This eases timing considerably.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/208/head
Paul Mackerras 4 years ago
parent 6687aae4d6
commit ec2fa61792

@ -82,8 +82,6 @@ architecture behaviour of execute1 is
signal rotator_carry: std_ulogic;
signal logical_result: std_ulogic_vector(63 downto 0);
signal countzero_result: std_ulogic_vector(63 downto 0);
signal popcnt_result: std_ulogic_vector(63 downto 0);
signal parity_result: std_ulogic_vector(63 downto 0);

-- multiply signals
signal x_to_multiply: Execute1ToMultiplyType;
@ -208,9 +206,7 @@ begin
invert_in => e_in.invert_a,
invert_out => e_in.invert_out,
result => logical_result,
datalen => e_in.data_len,
popcnt => popcnt_result,
parity => parity_result
datalen => e_in.data_len
);

countzero_0: entity work.zero_counter
@ -295,7 +291,6 @@ begin
variable sign1, sign2 : std_ulogic;
variable abs1, abs2 : signed(63 downto 0);
variable overflow : std_ulogic;
variable negative : std_ulogic;
variable zerohi, zerolo : std_ulogic;
variable msb_a, msb_b : std_ulogic;
variable a_lt : std_ulogic;
@ -308,6 +303,7 @@ begin
variable is_branch : std_ulogic;
variable taken_branch : std_ulogic;
variable abs_branch : std_ulogic;
variable spr_val : std_ulogic_vector(63 downto 0);
begin
result := (others => '0');
result_with_carry := (others => '0');
@ -627,7 +623,7 @@ begin
end if;
end if;
end if;
when OP_AND | OP_OR | OP_XOR =>
when OP_AND | OP_OR | OP_XOR | OP_POPCNT | OP_PRTY | OP_CMPB | OP_EXTS =>
result := logical_result;
result_en := '1';
when OP_B =>
@ -677,27 +673,10 @@ begin
ctrl_tmp.msr(MSR_DR) <= '1';
end if;

when OP_CMPB =>
result := ppc_cmpb(c_in, b_in);
result_en := '1';
when OP_CNTZ =>
v.e.valid := '0';
v.cntz_in_progress := '1';
v.busy := '1';
when OP_EXTS =>
-- note data_len is a 1-hot encoding
negative := (e_in.data_len(0) and c_in(7)) or
(e_in.data_len(1) and c_in(15)) or
(e_in.data_len(2) and c_in(31));
result := (others => negative);
if e_in.data_len(2) = '1' then
result(31 downto 16) := c_in(31 downto 16);
end if;
if e_in.data_len(2) = '1' or e_in.data_len(1) = '1' then
result(15 downto 8) := c_in(15 downto 8);
end if;
result(7 downto 0) := c_in(7 downto 0);
result_en := '1';
when OP_ISEL =>
crbit := to_integer(unsigned(insn_bc(e_in.insn)));
if e_in.cr(31-crbit) = '1' then
@ -769,24 +748,25 @@ begin
result(63-45) := v.e.xerc.ca32;
end if;
else
spr_val := c_in;
case decode_spr_num(e_in.insn) is
when SPR_TB =>
result := ctrl.tb;
spr_val := ctrl.tb;
when SPR_DEC =>
result := ctrl.dec;
spr_val := ctrl.dec;
when 724 => -- LOG_ADDR SPR
result := log_wr_addr & r.log_addr_spr;
spr_val := log_wr_addr & r.log_addr_spr;
when 725 => -- LOG_DATA SPR
result := log_rd_data;
spr_val := log_rd_data;
v.log_addr_spr := std_ulogic_vector(unsigned(r.log_addr_spr) + 1);
when others =>
-- mfspr from unimplemented SPRs should be a nop in
-- supervisor mode and a program interrupt for user mode
result := c_in;
if ctrl.msr(MSR_PR) = '1' then
illegal := '1';
end if;
end case;
result := spr_val;
end if;
when OP_MFCR =>
if e_in.insn(20) = '0' then
@ -862,12 +842,6 @@ begin
end if;
end case;
end if;
when OP_POPCNT =>
result := popcnt_result;
result_en := '1';
when OP_PRTY =>
result := parity_result;
result_en := '1';
when OP_RLC | OP_RLCL | OP_RLCR | OP_SHL | OP_SHR | OP_EXTSWSLI =>
result := rotator_result;
if e_in.output_carry = '1' then
@ -917,12 +891,14 @@ begin

-- Update LR on the next cycle after a branch link
-- If we're not writing back anything else, we can write back LR
-- this cycle, otherwise we take an extra cycle.
-- this cycle, otherwise we take an extra cycle. We use the
-- exc_write path since next_nia is written through that path
-- in other places.
if e_in.lr = '1' then
if result_en = '0' then
result_en := '1';
result := next_nia;
v.e.write_reg := fast_spr_num(SPR_LR);
v.e.exc_write_enable := '1';
v.e.exc_write_data := next_nia;
v.e.exc_write_reg := fast_spr_num(SPR_LR);
else
v.lr_update := '1';
v.next_lr := next_nia;
@ -939,9 +915,9 @@ begin
end if;

elsif r.lr_update = '1' then
result_en := '1';
result := r.next_lr;
v.e.write_reg := fast_spr_num(SPR_LR);
v.e.exc_write_enable := '1';
v.e.exc_write_data := r.next_lr;
v.e.exc_write_reg := fast_spr_num(SPR_LR);
v.e.valid := '1';
elsif r.cntz_in_progress = '1' then
-- cnt[lt]z always takes two cycles

@ -4,6 +4,7 @@ use ieee.numeric_std.all;

library work;
use work.decode_types.all;
use work.ppc_fx_insns.all;

entity logical is
port (
@ -13,9 +14,7 @@ entity logical is
invert_in : in std_ulogic;
invert_out : in std_ulogic;
result : out std_ulogic_vector(63 downto 0);
datalen : in std_logic_vector(3 downto 0);
popcnt : out std_ulogic_vector(63 downto 0);
parity : out std_ulogic_vector(63 downto 0)
datalen : in std_logic_vector(3 downto 0)
);
end entity logical;

@ -34,30 +33,14 @@ architecture behaviour of logical is
type sixbit2 is array(0 to 1) of sixbit;
signal pc32 : sixbit2;
signal par0, par1 : std_ulogic;
signal popcnt : std_ulogic_vector(63 downto 0);
signal parity : std_ulogic_vector(63 downto 0);

begin
logical_0: process(all)
variable rb_adj, tmp : std_ulogic_vector(63 downto 0);
variable negative : std_ulogic;
begin
rb_adj := rb;
if invert_in = '1' then
rb_adj := not rb;
end if;

case op is
when OP_AND =>
tmp := rs and rb_adj;
when OP_OR =>
tmp := rs or rb_adj;
when others =>
tmp := rs xor rb_adj;
end case;

result <= tmp;
if invert_out = '1' then
result <= not tmp;
end if;

-- population counts
for i in 0 to 31 loop
pc2(i) <= unsigned("0" & rs(i * 2 downto i * 2)) + unsigned("0" & rs(i * 2 + 1 downto i * 2 + 1));
@ -98,5 +81,44 @@ begin
parity(32) <= par1;
end if;

rb_adj := rb;
if invert_in = '1' then
rb_adj := not rb;
end if;

case op is
when OP_AND =>
tmp := rs and rb_adj;
when OP_OR =>
tmp := rs or rb_adj;
when OP_XOR =>
tmp := rs xor rb_adj;
when OP_POPCNT =>
tmp := popcnt;
when OP_PRTY =>
tmp := parity;
when OP_CMPB =>
tmp := ppc_cmpb(rs, rb);
when others =>
-- EXTS
-- note datalen is a 1-hot encoding
negative := (datalen(0) and rs(7)) or
(datalen(1) and rs(15)) or
(datalen(2) and rs(31));
tmp := (others => negative);
if datalen(2) = '1' then
tmp(31 downto 16) := rs(31 downto 16);
end if;
if datalen(2) = '1' or datalen(1) = '1' then
tmp(15 downto 8) := rs(15 downto 8);
end if;
tmp(7 downto 0) := rs(7 downto 0);
end case;

if invert_out = '1' then
tmp := not tmp;
end if;
result <= tmp;

end process;
end behaviour;

Loading…
Cancel
Save