Add a rotate/mask/shift unit and use it in execute1

This adds a new entity 'rotator' which contains combinatorial logic
for rotating and masking 64-bit values.  It implements the operations
of the rlwinm, rlwnm, rlwimi, rldicl, rldicr, rldic, rldimi, rldcl,
rldcr, sld, slw, srd, srw, srad, sradi, sraw and srawi instructions.
It consists of a 3-stage 64-bit rotator using 4:1 multiplexors at
each stage, two mask generators, output logic and control logic.

The insn_type_t values used for these instructions have been reduced
to just 5: OP_RLC, OP_RLCL and OP_RLCR for the rotate and mask
instructions (clear both left and right, clear left, clear right
variants), OP_SHL for left shifts, and OP_SHR for right shifts.
The control signals for the rotator are derived from the opcode
and from the is_32bit and is_signed fields of the decode_rom_t.

The rotator is instantiated as an entity in execute1 so that we can
be sure we only have one of it.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
jtag-port
Paul Mackerras 5 years ago
parent 90b6e27380
commit f7c393ba7e

@ -2,7 +2,8 @@ GHDL=ghdl
GHDLFLAGS=--std=08 -Psim-unisim
CFLAGS=-O2 -Wall

all = core_tb simple_ram_behavioural_tb soc_reset_tb icache_tb multiply_tb dmi_dtm_tb divider_tb
all = core_tb simple_ram_behavioural_tb soc_reset_tb icache_tb multiply_tb dmi_dtm_tb divider_tb \
rotator_tb

# XXX
# loadstore_tb fetch_tb
@ -22,7 +23,7 @@ crhelpers.o: common.o
decode1.o: common.o decode_types.o
decode2.o: decode_types.o common.o helpers.o insn_helpers.o
decode_types.o:
execute1.o: decode_types.o common.o helpers.o crhelpers.o ppc_fx_insns.o insn_helpers.o
execute1.o: decode_types.o common.o helpers.o crhelpers.o ppc_fx_insns.o insn_helpers.o rotator.o
execute2.o: common.o crhelpers.o ppc_fx_insns.o
fetch1.o: common.o
fetch2.o: common.o wishbone_types.o
@ -40,6 +41,8 @@ divider_tb.o: decode_types.o common.o glibc_random.o ppc_fx_insns.o divider.o
divider.o: common.o decode_types.o crhelpers.o
ppc_fx_insns.o: helpers.o
register_file.o: common.o
rotator.o: common.o
rotator_tb.o: common.o glibc_random.o ppc_fx_insns.o insn_helpers.o rotator.o
sim_console.o:
simple_ram_behavioural_helpers.o:
simple_ram_behavioural_tb.o: wishbone_types.o simple_ram_behavioural.o
@ -81,6 +84,9 @@ multiply_tb: multiply_tb.o
divider_tb: divider_tb.o
$(GHDL) -e $(GHDLFLAGS) $@

rotator_tb: rotator_tb.o
$(GHDL) -e $(GHDLFLAGS) $@

simple_ram_tb: simple_ram_tb.o
$(GHDL) -e $(GHDLFLAGS) $@


@ -26,7 +26,7 @@ architecture behaviour of decode1 is
type major_rom_array_t is array(0 to 63) of decode_rom_t;
type minor_valid_array_t is array(0 to 1023) of std_ulogic;
type op_19_subop_array_t is array(0 to 7) of decode_rom_t;
type op_30_subop_array_t is array(0 to 7) of decode_rom_t;
type op_30_subop_array_t is array(0 to 15) of decode_rom_t;
type op_31_subop_array_t is array(0 to 1023) of decode_rom_t;
type minor_rom_array_2_t is array(0 to 3) of decode_rom_t;

@ -57,9 +57,9 @@ architecture behaviour of decode1 is
7 => (MUL, OP_MUL_L64, RA, CONST_SI, NONE, RT, '0', '1', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '1'), -- mulli
24 => (ALU, OP_OR, NONE, CONST_UI, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- ori
25 => (ALU, OP_OR, NONE, CONST_UI_HI, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- oris
20 => (ALU, OP_RLWIMI, RA, NONE, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- rlwimi
21 => (ALU, OP_RLWINM, NONE, NONE, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- rlwinm
23 => (ALU, OP_RLWNM, NONE, RB, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- rlwnm
20 => (ALU, OP_RLC, RA, CONST_SH32, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '1'), -- rlwimi
21 => (ALU, OP_RLC, NONE, CONST_SH32, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '1'), -- rlwinm
23 => (ALU, OP_RLC, NONE, RB, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '1'), -- rlwnm
38 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- stb
39 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', ZERO, '0', is1B, '0', '0', '1', '0', '0', '0', RC, '0', '1'), -- stbu
44 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- sth
@ -115,12 +115,16 @@ architecture behaviour of decode1 is
constant decode_op_30_array : op_30_subop_array_t := (
-- unit internal in1 in2 in3 out CR CR inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl
-- op in out A in out len ext pipe
2#010# => (ALU, OP_RLDIC, NONE, NONE, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'),
2#000# => (ALU, OP_RLDICL, NONE, NONE, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'),
2#001# => (ALU, OP_RLDICR, NONE, NONE, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'),
2#011# => (ALU, OP_RLDIMI, RA, NONE, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'),
-- rldcl, rldcr
2#100# => (ALU, OP_RLDCX, NONE, RB, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'),
2#0100# => (ALU, OP_RLC, NONE, CONST_SH, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- rldic
2#0101# => (ALU, OP_RLC, NONE, CONST_SH, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- rldic
2#0000# => (ALU, OP_RLCL, NONE, CONST_SH, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- rldicl
2#0001# => (ALU, OP_RLCL, NONE, CONST_SH, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- rldicl
2#0010# => (ALU, OP_RLCR, NONE, CONST_SH, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- rldicr
2#0011# => (ALU, OP_RLCR, NONE, CONST_SH, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- rldicr
2#0110# => (ALU, OP_RLC, RA, CONST_SH, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- rldimi
2#0111# => (ALU, OP_RLC, RA, CONST_SH, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- rldimi
2#1000# => (ALU, OP_RLCL, NONE, RB, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- rldcl
2#1001# => (ALU, OP_RLCR, NONE, RB, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- rldcr
others => illegal_inst
);

@ -249,15 +253,15 @@ architecture behaviour of decode1 is
2#0010111010# => (ALU, OP_PRTYD, NONE, NONE, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- prtyd
2#0010011010# => (ALU, OP_PRTYW, NONE, NONE, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- prtyw
-- 2#0010000000# setb
2#0000011011# => (ALU, OP_SLD, NONE, RB, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- sld
2#0000011000# => (ALU, OP_SLW, NONE, RB, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- slw
2#1100011010# => (ALU, OP_SRAD, NONE, RB, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- srad
2#1100111010# => (ALU, OP_SRADI, NONE, NONE, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- sradi
2#1100111011# => (ALU, OP_SRADI, NONE, NONE, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- sradi
2#1100011000# => (ALU, OP_SRAW, NONE, RB, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- sraw
2#1100111000# => (ALU, OP_SRAWI, NONE, NONE, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- srawi
2#1000011011# => (ALU, OP_SRD, NONE, RB, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- srd
2#1000011000# => (ALU, OP_SRW, NONE, RB, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- srw
2#0000011011# => (ALU, OP_SHL, NONE, RB, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- sld
2#0000011000# => (ALU, OP_SHL, NONE, RB, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '1'), -- slw
2#1100011010# => (ALU, OP_SHR, NONE, RB, RS, RA, '0', '0', '0', ZERO, '1', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '1'), -- srad
2#1100111010# => (ALU, OP_SHR, NONE, CONST_SH, RS, RA, '0', '0', '0', ZERO, '1', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '1'), -- sradi
2#1100111011# => (ALU, OP_SHR, NONE, CONST_SH, RS, RA, '0', '0', '0', ZERO, '1', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '1'), -- sradi
2#1100011000# => (ALU, OP_SHR, NONE, RB, RS, RA, '0', '0', '0', ZERO, '1', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '1'), -- sraw
2#1100111000# => (ALU, OP_SHR, NONE, CONST_SH32, RS, RA, '0', '0', '0', ZERO, '1', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '1'), -- srawi
2#1000011011# => (ALU, OP_SHR, NONE, RB, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- srd
2#1000011000# => (ALU, OP_SHR, NONE, RB, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '1'), -- srw
2#1010110110# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '1', '0', '0', RC, '0', '1'), -- stbcx
2#0011110111# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', ZERO, '0', is1B, '0', '0', '1', '0', '0', '0', RC, '0', '1'), -- stbux
2#0011010111# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- stbx
@ -323,7 +327,6 @@ begin
variable v : Decode1ToDecode2Type;
variable majorop : major_opcode_t;
variable op_19_bits: std_ulogic_vector(2 downto 0);
variable op_30_bits: std_ulogic_vector(2 downto 0);
begin
v := r;

@ -352,7 +355,7 @@ begin
end if;

elsif majorop = "011110" then
v.decode := decode_op_30_array(to_integer(unsigned(f_in.insn(4 downto 2))));
v.decode := decode_op_30_array(to_integer(unsigned(f_in.insn(4 downto 1))));

elsif majorop = "111010" then
v.decode := decode_op_58_array(to_integer(unsigned(f_in.insn(1 downto 0))));

@ -90,7 +90,11 @@ architecture behaviour of decode2 is
when CONST_DS =>
return ('0', (others => '0'), std_ulogic_vector(resize(signed(insn_ds(insn_in)) & "00", 64)));
when CONST_M1 =>
return ('0', (others => '0'), x"FFFFFFFFFFFFFFFF");
return ('0', (others => '0'), x"FFFFFFFFFFFFFFFF");
when CONST_SH =>
return ('0', (others => '0'), x"00000000000000" & "00" & insn_in(1) & insn_in(15 downto 11));
when CONST_SH32 =>
return ('0', (others => '0'), x"00000000000000" & "000" & insn_in(15 downto 11));
when NONE =>
return ('0', (others => '0'), (others => '0'));
end case;

@ -15,14 +15,13 @@ package decode_types is
OP_MTCRF, OP_MTSPR, OP_MUL_L64,
OP_MUL_H64, OP_MUL_H32, OP_NAND, OP_NEG, OP_NOR, OP_OR,
OP_ORC, OP_POPCNTB, OP_POPCNTD, OP_POPCNTW, OP_PRTYD,
OP_PRTYW, OP_RLDCX, OP_RLDIC, OP_RLDICL, OP_RLDICR,
OP_RLDIMI, OP_RLWIMI, OP_RLWINM, OP_RLWNM, OP_SETB, OP_SLD,
OP_SLW, OP_SRAD, OP_SRADI, OP_SRAW, OP_SRAWI, OP_SRD, OP_SRW,
OP_PRTYW, OP_RLC, OP_RLCL, OP_RLCR, OP_SETB,
OP_SHL, OP_SHR,
OP_SYNC, OP_TD, OP_TDI, OP_TW,
OP_TWI, OP_XOR, OP_SIM_CONFIG);

type input_reg_a_t is (NONE, RA, RA_OR_ZERO);
type input_reg_b_t is (NONE, RB, CONST_UI, CONST_SI, CONST_SI_HI, CONST_UI_HI, CONST_LI, CONST_BD, CONST_DS, CONST_M1);
type input_reg_b_t is (NONE, RB, CONST_UI, CONST_SI, CONST_SI_HI, CONST_UI_HI, CONST_LI, CONST_BD, CONST_DS, CONST_M1, CONST_SH, CONST_SH32);
type input_reg_c_t is (NONE, RS);
type output_reg_a_t is (NONE, RT, RA);
type rc_t is (NONE, ONE, RC);

@ -42,6 +42,10 @@ architecture behaviour of execute1 is
signal ctrl: ctrl_t := (carry => '0', others => (others => '0'));
signal ctrl_tmp: ctrl_t := (carry => '0', others => (others => '0'));

signal right_shift, rot_clear_left, rot_clear_right: std_ulogic;
signal rotator_result: std_ulogic_vector(63 downto 0);
signal rotator_carry: std_ulogic;

function decode_input_carry (carry_sel : carry_in_t; ca_in : std_ulogic) return std_ulogic is
begin
case carry_sel is
@ -54,6 +58,22 @@ architecture behaviour of execute1 is
end case;
end;
begin

rotator_0: entity work.rotator
port map (
rs => e_in.read_data3,
ra => e_in.read_data1,
shift => e_in.read_data2(6 downto 0),
insn => e_in.insn,
is_32bit => e_in.is_32bit,
right_shift => right_shift,
arith => e_in.is_signed,
clear_left => rot_clear_left,
clear_right => rot_clear_right,
result => rotator_result,
carry_out => rotator_carry
);

execute1_0: process(clk)
begin
if rising_edge(clk) then
@ -93,6 +113,11 @@ begin
terminate_out <= '0';
f_out <= Execute1ToFetch1TypeInit;

-- rotator control signals
right_shift <= '1' when e_in.insn_type = OP_SHR else '0';
rot_clear_left <= '1' when e_in.insn_type = OP_RLC or e_in.insn_type = OP_RLCL else '0';
rot_clear_right <= '1' when e_in.insn_type = OP_RLC or e_in.insn_type = OP_RLCR else '0';

if e_in.valid = '1' then

v.e.valid := '1';
@ -310,85 +335,11 @@ begin
when OP_PRTYW =>
result := ppc_prtyw(e_in.read_data3);
result_en := 1;
when OP_RLDCX =>
-- note rldcl mb field and rldcr me field are in the same place
mb := insn_mb(e_in.insn);
if e_in.insn(1) = '0' then
result := ppc_rldcl(e_in.read_data3, e_in.read_data2, mb);
else
result := ppc_rldcr(e_in.read_data3, e_in.read_data2, mb);
end if;
result_en := 1;
when OP_RLDICL =>
sh := insn_sh(e_in.insn);
mb := insn_mb(e_in.insn);
result := ppc_rldicl(e_in.read_data3, sh, mb);
result_en := 1;
when OP_RLDICR =>
sh := insn_sh(e_in.insn);
me := insn_me(e_in.insn);
result := ppc_rldicr(e_in.read_data3, sh, me);
result_en := 1;
when OP_RLWNM =>
mb32 := insn_mb32(e_in.insn);
me32 := insn_me32(e_in.insn);
result := ppc_rlwnm(e_in.read_data3, e_in.read_data2, mb32, me32);
result_en := 1;
when OP_RLWINM =>
sh32 := insn_sh32(e_in.insn);
mb32 := insn_mb32(e_in.insn);
me32 := insn_me32(e_in.insn);
result := ppc_rlwinm(e_in.read_data3, sh32, mb32, me32);
result_en := 1;
when OP_RLDIC =>
sh := insn_sh(e_in.insn);
mb := insn_mb(e_in.insn);
result := ppc_rldic(e_in.read_data3, sh, mb);
result_en := 1;
when OP_RLDIMI =>
sh := insn_sh(e_in.insn);
mb := insn_mb(e_in.insn);
result := ppc_rldimi(e_in.read_data1, e_in.read_data3, sh, mb);
result_en := 1;
when OP_RLWIMI =>
sh32 := insn_sh32(e_in.insn);
mb32 := insn_mb32(e_in.insn);
me32 := insn_me32(e_in.insn);
result := ppc_rlwimi(e_in.read_data1, e_in.read_data3, sh32, mb32, me32);
result_en := 1;
when OP_SLD =>
result := ppc_sld(e_in.read_data3, e_in.read_data2);
result_en := 1;
when OP_SLW =>
result := ppc_slw(e_in.read_data3, e_in.read_data2);
result_en := 1;
when OP_SRAW =>
result_with_carry := ppc_sraw(e_in.read_data3, e_in.read_data2);
result := result_with_carry(63 downto 0);
ctrl_tmp.carry <= result_with_carry(64);
result_en := 1;
when OP_SRAWI =>
sh := '0' & insn_sh32(e_in.insn);
result_with_carry := ppc_srawi(e_in.read_data3, sh);
result := result_with_carry(63 downto 0);
ctrl_tmp.carry <= result_with_carry(64);
result_en := 1;
when OP_SRAD =>
result_with_carry := ppc_srad(e_in.read_data3, e_in.read_data2);
result := result_with_carry(63 downto 0);
ctrl_tmp.carry <= result_with_carry(64);
result_en := 1;
when OP_SRADI =>
sh := insn_sh(e_in.insn);
result_with_carry := ppc_sradi(e_in.read_data3, sh);
result := result_with_carry(63 downto 0);
ctrl_tmp.carry <= result_with_carry(64);
result_en := 1;
when OP_SRD =>
result := ppc_srd(e_in.read_data3, e_in.read_data2);
result_en := 1;
when OP_SRW =>
result := ppc_srw(e_in.read_data3, e_in.read_data2);
when OP_RLC | OP_RLCL | OP_RLCR | OP_SHL | OP_SHR =>
result := rotator_result;
if e_in.output_carry = '1' then
ctrl_tmp.carry <= rotator_carry;
end if;
result_en := 1;
when OP_XOR =>
result := ppc_xor(e_in.read_data3, e_in.read_data2);

@ -24,6 +24,7 @@ filesets:
- loadstore2.vhdl
- multiply.vhdl
- divider.vhdl
- rotator.vhdl
- writeback.vhdl
- insn_helpers.vhdl
- core.vhdl

@ -0,0 +1,183 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.common.all;

entity rotator is
port (rs: in std_ulogic_vector(63 downto 0);
ra: in std_ulogic_vector(63 downto 0);
shift: in std_ulogic_vector(6 downto 0);
insn: in std_ulogic_vector(31 downto 0);
is_32bit: in std_ulogic;
right_shift: in std_ulogic;
arith: in std_ulogic;
clear_left: in std_ulogic;
clear_right: in std_ulogic;
result: out std_ulogic_vector(63 downto 0);
carry_out: out std_ulogic
);
end entity rotator;

architecture behaviour of rotator is
signal repl32: std_ulogic_vector(63 downto 0);
signal rot_count: std_ulogic_vector(5 downto 0);
signal rot1, rot2, rot: std_ulogic_vector(63 downto 0);
signal sh, mb, me: std_ulogic_vector(6 downto 0);
signal mr, ml: std_ulogic_vector(63 downto 0);
signal output_mode: std_ulogic_vector(1 downto 0);

-- note BE bit numbering
function right_mask(mask_begin: std_ulogic_vector(6 downto 0)) return std_ulogic_vector is
variable ret: std_ulogic_vector(63 downto 0);
begin
ret := (others => '0');
for i in 0 to 63 loop
if i >= to_integer(unsigned(mask_begin)) then
ret(63 - i) := '1';
end if;
end loop;
return ret;
end;

function left_mask(mask_end: std_ulogic_vector(6 downto 0)) return std_ulogic_vector is
variable ret: std_ulogic_vector(63 downto 0);
begin
ret := (others => '0');
if mask_end(6) = '0' then
for i in 0 to 63 loop
if i <= to_integer(unsigned(mask_end)) then
ret(63 - i) := '1';
end if;
end loop;
end if;
return ret;
end;

begin
rotator_0: process(all)
begin
-- First replicate bottom 32 bits to both halves if 32-bit
if is_32bit = '1' then
repl32 <= rs(31 downto 0) & rs(31 downto 0);
else
repl32 <= rs;
end if;

-- Negate shift count for right shifts
if right_shift = '1' then
rot_count <= std_ulogic_vector(- signed(shift(5 downto 0)));
else
rot_count <= shift(5 downto 0);
end if;

-- Rotator works in 3 stages using 2 bits of the rotate count each
-- time. This gives 4:1 multiplexors which is ideal for the 6-input
-- LUTs in the Xilinx Artix 7.
-- We look at the low bits of the rotate count first because they will
-- have less delay through the negation above.
-- First rotate by 0, 1, 2, or 3
case rot_count(1 downto 0) is
when "00" =>
rot1 <= repl32;
when "01" =>
rot1 <= repl32(62 downto 0) & repl32(63);
when "10" =>
rot1 <= repl32(61 downto 0) & repl32(63 downto 62);
when others =>
rot1 <= repl32(60 downto 0) & repl32(63 downto 61);
end case;
-- Next rotate by 0, 4, 8 or 12
case rot_count(3 downto 2) is
when "00" =>
rot2 <= rot1;
when "01" =>
rot2 <= rot1(59 downto 0) & rot1(63 downto 60);
when "10" =>
rot2 <= rot1(55 downto 0) & rot1(63 downto 56);
when others =>
rot2 <= rot1(51 downto 0) & rot1(63 downto 52);
end case;
-- Lastly rotate by 0, 16, 32 or 48
case rot_count(5 downto 4) is
when "00" =>
rot <= rot2;
when "01" =>
rot <= rot2(47 downto 0) & rot2(63 downto 48);
when "10" =>
rot <= rot2(31 downto 0) & rot2(63 downto 32);
when others =>
rot <= rot2(15 downto 0) & rot2(63 downto 16);
end case;

-- Trim shift count to 6 bits for 32-bit shifts
sh <= (shift(6) and not is_32bit) & shift(5 downto 0);

-- Work out mask begin/end indexes (caution, big-endian bit numbering)
if clear_left = '1' then
if is_32bit = '1' then
mb <= "01" & insn(10 downto 6);
else
mb <= "0" & insn(5) & insn(10 downto 6);
end if;
elsif right_shift = '1' then
-- this is basically mb <= sh + (is_32bit? 32: 0);
if is_32bit = '1' then
mb <= sh(5) & not sh(5) & sh(4 downto 0);
else
mb <= sh;
end if;
else
mb <= ('0' & is_32bit & "00000");
end if;
if clear_right = '1' and is_32bit = '1' then
me <= "01" & insn(5 downto 1);
elsif clear_right = '1' and clear_left = '0' then
me <= "0" & insn(5) & insn(10 downto 6);
else
-- effectively, 63 - sh
me <= sh(6) & not sh(5 downto 0);
end if;

-- Calculate left and right masks
mr <= right_mask(mb);
ml <= left_mask(me);

-- Work out output mode
-- 00 for sl[wd]
-- 0w for rlw*, rldic, rldicr, rldimi, where w = 1 iff mb > me
-- 10 for rldicl, sr[wd]
-- 1z for sra[wd][i], z = 1 if rs is negative
if (clear_left = '1' and clear_right = '0') or right_shift = '1' then
output_mode(1) <= '1';
output_mode(0) <= arith and repl32(63);
else
output_mode(1) <= '0';
if clear_right = '1' and unsigned(mb(5 downto 0)) > unsigned(me(5 downto 0)) then
output_mode(0) <= '1';
else
output_mode(0) <= '0';
end if;
end if;

-- Generate output from rotated input and masks
case output_mode is
when "00" =>
result <= (rot and (mr and ml)) or (ra and not (mr and ml));
when "01" =>
result <= (rot and (mr or ml)) or (ra and not (mr or ml));
when "10" =>
result <= rot and mr;
when others =>
result <= rot or not mr;
end case;

-- Generate carry output for arithmetic shift right of negative value
if output_mode = "11" then
carry_out <= or (rs and not ml);
else
carry_out <= '0';
end if;
end process;
end behaviour;

@ -0,0 +1,269 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.common.all;
use work.glibc_random.all;
use work.ppc_fx_insns.all;
use work.insn_helpers.all;

entity rotator_tb is
end rotator_tb;

architecture behave of rotator_tb is
constant clk_period: time := 10 ns;
signal ra, rs: std_ulogic_vector(63 downto 0);
signal shift: std_ulogic_vector(6 downto 0) := (others => '0');
signal insn: std_ulogic_vector(31 downto 0) := (others => '0');
signal is_32bit, right_shift, arith, clear_left, clear_right: std_ulogic := '0';
signal result: std_ulogic_vector(63 downto 0);
signal carry_out: std_ulogic;

begin
rotator_0: entity work.rotator
port map (
rs => rs,
ra => ra,
shift => shift,
insn => insn,
is_32bit => is_32bit,
right_shift => right_shift,
arith => arith,
clear_left => clear_left,
clear_right => clear_right,
result => result,
carry_out => carry_out
);

stim_process: process
variable behave_ra: std_ulogic_vector(63 downto 0);
variable behave_ca_ra: std_ulogic_vector(64 downto 0);
begin
-- rlwinm, rlwnm
report "test rlw[i]nm";
ra <= (others => '0');
is_32bit <= '1';
right_shift <= '0';
arith <= '0';
clear_left <= '1';
clear_right <= '1';
rlwnm_loop : for i in 0 to 1000 loop
rs <= pseudorand(64);
shift <= pseudorand(7);
insn <= x"00000" & '0' & pseudorand(10) & '0';
wait for clk_period;
behave_ra := ppc_rlwinm(rs, shift(4 downto 0), insn_mb32(insn), insn_me32(insn));
assert behave_ra = result
report "bad rlwnm expected " & to_hstring(behave_ra) & " got " & to_hstring(result);
end loop;

-- rlwimi
report "test rlwimi";
is_32bit <= '1';
right_shift <= '0';
arith <= '0';
clear_left <= '1';
clear_right <= '1';
rlwimi_loop : for i in 0 to 1000 loop
rs <= pseudorand(64);
ra <= pseudorand(64);
shift <= "00" & pseudorand(5);
insn <= x"00000" & '0' & pseudorand(10) & '0';
wait for clk_period;
behave_ra := ppc_rlwimi(ra, rs, shift(4 downto 0), insn_mb32(insn), insn_me32(insn));
assert behave_ra = result
report "bad rlwimi expected " & to_hstring(behave_ra) & " got " & to_hstring(result);
end loop;

-- rldicl, rldcl
report "test rld[i]cl";
ra <= (others => '0');
is_32bit <= '0';
right_shift <= '0';
arith <= '0';
clear_left <= '1';
clear_right <= '0';
rldicl_loop : for i in 0 to 1000 loop
rs <= pseudorand(64);
shift <= pseudorand(7);
insn <= x"00000" & '0' & pseudorand(10) & '0';
wait for clk_period;
behave_ra := ppc_rldicl(rs, shift(5 downto 0), insn_mb(insn));
assert behave_ra = result
report "bad rldicl expected " & to_hstring(behave_ra) & " got " & to_hstring(result);
end loop;

-- rldicr, rldcr
report "test rld[i]cr";
ra <= (others => '0');
is_32bit <= '0';
right_shift <= '0';
arith <= '0';
clear_left <= '0';
clear_right <= '1';
rldicr_loop : for i in 0 to 1000 loop
rs <= pseudorand(64);
shift <= pseudorand(7);
insn <= x"00000" & '0' & pseudorand(10) & '0';
wait for clk_period;
behave_ra := ppc_rldicr(rs, shift(5 downto 0), insn_me(insn));
--report "rs = " & to_hstring(rs);
--report "ra = " & to_hstring(ra);
--report "shift = " & to_hstring(shift);
--report "insn me = " & to_hstring(insn_me(insn));
--report "result = " & to_hstring(result);
assert behave_ra = result
report "bad rldicr expected " & to_hstring(behave_ra) & " got " & to_hstring(result);
end loop;

-- rldic
report "test rldic";
ra <= (others => '0');
is_32bit <= '0';
right_shift <= '0';
arith <= '0';
clear_left <= '1';
clear_right <= '1';
rldic_loop : for i in 0 to 1000 loop
rs <= pseudorand(64);
shift <= '0' & pseudorand(6);
insn <= x"00000" & '0' & pseudorand(10) & '0';
wait for clk_period;
behave_ra := ppc_rldic(rs, shift(5 downto 0), insn_mb(insn));
assert behave_ra = result
report "bad rldic expected " & to_hstring(behave_ra) & " got " & to_hstring(result);
end loop;

-- rldimi
report "test rldimi";
is_32bit <= '0';
right_shift <= '0';
arith <= '0';
clear_left <= '1';
clear_right <= '1';
rldimi_loop : for i in 0 to 1000 loop
rs <= pseudorand(64);
ra <= pseudorand(64);
shift <= '0' & pseudorand(6);
insn <= x"00000" & '0' & pseudorand(10) & '0';
wait for clk_period;
behave_ra := ppc_rldimi(ra, rs, shift(5 downto 0), insn_mb(insn));
assert behave_ra = result
report "bad rldimi expected " & to_hstring(behave_ra) & " got " & to_hstring(result);
end loop;

-- slw
report "test slw";
ra <= (others => '0');
is_32bit <= '1';
right_shift <= '0';
arith <= '0';
clear_left <= '0';
clear_right <= '0';
slw_loop : for i in 0 to 1000 loop
rs <= pseudorand(64);
shift <= pseudorand(7);
wait for clk_period;
behave_ra := ppc_slw(rs, std_ulogic_vector(resize(unsigned(shift), 64)));
assert behave_ra = result
report "bad slw expected " & to_hstring(behave_ra) & " got " & to_hstring(result);
end loop;

-- sld
report "test sld";
ra <= (others => '0');
is_32bit <= '0';
right_shift <= '0';
arith <= '0';
clear_left <= '0';
clear_right <= '0';
sld_loop : for i in 0 to 1000 loop
rs <= pseudorand(64);
shift <= pseudorand(7);
wait for clk_period;
behave_ra := ppc_sld(rs, std_ulogic_vector(resize(unsigned(shift), 64)));
assert behave_ra = result
report "bad sld expected " & to_hstring(behave_ra) & " got " & to_hstring(result);
end loop;

-- srw
report "test srw";
ra <= (others => '0');
is_32bit <= '1';
right_shift <= '1';
arith <= '0';
clear_left <= '0';
clear_right <= '0';
srw_loop : for i in 0 to 1000 loop
rs <= pseudorand(64);
shift <= pseudorand(7);
wait for clk_period;
behave_ra := ppc_srw(rs, std_ulogic_vector(resize(unsigned(shift), 64)));
assert behave_ra = result
report "bad srw expected " & to_hstring(behave_ra) & " got " & to_hstring(result);
end loop;

-- srd
report "test srd";
ra <= (others => '0');
is_32bit <= '0';
right_shift <= '1';
arith <= '0';
clear_left <= '0';
clear_right <= '0';
srd_loop : for i in 0 to 1000 loop
rs <= pseudorand(64);
shift <= pseudorand(7);
wait for clk_period;
behave_ra := ppc_srd(rs, std_ulogic_vector(resize(unsigned(shift), 64)));
assert behave_ra = result
report "bad srd expected " & to_hstring(behave_ra) & " got " & to_hstring(result);
end loop;

-- sraw[i]
report "test sraw[i]";
ra <= (others => '0');
is_32bit <= '1';
right_shift <= '1';
arith <= '1';
clear_left <= '0';
clear_right <= '0';
sraw_loop : for i in 0 to 1000 loop
rs <= pseudorand(64);
shift <= '0' & pseudorand(6);
wait for clk_period;
behave_ca_ra := ppc_sraw(rs, std_ulogic_vector(resize(unsigned(shift), 64)));
--report "rs = " & to_hstring(rs);
--report "ra = " & to_hstring(ra);
--report "shift = " & to_hstring(shift);
--report "result = " & to_hstring(carry_out & result);
assert behave_ca_ra(63 downto 0) = result and behave_ca_ra(64) = carry_out
report "bad sraw expected " & to_hstring(behave_ca_ra) & " got " & to_hstring(carry_out & result);
end loop;

-- srad[i]
report "test srad[i]";
ra <= (others => '0');
is_32bit <= '0';
right_shift <= '1';
arith <= '1';
clear_left <= '0';
clear_right <= '0';
srad_loop : for i in 0 to 1000 loop
rs <= pseudorand(64);
shift <= pseudorand(7);
wait for clk_period;
behave_ca_ra := ppc_srad(rs, std_ulogic_vector(resize(unsigned(shift), 64)));
--report "rs = " & to_hstring(rs);
--report "ra = " & to_hstring(ra);
--report "shift = " & to_hstring(shift);
--report "result = " & to_hstring(carry_out & result);
assert behave_ca_ra(63 downto 0) = result and behave_ca_ra(64) = carry_out
report "bad srad expected " & to_hstring(behave_ca_ra) & " got " & to_hstring(carry_out & result);
end loop;

assert false report "end of test" severity failure;
wait;
end process;
end behave;
Loading…
Cancel
Save