From 167e37d6675136d26acdb6f7aba0a7f7ad1e60d8 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 3 Apr 2020 14:50:17 +1100 Subject: [PATCH] Plumb insn_type through to loadstore1 In preparation for adding a TLB to the dcache, this plumbs the insn_type from execute1 through to loadstore1, so that we can have other operations besides loads and stores (e.g. tlbie) going to loadstore1 and thence to the dcache. This also plumbs the unit field of the decode ROM from decode2 through to execute1 to simplify the logic around which ops need to go to loadstore1. The load and store data formatting is now done unconditionally rather than only when the op is OP_LOAD or OP_STORE. This eliminates the inferred latches clocked by each of the bits of r.op that we were getting previously. Signed-off-by: Paul Mackerras --- Makefile | 2 +- common.vhdl | 7 +-- decode2.vhdl | 1 + execute1.vhdl | 19 ++++---- loadstore1.vhdl | 116 ++++++++++++++++++++++++------------------------ 5 files changed, 74 insertions(+), 71 deletions(-) diff --git a/Makefile b/Makefile index 8c3133d..c09696a 100644 --- a/Makefile +++ b/Makefile @@ -58,7 +58,7 @@ icache_tb.o: common.o wishbone_types.o icache.o wishbone_bram_wrapper.o dcache.o: utils.o common.o wishbone_types.o plru.o cache_ram.o utils.o dcache_tb.o: common.o wishbone_types.o dcache.o wishbone_bram_wrapper.o insn_helpers.o: -loadstore1.o: common.o helpers.o +loadstore1.o: common.o helpers.o decode_types.o logical.o: decode_types.o multiply_tb.o: decode_types.o common.o glibc_random.o ppc_fx_insns.o multiply.o multiply.o: common.o decode_types.o diff --git a/common.vhdl b/common.vhdl index 9041d32..65e40c1 100644 --- a/common.vhdl +++ b/common.vhdl @@ -118,6 +118,7 @@ package common is type Decode2ToExecute1Type is record valid: std_ulogic; + unit : unit_t; insn_type: insn_type_t; nia: std_ulogic_vector(63 downto 0); write_reg: gspr_index_t; @@ -150,7 +151,7 @@ package common is reserve : std_ulogic; -- set for larx/stcx end record; constant Decode2ToExecute1Init : 
Decode2ToExecute1Type := - (valid => '0', insn_type => OP_ILLEGAL, bypass_data1 => '0', bypass_data2 => '0', bypass_data3 => '0', + (valid => '0', unit => NONE, insn_type => OP_ILLEGAL, bypass_data1 => '0', bypass_data2 => '0', bypass_data3 => '0', lr => '0', rc => '0', oe => '0', invert_a => '0', invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0', output_cr => '0', is_32bit => '0', is_signed => '0', xerc => xerc_init, reserve => '0', @@ -213,7 +214,7 @@ package common is type Execute1ToLoadstore1Type is record valid : std_ulogic; - load : std_ulogic; -- is this a load or store + op : insn_type_t; -- what ld/st op to do addr1 : std_ulogic_vector(63 downto 0); addr2 : std_ulogic_vector(63 downto 0); data : std_ulogic_vector(63 downto 0); -- data to write, unused for read @@ -228,7 +229,7 @@ package common is reserve : std_ulogic; -- set for larx/stcx. rc : std_ulogic; -- set for stcx. end record; - constant Execute1ToLoadstore1Init : Execute1ToLoadstore1Type := (valid => '0', load => '0', ci => '0', byte_reverse => '0', + constant Execute1ToLoadstore1Init : Execute1ToLoadstore1Type := (valid => '0', op => OP_ILLEGAL, ci => '0', byte_reverse => '0', sign_extend => '0', update => '0', xerc => xerc_init, reserve => '0', rc => '0', others => (others => '0')); diff --git a/decode2.vhdl b/decode2.vhdl index ff773aa..edcc50c 100644 --- a/decode2.vhdl +++ b/decode2.vhdl @@ -304,6 +304,7 @@ begin -- execute unit v.e.nia := d_in.nia; + v.e.unit := d_in.decode.unit; v.e.insn_type := d_in.decode.insn_type; v.e.read_reg1 := decoded_reg_a.reg; v.e.read_data1 := decoded_reg_a.data; diff --git a/execute1.vhdl b/execute1.vhdl index 9153b37..abd4a18 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -464,7 +464,7 @@ begin ctrl_tmp.srr1(63 - 45) <= '1'; report "privileged instruction"; - elsif e_in.valid = '1' then + elsif e_in.valid = '1' and e_in.unit = ALU then v.e.valid := '1'; v.e.write_reg := e_in.write_reg; @@ -844,11 +844,6 @@ begin stall_out <= '1'; 
x_to_divider.valid <= '1'; - when OP_LOAD | OP_STORE => - -- loadstore/dcache has its own port to writeback - v.e.valid := '0'; - lv.valid := '1'; - when others => terminate_out <= '1'; report "illegal"; @@ -874,6 +869,14 @@ begin report "Delayed LR update to " & to_hstring(next_nia); stall_out <= '1'; end if; + + elsif e_in.valid = '1' then + -- instruction for other units, i.e. LDST + v.e.valid := '0'; + if e_in.unit = LDST then + lv.valid := '1'; + end if; + elsif r.lr_update = '1' then result_en := '1'; result := r.next_lr; @@ -940,9 +943,7 @@ begin v.e.write_enable := result_en; -- Outputs to loadstore1 (async) - if e_in.insn_type = OP_LOAD then - lv.load := '1'; - end if; + lv.op := e_in.insn_type; lv.addr1 := a_in; lv.addr2 := b_in; lv.data := c_in; diff --git a/loadstore1.vhdl b/loadstore1.vhdl index 518feee..664e396 100644 --- a/loadstore1.vhdl +++ b/loadstore1.vhdl @@ -3,6 +3,7 @@ use ieee.std_logic_1164.all; use ieee.numeric_std.all; library work; +use work.decode_types.all; use work.common.all; use work.helpers.all; @@ -41,7 +42,7 @@ architecture behave of loadstore1 is type reg_stage_t is record -- latch most of the input request - load : std_ulogic; + load : std_ulogic; addr : std_ulogic_vector(63 downto 0); store_data : std_ulogic_vector(63 downto 0); load_data : std_ulogic_vector(63 downto 0); @@ -146,59 +147,60 @@ begin two_dwords := or (r.second_bytes); -- load data formatting - if r.load = '1' then - byte_offset := unsigned(r.addr(2 downto 0)); - brev_lenm1 := "000"; - if r.byte_reverse = '1' then - brev_lenm1 := unsigned(r.length(2 downto 0)) - 1; - end if; + byte_offset := unsigned(r.addr(2 downto 0)); + brev_lenm1 := "000"; + if r.byte_reverse = '1' then + brev_lenm1 := unsigned(r.length(2 downto 0)) - 1; + end if; - -- shift and byte-reverse data bytes - for i in 0 to 7 loop - kk := ('0' & (to_unsigned(i, 3) xor brev_lenm1)) + ('0' & byte_offset); - use_second(i) := kk(3); - j := to_integer(kk(2 downto 0)) * 8; - data_permuted(i * 8 + 7 
downto i * 8) := d_in.data(j + 7 downto j); - end loop; - - -- Work out the sign bit for sign extension. - -- Assumes we are not doing both sign extension and byte reversal, - -- in that for unaligned loads crossing two dwords we end up - -- using a bit from the second dword, whereas for a byte-reversed - -- (i.e. big-endian) load the sign bit would be in the first dword. - negative := (r.length(3) and data_permuted(63)) or - (r.length(2) and data_permuted(31)) or - (r.length(1) and data_permuted(15)) or - (r.length(0) and data_permuted(7)); - - -- trim and sign-extend - for i in 0 to 7 loop - if i < to_integer(unsigned(r.length)) then - if two_dwords = '1' then - trim_ctl(i) := '1' & not use_second(i); - else - trim_ctl(i) := not use_second(i) & '0'; - end if; + -- shift and byte-reverse data bytes + for i in 0 to 7 loop + kk := ('0' & (to_unsigned(i, 3) xor brev_lenm1)) + ('0' & byte_offset); + use_second(i) := kk(3); + j := to_integer(kk(2 downto 0)) * 8; + data_permuted(i * 8 + 7 downto i * 8) := d_in.data(j + 7 downto j); + end loop; + + -- Work out the sign bit for sign extension. + -- Assumes we are not doing both sign extension and byte reversal, + -- in that for unaligned loads crossing two dwords we end up + -- using a bit from the second dword, whereas for a byte-reversed + -- (i.e. big-endian) load the sign bit would be in the first dword. 
+ negative := (r.length(3) and data_permuted(63)) or + (r.length(2) and data_permuted(31)) or + (r.length(1) and data_permuted(15)) or + (r.length(0) and data_permuted(7)); + + -- trim and sign-extend + for i in 0 to 7 loop + if i < to_integer(unsigned(r.length)) then + if two_dwords = '1' then + trim_ctl(i) := '1' & not use_second(i); else - trim_ctl(i) := '0' & (negative and r.sign_extend); + trim_ctl(i) := not use_second(i) & '0'; end if; - case trim_ctl(i) is - when "11" => - data_trimmed(i * 8 + 7 downto i * 8) := r.load_data(i * 8 + 7 downto i * 8); - when "10" => - data_trimmed(i * 8 + 7 downto i * 8) := data_permuted(i * 8 + 7 downto i * 8); - when "01" => - data_trimmed(i * 8 + 7 downto i * 8) := x"FF"; - when others => - data_trimmed(i * 8 + 7 downto i * 8) := x"00"; - end case; - end loop; - end if; + else + trim_ctl(i) := '0' & (negative and r.sign_extend); + end if; + case trim_ctl(i) is + when "11" => + data_trimmed(i * 8 + 7 downto i * 8) := r.load_data(i * 8 + 7 downto i * 8); + when "10" => + data_trimmed(i * 8 + 7 downto i * 8) := data_permuted(i * 8 + 7 downto i * 8); + when "01" => + data_trimmed(i * 8 + 7 downto i * 8) := x"FF"; + when others => + data_trimmed(i * 8 + 7 downto i * 8) := x"00"; + end case; + end loop; case r.state is when IDLE => if l_in.valid = '1' then - v.load := l_in.load; + v.load := '0'; + if l_in.op = OP_LOAD then + v.load := '1'; + end if; v.addr := lsu_sum; v.write_reg := l_in.write_reg; v.length := l_in.length; @@ -229,18 +231,16 @@ begin v.addr := lsu_sum; -- Do byte reversing and rotating for stores in the first cycle - if v.load = '0' then - byte_offset := unsigned(lsu_sum(2 downto 0)); - brev_lenm1 := "000"; - if l_in.byte_reverse = '1' then - brev_lenm1 := unsigned(l_in.length(2 downto 0)) - 1; - end if; - for i in 0 to 7 loop - k := (to_unsigned(i, 3) xor brev_lenm1) + byte_offset; - j := to_integer(k) * 8; - v.store_data(j + 7 downto j) := l_in.data(i * 8 + 7 downto i * 8); - end loop; + byte_offset := 
unsigned(lsu_sum(2 downto 0)); + brev_lenm1 := "000"; + if l_in.byte_reverse = '1' then + brev_lenm1 := unsigned(l_in.length(2 downto 0)) - 1; end if; + for i in 0 to 7 loop + k := (to_unsigned(i, 3) xor brev_lenm1) + byte_offset; + j := to_integer(k) * 8; + v.store_data(j + 7 downto j) := l_in.data(i * 8 + 7 downto i * 8); + end loop; req := '1'; stall := '1';