diff --git a/Makefile b/Makefile index 8c3133d..c09696a 100644 --- a/Makefile +++ b/Makefile @@ -58,7 +58,7 @@ icache_tb.o: common.o wishbone_types.o icache.o wishbone_bram_wrapper.o dcache.o: utils.o common.o wishbone_types.o plru.o cache_ram.o utils.o dcache_tb.o: common.o wishbone_types.o dcache.o wishbone_bram_wrapper.o insn_helpers.o: -loadstore1.o: common.o helpers.o +loadstore1.o: common.o helpers.o decode_types.o logical.o: decode_types.o multiply_tb.o: decode_types.o common.o glibc_random.o ppc_fx_insns.o multiply.o multiply.o: common.o decode_types.o diff --git a/common.vhdl b/common.vhdl index 9041d32..65e40c1 100644 --- a/common.vhdl +++ b/common.vhdl @@ -118,6 +118,7 @@ package common is type Decode2ToExecute1Type is record valid: std_ulogic; + unit : unit_t; insn_type: insn_type_t; nia: std_ulogic_vector(63 downto 0); write_reg: gspr_index_t; @@ -150,7 +151,7 @@ package common is reserve : std_ulogic; -- set for larx/stcx end record; constant Decode2ToExecute1Init : Decode2ToExecute1Type := - (valid => '0', insn_type => OP_ILLEGAL, bypass_data1 => '0', bypass_data2 => '0', bypass_data3 => '0', + (valid => '0', unit => NONE, insn_type => OP_ILLEGAL, bypass_data1 => '0', bypass_data2 => '0', bypass_data3 => '0', lr => '0', rc => '0', oe => '0', invert_a => '0', invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0', output_cr => '0', is_32bit => '0', is_signed => '0', xerc => xerc_init, reserve => '0', @@ -213,7 +214,7 @@ package common is type Execute1ToLoadstore1Type is record valid : std_ulogic; - load : std_ulogic; -- is this a load or store + op : insn_type_t; -- what ld/st op to do addr1 : std_ulogic_vector(63 downto 0); addr2 : std_ulogic_vector(63 downto 0); data : std_ulogic_vector(63 downto 0); -- data to write, unused for read @@ -228,7 +229,7 @@ package common is reserve : std_ulogic; -- set for larx/stcx. rc : std_ulogic; -- set for stcx. end record; - constant Execute1ToLoadstore1Init : Execute1ToLoadstore1Type := (valid => '0', load => '0', ci => '0', byte_reverse => '0', + constant Execute1ToLoadstore1Init : Execute1ToLoadstore1Type := (valid => '0', op => OP_ILLEGAL, ci => '0', byte_reverse => '0', sign_extend => '0', update => '0', xerc => xerc_init, reserve => '0', rc => '0', others => (others => '0')); diff --git a/decode2.vhdl b/decode2.vhdl index ff773aa..edcc50c 100644 --- a/decode2.vhdl +++ b/decode2.vhdl @@ -304,6 +304,7 @@ begin -- execute unit v.e.nia := d_in.nia; + v.e.unit := d_in.decode.unit; v.e.insn_type := d_in.decode.insn_type; v.e.read_reg1 := decoded_reg_a.reg; v.e.read_data1 := decoded_reg_a.data; diff --git a/execute1.vhdl b/execute1.vhdl index 9153b37..abd4a18 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -464,7 +464,7 @@ begin ctrl_tmp.srr1(63 - 45) <= '1'; report "privileged instruction"; - elsif e_in.valid = '1' then + elsif e_in.valid = '1' and e_in.unit = ALU then v.e.valid := '1'; v.e.write_reg := e_in.write_reg; @@ -844,11 +844,6 @@ begin stall_out <= '1'; x_to_divider.valid <= '1'; - when OP_LOAD | OP_STORE => - -- loadstore/dcache has its own port to writeback - v.e.valid := '0'; - lv.valid := '1'; - when others => terminate_out <= '1'; report "illegal"; @@ -874,6 +869,14 @@ begin report "Delayed LR update to " & to_hstring(next_nia); stall_out <= '1'; end if; + + elsif e_in.valid = '1' then + -- instruction for other units, i.e. LDST + v.e.valid := '0'; + if e_in.unit = LDST then + lv.valid := '1'; + end if; + elsif r.lr_update = '1' then result_en := '1'; result := r.next_lr; @@ -940,9 +943,7 @@ begin v.e.write_enable := result_en; -- Outputs to loadstore1 (async) - if e_in.insn_type = OP_LOAD then - lv.load := '1'; - end if; + lv.op := e_in.insn_type; lv.addr1 := a_in; lv.addr2 := b_in; lv.data := c_in; diff --git a/loadstore1.vhdl b/loadstore1.vhdl index 518feee..664e396 100644 --- a/loadstore1.vhdl +++ b/loadstore1.vhdl @@ -3,6 +3,7 @@ use ieee.std_logic_1164.all; use ieee.numeric_std.all; library work; +use work.decode_types.all; use work.common.all; use work.helpers.all; @@ -41,7 +42,7 @@ architecture behave of loadstore1 is type reg_stage_t is record -- latch most of the input request - load : std_ulogic; + load : std_ulogic; addr : std_ulogic_vector(63 downto 0); store_data : std_ulogic_vector(63 downto 0); load_data : std_ulogic_vector(63 downto 0); @@ -146,59 +147,60 @@ begin two_dwords := or (r.second_bytes); -- load data formatting - if r.load = '1' then - byte_offset := unsigned(r.addr(2 downto 0)); - brev_lenm1 := "000"; - if r.byte_reverse = '1' then - brev_lenm1 := unsigned(r.length(2 downto 0)) - 1; - end if; + byte_offset := unsigned(r.addr(2 downto 0)); + brev_lenm1 := "000"; + if r.byte_reverse = '1' then + brev_lenm1 := unsigned(r.length(2 downto 0)) - 1; + end if; - -- shift and byte-reverse data bytes - for i in 0 to 7 loop - kk := ('0' & (to_unsigned(i, 3) xor brev_lenm1)) + ('0' & byte_offset); - use_second(i) := kk(3); - j := to_integer(kk(2 downto 0)) * 8; - data_permuted(i * 8 + 7 downto i * 8) := d_in.data(j + 7 downto j); - end loop; - - -- Work out the sign bit for sign extension. - -- Assumes we are not doing both sign extension and byte reversal, - -- in that for unaligned loads crossing two dwords we end up - -- using a bit from the second dword, whereas for a byte-reversed - -- (i.e. big-endian) load the sign bit would be in the first dword. - negative := (r.length(3) and data_permuted(63)) or - (r.length(2) and data_permuted(31)) or - (r.length(1) and data_permuted(15)) or - (r.length(0) and data_permuted(7)); - - -- trim and sign-extend - for i in 0 to 7 loop - if i < to_integer(unsigned(r.length)) then - if two_dwords = '1' then - trim_ctl(i) := '1' & not use_second(i); - else - trim_ctl(i) := not use_second(i) & '0'; - end if; + -- shift and byte-reverse data bytes + for i in 0 to 7 loop + kk := ('0' & (to_unsigned(i, 3) xor brev_lenm1)) + ('0' & byte_offset); + use_second(i) := kk(3); + j := to_integer(kk(2 downto 0)) * 8; + data_permuted(i * 8 + 7 downto i * 8) := d_in.data(j + 7 downto j); + end loop; + + -- Work out the sign bit for sign extension. + -- Assumes we are not doing both sign extension and byte reversal, + -- in that for unaligned loads crossing two dwords we end up + -- using a bit from the second dword, whereas for a byte-reversed + -- (i.e. big-endian) load the sign bit would be in the first dword. + negative := (r.length(3) and data_permuted(63)) or + (r.length(2) and data_permuted(31)) or + (r.length(1) and data_permuted(15)) or + (r.length(0) and data_permuted(7)); + + -- trim and sign-extend + for i in 0 to 7 loop + if i < to_integer(unsigned(r.length)) then + if two_dwords = '1' then + trim_ctl(i) := '1' & not use_second(i); else - trim_ctl(i) := '0' & (negative and r.sign_extend); + trim_ctl(i) := not use_second(i) & '0'; end if; - case trim_ctl(i) is - when "11" => - data_trimmed(i * 8 + 7 downto i * 8) := r.load_data(i * 8 + 7 downto i * 8); - when "10" => - data_trimmed(i * 8 + 7 downto i * 8) := data_permuted(i * 8 + 7 downto i * 8); - when "01" => - data_trimmed(i * 8 + 7 downto i * 8) := x"FF"; - when others => - data_trimmed(i * 8 + 7 downto i * 8) := x"00"; - end case; - end loop; - end if; + else + trim_ctl(i) := '0' & (negative and r.sign_extend); + end if; + case trim_ctl(i) is + when "11" => + data_trimmed(i * 8 + 7 downto i * 8) := r.load_data(i * 8 + 7 downto i * 8); + when "10" => + data_trimmed(i * 8 + 7 downto i * 8) := data_permuted(i * 8 + 7 downto i * 8); + when "01" => + data_trimmed(i * 8 + 7 downto i * 8) := x"FF"; + when others => + data_trimmed(i * 8 + 7 downto i * 8) := x"00"; + end case; + end loop; case r.state is when IDLE => if l_in.valid = '1' then - v.load := l_in.load; + v.load := '0'; + if l_in.op = OP_LOAD then + v.load := '1'; + end if; v.addr := lsu_sum; v.write_reg := l_in.write_reg; v.length := l_in.length; @@ -229,18 +231,16 @@ begin v.addr := lsu_sum; -- Do byte reversing and rotating for stores in the first cycle - if v.load = '0' then - byte_offset := unsigned(lsu_sum(2 downto 0)); - brev_lenm1 := "000"; - if l_in.byte_reverse = '1' then - brev_lenm1 := unsigned(l_in.length(2 downto 0)) - 1; - end if; - for i in 0 to 7 loop - k := (to_unsigned(i, 3) xor brev_lenm1) + byte_offset; - j := to_integer(k) * 8; - v.store_data(j + 7 downto j) := l_in.data(i * 8 + 7 downto i * 8); - end loop; + byte_offset := unsigned(lsu_sum(2 downto 0)); + brev_lenm1 := "000"; + if l_in.byte_reverse = '1' then + brev_lenm1 := unsigned(l_in.length(2 downto 0)) - 1; end if; + for i in 0 to 7 loop + k := (to_unsigned(i, 3) xor brev_lenm1) + byte_offset; + j := to_integer(k) * 8; + v.store_data(j + 7 downto j) := l_in.data(i * 8 + 7 downto i * 8); + end loop; req := '1'; stall := '1';