Propagate an endianness flag, derived from MSR[LE], through fetch, icache
and load/store.

Summary of the hunks below:
  * common.vhdl: add a big_endian field to the record at line 108 (the one
    with req/virt_mode/priv_mode/nia) and to Execute1ToFetch1Type, whose
    init constant now sets it to '0'.
  * execute1.vhdl: drive v.f.big_endian with "not MSR[LE]" (taken from a_in
    for OP_RFID, forced to '0' in the hunk at line 1161, which is marked
    XXX), and XNOR the load/store byte_reverse flag with MSR[LE].
  * fetch1.vhdl: carry big_endian next to virt_mode/priv_mode, copy it from
    execute1 on redirect, and include it in the debug report.
  * icache.vhdl: widen the tag by one bit to hold the endianness, and
    reverse the bytes inside each 32-bit word of incoming wishbone data
    when the stored tag's top bit is set.
  * loadstore1.vhdl: take the sign bit from r.load_data when dwords_done
    and byte_reverse are both set, otherwise from data_permuted.

NOTE(review): this patch was recovered from a copy whose line breaks and
leading whitespace had been collapsed. Line structure was rebuilt so that
every hunk matches its header counts; indentation is conventional, not
original. Apply with whitespace-insensitive context matching, e.g.
"git apply --ignore-whitespace" or "patch -l".

diff --git a/common.vhdl b/common.vhdl
index bd9210b..9ed07b3 100644
--- a/common.vhdl
+++ b/common.vhdl
@@ -108,6 +108,7 @@ package common is
         req: std_ulogic;
         virt_mode : std_ulogic;
         priv_mode : std_ulogic;
+        big_endian : std_ulogic;
         stop_mark: std_ulogic;
         sequential: std_ulogic;
         nia: std_ulogic_vector(63 downto 0);
@@ -245,10 +246,12 @@ package common is
         redirect: std_ulogic;
         virt_mode: std_ulogic;
         priv_mode: std_ulogic;
+        big_endian: std_ulogic;
         redirect_nia: std_ulogic_vector(63 downto 0);
     end record;
     constant Execute1ToFetch1Init : Execute1ToFetch1Type := (redirect => '0', virt_mode => '0',
-                                                             priv_mode => '0', others => (others => '0'));
+                                                             priv_mode => '0', big_endian => '0',
+                                                             others => (others => '0'));
 
     type Execute1ToLoadstore1Type is record
         valid : std_ulogic;
diff --git a/execute1.vhdl b/execute1.vhdl
index 1b83997..99553cc 100644
--- a/execute1.vhdl
+++ b/execute1.vhdl
@@ -496,9 +496,10 @@ begin
         v.terminate := '0';
         icache_inval <= '0';
         v.busy := '0';
-        -- send MSR[IR] and ~MSR[PR] up to fetch1
+        -- send MSR[IR], ~MSR[PR] and ~MSR[LE] up to fetch1
         v.f.virt_mode := ctrl.msr(MSR_IR);
         v.f.priv_mode := not ctrl.msr(MSR_PR);
+        v.f.big_endian := not ctrl.msr(MSR_LE);
 
         -- Next insn adder used in a couple of places
         next_nia := std_ulogic_vector(unsigned(e_in.nia) + 4);
@@ -740,6 +741,7 @@ begin
             when OP_RFID =>
                 v.f.virt_mode := a_in(MSR_IR) or a_in(MSR_PR);
                 v.f.priv_mode := not a_in(MSR_PR);
+                v.f.big_endian := not a_in(MSR_LE);
                 -- Can't use msr_copy here because the partial function MSR
                 -- bits should be left unchanged, not zeroed.
                 ctrl_tmp.msr(63 downto 31) <= a_in(63 downto 31);
@@ -1161,6 +1163,8 @@ begin
             v.f.redirect := '1';
             v.f.virt_mode := '0';
             v.f.priv_mode := '1';
+            -- XXX need an interrupt LE bit here, e.g. from LPCR
+            v.f.big_endian := '0';
         end if;
 
         if v.f.redirect = '1' then
@@ -1176,7 +1180,7 @@ begin
         lv.data := c_in;
         lv.write_reg := gspr_to_gpr(e_in.write_reg);
         lv.length := e_in.data_len;
-        lv.byte_reverse := e_in.byte_reverse;
+        lv.byte_reverse := e_in.byte_reverse xnor ctrl.msr(MSR_LE);
         lv.sign_extend := e_in.sign_extend;
         lv.update := e_in.update;
         lv.update_reg := gspr_to_gpr(e_in.read_reg1);
diff --git a/fetch1.vhdl b/fetch1.vhdl
index a56f33d..63672cb 100644
--- a/fetch1.vhdl
+++ b/fetch1.vhdl
@@ -50,8 +50,9 @@ begin
             log_nia <= r.nia(63) & r.nia(43 downto 2);
             if r /= r_next then
                 report "fetch1 rst:" & std_ulogic'image(rst) &
-                    " IR:" & std_ulogic'image(e_in.virt_mode) &
-                    " P:" & std_ulogic'image(e_in.priv_mode) &
+                    " IR:" & std_ulogic'image(r_next.virt_mode) &
+                    " P:" & std_ulogic'image(r_next.priv_mode) &
+                    " E:" & std_ulogic'image(r_next.big_endian) &
                     " R:" & std_ulogic'image(e_in.redirect) & std_ulogic'image(d_in.redirect) &
                     " S:" & std_ulogic'image(stall_in) &
                     " T:" & std_ulogic'image(stop_in) &
@@ -81,11 +82,13 @@ begin
             end if;
             v.virt_mode := '0';
             v.priv_mode := '1';
+            v.big_endian := '0';
             v_int.stop_state := RUNNING;
         elsif e_in.redirect = '1' then
             v.nia := e_in.redirect_nia(63 downto 2) & "00";
             v.virt_mode := e_in.virt_mode;
             v.priv_mode := e_in.priv_mode;
+            v.big_endian := e_in.big_endian;
         elsif d_in.redirect = '1' then
             v.nia := d_in.redirect_nia(63 downto 2) & "00";
         elsif stall_in = '0' then
diff --git a/icache.vhdl b/icache.vhdl
index 3f1c15f..d24a146 100644
--- a/icache.vhdl
+++ b/icache.vhdl
@@ -98,7 +98,8 @@ architecture rtl of icache is
     -- SET_SIZE_BITS is the log base 2 of the set size
     constant SET_SIZE_BITS : natural := LINE_OFF_BITS + INDEX_BITS;
     -- TAG_BITS is the number of bits of the tag part of the address
-    constant TAG_BITS : natural := REAL_ADDR_BITS - SET_SIZE_BITS;
+    -- the +1 is to allow the endianness to be stored in the tag
+    constant TAG_BITS : natural := REAL_ADDR_BITS - SET_SIZE_BITS + 1;
     -- WAY_BITS is the number of bits to select a way
     constant WAY_BITS : natural := log2(NUM_WAYS);
 
@@ -289,9 +290,10 @@ architecture rtl of icache is
     end;
 
     -- Get the tag value from the address
-    function get_tag(addr: std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0)) return cache_tag_t is
+    function get_tag(addr: std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0);
+                     endian: std_ulogic) return cache_tag_t is
     begin
-        return addr(REAL_ADDR_BITS - 1 downto SET_SIZE_BITS);
+        return endian & addr(REAL_ADDR_BITS - 1 downto SET_SIZE_BITS);
     end;
 
     -- Read a tag from a tag memory row
@@ -327,9 +329,9 @@ begin
         report "geometry bits don't add up" severity FAILURE;
     assert (LINE_OFF_BITS = ROW_OFF_BITS + ROW_LINEBITS)
         report "geometry bits don't add up" severity FAILURE;
-    assert (REAL_ADDR_BITS = TAG_BITS + INDEX_BITS + LINE_OFF_BITS)
+    assert (REAL_ADDR_BITS + 1 = TAG_BITS + INDEX_BITS + LINE_OFF_BITS)
         report "geometry bits don't add up" severity FAILURE;
-    assert (REAL_ADDR_BITS = TAG_BITS + ROW_BITS + ROW_OFF_BITS)
+    assert (REAL_ADDR_BITS + 1 = TAG_BITS + ROW_BITS + ROW_OFF_BITS)
         report "geometry bits don't add up" severity FAILURE;
 
     sim_debug: if SIM generate
@@ -359,6 +361,7 @@ begin
         signal wr_addr : std_ulogic_vector(ROW_BITS-1 downto 0);
         signal dout : cache_row_t;
         signal wr_sel : std_ulogic_vector(ROW_SIZE-1 downto 0);
+        signal wr_dat : std_ulogic_vector(wishbone_in.dat'left downto 0);
     begin
         way: entity work.cache_ram
             generic map (
@@ -372,10 +375,20 @@ begin
                 rd_data => dout,
                 wr_sel => wr_sel,
                 wr_addr => wr_addr,
-                wr_data => wishbone_in.dat
+                wr_data => wr_dat
                 );
         process(all)
+            variable j: integer;
         begin
+            -- byte-swap read data if big endian
+            if r.store_tag(TAG_BITS - 1) = '0' then
+                wr_dat <= wishbone_in.dat;
+            else
+                for i in 0 to (wishbone_in.dat'length / 8) - 1 loop
+                    j := ((i / 4) * 4) + (3 - (i mod 4));
+                    wr_dat(i * 8 + 7 downto i * 8) <= wishbone_in.dat(j * 8 + 7 downto j * 8);
+                end loop;
+            end if;
             do_read <= not (stall_in or use_previous);
             do_write <= '0';
             if wishbone_in.ack = '1' and replace_way = i then
@@ -494,7 +507,7 @@ begin
         -- Extract line, row and tag from request
         req_index <= get_index(i_in.nia);
         req_row <= get_row(i_in.nia);
-        req_tag <= get_tag(real_addr);
+        req_tag <= get_tag(real_addr, i_in.big_endian);
 
         -- Calculate address of beginning of cache row, will be
         -- used for cache miss processing if needed
diff --git a/loadstore1.vhdl b/loadstore1.vhdl
index 62914c0..123c8ad 100644
--- a/loadstore1.vhdl
+++ b/loadstore1.vhdl
@@ -201,14 +201,20 @@ begin
         end loop;
 
         -- Work out the sign bit for sign extension.
-        -- Assumes we are not doing both sign extension and byte reversal,
-        -- in that for unaligned loads crossing two dwords we end up
-        -- using a bit from the second dword, whereas for a byte-reversed
-        -- (i.e. big-endian) load the sign bit would be in the first dword.
-        negative := (r.length(3) and data_permuted(63)) or
-                    (r.length(2) and data_permuted(31)) or
-                    (r.length(1) and data_permuted(15)) or
-                    (r.length(0) and data_permuted(7));
+        -- For unaligned loads crossing two dwords, the sign bit is in the
+        -- first dword for big-endian (byte_reverse = 1), or the second dword
+        -- for little-endian.
+        if r.dwords_done = '1' and r.byte_reverse = '1' then
+            negative := (r.length(3) and r.load_data(63)) or
+                        (r.length(2) and r.load_data(31)) or
+                        (r.length(1) and r.load_data(15)) or
+                        (r.length(0) and r.load_data(7));
+        else
+            negative := (r.length(3) and data_permuted(63)) or
+                        (r.length(2) and data_permuted(31)) or
+                        (r.length(1) and data_permuted(15)) or
+                        (r.length(0) and data_permuted(7));
+        end if;
 
         -- trim and sign-extend
         for i in 0 to 7 loop