@ -7,9 +7,6 @@
 
		
	
		
			
				-- * Complete load misses on the cycle when WB data comes instead of
 
		
	
		
			
				--   at the end of line (this requires dealing with requests coming in
 
		
	
		
			
				--   while not idle...)
 
		
	
		
			
				-- * Load with update could use one less non-pipelined cycle by moving
 
		
	
		
			
				--   the register update to the pipeline bubble that exists when going
 
		
	
		
			
				--   back to the IDLE state.
 
		
	
		
			
				--
 
		
	
		
			
				library ieee;
 
		
	
		
			
				use ieee.std_logic_1164.all;
 
		
	
	
		
			
				
					
						
						
						
							
								 
						
					 
				
			
			@ -35,7 +32,7 @@ entity dcache is
 
		
	
		
			
				        rst          : in std_ulogic;
 
		
	
		
			
				 
		
	
		
			
				        d_in         : in Loadstore1ToDcacheType;
 
		
	
		
			
				        d_out        : out DcacheToWritebackType;
 
		
	
		
			
				        d_out        : out DcacheToLoadstore1Type;
 
		
	
		
			
				 
		
	
		
			
					stall_out    : out std_ulogic;
 
		
	
		
			
				 
		
	
	
		
			
				
					
						
							
								 
						
						
							
								 
						
						
					 
				
			
			@ -113,6 +110,8 @@ architecture rtl of dcache is
 
		
	
		
			
				    attribute ram_style : string;
 
		
	
		
			
				    attribute ram_style of cache_tags : signal is "distributed";
 
		
	
		
			
				 
		
	
		
			
				    signal r0 : Loadstore1ToDcacheType;
 
		
	
		
			
				 
		
	
		
			
				    -- Type of operation on a "valid" input
 
		
	
		
			
				    type op_t is (OP_NONE,
 
		
	
		
			
						  OP_LOAD_HIT,      -- Cache hit on load
 
		
	
	
		
			
				
					
						
						
						
							
								 
						
					 
				
			
			@ -124,10 +123,8 @@ architecture rtl of dcache is
 
		
	
		
			
						      
 
		
	
		
			
				    -- Cache state machine
 
		
	
		
			
				    type state_t is (IDLE,	       -- Normal load hit processing
 
		
	
		
			
				                     PRE_NEXT_DWORD,   -- Extra state before NEXT_DWORD
 
		
	
		
			
				                     NEXT_DWORD,       -- Starting the 2nd xfer of misaligned
 
		
	
		
			
						     LOAD_UPDATE,      -- Load with update extra cycle
 
		
	
		
			
						     RELOAD_WAIT_ACK,  -- Cache reload wait ack
 
		
	
		
			
				                     FINISH_LD_MISS,   -- Extra cycle after load miss
 
		
	
		
			
						     STORE_WAIT_ACK,   -- Store wait ack
 
		
	
		
			
						     NC_LOAD_WAIT_ACK);-- Non-cachable load wait ack
 
		
	
		
			
				 
		
	
	
		
			
				
					
						
							
								 
						
						
							
								 
						
						
					 
				
			
			@ -158,15 +155,6 @@ architecture rtl of dcache is
 
		
	
		
			
					hit_way          : way_t;
 
		
	
		
			
					hit_load_valid   : std_ulogic;
 
		
	
		
			
				 
		
	
		
			
				        -- Info for doing the second transfer of a misaligned load/store
 
		
	
		
			
				        two_dwords       : std_ulogic;
 
		
	
		
			
				        second_dword     : std_ulogic;
 
		
	
		
			
				        next_addr        : std_ulogic_vector(63 downto 0);
 
		
	
		
			
				        next_sel         : std_ulogic_vector(7 downto 0);
 
		
	
		
			
				 
		
	
		
			
					-- Register update (load/store with update)
 
		
	
		
			
					update_valid     : std_ulogic;
 
		
	
		
			
				 
		
	
		
			
					-- Data buffer for "slow" read ops (load miss and NC loads).
 
		
	
		
			
					slow_data        : std_ulogic_vector(63 downto 0);
 
		
	
		
			
					slow_valid       : std_ulogic;
 
		
	
	
		
			
				
					
						
							
								 
						
						
							
								 
						
						
					 
				
			
			@ -200,12 +188,8 @@ architecture rtl of dcache is
 
		
	
		
			
				    signal req_tag     : cache_tag_t;
 
		
	
		
			
				    signal req_op      : op_t;
 
		
	
		
			
				    signal req_data    : std_ulogic_vector(63 downto 0);
 
		
	
		
			
				    signal req_addr    : std_ulogic_vector(63 downto 0);
 
		
	
		
			
				    signal req_laddr   : std_ulogic_vector(63 downto 0);
 
		
	
		
			
				    signal req_sel     : std_ulogic_vector(7 downto 0);
 
		
	
		
			
				    signal next_addr   : std_ulogic_vector(63 downto 0);
 
		
	
		
			
				 
		
	
		
			
				    signal early_req_addr : std_ulogic_vector(11 downto 0);
 
		
	
		
			
				    signal early_req_row  : row_t;
 
		
	
		
			
				 
		
	
		
			
				    signal cancel_store : std_ulogic;
 
		
	
	
		
			
				
					
						
						
						
							
								 
						
					 
				
			
			@ -222,10 +206,8 @@ architecture rtl of dcache is
 
		
	
		
			
				    signal replace_way : way_t;
 
		
	
		
			
				 
		
	
		
			
				    -- Wishbone read/write/cache write formatting signals
 
		
	
		
			
				    signal bus_sel     : std_ulogic_vector(15 downto 0);
 
		
	
		
			
				    signal bus_sel     : std_ulogic_vector(7 downto 0);
 
		
	
		
			
				 
		
	
		
			
				    signal two_dwords  : std_ulogic;
 
		
	
		
			
				    
 
		
	
		
			
				    --
 
		
	
		
			
				    -- Helper functions to decode incoming requests
 
		
	
		
			
				    --
 
		
	
	
		
			
				
					
						
							
								 
						
						
							
								 
						
						
					 
				
			
			@ -305,37 +287,6 @@ architecture rtl of dcache is
 
		
	
		
			
					tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS) := tag;
 
		
	
		
			
				    end;
 
		
	
		
			
				 
		
	
		
			
				    -- Generate byte enables from sizes
 
		
	
		
			
				    function length_to_sel(length : in std_logic_vector(3 downto 0)) return std_ulogic_vector is
 
		
	
		
			
				    begin
 
		
	
		
			
				        case length is
 
		
	
		
			
				            when "0001" =>
 
		
	
		
			
				                return "00000001";
 
		
	
		
			
				            when "0010" =>
 
		
	
		
			
				                return "00000011";
 
		
	
		
			
				            when "0100" =>
 
		
	
		
			
				                return "00001111";
 
		
	
		
			
				            when "1000" =>
 
		
	
		
			
				                return "11111111";
 
		
	
		
			
				            when others =>
 
		
	
		
			
				                return "00000000";
 
		
	
		
			
				        end case;
 
		
	
		
			
				    end function length_to_sel;
 
		
	
		
			
				 
		
	
		
			
				    -- Calculate byte enables for wishbone
 
		
	
		
			
				    -- This returns 16 bits, giving the select signals for two transfers,
 
		
	
		
			
				    -- to account for unaligned loads or stores
 
		
	
		
			
				    function wishbone_data_sel(size : in std_logic_vector(3 downto 0);
 
		
	
		
			
							       address : in std_logic_vector(63 downto 0))
 
		
	
		
			
					return std_ulogic_vector is
 
		
	
		
			
				        variable longsel : std_ulogic_vector(15 downto 0);
 
		
	
		
			
				    begin
 
		
	
		
			
				        longsel := (others => '0');
 
		
	
		
			
				        longsel(7 downto 0) := length_to_sel(size);
 
		
	
		
			
				        return std_ulogic_vector(shift_left(unsigned(longsel),
 
		
	
		
			
									    to_integer(unsigned(address(2 downto 0)))));
 
		
	
		
			
				    end function wishbone_data_sel;
 
		
	
		
			
				 
		
	
		
			
				begin
 
		
	
		
			
				 
		
	
		
			
				    assert LINE_SIZE mod ROW_SIZE = 0 report "LINE_SIZE not multiple of ROW_SIZE" severity FAILURE;
 
		
	
	
		
			
				
					
						
							
								 
						
						
							
								 
						
						
					 
				
			
			@ -390,11 +341,17 @@ begin
 
		
	
		
			
					end generate;
 
		
	
		
			
				    end generate;
 
		
	
		
			
				 
		
	
		
			
				    -- Wishbone read and write and BRAM write sel bits generation
 
		
	
		
			
				    bus_sel     <= wishbone_data_sel(d_in.length, d_in.addr);
 
		
	
		
			
				 
		
	
		
			
				    -- See if the operation crosses two doublewords
 
		
	
		
			
				    two_dwords  <= or (bus_sel(15 downto 8));
 
		
	
		
			
				    -- Latch the request in r0 as long as we're not stalling
 
		
	
		
			
				    stage_0 : process(clk)
 
		
	
		
			
				    begin
 
		
	
		
			
				        if rising_edge(clk) then
 
		
	
		
			
				            if rst = '1' then
 
		
	
		
			
				                r0.valid <= '0';
 
		
	
		
			
				            elsif stall_out = '0' then
 
		
	
		
			
				                r0 <= d_in;
 
		
	
		
			
				            end if;
 
		
	
		
			
				        end if;
 
		
	
		
			
				    end process;
 
		
	
		
			
				 
		
	
		
			
				    -- Cache request parsing and hit detection
 
		
	
		
			
				    dcache_request : process(all)
 
		
	
	
		
			
				
					
						
						
						
							
								 
						
					 
				
			
			@ -405,40 +362,21 @@ begin
 
		
	
		
			
				        variable data    : std_ulogic_vector(63 downto 0);
 
		
	
		
			
					variable opsel   : std_ulogic_vector(3 downto 0);
 
		
	
		
			
				        variable go      : std_ulogic;
 
		
	
		
			
				        variable is_load : std_ulogic;
 
		
	
		
			
				        variable is_nc   : std_ulogic;
 
		
	
		
			
				    begin
 
		
	
		
			
					-- Extract line, row and tag from request
 
		
	
		
			
				        if r1.state /= NEXT_DWORD then
 
		
	
		
			
				            req_addr <= d_in.addr;
 
		
	
		
			
				            req_data <= d_in.data;
 
		
	
		
			
				            req_sel <= bus_sel(7 downto 0);
 
		
	
		
			
				            go := d_in.valid;
 
		
	
		
			
				            is_load := d_in.load;
 
		
	
		
			
				            is_nc := d_in.nc;
 
		
	
		
			
				 
		
	
		
			
				        else
 
		
	
		
			
				            req_addr <= r1.next_addr;
 
		
	
		
			
				            req_data <= r1.req.data;
 
		
	
		
			
				            req_sel <= r1.next_sel;
 
		
	
		
			
				            go := '1';
 
		
	
		
			
				            is_load := r1.req.load;
 
		
	
		
			
				            is_nc := r1.req.nc;
 
		
	
		
			
				        end if;
 
		
	
		
			
				        req_index <= get_index(r0.addr);
 
		
	
		
			
				        req_row <= get_row(r0.addr);
 
		
	
		
			
				        req_tag <= get_tag(r0.addr);
 
		
	
		
			
				 
		
	
		
			
				        req_index <= get_index(req_addr);
 
		
	
		
			
				        req_row <= get_row(req_addr);
 
		
	
		
			
				        req_tag <= get_tag(req_addr);
 
		
	
		
			
				        -- Only do anything if not being stalled by stage 1
 
		
	
		
			
				        go := r0.valid and not stall_out;
 
		
	
		
			
				 
		
	
		
			
				        -- Calculate address of beginning of cache line, will be
 
		
	
		
			
				        -- used for cache miss processing if needed
 
		
	
		
			
				        --
 
		
	
		
			
				        req_laddr <= req_addr(63 downto LINE_OFF_BITS) &
 
		
	
		
			
				        req_laddr <= r0.addr(63 downto LINE_OFF_BITS) &
 
		
	
		
			
				                     (LINE_OFF_BITS-1 downto 0 => '0');
 
		
	
		
			
				 
		
	
		
			
				        -- Address of next doubleword, used for unaligned accesses
 
		
	
		
			
				        next_addr <= std_ulogic_vector(unsigned(d_in.addr(63 downto 3)) + 1) & "000";
 
		
	
		
			
				 
		
	
		
			
					-- Test if pending request is a hit on any way
 
		
	
		
			
					hit_way := 0;
 
		
	
		
			
					is_hit := '0';
 
		
	
	
		
			
				
					
						
						
						
							
								 
						
					 
				
			
			@ -460,7 +398,7 @@ begin
 
		
	
		
			
					-- Combine the request and cache his status to decide what
 
		
	
		
			
					-- operation needs to be done
 
		
	
		
			
					--
 
		
	
		
			
					opsel := go & is_load & is_nc & is_hit;
 
		
	
		
			
					opsel := go & r0.load & r0.nc & is_hit;
 
		
	
		
			
					case opsel is
 
		
	
		
			
					when "1101" => op := OP_LOAD_HIT;
 
		
	
		
			
					when "1100" => op := OP_LOAD_MISS;
 
		
	
	
		
			
				
					
						
						
						
							
								 
						
					 
				
			
			@ -475,16 +413,15 @@ begin
 
		
	
		
			
				 
		
	
		
			
					req_op <= op;
 
		
	
		
			
				 
		
	
		
			
				        -- Versions of the address and row number that are valid one cycle earlier
 
		
	
		
			
				        -- Version of the row number that is valid one cycle earlier
 
		
	
		
			
				        -- in the cases where we need to read the cache data BRAM.
 
		
	
		
			
				        if r1.state = IDLE and op = OP_LOAD_HIT and two_dwords = '1' then
 
		
	
		
			
				            early_req_addr <= next_addr(11 downto 0);
 
		
	
		
			
				        elsif r1.state /= IDLE and r1.two_dwords = '1' and r1.second_dword = '0' then
 
		
	
		
			
				            early_req_addr <= r1.next_addr(11 downto 0);
 
		
	
		
			
				        -- If we're stalling then we need to keep reading the last
 
		
	
		
			
				        -- row requested.
 
		
	
		
			
				        if stall_out = '0' then
 
		
	
		
			
				            early_req_row <= get_row(d_in.addr);
 
		
	
		
			
				        else
 
		
	
		
			
				            early_req_addr <= d_in.early_low_addr;
 
		
	
		
			
				            early_req_row <= req_row;
 
		
	
		
			
				        end if;
 
		
	
		
			
				        early_req_row <= get_row(x"0000000000000" & early_req_addr);
 
		
	
		
			
				    end process;
 
		
	
		
			
				 
		
	
		
			
				    -- Wire up wishbone request latch out of stage 1
 
		
	
	
		
			
				
					
						
						
						
							
								 
						
					 
				
			
			@ -502,17 +439,17 @@ begin
 
		
	
		
			
				        cancel_store <= '0';
 
		
	
		
			
				        set_rsrv <= '0';
 
		
	
		
			
				        clear_rsrv <= '0';
 
		
	
		
			
				        if d_in.valid = '1' and d_in.reserve = '1' then
 
		
	
		
			
				        if stall_out = '0' and r0.valid = '1' and r0.reserve = '1' then
 
		
	
		
			
				            -- XXX generate alignment interrupt if address is not aligned
 
		
	
		
			
				            -- XXX or if d_in.nc = '1'
 
		
	
		
			
				            if d_in.load = '1' then
 
		
	
		
			
				            -- XXX or if r0.nc = '1'
 
		
	
		
			
				            if r0.load = '1' then
 
		
	
		
			
				                -- load with reservation
 
		
	
		
			
				                set_rsrv <= '1';
 
		
	
		
			
				            else
 
		
	
		
			
				                -- store conditional
 
		
	
		
			
				                clear_rsrv <= '1';
 
		
	
		
			
				                if reservation.valid = '0' or
 
		
	
		
			
				                    d_in.addr(63 downto LINE_OFF_BITS) /= reservation.addr then
 
		
	
		
			
				                    r0.addr(63 downto LINE_OFF_BITS) /= reservation.addr then
 
		
	
		
			
				                    cancel_store <= '1';
 
		
	
		
			
				                end if;
 
		
	
		
			
				            end if;
 
		
	
	
		
			
				
					
						
						
						
							
								 
						
					 
				
			
			@ -526,28 +463,19 @@ begin
 
		
	
		
			
				                reservation.valid <= '0';
 
		
	
		
			
				            elsif set_rsrv = '1' then
 
		
	
		
			
				                reservation.valid <= '1';
 
		
	
		
			
				                reservation.addr <= d_in.addr(63 downto LINE_OFF_BITS);
 
		
	
		
			
				                reservation.addr <= r0.addr(63 downto LINE_OFF_BITS);
 
		
	
		
			
				            end if;
 
		
	
		
			
				        end if;
 
		
	
		
			
				    end process;
 
		
	
		
			
				 
		
	
		
			
				    -- Writeback (loads and reg updates) & completion control logic
 
		
	
		
			
				    -- Return data for loads & completion control logic
 
		
	
		
			
				    --
 
		
	
		
			
				    writeback_control: process(all)
 
		
	
		
			
				    begin
 
		
	
		
			
				 
		
	
		
			
					-- The mux on d_out.write reg defaults to the normal load hit case.
 
		
	
		
			
					d_out.write_enable <= '0';
 
		
	
		
			
					-- The mux on d_out.data defaults to the normal load hit case.
 
		
	
		
			
					d_out.valid <= '0';
 
		
	
		
			
					d_out.write_reg <= r1.req.write_reg;
 
		
	
		
			
					d_out.write_data <= cache_out(r1.hit_way);
 
		
	
		
			
					d_out.write_len <= r1.req.length;
 
		
	
		
			
					d_out.write_shift <= r1.req.addr(2 downto 0);
 
		
	
		
			
					d_out.sign_extend <= r1.req.sign_extend;
 
		
	
		
			
					d_out.byte_reverse <= r1.req.byte_reverse;
 
		
	
		
			
					d_out.second_word <= r1.second_dword;
 
		
	
		
			
					d_out.xerc <= r1.req.xerc;
 
		
	
		
			
				        d_out.rc <= '0';                -- loads never have rc=1
 
		
	
		
			
					d_out.data <= cache_out(r1.hit_way);
 
		
	
		
			
				        d_out.store_done <= '0';
 
		
	
		
			
				 
		
	
		
			
					-- We have a valid load or store hit or we just completed a slow
 
		
	
	
		
			
				
					
						
						
						
							
								 
						
					 
				
			
			@ -561,30 +489,17 @@ begin
 
		
	
		
			
					--
 
		
	
		
			
				 
		
	
		
			
					-- Sanity: Only one of these must be set in any given cycle
 
		
	
		
			
					assert (r1.update_valid and r1.hit_load_valid) /= '1' report
 
		
	
		
			
					    "unexpected hit_load_delayed collision with update_valid"
 
		
	
		
			
					    severity FAILURE;
 
		
	
		
			
					assert (r1.slow_valid and r1.stcx_fail) /= '1' report
 
		
	
		
			
					    "unexpected slow_valid collision with stcx_fail"
 
		
	
		
			
					    severity FAILURE;
 
		
	
		
			
					assert ((r1.slow_valid or r1.stcx_fail) and r1.hit_load_valid) /= '1' report
 
		
	
		
			
					    "unexpected hit_load_delayed collision with slow_valid"
 
		
	
		
			
					    severity FAILURE;
 
		
	
		
			
					assert ((r1.slow_valid or r1.stcx_fail) and r1.update_valid) /= '1' report
 
		
	
		
			
					    "unexpected update_valid collision with slow_valid or stcx_fail"
 
		
	
		
			
					    severity FAILURE;
 
		
	
		
			
				 
		
	
		
			
					-- Load hit case is the standard path
 
		
	
		
			
					if r1.hit_load_valid = '1' then
 
		
	
		
			
					    d_out.write_enable <= '1';
 
		
	
		
			
				 
		
	
		
			
				            -- If there isn't another dword to go and
 
		
	
		
			
					    -- it's not a load with update, complete it now
 
		
	
		
			
					    if (r1.second_dword or not r1.two_dwords) = '1' and
 
		
	
		
			
				                r1.req.update = '0' then
 
		
	
		
			
				                report "completing load hit";
 
		
	
		
			
						d_out.valid <= '1';
 
		
	
		
			
				            end if;
 
		
	
		
			
				            report "completing load hit";
 
		
	
		
			
				            d_out.valid <= '1';
 
		
	
		
			
					end if;
 
		
	
		
			
				 
		
	
		
			
					-- Slow ops (load miss, NC, stores)
 
		
	
	
		
			
				
					
						
						
						
							
								 
						
					 
				
			
			@ -593,63 +508,20 @@ begin
 
		
	
		
			
					    -- mux accordingly
 
		
	
		
			
					    --
 
		
	
		
			
					    if r1.req.load then
 
		
	
		
			
						d_out.write_reg <= r1.req.write_reg;
 
		
	
		
			
						d_out.write_enable <= '1';
 
		
	
		
			
				 
		
	
		
			
						-- Read data comes from the slow data latch, formatter
 
		
	
		
			
						-- from the latched request.
 
		
	
		
			
						--
 
		
	
		
			
						d_out.write_data <= r1.slow_data;
 
		
	
		
			
						d_out.write_shift <= r1.req.addr(2 downto 0);
 
		
	
		
			
						d_out.sign_extend <= r1.req.sign_extend;
 
		
	
		
			
						d_out.byte_reverse <= r1.req.byte_reverse;
 
		
	
		
			
						d_out.write_len <= r1.req.length;
 
		
	
		
			
						d_out.xerc <= r1.req.xerc;
 
		
	
		
			
				                d_out.second_word <= r1.second_dword;
 
		
	
		
			
						-- Read data comes from the slow data latch
 
		
	
		
			
						d_out.data <= r1.slow_data;
 
		
	
		
			
					    end if;
 
		
	
		
			
				            d_out.rc <= r1.req.rc;
 
		
	
		
			
				            d_out.store_done <= '1';
 
		
	
		
			
				 
		
	
		
			
					    -- If it's a store or a non-update load form, complete now
 
		
	
		
			
				            -- unless we need to do another dword transfer
 
		
	
		
			
					    if (r1.req.load = '0' or r1.req.update = '0') and
 
		
	
		
			
				                (r1.two_dwords = '0' or r1.second_dword = '1') then
 
		
	
		
			
				                report "completing store or load miss";
 
		
	
		
			
						d_out.valid <= '1';
 
		
	
		
			
					    end if;
 
		
	
		
			
				            report "completing store or load miss";
 
		
	
		
			
				            d_out.valid <= '1';
 
		
	
		
			
					end if;
 
		
	
		
			
				 
		
	
		
			
				        if r1.stcx_fail = '1' then
 
		
	
		
			
				            d_out.rc <= r1.req.rc;
 
		
	
		
			
				            d_out.store_done <= '0';
 
		
	
		
			
				            d_out.valid <= '1';
 
		
	
		
			
				        end if;
 
		
	
		
			
				 
		
	
		
			
					-- We have a register update to do.
 
		
	
		
			
					if r1.update_valid = '1' then
 
		
	
		
			
					    d_out.write_enable <= '1';
 
		
	
		
			
					    d_out.write_reg <= r1.req.update_reg;
 
		
	
		
			
				 
		
	
		
			
					    -- Change the read data mux to the address that's going into
 
		
	
		
			
					    -- the register and the formatter does nothing.
 
		
	
		
			
					    --
 
		
	
		
			
					    d_out.write_data <= r1.req.addr;
 
		
	
		
			
					    d_out.write_shift <= "000";
 
		
	
		
			
					    d_out.write_len <= "1000";
 
		
	
		
			
					    d_out.sign_extend <= '0';
 
		
	
		
			
					    d_out.byte_reverse <= '0';
 
		
	
		
			
					    d_out.xerc <= r1.req.xerc;
 
		
	
		
			
				            d_out.second_word <= '0';
 
		
	
		
			
				 
		
	
		
			
					    -- If it was a load, this completes the operation (load with
 
		
	
		
			
					    -- update case).
 
		
	
		
			
					    --
 
		
	
		
			
					    if r1.req.load = '1' then
 
		
	
		
			
				                report "completing after load update";
 
		
	
		
			
						d_out.valid <= '1';
 
		
	
		
			
					    end if;
 
		
	
		
			
					end if;
 
		
	
		
			
				 
		
	
		
			
				    end process;
 
		
	
		
			
				 
		
	
		
			
				    --
 
		
	
	
		
			
				
					
						
							
								 
						
						
							
								 
						
						
					 
				
			
			@ -703,11 +575,11 @@ begin
 
		
	
		
			
					    -- For timing, the mux on wr_data/sel/addr is not dependent on anything
 
		
	
		
			
					    -- other than the current state. Only the do_write signal is.
 
		
	
		
			
					    --
 
		
	
		
			
					    if r1.state = IDLE or r1.state = NEXT_DWORD then
 
		
	
		
			
						-- In these states, the only write path is the store-hit update case
 
		
	
		
			
					    if r1.state = IDLE then
 
		
	
		
			
						-- In IDLE state, the only write path is the store-hit update case
 
		
	
		
			
						wr_addr  <= std_ulogic_vector(to_unsigned(req_row, ROW_BITS));
 
		
	
		
			
						wr_data  <= req_data;
 
		
	
		
			
						wr_sel   <= req_sel;
 
		
	
		
			
						wr_data  <= r0.data;
 
		
	
		
			
						wr_sel   <= r0.byte_sel;
 
		
	
		
			
					    else
 
		
	
		
			
						-- Otherwise, we might be doing a reload
 
		
	
		
			
						wr_data <= wishbone_in.dat;
 
		
	
	
		
			
				
					
						
						
						
							
								 
						
					 
				
			
			@ -731,35 +603,25 @@ begin
 
		
	
		
			
				    end generate;
 
		
	
		
			
				 
		
	
		
			
				    --
 
		
	
		
			
				    -- Cache hit synchronous machine for the easy case. This handles
 
		
	
		
			
				    -- non-update form load hits
 
		
	
		
			
				    -- Cache hit synchronous machine for the easy case. This handles load hits.
 
		
	
		
			
				    --
 
		
	
		
			
				    dcache_fast_hit : process(clk)
 
		
	
		
			
				    begin
 
		
	
		
			
				        if rising_edge(clk) then
 
		
	
		
			
					    -- If we have a request incoming, we have to latch it as d_in.valid
 
		
	
		
			
					    -- If we have a request incoming, we have to latch it as r0.valid
 
		
	
		
			
					    -- is only set for a single cycle. It's up to the control logic to
 
		
	
		
			
					    -- ensure we don't override an uncompleted request (for now we are
 
		
	
		
			
					    -- single issue on load/stores so we are fine, later, we can generate
 
		
	
		
			
					    -- a stall output if necessary).
 
		
	
		
			
				 
		
	
		
			
					    if req_op /= OP_NONE and d_in.valid = '1' then
 
		
	
		
			
						r1.req <= d_in;
 
		
	
		
			
				                r1.second_dword <= '0';
 
		
	
		
			
				                r1.two_dwords <= two_dwords;
 
		
	
		
			
				                r1.next_addr <= next_addr;
 
		
	
		
			
				                r1.next_sel <= bus_sel(15 downto 8);
 
		
	
		
			
				 
		
	
		
			
					    if req_op /= OP_NONE and stall_out = '0' then
 
		
	
		
			
						r1.req <= r0;
 
		
	
		
			
						report "op:" & op_t'image(req_op) &
 
		
	
		
			
						    " addr:" & to_hstring(d_in.addr) &
 
		
	
		
			
						    " upd:" & std_ulogic'image(d_in.update) &
 
		
	
		
			
						    " nc:" & std_ulogic'image(d_in.nc) &
 
		
	
		
			
						    " reg:" & to_hstring(d_in.write_reg) &
 
		
	
		
			
						    " addr:" & to_hstring(r0.addr) &
 
		
	
		
			
						    " nc:" & std_ulogic'image(r0.nc) &
 
		
	
		
			
						    " idx:" & integer'image(req_index) &
 
		
	
		
			
						    " tag:" & to_hstring(req_tag) &
 
		
	
		
			
						    " way: " & integer'image(req_hit_way);
 
		
	
		
			
				            elsif r1.state = NEXT_DWORD then
 
		
	
		
			
				                r1.second_dword <= '1';
 
		
	
		
			
					    end if;
 
		
	
		
			
				 
		
	
		
			
					    -- Fast path for load/store hits. Set signals for the writeback controls.
 
		
	
	
		
			
				
					
						
						
						
							
								 
						
					 
				
			
			@ -776,7 +638,6 @@ begin
 
		
	
		
			
				    -- Every other case is handled by this state machine:
 
		
	
		
			
				    --
 
		
	
		
			
				    --   * Cache load miss/reload (in conjunction with "rams")
 
		
	
		
			
				    --   * Load hits for update forms
 
		
	
		
			
				    --   * Load hits for non-cachable forms
 
		
	
		
			
				    --   * Stores (the collision case is handled in "rams")
 
		
	
		
			
				    --
 
		
	
	
		
			
				
					
						
						
						
							
								 
						
					 
				
			
			@ -795,7 +656,6 @@ begin
 
		
	
		
			
						end loop;
 
		
	
		
			
				                r1.state <= IDLE;
 
		
	
		
			
						r1.slow_valid <= '0';
 
		
	
		
			
						r1.update_valid <= '0';
 
		
	
		
			
				                r1.wb.cyc <= '0';
 
		
	
		
			
				                r1.wb.stb <= '0';
 
		
	
		
			
				 
		
	
	
		
			
				
					
						
						
						
							
								 
						
					 
				
			
			@ -804,39 +664,19 @@ begin
 
		
	
		
			
				            else
 
		
	
		
			
						-- One cycle pulses reset
 
		
	
		
			
						r1.slow_valid <= '0';
 
		
	
		
			
						r1.update_valid <= '0';
 
		
	
		
			
				                r1.stcx_fail <= '0';
 
		
	
		
			
				 
		
	
		
			
						-- We cannot currently process a new request when not idle
 
		
	
		
			
						assert d_in.valid = '0' or r1.state = IDLE report "request " &
 
		
	
		
			
						    op_t'image(req_op) & " while in state " & state_t'image(r1.state)
 
		
	
		
			
						    severity FAILURE;
 
		
	
		
			
				 
		
	
		
			
						-- Main state machine
 
		
	
		
			
						case r1.state is
 
		
	
		
			
				                when IDLE | NEXT_DWORD =>
 
		
	
		
			
				                when IDLE =>
 
		
	
		
			
						    case req_op is
 
		
	
		
			
				                    when OP_LOAD_HIT =>
 
		
	
		
			
				                        if r1.state = IDLE then
 
		
	
		
			
				                            -- If the load is misaligned then we will need to start
 
		
	
		
			
				                            -- the state machine
 
		
	
		
			
				                            if two_dwords = '1' then
 
		
	
		
			
				                                r1.state <= NEXT_DWORD;
 
		
	
		
			
				                            elsif d_in.update = '1' then
 
		
	
		
			
				                                r1.state <= LOAD_UPDATE;
 
		
	
		
			
				                            end if;
 
		
	
		
			
				                        else
 
		
	
		
			
				                            if r1.req.update = '1' then
 
		
	
		
			
				                                r1.state <= LOAD_UPDATE;
 
		
	
		
			
				                            else
 
		
	
		
			
				                                r1.state <= IDLE;
 
		
	
		
			
				                            end if;
 
		
	
		
			
				                        end if;
 
		
	
		
			
				                        -- stay in IDLE state
 
		
	
		
			
				 
		
	
		
			
						    when OP_LOAD_MISS =>
 
		
	
		
			
				                    when OP_LOAD_MISS =>
 
		
	
		
			
							-- Normal load cache miss, start the reload machine
 
		
	
		
			
							--
 
		
	
		
			
							report "cache miss addr:" & to_hstring(req_addr) &
 
		
	
		
			
							report "cache miss addr:" & to_hstring(r0.addr) &
 
		
	
		
			
							    " idx:" & integer'image(req_index) &
 
		
	
		
			
							    " way:" & integer'image(replace_way) &
 
		
	
		
			
							    " tag:" & to_hstring(req_tag);
 
		
	
	
		
			
				
					
						
							
								 
						
						
							
								 
						
						
					 
				
			
			@ -871,19 +711,17 @@ begin
 
		
	
		
			
							r1.state <= RELOAD_WAIT_ACK;
 
		
	
		
			
				 
		
	
		
			
						    when OP_LOAD_NC =>
 
		
	
		
			
				                        r1.wb.sel <= req_sel;
 
		
	
		
			
				                        r1.wb.adr <= req_addr(r1.wb.adr'left downto 3) & "000";
 
		
	
		
			
				                        r1.wb.sel <= r0.byte_sel;
 
		
	
		
			
				                        r1.wb.adr <= r0.addr(r1.wb.adr'left downto 3) & "000";
 
		
	
		
			
				                        r1.wb.cyc <= '1';
 
		
	
		
			
				                        r1.wb.stb <= '1';
 
		
	
		
			
							r1.wb.we <= '0';
 
		
	
		
			
							r1.state <= NC_LOAD_WAIT_ACK;
 
		
	
		
			
				 
		
	
		
			
						    when OP_STORE_HIT | OP_STORE_MISS =>
 
		
	
		
			
							-- For store-with-update do the register update
 
		
	
		
			
				                        r1.update_valid <= d_in.valid and d_in.update;
 
		
	
		
			
				                        r1.wb.sel <= req_sel;
 
		
	
		
			
				                        r1.wb.adr <= req_addr(r1.wb.adr'left downto 3) & "000";
 
		
	
		
			
							r1.wb.dat <= req_data;
 
		
	
		
			
				                        r1.wb.sel <= r0.byte_sel;
 
		
	
		
			
				                        r1.wb.adr <= r0.addr(r1.wb.adr'left downto 3) & "000";
 
		
	
		
			
							r1.wb.dat <= r0.data;
 
		
	
		
			
				                        if cancel_store = '0' then
 
		
	
		
			
				                            r1.wb.cyc <= '1';
 
		
	
		
			
				                            r1.wb.stb <= '1';
 
		
	
	
		
			
				
					
						
						
						
							
								 
						
					 
				
			
			@ -899,9 +737,6 @@ begin
 
		
	
		
			
						    when OP_BAD =>
 
		
	
		
			
						    end case;
 
		
	
		
			
				 
		
	
		
			
				                when PRE_NEXT_DWORD =>
 
		
	
		
			
				                    r1.state <= NEXT_DWORD;
 
		
	
		
			
				 
		
	
		
			
						when RELOAD_WAIT_ACK =>
 
		
	
		
			
						    -- Requests are all sent if stb is 0
 
		
	
		
			
						    stbs_done := r1.wb.stb = '0';
 
		
	
	
		
			
				
					
						
							
								 
						
						
							
								 
						
						
					 
				
			
			@ -943,31 +778,23 @@ begin
 
		
	
		
			
							    -- Cache line is now valid
 
		
	
		
			
							    cache_valids(r1.store_index)(r1.store_way) <= '1';
 
		
	
		
			
				 
		
	
		
			
							    -- Write back the load data that we got, and start
 
		
	
		
			
				                            -- the second dword if necessary.  Otherwise, see if
 
		
	
		
			
							    -- we also need to do the deferred update cycle.
 
		
	
		
			
							    r1.slow_valid <= '1';
 
		
	
		
			
				                            if r1.two_dwords and not r1.second_dword then
 
		
	
		
			
				                                r1.state <= PRE_NEXT_DWORD;
 
		
	
		
			
							    elsif r1.req.update = '1' then
 
		
	
		
			
								r1.state <= LOAD_UPDATE;
 
		
	
		
			
								report "completing miss with load-update !";
 
		
	
		
			
							    else
 
		
	
		
			
								r1.state <= IDLE;
 
		
	
		
			
								report "completing miss !";
 
		
	
		
			
							    end if;
 
		
	
		
			
				                            -- Don't complete and go idle until next cycle, in
 
		
	
		
			
				                            -- case the next request is for the last dword of
 
		
	
		
			
				                            -- the cache line we just loaded.
 
		
	
		
			
				                            r1.state <= FINISH_LD_MISS;
 
		
	
		
			
							end if;
 
		
	
		
			
				 
		
	
		
			
							-- Increment store row counter
 
		
	
		
			
							r1.store_row <= next_row(r1.store_row);
 
		
	
		
			
						    end if;
 
		
	
		
			
				 
		
	
		
			
						when LOAD_UPDATE =>
 
		
	
		
			
						    -- We need the extra cycle to complete a load with update
 
		
	
		
			
						    r1.update_valid <= '1';
 
		
	
		
			
						    r1.state <= IDLE;
 
		
	
		
			
				                when FINISH_LD_MISS =>
 
		
	
		
			
				                    -- Write back the load data that we got
 
		
	
		
			
				                    r1.slow_valid <= '1';
 
		
	
		
			
				                    r1.state <= IDLE;
 
		
	
		
			
				                    report "completing miss !";
 
		
	
		
			
				 
		
	
		
			
						when STORE_WAIT_ACK | NC_LOAD_WAIT_ACK =>
 
		
	
		
			
				                when STORE_WAIT_ACK | NC_LOAD_WAIT_ACK =>
 
		
	
		
			
						    -- Clear stb when slave accepted request
 
		
	
		
			
				                    if wishbone_in.stall = '0' then
 
		
	
		
			
							r1.wb.stb <= '0';
 
		
	
	
		
			
				
					
						
						
						
							
								 
						
					 
				
			
			@ -975,16 +802,10 @@ begin
 
		
	
		
			
				 
		
	
		
			
						    -- Got ack ? complete.
 
		
	
		
			
						    if wishbone_in.ack = '1' then
 
		
	
		
			
				                        if r1.two_dwords and not r1.second_dword then
 
		
	
		
			
				                            r1.state <= NEXT_DWORD;
 
		
	
		
			
				                        elsif r1.state = NC_LOAD_WAIT_ACK and r1.req.update = '1' then
 
		
	
		
			
				                            r1.state <= LOAD_UPDATE;
 
		
	
		
			
				                        else
 
		
	
		
			
				                            r1.state <= IDLE;
 
		
	
		
			
				                        end if;
 
		
	
		
			
							if r1.state = NC_LOAD_WAIT_ACK then
 
		
	
		
			
							    r1.slow_data <= wishbone_in.dat;
 
		
	
		
			
							end if;
 
		
	
		
			
				                        r1.state <= IDLE;
 
		
	
		
			
							r1.slow_valid <= '1';
 
		
	
		
			
							r1.wb.cyc <= '0';
 
		
	
		
			
							r1.wb.stb <= '0';