-- microwatt/fetch1.vhdl
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
library work;
use work.utils.all;
use work.common.all;
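
-- Overview (summarizing the commit history of this file):
--
-- fetch1 computes the next instruction address (NIA) each cycle and issues
-- fetch requests to the icache.  Two notable structures live here:
--
-- * A branch target cache (BTC): a direct-mapped, 1024-entry cache mapping
--   the address of a simple branch (b or bc) to its target, indexed by NIA
--   bits 11..2.  It is updated from execute1 for both taken and not-taken
--   branches, and its lookup is pipelined (the entry for NIA + 8 is read
--   each cycle) to help timing.  The BTC is optional (HAS_BTC), as it
--   costs roughly 1420 LUTs.
--
-- * A 2-entry effective-to-real address translation cache (ERAT), backed
--   by a direct-mapped iTLB.  On an ERAT miss with translation enabled,
--   i_out.req goes low for two cycles while the iTLB is looked up; the
--   result refills the least recently used ERAT entry.  An iTLB miss or
--   protection violation raises fetch_fail, and fetch1 stalls until the
--   MMU supplies a PTE or an interrupt or redirect occurs.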
entity fetch1 is
    generic(
        RESET_ADDRESS     : std_logic_vector(63 downto 0) := (others => '0');
        ALT_RESET_ADDRESS : std_logic_vector(63 downto 0) := (others => '0');
        TLB_SIZE          : positive := 64;  -- L1 ITLB number of entries (direct mapped)
        HAS_BTC           : boolean := true
        );
    port(
        clk          : in std_ulogic;
        rst          : in std_ulogic;

        -- Control inputs:
        stall_in     : in std_ulogic;
        flush_in     : in std_ulogic;
        inval_btc    : in std_ulogic;
        stop_in      : in std_ulogic;
        alt_reset_in : in std_ulogic;
        m_in         : in MmuToITLBType;

        -- redirect from writeback unit
        w_in         : in WritebackToFetch1Type;

        -- redirect from decode1
        d_in         : in Decode1ToFetch1Type;

        -- Request to icache
        i_out        : out Fetch1ToIcacheType;

        -- outputs to logger
        log_out      : out std_ulogic_vector(42 downto 0)
        );
end entity fetch1;
architecture behaviour of fetch1 is
    type reg_internal_t is record
        mode_32bit:  std_ulogic;
        rd_is_niap4: std_ulogic;
        tlbcheck:    std_ulogic;
        tlbstall:    std_ulogic;
        next_nia:    std_ulogic_vector(63 downto 0);
    end record;
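
    -- The ERAT holds the page-number parts of two recently used effective
    -- and real addresses, at 4kB granularity (the smallest page size
    -- supported by the architecture).  The mru bit implements
    -- least-recently-used replacement between the two entries.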
    -- Mini effective to real translation cache
    type erat_t is record
        epn0:  std_ulogic_vector(63 - MIN_LG_PGSZ downto 0);
        epn1:  std_ulogic_vector(63 - MIN_LG_PGSZ downto 0);
        rpn0:  std_ulogic_vector(REAL_ADDR_BITS - MIN_LG_PGSZ - 1 downto 0);
        rpn1:  std_ulogic_vector(REAL_ADDR_BITS - MIN_LG_PGSZ - 1 downto 0);
        priv0: std_ulogic;
        priv1: std_ulogic;
        valid: std_ulogic_vector(1 downto 0);
        mru:   std_ulogic;              -- '1' => entry 1 most recently used
    end record;
    signal r, r_next         : Fetch1ToIcacheType;
    signal r_int, r_next_int : reg_internal_t;
    signal advance_nia : std_ulogic;
    signal log_nia     : std_ulogic_vector(42 downto 0);

    signal erat     : erat_t;
    signal erat_hit : std_ulogic;
    signal erat_sel : std_ulogic;
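
    -- Branch target cache: direct-mapped, BTC_SIZE entries, indexed by NIA
    -- bits 11..2.  Each entry holds a taken bit, the virt_mode at update
    -- time, the tag (the remaining NIA bits) and the branch target, hence
    -- BTC_WIDTH = BTC_TAG_BITS + BTC_TARGET_BITS + 2.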
    constant BTC_ADDR_BITS   : integer := 10;
    constant BTC_TAG_BITS    : integer := 62 - BTC_ADDR_BITS;
    constant BTC_TARGET_BITS : integer := 62;
    constant BTC_SIZE        : integer := 2 ** BTC_ADDR_BITS;
    constant BTC_WIDTH       : integer := BTC_TAG_BITS + BTC_TARGET_BITS + 2;
    type btc_mem_type is array (0 to BTC_SIZE - 1) of std_ulogic_vector(BTC_WIDTH - 1 downto 0);

    signal btc_rd_addr  : unsigned(BTC_ADDR_BITS - 1 downto 0);
    signal btc_rd_data  : std_ulogic_vector(BTC_WIDTH - 1 downto 0) := (others => '0');
    signal btc_rd_valid : std_ulogic := '0';
    -- L1 ITLB.
    constant TLB_BITS        : natural := log2(TLB_SIZE);
    constant TLB_EA_TAG_BITS : natural := 64 - (MIN_LG_PGSZ + TLB_BITS);
    constant TLB_PTE_BITS    : natural := 64;

    subtype tlb_index_t is integer range 0 to TLB_SIZE - 1;
    type tlb_valids_t is array(tlb_index_t) of std_ulogic;
    subtype tlb_tag_t is std_ulogic_vector(TLB_EA_TAG_BITS - 1 downto 0);
    type tlb_tags_t is array(tlb_index_t) of tlb_tag_t;
    subtype tlb_pte_t is std_ulogic_vector(TLB_PTE_BITS - 1 downto 0);
    type tlb_ptes_t is array(tlb_index_t) of tlb_pte_t;

    signal itlb_valids : tlb_valids_t;
    signal itlb_tags   : tlb_tags_t;
    signal itlb_ptes   : tlb_ptes_t;

    -- Values read from above arrays on a clock edge
    signal itlb_valid : std_ulogic;
    signal itlb_ttag  : tlb_tag_t;
    signal itlb_pte   : tlb_pte_t;
    signal itlb_hit   : std_ulogic;

    -- Simple hash for direct-mapped TLB index
    function hash_ea(addr: std_ulogic_vector(63 downto 0)) return std_ulogic_vector is
        variable hash : std_ulogic_vector(TLB_BITS - 1 downto 0);
    begin
        hash := addr(MIN_LG_PGSZ + TLB_BITS - 1 downto MIN_LG_PGSZ)
                xor addr(MIN_LG_PGSZ + 2 * TLB_BITS - 1 downto MIN_LG_PGSZ + TLB_BITS)
                xor addr(MIN_LG_PGSZ + 3 * TLB_BITS - 1 downto MIN_LG_PGSZ + 2 * TLB_BITS);
        return hash;
    end;
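
    -- For example, with the default TLB_SIZE of 64 (TLB_BITS = 6) and 4kB
    -- minimum pages (MIN_LG_PGSZ = 12), hash_ea returns
    -- addr(17 downto 12) xor addr(23 downto 18) xor addr(29 downto 24).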
begin
    regs : process(clk)
    begin
        if rising_edge(clk) then
            log_nia <= r.nia(63) & r.nia(43 downto 2);
            if r /= r_next and advance_nia = '1' then
                report "fetch1 rst:" & std_ulogic'image(rst) &
                    " IR:" & std_ulogic'image(r_next.virt_mode) &
                    " P:" & std_ulogic'image(r_next.priv_mode) &
                    " E:" & std_ulogic'image(r_next.big_endian) &
                    " 32:" & std_ulogic'image(r_next_int.mode_32bit) &
                    " I:" & std_ulogic'image(w_in.interrupt) &
                    " R:" & std_ulogic'image(w_in.redirect) & std_ulogic'image(d_in.redirect) &
                    " S:" & std_ulogic'image(stall_in) &
                    " T:" & std_ulogic'image(stop_in) &
                    " nia:" & to_hstring(r_next.nia) &
                    " req:" & std_ulogic'image(r_next.req) &
                    " FF:" & std_ulogic'image(r_next.fetch_fail);
            end if;
            if advance_nia = '1' then
                r <= r_next;
                r_int <= r_next_int;
            end if;
            -- always send the up-to-date stop mark and req
            r.stop_mark <= stop_in;
            r.req <= r_next.req;
            r.fetch_fail <= r_next.fetch_fail;
            r_int.tlbcheck <= r_next_int.tlbcheck;
            r_int.tlbstall <= r_next_int.tlbstall;
        end if;
    end process;

    log_out <= log_nia;
    btc : if HAS_BTC generate
        signal btc_memory : btc_mem_type;
        attribute ram_style : string;
        attribute ram_style of btc_memory : signal is "block";

        signal btc_valids : std_ulogic_vector(BTC_SIZE - 1 downto 0);
        -- attribute ram_style of btc_valids : signal is "distributed";

        signal btc_wr      : std_ulogic;
        signal btc_wr_data : std_ulogic_vector(BTC_WIDTH - 1 downto 0);
        signal btc_wr_addr : std_ulogic_vector(BTC_ADDR_BITS - 1 downto 0);
    begin
        btc_wr_data <= w_in.br_taken &
                       r.virt_mode &
                       w_in.br_nia(63 downto BTC_ADDR_BITS + 2) &
                       w_in.redirect_nia(63 downto 2);
        btc_wr_addr <= w_in.br_nia(BTC_ADDR_BITS + 1 downto 2);
        btc_wr <= w_in.br_last;

        btc_ram : process(clk)
            variable raddr : unsigned(BTC_ADDR_BITS - 1 downto 0);
        begin
            if rising_edge(clk) then
                if advance_nia = '1' then
                    if is_X(btc_rd_addr) then
                        btc_rd_data <= (others => 'X');
                        btc_rd_valid <= 'X';
                    else
                        btc_rd_data <= btc_memory(to_integer(btc_rd_addr));
                        btc_rd_valid <= btc_valids(to_integer(btc_rd_addr));
                    end if;
                end if;
                if btc_wr = '1' then
                    assert not is_X(btc_wr_addr) report "Writing to unknown address" severity FAILURE;
                    btc_memory(to_integer(unsigned(btc_wr_addr))) <= btc_wr_data;
                end if;
                if inval_btc = '1' or rst = '1' then
                    btc_valids <= (others => '0');
                elsif btc_wr = '1' then
                    assert not is_X(btc_wr_addr) report "Writing to unknown address" severity FAILURE;
                    btc_valids(to_integer(unsigned(btc_wr_addr))) <= '1';
                end if;
            end if;
        end process;
    end generate;
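
    -- ERAT update policy: an MMU PTE load (tlbld) refills entry 0 directly;
    -- a successful iTLB lookup (a tlbcheck cycle with itlb_hit) refills the
    -- least recently used entry; any MMU invalidation (tlbie) or reset
    -- clears both entries.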
    erat_sync : process(clk)
    begin
        if rising_edge(clk) then
            if rst /= '0' or m_in.tlbie = '1' then
                erat.valid <= "00";
                erat.mru <= '0';
            else
                if erat_hit = '1' then
                    erat.mru <= erat_sel;
                end if;
                if m_in.tlbld = '1' then
                    erat.epn0 <= m_in.addr(63 downto MIN_LG_PGSZ);
                    erat.rpn0 <= m_in.pte(REAL_ADDR_BITS - 1 downto MIN_LG_PGSZ);
                    erat.priv0 <= m_in.pte(3);
                    erat.valid(0) <= '1';
                    erat.valid(1) <= '0';
                    erat.mru <= '0';
                elsif r_int.tlbcheck = '1' and itlb_hit = '1' then
                    if erat.mru = '0' then
                        erat.epn1 <= r.nia(63 downto MIN_LG_PGSZ);
                        erat.rpn1 <= itlb_pte(REAL_ADDR_BITS - 1 downto MIN_LG_PGSZ);
                        erat.priv1 <= itlb_pte(3);
                        erat.valid(1) <= '1';
                    else
                        erat.epn0 <= r.nia(63 downto MIN_LG_PGSZ);
                        erat.rpn0 <= itlb_pte(REAL_ADDR_BITS - 1 downto MIN_LG_PGSZ);
                        erat.priv0 <= itlb_pte(3);
                        erat.valid(0) <= '1';
                    end if;
                    erat.mru <= not erat.mru;
                end if;
            end if;
        end if;
    end process;
    -- Read TLB using the NIA for the next cycle
    itlb_read : process(clk)
        variable tlb_req_index : std_ulogic_vector(TLB_BITS - 1 downto 0);
    begin
        if rising_edge(clk) then
            if advance_nia = '1' then
                tlb_req_index := hash_ea(r_next.nia);
                if is_X(tlb_req_index) then
                    itlb_pte <= (others => 'X');
                    itlb_ttag <= (others => 'X');
                    itlb_valid <= 'X';
                else
                    itlb_pte <= itlb_ptes(to_integer(unsigned(tlb_req_index)));
                    itlb_ttag <= itlb_tags(to_integer(unsigned(tlb_req_index)));
                    itlb_valid <= itlb_valids(to_integer(unsigned(tlb_req_index)));
                end if;
            end if;
        end if;
    end process;
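
    -- Because the arrays are read a cycle ahead (indexed by r_next.nia),
    -- itlb_ttag/itlb_pte/itlb_valid line up with r.nia by the time they
    -- are compared below.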
    -- TLB hit detection
    itlb_lookup : process(all)
    begin
        itlb_hit <= '0';
        if itlb_ttag = r.nia(63 downto MIN_LG_PGSZ + TLB_BITS) then
            itlb_hit <= itlb_valid;
        end if;
    end process;
    -- iTLB update
    itlb_update : process(clk)
        variable wr_index : std_ulogic_vector(TLB_BITS - 1 downto 0);
    begin
        if rising_edge(clk) then
            wr_index := hash_ea(m_in.addr);
            if rst = '1' or (m_in.tlbie = '1' and m_in.doall = '1') then
                -- clear all valid bits
                for i in tlb_index_t loop
                    itlb_valids(i) <= '0';
                end loop;
            elsif m_in.tlbie = '1' then
                assert not is_X(wr_index) report "icache index invalid on write" severity FAILURE;
                -- clear entry regardless of hit or miss
                itlb_valids(to_integer(unsigned(wr_index))) <= '0';
            elsif m_in.tlbld = '1' then
                assert not is_X(wr_index) report "icache index invalid on write" severity FAILURE;
                itlb_tags(to_integer(unsigned(wr_index))) <= m_in.addr(63 downto MIN_LG_PGSZ + TLB_BITS);
                itlb_ptes(to_integer(unsigned(wr_index))) <= m_in.pte;
                itlb_valids(to_integer(unsigned(wr_index))) <= '1';
            end if;
            --ev.itlb_miss_resolved <= m_in.tlbld and not rst;
        end if;
    end process;
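
    -- Combinational next-state logic: selects the next NIA (a redirect from
    -- writeback takes priority over a redirect from decode1), performs the
    -- ERAT lookup, and schedules an iTLB check on an ERAT miss.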
comb : process(all)
variable v : Fetch1ToIcacheType;
variable v_int : reg_internal_t;
variable next_nia : std_ulogic_vector(63 downto 0);
variable m32 : std_ulogic;
Move iTLB from icache to fetch1 This moves the address translation step for instruction fetches one cycle earlier, so that it now happens in the fetch1 stage. There is now a 2-entry mini translation cache ("ERAT", or effective to real address translation cache) which operates on the output of the multiplexer that selects the instruction address for the next cycle. The ERAT consists of two effective address registers and two corresponding real address registers. They store the page number part of the addresses for a 4kB page size, which is the smallest page size supported by the architecture. If the effective address doesn't match either of the EA registers, and address translation is enabled, then i_out.req goes low for two cycles while the iTLB is looked up. Experimentally, this delay results in a 0.1% drop in coremark performance; allowing two cycles for the lookup results in better timing. The result from the iTLB is placed into the least recently used ERAT entry and then used to translate the address as normal. If address translation is not enabled then the EA is used directly as the real address. The iTLB structure is the same as it was before; direct mapped, indexed using a hashed EA. The "fetch failed" signal, which indicates a TLB miss or protection violation, is now generated in fetch1 and passed through icache. When it is asserted, fetch1 goes into a stalled state until a PTE arrives from the MMU (which gets put into both the iTLB and the ERAT), or an interrupt or redirect occurs. Any TLB invalidations from the MMU invalidate the whole ERAT. Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
8 months ago
variable ehit, esel : std_ulogic;
variable eaa_priv : std_ulogic;
begin
v := r;
v_int := r_int;
fetch1: Implement a simple branch target cache This implements a cache in fetch1, where each entry stores the address of a simple branch instruction (b or bc) and the target of the branch. When fetching sequentially, if the address being fetched matches the cache entry, then fetching will be redirected to the branch target. The cache has 1024 entries and is direct-mapped, i.e. indexed by bits 11..2 of the NIA. The bus from execute1 now carries information about taken and not-taken simple branches, which fetch1 uses to update the cache. The cache entry is updated for both taken and not-taken branches, with the valid bit being set if the branch was taken and cleared if the branch was not taken. If fetching is redirected to the branch target then that goes down the pipe as a predicted-taken branch, and decode1 does not do any static branch prediction. If fetching is not redirected, then the next instruction goes down the pipe as normal and decode1 does its static branch prediction. In order to make timing, the lookup of the cache is pipelined, so on each cycle the cache entry for the current NIA + 8 is read. This means that after a redirect (from decode1 or execute1), only the third and subsequent sequentially-fetched instructions will be able to be predicted. This improves the coremark value on the Arty A7-100 from about 180 to about 190 (more than 5%). The BTC is optional. Builds for the Artix 7 35-T part have it off by default because the extra ~1420 LUTs it takes mean that the design doesn't fit on the Arty A7-35 board. Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
v.predicted := '0';
v.pred_ntaken := '0';
v.req := not stop_in;
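-- If an iTLB lookup was started last cycle (tlbcheck), stall this
-- cycle as well; the lookup is allowed two cycles for better timing.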
v_int.tlbstall := r_int.tlbcheck;
v_int.tlbcheck := '0';
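-- If the iTLB lookup just completed was a miss, the fetch can't be
-- translated; signal a fetch failure.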
if r_int.tlbcheck = '1' and itlb_hit = '0' then
v.fetch_fail := '1';
end if;
-- Combinatorial computation of the CIA for the next cycle.
-- Needs to be simple so the result can be used for RAM
-- and TLB access in the icache.
-- If we are stalled, this still advances, and the assumption
-- is that it will not be used.
m32 := r_int.mode_32bit;
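-- Redirects from writeback take priority over redirects from
-- decode1, which take priority over an iTLB-lookup stall.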
if w_in.redirect = '1' then
next_nia := w_in.redirect_nia(63 downto 2) & "00";
m32 := w_in.mode_32bit;
v.virt_mode := w_in.virt_mode;
v.priv_mode := w_in.priv_mode;
v.big_endian := w_in.big_endian;
v_int.mode_32bit := w_in.mode_32bit;
v.fetch_fail := '0';
elsif d_in.redirect = '1' then
next_nia := d_in.redirect_nia(63 downto 2) & "00";
v.fetch_fail := '0';
elsif r_int.tlbstall = '1' then
-- This case is needed so that the correct icache tags are read
-- while we are stalled waiting for the iTLB lookup.
next_nia := r.nia;
else
next_nia := r_int.next_nia;
end if;
if m32 = '1' then
next_nia(63 downto 32) := (others => '0');
end if;
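-- Latch the fetch address for this cycle and speculatively compute
-- its sequential successor (NIA + 4) for the next cycle.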
v.nia := next_nia;
v_int.next_nia := std_ulogic_vector(unsigned(next_nia) + 4);
-- Use v_int.next_nia as the BTC read address before it can be
-- overridden with the reset or interrupt address or the predicted
-- branch target address, in order to improve timing. If it does get
-- overridden, rd_is_niap4 is cleared to indicate that the BTC data
-- doesn't apply.
btc_rd_addr <= unsigned(v_int.next_nia(BTC_ADDR_BITS + 1 downto 2));
v_int.rd_is_niap4 := '1';
-- If the last NIA value went down with a stop mark, it didn't get
-- executed, and hence we shouldn't increment NIA.
advance_nia <= rst or w_in.interrupt or w_in.redirect or d_in.redirect or
(not r.stop_mark and not (r.req and stall_in));
-- reduce metavalue warnings in sim
if is_X(rst) then
advance_nia <= '1';
end if;
-- Translate next_nia to real if possible, otherwise we have to stall
-- and look up the TLB.
ehit := '0';
esel := '0';
eaa_priv := '1';
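-- Look up the effective page number of next_nia in the two ERAT
-- entries; esel records which entry matched.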
if next_nia(63 downto MIN_LG_PGSZ) = erat.epn1 and erat.valid(1) = '1' then
ehit := '1';
esel := '1';
end if;
if next_nia(63 downto MIN_LG_PGSZ) = erat.epn0 and erat.valid(0) = '1' then
ehit := '1';
end if;
if v.virt_mode = '0' then
v.rpn := v.nia(REAL_ADDR_BITS - 1 downto MIN_LG_PGSZ);
eaa_priv := '1';
elsif esel = '1' then
v.rpn := erat.rpn1;
eaa_priv := erat.priv1;
else
v.rpn := erat.rpn0;
eaa_priv := erat.priv0;
end if;
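-- On an ERAT miss in virtual mode, stall fetching and start an iTLB
-- lookup, unless one is already in progress or the fetch has failed.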
if advance_nia = '1' and ehit = '0' and v.virt_mode = '1' and
r_int.tlbcheck = '0' and v.fetch_fail = '0' then
v_int.tlbstall := '1';
v_int.tlbcheck := '1';
end if;
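-- With a translation in hand (ERAT hit, or real mode), do the
-- privilege check: fetching from a privileged page in problem state
-- is a protection violation.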
if ehit = '1' or v.virt_mode = '0' then
if eaa_priv = '1' and v.priv_mode = '0' then
v.fetch_fail := '1';
else
v.fetch_fail := '0';
end if;
end if;
erat_hit <= ehit and advance_nia;
erat_sel <= esel;
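-- Reset and interrupts override the next fetch address computed
-- above.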
if rst /= '0' then
if alt_reset_in = '1' then
v_int.next_nia := ALT_RESET_ADDRESS;
else
v_int.next_nia := RESET_ADDRESS;
end if;
elsif w_in.interrupt = '1' then
v_int.next_nia := 52x"0" & w_in.intr_vec(11 downto 2) & "00";
end if;
if rst /= '0' or w_in.interrupt = '1' then
v.req := '0';
v.virt_mode := '0';
v.priv_mode := '1';
v.big_endian := '0';
v_int.mode_32bit := '0';
v_int.rd_is_niap4 := '0';
v_int.tlbstall := '0';
v_int.tlbcheck := '0';
v.fetch_fail := '0';
end if;
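-- A failed fetch stays stalled (with no icache request) until a PTE
-- arrives from the MMU or a redirect or interrupt clears fetch_fail.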
if v.fetch_fail = '1' then
v_int.tlbstall := '1';
end if;
if v_int.tlbstall = '1' then
v.req := '0';
end if;
-- If there is a valid entry in the BTC which corresponds to the next instruction,
-- use that to predict the address of the instruction after that.
-- (w_in.redirect = '0' and d_in.redirect = '0' and r_int.tlbstall = '0')
-- implies v.nia = r_int.next_nia.
-- r_int.rd_is_niap4 implies r_int.next_nia is the address used to read the BTC.
if v.req = '1' and w_in.redirect = '0' and d_in.redirect = '0' and r_int.tlbstall = '0' and
btc_rd_valid = '1' and r_int.rd_is_niap4 = '1' and
btc_rd_data(BTC_WIDTH - 2) = r.virt_mode and
btc_rd_data(BTC_WIDTH - 3 downto BTC_TARGET_BITS)
= r_int.next_nia(BTC_TAG_BITS + BTC_ADDR_BITS + 1 downto BTC_ADDR_BITS + 2) then
v.predicted := btc_rd_data(BTC_WIDTH - 1);
v.pred_ntaken := not btc_rd_data(BTC_WIDTH - 1);
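-- For a predicted-taken branch, redirect fetching to the stored
-- target; rd_is_niap4 is cleared because next_nia will no longer be
-- the address that was used to read the BTC.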
if btc_rd_data(BTC_WIDTH - 1) = '1' then
v_int.next_nia := btc_rd_data(BTC_TARGET_BITS - 1 downto 0) & "00";
v_int.rd_is_niap4 := '0';
end if;
end if;
r_next <= v;
r_next_int <= v_int;
-- Update outputs to the icache
i_out <= r;
i_out.next_nia <= next_nia;
i_out.next_rpn <= v.rpn;
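-- next_nia and next_rpn go to the icache combinationally so it can
-- start its RAM accesses for the new address in the same cycle.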
end process;
end architecture behaviour;