microwatt/fetch1.vhdl

library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.common.all;

entity fetch1 is
    generic(
	RESET_ADDRESS     : std_logic_vector(63 downto 0) := (others => '0');
	ALT_RESET_ADDRESS : std_logic_vector(63 downto 0) := (others => '0');
        HAS_BTC           : boolean := true
	);
    port(
	clk           : in std_ulogic;
	rst           : in std_ulogic;

	-- Control inputs:
	stall_in      : in std_ulogic;
	flush_in      : in std_ulogic;
        inval_btc     : in std_ulogic;
	stop_in       : in std_ulogic;
	alt_reset_in  : in std_ulogic;

	-- redirect from writeback unit
	w_in          : in WritebackToFetch1Type;

        -- redirect from decode1
        d_in          : in Decode1ToFetch1Type;

	-- Request to icache
	i_out         : out Fetch1ToIcacheType;

        -- outputs to logger
        log_out       : out std_ulogic_vector(42 downto 0)
	);
end entity fetch1;

architecture behaviour of fetch1 is
    type reg_internal_t is record
        mode_32bit: std_ulogic;
        rd_is_niap4: std_ulogic;
        next_nia: std_ulogic_vector(63 downto 0);
    end record;
    signal r, r_next : Fetch1ToIcacheType;
    signal r_int, r_next_int : reg_internal_t;
    signal advance_nia : std_ulogic;
    signal log_nia : std_ulogic_vector(42 downto 0);

    constant BTC_ADDR_BITS : integer := 10;
    constant BTC_TAG_BITS : integer := 62 - BTC_ADDR_BITS;
    constant BTC_TARGET_BITS : integer := 62;
    constant BTC_SIZE : integer := 2 ** BTC_ADDR_BITS;
    constant BTC_WIDTH : integer := BTC_TAG_BITS + BTC_TARGET_BITS + 2;
    type btc_mem_type is array (0 to BTC_SIZE - 1) of std_ulogic_vector(BTC_WIDTH - 1 downto 0);

    signal btc_rd_addr : unsigned(BTC_ADDR_BITS - 1 downto 0);
    signal btc_rd_data : std_ulogic_vector(BTC_WIDTH - 1 downto 0) := (others => '0');
    signal btc_rd_valid : std_ulogic := '0';

begin

    regs : process(clk)
    begin
	if rising_edge(clk) then
            log_nia <= r.nia(63) & r.nia(43 downto 2);
	    if r /= r_next and advance_nia = '1' then
		report "fetch1 rst:" & std_ulogic'image(rst) &
                    " IR:" & std_ulogic'image(r_next.virt_mode) &
                    " P:" & std_ulogic'image(r_next.priv_mode) &
                    " E:" & std_ulogic'image(r_next.big_endian) &
                    " 32:" & std_ulogic'image(r_next_int.mode_32bit) &
		    " R:" & std_ulogic'image(w_in.redirect) & std_ulogic'image(d_in.redirect) &
		    " S:" & std_ulogic'image(stall_in) &
		    " T:" & std_ulogic'image(stop_in) &
		    " nia:" & to_hstring(r_next.nia) &
                    " req:" & std_ulogic'image(r_next.req);
	    end if;
            if advance_nia = '1' then
                r <= r_next;
                r_int <= r_next_int;
            end if;
            -- always send the up-to-date stop mark and req
            r.stop_mark <= stop_in;
            r.req <= r_next.req;
	end if;
    end process;
    log_out <= log_nia;

    btc : if HAS_BTC generate
        signal btc_memory : btc_mem_type;
        attribute ram_style : string;
        attribute ram_style of btc_memory : signal is "block";

        signal btc_valids : std_ulogic_vector(BTC_SIZE - 1 downto 0);
        attribute ram_style of btc_valids : signal is "distributed";

        signal btc_wr : std_ulogic;
        signal btc_wr_data : std_ulogic_vector(BTC_WIDTH - 1 downto 0);
        signal btc_wr_addr : std_ulogic_vector(BTC_ADDR_BITS - 1 downto 0);
    begin
        btc_wr_data <= w_in.br_taken &
                       r.virt_mode &
                       w_in.br_nia(63 downto BTC_ADDR_BITS + 2) &
                       w_in.redirect_nia(63 downto 2);
        btc_wr_addr <= w_in.br_nia(BTC_ADDR_BITS + 1 downto 2);
        btc_wr <= w_in.br_last;

        btc_ram : process(clk)
            variable raddr : unsigned(BTC_ADDR_BITS - 1 downto 0);
        begin
            if rising_edge(clk) then
                if advance_nia = '1' then
		    if is_X(btc_rd_addr) then
			btc_rd_data <= (others => 'X');
			btc_rd_valid <= 'X';
		    else
			btc_rd_data <= btc_memory(to_integer(btc_rd_addr));
			btc_rd_valid <= btc_valids(to_integer(btc_rd_addr));
		    end if;
                end if;
                if btc_wr = '1' then
		    assert not is_X(btc_wr_addr) report "Writing to unknown address" severity FAILURE;
                    btc_memory(to_integer(unsigned(btc_wr_addr))) <= btc_wr_data;
                end if;
                if inval_btc = '1' or rst = '1' then
                    btc_valids <= (others => '0');
                elsif btc_wr = '1' then
		    assert not is_X(btc_wr_addr) report "Writing to unknown address" severity FAILURE;
                    btc_valids(to_integer(unsigned(btc_wr_addr))) <= '1';
                end if;
            end if;
        end process;
    end generate;

    comb : process(all)
	variable v : Fetch1ToIcacheType;
	variable v_int : reg_internal_t;
        variable next_nia : std_ulogic_vector(63 downto 0);
        variable m32 : std_ulogic;
    begin
	v := r;
	v_int := r_int;
        v.predicted := '0';
        v.pred_ntaken := '0';
        v.req := not (rst or stop_in);
        -- reduce metavalue warnings in sim
        if is_X(rst) then
            v.req := '0';
        end if;

        -- Combinatorial computation of the CIA for the next cycle.
        -- Needs to be simple so the result can be used for RAM
        -- and TLB access in the icache.
        -- If we are stalled, this still advances, and the assumption
        -- is that it will not be used.
        m32 := r_int.mode_32bit;
        if w_in.redirect = '1' then
            next_nia := w_in.redirect_nia(63 downto 2) & "00";
            m32 := w_in.mode_32bit;
            v.virt_mode := w_in.virt_mode;
            v.priv_mode := w_in.priv_mode;
            v.big_endian := w_in.big_endian;
            v_int.mode_32bit := w_in.mode_32bit;
        elsif d_in.redirect = '1' then
            next_nia := d_in.redirect_nia(63 downto 2) & "00";
        else
            next_nia := r_int.next_nia;
        end if;
        if m32 = '1' then
            next_nia(63 downto 32) := (others => '0');
        end if;
        v.nia := next_nia;

        v_int.next_nia := std_ulogic_vector(unsigned(next_nia) + 4);

        -- Use v_int.next_nia as the BTC read address before it gets possibly
        -- overridden with the reset address or the predicted branch target
        -- address, in order to improve timing.  If it gets overridden then
        -- rd_is_niap4 gets cleared to indicate that the BTC data doesn't apply.
        btc_rd_addr <= unsigned(v_int.next_nia(BTC_ADDR_BITS + 1 downto 2));
        v_int.rd_is_niap4 := '1';

	if rst /= '0' then
	    if alt_reset_in = '1' then
		v_int.next_nia :=  ALT_RESET_ADDRESS;
	    else
		v_int.next_nia :=  RESET_ADDRESS;
	    end if;
            v.virt_mode := '0';
            v.priv_mode := '1';
            v.big_endian := '0';
            v_int.mode_32bit := '0';
            v_int.rd_is_niap4 := '0';
        end if;

        -- If there is a valid entry in the BTC which corresponds to the next instruction,
        -- use that to predict the address of the instruction after that.
	if rst = '0' and w_in.redirect = '0' and d_in.redirect = '0' and
                btc_rd_valid = '1' and r_int.rd_is_niap4 = '1' and
                btc_rd_data(BTC_WIDTH - 2) = r.virt_mode and
                btc_rd_data(BTC_WIDTH - 3 downto BTC_TARGET_BITS)
                    = r_int.next_nia(BTC_TAG_BITS + BTC_ADDR_BITS + 1 downto BTC_ADDR_BITS + 2) then
            v.predicted := btc_rd_data(BTC_WIDTH - 1);
            v.pred_ntaken := not btc_rd_data(BTC_WIDTH - 1);
            if btc_rd_data(BTC_WIDTH - 1) = '1' then
                v_int.next_nia := btc_rd_data(BTC_TARGET_BITS - 1 downto 0) & "00";
                v_int.rd_is_niap4 := '0';
            end if;
        end if;

        -- If the last NIA value went down with a stop mark, it didn't get
        -- executed, and hence we shouldn't increment NIA.
        advance_nia <= rst or w_in.redirect or d_in.redirect or (not r.stop_mark and not stall_in);
        -- reduce metavalue warnings in sim
        if is_X(rst) then
            advance_nia <= '1';
        end if;

	r_next <= v;
	r_next_int <= v_int;

	-- Update outputs to the icache
	i_out <= r;
        i_out.next_nia <= next_nia;

    end process;

end architecture behaviour;